1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 /** @file handler.cc
24
25 @brief
26 Handler-calling-functions
27 */
28
29 #include "handler.h"
30
31 #include "my_bit.h" // my_count_bits
32 #include "myisam.h" // TT_FOR_UPGRADE
33 #include "mysql_version.h" // MYSQL_VERSION_ID
34
35 #include "binlog.h" // mysql_bin_log
36 #include "debug_sync.h" // DEBUG_SYNC
37 #include "discover.h" // writefrm
38 #include "log.h" // sql_print_error
39 #include "log_event.h" // Write_rows_log_event
40 #include "my_bitmap.h" // MY_BITMAP
41 #include "probes_mysql.h" // MYSQL_HANDLER_WRLOCK_START
42 #include "opt_costconstantcache.h" // reload_optimizer_cost_constants
43 #include "rpl_handler.h" // RUN_HOOK
44 #include "sql_base.h" // free_io_cache
45 #include "sql_parse.h" // check_stack_overrun
46 #include "sql_plugin.h" // plugin_foreach
47 #include "sql_table.h" // build_table_filename
48 #include "transaction.h" // trans_commit_implicit
49 #include "trigger_def.h" // TRG_EXT
50 #include "sql_select.h" // actual_key_parts
51 #include "rpl_write_set_handler.h" // add_pke
52 #include "auth_common.h" // check_readonly() and SUPER_ACL
53
54
55 #include "pfs_file_provider.h"
56 #include "mysql/psi/mysql_file.h"
57
58 #include <pfs_table_provider.h>
59 #include <mysql/psi/mysql_table.h>
60
61 #include <pfs_transaction_provider.h>
62 #include <mysql/psi/mysql_transaction.h>
63 #include "opt_hints.h"
64
65 #ifdef WITH_WSREP
66 #include "partitioning/partition_handler.h"
67 #endif
68 #include <list>
69 #include <cstring>
70 #include <string>
71 #include <boost/foreach.hpp>
72 #include <boost/tokenizer.hpp>
73 #include <boost/algorithm/string.hpp>
74
75 /**
76 @def MYSQL_TABLE_IO_WAIT
77 Instrumentation helper for table io_waits.
78 Note that this helper is intended to be used from
79 within the handler class only, as it uses members
80 from @c handler
81 Performance schema events are instrumented as follows:
82 - in non batch mode, one event is generated per call
83 - in batch mode, the number of rows affected is saved
84 in @c m_psi_numrows, so that @c end_psi_batch_mode()
85 generates a single event for the batch.
86 @param OP the table operation to be performed
87 @param INDEX the table index used if any, or MAX_KEY.
88 @param PAYLOAD instrumented code to execute
89 @sa handler::end_psi_batch_mode.
90 */
#ifdef HAVE_PSI_TABLE_INTERFACE
/*
  Instrumented build: wrap PAYLOAD in a performance-schema table io wait.
  The three switch arms implement the batch-mode state machine described
  in the doxygen block above:
  - PSI_BATCH_MODE_NONE: one start/end event pair per call, using a
    function-local locker state so the macro is reentrancy-safe.
  - PSI_BATCH_MODE_STARTING: first row of a batch; the locker is kept in
    the handler member m_psi_locker and the mode advances to STARTED.
  - PSI_BATCH_MODE_STARTED (and default): subsequent batch rows; only the
    row counter m_psi_numrows is bumped (when RESULT indicates success),
    the single event is emitted later by end_psi_batch_mode().
  When m_psi is NULL (table not instrumented) only PAYLOAD runs.
*/
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD)       \
  {                                                           \
    if (m_psi != NULL)                                        \
    {                                                         \
      switch (m_psi_batch_mode)                               \
      {                                                       \
        case PSI_BATCH_MODE_NONE:                             \
        {                                                     \
          PSI_table_locker *sub_locker= NULL;                 \
          PSI_table_locker_state reentrant_safe_state;        \
          sub_locker= PSI_TABLE_CALL(start_table_io_wait)     \
            (& reentrant_safe_state, m_psi, OP, INDEX,        \
             __FILE__, __LINE__);                             \
          PAYLOAD                                             \
          if (sub_locker != NULL)                             \
            PSI_TABLE_CALL(end_table_io_wait)                 \
              (sub_locker, 1);                                \
          break;                                              \
        }                                                     \
        case PSI_BATCH_MODE_STARTING:                         \
        {                                                     \
          m_psi_locker= PSI_TABLE_CALL(start_table_io_wait)   \
            (& m_psi_locker_state, m_psi, OP, INDEX,          \
             __FILE__, __LINE__);                             \
          PAYLOAD                                             \
          if (!RESULT)                                        \
            m_psi_numrows++;                                  \
          m_psi_batch_mode= PSI_BATCH_MODE_STARTED;           \
          break;                                              \
        }                                                     \
        case PSI_BATCH_MODE_STARTED:                          \
        default:                                              \
        {                                                     \
          assert(m_psi_batch_mode                             \
                 == PSI_BATCH_MODE_STARTED);                  \
          PAYLOAD                                             \
          if (!RESULT)                                        \
            m_psi_numrows++;                                  \
          break;                                              \
        }                                                     \
      }                                                       \
    }                                                         \
    else                                                      \
    {                                                         \
      PAYLOAD                                                 \
    }                                                         \
  }
#else
/* Non-instrumented build: the helper reduces to the payload alone. */
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \
  PAYLOAD
#endif
143
144 /**
145 @def MYSQL_TABLE_LOCK_WAIT
  Instrumentation helper for table lock waits.
147 @param OP the table operation to be performed
148 @param FLAGS per table operation flags.
149 @param PAYLOAD the code to instrument.
150 @sa MYSQL_END_TABLE_WAIT.
151 */
#ifdef HAVE_PSI_TABLE_INTERFACE
/*
  Instrumented build: bracket PAYLOAD with a performance-schema table
  lock wait event (start before, end after, only when a locker was
  actually created). When m_psi is NULL only PAYLOAD runs.
*/
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)      \
  {                                                    \
    if (m_psi != NULL)                                 \
    {                                                  \
      PSI_table_locker *locker;                        \
      PSI_table_locker_state state;                    \
      locker= PSI_TABLE_CALL(start_table_lock_wait)    \
        (& state, m_psi, OP, FLAGS,                    \
         __FILE__, __LINE__);                          \
      PAYLOAD                                          \
      if (locker != NULL)                              \
        PSI_TABLE_CALL(end_table_lock_wait)(locker);   \
    }                                                  \
    else                                               \
    {                                                  \
      PAYLOAD                                          \
    }                                                  \
  }
#else
/* Non-instrumented build: the helper reduces to the payload alone. */
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
  PAYLOAD
#endif
175
176 using std::min;
177 using std::max;
178 using std::list;
179
// This is a temporary backporting fix.
#ifndef HAVE_LOG2
/*
  This will be slightly slower and perhaps a tiny bit less accurate than
  doing it the IEEE754 way but log2() should be available on C99 systems.

  NOTE(review): this (re)defines the libm name log2 when the build does
  not detect one; kept byte-identical on purpose.
*/
inline double log2(double x)
{
  return (log(x) / M_LN2);  // change of base: log2(x) = ln(x) / ln(2)
}
#endif
191 #ifdef WITH_WSREP
192 #include "wsrep_mysqld.h"
193 #include "wsrep_xid.h"
194 #endif
195 /*
196 While we have legacy_db_type, we have this array to
197 check for dups and to find handlerton from legacy_db_type.
198 Remove when legacy_db_type is finally gone
199 */
200 st_plugin_int *hton2plugin[MAX_HA];
201
202 /**
203 Array allowing to check if handlerton is builtin without
204 acquiring LOCK_plugin.
205 */
206 static bool builtin_htons[MAX_HA];
207
ha_resolve_storage_engine_name(const handlerton * db_type)208 const char *ha_resolve_storage_engine_name(const handlerton *db_type)
209 {
210 return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
211 }
212
// Map from legacy_db_type code to its handlerton, for installed engines.
static handlerton *installed_htons[128];

#define BITMAP_STACKBUF_SIZE (128/8)

// Defaults applied when CREATE TABLE/INDEX gives no KEY options.
KEY_CREATE_INFO default_key_create_info=
{ HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };

/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;

// Pairs of (historical alias, current engine name); NullS-terminated.
static const LEX_STRING sys_table_aliases[]=
{
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
  { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
  { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
  {NullS, 0}
};

// Indexed by enum row_type; "?" slots are reserved values.
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
  /* Reserved to be "PAGE" in future versions */ "?",
  "?","?","?"
};

// Names for the tx_isolation system variable, in enum order.
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
                               tx_isolation_names, NULL};
247
248 #ifndef NDEBUG
249
ha_legacy_type_name(legacy_db_type legacy_type)250 const char *ha_legacy_type_name(legacy_db_type legacy_type)
251 {
252 switch (legacy_type)
253 {
254 case DB_TYPE_UNKNOWN:
255 return "DB_TYPE_UNKNOWN";
256 case DB_TYPE_DIAB_ISAM:
257 return "DB_TYPE_DIAB_ISAM";
258 case DB_TYPE_HASH:
259 return "DB_TYPE_HASH";
260 case DB_TYPE_MISAM:
261 return "DB_TYPE_MISAM";
262 case DB_TYPE_PISAM:
263 return "DB_TYPE_PISAM";
264 case DB_TYPE_RMS_ISAM:
265 return "DB_TYPE_RMS_ISAM";
266 case DB_TYPE_HEAP:
267 return "DB_TYPE_HEAP";
268 case DB_TYPE_ISAM:
269 return "DB_TYPE_ISAM";
270 case DB_TYPE_MRG_ISAM:
271 return "DB_TYPE_MRG_ISAM";
272 case DB_TYPE_MYISAM:
273 return "DB_TYPE_MYISAM";
274 case DB_TYPE_MRG_MYISAM:
275 return "DB_TYPE_MRG_MYISAM";
276 case DB_TYPE_BERKELEY_DB:
277 return "DB_TYPE_BERKELEY_DB";
278 case DB_TYPE_INNODB:
279 return "DB_TYPE_INNODB";
280 case DB_TYPE_GEMINI:
281 return "DB_TYPE_GEMINI";
282 case DB_TYPE_NDBCLUSTER:
283 return "DB_TYPE_NDBCLUSTER";
284 case DB_TYPE_EXAMPLE_DB:
285 return "DB_TYPE_EXAMPLE_DB";
286 case DB_TYPE_ARCHIVE_DB:
287 return "DB_TYPE_ARCHIVE_DB";
288 case DB_TYPE_CSV_DB:
289 return "DB_TYPE_CSV_DB";
290 case DB_TYPE_FEDERATED_DB:
291 return "DB_TYPE_FEDERATED_DB";
292 case DB_TYPE_BLACKHOLE_DB:
293 return "DB_TYPE_BLACKHOLE_DB";
294 case DB_TYPE_PARTITION_DB:
295 return "DB_TYPE_PARTITION_DB";
296 case DB_TYPE_BINLOG:
297 return "DB_TYPE_BINLOG";
298 case DB_TYPE_SOLID:
299 return "DB_TYPE_SOLID";
300 case DB_TYPE_PBXT:
301 return "DB_TYPE_PBXT";
302 case DB_TYPE_TABLE_FUNCTION:
303 return "DB_TYPE_TABLE_FUNCTION";
304 case DB_TYPE_MEMCACHE:
305 return "DB_TYPE_MEMCACHE";
306 case DB_TYPE_FALCON:
307 return "DB_TYPE_FALCON";
308 case DB_TYPE_MARIA:
309 return "DB_TYPE_MARIA";
310 case DB_TYPE_PERFORMANCE_SCHEMA:
311 return "DB_TYPE_PERFORMANCE_SCHEMA";
312 default:
313 return "DB_TYPE_DYNAMIC";
314 }
315 }
316 #endif
317
318 /**
319 Database name that hold most of mysqld system tables.
320 Current code assumes that, there exists only some
321 specific "database name" designated as system database.
322 */
323 const char* mysqld_system_database= "mysql";
324
325 // System tables that belong to mysqld_system_database.
326 st_handler_tablename mysqld_system_tables[]= {
327 {mysqld_system_database, "db"},
328 {mysqld_system_database, "user"},
329 {mysqld_system_database, "host"},
330 {mysqld_system_database, "func"},
331 {mysqld_system_database, "proc"},
332 {mysqld_system_database, "event"},
333 {mysqld_system_database, "plugin"},
334 {mysqld_system_database, "servers"},
335 {mysqld_system_database, "procs_priv"},
336 {mysqld_system_database, "tables_priv"},
337 {mysqld_system_database, "proxies_priv"},
338 {mysqld_system_database, "columns_priv"},
339 {mysqld_system_database, "time_zone"},
340 {mysqld_system_database, "time_zone_name"},
341 {mysqld_system_database, "time_zone_leap_second"},
342 {mysqld_system_database, "time_zone_transition"},
343 {mysqld_system_database, "time_zone_transition_type"},
344 {mysqld_system_database, "help_category"},
345 {mysqld_system_database, "help_keyword"},
346 {mysqld_system_database, "help_relation"},
347 {mysqld_system_database, "help_topic"},
348 {mysqld_system_database, "innodb_table_stats"},
349 {mysqld_system_database, "innodb_index_stats"},
350 {(const char *)NULL, (const char *)NULL} /* This must be at the end */
351 };
352
353 /**
354 This static pointer holds list of system databases from SQL layer and
355 various SE's. The required memory is allocated once, and never freed.
356 */
357 static const char **known_system_databases= NULL;
358 static const char **ha_known_system_databases();
359
360 // Called for each SE to get SE specific system database.
361 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
362 void *arg);
363
364 // Called for each SE to check if given db.table_name is a system table.
365 static my_bool check_engine_system_table_handlerton(THD *unused,
366 plugin_ref plugin,
367 void *arg);
368 /**
369 Structure used by SE during check for system table.
370 This structure is passed to each SE handlerton and the status (OUT param)
371 is collected.
372 */
373 struct st_sys_tbl_chk_params
374 {
375 const char *db; // IN param
376 const char *table_name; // IN param
377 bool is_sql_layer_system_table; // IN param
378 legacy_db_type db_type; // IN param
379
380 enum enum_status
381 {
382 // db.table_name is user table.
383 USER_TABLE,
384 /*
385 db.table_name is a system table,
386 but may not be supported by SE.
387 */
388 SYSTEM_TABLE,
389 /*
390 db.table_name is a system table,
391 and is supported by SE.
392 */
393 SE_SUPPORTED_SYSTEM_TABLE
394 } status; // OUT param
395 };
396
397
ha_default_plugin(THD * thd)398 static plugin_ref ha_default_plugin(THD *thd)
399 {
400 if (thd->variables.table_plugin)
401 return thd->variables.table_plugin;
402 return my_plugin_lock(thd, &global_system_variables.table_plugin);
403 }
404
405
406 /** @brief
407 Return the default storage engine handlerton used for non-temp tables
408 for thread
409
410 SYNOPSIS
411 ha_default_handlerton(thd)
412 thd current thread
413
414 RETURN
415 pointer to handlerton
416 */
ha_default_handlerton(THD * thd)417 handlerton *ha_default_handlerton(THD *thd)
418 {
419 plugin_ref plugin= ha_default_plugin(thd);
420 assert(plugin);
421 handlerton *hton= plugin_data<handlerton*>(plugin);
422 assert(hton);
423 return hton;
424 }
425
426
ha_default_temp_plugin(THD * thd)427 static plugin_ref ha_default_temp_plugin(THD *thd)
428 {
429 if (thd->variables.temp_table_plugin)
430 return thd->variables.temp_table_plugin;
431 return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
432 }
433
434
435 /** @brief
436 Return the default storage engine handlerton used for explicitly
437 created temp tables for a thread
438
439 SYNOPSIS
440 ha_default_temp_handlerton(thd)
441 thd current thread
442
443 RETURN
444 pointer to handlerton
445 */
ha_default_temp_handlerton(THD * thd)446 handlerton *ha_default_temp_handlerton(THD *thd)
447 {
448 plugin_ref plugin= ha_default_temp_plugin(thd);
449 assert(plugin);
450 handlerton *hton= plugin_data<handlerton*>(plugin);
451 assert(hton);
452 return hton;
453 }
454
455
456 /**
457 Resolve handlerton plugin by name, without checking for "DEFAULT" or
458 HTON_NOT_USER_SELECTABLE.
459
460 @param thd Thread context.
461 @param name Plugin name.
462
463 @return plugin or NULL if not found.
464 */
ha_resolve_by_name_raw(THD * thd,const LEX_CSTRING & name)465 plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name)
466 {
467 return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
468 }
469
470 /** @brief
471 Return the storage engine handlerton for the supplied name
472
473 SYNOPSIS
474 ha_resolve_by_name(thd, name)
475 thd current thread
476 name name of storage engine
477
478 RETURN
479 pointer to storage engine plugin handle
480 */
ha_resolve_by_name(THD * thd,const LEX_STRING * name,bool is_temp_table)481 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
482 bool is_temp_table)
483 {
484 const LEX_STRING *table_alias;
485 plugin_ref plugin;
486
487 redo:
488 /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
489 if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
490 (const uchar *)name->str, name->length,
491 (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
492 return is_temp_table ?
493 ha_default_plugin(thd) : ha_default_temp_plugin(thd);
494
495 LEX_CSTRING cstring_name= {name->str, name->length};
496 if ((plugin= ha_resolve_by_name_raw(thd, cstring_name)))
497 {
498 handlerton *hton= plugin_data<handlerton*>(plugin);
499 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
500 return plugin;
501
502 /*
503 unlocking plugin immediately after locking is relatively low cost.
504 */
505 plugin_unlock(thd, plugin);
506 }
507
508 /*
509 We check for the historical aliases.
510 */
511 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
512 {
513 if (!my_strnncoll(&my_charset_latin1,
514 (const uchar *)name->str, name->length,
515 (const uchar *)table_alias->str, table_alias->length))
516 {
517 name= table_alias + 1;
518 goto redo;
519 }
520 }
521
522 return NULL;
523 }
524
525 std::string normalized_se_str= "";
526
527 /*
528 Parse comma separated list of disabled storage engine names
529 and create a normalized string by appending storage names that
530 have aliases. This normalized string is used to disallow
531 table/tablespace creation under the storage engines specified.
532 */
ha_set_normalized_disabled_se_str(const std::string & disabled_se)533 void ha_set_normalized_disabled_se_str(const std::string &disabled_se)
534 {
535 boost::char_separator<char> sep(",");
536 boost::tokenizer< boost::char_separator<char> > tokens(disabled_se, sep);
537 normalized_se_str.append(",");
538 BOOST_FOREACH (std::string se_name, tokens)
539 {
540 const LEX_STRING *table_alias;
541 boost::algorithm::to_upper(se_name);
542 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
543 {
544 if (!strcasecmp(se_name.c_str(), table_alias->str) ||
545 !strcasecmp(se_name.c_str(), (table_alias+1)->str))
546 {
547 normalized_se_str.append(std::string(table_alias->str) + "," +
548 std::string((table_alias+1)->str) + ",");
549 break;
550 }
551 }
552
553 if (table_alias->str == NULL)
554 normalized_se_str.append(se_name+",");
555 }
556 }
557
558 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)559 bool ha_is_storage_engine_disabled(handlerton *se_handle)
560 {
561 if (normalized_se_str.size())
562 {
563 std::string se_name(",");
564 se_name.append(ha_resolve_storage_engine_name(se_handle));
565 se_name.append(",");
566 boost::algorithm::to_upper(se_name);
567 if(strstr(normalized_se_str.c_str(), se_name.c_str()))
568 return true;
569 }
570 return false;
571 }
572
573
/**
  Obtain a plugin reference for the given handlerton.
  Returns NULL when hton is NULL.
*/
plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
{
  if (hton)
  {
    st_plugin_int **plugin= hton2plugin + hton->slot;

#ifdef NDEBUG
    /*
      Take a shortcut for builtin engines -- return pointer to plugin
      without acquiring LOCK_plugin mutex. This is safe since such
      plugins are not deleted until shutdown and we don't do reference
      counting in non-debug builds for them.

      Since we have reference to handlerton on our hands, this method
      can't be called concurrently to non-builtin handlerton initialization/
      deinitialization. So it is safe to access builtin_htons[] without
      additional locking.
    */
    if (builtin_htons[hton->slot])
      return *plugin;

    return my_plugin_lock(thd, plugin);
#else
    /*
      We can't take shortcut in debug builds.
      At least assert that builtin_htons[slot] is set correctly.

      NOTE: plugin_ref has a different representation in debug builds,
      hence &plugin here vs plugin in the NDEBUG branch above.
    */
    assert(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == NULL));
    return my_plugin_lock(thd, &plugin);
#endif
  }
  return NULL;
}
607
608
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)609 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
610 {
611 plugin_ref plugin;
612 switch (db_type) {
613 case DB_TYPE_DEFAULT:
614 return ha_default_handlerton(thd);
615 default:
616 if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
617 (plugin= ha_lock_engine(thd, installed_htons[db_type])))
618 return plugin_data<handlerton*>(plugin);
619 /* fall through */
620 case DB_TYPE_UNKNOWN:
621 return NULL;
622 }
623 }
624
625
626 /**
627 Use other database handler if databasehandler is not compiled in.
628 */
/**
  Use other database handler if databasehandler is not compiled in.

  @param thd            current thread.
  @param database_type  requested legacy engine code.
  @param no_substitute  when true, fail (optionally reporting
                        ER_FEATURE_DISABLED) instead of substituting.
  @param report_error   emit my_error() when substitution is forbidden.
  @return usable handlerton, or NULL when no_substitute and the engine
          is unavailable.
*/
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
                         bool no_substitute, bool report_error)
{
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
  if (ha_storage_engine_is_enabled(hton))
    return hton;

  /* Engine unavailable: either error out or substitute below. */
  if (no_substitute)
  {
    if (report_error)
    {
      const char *engine_name= ha_resolve_storage_engine_name(hton);
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
    }
    return NULL;
  }

  /*
    NOTE(review): running the after_rollback hook here, in an engine
    lookup path, looks out of place — verify against callers before
    relying on this side effect.
  */
  (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));

  switch (database_type) {
  case DB_TYPE_MRG_ISAM:
    /* Old MERGE implementation: substitute the MyISAM-based one. */
    return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
  default:
    break;
  }

  /* Fall back to the session default engine. */
  return ha_default_handlerton(thd);
} /* ha_checktype */
657
658
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)659 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
660 handlerton *db_type)
661 {
662 handler *file;
663 DBUG_ENTER("get_new_handler");
664 DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
665
666 if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
667 {
668 if ((file= db_type->create(db_type, share, alloc)))
669 file->init();
670 DBUG_RETURN(file);
671 }
672 /*
673 Try the default table type
674 Here the call to current_thd() is ok as we call this function a lot of
675 times but we enter this branch very seldom.
676 */
677 DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
678 }
679
680
// Table of handler error message strings, indexed by (errno - HA_ERR_FIRST);
// allocated and filled by ha_init_errors(), freed by ha_end().
static const char **handler_errmsgs;

C_MODE_START
// Lookup callback registered with my_error_register(); nr must be in
// [HA_ERR_FIRST, HA_ERR_LAST].
static const char *get_handler_errmsg(int nr)
{
  return handler_errmsgs[nr - HA_ERR_FIRST];
}
C_MODE_END
689
690
691 /**
692 Register handler error messages for use with my_error().
693
694 @retval
695 0 OK
696 @retval
697 !=0 Error
698 */
699
ha_init_errors(void)700 int ha_init_errors(void)
701 {
702 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
703
704 /* Allocate a pointer array for the error message strings. */
705 /* Zerofill it to avoid uninitialized gaps. */
706 if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
707 HA_ERR_ERRORS * sizeof(char*),
708 MYF(MY_WME | MY_ZEROFILL))))
709 return 1;
710
711 /* Set the dedicated error messages. */
712 SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
713 SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
714 SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");
715 SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
716 SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
717 SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
718 SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
719 SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
720 SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
721 SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
722 SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
723 SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
724 SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
725 SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
726 SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
727 SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
728 SETMSG(HA_ERR_TOO_BIG_ROW, "Too big row");
729 SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
730 SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
731 SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
732 SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
733 SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
734 SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
735 SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
736 SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
737 SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
738 SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
739 SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
740 SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
741 SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
742 SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
743 SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
744 SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
745 SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
746 SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
747 SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
748 SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
749 SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
750 SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
751 SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
752 SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
753 SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
754 SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
755 SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
756 SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
757 SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
758 SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
759 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
760 SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");
761 SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
762 SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
763 SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
764 SETMSG(HA_ERR_TABLE_CORRUPT, ER_DEFAULT(ER_TABLE_CORRUPT));
765 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
766 SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY, ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
767 SETMSG(HA_ERR_WRONG_FILE_NAME, ER_DEFAULT(ER_WRONG_FILE_NAME));
768 SETMSG(HA_ERR_NOT_ALLOWED_COMMAND, ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
769 SETMSG(HA_ERR_COMPUTE_FAILED, "Compute virtual column value failed");
770 SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP, "Too many nested sub-expressions in a full-text search");
771 /* Register the error messages for use with my_error(). */
772 return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
773 }
774
775
/**
  Tear down a storage engine plugin's handlerton: close the engine
  (panic), run the plugin's deinit hook, release the hton's slot for
  reuse, and free the hton itself.

  @param plugin  plugin being finalized; plugin->data holds the hton.
  @return always 0.
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;
  DBUG_ENTER("ha_finalize_handlerton");

  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
  if (!hton)
    goto end;

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Only an active engine occupies an installed_htons[] entry. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  /* Ask the engine to flush/close everything before unloading. */
  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  if (plugin->plugin->deinit)
  {
    /*
      Today we have no defined/special behavior for uninstalling
      engine plugins.
    */
    DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
    if (plugin->plugin->deinit(NULL))
    {
      DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
                             plugin->name.str));
    }
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited.
  */
  if (hton->slot != HA_SLOT_UNDEF)
  {
    /* Make sure we are not unplugging another plugin */
    assert(hton2plugin[hton->slot] == plugin);
    assert(hton->slot < MAX_HA);
    hton2plugin[hton->slot]= NULL;
    builtin_htons[hton->slot]= false; /* Extra correctness. */
  }

  my_free(hton);

 end:
  DBUG_RETURN(0);
}
832
833
/**
  Initialize a storage engine plugin: allocate its handlerton, run the
  plugin's init hook, assign a legacy typecode and a hton2plugin[] slot,
  and account for savepoint storage and 2pc capability.

  @param plugin  plugin being installed; on success plugin->data points
                 to the new handlerton.
  @return 0 on success, 1 on failure (plugin->data reset to NULL).
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;
  DBUG_ENTER("ha_initialize_handlerton");
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));

  /* MY_ZEROFILL: all hooks/flags default to 0/NULL unless init sets them. */
  hton= (handlerton *)my_malloc(key_memory_handlerton,
                                sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));

  if (hton == NULL)
  {
    sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
                    plugin->name.str);
    goto err_no_hton_memory;
  }

  hton->slot= HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton))
  {
    sql_print_error("Plugin '%s' init function returned error.",
                    plugin->name.str);
    goto err;
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  DBUG_PRINT("info", ("hton->state=%d", hton->state));
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      ulong fslot;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Conflicting/invalid typecode: assign a free dynamic one. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          goto err_deinit;
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }

      /*
        In case a plugin is uninstalled and re-installed later, it should
        reuse an array slot. Otherwise the number of uninstall/install
        cycles would be limited. So look for a free slot.
      */
      DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
      for (fslot= 0; fslot < total_ha; fslot++)
      {
        if (!hton2plugin[fslot])
          break;
      }
      if (fslot < total_ha)
        hton->slot= fslot;
      else
      {
        if (total_ha >= MAX_HA)
        {
          sql_print_error("Too many plugins loaded. Limit is %lu. "
                          "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
          goto err_deinit;
        }
        hton->slot= total_ha++;
      }
      installed_htons[hton->db_type]= hton;
      /* Carve out this engine's savepoint area within the shared buffer. */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      hton2plugin[hton->slot]=plugin;
      builtin_htons[hton->slot]= (plugin->plugin_dl == NULL);
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
    /* fall through */
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_INNODB:
    innodb_hton= hton;
    break;
  default:
    break;
  };

  /*
    Re-load the optimizer cost constants since this storage engine can
    have non-default cost constants.
  */
  reload_optimizer_cost_constants();

  DBUG_RETURN(0);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(1);
}
974
ha_init()975 int ha_init()
976 {
977 int error= 0;
978 DBUG_ENTER("ha_init");
979
980 assert(total_ha < MAX_HA);
981 /*
982 Check if there is a transaction-capable storage engine besides the
983 binary log (which is considered a transaction-capable storage engine in
984 counting total_ha)
985 */
986 opt_using_transactions= total_ha>(ulong)opt_bin_log;
987 savepoint_alloc_size+= sizeof(SAVEPOINT);
988
989 /*
990 Initialize system database name cache.
991 This cache is used to do a quick check if a given
992 db.tablename is a system table.
993 */
994 known_system_databases= ha_known_system_databases();
995
996 DBUG_RETURN(error);
997 }
998
ha_end()999 void ha_end()
1000 {
1001 // Unregister handler error messages.
1002 my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
1003 my_free(handler_errmsgs);
1004 }
1005
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)1006 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
1007 void *path)
1008 {
1009 handlerton *hton= plugin_data<handlerton*>(plugin);
1010 if (hton->state == SHOW_OPTION_YES && hton->drop_database)
1011 hton->drop_database(hton, (char *)path);
1012 return FALSE;
1013 }
1014
1015
ha_drop_database(char * path)1016 void ha_drop_database(char* path)
1017 {
1018 plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
1019 }
1020
1021
closecon_handlerton(THD * thd,plugin_ref plugin,void * unused)1022 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
1023 void *unused)
1024 {
1025 handlerton *hton= plugin_data<handlerton*>(plugin);
1026 /*
1027 there's no need to rollback here as all transactions must
1028 be rolled back already
1029 */
1030 if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
1031 {
1032 if (hton->close_connection)
1033 hton->close_connection(hton, thd);
1034 /* make sure ha_data is reset and ha_data_lock is released */
1035 thd_set_ha_data(thd, hton, NULL);
1036 }
1037 return FALSE;
1038 }
1039
1040
1041 /**
1042 @note
1043 don't bother to rollback here, it's done already
1044 */
ha_close_connection(THD * thd)1045 void ha_close_connection(THD* thd)
1046 {
1047 plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1048 }
1049
1050
kill_handlerton(THD * thd,plugin_ref plugin,void *)1051 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
1052 {
1053 handlerton *hton= plugin_data<handlerton*>(plugin);
1054
1055 if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
1056 {
1057 if (thd_get_ha_data(thd, hton))
1058 hton->kill_connection(hton, thd);
1059 }
1060
1061 return FALSE;
1062 }
1063
ha_kill_connection(THD * thd)1064 void ha_kill_connection(THD *thd)
1065 {
1066 plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1067 }
1068
1069
1070 /* ========================================================================
1071 ======================= TRANSACTIONS ===================================*/
1072
1073 /**
1074 Transaction handling in the server
1075 ==================================
1076
1077 In each client connection, MySQL maintains two transactional
1078 states:
1079 - a statement transaction,
1080 - a standard, also called normal transaction.
1081
1082 Historical note
1083 ---------------
1084 "Statement transaction" is a non-standard term that comes
1085 from the times when MySQL supported BerkeleyDB storage engine.
1086
1087 First of all, it should be said that in BerkeleyDB auto-commit
1088 mode auto-commits operations that are atomic to the storage
1089 engine itself, such as a write of a record, and are too
1090 high-granular to be atomic from the application perspective
1091 (MySQL). One SQL statement could involve many BerkeleyDB
1092 auto-committed operations and thus BerkeleyDB auto-commit was of
1093 little use to MySQL.
1094
1095 Secondly, instead of SQL standard savepoints, BerkeleyDB
1096 provided the concept of "nested transactions". In a nutshell,
1097 transactions could be arbitrarily nested, but when the parent
1098 transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
1100 Commit of a nested transaction, in turn, made its changes
1101 visible, but not durable: it destroyed the nested transaction,
1102 all its changes would become available to the parent and
1103 currently active nested transactions of this parent.
1104
1105 So the mechanism of nested transactions was employed to
1106 provide "all or nothing" guarantee of SQL statements
1107 required by the standard.
1108 A nested transaction would be created at start of each SQL
1109 statement, and destroyed (committed or aborted) at statement
1110 end. Such nested transaction was internally referred to as
1111 a "statement transaction" and gave birth to the term.
1112
1113 (Historical note ends)
1114
1115 Since then a statement transaction is started for each statement
1116 that accesses transactional tables or uses the binary log. If
1117 the statement succeeds, the statement transaction is committed.
1118 If the statement fails, the transaction is rolled back. Commits
1119 of statement transactions are not durable -- each such
1120 transaction is nested in the normal transaction, and if the
1121 normal transaction is rolled back, the effects of all enclosed
1122 statement transactions are undone as well. Technically,
1123 a statement transaction can be viewed as a savepoint which is
1124 maintained automatically in order to make effects of one
1125 statement atomic.
1126
1127 The normal transaction is started by the user and is ended
1128 usually upon a user request as well. The normal transaction
1129 encloses transactions of all statements issued between
1130 its beginning and its end.
1131 In autocommit mode, the normal transaction is equivalent
1132 to the statement transaction.
1133
1134 Since MySQL supports PSEA (pluggable storage engine
1135 architecture), more than one transactional engine can be
1136 active at a time. Hence transactions, from the server
1137 point of view, are always distributed. In particular,
1138 transactional state is maintained independently for each
1139 engine. In order to commit a transaction the two phase
1140 commit protocol is employed.
1141
1142 Not all statements are executed in context of a transaction.
1143 Administrative and status information statements do not modify
1144 engine data, and thus do not start a statement transaction and
1145 also have no effect on the normal transaction. Examples of such
1146 statements are SHOW STATUS and RESET SLAVE.
1147
1148 Similarly DDL statements are not transactional,
1149 and therefore a transaction is [almost] never started for a DDL
1150 statement. The difference between a DDL statement and a purely
1151 administrative statement though is that a DDL statement always
1152 commits the current transaction before proceeding, if there is
1153 any.
1154
1155 At last, SQL statements that work with non-transactional
1156 engines also have no effect on the transaction state of the
1157 connection. Even though they are written to the binary log,
1158 and the binary log is, overall, transactional, the writes
1159 are done in "write-through" mode, directly to the binlog
1160 file, followed with a OS cache sync, in other words,
1161 bypassing the binlog undo log (translog).
1162 They do not commit the current normal transaction.
1163 A failure of a statement that uses non-transactional tables
1164 would cause a rollback of the statement transaction, but
  in case no transactional tables are used,
1166 no statement transaction is started.
1167
1168 Data layout
1169 -----------
1170
1171 The server stores its transaction-related data in
1172 thd->transaction. This structure has two members of type
1173 THD_TRANS. These members correspond to the statement and
1174 normal transactions respectively:
1175
1176 - thd->transaction.stmt contains a list of engines
1177 that are participating in the given statement
1178 - thd->transaction.all contains a list of engines that
1179 have participated in any of the statement transactions started
1180 within the context of the normal transaction.
1181 Each element of the list contains a pointer to the storage
1182 engine, engine-specific transactional data, and engine-specific
1183 transaction flags.
1184
1185 In autocommit mode thd->transaction.all is empty.
1186 Instead, data of thd->transaction.stmt is
1187 used to commit/rollback the normal transaction.
1188
1189 The list of registered engines has a few important properties:
1190 - no engine is registered in the list twice
  - engines are present in the list in a reverse temporal order --
1192 new participants are always added to the beginning of the list.
1193
1194 Transaction life cycle
1195 ----------------------
1196
1197 When a new connection is established, thd->transaction
1198 members are initialized to an empty state.
1199 If a statement uses any tables, all affected engines
1200 are registered in the statement engine list. In
1201 non-autocommit mode, the same engines are registered in
1202 the normal transaction list.
1203 At the end of the statement, the server issues a commit
1204 or a roll back for all engines in the statement list.
1205 At this point transaction flags of an engine, if any, are
1206 propagated from the statement list to the list of the normal
1207 transaction.
1208 When commit/rollback is finished, the statement list is
1209 cleared. It will be filled in again by the next statement,
1210 and emptied again at the next statement's end.
1211
1212 The normal transaction is committed in a similar way
1213 (by going over all engines in thd->transaction.all list)
1214 but at different times:
1215 - upon COMMIT SQL statement is issued by the user
1216 - implicitly, by the server, at the beginning of a DDL statement
1217 or SET AUTOCOMMIT={0|1} statement.
1218
1219 The normal transaction can be rolled back as well:
1220 - if the user has requested so, by issuing ROLLBACK SQL
1221 statement
1222 - if one of the storage engines requested a rollback
1223 by setting thd->transaction_rollback_request. This may
1224 happen in case, e.g., when the transaction in the engine was
1225 chosen a victim of the internal deadlock resolution algorithm
1226 and rolled back internally. When such a situation happens, there
1227 is little the server can do and the only option is to rollback
1228 transactions in all other participating engines. In this case
1229 the rollback is accompanied by an error sent to the user.
1230
1231 As follows from the use cases above, the normal transaction
1232 is never committed when there is an outstanding statement
1233 transaction. In most cases there is no conflict, since
1234 commits of the normal transaction are issued by a stand-alone
1235 administrative or DDL statement, thus no outstanding statement
1236 transaction of the previous statement exists. Besides,
1237 all statements that manipulate with the normal transaction
1238 are prohibited in stored functions and triggers, therefore
1239 no conflicting situation can occur in a sub-statement either.
1240 The remaining rare cases when the server explicitly has
1241 to commit the statement transaction prior to committing the normal
1242 one cover error-handling scenarios (see for example
1243 SQLCOM_LOCK_TABLES).
1244
1245 When committing a statement or a normal transaction, the server
1246 either uses the two-phase commit protocol, or issues a commit
1247 in each engine independently. The two-phase commit protocol
1248 is used only if:
1249 - all participating engines support two-phase commit (provide
1250 handlerton::prepare PSEA API call) and
1251 - transactions in at least two engines modify data (i.e. are
1252 not read-only).
1253
1254 Note that the two phase commit is used for
1255 statement transactions, even though they are not durable anyway.
1256 This is done to ensure logical consistency of data in a multiple-
1257 engine transaction.
1258 For example, imagine that some day MySQL supports unique
1259 constraint checks deferred till the end of statement. In such
1260 case a commit in one of the engines may yield ER_DUP_KEY,
1261 and MySQL should be able to gracefully abort statement
1262 transactions of other participants.
1263
1264 After the normal transaction has been committed,
1265 thd->transaction.all list is cleared.
1266
1267 When a connection is closed, the current normal transaction, if
1268 any, is rolled back.
1269
1270 Roles and responsibilities
1271 --------------------------
1272
1273 The server has no way to know that an engine participates in
1274 the statement and a transaction has been started
1275 in it unless the engine says so. Thus, in order to be
1276 a part of a transaction, the engine must "register" itself.
1277 This is done by invoking trans_register_ha() server call.
1278 Normally the engine registers itself whenever handler::external_lock()
1279 is called. trans_register_ha() can be invoked many times: if
1280 an engine is already registered, the call does nothing.
1281 In case autocommit is not set, the engine must register itself
1282 twice -- both in the statement list and in the normal transaction
1283 list.
1284 In which list to register is a parameter of trans_register_ha().
1285
1286 Note, that although the registration interface in itself is
1287 fairly clear, the current usage practice often leads to undesired
1288 effects. E.g. since a call to trans_register_ha() in most engines
1289 is embedded into implementation of handler::external_lock(), some
1290 DDL statements start a transaction (at least from the server
1291 point of view) even though they are not expected to. E.g.
1292 CREATE TABLE does not start a transaction, since
1293 handler::external_lock() is never called during CREATE TABLE. But
1294 CREATE TABLE ... SELECT does, since handler::external_lock() is
1295 called for the table that is being selected from. This has no
1296 practical effects currently, but must be kept in mind
1297 nevertheless.
1298
1299 Once an engine is registered, the server will do the rest
1300 of the work.
1301
1302 During statement execution, whenever any of data-modifying
1303 PSEA API methods is used, e.g. handler::write_row() or
1304 handler::update_row(), the read-write flag is raised in the
1305 statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data can not be
1307 changed in a way other than issuing a PSEA call. Important:
1308 unless this invariant is preserved the server will not know that
1309 a transaction in a given engine is read-write and will not
1310 involve the two-phase commit protocol!
1311
  At the end of a statement, the server call trans_commit_stmt is
  invoked. This call in turn invokes handlerton::prepare()
  for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
1316 will suffice, handlerton::prepare() is not invoked and
1317 the server only calls handlerton::commit_one_phase().
1318 At statement commit, the statement-related read-write
1319 engine flag is propagated to the corresponding flag in the
1320 normal transaction. When the commit is complete, the list
1321 of registered engines is cleared.
1322
1323 Rollback is handled in a similar fashion.
1324
1325 Additional notes on DDL and the normal transaction.
1326 ---------------------------------------------------
1327
1328 DDLs and operations with non-transactional engines
1329 do not "register" in thd->transaction lists, and thus do not
1330 modify the transaction state. Besides, each DDL in
1331 MySQL is prefixed with an implicit normal transaction commit
1332 (a call to trans_commit_implicit()), and thus leaves nothing
1333 to modify.
1334 However, as it has been pointed out with CREATE TABLE .. SELECT,
1335 some DDL statements can start a *new* transaction.
1336
1337 Behaviour of the server in this case is currently badly
1338 defined.
1339 DDL statements use a form of "semantic" logging
1340 to maintain atomicity: if CREATE TABLE .. SELECT failed,
1341 the newly created table is deleted.
1342 In addition, some DDL statements issue interim transaction
1343 commits: e.g. ALTER TABLE issues a commit after data is copied
1344 from the original table to the internal temporary table. Other
1345 statements, e.g. CREATE TABLE ... SELECT do not always commit
1346 after itself.
1347 And finally there is a group of DDL statements such as
1348 RENAME/DROP TABLE that doesn't start a new transaction
1349 and doesn't commit.
1350
1351 This diversity makes it hard to say what will happen if
1352 by chance a stored function is invoked during a DDL --
1353 whether any modifications it makes will be committed or not
1354 is not clear. Fortunately, SQL grammar of few DDLs allows
1355 invocation of a stored function.
1356
1357 A consistent behaviour is perhaps to always commit the normal
1358 transaction after all DDLs, just like the statement transaction
1359 is always committed at the end of all statements.
1360 */
1361
1362 /**
1363 Register a storage engine for a transaction.
1364
1365 Every storage engine MUST call this function when it starts
1366 a transaction or a statement (that is it must be called both for the
1367 "beginning of transaction" and "beginning of statement").
1368 Only storage engines registered for the transaction/statement
1369 will know when to commit/rollback it.
1370
1371 @note
1372 trans_register_ha is idempotent - storage engine may register many
1373 times per transaction.
1374
1375 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,const ulonglong * trxid)1376 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
1377 const ulonglong *trxid)
1378 {
1379 Ha_trx_info *ha_info;
1380 Transaction_ctx *trn_ctx= thd->get_transaction();
1381 Transaction_ctx::enum_trx_scope trx_scope=
1382 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1383
1384 DBUG_ENTER("trans_register_ha");
1385 DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1386
1387 Ha_trx_info *knownn_trans= trn_ctx->ha_trx_info(trx_scope);
1388 if (all)
1389 {
1390 /*
1391 Ensure no active backup engine data exists, unless the current transaction
1392 is from replication and in active xa state.
1393 */
1394 assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1395 (thd->get_transaction()->xid_state()->
1396 has_state(XID_STATE::XA_ACTIVE)));
1397 assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1398 (thd->is_binlog_applier() || thd->slave_thread));
1399
1400 thd->server_status|= SERVER_STATUS_IN_TRANS;
1401 if (thd->tx_read_only)
1402 thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1403 DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1404 }
1405
1406 ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1407
1408 if (ha_info->is_started())
1409 DBUG_VOID_RETURN; /* already registered, return */
1410
1411 ha_info->register_ha(knownn_trans, ht_arg);
1412 trn_ctx->set_ha_trx_info(trx_scope, ha_info);
1413
1414 if (ht_arg->prepare == 0)
1415 trn_ctx->set_no_2pc(trx_scope, true);
1416
1417 trn_ctx->xid_state()->set_query_id(thd->query_id);
1418 /*
1419 Register transaction start in performance schema if not done already.
1420 By doing this, we handle cases when the transaction is started implicitly in
1421 autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1422 executed statement is a single-statement transaction.
1423
1424 Explicitly started transactions are handled in trans_begin().
1425
1426 Do not register transactions in which binary log is the only participating
1427 transactional storage engine.
1428 */
1429 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1430 if (thd->m_transaction_psi == NULL &&
1431 ht_arg->db_type != DB_TYPE_BINLOG)
1432 {
1433 const XID *xid= trn_ctx->xid_state()->get_xid();
1434 my_bool autocommit= !thd->in_multi_stmt_transaction_mode();
1435 thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
1436 xid, trxid, thd->tx_isolation,
1437 thd->tx_read_only, autocommit);
1438 DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1439 gtid_set_performance_schema_values(thd);
1440 }
1441 #endif
1442 DBUG_VOID_RETURN;
1443 }
1444
1445 /**
1446 @retval
1447 0 ok
1448 @retval
1449 1 error, transaction was rolled back
1450 */
int ha_prepare(THD *thd)
{
  int error=0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  DBUG_ENTER("ha_prepare");

  if (trn_ctx->is_active(Transaction_ctx::SESSION))
  {
    /* Head of the engine list registered in the session transaction. */
    const Ha_trx_info *ha_info= trn_ctx->ha_trx_info(
      Transaction_ctx::SESSION);
    bool gtid_error= false, need_clear_owned_gtid= false;

    /*
      Persist the owned GTID first (e.g. into mysql.gtid_executed when
      the transaction will not reach the binary log); a failure here
      rolls back the whole transaction.
    */
    if ((gtid_error=
         MY_TEST(commit_owned_gtids(thd, true, &need_clear_owned_gtid))))
    {
      assert(need_clear_owned_gtid);

      ha_rollback_trans(thd, true);
      error= 1;
      goto err;
    }

    /* First phase of 2PC: ask every registered engine to prepare. */
    while (ha_info)
    {
      handlerton *ht= ha_info->ht();
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_prepare_count++;
      if (ht->prepare)
      {
        /* Debug hook: simulate an engine failing its prepare call. */
        DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
          ha_rollback_trans(thd, true);
          DBUG_RETURN(1);
        });
        if (ht->prepare(ht, thd, true))
        {
#ifdef WITH_WSREP
          if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
          {
            error= 1;
            /* avoid sending error, if we need to replay */
            if (thd->wsrep_conflict_state!= MUST_REPLAY)
            {
              my_error(ER_LOCK_DEADLOCK, MYF(0));
            }
          }
          else
          {
            /* not wsrep hton, bail to native mysql behavior */
#endif
            /* A failed prepare aborts the whole transaction. */
            ha_rollback_trans(thd, true);
            error=1;
            break;
#ifdef WITH_WSREP
          }
#endif
        }
      }
      else
      {
        /*
          Engine has no prepare() method: it cannot take part in
          two-phase commit, so only warn and continue.
        */
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
                            ha_resolve_storage_engine_name(ht));
      }
      ha_info= ha_info->next();
    }

    /* At this point the XA transaction must still be in the IDLE state. */
    assert(thd->get_transaction()->xid_state()->
           has_state(XID_STATE::XA_IDLE));

err:
    /* Finalize GTID ownership according to the prepare outcome. */
    gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
  }
  DBUG_RETURN(error);
}
1525
1526 /**
1527 Check if we can skip the two-phase commit.
1528
1529 A helper function to evaluate if two-phase commit is mandatory.
1530 As a side effect, propagates the read-only/read-write flags
1531 of the statement transaction to its enclosing normal transaction.
1532
1533 If we have at least two engines with read-write changes we must
1534 run a two-phase commit. Otherwise we can run several independent
1535 commits as the only transactional engine has read-write changes
1536 and others are read-only.
1537
1538 @retval 0 All engines are read-only.
1539 @retval 1 We have the only engine with read-write changes.
1540 @retval >1 More than one engine have read-write changes.
1541 Note: return value might NOT be the exact number of
1542 engines with read-write changes.
1543 */
1544
1545 static
1546 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1547 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1548 bool all)
1549 {
1550 /* The number of storage engines that have actual changes. */
1551 unsigned rw_ha_count= 0;
1552 Ha_trx_info *ha_info;
1553
1554 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1555 {
1556 if (ha_info->is_trx_read_write())
1557 ++rw_ha_count;
1558
1559 if (! all)
1560 {
1561 Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1562 assert(ha_info != ha_info_all);
1563 /*
1564 Merge read-only/read-write information about statement
1565 transaction to its enclosing normal transaction. Do this
1566 only if in a real transaction -- that is, if we know
1567 that ha_info_all is registered in thd->transaction.all.
1568 Since otherwise we only clutter the normal transaction flags.
1569 */
1570 if (ha_info_all->is_started()) /* FALSE if autocommit. */
1571 ha_info_all->coalesce_trx_with(ha_info);
1572 }
1573 else if (rw_ha_count > 1)
1574 {
1575 /*
1576 It is a normal transaction, so we don't need to merge read/write
1577 information up, and the need for two-phase commit has been
1578 already established. Break the loop prematurely.
1579 */
1580 break;
1581 }
1582 }
1583 return rw_ha_count;
1584 }
1585
1586
1587 /**
1588 The function computes condition to call gtid persistor wrapper,
1589 and executes it.
1590 It is invoked at committing a statement or transaction, including XA,
1591 and also at XA prepare handling.
1592
1593 @param thd Thread context.
1594 @param all The execution scope, true for the transaction one, false
1595 for the statement one.
1596 @param[out] need_clear_owned_gtid_ptr
1597 A pointer to bool variable to return the computed decision
1598 value.
1599 @return zero as no error indication, non-zero otherwise
1600 */
1601
commit_owned_gtids(THD * thd,bool all,bool * need_clear_owned_gtid_ptr)1602 int commit_owned_gtids(THD *thd, bool all, bool *need_clear_owned_gtid_ptr)
1603 {
1604 DBUG_ENTER("commit_owned_gtids(...)");
1605 int error= 0;
1606
1607 if ((!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates)) &&
1608 (all || !thd->in_multi_stmt_transaction_mode()) &&
1609 !thd->is_operating_gtid_table_implicitly &&
1610 !thd->is_operating_substatement_implicitly)
1611 {
1612 /*
1613 If the binary log is disabled for this thread (either by
1614 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1615 slave thread), then the statement will not be written to
1616 the binary log. In this case, we should save its GTID into
1617 mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it
1618 did when binlog is enabled.
1619 */
1620 if (thd->owned_gtid.sidno > 0)
1621 {
1622 error= gtid_state->save(thd);
1623 *need_clear_owned_gtid_ptr= true;
1624 }
1625 else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)
1626 *need_clear_owned_gtid_ptr= true;
1627 }
1628 else
1629 {
1630 *need_clear_owned_gtid_ptr= false;
1631 }
1632
1633 DBUG_RETURN(error);
1634 }
1635
1636
1637 /**
1638 The function is a wrapper of commit_owned_gtids(...). It is invoked
1639 at committing a partially failed statement or transaction.
1640
1641 @param thd Thread context.
1642
1643 @retval -1 if error when persisting owned gtid.
1644 @retval 0 if succeed to commit owned gtid.
1645 @retval 1 if do not meet conditions to commit owned gtid.
1646 */
commit_owned_gtid_by_partial_command(THD * thd)1647 int commit_owned_gtid_by_partial_command(THD *thd)
1648 {
1649 DBUG_ENTER("commit_owned_gtid_by_partial_command(THD *thd)");
1650 bool need_clear_owned_gtid_ptr= false;
1651 int ret= 0;
1652
1653 if (commit_owned_gtids(thd, true, &need_clear_owned_gtid_ptr))
1654 {
1655 /* Error when saving gtid into mysql.gtid_executed table. */
1656 gtid_state->update_on_rollback(thd);
1657 ret= -1;
1658 }
1659 else if (need_clear_owned_gtid_ptr)
1660 {
1661 gtid_state->update_on_commit(thd);
1662 ret= 0;
1663 }
1664 else
1665 {
1666 ret= 1;
1667 }
1668
1669 DBUG_RETURN(ret);
1670 }
1671
1672
1673 /**
1674 @param[in] ignore_global_read_lock Allow commit to complete even if a
1675 global read lock is active. This can be
1676 used to allow changes to internal tables
1677 (e.g. slave status tables).
1678
1679 @retval
1680 0 ok
1681 @retval
1682 1 transaction was rolled back
1683 @retval
1684 2 error during commit, data may be inconsistent
1685
1686 @todo
1687 Since we don't support nested statement transactions in 5.0,
1688 we can't commit or rollback stmt transactions while we are inside
1689 stored functions or triggers. So we simply do nothing now.
1690 TODO: This should be fixed in later ( >= 5.1) releases.
1691 */
1692
ha_commit_trans(THD * thd,bool all,bool ignore_global_read_lock)1693 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
1694 {
1695 int error= 0;
1696 bool need_clear_owned_gtid= false;
1697 /*
1698 Save transaction owned gtid into table before transaction prepare
1699 if binlog is disabled, or binlog is enabled and log_slave_updates
1700 is disabled with slave SQL thread or slave worker thread.
1701 */
1702 error= commit_owned_gtids(thd, all, &need_clear_owned_gtid);
1703
1704 /*
1705 'all' means that this is either an explicit commit issued by
1706 user, or an implicit commit issued by a DDL.
1707 */
1708 Transaction_ctx *trn_ctx= thd->get_transaction();
1709 Transaction_ctx::enum_trx_scope trx_scope=
1710 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1711
1712 /*
1713 "real" is a nick name for a transaction for which a commit will
1714 make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1715 transation is not 'real': even though it's possible to commit it,
1716 the changes are not durable as they might be rolled back if the
1717 enclosing 'all' transaction is rolled back.
1718 */
1719 bool is_real_trans=
1720 all || !trn_ctx->is_active(Transaction_ctx::SESSION);
1721
1722 Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope);
1723 XID_STATE *xid_state= trn_ctx->xid_state();
1724
1725 DBUG_ENTER("ha_commit_trans");
1726
1727 DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1728 all, thd->in_sub_stmt, ha_info, is_real_trans));
1729 /*
1730 We must not commit the normal transaction if a statement
1731 transaction is pending. Otherwise statement transaction
1732 flags will not get propagated to its normal transaction's
1733 counterpart.
1734 */
1735 assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
1736 !all);
1737
1738 if (thd->in_sub_stmt)
1739 {
1740 assert(0);
1741 /*
1742 Since we don't support nested statement transactions in 5.0,
1743 we can't commit or rollback stmt transactions while we are inside
1744 stored functions or triggers. So we simply do nothing now.
1745 TODO: This should be fixed in later ( >= 5.1) releases.
1746 */
1747 if (!all)
1748 DBUG_RETURN(0);
1749 /*
1750 We assume that all statements which commit or rollback main transaction
1751 are prohibited inside of stored functions or triggers. So they should
1752 bail out with error even before ha_commit_trans() call. To be 100% safe
1753 let us throw error in non-debug builds.
1754 */
1755 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1756 DBUG_RETURN(2);
1757 }
1758
1759 MDL_request mdl_request;
1760 bool release_mdl= false;
1761 if (ha_info)
1762 {
1763 uint rw_ha_count;
1764 bool rw_trans;
1765
1766 DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1767
1768 rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1769 trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
1770 /* rw_trans is TRUE when we in a transaction changing data */
1771 rw_trans= is_real_trans && (rw_ha_count > 0);
1772
1773 DBUG_EXECUTE_IF("dbug.enabled_commit",
1774 {
1775 const char act[]= "now signal Reached wait_for signal.commit_continue";
1776 assert(!debug_sync_set_action(current_thd,
1777 STRING_WITH_LEN(act)));
1778 };);
1779 if (rw_trans && !ignore_global_read_lock)
1780 {
1781 /*
1782 Acquire a metadata lock which will ensure that COMMIT is blocked
1783 by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1784 COMMIT in progress blocks FTWRL).
1785
1786 We allow the owner of FTWRL to COMMIT; we assume that it knows
1787 what it does.
1788 */
1789 MDL_REQUEST_INIT(&mdl_request,
1790 MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1791 MDL_EXPLICIT);
1792
1793 DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1794 #ifdef WITH_WSREP
1795 if (!WSREP(thd) &&
1796 thd->mdl_context.acquire_lock(&mdl_request,
1797 #else
1798 if (thd->mdl_context.acquire_lock(&mdl_request,
1799 #endif /* WITH_WSREP */
1800 thd->variables.lock_wait_timeout))
1801 {
1802 ha_rollback_trans(thd, all);
1803 DBUG_RETURN(1);
1804 }
1805 release_mdl= true;
1806
1807 DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1808 }
1809
1810 if (rw_trans && (stmt_has_updated_trans_table(ha_info)
1811 || trans_has_noop_dml(ha_info)) && check_readonly(thd, true))
1812 {
1813 ha_rollback_trans(thd, all);
1814 error= 1;
1815 goto end;
1816 }
1817
1818 #ifdef WITH_WSREP
1819 if ((!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1)) ||
1820 (WSREP(thd) && thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
1821 !trans_has_updated_trans_table(thd)))
1822 {
1823 WSREP_DEBUG("handler prepare for CTAS");
1824 #else
1825 if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
1826 #endif /* WITH_WSREP */
1827 error= tc_log->prepare(thd, all);
1828 #ifdef WITH_WSREP
1829 }
1830 #endif /* WITH_WSREP */
1831 }
1832 /*
1833 The state of XA transaction is changed to Prepared, intermediately.
1834 It's going to change to the regular NOTR at the end.
1835 The fact of the Prepared state is of interest to binary logger.
1836 */
1837 if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE))
1838 {
1839 assert(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
1840 static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
1841 get_xa_opt() == XA_ONE_PHASE);
1842
1843 xid_state->set_state(XID_STATE::XA_PREPARED);
1844 }
1845 #ifdef WITH_WSREP
1846 DEBUG_SYNC(thd, "wsrep_before_commit");
1847 #endif /* WITH_WSREP */
1848 if (error || (error= tc_log->commit(thd, all)))
1849 {
1850 ha_rollback_trans(thd, all);
1851 error= 1;
1852 goto end;
1853 }
1854 /*
1855 Mark multi-statement (any autocommit mode) or single-statement
1856 (autocommit=1) transaction as rolled back
1857 */
1858 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1859 if (is_real_trans && thd->m_transaction_psi != NULL)
1860 {
1861 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1862 thd->m_transaction_psi= NULL;
1863 }
1864 #endif
1865 DBUG_EXECUTE_IF("crash_commit_after",
1866 if (!thd->is_operating_gtid_table_implicitly)
1867 DBUG_SUICIDE(););
1868 end:
1869 if (release_mdl && mdl_request.ticket)
1870 {
1871 /*
1872 We do not always immediately release transactional locks
1873 after ha_commit_trans() (see uses of ha_enable_transaction()),
1874 thus we release the commit blocker lock as soon as it's
1875 not needed.
1876 */
1877 DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1878 thd->mdl_context.release_lock(mdl_request.ticket);
1879 }
1880 /* Free resources and perform other cleanup even for 'empty' transactions. */
1881 if (is_real_trans)
1882 {
1883 trn_ctx->cleanup();
1884 thd->tx_priority= 0;
1885 }
1886
1887 if (need_clear_owned_gtid)
1888 {
1889 thd->server_status&= ~SERVER_STATUS_IN_TRANS;
1890 /*
1891 Release the owned GTID when binlog is disabled, or binlog is
1892 enabled and log_slave_updates is disabled with slave SQL thread
1893 or slave worker thread.
1894 */
1895 if (error)
1896 gtid_state->update_on_rollback(thd);
1897 else
1898 gtid_state->update_on_commit(thd);
1899 }
1900
1901 DBUG_RETURN(error);
1902 }
1903
1904 /**
  Commit the session's outstanding transaction.
1906
1907 @pre thd->transaction.flags.commit_low == true
1908 @post thd->transaction.flags.commit_low == false
1909
1910 @note This function does not care about global read lock; the caller
1911 should.
1912
1913 @param[in] all Is set in case of explicit commit
1914 (COMMIT statement), or implicit commit
1915 issued by DDL. Is not set when called
1916 at the end of statement, even if
1917 autocommit=1.
1918 @param[in] run_after_commit
1919 True by default, otherwise, does not execute
1920 the after_commit hook in the function.
1921 */
1922
int ha_commit_low(THD *thd, bool all, bool run_after_commit)
{
  int error=0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  /* Commit the whole session transaction for an explicit/implicit COMMIT,
     otherwise only the statement-level transaction. */
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;

  DBUG_ENTER("ha_commit_low");
#ifdef WITH_WSREP
  /* Temporarily publish the wsrep commit phase in the processlist state. */
#ifdef WSREP_PROC_INFO
  char info[64]= { 0, };
  snprintf (info, sizeof(info) - 1, "ha_commit_one_phase(%lld)",
            (long long)wsrep_thd_trx_seqno(thd));
#else
  const char info[]="ha_commit_one_phase()";
#endif /* WSREP_PROC_INFO */
  char* tmp_info= NULL;
  if (WSREP(thd)) tmp_info= (char *)thd_proc_info(thd, info);
#endif /* WITH_WSREP */

  if (ha_info)
  {
    bool restore_backup_ha_data= false;
    /*
      At execution of XA COMMIT ONE PHASE binlog or slave applier
      reattaches the engine ha_data to THD, previously saved at XA START.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data())
    {
      assert(thd->lex->sql_command == SQLCOM_XA_COMMIT);
      assert(static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
             get_xa_opt() == XA_ONE_PHASE);
      restore_backup_ha_data= true;
    }

    /* Ask every registered storage engine to commit; keep going on error
       so all engines get a chance to finish, but remember the failure. */
    for (; ha_info; ha_info= ha_info_next)
    {
      int err;
      handlerton *ht= ha_info->ht();
      if ((err= ht->commit(ht, thd, all)))
      {
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
        error=1;
      }
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_commit_count++;
      /* Fetch the next node before reset() zeroes this one. */
      ha_info_next= ha_info->next();
      if (restore_backup_ha_data)
        reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);
    if (all)
    {
      trn_ctx->invalidate_changed_tables_in_cache();
    }
  }
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (all)
    trn_ctx->cleanup();
#ifdef WITH_WSREP
  /* Restore the processlist state saved above. */
  if (WSREP(thd)) thd_proc_info(thd, tmp_info);
#endif /* WITH_WSREP */
  /*
    When the transaction has been committed, we clear the commit_low
    flag. This allow other parts of the system to check if commit_low
    was called.
  */
  trn_ctx->m_flags.commit_low= false;
  if (run_after_commit && thd->get_transaction()->m_flags.run_hooks)
  {
    /*
      If commit succeeded, we call the after_commit hook.

      TODO: Investigate if this can be refactored so that there is
            only one invocation of this hook in the code (in
            MYSQL_LOG_BIN::finish_commit).
    */
    if (!error)
      (void) RUN_HOOK(transaction, after_commit, (thd, all));
    trn_ctx->m_flags.run_hooks= false;
  }
  DBUG_RETURN(error);
}
2008
2009
int ha_rollback_low(THD *thd, bool all)
{
  /*
    Roll back the statement- or session-level transaction in every
    registered storage engine, running the before/after_rollback hooks
    around the engine calls.
  */
  Transaction_ctx *trn_ctx= thd->get_transaction();
  int error= 0;
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;

  (void) RUN_HOOK(transaction, before_rollback, (thd, all));

  if (ha_info)
  {
    bool restore_backup_ha_data= false;
    /*
      Similarly to the commit case, the binlog or slave applier
      reattaches the engine ha_data to THD.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data())
    {
      assert(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
             thd->killed == THD::KILL_CONNECTION);

      restore_backup_ha_data= true;
    }

    /* Roll back in every engine; collect (but do not stop on) failures. */
    for (; ha_info; ha_info= ha_info_next)
    {
      int err;
      handlerton *ht= ha_info->ht();
      if ((err= ht->rollback(ht, thd, all)))
      { // cannot happen
#ifdef WITH_WSREP
        WSREP_INFO("rollback failed for ht: %d, conf: %d SQL %s",
                   ht->db_type, thd->wsrep_conflict_state, thd->query().str);
        Diagnostics_area *da= thd->get_stmt_da();
        if (da)
        {
          WSREP_INFO("stmt DA %d %s",
                     da->status(), (da->is_error()) ? da->message_text() : "void");
        }
#endif /* WITH_WSREP */
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_rollback_count++;
      /* Save the next node before reset() wipes this one. */
      ha_info_next= ha_info->next();
      if (restore_backup_ha_data)
        reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);
  }

  /*
    Thanks to possibility of MDL deadlock rollback request can come even if
    transaction hasn't been started in any transactional storage engine.

    It is possible to have a call of ha_rollback_low() while handling
    failure from ha_prepare() and an error in Diagnostics_area still
    wasn't set. Therefore it is required to check that an error in
    Diagnostics_area is set before calling the method XID_STATE::set_error().

    If it wasn't done it would lead to failure of the assertion
      assert(m_status == DA_ERROR)
    in the method Diagnostics_area::mysql_errno().

    In case ha_prepare is failed and an error wasn't set in Diagnostics_area
    the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
    the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
    returned by ha_prepare() is handled.
  */
  if (all && thd->transaction_rollback_request && thd->is_error())
    trn_ctx->xid_state()->set_error(thd);

  (void) RUN_HOOK(transaction, after_rollback, (thd, all));
  return error;
}
2088
2089
int ha_rollback_trans(THD *thd, bool all)
{
  /*
    High-level rollback entry point: delegates the engine work to
    tc_log->rollback(), then performs GTID, PSI and Transaction_ctx
    cleanup for 'real' transactions.
  */
  int error=0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  /* XA rollback already updated GTID state via the tc_log stack (below). */
  bool is_xa_rollback= trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);

  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or implicit
    commit issued by DDL (in these cases all == TRUE),
    or if we're running in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() is called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans=
    all || !trn_ctx->is_active(Transaction_ctx::SESSION);

  DBUG_ENTER("ha_rollback_trans");

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
         !all);

  if (thd->in_sub_stmt)
  {
    /* This code path is not supposed to be reached; see the assert. */
    assert(0);
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
    */
    if (!all)
      DBUG_RETURN(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(1);
  }

  /* The transaction coordinator log drives the engine-level rollback. */
  if (tc_log)
    error= tc_log->rollback(thd, all);
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as rolled back
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (all || !thd->in_active_multi_stmt_transaction())
  {
    MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi= NULL;
  }
#endif

  /* Always cleanup. Even if nht==0. There may be savepoints. */
  if (is_real_trans)
  {
    trn_ctx->cleanup();
    thd->tx_priority= 0;
  }

  if (all)
    thd->transaction_rollback_request= FALSE;

  /*
    Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
    complete transaction is being rollback or autocommit=1.
    Notice, XA rollback has just invoked update_on_commit() through
    tc_log->*rollback* stack.
  */
  if (is_real_trans && !is_xa_rollback)
    gtid_state->update_on_rollback(thd);

  /*
    If the transaction cannot be rolled back safely, warn; don't warn if this
    is a slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.
  */
  if (is_real_trans &&
      trn_ctx->cannot_safely_rollback(
          Transaction_ctx::SESSION) &&
      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
    trn_ctx->push_unsafe_rollback_warnings(thd);

  DBUG_RETURN(error);
}
2185
2186
2187 /**
2188 Commit the attachable transaction in storage engines.
2189
2190 @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2191 which commits attachable transaction but skips code which is
2192 unnecessary and unsafe for them (like dealing with GTIDs).
2193 Since attachable transactions are read-only their commit only
2194 needs to release resources and cleanup state in SE.
2195
2196 @param thd Current thread
2197
2198 @retval 0 - Success
2199 @retval non-0 - Failure
2200 */
int ha_commit_attachable(THD *thd)
{
  int error= 0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  /* Attachable transactions are AUTOCOMMIT, so only the STMT scope exists. */
  Ha_trx_info *ha_info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
  Ha_trx_info *ha_info_next;

  /* This function only handles attachable transactions. */
  assert(thd->is_attachable_ro_transaction_active());
  /*
    Since the attachable transaction is AUTOCOMMIT we only need
    to care about statement transaction.
  */
  assert(! trn_ctx->is_active(Transaction_ctx::SESSION));

  if (ha_info)
  {
    for (; ha_info; ha_info= ha_info_next)
    {
      /* Attachable transaction is not supposed to modify anything. */
      assert(! ha_info->is_trx_read_write());

      handlerton *ht= ha_info->ht();
      if (ht->commit(ht, thd, false))
      {
        /*
          In theory this should not happen since attachable transactions
          are read only and therefore commit is supposed to only release
          resources/cleanup state. Even if this happens we will simply
          continue committing attachable transaction in other SEs.
        */
        assert(false);
        error= 1;
      }
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_commit_count++;
      /* Fetch the next node before reset() zeroes this one. */
      ha_info_next= ha_info->next();

      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(Transaction_ctx::STMT);
  }

  /*
    Mark transaction as committed in PSI.
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != NULL)
  {
    MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi= NULL;
  }
#endif

  /* Free resources and perform other cleanup even for 'empty' transactions. */
  trn_ctx->cleanup();

  return (error);
}
2260
2261
2262 /**
2263 @details
2264 This function should be called when MySQL sends rows of a SELECT result set
2265 or the EOF mark to the client. It releases a possible adaptive hash index
2266 S-latch held by thd in InnoDB and also releases a possible InnoDB query
2267 FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2268 keep them over several calls of the InnoDB handler interface when a join
2269 is executed. But when we let the control to pass to the client they have
2270 to be released because if the application program uses mysql_use_result(),
2271 it may deadlock on the S-latch if the application on another connection
2272 performs another SQL query. In MySQL-4.1 this is even more important because
2273 there a connection can have several SELECT queries open at the same time.
2274
2275 @param thd the thread handle of the current connection
2276
2277 @return
2278 always 0
2279 */
2280
ha_release_temporary_latches(THD * thd)2281 int ha_release_temporary_latches(THD *thd)
2282 {
2283 const Ha_trx_info *info;
2284 Transaction_ctx *trn_ctx= thd->get_transaction();
2285
2286 /*
2287 Note that below we assume that only transactional storage engines
2288 may need release_temporary_latches(). If this will ever become false,
2289 we could iterate on thd->open_tables instead (and remove duplicates
2290 as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2291 */
2292 for (info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2293 info; info= info->next())
2294 {
2295 handlerton *hton= info->ht();
2296 if (hton && hton->release_temporary_latches)
2297 hton->release_temporary_latches(hton, thd);
2298 }
2299 return 0;
2300 }
2301
2302 /**
2303 Check if all storage engines used in transaction agree that after
2304 rollback to savepoint it is safe to release MDL locks acquired after
2305 savepoint creation.
2306
2307 @param thd The client thread that executes the transaction.
2308
2309 @return true - It is safe to release MDL locks.
2310 false - If it is not.
2311 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2312 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2313 {
2314 Ha_trx_info *ha_info;
2315 Transaction_ctx *trn_ctx= thd->get_transaction();
2316 Transaction_ctx::enum_trx_scope trx_scope=
2317 thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2318
2319 DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2320
2321 /**
2322 Checking whether it is safe to release metadata locks after rollback to
2323 savepoint in all the storage engines that are part of the transaction.
2324 */
2325 for (ha_info= trn_ctx->ha_trx_info(trx_scope);
2326 ha_info; ha_info= ha_info->next())
2327 {
2328 handlerton *ht= ha_info->ht();
2329 assert(ht);
2330
2331 if (ht->savepoint_rollback_can_release_mdl == 0 ||
2332 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2333 DBUG_RETURN(false);
2334 }
2335
2336 DBUG_RETURN(true);
2337 }
2338
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  /*
    Two phases: (1) roll back to the savepoint in engines that were part
    of the transaction when it was set; (2) fully roll back engines that
    joined afterwards, and trim the registration list back to sv->ha_list.
  */
  int error=0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope=
    !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;

  Ha_trx_info *ha_info, *ha_info_next;

  DBUG_ENTER("ha_rollback_to_savepoint");

  /* Counters are recomputed below while walking the savepoint's list. */
  trn_ctx->set_rw_ha_count(trx_scope, 0);
  trn_ctx->set_no_2pc(trx_scope, 0);
  /*
    rolling back to savepoint in all storage engines that were part of the
    transaction when the savepoint was set
  */
  for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
  {
    int err;
    handlerton *ht= ha_info->ht();
    assert(ht);
    assert(ht->savepoint_set != 0);
    /* Per-engine savepoint data lives right after the SAVEPOINT struct,
       at each engine's registered offset. */
    if ((err= ht->savepoint_rollback(ht, thd,
                                     (uchar *)(sv+1)+ht->savepoint_offset)))
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error=1;
    }
    assert(!thd->status_var_aggregated);
    thd->status_var.ha_savepoint_rollback_count++;
    /* An engine without prepare() cannot take part in two-phase commit. */
    if (ht->prepare == 0)
      trn_ctx->set_no_2pc(trx_scope, true);
  }

  /*
    rolling back the transaction in all storage engines that were not part of
    the transaction when the savepoint was set
  */
  for (ha_info= trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
       ha_info= ha_info_next)
  {
    int err;
    handlerton *ht= ha_info->ht();
    if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error=1;
    }
    assert(!thd->status_var_aggregated);
    thd->status_var.ha_rollback_count++;
    /* Save the next node before reset() wipes this one. */
    ha_info_next= ha_info->next();
    ha_info->reset(); /* keep it conveniently zero-filled */
  }
  /* Drop the post-savepoint registrations from the transaction context. */
  trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);

#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != NULL)
    MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
#endif

  DBUG_RETURN(error);
}
2402
int ha_prepare_low(THD *thd, bool all)
{
  /*
    Run the first phase of two-phase commit: call prepare() in every
    read-write engine registered in the given transaction scope.
    Stops at the first engine that fails to prepare.
  */
  int error= 0;
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);

  DBUG_ENTER("ha_prepare_low");

  if (ha_info)
  {
    for (; ha_info && !error; ha_info= ha_info->next())
    {
      int err= 0;
      handlerton *ht= ha_info->ht();
      /*
        Do not call two-phase commit if this particular
        transaction is read-only. This allows for simpler
        implementation in engines that are always read-only.
      */
      if (!ha_info->is_trx_read_write())
        continue;
      if ((err= ht->prepare(ht, thd, all)))
      {
#ifdef WITH_WSREP
        /* The wsrep hton maps replication-specific failures to
           user-visible errors; certification conflicts that will be
           replayed must not report an error to the client. */
        if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
        {
          error= 1;
          switch (err)
          {
          case WSREP_TRX_SIZE_EXCEEDED:
            /* give user size exceeded error from wsrep_api.h */
            my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED);
            break;
          case WSREP_TRX_CERT_FAIL:
          case WSREP_TRX_ERROR:
            /* avoid sending error, if we need to replay */
            if (thd->wsrep_conflict_state!= MUST_REPLAY)
            {
              my_error(ER_LOCK_DEADLOCK, MYF(0), err);
            }
          }
        }

        else
        {
          /* not wsrep hton, bail to native mysql behavior */
#endif
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
          error= 1;
#ifdef WITH_WSREP
        }
#endif
      }
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_prepare_count++;
    }
    DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
  }

  DBUG_RETURN(error);
}
2465
2466 /**
2467 @note
2468 according to the sql standard (ISO/IEC 9075-2:2003)
2469 section "4.33.4 SQL-statements and transaction states",
2470 SAVEPOINT is *not* transaction-initiating SQL-statement
2471 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2472 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2473 {
2474 int error=0;
2475 Transaction_ctx::enum_trx_scope trx_scope=
2476 !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2477 Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2478 Ha_trx_info *begin_ha_info= ha_info;
2479
2480 DBUG_ENTER("ha_savepoint");
2481
2482 for (; ha_info; ha_info= ha_info->next())
2483 {
2484 int err;
2485 handlerton *ht= ha_info->ht();
2486 assert(ht);
2487 if (! ht->savepoint_set)
2488 {
2489 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2490 error=1;
2491 break;
2492 }
2493 if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2494 { // cannot happen
2495 my_error(ER_GET_ERRNO, MYF(0), err);
2496 error=1;
2497 }
2498 assert(!thd->status_var_aggregated);
2499 thd->status_var.ha_savepoint_count++;
2500 }
2501 /*
2502 Remember the list of registered storage engines. All new
2503 engines are prepended to the beginning of the list.
2504 */
2505 sv->ha_list= begin_ha_info;
2506
2507 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2508 if (!error && thd->m_transaction_psi != NULL)
2509 MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2510 #endif
2511
2512 DBUG_RETURN(error);
2513 }
2514
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2515 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2516 {
2517 int error=0;
2518 Ha_trx_info *ha_info= sv->ha_list;
2519 DBUG_ENTER("ha_release_savepoint");
2520
2521 for (; ha_info; ha_info= ha_info->next())
2522 {
2523 int err;
2524 handlerton *ht= ha_info->ht();
2525 /* Savepoint life time is enclosed into transaction life time. */
2526 assert(ht);
2527 if (!ht->savepoint_release)
2528 continue;
2529 if ((err= ht->savepoint_release(ht, thd,
2530 (uchar *)(sv+1) + ht->savepoint_offset)))
2531 { // cannot happen
2532 my_error(ER_GET_ERRNO, MYF(0), err);
2533 error=1;
2534 }
2535 }
2536
2537 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2538 if (thd->m_transaction_psi != NULL)
2539 MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2540 #endif
2541 DBUG_RETURN(error);
2542 }
2543
2544
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2545 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2546 void *arg)
2547 {
2548 handlerton *hton= plugin_data<handlerton*>(plugin);
2549 if (hton->state == SHOW_OPTION_YES &&
2550 hton->start_consistent_snapshot)
2551 {
2552 hton->start_consistent_snapshot(hton, thd);
2553 *((bool *)arg)= false;
2554 }
2555 return FALSE;
2556 }
2557
ha_start_consistent_snapshot(THD * thd)2558 int ha_start_consistent_snapshot(THD *thd)
2559 {
2560 bool warn= true;
2561
2562 plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2563
2564 /*
2565 Same idea as when one wants to CREATE TABLE in one engine which does not
2566 exist:
2567 */
2568 if (warn)
2569 push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2570 "This MySQL server does not support any "
2571 "consistent-read capable storage engine");
2572 return 0;
2573 }
2574
2575
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2576 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2577 void *arg)
2578 {
2579 handlerton *hton= plugin_data<handlerton*>(plugin);
2580 if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2581 hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2582 return TRUE;
2583 return FALSE;
2584 }
2585
2586
ha_flush_logs(handlerton * db_type,bool binlog_group_flush)2587 bool ha_flush_logs(handlerton *db_type, bool binlog_group_flush)
2588 {
2589 if (db_type == NULL)
2590 {
2591 if (plugin_foreach(NULL, flush_handlerton,
2592 MYSQL_STORAGE_ENGINE_PLUGIN,
2593 static_cast<void *>(&binlog_group_flush)))
2594 return TRUE;
2595 }
2596 else
2597 {
2598 if (db_type->state != SHOW_OPTION_YES ||
2599 (db_type->flush_logs &&
2600 db_type->flush_logs(db_type, binlog_group_flush)))
2601 return TRUE;
2602 }
2603 return FALSE;
2604 }
2605
2606
2607 /**
2608 @brief make canonical filename
2609
2610 @param[in] file table handler
2611 @param[in] path original path
2612 @param[out] tmp_path buffer for canonized path
2613
2614 @details Lower case db name and table name path parts for
2615 non file based tables when lower_case_table_names
2616 is 2 (store as is, compare in lower case).
2617 Filesystem path prefix (mysql_data_home or tmpdir)
2618 is left intact.
2619
2620 @note tmp_path may be left intact if no conversion was
2621 performed.
2622
2623 @retval canonized path
2624
2625 @todo This may be done more efficiently when table path
2626 gets built. Convert this function to something like
2627 ASSERT_CANONICAL_FILENAME.
2628 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2629 const char *get_canonical_filename(handler *file, const char *path,
2630 char *tmp_path)
2631 {
2632 uint i;
2633 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2634 return path;
2635
2636 for (i= 0; i <= mysql_tmpdir_list.max; i++)
2637 {
2638 if (is_prefix(path, mysql_tmpdir_list.list[i]))
2639 return path;
2640 }
2641
2642 /* Ensure that table handler get path in lower case */
2643 if (tmp_path != path)
2644 my_stpcpy(tmp_path, path);
2645
2646 /*
2647 we only should turn into lowercase database/table part
2648 so start the process after homedirectory
2649 */
2650 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2651 return tmp_path;
2652 }
2653
2654
/*
  Internal error handler used by ha_delete_table(): downgrades any error
  raised by handler::print_error() to a warning so it can be reported to
  the user without aborting the statement.
*/
class Ha_delete_table_error_handler: public Internal_error_handler
{
public:
  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sqlstate,
                                Sql_condition::enum_severity_level *level,
                                const char* msg)
  {
    /* Downgrade errors to warnings. */
    if (*level == Sql_condition::SL_ERROR)
      *level= Sql_condition::SL_WARNING;
    /* false: the (now warning-level) condition is still recorded. */
    return false;
  }
};
2670
2671
2672 /** @brief
  This should return ENOENT if the file doesn't exist.
2674 The .frm file will be deleted only if we return 0 or ENOENT
2675 */
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
                    const char *db, const char *alias, bool generate_warning)
{
  /*
    Delete a table's data through a freshly created handler instance.
    On engine failure (and generate_warning), the engine's error message
    is intercepted and downgraded to a warning via
    Ha_delete_table_error_handler.
  */
  handler *file;
  char tmp_path[FN_REFLEN];
  int error;
  /* Stand-in TABLE/TABLE_SHARE so print_error() has metadata to report. */
  TABLE dummy_table;
  TABLE_SHARE dummy_share;
  DBUG_ENTER("ha_delete_table");

  dummy_table.s= &dummy_share;

  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
  if (table_type == NULL ||
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
    DBUG_RETURN(ENOENT);

  path= get_canonical_filename(file, path, tmp_path);
  if ((error= file->ha_delete_table(path)) && generate_warning)
  {
    /*
      Because file->print_error() use my_error() to generate the error message
      we use an internal error handler to intercept it and store the text
      in a temporary buffer. Later the message will be presented to user
      as a warning.
    */
    Ha_delete_table_error_handler ha_delete_table_error_handler;

    /* Fill up structures that print_error may need */
    dummy_share.path.str= (char*) path;
    dummy_share.path.length= strlen(path);
    dummy_share.db.str= (char*) db;
    dummy_share.db.length= strlen(db);
    dummy_share.table_name.str= (char*) alias;
    dummy_share.table_name.length= strlen(alias);
    dummy_table.alias= alias;

    file->change_table_ptr(&dummy_table, &dummy_share);

    /*
      XXX: should we convert *all* errors to warnings here?
      What if the error is fatal?
    */
    thd->push_internal_handler(&ha_delete_table_error_handler);
    file->print_error(error, 0);

    thd->pop_internal_handler();
  }
  delete file;

#ifdef HAVE_PSI_TABLE_INTERFACE
  if (likely(error == 0))
  {
    /* Table share not available, so check path for temp_table prefix. */
    bool temp_table= (strstr(path, tmp_file_prefix) != NULL);
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), alias, strlen(alias));
  }
#endif

  DBUG_RETURN(error);
}
2738
2739 /****************************************************************************
2740 ** General handler functions
2741 ****************************************************************************/
handler *handler::clone(const char *name, MEM_ROOT *mem_root)
{
  /*
    Create and open a second handler instance for the same table (e.g. to
    have two indexes open at once). Returns NULL on any failure; partial
    construction is rolled back via the err: label.
  */
  DBUG_ENTER("handler::clone");
  handler *new_handler= get_new_handler(table->s, mem_root, ht);

  if (!new_handler)
    DBUG_RETURN(NULL);
  if (new_handler->set_ha_share_ref(ha_share))
    goto err;

  /*
    Allocate handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
                                              ALIGN_SIZE(ref_length)*2)))
    goto err;
  /*
    TODO: Implement a more efficient way to have more than one index open for
    the same table instance. The ha_open call is not cachable for clone.
  */
  if (new_handler->ha_open(table, name, table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED))
    goto err;

  DBUG_RETURN(new_handler);

err:
  delete new_handler;
  DBUG_RETURN(NULL);
}
2774
2775
ha_statistic_increment(ulonglong SSV::* offset) const2776 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2777 {
2778 if (table && table->in_use) (table->in_use->status_var.*offset)++;
2779 }
2780
2781
ha_thd(void) const2782 THD *handler::ha_thd(void) const
2783 {
2784 assert(!table || !table->in_use || table->in_use == current_thd);
2785 return (table && table->in_use) ? table->in_use : current_thd;
2786 }
2787
void handler::unbind_psi()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Only legal while the table is unlocked and no scan is in progress. */
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_TABLE_CALL(unbind_table)(m_psi);
#endif
}
2800
void handler::rebind_psi()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Only legal while the table is unlocked and no scan is in progress. */
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  /*
    Notify the instrumentation that this table is now owned
    by this thread.
  */
  PSI_table_share *share_psi= ha_table_share_psi(table_share);
  m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
#endif
}
2814
void handler::start_psi_batch_mode()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Begin aggregating row operations into one PSI table-io event;
     batch mode must not already be active. */
  assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  assert(m_psi_locker == NULL);
  m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
  m_psi_numrows= 0;
#endif
}
2824
/**
  End the batch started by start_psi_batch_mode(): if a batch io wait is
  in progress, report it with the accumulated row count and release the
  locker, then leave batch mode.
*/
void handler::end_psi_batch_mode()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  assert(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
  if (m_psi_locker != NULL)
  {
    /* A locker exists only once the batch actually started an io wait. */
    assert(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
    PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
    m_psi_locker= NULL;
  }
  m_psi_batch_mode= PSI_BATCH_MODE_NONE;
#endif
}
2838
/**
  Map a table share to its performance-schema share.

  @param share  table share to inspect
  @return the instrumentation share stored in @p share
*/
PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
{
  return share->m_psi;
}
2843
/** @brief
  Open database-handler.

  @param table_arg       table this handler will be attached to
  @param name            path/name of the table to open
  @param mode            O_RDWR or O_RDONLY
  @param test_if_locked  HA_OPEN_* flags

  @return 0 on success, otherwise an engine error code (my_errno is set)

  IMPLEMENTATION
    Try O_RDONLY if cannot open as O_RDWR
    Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
*/
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  assert(table->s == table_share);
  assert(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  assert(alloc_root_inited(&table->mem_root));

  if ((error=open(name,mode,test_if_locked)))
  {
    /* Writable open failed: retry read-only if the table allows it. */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
        (table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (error)
  {
    set_my_errno(error); /* Safeguard */
    DBUG_PRINT("error",("error: %d errno: %d",error,errno));
  }
  else
  {
    assert(m_psi == NULL);
    assert(table_share != NULL);
#ifdef HAVE_PSI_TABLE_INTERFACE
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      PSI_table_share *share_psi= ha_table_share_psi(table_share);
      m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
    }
#endif

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL

    /* ref is already allocated for us if we're called from handler::clone() */
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
                                           ALIGN_SIZE(ref_length)*2)))
    {
      /* Allocation failed: undo the engine open before reporting OOM. */
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      dup_ref=ref+ALIGN_SIZE(ref_length);
    /* Cache the (possibly open-dependent) table flags for fast access. */
    cached_table_flags= table_flags();
  }
  DBUG_RETURN(error);
}
2915
2916
/**
  Close handler.

  Releases the performance-schema instrumentation for this handler and
  then delegates to the engine-specific close(). The handler must be
  unlocked with no active index/random scan.

  @return result of the engine close() call
*/
int handler::ha_close(void)
{
  DBUG_ENTER("handler::ha_close");
#ifdef HAVE_PSI_TABLE_INTERFACE
  PSI_TABLE_CALL(close_table)(table_share, m_psi);
  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
  assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  assert(m_psi_locker == NULL);
#endif
  // TODO: set table= NULL to mark the handler as closed?
  assert(m_psi == NULL);
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  DBUG_RETURN(close());
}
2936
2937
2938 /**
2939 Initialize use of index.
2940
2941 @param idx Index to use
2942 @param sorted Use sorted order
2943
2944 @return Operation status
2945 @retval 0 Success
2946 @retval != 0 Error (error code returned)
2947 */
2948
ha_index_init(uint idx,bool sorted)2949 int handler::ha_index_init(uint idx, bool sorted)
2950 {
2951 DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2952 int result;
2953 DBUG_ENTER("ha_index_init");
2954 assert(table_share->tmp_table != NO_TMP_TABLE ||
2955 m_lock_type != F_UNLCK);
2956 assert(inited == NONE);
2957 if (!(result= index_init(idx, sorted)))
2958 inited= INDEX;
2959 end_range= NULL;
2960 DBUG_RETURN(result);
2961 }
2962
2963
2964 /**
2965 End use of index.
2966
2967 @return Operation status
2968 @retval 0 Success
2969 @retval != 0 Error (error code returned)
2970 */
2971
ha_index_end()2972 int handler::ha_index_end()
2973 {
2974 DBUG_ENTER("ha_index_end");
2975 /* SQL HANDLER function can call this without having it locked. */
2976 assert(table->open_by_handler ||
2977 table_share->tmp_table != NO_TMP_TABLE ||
2978 m_lock_type != F_UNLCK);
2979 assert(inited == INDEX);
2980 inited= NONE;
2981 end_range= NULL;
2982 DBUG_RETURN(index_end());
2983 }
2984
2985
2986 /**
2987 Initialize table for random read or scan.
2988
2989 @param scan if true: Initialize for random scans through rnd_next()
2990 if false: Initialize for random reads through rnd_pos()
2991
2992 @return Operation status
2993 @retval 0 Success
2994 @retval != 0 Error (error code returned)
2995 */
2996
ha_rnd_init(bool scan)2997 int handler::ha_rnd_init(bool scan)
2998 {
2999 DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
3000 int result;
3001 DBUG_ENTER("ha_rnd_init");
3002 assert(table_share->tmp_table != NO_TMP_TABLE ||
3003 m_lock_type != F_UNLCK);
3004 assert(inited == NONE || (inited == RND && scan));
3005 inited= (result= rnd_init(scan)) ? NONE : RND;
3006 end_range= NULL;
3007 DBUG_RETURN(result);
3008 }
3009
3010
3011 /**
3012 End use of random access.
3013
3014 @return Operation status
3015 @retval 0 Success
3016 @retval != 0 Error (error code returned)
3017 */
3018
ha_rnd_end()3019 int handler::ha_rnd_end()
3020 {
3021 DBUG_ENTER("ha_rnd_end");
3022 /* SQL HANDLER function can call this without having it locked. */
3023 assert(table->open_by_handler ||
3024 table_share->tmp_table != NO_TMP_TABLE ||
3025 m_lock_type != F_UNLCK);
3026 assert(inited == RND);
3027 inited= NONE;
3028 end_range= NULL;
3029 DBUG_RETURN(rnd_end());
3030 }
3031
3032
3033 /**
3034 Read next row via random scan.
3035
3036 @param buf Buffer to read the row into
3037
3038 @return Operation status
3039 @retval 0 Success
3040 @retval != 0 Error (error code returned)
3041 */
3042
ha_rnd_next(uchar * buf)3043 int handler::ha_rnd_next(uchar *buf)
3044 {
3045 int result;
3046 DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
3047 DBUG_ENTER("handler::ha_rnd_next");
3048 assert(table_share->tmp_table != NO_TMP_TABLE ||
3049 m_lock_type != F_UNLCK);
3050 assert(inited == RND);
3051
3052 // Set status for the need to update generated fields
3053 m_update_generated_read_fields= table->has_gcol();
3054
3055 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3056 { result= rnd_next(buf); })
3057 if (!result && m_update_generated_read_fields)
3058 {
3059 result= update_generated_read_fields(buf, table);
3060 m_update_generated_read_fields= false;
3061 }
3062 DBUG_RETURN(result);
3063 }
3064
3065
3066 /**
3067 Read row via random scan from position.
3068
3069 @param[out] buf Buffer to read the row into
3070 @param pos Position from position() call
3071
3072 @return Operation status
3073 @retval 0 Success
3074 @retval != 0 Error (error code returned)
3075 */
3076
ha_rnd_pos(uchar * buf,uchar * pos)3077 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
3078 {
3079 int result;
3080 DBUG_ENTER("handler::ha_rnd_pos");
3081 assert(table_share->tmp_table != NO_TMP_TABLE ||
3082 m_lock_type != F_UNLCK);
3083 /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
3084 /* assert(inited == RND); */
3085
3086 // Set status for the need to update generated fields
3087 m_update_generated_read_fields= table->has_gcol();
3088
3089 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3090 { result= rnd_pos(buf, pos); })
3091 if (!result && m_update_generated_read_fields)
3092 {
3093 result= update_generated_read_fields(buf, table);
3094 m_update_generated_read_fields= false;
3095 }
3096 DBUG_RETURN(result);
3097 }
3098
3099
3100 /**
3101 Read [part of] row via [part of] index.
3102 @param[out] buf buffer where store the data
3103 @param key Key to search for
3104 @param keypart_map Which part of key to use
3105 @param find_flag Direction/condition on key usage
3106
3107 @returns Operation status
3108 @retval 0 Success (found a record, and function has
3109 set table->status to 0)
3110 @retval HA_ERR_END_OF_FILE Row not found (function has set table->status
3111 to STATUS_NOT_FOUND). End of index passed.
3112 @retval HA_ERR_KEY_NOT_FOUND Row not found (function has set table->status
3113 to STATUS_NOT_FOUND). Index cursor positioned.
3114 @retval != 0 Error
3115
3116 @note Positions an index cursor to the index specified in the handle.
3117 Fetches the row if available. If the key value is null,
3118 begin at the first key of the index.
3119 ha_index_read_map can be restarted without calling index_end on the previous
3120 index scan and without calling ha_index_init. In this case the
3121 ha_index_read_map is on the same index as the previous ha_index_scan.
3122 This is particularly used in conjunction with multi read ranges.
3123 */
3124
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3125 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3126 key_part_map keypart_map,
3127 enum ha_rkey_function find_flag)
3128 {
3129 int result;
3130 DBUG_ENTER("handler::ha_index_read_map");
3131 assert(table_share->tmp_table != NO_TMP_TABLE ||
3132 m_lock_type != F_UNLCK);
3133 assert(inited == INDEX);
3134 assert(!pushed_idx_cond || buf == table->record[0]);
3135
3136 // Set status for the need to update generated fields
3137 m_update_generated_read_fields= table->has_gcol();
3138
3139 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3140 { result= index_read_map(buf, key, keypart_map, find_flag); })
3141 if (!result && m_update_generated_read_fields)
3142 {
3143 result= update_generated_read_fields(buf, table, active_index);
3144 m_update_generated_read_fields= false;
3145 }
3146 DBUG_RETURN(result);
3147 }
3148
ha_index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)3149 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3150 key_part_map keypart_map)
3151 {
3152 int result;
3153 DBUG_ENTER("handler::ha_index_read_last_map");
3154 assert(table_share->tmp_table != NO_TMP_TABLE ||
3155 m_lock_type != F_UNLCK);
3156 assert(inited == INDEX);
3157 assert(!pushed_idx_cond || buf == table->record[0]);
3158
3159 // Set status for the need to update generated fields
3160 m_update_generated_read_fields= table->has_gcol();
3161
3162 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3163 { result= index_read_last_map(buf, key, keypart_map); })
3164 if (!result && m_update_generated_read_fields)
3165 {
3166 result= update_generated_read_fields(buf, table, active_index);
3167 m_update_generated_read_fields= false;
3168 }
3169 DBUG_RETURN(result);
3170 }
3171
3172 /**
3173 Initializes an index and read it.
3174
3175 @see handler::ha_index_read_map.
3176 */
3177
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3178 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3179 key_part_map keypart_map,
3180 enum ha_rkey_function find_flag)
3181 {
3182 int result;
3183 assert(table_share->tmp_table != NO_TMP_TABLE ||
3184 m_lock_type != F_UNLCK);
3185 assert(end_range == NULL);
3186 assert(!pushed_idx_cond || buf == table->record[0]);
3187
3188 // Set status for the need to update generated fields
3189 m_update_generated_read_fields= table->has_gcol();
3190
3191 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result,
3192 { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3193 if (!result && m_update_generated_read_fields)
3194 {
3195 result= update_generated_read_fields(buf, table, index);
3196 m_update_generated_read_fields= false;
3197 }
3198 return result;
3199 }
3200
3201
3202 /**
3203 Reads the next row via index.
3204
3205 @param[out] buf Row data
3206
3207 @return Operation status.
3208 @retval 0 Success
3209 @retval HA_ERR_END_OF_FILE Row not found
3210 @retval != 0 Error
3211 */
3212
ha_index_next(uchar * buf)3213 int handler::ha_index_next(uchar * buf)
3214 {
3215 int result;
3216 DBUG_ENTER("handler::ha_index_next");
3217 assert(table_share->tmp_table != NO_TMP_TABLE ||
3218 m_lock_type != F_UNLCK);
3219 assert(inited == INDEX);
3220 assert(!pushed_idx_cond || buf == table->record[0]);
3221
3222 // Set status for the need to update generated fields
3223 m_update_generated_read_fields= table->has_gcol();
3224
3225 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3226 { result= index_next(buf); })
3227 if (!result && m_update_generated_read_fields)
3228 {
3229 result= update_generated_read_fields(buf, table, active_index);
3230 m_update_generated_read_fields= false;
3231 }
3232 DBUG_RETURN(result);
3233 }
3234
3235
3236 /**
3237 Reads the previous row via index.
3238
3239 @param[out] buf Row data
3240
3241 @return Operation status.
3242 @retval 0 Success
3243 @retval HA_ERR_END_OF_FILE Row not found
3244 @retval != 0 Error
3245 */
3246
ha_index_prev(uchar * buf)3247 int handler::ha_index_prev(uchar * buf)
3248 {
3249 int result;
3250 DBUG_ENTER("handler::ha_index_prev");
3251 assert(table_share->tmp_table != NO_TMP_TABLE ||
3252 m_lock_type != F_UNLCK);
3253 assert(inited == INDEX);
3254 assert(!pushed_idx_cond || buf == table->record[0]);
3255
3256 // Set status for the need to update generated fields
3257 m_update_generated_read_fields= table->has_gcol();
3258
3259 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3260 { result= index_prev(buf); })
3261 if (!result && m_update_generated_read_fields)
3262 {
3263 result= update_generated_read_fields(buf, table, active_index);
3264 m_update_generated_read_fields= false;
3265 }
3266 DBUG_RETURN(result);
3267 }
3268
3269
3270 /**
3271 Reads the first row via index.
3272
3273 @param[out] buf Row data
3274
3275 @return Operation status.
3276 @retval 0 Success
3277 @retval HA_ERR_END_OF_FILE Row not found
3278 @retval != 0 Error
3279 */
3280
ha_index_first(uchar * buf)3281 int handler::ha_index_first(uchar * buf)
3282 {
3283 int result;
3284 DBUG_ENTER("handler::ha_index_first");
3285 assert(table_share->tmp_table != NO_TMP_TABLE ||
3286 m_lock_type != F_UNLCK);
3287 assert(inited == INDEX);
3288 assert(!pushed_idx_cond || buf == table->record[0]);
3289
3290 // Set status for the need to update generated fields
3291 m_update_generated_read_fields= table->has_gcol();
3292
3293 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3294 { result= index_first(buf); })
3295 if (!result && m_update_generated_read_fields)
3296 {
3297 result= update_generated_read_fields(buf, table, active_index);
3298 m_update_generated_read_fields= false;
3299 }
3300 DBUG_RETURN(result);
3301 }
3302
3303
3304 /**
3305 Reads the last row via index.
3306
3307 @param[out] buf Row data
3308
3309 @return Operation status.
3310 @retval 0 Success
3311 @retval HA_ERR_END_OF_FILE Row not found
3312 @retval != 0 Error
3313 */
3314
ha_index_last(uchar * buf)3315 int handler::ha_index_last(uchar * buf)
3316 {
3317 int result;
3318 DBUG_ENTER("handler::ha_index_last");
3319 assert(table_share->tmp_table != NO_TMP_TABLE ||
3320 m_lock_type != F_UNLCK);
3321 assert(inited == INDEX);
3322 assert(!pushed_idx_cond || buf == table->record[0]);
3323
3324 // Set status for the need to update generated fields
3325 m_update_generated_read_fields= table->has_gcol();
3326
3327 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3328 { result= index_last(buf); })
3329 if (!result && m_update_generated_read_fields)
3330 {
3331 result= update_generated_read_fields(buf, table, active_index);
3332 m_update_generated_read_fields= false;
3333 }
3334 DBUG_RETURN(result);
3335 }
3336
3337
3338 /**
3339 Reads the next same row via index.
3340
3341 @param[out] buf Row data
3342 @param key Key to search for
3343 @param keylen Length of key
3344
3345 @return Operation status.
3346 @retval 0 Success
3347 @retval HA_ERR_END_OF_FILE Row not found
3348 @retval != 0 Error
3349 */
3350
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3351 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3352 {
3353 int result;
3354 DBUG_ENTER("handler::ha_index_next_same");
3355 assert(table_share->tmp_table != NO_TMP_TABLE ||
3356 m_lock_type != F_UNLCK);
3357 assert(inited == INDEX);
3358 assert(!pushed_idx_cond || buf == table->record[0]);
3359
3360 // Set status for the need to update generated fields
3361 m_update_generated_read_fields= table->has_gcol();
3362
3363 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3364 { result= index_next_same(buf, key, keylen); })
3365 if (!result && m_update_generated_read_fields)
3366 {
3367 result= update_generated_read_fields(buf, table, active_index);
3368 m_update_generated_read_fields= false;
3369 }
3370 DBUG_RETURN(result);
3371 }
3372
3373
3374 /**
3375 Read first row (only) from a table.
3376
3377 This is never called for InnoDB tables, as these table types
3378 has the HA_STATS_RECORDS_IS_EXACT set.
3379 */
read_first_row(uchar * buf,uint primary_key)3380 int handler::read_first_row(uchar * buf, uint primary_key)
3381 {
3382 int error;
3383 DBUG_ENTER("handler::read_first_row");
3384
3385 ha_statistic_increment(&SSV::ha_read_first_count);
3386
3387 /*
3388 If there is very few deleted rows in the table, find the first row by
3389 scanning the table.
3390 TODO remove the test for HA_READ_ORDER
3391 */
3392 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3393 !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3394 {
3395 if (!(error= ha_rnd_init(1)))
3396 {
3397 while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3398 /* skip deleted row */;
3399 const int end_error= ha_rnd_end();
3400 if (!error)
3401 error= end_error;
3402 }
3403 }
3404 else
3405 {
3406 /* Find the first row through the primary key */
3407 if (!(error= ha_index_init(primary_key, 0)))
3408 {
3409 error= ha_index_first(buf);
3410 const int end_error= ha_index_end();
3411 if (!error)
3412 error= end_error;
3413 }
3414 }
3415 DBUG_RETURN(error);
3416 }
3417
3418 /**
3419 Generate the next auto-increment number based on increment and offset.
3420 computes the lowest number
3421 - strictly greater than "nr"
3422 - of the form: auto_increment_offset + N * auto_increment_increment
3423 If overflow happened then return MAX_ULONGLONG value as an
3424 indication of overflow.
3425 In most cases increment= offset= 1, in which case we get:
3426 @verbatim 1,2,3,4,5,... @endverbatim
3427 If increment=10 and offset=5 and previous number is 1, we get:
3428 @verbatim 1,5,15,25,35,... @endverbatim
3429 */
3430 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3431 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3432 {
3433 const ulonglong save_nr= nr;
3434
3435 if (variables->auto_increment_increment == 1)
3436 nr= nr + 1; // optimization of the formula below
3437 else
3438 {
3439 nr= (((nr+ variables->auto_increment_increment -
3440 variables->auto_increment_offset)) /
3441 (ulonglong) variables->auto_increment_increment);
3442 nr= (nr* (ulonglong) variables->auto_increment_increment +
3443 variables->auto_increment_offset);
3444 }
3445
3446 if (unlikely(nr <= save_nr))
3447 return ULLONG_MAX;
3448
3449 return nr;
3450 }
3451
3452
/**
  Ensure the next generated auto-increment value will be greater than an
  explicitly inserted value.

  @param nr  The explicit value being inserted.
*/
void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
{
  /*
    If we have set THD::next_insert_id previously and plan to insert an
    explicitly-specified value larger than this, we need to increase
    THD::next_insert_id to be greater than the explicit value.
  */
  if ((next_insert_id > 0) && (nr >= next_insert_id))
    set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
}
3463
3464
3465 /** @brief
3466 Computes the largest number X:
3467 - smaller than or equal to "nr"
3468 - of the form: auto_increment_offset + N * auto_increment_increment
3469 where N>=0.
3470
3471 SYNOPSIS
3472 prev_insert_id
3473 nr Number to "round down"
3474 variables variables struct containing auto_increment_increment and
3475 auto_increment_offset
3476
3477 RETURN
3478 The number X if it exists, "nr" otherwise.
3479 */
3480 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3481 prev_insert_id(ulonglong nr, struct system_variables *variables)
3482 {
3483 if (unlikely(nr < variables->auto_increment_offset))
3484 {
3485 /*
3486 There's nothing good we can do here. That is a pathological case, where
3487 the offset is larger than the column's max possible value, i.e. not even
3488 the first sequence value may be inserted. User will receive warning.
3489 */
3490 DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3491 "auto_increment_offset: %lu",
3492 (ulong) nr, variables->auto_increment_offset));
3493 return nr;
3494 }
3495 if (variables->auto_increment_increment == 1)
3496 return nr; // optimization of the formula below
3497 nr= (((nr - variables->auto_increment_offset)) /
3498 (ulonglong) variables->auto_increment_increment);
3499 return (nr * (ulonglong) variables->auto_increment_increment +
3500 variables->auto_increment_offset);
3501 }
3502
3503
3504 /**
3505 Update the auto_increment field if necessary.
3506
3507 Updates columns with type NEXT_NUMBER if:
3508
3509 - If column value is set to NULL (in which case
3510 auto_increment_field_not_null is 0)
3511 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3512 set. In the future we will only set NEXT_NUMBER fields if one sets them
3513 to NULL (or they are not included in the insert list).
3514
3515 In those cases, we check if the currently reserved interval still has
3516 values we have not used. If yes, we pick the smallest one and use it.
3517 Otherwise:
3518
3519 - If a list of intervals has been provided to the statement via SET
3520 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3521 first unused interval from this list, consider it as reserved.
3522
3523 - Otherwise we set the column for the first row to the value
3524 next_insert_id(get_auto_increment(column))) which is usually
3525 max-used-column-value+1.
3526 We call get_auto_increment() for the first row in a multi-row
3527 statement. get_auto_increment() will tell us the interval of values it
3528 reserved for us.
3529
3530 - In both cases, for the following rows we use those reserved values without
3531 calling the handler again (we just progress in the interval, computing
3532 each new value from the previous one). Until we have exhausted them, then
3533 we either take the next provided interval or call get_auto_increment()
3534 again to reserve a new interval.
3535
3536 - In both cases, the reserved intervals are remembered in
3537 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3538 binlogging; the last reserved interval is remembered in
3539 auto_inc_interval_for_cur_row. The number of reserved intervals is
3540 remembered in auto_inc_intervals_count. It differs from the number of
3541 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3542 latter list is cumulative over all statements forming one binlog event
3543 (when stored functions and triggers are used), and collapses two
3544 contiguous intervals in one (see its append() method).
3545
3546 The idea is that generated auto_increment values are predictable and
3547 independent of the column values in the table. This is needed to be
3548 able to replicate into a table that already has rows with a higher
3549 auto-increment value than the one that is inserted.
3550
3551 After we have already generated an auto-increment number and the user
3552 inserts a column with a higher value than the last used one, we will
3553 start counting from the inserted value.
3554
3555 This function's "outputs" are: the table's auto_increment field is filled
3556 with a value, thd->next_insert_id is filled with the value to use for the
3557 next row, if a value was autogenerated for the current row it is stored in
3558 thd->insert_id_for_cur_row, if get_auto_increment() was called
3559 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3560 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3561 this list.
3562
3563 @todo
3564 Replace all references to "next number" or NEXT_NUMBER to
3565 "auto_increment", everywhere (see below: there is
3566 table->auto_increment_field_not_null, and there also exists
3567 table->next_number_field, it's not consistent).
3568
3569 @retval
3570 0 ok
3571 @retval
3572 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3573 returned ~(ulonglong) 0
3574 @retval
3575 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3576 failure.
3577 */
3578
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)

int handler::update_auto_increment()
{
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    An explicit non-NULL value was supplied for the column (non-zero, or
    zero while NO_AUTO_VALUE_ON_ZERO is in effect): nothing to generate.
  */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Also we should take into account the sign of the value.
      Since auto_increment value can't have negative value we should update
      next_insert_id only in case when we are inserting an explicit positive
      value.
      It means that for a table that has SIGNED INTEGER column when we execute
      the following statement
      INSERT INTO t1 VALUES( NULL), (-1), (NULL)
      we shouldn't call adjust_next_insert_id_after_explicit_value()
      and the result row will be (1, -1, 2) (for new opened connection
      to the server). On the other hand, for the statement
      INSERT INTO t1 VALUES( NULL), (333), (NULL)
      we should call adjust_next_insert_id_after_explicit_value()
      and result row will be (1, 333, 334).
    */
    if (((Field_num*)table->next_number_field)->unsigned_flag ||
        ((longlong)nr) > 0)
      adjust_next_insert_id_after_explicit_value(nr);

    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /* The next value to reserve no longer fits in the column's type. */
  if (next_insert_id > table->next_number_field->get_max_int_value())
    DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);

  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      /* An interval was forced via SET INSERT_ID or replication. */
      nr= forced->minimum();
      /*
        In a multi insert statement when the number of affected rows is known
        then reserve those many number of auto increment values. So that
        interval will be starting value to starting value + number of affected
        rows * increment of auto increment.
      */
      nb_reserved_values= (estimation_rows_to_insert > 0) ?
        estimation_rows_to_insert : forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->bulk_insert_row_cnt > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->bulk_insert_row_cnt;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          /* Exponential growth: double the reservation on each refill. */
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      /* This call ignores all its parameters but nr, currently */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  if (unlikely(nr == ULLONG_MAX))
    DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));

  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
  {
    /*
      first test if the query was aborted due to strict mode constraints
    */
    if (thd->killed == THD::KILL_BAD_DATA)
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

    /*
      field refused this value (overflow) and truncated it, use the result of
      the truncation (which is going to be inserted); however we try to
      decrease it to honour auto_increment_* variables.
      That will shift the left bound of the reserved interval, we don't
      bother shifting the right bound (anyway any other value from this
      interval will cause a duplicate key).
    */
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
    if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
      nr= table->next_number_field->val_int();
  }
  if (append)
  {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
#ifdef WITH_WSREP
    if (((WSREP_EMULATE_BINLOG(thd)) || mysql_bin_log.is_open()) &&
        !thd->is_current_stmt_binlog_format_row())
#else
    if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
#endif /* WITH_WSREP */
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
                                                            auto_inc_interval_for_cur_row.values(),
                                                            variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;
  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3787
3788
3789 /** @brief
3790 MySQL signal that it changed the column bitmap
3791
3792 USAGE
3793 This is for handlers that needs to setup their own column bitmaps.
3794 Normally the handler should set up their own column bitmaps in
3795 index_init() or rnd_init() and in any column_bitmaps_signal() call after
3796 this.
3797
3798 The handler is allowd to do changes to the bitmap after a index_init or
3799 rnd_init() call is made as after this, MySQL will not use the bitmap
3800 for any program logic checking.
3801 */
/*
  Default implementation: only trace the current bitmap pointers.
  Engines that maintain private column bitmaps override this method
  (see the usage notes in the comment above).
*/
void handler::column_bitmaps_signal()
{
  DBUG_ENTER("column_bitmaps_signal");
  /*
    NOTE(review): pointers are printed via a (long) cast, which truncates
    on LLP64 platforms (e.g. 64-bit Windows). Debug-trace output only, so
    harmless — but confirm before relying on these values.
  */
  DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx", (long) table->read_set,
                      (long) table->write_set));
  DBUG_VOID_RETURN;
}
3809
3810
3811 /**
3812 Reserves an interval of auto_increment values from the handler.
3813
3814 @param offset offset (modulus increment)
3815 @param increment increment between calls
3816 @param nb_desired_values how many values we want
3817 @param[out] first_value the first value reserved by the handler
3818 @param[out] nb_reserved_values how many values the handler reserved
3819
3820 offset and increment means that we want values to be of the form
3821 offset + N * increment, where N>=0 is integer.
3822 If the function sets *first_value to ULLONG_MAX it means an error.
3823 If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3824 reserved to "positive infinite".
3825 */
3826
void handler::get_auto_increment(ulonglong offset, ulonglong increment,
                                 ulonglong nb_desired_values,
                                 ulonglong *first_value,
                                 ulonglong *nb_reserved_values)
{
  ulonglong nr;
  int error;
  DBUG_ENTER("handler::get_auto_increment");

  /*
    Read only the index columns: enable key-read mode and mark the columns
    of the auto-increment index in the read set, then notify the engine
    that the bitmaps changed.
  */
  (void) extra(HA_EXTRA_KEYREAD);
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                             table->read_set);
  column_bitmaps_signal();

  if (ha_index_init(table->s->next_number_index, 1))
  {
    /* This should never happen, assert in debug, and fail in release build */
    assert(0);
    *first_value= ULLONG_MAX;  // ULLONG_MAX signals failure to the caller
    DBUG_VOID_RETURN;
  }

  if (table->s->next_number_keypart == 0)
  {						// Autoincrement at key-start
    /* Highest existing key value is the last entry of the index. */
    error= ha_index_last(table->record[1]);
    /*
      MySQL implicitely assumes such method does locking (as MySQL decides to
      use nr+increment without checking again with the handler, in
      handler::update_auto_increment()), so reserves to infinite.
    */
    *nb_reserved_values= ULLONG_MAX;
  }
  else
  {
    /*
      Auto-increment is a secondary key part: find the last row matching
      the current prefix of the key (the "first" key parts come from
      table->record[0]).
    */
    uchar key[MAX_KEY_LENGTH];
    key_copy(key, table->record[0],
             table->key_info + table->s->next_number_index,
             table->s->next_number_key_offset);
    error= ha_index_read_map(table->record[1], key,
                             make_prev_keypart_map(table->s->next_number_keypart),
                             HA_READ_PREFIX_LAST);
    /*
      MySQL needs to call us for next row: assume we are inserting ("a",null)
      here, we return 3, and next this statement will want to insert
      ("b",null): there is no reason why ("b",3+1) would be the good row to
      insert: maybe it already exists, maybe 3+1 is too large...
    */
    *nb_reserved_values= 1;
  }

  if (error)
  {
    if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
    {
      /* No entry found, start with 1. */
      nr= 1;
    }
    else
    {
      /* Unexpected read error: propagate failure via ULLONG_MAX. */
      assert(0);
      nr= ULLONG_MAX;
    }
  }
  else
    /*
      The fetched row is in record[1], so the field value must be read at
      the record-buffer offset; +1 gives the next free value.
    */
    nr= ((ulonglong) table->next_number_field->
         val_int_offset(table->s->rec_buff_length)+1);
  ha_index_end();
  (void) extra(HA_EXTRA_NO_KEYREAD);  // restore normal (non key-read) mode
  *first_value= nr;
  DBUG_VOID_RETURN;
}
3898
3899
/*
  Release auto-increment values reserved but not used by this statement,
  and reset the per-row/per-statement reservation bookkeeping.
*/
void handler::ha_release_auto_increment()
{
  /*
    Legal only on a locked non-temporary table, or when nothing was
    actually reserved (both counters zero).
  */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK ||
         (!next_insert_id && !insert_id_for_cur_row));
  DEBUG_SYNC(ha_thd(), "release_auto_increment");
  release_auto_increment();
  /* Clear the interval reserved for the current row. */
  insert_id_for_cur_row= 0;
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
  auto_inc_intervals_count= 0;
  if (next_insert_id > 0)
  {
    next_insert_id= 0;
    /*
      this statement used forced auto_increment values if there were some,
      wipe them away for other statements.
    */
    table->in_use->auto_inc_intervals_forced.empty();
  }
}
3920
3921
3922 /**
3923 Construct and emit duplicate key error message using information
3924 from table's record buffer.
3925
3926 @param table TABLE object which record buffer should be used as
3927 source for column values.
3928 @param key Key description.
3929 @param msg Error message template to which key value should be
3930 added.
3931 @param errflag Flags for my_error() call.
3932 */
3933
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3934 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3935 {
3936 /* Write the duplicated key in the error message */
3937 char key_buff[MAX_KEY_LENGTH];
3938 String str(key_buff,sizeof(key_buff),system_charset_info);
3939
3940 if (key == NULL)
3941 {
3942 /* Key is unknown */
3943 str.copy("", 0, system_charset_info);
3944 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3945 }
3946 else
3947 {
3948 /* Table is opened and defined at this point */
3949 key_unpack(&str,table, key);
3950 size_t max_length= MYSQL_ERRMSG_SIZE - strlen(msg);
3951 if (str.length() >= max_length)
3952 {
3953 str.length(max_length-4);
3954 str.append(STRING_WITH_LEN("..."));
3955 }
3956 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3957 }
3958 }
3959
3960
3961 /**
3962 Construct and emit duplicate key error message using information
3963 from table's record buffer.
3964
3965 @sa print_keydup_error(table, key, msg, errflag).
3966 */
3967
void print_keydup_error(TABLE *table, KEY *key, myf errflag)
{
  /* Delegate with the standard "Duplicate entry ... for key ..." template. */
  print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
}
3972
3973
3974 /**
3975 This method is used to analyse the error to see whether the error
3976 is ignorable or not. Further comments in header file.
3977 */
3978
is_ignorable_error(int error)3979 bool handler::is_ignorable_error(int error)
3980 {
3981 DBUG_ENTER("is_ignorable_error");
3982
3983 // Catch errors that are ignorable
3984 switch (error)
3985 {
3986 // Error code 0 is not an error.
3987 case 0:
3988 // Dup key errors may be explicitly ignored.
3989 case HA_ERR_FOUND_DUPP_KEY:
3990 case HA_ERR_FOUND_DUPP_UNIQUE:
3991 // Foreign key constraint violations are ignorable.
3992 case HA_ERR_ROW_IS_REFERENCED:
3993 case HA_ERR_NO_REFERENCED_ROW:
3994 DBUG_RETURN(true);
3995 }
3996
3997 // Default is that an error is not ignorable.
3998 DBUG_RETURN(false);
3999 }
4000
4001
4002 /**
4003 This method is used to analyse the error to see whether the error
4004 is fatal or not. Further comments in header file.
4005 */
4006
is_fatal_error(int error)4007 bool handler::is_fatal_error(int error)
4008 {
4009 DBUG_ENTER("is_fatal_error");
4010
4011 // No ignorable errors are fatal
4012 if (is_ignorable_error(error))
4013 DBUG_RETURN(false);
4014
4015 // Catch errors that are not fatal
4016 switch (error)
4017 {
4018 /*
4019 Deadlock and lock timeout cause transaction/statement rollback so that
4020 THD::is_fatal_sub_stmt_error will be set. This means that they will not
4021 be possible to handle by stored program handlers inside stored functions
4022 and triggers even if non-fatal.
4023 */
4024 case HA_ERR_LOCK_WAIT_TIMEOUT:
4025 case HA_ERR_LOCK_DEADLOCK:
4026 DBUG_RETURN(false);
4027
4028 case HA_ERR_NULL_IN_SPATIAL:
4029 DBUG_RETURN(false);
4030 }
4031
4032 // Default is that an error is fatal
4033 DBUG_RETURN(true);
4034 }
4035
4036
4037 /**
4038 Print error that we got from handler function.
4039
4040 @note
4041 In case of delete table it's only safe to use the following parts of
4042 the 'table' structure:
4043 - table->s->path
4044 - table->alias
4045 */
void handler::print_error(int error, myf errflag)
{
  DBUG_ENTER("handler::print_error");
  DBUG_PRINT("enter",("error: %d",error));

  /*
    Map the handler error code to a server error message number; codes that
    need extra arguments emit their message directly and return early.
    Default is the generic "Got error %d from storage engine".
  */
  int textno=ER_GET_ERRNO;
  switch (error) {
  case EACCES:
    textno=ER_OPEN_AS_READONLY;
    break;
  case EAGAIN:
    textno=ER_FILE_USED;
    break;
  case ENOENT:
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    textno=ER_FILE_NOT_FOUND;
    /*
      Emitted here so the OS error string can be included; the
      textno != ER_FILE_NOT_FOUND check at the end of this function
      prevents a duplicate report.
    */
    my_error(textno, errflag, table_share->table_name.str,
             error, my_strerror(errbuf, sizeof(errbuf), error));
  }
  break;
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_NO_ACTIVE_RECORD:
  case HA_ERR_RECORD_DELETED:
  case HA_ERR_END_OF_FILE:
    textno=ER_KEY_NOT_FOUND;
    break;
  case HA_ERR_WRONG_MRG_TABLE_DEF:
    textno=ER_WRONG_MRG_TABLE;
    break;
  case HA_ERR_FOUND_DUPP_KEY:
  {
    /* (uint)-1 marks "key unknown"; the signed compare below detects it. */
    uint key_nr= table ? get_dup_key(error) : -1;
    if ((int) key_nr >= 0)
    {
      print_keydup_error(table,
                         key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
                         errflag);
      DBUG_VOID_RETURN;
    }
    textno=ER_DUP_KEY;
    break;
  }
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
  {
    assert(table_share->tmp_table != NO_TMP_TABLE ||
           m_lock_type != F_UNLCK);

    char rec_buf[MAX_KEY_LENGTH];
    String rec(rec_buf, sizeof(rec_buf), system_charset_info);
    /* Table is opened and defined at this point */

    /*
      Just print the subset of fields that are part of the first index,
      printing the whole row from there is not easy.
    */
    key_unpack(&rec, table, &table->key_info[0]);

    char child_table_name[NAME_LEN + 1];
    char child_key_name[NAME_LEN + 1];
    if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
                            child_key_name, sizeof(child_key_name)))
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe(),
               child_table_name, child_key_name);
    }
    else
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe());
    }
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NULL_IN_SPATIAL:
    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
    DBUG_VOID_RETURN;
  case HA_ERR_FOUND_DUPP_UNIQUE:
    textno=ER_DUP_UNIQUE;
    break;
  case HA_ERR_RECORD_CHANGED:
    textno=ER_CHECKREAD;
    break;
  case HA_ERR_CRASHED:
    textno=ER_NOT_KEYFILE;
    break;
  case HA_ERR_WRONG_IN_RECORD:
    textno= ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_CRASHED_ON_USAGE:
    textno=ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_NOT_A_TABLE:
    textno= error;
    break;
  case HA_ERR_CRASHED_ON_REPAIR:
    textno=ER_CRASHED_ON_REPAIR;
    break;
  case HA_ERR_OUT_OF_MEM:
    textno=ER_OUT_OF_RESOURCES;
    break;
  case HA_ERR_SE_OUT_OF_MEMORY:
    my_error(ER_ENGINE_OUT_OF_MEMORY, errflag,
             table->file->table_type());
    DBUG_VOID_RETURN;
  case HA_ERR_WRONG_COMMAND:
    textno=ER_ILLEGAL_HA;
    break;
  case HA_ERR_OLD_FILE:
    textno=ER_OLD_KEYFILE;
    break;
  case HA_ERR_UNSUPPORTED:
    textno=ER_UNSUPPORTED_EXTENSION;
    break;
  case HA_ERR_RECORD_FILE_FULL:
  case HA_ERR_INDEX_FILE_FULL:
  {
    textno=ER_RECORD_FILE_FULL;
    /* Write the error message to error log */
    errflag|= ME_ERRORLOG;
    break;
  }
  case HA_ERR_LOCK_WAIT_TIMEOUT:
    textno=ER_LOCK_WAIT_TIMEOUT;
    break;
  case HA_ERR_LOCK_TABLE_FULL:
    textno=ER_LOCK_TABLE_FULL;
    break;
  case HA_ERR_LOCK_DEADLOCK:
    textno=ER_LOCK_DEADLOCK;
    break;
  case HA_ERR_READ_ONLY_TRANSACTION:
    textno=ER_READ_ONLY_TRANSACTION;
    break;
  case HA_ERR_CANNOT_ADD_FOREIGN:
    textno=ER_CANNOT_ADD_FOREIGN;
    break;
  case HA_ERR_ROW_IS_REFERENCED:
  {
    /* Ask the engine for the detailed foreign-key text. */
    String str;
    get_error_message(error, &str);
    my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NO_REFERENCED_ROW:
  {
    String str;
    get_error_message(error, &str);
    my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_DEF_CHANGED:
    textno=ER_TABLE_DEF_CHANGED;
    break;
  case HA_ERR_NO_SUCH_TABLE:
    my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_RBR_LOGGING_FAILED:
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
    break;
  case HA_ERR_DROP_INDEX_FK:
  {
    const char *ptr= "???";
    uint key_nr= table ? get_dup_key(error) : -1;
    if ((int) key_nr >= 0 && key_nr != MAX_KEY)
      ptr= table->key_info[key_nr].name;
    my_error(ER_DROP_INDEX_FK, errflag, ptr);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_NEEDS_UPGRADE:
    textno=ER_TABLE_NEEDS_UPGRADE;
    break;
  case HA_ERR_NO_PARTITION_FOUND:
    textno=ER_WRONG_PARTITION_NAME;
    break;
  case HA_ERR_TABLE_READONLY:
    textno= ER_OPEN_AS_READONLY;
    break;
  case HA_ERR_AUTOINC_READ_FAILED:
    textno= ER_AUTOINC_READ_FAILED;
    break;
  case HA_ERR_AUTOINC_ERANGE:
    textno= ER_WARN_DATA_OUT_OF_RANGE;
    break;
  case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
    textno= ER_TOO_MANY_CONCURRENT_TRXS;
    break;
  case HA_ERR_INDEX_COL_TOO_LONG:
    textno= ER_INDEX_COLUMN_TOO_LONG;
    break;
  case HA_ERR_NOT_IN_LOCK_PARTITIONS:
    textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
    break;
  case HA_ERR_INDEX_CORRUPT:
    textno= ER_INDEX_CORRUPT;
    break;
  case HA_ERR_UNDO_REC_TOO_BIG:
    textno= ER_UNDO_RECORD_TOO_BIG;
    break;
  case HA_ERR_TABLE_IN_FK_CHECK:
    textno= ER_TABLE_IN_FK_CHECK;
    break;
  case HA_WRONG_CREATE_OPTION:
    textno= ER_ILLEGAL_HA;
    break;
  case HA_MISSING_CREATE_OPTION:
  {
    const char* engine= table_type();
    my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TOO_MANY_FIELDS:
    textno= ER_TOO_MANY_FIELDS;
    break;
  case HA_ERR_INNODB_READ_ONLY:
    textno= ER_INNODB_READ_ONLY;
    break;
  case HA_ERR_TEMP_FILE_WRITE_FAILURE:
    textno= ER_TEMP_FILE_WRITE_FAILURE;
    break;
  case HA_ERR_INNODB_FORCED_RECOVERY:
    textno= ER_INNODB_FORCED_RECOVERY;
    break;
  case HA_ERR_TABLE_CORRUPT:
    my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_QUERY_INTERRUPTED:
    textno= ER_QUERY_INTERRUPTED;
    break;
  case HA_ERR_TABLESPACE_MISSING:
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
                table_share->table_name.str);
    my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
    my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_WRONG_FILE_NAME:
    my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_NOT_ALLOWED_COMMAND:
    textno=ER_NOT_ALLOWED_COMMAND;
    break;
  default:
  {
    /* The error was "unknown" to this function.
       Ask handler if it has got a message for this error */
    String str;
    bool temporary= get_error_message(error, &str);
    if (!str.is_empty())
    {
      const char* engine= table_type();
      if (temporary)
        my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
      else
        my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
    }
    else
      my_error(ER_GET_ERRNO,errflag,error);
    DBUG_VOID_RETURN;
  }
  }
  /* ENOENT already emitted its message above; do not report it twice. */
  if (textno != ER_FILE_NOT_FOUND)
    my_error(textno, errflag, table_share->table_name.str, error);
  DBUG_VOID_RETURN;
}
4318
4319
4320 /**
4321 Return an error message specific to this handler.
4322
4323 @param error error code previously returned by handler
4324 @param buf pointer to String where to add error message
4325
4326 @return
4327 Returns true if this is a temporary error
4328 */
bool handler::get_error_message(int error, String* buf)
{
  /*
    Default: no engine-specific message is appended to 'buf', and the
    error is not classified as temporary. Engines override this to
    supply their own text (e.g. foreign-key details for print_error()).
  */
  return FALSE;
}
4333
4334
4335 /**
4336 Check for incompatible collation changes.
4337
4338 @retval
4339 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
4340 @retval
4341 0 No upgrade required.
4342 */
4343
int handler::check_collation_compatibility()
{
  ulong mysql_version= table->s->mysql_version;

  /*
    Only tables created before 5.1.24 can carry indexes built with the
    buggy sort orders listed below; newer tables need no scan at all.
  */
  if (mysql_version < 50124)
  {
    KEY *key= table->key_info;
    KEY *key_end= key + table->s->keys;
    for (; key < key_end; key++)
    {
      KEY_PART_INFO *key_part= key->key_part;
      KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
      for (; key_part < key_part_end; key_part++)
      {
        /* Skip key parts not backed by a real column (e.g. expression). */
        if (!key_part->fieldnr)
          continue;
        Field *field= table->field[key_part->fieldnr - 1];
        uint cs_number= field->charset()->number;
        /*
          Collation numbers whose sort order changed: indexes built with
          the old order would be mis-sorted under the current server.
        */
        if ((mysql_version < 50048 &&
             (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
              cs_number == 41 || /* latin7_general_ci - bug #29461 */
              cs_number == 42 || /* latin7_general_cs - bug #29461 */
              cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
              cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
              cs_number == 22 || /* koi8u_general_ci - bug #29461 */
              cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
              cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
             (mysql_version < 50124 &&
             (cs_number == 33 || /* utf8_general_ci - bug #27877 */
              cs_number == 35))) /* ucs2_general_ci - bug #27877 */
          return HA_ADMIN_NEEDS_UPGRADE;
      }
    }
  }
  return 0;
}
4380
4381
/*
  Check whether the table needs upgrading before use: blob-in-key tables
  from pre-versioned .frm files, old varchar format, incompatible
  collations, and finally any engine-specific conditions.
*/
int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error;
  KEY *keyinfo, *keyend;
  KEY_PART_INFO *keypart, *keypartend;

  /* mysql_version == 0 means the .frm predates version stamping. */
  if (!table->s->mysql_version)
  {
    /* check for blob-in-key error */
    keyinfo= table->key_info;
    keyend= table->key_info + table->s->keys;
    for (; keyinfo < keyend; keyinfo++)
    {
      keypart= keyinfo->key_part;
      keypartend= keypart + keyinfo->user_defined_key_parts;
      for (; keypart < keypartend; keypart++)
      {
        /* Key parts without a backing column cannot be BLOBs; skip. */
        if (!keypart->fieldnr)
          continue;
        Field *field= table->field[keypart->fieldnr-1];
        if (field->type() == MYSQL_TYPE_BLOB)
        {
          if (check_opt->sql_flags & TT_FOR_UPGRADE)
            check_opt->flags= T_MEDIUM;
          return HA_ADMIN_NEEDS_CHECK;
        }
      }
    }
  }
  if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
    return HA_ADMIN_NEEDS_ALTER;

  if ((error= check_collation_compatibility()))
    return error;

  /* Finally let the storage engine apply its own upgrade checks. */
  return check_for_upgrade(check_opt);
}
4419
4420
check_old_types()4421 int handler::check_old_types()
4422 {
4423 Field** field;
4424
4425 for (field= table->field; (*field); field++)
4426 {
4427 if (table->s->mysql_version == 0) // prior to MySQL 5.0
4428 {
4429 /* check for bad DECIMAL field */
4430 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4431 {
4432 return HA_ADMIN_NEEDS_ALTER;
4433 }
4434 if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4435 {
4436 return HA_ADMIN_NEEDS_ALTER;
4437 }
4438 }
4439
4440 /*
4441 Check for old DECIMAL field.
4442
4443 Above check does not take into account for pre 5.0 decimal types which can
4444 be present in the data directory if user did in-place upgrade from
4445 mysql-4.1 to mysql-5.0.
4446 */
4447 if ((*field)->type() == MYSQL_TYPE_DECIMAL)
4448 {
4449 return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4450 }
4451
4452 if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4453 return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4454
4455 //Check for old temporal format if avoid_temporal_upgrade is disabled.
4456 mysql_mutex_lock(&LOCK_global_system_variables);
4457 bool check_temporal_upgrade= !avoid_temporal_upgrade;
4458 mysql_mutex_unlock(&LOCK_global_system_variables);
4459
4460 if (check_temporal_upgrade)
4461 {
4462 if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4463 ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4464 ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4465 return HA_ADMIN_NEEDS_ALTER;
4466 }
4467 }
4468 return 0;
4469 }
4470
4471
/*
  Stamp the current server version into the table's .frm file (4 bytes at
  offset 51) so future CHECK/REPAIR runs can skip upgrade work.
  Returns 0 on success (or when no update is needed), non-zero on failure.
*/
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];
  File file;
  int result= 1;   // pessimistic default; cleared on successful write
  DBUG_ENTER("update_frm_version");

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == MYSQL_VERSION_ID)
    DBUG_RETURN(0);

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= mysql_file_open(key_file_frm,
                             path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
  {
    uchar version[4];

    int4store(version, MYSQL_VERSION_ID);

    /* With MYF_RW, pwrite returns non-zero on a short/failed write. */
    if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
      goto err;

    /* Keep the in-memory share consistent with what is now on disk. */
    table->s->mysql_version= MYSQL_VERSION_ID;
  }
err:
  if (file >= 0)
    (void) mysql_file_close(file, MYF(MY_WME));
  DBUG_RETURN(result);
}
4507
4508
4509
4510 /**
4511 @return
4512 key if error because of duplicated keys
4513 */
uint handler::get_dup_key(int error)
{
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::get_dup_key");
  /* Reset to "unknown key" before asking the engine which key failed. */
  table->file->errkey = (uint) -1;
  /* Only these errors carry key information the engine can report. */
  if (error == HA_ERR_FOUND_DUPP_KEY ||
      error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
      error == HA_ERR_DROP_INDEX_FK)
    table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
  DBUG_RETURN(table->file->errkey);
}
4526
4527
4528 /**
4529 Delete all files with extension from bas_ext().
4530
4531 @param name Base name of table
4532
4533 @note
4534 We assume that the handler may return more extensions than
4535 was actually used for the file.
4536
4537 @retval
4538 0 If we successfully deleted at least one file from base_ext and
4539 didn't get any other errors than ENOENT
4540 @retval
4541 !0 Error
4542 */
int handler::delete_table(const char *name)
{
  int saved_error= 0;
  int error= 0;
  int enoent_or_zero= ENOENT;   // Error if no file was deleted
  char buff[FN_REFLEN];
  assert(m_lock_type == F_UNLCK);

  for (const char **ext=bas_ext(); *ext ; ext++)
  {
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
    if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
    {
      /* ENOENT is tolerated: the engine may list unused extensions. */
      if (my_errno() != ENOENT)
      {
        /*
          If error on the first existing file, return the error.
          Otherwise delete as much as possible.
        */
        if (enoent_or_zero)
          return my_errno();
        saved_error= my_errno();
      }
    }
    else
      enoent_or_zero= 0;                        // No error for ENOENT
    /* error ends up ENOENT if nothing was ever deleted, 0 otherwise. */
    error= enoent_or_zero;
  }
  return saved_error ? saved_error : error;
}
4573
4574
/*
  Rename all files of the table (one per bas_ext() extension) from 'from'
  to 'to'. Missing files (ENOENT) are skipped; any other failure aborts
  and the renames already performed are rolled back.
*/
int handler::rename_table(const char * from, const char * to)
{
  int error= 0;
  const char **ext, **start_ext;
  start_ext= bas_ext();
  for (ext= start_ext; *ext ; ext++)
  {
    if (rename_file_ext(from, to, *ext))
    {
      error= my_errno();
      if (error != ENOENT)
        break;
      error= 0;   // a missing extension file is not an error
    }
  }
  if (error)
  {
    /* Try to revert the rename. Ignore errors. */
    /* Walks backwards from the failing extension down to the first;
       re-renaming the extension that failed is a harmless no-op. */
    for (; ext >= start_ext; ext--)
      rename_file_ext(to, from, *ext);
  }
  return error;
}
4598
4599
/* Close the handler, then remove the table's files. Best-effort: the
   return value of delete_table() is deliberately ignored here. */
void handler::drop_table(const char *name)
{
  close();
  delete_table(name);
}
4605
4606
4607 /**
4608 Performs checks upon the table.
4609
4610 @param thd thread doing CHECK TABLE operation
4611 @param check_opt options from the parser
4612
4613 @retval
4614 HA_ADMIN_OK Successful upgrade
4615 @retval
4616 HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4617 @retval
4618 HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4619 @retval
4620 HA_ADMIN_NOT_IMPLEMENTED
4621 */
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
{
  int error;
  bool skip_version_update = false;
  bool is_upgrade = check_opt->sql_flags & TT_FOR_UPGRADE;

  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);

  /* CHECK ... FOR UPGRADE on a table already stamped with this (or a
     newer) server version has nothing to do. */
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
      (check_opt->sql_flags & TT_FOR_UPGRADE))
    return 0;

  if (table->s->mysql_version < MYSQL_VERSION_ID)
  {
    if ((error= check_old_types()))
      return error;

    error= ha_check_for_upgrade(check_opt);
    switch (error)
    {
    case HA_ADMIN_NEEDS_UPG_PART:
      /* Skip version update as the table needs upgrade. */
      skip_version_update= true;
      /* Fall through */
    case HA_ADMIN_OK:
      /* FOR UPGRADE stops here; a plain CHECK continues to check(). */
      if (is_upgrade)
        return error;
      /* Fall through */
    case HA_ADMIN_NEEDS_CHECK:
      break;
    default:
      return error;
    }
  }

  if ((error= check(thd, check_opt)))
    return error;
  /* Skip updating frm version if not main handler. */
  if (table->file != this || skip_version_update)
    return error;
  /* Check passed: stamp the current server version into the .frm. */
  return update_frm_version(table);
}
4665
/**
  A helper function to mark a started transaction as a no-op DML
  (read-write in form but with no effective change), mirroring
  mark_trx_read_write() below.
*/
void
handler::mark_trx_noop_dml()
{
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started())
  {
    assert(has_transactions());
    /*
      table_share can be NULL in ha_delete_table(). See implementation
      of standalone function ha_delete_table() in sql_base.cc.
    */
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
      ha_info->set_trx_noop_read_write();
  }
}
4689
4690 /**
4691 A helper function to mark a transaction read-write,
4692 if it is started.
4693 */
4694
void
handler::mark_trx_read_write()
{
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started())
  {
    assert(has_transactions());
    /*
      table_share can be NULL in ha_delete_table(). See implementation
      of standalone function ha_delete_table() in sql_base.cc.
    */
    /* Changes to temporary tables do not make a transaction read-write. */
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
      ha_info->set_trx_read_write();
  }
}
4718
4719
4720 /**
4721 Repair table: public interface.
4722
4723 @sa handler::repair()
4724 */
4725
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
{
  int result;
  mark_trx_read_write();

  result= repair(thd, check_opt);
  /* Engines must either implement repair or not claim HA_CAN_REPAIR. */
  assert(result == HA_ADMIN_NOT_IMPLEMENTED ||
         ha_table_flags() & HA_CAN_REPAIR);

  int old_types_error= check_old_types();

  /* Only stamp the .frm when repair succeeded and the table does not
     still require a dump/reload upgrade for pre-5.0 DECIMAL columns. */
  if (old_types_error != HA_ADMIN_NEEDS_DUMP_UPGRADE && result == HA_ADMIN_OK)
    result= update_frm_version(table);
  return result;
}
4741
4742
4743 /**
4744 Start bulk insert.
4745
4746 Allow the handler to optimize for multiple row insert.
4747
4748 @param rows Estimated rows to insert
4749 */
4750
void handler::ha_start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("handler::ha_start_bulk_insert");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* Record the estimate before notifying the engine, which may read it. */
  estimation_rows_to_insert= rows;
  start_bulk_insert(rows);
  DBUG_VOID_RETURN;
}
4760
4761
4762 /**
4763 End bulk insert.
4764
4765 @return Operation status
4766 @retval 0 Success
4767 @retval != 0 Failure (error code returned)
4768 */
4769
ha_end_bulk_insert()4770 int handler::ha_end_bulk_insert()
4771 {
4772 DBUG_ENTER("handler::ha_end_bulk_insert");
4773 assert(table_share->tmp_table != NO_TMP_TABLE ||
4774 m_lock_type == F_WRLCK);
4775 estimation_rows_to_insert= 0;
4776 DBUG_RETURN(end_bulk_insert());
4777 }
4778
4779
4780 /**
4781 Bulk update row: public interface.
4782
4783 @sa handler::bulk_update_row()
4784 */
4785
4786 int
ha_bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)4787 handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4788 uint *dup_key_found)
4789 {
4790 assert(table_share->tmp_table != NO_TMP_TABLE ||
4791 m_lock_type == F_WRLCK);
4792 mark_trx_read_write();
4793
4794 return bulk_update_row(old_data, new_data, dup_key_found);
4795 }
4796
4797
4798 /**
4799 Delete all rows: public interface.
4800
4801 @sa handler::delete_all_rows()
4802 */
4803
4804 int
ha_delete_all_rows()4805 handler::ha_delete_all_rows()
4806 {
4807 assert(table_share->tmp_table != NO_TMP_TABLE ||
4808 m_lock_type == F_WRLCK);
4809 mark_trx_read_write();
4810
4811 return delete_all_rows();
4812 }
4813
4814
4815 /**
4816 Truncate table: public interface.
4817
4818 @sa handler::truncate()
4819 */
4820
4821 int
ha_truncate()4822 handler::ha_truncate()
4823 {
4824 assert(table_share->tmp_table != NO_TMP_TABLE ||
4825 m_lock_type == F_WRLCK);
4826 mark_trx_read_write();
4827
4828 return truncate();
4829 }
4830
4831
4832 /**
4833 Optimize table: public interface.
4834
4835 @sa handler::optimize()
4836 */
4837
4838 int
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4839 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4840 {
4841 assert(table_share->tmp_table != NO_TMP_TABLE ||
4842 m_lock_type == F_WRLCK);
4843 mark_trx_read_write();
4844
4845 return optimize(thd, check_opt);
4846 }
4847
4848
4849 /**
4850 Analyze table: public interface.
4851
4852 @sa handler::analyze()
4853 */
4854
4855 int
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4856 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4857 {
4858 assert(table_share->tmp_table != NO_TMP_TABLE ||
4859 m_lock_type != F_UNLCK);
4860 mark_trx_read_write();
4861
4862 return analyze(thd, check_opt);
4863 }
4864
4865
4866 /**
4867 Check and repair table: public interface.
4868
4869 @sa handler::check_and_repair()
4870 */
4871
4872 bool
ha_check_and_repair(THD * thd)4873 handler::ha_check_and_repair(THD *thd)
4874 {
4875 assert(table_share->tmp_table != NO_TMP_TABLE ||
4876 m_lock_type == F_UNLCK);
4877 mark_trx_read_write();
4878
4879 return check_and_repair(thd);
4880 }
4881
4882
4883 /**
4884 Disable indexes: public interface.
4885
4886 @sa handler::disable_indexes()
4887 */
4888
4889 int
ha_disable_indexes(uint mode)4890 handler::ha_disable_indexes(uint mode)
4891 {
4892 assert(table_share->tmp_table != NO_TMP_TABLE ||
4893 m_lock_type != F_UNLCK);
4894 mark_trx_read_write();
4895
4896 return disable_indexes(mode);
4897 }
4898
4899
4900 /**
4901 Enable indexes: public interface.
4902
4903 @sa handler::enable_indexes()
4904 */
4905
4906 int
ha_enable_indexes(uint mode)4907 handler::ha_enable_indexes(uint mode)
4908 {
4909 assert(table_share->tmp_table != NO_TMP_TABLE ||
4910 m_lock_type != F_UNLCK);
4911 mark_trx_read_write();
4912
4913 return enable_indexes(mode);
4914 }
4915
4916
4917 /**
4918 Discard or import tablespace: public interface.
4919
4920 @sa handler::discard_or_import_tablespace()
4921 */
4922
4923 int
ha_discard_or_import_tablespace(my_bool discard)4924 handler::ha_discard_or_import_tablespace(my_bool discard)
4925 {
4926 assert(table_share->tmp_table != NO_TMP_TABLE ||
4927 m_lock_type == F_WRLCK);
4928 mark_trx_read_write();
4929
4930 return discard_or_import_tablespace(discard);
4931 }
4932
4933
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4934 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4935 Alter_inplace_info *ha_alter_info)
4936 {
4937 assert(table_share->tmp_table != NO_TMP_TABLE ||
4938 m_lock_type != F_UNLCK);
4939 mark_trx_read_write();
4940
4941 return prepare_inplace_alter_table(altered_table, ha_alter_info);
4942 }
4943
4944
ha_commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)4945 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4946 Alter_inplace_info *ha_alter_info,
4947 bool commit)
4948 {
4949 /*
4950 At this point we should have an exclusive metadata lock on the table.
4951 The exception is if we're about to roll back changes (commit= false).
4952 In this case, we might be rolling back after a failed lock upgrade,
4953 so we could be holding the same lock level as for inplace_alter_table().
4954 */
4955 assert(ha_thd()->mdl_context.owns_equal_or_stronger_lock(MDL_key::TABLE,
4956 table->s->db.str,
4957 table->s->table_name.str,
4958 MDL_EXCLUSIVE) ||
4959 !commit);
4960
4961 return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4962 }
4963
4964
4965 /*
4966 Default implementation to support in-place alter table
4967 and old online add/drop index API
4968 */
4969
4970 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4971 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4972 Alter_inplace_info *ha_alter_info)
4973 {
4974 DBUG_ENTER("check_if_supported_alter");
4975
4976 HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4977
4978 Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4979 Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4980 Alter_inplace_info::ALTER_COLUMN_NAME |
4981 Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4982 Alter_inplace_info::CHANGE_CREATE_OPTION |
4983 Alter_inplace_info::ALTER_RENAME |
4984 Alter_inplace_info::RENAME_INDEX |
4985 Alter_inplace_info::ALTER_INDEX_COMMENT |
4986 Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
4987
4988 /* Is there at least one operation that requires copy algorithm? */
4989 if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4990 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4991
4992 /*
4993 ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4994 ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4995 change column charsets and so not supported in-place through
4996 old API.
4997
4998 Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4999 not supported as in-place operations in old API either.
5000 */
5001 if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
5002 HA_CREATE_USED_DEFAULT_CHARSET |
5003 HA_CREATE_USED_PACK_KEYS |
5004 HA_CREATE_USED_MAX_ROWS) ||
5005 (table->s->row_type != create_info->row_type))
5006 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
5007
5008 uint table_changes= (ha_alter_info->handler_flags &
5009 Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
5010 IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
5011 if (table->file->check_if_incompatible_data(create_info, table_changes)
5012 == COMPATIBLE_DATA_YES)
5013 DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
5014
5015 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
5016 }
5017
5018
5019 /*
5020 Default implementation to support in-place alter table
5021 and old online add/drop index API
5022 */
5023
void handler::notify_table_changed()
{
  /*
    Default implementation for the old online add/drop index API:
    recreate the handler files for this table (CHF_INDEX_FLAG).
  */
  ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
}
5028
5029
report_unsupported_error(const char * not_supported,const char * try_instead)5030 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
5031 const char *try_instead)
5032 {
5033 if (unsupported_reason == NULL)
5034 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
5035 not_supported, try_instead);
5036 else
5037 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
5038 not_supported, unsupported_reason, try_instead);
5039 }
5040
5041
5042 /**
5043 Rename table: public interface.
5044
5045 @sa handler::rename_table()
5046 */
5047
5048 int
ha_rename_table(const char * from,const char * to)5049 handler::ha_rename_table(const char *from, const char *to)
5050 {
5051 assert(m_lock_type == F_UNLCK);
5052 mark_trx_read_write();
5053
5054 return rename_table(from, to);
5055 }
5056
5057
5058 /**
5059 Delete table: public interface.
5060
5061 @sa handler::delete_table()
5062 */
5063
5064 int
ha_delete_table(const char * name)5065 handler::ha_delete_table(const char *name)
5066 {
5067 assert(m_lock_type == F_UNLCK);
5068 mark_trx_read_write();
5069
5070 return delete_table(name);
5071 }
5072
5073
5074 /**
5075 Drop table in the engine: public interface.
5076
5077 @sa handler::drop_table()
5078 */
5079
5080 void
ha_drop_table(const char * name)5081 handler::ha_drop_table(const char *name)
5082 {
5083 assert(m_lock_type == F_UNLCK);
5084 mark_trx_read_write();
5085
5086 return drop_table(name);
5087 }
5088
5089
5090 /**
5091 Create a table in the engine: public interface.
5092
5093 @sa handler::create()
5094 */
5095
5096 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info)5097 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
5098 {
5099 assert(m_lock_type == F_UNLCK);
5100 mark_trx_read_write();
5101
5102 return create(name, form, info);
5103 }
5104
5105
5106 /**
5107 Create handler files for CREATE TABLE: public interface.
5108
5109 @sa handler::create_handler_files()
5110 */
5111
5112 int
ha_create_handler_files(const char * name,const char * old_name,int action_flag,HA_CREATE_INFO * info)5113 handler::ha_create_handler_files(const char *name, const char *old_name,
5114 int action_flag, HA_CREATE_INFO *info)
5115 {
5116 /*
5117 Normally this is done when unlocked, but in fast_alter_partition_table,
5118 it is done on an already locked handler when preparing to alter/rename
5119 partitions.
5120 */
5121 assert(m_lock_type == F_UNLCK ||
5122 (!old_name && strcmp(name, table_share->path.str)));
5123 mark_trx_read_write();
5124
5125 return create_handler_files(name, old_name, action_flag, info);
5126 }
5127
5128
5129 /**
5130 Tell the storage engine that it is allowed to "disable transaction" in the
5131 handler. It is a hint that ACID is not required - it is used in NDB for
5132 ALTER TABLE, for example, when data are copied to temporary table.
5133 A storage engine may treat this hint any way it likes. NDB for example
5134 starts to commit every now and then automatically.
5135 This hint can be safely ignored.
5136 */
ha_enable_transaction(THD * thd,bool on)5137 int ha_enable_transaction(THD *thd, bool on)
5138 {
5139 int error=0;
5140 DBUG_ENTER("ha_enable_transaction");
5141 DBUG_PRINT("enter", ("on: %d", (int) on));
5142
5143 #ifdef WITH_WSREP
5144 if (thd->wsrep_applier) DBUG_RETURN(0);
5145 #endif
5146 if ((thd->get_transaction()->m_flags.enabled= on))
5147 {
5148 /*
5149 Now all storage engines should have transaction handling enabled.
5150 But some may have it enabled all the time - "disabling" transactions
5151 is an optimization hint that storage engine is free to ignore.
5152 So, let's commit an open transaction (if any) now.
5153 */
5154 if (!(error= ha_commit_trans(thd, 0)))
5155 error= trans_commit_implicit(thd);
5156 }
5157 DBUG_RETURN(error);
5158 }
5159
/**
  Read the next row via the active index and verify it still matches 'key'.

  @param buf     Buffer to read the row into; may be distinct from
                 table->record[0].
  @param key     Key value the next row must still match.
  @param keylen  Length in bytes of the key prefix to compare.

  @return 0 on success, HA_ERR_END_OF_FILE when the next row no longer
          matches the key, or the error code from index_next().
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error;
  DBUG_ENTER("index_next_same");
  if (!(error=index_next(buf)))
  {
    /* Offset between the caller's buffer and the default record buffer. */
    my_ptrdiff_t ptrdiff= buf - table->record[0];
    uchar *save_record_0= NULL;
    KEY *key_info= NULL;
    KEY_PART_INFO *key_part= NULL;
    KEY_PART_INFO *key_part_end= NULL;

    /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In parts it uses table->record[0] directly, in parts it uses
      field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references. This is table->record[0] itself and
      the field pointers of the fields used in this key.
    */
    if (ptrdiff)
    {
      save_record_0= table->record[0];
      table->record[0]= buf;
      key_info= table->key_info + active_index;
      key_part= key_info->key_part;
      key_part_end= key_part + key_info->user_defined_key_parts;
      for (; key_part < key_part_end; key_part++)
      {
        assert(key_part->field);
        key_part->field->move_field_offset(ptrdiff);
      }
    }

    if (key_cmp_if_same(table, key, active_index, keylen))
    {
      /* The row read no longer matches the key: report end of range. */
      table->status=STATUS_NOT_FOUND;
      error=HA_ERR_END_OF_FILE;
    }

    /* Move back if necessary. */
    if (ptrdiff)
    {
      table->record[0]= save_record_0;
      for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
        key_part->field->move_field_offset(-ptrdiff);
    }
  }
  DBUG_RETURN(error);
}
5210
5211 /****************************************************************************
** Some general functions that aren't in the handler class
5213 ****************************************************************************/
5214
5215 /**
5216 Initiates table-file and calls appropriate database-creator.
5217
5218 @retval
5219 0 ok
5220 @retval
5221 1 error
5222 */
int ha_create_table(THD *thd, const char *path,
                    const char *db, const char *table_name,
                    HA_CREATE_INFO *create_info,
                    bool update_create_info,
                    bool is_temp_table)
{
  int error= 1;
  TABLE table;
  char name_buff[FN_REFLEN];
  const char *name;
  TABLE_SHARE share;
#ifdef HAVE_PSI_TABLE_INTERFACE
  /*
    A table is instrumented as temporary if the caller says so, if the
    CREATE options say so, or if its path contains the temp-file prefix.
  */
  bool temp_table = is_temp_table ||
    (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
    (strstr(path, tmp_file_prefix) != NULL);
#endif
  DBUG_ENTER("ha_create_table");

  /* Build a table share from the frm file at 'path'. */
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
  if (open_table_def(thd, &share, 0))
    goto err;

#ifdef HAVE_PSI_TABLE_INTERFACE
  share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
#endif

  if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
                            TRUE))
  {
#ifdef HAVE_PSI_TABLE_INTERFACE
    /* Undo the share instrumentation registered above. */
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
    goto err;
  }

  if (update_create_info)
    update_create_info_from_table(create_info, &table);

  /* Let the engine translate the path to its own filename convention. */
  name= get_canonical_filename(table.file, share.path.str, name_buff);

  error= table.file->ha_create(name, &table, create_info);
  if (error)
  {
    table.file->print_error(error, MYF(0));
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
  }
  (void) closefrm(&table, 0);
err:
  free_table_share(&share);
  /* Collapse any engine error code into the documented 0/1 result. */
  DBUG_RETURN(error != 0);
}
5278
5279 /**
5280 Try to discover table from engine.
5281
5282 @note
5283 If found, write the frm file to disk.
5284
5285 @retval
    -1  Table did not exist
5287 @retval
5288 0 Table created ok
5289 @retval
5290 > 0 Error, table existed but could not be created
5291 */
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
{
  int error;
  uchar *frmblob;
  size_t frmlen;
  char path[FN_REFLEN + 1];
  HA_CREATE_INFO create_info;
  TABLE table;
  TABLE_SHARE share;
  DBUG_ENTER("ha_create_table_from_engine");
  DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));

  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
  {
    /* Table could not be discovered and thus not created */
    DBUG_RETURN(error);
  }

  /*
    Table exists in handler and could be discovered
    frmblob and frmlen are set, write the frm to disk
  */

  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
  // Save the frm file
  error= writefrm(path, frmblob, frmlen);
  my_free(frmblob);  // Discovery allocated the frm image; done with it.
  if (error)
    DBUG_RETURN(2);

  init_tmp_table_share(thd, &share, db, 0, name, path);
  if (open_table_def(thd, &share, 0))
  {
    DBUG_RETURN(3);
  }

#ifdef HAVE_PSI_TABLE_INTERFACE
  /*
    Table discovery is not instrumented.
    Once discovered, the table will be opened normally,
    and instrumented normally.
  */
#endif

  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
  {
    free_table_share(&share);
    DBUG_RETURN(3);
  }

  update_create_info_from_table(&create_info, &table);
  /* Mark that this CREATE originates from engine discovery. */
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;

  get_canonical_filename(table.file, path, path);
  error=table.file->ha_create(path, &table, &create_info);
  (void) closefrm(&table, 1);

  DBUG_RETURN(error != 0);
}
5351
5352
5353 /**
5354 Try to find a table in a storage engine.
5355
5356 @param db Normalized table schema name
5357 @param name Normalized table name.
5358 @param[out] exists Only valid if the function succeeded.
5359
5360 @retval TRUE An error is found
5361 @retval FALSE Success, check *exists
5362 */
5363
5364 bool
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5365 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
5366 bool *exists)
5367 {
5368 uchar *frmblob= NULL;
5369 size_t frmlen;
5370 DBUG_ENTER("ha_check_if_table_exists");
5371
5372 *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
5373 if (*exists)
5374 my_free(frmblob);
5375
5376 DBUG_RETURN(FALSE);
5377 }
5378
5379
5380 /**
5381 @brief Check if a given table is a user table or a valid system table or
5382 a valid system table that a SE supports.
5383
5384 @param hton Handlerton of new engine.
5385 @param db Database name.
5386 @param table_name Table name to be checked.
5387
5388 @retval st_sys_tbl_chk_params::enum_status
5389 */
static st_sys_tbl_chk_params::enum_status
ha_get_system_table_check_status(handlerton *hton, const char *db,
                                 const char *table_name)
{
  DBUG_ENTER("ha_get_system_table_check_status");
  st_sys_tbl_chk_params check_params;
  check_params.status= st_sys_tbl_chk_params::USER_TABLE;
  bool is_system_database= false;
  const char **names;
  st_handler_tablename *systab;

  // Check if we have a system database name in the command.
  assert(known_system_databases != NULL);
  names= known_system_databases;
  while (names && *names)
  {
    if (strcmp(*names, db) == 0)
    {
      /* Used to compare later, will be faster */
      check_params.db= *names;
      is_system_database= true;
      break;
    }
    names++;
  }
  // Tables outside system databases are always user tables.
  if (!is_system_database)
    DBUG_RETURN(st_sys_tbl_chk_params::USER_TABLE);

  // Check if this is SQL layer system tables.
  systab= mysqld_system_tables;
  check_params.is_sql_layer_system_table= false;
  while (systab && systab->db)
  {
    // Pointer comparison on db is valid: both come from the same array.
    if (systab->db == check_params.db &&
        strcmp(systab->tablename, table_name) == 0)
    {
      check_params.is_sql_layer_system_table= true;
      break;
    }
    systab++;
  }

  // Check if this is a system table and if some engine supports it.
  check_params.status= check_params.is_sql_layer_system_table ?
    st_sys_tbl_chk_params::SYSTEM_TABLE :
    st_sys_tbl_chk_params::USER_TABLE;
  check_params.db_type= hton->db_type;
  check_params.table_name= table_name;
  // Each engine may refine check_params.status via the callback below.
  plugin_foreach(NULL, check_engine_system_table_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);

  DBUG_RETURN(check_params.status);
}
5443
5444
5445 /**
5446 @brief Check if a given table is a system table supported by a SE.
5447
5448 @todo There is another function called is_system_table_name() used by
5449 get_table_category(), which is used to set TABLE_SHARE table_category.
5450 It checks only a subset of table name like proc, event and time*.
5451 We cannot use below function in get_table_category(),
5452 as that affects locking mechanism. If we need to
5453 unify these functions, we need to fix locking issues generated.
5454
5455 @param hton Handlerton of new engine.
5456 @param db Database name.
5457 @param table_name Table name to be checked.
5458
5459 @return Operation status
5460 @retval true If the table name is a valid system table
5461 that is supported by a SE.
5462
5463 @retval false Not a system table.
5464 */
ha_is_supported_system_table(handlerton * hton,const char * db,const char * table_name)5465 bool ha_is_supported_system_table(handlerton *hton, const char *db,
5466 const char *table_name)
5467 {
5468 DBUG_ENTER("ha_is_supported_system_table");
5469 st_sys_tbl_chk_params::enum_status status=
5470 ha_get_system_table_check_status(hton, db, table_name);
5471
5472 // It's a valid SE supported system table.
5473 DBUG_RETURN(status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5474 }
5475
5476
5477 /**
5478 @brief Check if a given table is a system table that belongs
5479 to some SE or a user table.
5480
5481 @details The primary purpose of introducing this function is to stop system
5482 tables to be created or being moved to undesired storage engines.
5483
5484 @todo There is another function called is_system_table_name() used by
5485 get_table_category(), which is used to set TABLE_SHARE table_category.
5486 It checks only a subset of table name like proc, event and time*.
5487 We cannot use below function in get_table_category(),
5488 as that affects locking mechanism. If we need to
5489 unify these functions, we need to fix locking issues generated.
5490
5491 @param hton Handlerton of new engine.
5492 @param db Database name.
5493 @param table_name Table name to be checked.
5494
5495 @return Operation status
5496 @retval true If the table name is a valid system table
5497 or if its a valid user table.
5498
5499 @retval false If the table name is a system table name
5500 and does not belong to engine specified
5501 in the command.
5502 */
ha_is_valid_system_or_user_table(handlerton * hton,const char * db,const char * table_name)5503 bool ha_is_valid_system_or_user_table(handlerton *hton, const char *db,
5504 const char *table_name)
5505 {
5506 DBUG_ENTER("ha_is_valid_system_or_user_table");
5507
5508 st_sys_tbl_chk_params::enum_status status=
5509 ha_get_system_table_check_status(hton, db, table_name);
5510
5511 // It's a user table or a valid SE supported system table.
5512 DBUG_RETURN(status == st_sys_tbl_chk_params::USER_TABLE ||
5513 status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5514 }
5515
5516
5517 /**
5518 @brief Called for each SE to check if given db, tablename is a system table.
5519
5520 @details The primary purpose of introducing this function is to stop system
5521 tables to be created or being moved to undesired storage engines.
5522
5523 @param unused unused THD*
5524 @param plugin Points to specific SE.
5525 @param arg Is of type struct st_sys_tbl_chk_params.
5526
5527 @note
5528 args->status Indicates OUT param,
5529 see struct st_sys_tbl_chk_params definition for more info.
5530
5531 @return Operation status
5532 @retval true There was a match found.
5533 This will stop doing checks with other SE's.
5534
5535 @retval false There was no match found.
5536 Other SE's will be checked to find a match.
5537 */
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg)
{
  st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
  handlerton *hton= plugin_data<handlerton*>(plugin);

  // Do we already know that the table is a system table?
  if (check_params->status == st_sys_tbl_chk_params::SYSTEM_TABLE)
  {
    /*
      If this is the same SE specified in the command, we can
      simply ask the SE if it supports it, and stop the search
      regardless of the answer.
    */
    if (hton->db_type == check_params->db_type)
    {
      if (hton->is_supported_system_table &&
          hton->is_supported_system_table(check_params->db,
                                          check_params->table_name,
                                          check_params->is_sql_layer_system_table))
        check_params->status=
          st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
      return TRUE;
    }
    /*
      If this is a different SE, there is no point in asking the SE
      since we already know it's a system table and we don't care
      if it is supported or not.
    */
    return FALSE;
  }

  /*
    We don't yet know if the table is a system table or not.
    We therefore must always ask the SE.
  */
  if (hton->is_supported_system_table &&
      hton->is_supported_system_table(check_params->db,
                                      check_params->table_name,
                                      check_params->is_sql_layer_system_table))
  {
    /*
      If this is the same SE specified in the command, we know it's a
      supported system table and can stop the search.
    */
    if (hton->db_type == check_params->db_type)
    {
      check_params->status= st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
      return TRUE;
    }
    else
      // Some other SE claims it: it is a system table, keep searching.
      check_params->status= st_sys_tbl_chk_params::SYSTEM_TABLE;
  }

  return FALSE;
}
5594
5595 /*
5596 Prepare list of all known system database names
  Currently we just have 'mysql' as the system database name.
5598
5599 Later ndbcluster, innodb SE's can define some new database
5600 name which can store system tables specific to SE.
5601 */
ha_known_system_databases(void)5602 const char** ha_known_system_databases(void)
5603 {
5604 list<const char*> found_databases;
5605 const char **databases, **database;
5606
5607 // Get mysqld system database name.
5608 found_databases.push_back((char*) mysqld_system_database);
5609
5610 // Get system database names from every specific storage engine.
5611 plugin_foreach(NULL, system_databases_handlerton,
5612 MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5613
5614 databases= (const char **) my_once_alloc(sizeof(char *)*
5615 (found_databases.size()+1),
5616 MYF(MY_WME | MY_FAE));
5617 assert(databases != NULL);
5618
5619 list<const char*>::iterator it;
5620 database= databases;
5621 for (it= found_databases.begin(); it != found_databases.end(); it++)
5622 *database++= *it;
5623 *database= 0; // Last element.
5624
5625 return databases;
5626 }
5627
5628 /**
5629 @brief Fetch system database name specific to SE.
5630
5631 @details This function is invoked by plugin_foreach() from
5632 ha_known_system_databases(), for each storage engine.
5633 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5634 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5635 void *arg)
5636 {
5637 list<const char*> *found_databases= (list<const char*> *) arg;
5638 const char *db;
5639
5640 handlerton *hton= plugin_data<handlerton*>(plugin);
5641 if (hton->system_database)
5642 {
5643 db= hton->system_database();
5644 if (db)
5645 found_databases->push_back(db);
5646 }
5647
5648 return FALSE;
5649 }
5650
void st_ha_check_opt::init()
{
  /* Reset all CHECK/REPAIR option flags to their defaults. */
  flags= sql_flags= 0;
}
5655
5656
5657 /*****************************************************************************
5658 Key cache handling.
5659
5660 This code is only relevant for ISAM/MyISAM tables
5661
5662 key_cache->cache may be 0 only in the case where a key cache is not
5663 initialized or when we where not able to init the key cache in a previous
5664 call to ha_init_key_cache() (probably out of memory)
5665 *****************************************************************************/
5666
5667 /**
  Init a key cache if it has not been initialized before.
5669 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5670 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5671 {
5672 DBUG_ENTER("ha_init_key_cache");
5673
5674 if (!key_cache->key_cache_inited)
5675 {
5676 mysql_mutex_lock(&LOCK_global_system_variables);
5677 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5678 ulonglong tmp_block_size= key_cache->param_block_size;
5679 ulonglong division_limit= key_cache->param_division_limit;
5680 ulonglong age_threshold= key_cache->param_age_threshold;
5681 mysql_mutex_unlock(&LOCK_global_system_variables);
5682 DBUG_RETURN(!init_key_cache(key_cache,
5683 tmp_block_size,
5684 tmp_buff_size,
5685 division_limit, age_threshold));
5686 }
5687 DBUG_RETURN(0);
5688 }
5689
5690
5691 /**
5692 Resize key cache.
5693 */
ha_resize_key_cache(KEY_CACHE * key_cache)5694 int ha_resize_key_cache(KEY_CACHE *key_cache)
5695 {
5696 DBUG_ENTER("ha_resize_key_cache");
5697
5698 if (key_cache->key_cache_inited)
5699 {
5700 mysql_mutex_lock(&LOCK_global_system_variables);
5701 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5702 ulonglong tmp_block_size= key_cache->param_block_size;
5703 ulonglong division_limit= key_cache->param_division_limit;
5704 ulonglong age_threshold= key_cache->param_age_threshold;
5705 mysql_mutex_unlock(&LOCK_global_system_variables);
5706 const int retval= resize_key_cache(key_cache,
5707 keycache_thread_var(),
5708 tmp_block_size,
5709 tmp_buff_size,
5710 division_limit, age_threshold);
5711 DBUG_RETURN(!retval);
5712 }
5713 DBUG_RETURN(0);
5714 }
5715
5716
5717 /**
5718 Move all tables from one key cache to another one.
5719 */
int ha_change_key_cache(KEY_CACHE *old_key_cache,
                        KEY_CACHE *new_key_cache)
{
  /* Reassign all MyISAM tables from the old key cache to the new one. */
  mi_change_key_cache(old_key_cache, new_key_cache);
  return 0;
}
5726
5727
5728 /**
5729 Try to discover one table from handler(s).
5730
5731 @retval
    -1  Table did not exist
5733 @retval
5734 0 OK. In this case *frmblob and *frmlen are set
5735 @retval
5736 >0 error. frmblob and frmlen may not be set
5737 */
/* Argument bundle passed to discover_handlerton() via plugin_foreach(). */
struct st_discover_args
{
  const char *db;     // Schema name of the table to discover.
  const char *name;   // Table name to discover.
  uchar **frmblob;    // [out] Receives the discovered frm image.
  size_t *frmlen;     // [out] Receives the length of *frmblob.
};
5745
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5746 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5747 void *arg)
5748 {
5749 st_discover_args *vargs= (st_discover_args *)arg;
5750 handlerton *hton= plugin_data<handlerton*>(plugin);
5751 if (hton->state == SHOW_OPTION_YES && hton->discover &&
5752 (!(hton->discover(hton, thd, vargs->db, vargs->name,
5753 vargs->frmblob,
5754 vargs->frmlen))))
5755 return TRUE;
5756
5757 return FALSE;
5758 }
5759
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5760 int ha_discover(THD *thd, const char *db, const char *name,
5761 uchar **frmblob, size_t *frmlen)
5762 {
5763 int error= -1; // Table does not exist in any handler
5764 DBUG_ENTER("ha_discover");
5765 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5766 st_discover_args args= {db, name, frmblob, frmlen};
5767
5768 if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5769 DBUG_RETURN(error);
5770
5771 if (plugin_foreach(thd, discover_handlerton,
5772 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5773 error= 0;
5774
5775 if (!error)
5776 {
5777 assert(!thd->status_var_aggregated);
5778 thd->status_var.ha_discover_count++;
5779 }
5780 DBUG_RETURN(error);
5781 }
5782
5783
5784 /**
5785 Call this function in order to give the handler the possiblity
5786 to ask engine if there are any new tables that should be written to disk
5787 or any dropped tables that need to be removed from disk
5788 */
/* Argument bundle passed to find_files_handlerton() via plugin_foreach(). */
struct st_find_files_args
{
  const char *db;            // Schema being listed.
  const char *path;          // Directory path being scanned.
  const char *wild;          // Wildcard pattern, or NULL.
  bool dir;                  // Whether directories are being listed.
  List<LEX_STRING> *files;   // [in,out] File list engines may adjust.
};
5797
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5798 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5799 void *arg)
5800 {
5801 st_find_files_args *vargs= (st_find_files_args *)arg;
5802 handlerton *hton= plugin_data<handlerton*>(plugin);
5803
5804
5805 if (hton->state == SHOW_OPTION_YES && hton->find_files)
5806 if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5807 vargs->dir, vargs->files))
5808 return TRUE;
5809
5810 return FALSE;
5811 }
5812
5813 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5814 ha_find_files(THD *thd,const char *db,const char *path,
5815 const char *wild, bool dir, List<LEX_STRING> *files)
5816 {
5817 int error= 0;
5818 DBUG_ENTER("ha_find_files");
5819 DBUG_PRINT("enter", ("db: '%s' path: '%s' wild: '%s' dir: %d",
5820 db, path, wild ? wild : "NULL", dir));
5821 st_find_files_args args= {db, path, wild, dir, files};
5822
5823 plugin_foreach(thd, find_files_handlerton,
5824 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5825 /* The return value is not currently used */
5826 DBUG_RETURN(error);
5827 }
5828
5829 /**
5830 Ask handler if the table exists in engine.
5831 @retval
5832 HA_ERR_NO_SUCH_TABLE Table does not exist
5833 @retval
5834 HA_ERR_TABLE_EXIST Table exists
5835 @retval
5836 \# Error code
5837 */
/* Argument bundle for table_exists_in_engine_handlerton(). */
struct st_table_exists_in_engine_args
{
  const char *db;    // Schema name of the table being probed.
  const char *name;  // Table name being probed.
  int err;           // [out] HA_ERR_TABLE_EXIST / HA_ERR_NO_SUCH_TABLE / error.
};
5844
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5845 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5846 void *arg)
5847 {
5848 st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5849 handlerton *hton= plugin_data<handlerton*>(plugin);
5850
5851 int err= HA_ERR_NO_SUCH_TABLE;
5852
5853 if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5854 err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5855
5856 vargs->err = err;
5857 if (vargs->err == HA_ERR_TABLE_EXIST)
5858 return TRUE;
5859
5860 return FALSE;
5861 }
5862
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5863 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5864 {
5865 DBUG_ENTER("ha_table_exists_in_engine");
5866 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5867 st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5868 plugin_foreach(thd, table_exists_in_engine_handlerton,
5869 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5870 DBUG_PRINT("exit", ("error: %d", args.err));
5871 DBUG_RETURN(args.err);
5872 }
5873
5874 /**
5875 Prepare (sub-) sequences of joins in this statement
5876 which may be pushed to each storage engine for execution.
5877 */
/*
  Arguments and result slot for make_pushed_join_handlerton(), passed
  through plugin_foreach()'s void* parameter.
*/
struct st_make_pushed_join_args
{
  const AQP::Join_plan* plan; // Query plan provided by optimizer
  int err;                    // Error code to return.
};
5883
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5884 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5885 void *arg)
5886 {
5887 st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5888 handlerton *hton= plugin_data<handlerton*>(plugin);
5889
5890 if (hton && hton->make_pushed_join)
5891 {
5892 const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5893 if (unlikely(error))
5894 {
5895 vargs->err = error;
5896 return TRUE;
5897 }
5898 }
5899 return FALSE;
5900 }
5901
ha_make_pushed_joins(THD * thd,const AQP::Join_plan * plan)5902 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5903 {
5904 DBUG_ENTER("ha_make_pushed_joins");
5905 st_make_pushed_join_args args= {plan, 0};
5906 plugin_foreach(thd, make_pushed_join_handlerton,
5907 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5908 DBUG_PRINT("exit", ("error: %d", args.err));
5909 DBUG_RETURN(args.err);
5910 }
5911
5912 /*
5913 TODO: change this into a dynamic struct
5914 List<handlerton> does not work as
5915 1. binlog_end is called when MEM_ROOT is gone
5916 2. cannot work with thd MEM_ROOT as memory should be freed
5917 */
/* Capacity of the fixed-size handlerton list below. */
#define MAX_HTON_LIST_ST 63
struct hton_list_st
{
  handlerton *hton[MAX_HTON_LIST_ST]; // engines that implement binlog_func
  uint sz;                            // number of valid entries in hton[]
};
5924
/* A binlog callback id plus its argument, for binlog_func_foreach(). */
struct binlog_func_st
{
  enum_binlog_func fn; // which binlog_func operation to invoke
  void *arg;           // opaque argument forwarded to each engine
};
5930
5931 /** @brief
5932 Listing handlertons first to avoid recursive calls and deadlock
5933 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5934 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5935 {
5936 hton_list_st *hton_list= (hton_list_st *)arg;
5937 handlerton *hton= plugin_data<handlerton*>(plugin);
5938 if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5939 {
5940 uint sz= hton_list->sz;
5941 if (sz == MAX_HTON_LIST_ST-1)
5942 {
5943 /* list full */
5944 return FALSE;
5945 }
5946 hton_list->hton[sz]= hton;
5947 hton_list->sz= sz+1;
5948 }
5949 return FALSE;
5950 }
5951
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5952 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5953 {
5954 hton_list_st hton_list;
5955 uint i, sz;
5956
5957 hton_list.sz= 0;
5958 plugin_foreach(thd, binlog_func_list,
5959 MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5960
5961 for (i= 0, sz= hton_list.sz; i < sz ; i++)
5962 hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5963 return FALSE;
5964 }
5965
5966
ha_reset_logs(THD * thd)5967 int ha_reset_logs(THD *thd)
5968 {
5969 binlog_func_st bfn= {BFN_RESET_LOGS, 0};
5970 binlog_func_foreach(thd, &bfn);
5971 return 0;
5972 }
5973
ha_reset_slave(THD * thd)5974 void ha_reset_slave(THD* thd)
5975 {
5976 binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
5977 binlog_func_foreach(thd, &bfn);
5978 }
5979
ha_binlog_wait(THD * thd)5980 void ha_binlog_wait(THD* thd)
5981 {
5982 binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
5983 binlog_func_foreach(thd, &bfn);
5984 }
5985
ha_binlog_index_purge_file(THD * thd,const char * file)5986 int ha_binlog_index_purge_file(THD *thd, const char *file)
5987 {
5988 binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
5989 binlog_func_foreach(thd, &bfn);
5990 return 0;
5991 }
5992
/*
  Arguments for binlog_log_query_handlerton[2](): the binlog command
  plus the query text and the database/table it refers to.
*/
struct binlog_log_query_st
{
  enum_binlog_command binlog_command; // which DDL/DML event is logged
  const char *query;                  // query text
  size_t query_length;                // length of 'query' in bytes
  const char *db;                     // database of the affected table
  const char *table_name;             // affected table name
};
6001
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)6002 static my_bool binlog_log_query_handlerton2(THD *thd,
6003 handlerton *hton,
6004 void *args)
6005 {
6006 struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
6007 if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
6008 hton->binlog_log_query(hton, thd,
6009 b->binlog_command,
6010 b->query,
6011 b->query_length,
6012 b->db,
6013 b->table_name);
6014 return FALSE;
6015 }
6016
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)6017 static my_bool binlog_log_query_handlerton(THD *thd,
6018 plugin_ref plugin,
6019 void *args)
6020 {
6021 return binlog_log_query_handlerton2(thd,
6022 plugin_data<handlerton*>(plugin), args);
6023 }
6024
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,size_t query_length,const char * db,const char * table_name)6025 void ha_binlog_log_query(THD *thd, handlerton *hton,
6026 enum_binlog_command binlog_command,
6027 const char *query, size_t query_length,
6028 const char *db, const char *table_name)
6029 {
6030 struct binlog_log_query_st b;
6031 b.binlog_command= binlog_command;
6032 b.query= query;
6033 b.query_length= query_length;
6034 b.db= db;
6035 b.table_name= table_name;
6036 if (hton == 0)
6037 plugin_foreach(thd, binlog_log_query_handlerton,
6038 MYSQL_STORAGE_ENGINE_PLUGIN, &b);
6039 else
6040 binlog_log_query_handlerton2(thd, hton, &b);
6041 }
6042
ha_binlog_end(THD * thd)6043 int ha_binlog_end(THD* thd)
6044 {
6045 binlog_func_st bfn= {BFN_BINLOG_END, 0};
6046 binlog_func_foreach(thd, &bfn);
6047 return 0;
6048 }
6049
6050 /**
6051 Calculate cost of 'index only' scan for given index and number of records
6052
6053 @param keynr Index number
6054 @param records Estimated number of records to be retrieved
6055
6056 @note
  It is assumed that we will read through the whole key range and that all
6058 key blocks are half full (normally things are much better). It is also
6059 assumed that each time we read the next key from the index, the handler
6060 performs a random seek, thus the cost is proportional to the number of
6061 blocks read.
6062
6063 @return
6064 Estimated cost of 'index only' scan
6065 */
6066
index_only_read_time(uint keynr,double records)6067 double handler::index_only_read_time(uint keynr, double records)
6068 {
6069 double read_time;
6070 uint keys_per_block= (stats.block_size/2/
6071 (table_share->key_info[keynr].key_length + ref_length) +
6072 1);
6073 read_time=((double) (records + keys_per_block-1) /
6074 (double) keys_per_block);
6075 return read_time;
6076 }
6077
6078
table_in_memory_estimate() const6079 double handler::table_in_memory_estimate() const
6080 {
6081 assert(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
6082 (stats.table_in_mem_estimate >= 0.0 &&
6083 stats.table_in_mem_estimate <= 1.0));
6084
6085 /*
6086 If the storage engine has supplied information about how much of the
6087 table that is currently in a memory buffer, then use this estimate.
6088 */
6089 if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
6090 return stats.table_in_mem_estimate;
6091
6092 /*
6093 The storage engine has not provided any information about how much of
6094 this index is in memory, use an heuristic to produce an estimate.
6095 */
6096 return estimate_in_memory_buffer(stats.data_file_length);
6097 }
6098
6099
index_in_memory_estimate(uint keyno) const6100 double handler::index_in_memory_estimate(uint keyno) const
6101 {
6102 const KEY *key= &table->key_info[keyno];
6103
6104 /*
6105 If the storage engine has supplied information about how much of the
6106 index that is currently in a memory buffer, then use this estimate.
6107 */
6108 const double est= key->in_memory_estimate();
6109 if (est != IN_MEMORY_ESTIMATE_UNKNOWN)
6110 return est;
6111
6112 /*
6113 The storage engine has not provided any information about how much of
6114 this index is in memory, use an heuristic to produce an estimate.
6115 */
6116 ulonglong file_length;
6117
6118 /*
6119 If the index is a clustered primary index, then use the data file
6120 size as estimate for how large the index is.
6121 */
6122 if (keyno == table->s->primary_key && primary_key_is_clustered())
6123 file_length= stats.data_file_length;
6124 else
6125 file_length= stats.index_file_length;
6126
6127 return estimate_in_memory_buffer(file_length);
6128 }
6129
6130
estimate_in_memory_buffer(ulonglong table_index_size) const6131 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const
6132 {
6133 /*
6134 The storage engine has not provided any information about how much of
6135 the table/index is in memory. In this case we use a heuristic:
6136
6137 - if the size of the table/index is less than 20 percent (pick any
6138 number) of the memory buffer, then the entire table/index is likely in
6139 memory.
6140 - if the size of the table/index is larger than the memory buffer, then
6141 assume nothing of the table/index is in memory.
6142 - if the size of the table/index is larger than 20 percent but less than
6143 the memory buffer size, then use a linear function of the table/index
6144 size that goes from 1.0 to 0.0.
6145 */
6146
6147 /*
6148 If the storage engine has information about the size of its
6149 memory buffer, then use this. Otherwise, assume that at least 100 MB
6150 of data can be chached in memory.
6151 */
6152 longlong memory_buf_size= get_memory_buffer_size();
6153 if (memory_buf_size <= 0)
6154 memory_buf_size= 100 * 1024 * 1024; // 100 MB
6155
6156 /*
6157 Upper limit for the relative size of a table to be considered
6158 entirely available in a memory buffer. If the actual table size is
6159 less than this we assume it is complete cached in a memory buffer.
6160 */
6161 const double table_index_in_memory_limit= 0.2;
6162
6163 /*
6164 Estimate for how much of the total memory buffer this table/index
6165 can occupy.
6166 */
6167 const double percent_of_mem= static_cast<double>(table_index_size) /
6168 memory_buf_size;
6169
6170 double in_mem_est;
6171
6172 if (percent_of_mem < table_index_in_memory_limit) // Less than 20 percent
6173 in_mem_est= 1.0;
6174 else if (percent_of_mem > 1.0) // Larger than buffer
6175 in_mem_est= 0.0;
6176 else
6177 {
6178 /*
6179 The size of the table/index is larger than
6180 "table_index_in_memory_limit" * "memory_buf_size" but less than
6181 the total size of the memory buffer.
6182 */
6183 in_mem_est= 1.0 - (percent_of_mem - table_index_in_memory_limit) /
6184 (1.0 - table_index_in_memory_limit);
6185 }
6186 assert(in_mem_est >= 0.0 && in_mem_est <= 1.0);
6187
6188 return in_mem_est;
6189 }
6190
6191
table_scan_cost()6192 Cost_estimate handler::table_scan_cost()
6193 {
6194 /*
6195 This function returns a Cost_estimate object. The function should be
6196 implemented in a way that allows the compiler to use "return value
6197 optimization" to avoid creating the temporary object for the return value
6198 and use of the copy constructor.
6199 */
6200
6201 const double io_cost= scan_time() * table->cost_model()->page_read_cost(1.0);
6202 Cost_estimate cost;
6203 cost.add_io(io_cost);
6204 return cost;
6205 }
6206
6207
index_scan_cost(uint index,double ranges,double rows)6208 Cost_estimate handler::index_scan_cost(uint index, double ranges, double rows)
6209 {
6210 /*
6211 This function returns a Cost_estimate object. The function should be
6212 implemented in a way that allows the compiler to use "return value
6213 optimization" to avoid creating the temporary object for the return value
6214 and use of the copy constructor.
6215 */
6216
6217 assert(ranges >= 0.0);
6218 assert(rows >= 0.0);
6219
6220 const double io_cost= index_only_read_time(index, rows) *
6221 table->cost_model()->page_read_cost_index(index, 1.0);
6222 Cost_estimate cost;
6223 cost.add_io(io_cost);
6224 return cost;
6225 }
6226
6227
read_cost(uint index,double ranges,double rows)6228 Cost_estimate handler::read_cost(uint index, double ranges, double rows)
6229 {
6230 /*
6231 This function returns a Cost_estimate object. The function should be
6232 implemented in a way that allows the compiler to use "return value
6233 optimization" to avoid creating the temporary object for the return value
6234 and use of the copy constructor.
6235 */
6236
6237 assert(ranges >= 0.0);
6238 assert(rows >= 0.0);
6239
6240 const double io_cost= read_time(index, static_cast<uint>(ranges),
6241 static_cast<ha_rows>(rows)) *
6242 table->cost_model()->page_read_cost(1.0);
6243 Cost_estimate cost;
6244 cost.add_io(io_cost);
6245 return cost;
6246 }
6247
6248
6249 /**
6250 Check if key has partially-covered columns
6251
6252 We can't use DS-MRR to perform range scans when the ranges are over
6253 partially-covered keys, because we'll not have full key part values
6254 (we'll have their prefixes from the index) and will not be able to check
6255 if we've reached the end the range.
6256
6257 @param keyno Key to check
6258
6259 @todo
6260 Allow use of DS-MRR in cases where the index has partially-covered
6261 components but they are not used for scanning.
6262
6263 @retval TRUE Yes
6264 @retval FALSE No
6265 */
6266
key_uses_partial_cols(TABLE * table,uint keyno)6267 bool key_uses_partial_cols(TABLE *table, uint keyno)
6268 {
6269 KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6270 KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6271 for (; kp != kp_end; kp++)
6272 {
6273 if (!kp->field->part_of_key.is_set(keyno))
6274 return TRUE;
6275 }
6276 return FALSE;
6277 }
6278
6279 /****************************************************************************
6280 * Default MRR implementation (MRR to non-MRR converter)
6281 ***************************************************************************/
6282
6283 /**
6284 Get cost and other information about MRR scan over a known list of ranges
6285
6286 Calculate estimated cost and other information about an MRR scan for given
6287 sequence of ranges.
6288
6289 @param keyno Index number
6290 @param seq Range sequence to be traversed
6291 @param seq_init_param First parameter for seq->init()
6292 @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
6293 can't efficiently determine it
6294 @param bufsz[in,out] IN: Size of the buffer available for use
6295 OUT: Size of the buffer that is expected to be actually
6296 used, or 0 if buffer is not needed.
6297 @param flags[in,out] A combination of HA_MRR_* flags
6298 @param cost[out] Estimated cost of MRR access
6299
6300 @note
6301 This method (or an overriding one in a derived class) must check for
6302 thd->killed and return HA_POS_ERROR if it is not zero. This is required
6303 for a user to be able to interrupt the calculation by killing the
6304 connection/query.
6305
6306 @retval
6307 HA_POS_ERROR Error or the engine is unable to perform the requested
6308 scan. Values of OUT parameters are undefined.
6309 @retval
6310 other OK, *cost contains cost of the scan, *bufsz and *flags
6311 contain scan parameters.
6312 */
6313
ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges_arg,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows= 0;
  uint n_ranges=0;
  THD *thd= current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);

  /*
    NOTE(review): the local n_ranges (still 0 here), not n_ranges_arg,
    is what is handed to seq->init() -- presumably the default range
    sequence implementations ignore this hint; confirm before changing.
  */
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  /* Walk every range, accumulating the estimated row count. */
  while (!seq->next(seq_it, &range))
  {
    /* Allow the user to interrupt the (possibly long) estimation. */
    if (unlikely(thd->killed != 0))
      return HA_POS_ERROR;

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG)
    {
      /* Geometry ranges carry only a start key. */
      min_endp= &range.start_key;
      max_endp= NULL;
    }
    else
    {
      min_endp= range.start_key.length? &range.start_key : NULL;
      max_endp= range.end_key.length? &range.end_key : NULL;
    }
    /*
      Get the number of rows in the range. This is done by calling
      records_in_range() unless:

      1) The range is an equality range and the index is unique.
         There cannot be more than one matching row, so 1 is
         assumed. Note that it is possible that the correct number
         is actually 0, so the row estimate may be too high in this
         case. Also note: ranges of the form "x IS NULL" may have more
         than 1 matching row so records_in_range() is called for these.
      2) a) The range is an equality range but the index is either
            not unique or all of the keyparts are not used.
         b) The user has requested that index statistics should be used
            for equality ranges to avoid the incurred overhead of
            index dives in records_in_range().
         c) Index statistics is available.
         Ranges of the form "x IS NULL" will not use index statistics
         because the number of rows with this value are likely to be
         very different than the values in the index statistics.
    */
    int keyparts_used= 0;
    if ((range.range_flag & UNIQUE_RANGE) &&                       // 1)
        !(range.range_flag & NULL_RANGE))
      rows= 1; /* there can be at most one row */
    else if ((range.range_flag & EQ_RANGE) &&                      // 2a)
             (range.range_flag & USE_INDEX_STATISTICS) &&          // 2b)
             (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
             table->
               key_info[keyno].has_records_per_key(keyparts_used-1) && // 2c)
             !(range.range_flag & NULL_RANGE))
    {
      rows= static_cast<ha_rows>(
        table->key_info[keyno].records_per_key(keyparts_used - 1));
    }
    else
    {
      DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
      assert(min_endp || max_endp);
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
                                                        max_endp)))
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  if (total_rows != HA_POS_ERROR)
  {
    const Cost_model_table *const cost_model= table->cost_model();

    /* The following calculation is the same as in multi_range_read_info(): */
    *flags|= HA_MRR_USE_DEFAULT_IMPL;
    *flags|= HA_MRR_SUPPORT_SORTED;

    assert(cost->is_zero());
    /* Index-only scans skip the base table rows and cost less I/O. */
    if (*flags & HA_MRR_INDEX_ONLY)
      *cost= index_scan_cost(keyno, static_cast<double>(n_ranges),
                             static_cast<double>(total_rows));
    else
      *cost= read_cost(keyno, static_cast<double>(n_ranges),
                       static_cast<double>(total_rows));
    /* Add the CPU cost of evaluating each row, plus a small offset. */
    cost->add_cpu(cost_model->row_evaluate_cost(
      static_cast<double>(total_rows)) + 0.01);
  }
  return total_rows;
}
6418
6419
6420 /**
6421 Get cost and other information about MRR scan over some sequence of ranges
6422
6423 Calculate estimated cost and other information about an MRR scan for some
6424 sequence of ranges.
6425
6426 The ranges themselves will be known only at execution phase. When this
6427 function is called we only know number of ranges and a (rough) E(#records)
6428 within those ranges.
6429
6430 Currently this function is only called for "n-keypart singlepoint" ranges,
6431 i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6432
6433 The flags parameter is a combination of those flags: HA_MRR_SORTED,
6434 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6435
6436 @param keyno Index number
6437 @param n_ranges Estimated number of ranges (i.e. intervals) in the
6438 range sequence.
6439 @param n_rows Estimated total number of records contained within all
6440 of the ranges
6441 @param bufsz[in,out] IN: Size of the buffer available for use
6442 OUT: Size of the buffer that will be actually used, or
6443 0 if buffer is not needed.
6444 @param flags[in,out] A combination of HA_MRR_* flags
6445 @param cost[out] Estimated cost of MRR access
6446
6447 @retval
6448 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6449 parameters.
6450 @retval
6451 other Error or can't perform the requested scan
6452 */
6453
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6454 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6455 uint *bufsz, uint *flags,
6456 Cost_estimate *cost)
6457 {
6458 *bufsz= 0; /* Default implementation doesn't need a buffer */
6459
6460 *flags|= HA_MRR_USE_DEFAULT_IMPL;
6461 *flags|= HA_MRR_SUPPORT_SORTED;
6462
6463 assert(cost->is_zero());
6464
6465 /* Produce the same cost as non-MRR code does */
6466 if (*flags & HA_MRR_INDEX_ONLY)
6467 *cost= index_scan_cost(keyno, n_ranges, n_rows);
6468 else
6469 *cost= read_cost(keyno, n_ranges, n_rows);
6470 return 0;
6471 }
6472
6473
6474 /**
6475 Initialize the MRR scan
6476
6477 Initialize the MRR scan. This function may do heavyweight scan
6478 initialization like row prefetching/sorting/etc (NOTE: but better not do
6479 it here as we may not need it, e.g. if we never satisfy WHERE clause on
6480 previous tables. For many implementations it would be natural to do such
6481 initializations in the first multi_read_range_next() call)
6482
6483 mode is a combination of the following flags: HA_MRR_SORTED,
6484 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6485
6486 @param seq Range sequence to be traversed
6487 @param seq_init_param First parameter for seq->init()
6488 @param n_ranges Number of ranges in the sequence
6489 @param mode Flags, see the description section for the details
6490 @param buf INOUT: memory buffer to be used
6491
6492 @note
6493 One must have called index_init() before calling this function. Several
6494 multi_range_read_init() calls may be made in course of one query.
6495
6496 Until WL#2623 is done (see its text, section 3.2), the following will
6497 also hold:
6498 The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6499 then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6500 This property will only be used by NDB handler until WL#2623 is done.
6501
6502 Buffer memory management is done according to the following scenario:
6503 The caller allocates the buffer and provides it to the callee by filling
6504 the members of HANDLER_BUFFER structure.
6505 The callee consumes all or some fraction of the provided buffer space, and
6506 sets the HANDLER_BUFFER members accordingly.
6507 The callee may use the buffer memory until the next multi_range_read_init()
6508 call is made, all records have been read, or until index_end() call is
6509 made, whichever comes first.
6510
6511 @retval 0 OK
6512 @retval 1 Error
6513 */
6514
6515 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6516 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6517 uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6518 {
6519 DBUG_ENTER("handler::multi_range_read_init");
6520 mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6521 mrr_funcs= *seq_funcs;
6522 mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6523 mrr_have_range= FALSE;
6524 DBUG_RETURN(0);
6525 }
6526
6527
6528 /**
6529 Get next record in MRR scan
6530
6531 Default MRR implementation: read the next record
6532
6533 @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6534 Otherwise, the opaque value associated with the range
6535 that contains the returned record.
6536
6537 @retval 0 OK
6538 @retval other Error code
6539 */
6540
int handler::multi_range_read_next(char **range_info)
{
  int result= HA_ERR_END_OF_FILE;
  int range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /*
    First call: there is no current range yet, so jump straight into the
    loop body at 'start' to fetch the first range.
  */
  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /*
        A semi-consistent read requires the current range to be scanned
        again rather than advancing to the next one.
      */
      if (was_semi_consistent_read())
        goto scan_it_again;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                               &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                               &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  /* Opaque per-range value for the caller (see doc comment above). */
  *range_info= mrr_cur_range.ptr;

  /* Update virtual generated fields */
  if (!result && m_update_generated_read_fields)
  {
    result= update_generated_read_fields(table->record[0], table, active_index);
    m_update_generated_read_fields= false;
  }

  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}
6601
6602
6603 /****************************************************************************
6604 * DS-MRR implementation
6605 ***************************************************************************/
6606
6607 /**
6608 DS-MRR: Initialize and start MRR scan
6609
6610 Initialize and start the MRR scan. Depending on the mode parameter, this
6611 may use default or DS-MRR implementation.
6612
6613 The DS-MRR implementation will use a second handler object (h2) for
6614 doing scan on the index:
6615 - on the first call to this function the h2 handler will be created
6616 and h2 will be opened using the same index as the main handler
6617 is set to use. The index scan on the main index will be closed
6618 and it will be re-opened to read records from the table using either
6619 no key or the primary key. The h2 handler will be deleted when
6620 reset() is called (which should happen on the end of the statement).
6621 - when dsmrr_close() is called the index scan on h2 is closed.
6622 - on following calls to this function one of the following must be valid:
6623 a. if dsmrr_close has been called:
6624 the main handler (h) must be open on an index, h2 will be opened
6625 using this index, and the index on h will be closed and
6626 h will be re-opened to read reads from the table using either
6627 no key or the primary key.
6628 b. dsmrr_close has not been called:
6629 h2 will already be open, the main handler h must be set up
6630 to read records from the table (handler->inited is RND) either
6631 using the primary index or using no index at all.
6632
6633 @param h_arg Table handler to be used
6634 @param seq_funcs Interval sequence enumeration functions
6635 @param seq_init_param Interval sequence enumeration parameter
6636 @param n_ranges Number of ranges in the sequence.
6637 @param mode HA_MRR_* modes to use
6638 @param buf INOUT Buffer to use
6639
6640 @retval 0 Ok, Scan started.
6641 @retval other Error
6642 */
6643
dsmrr_init(handler * h_arg,RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6644 int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
6645 void *seq_init_param, uint n_ranges, uint mode,
6646 HANDLER_BUFFER *buf)
6647 {
6648 uint elem_size;
6649 int retval= 0;
6650 DBUG_ENTER("DsMrr_impl::dsmrr_init");
6651 THD *thd= h_arg->table->in_use; // current THD
6652
6653 /*
6654 index_merge may invoke a scan on an object for which dsmrr_info[_const]
6655 has not been called, so set the owner handler here as well.
6656 */
6657 h= h_arg;
6658
6659 if (!hint_key_state(thd, h->table, h->active_index,
6660 MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
6661 mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
6662 {
6663 use_default_impl= TRUE;
6664 retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
6665 n_ranges, mode, buf);
6666 DBUG_RETURN(retval);
6667 }
6668
6669 /*
6670 This assert will hit if we have pushed an index condition to the
6671 primary key index and then "change our mind" and use a different
6672 index for retrieving data with MRR. One of the following criteria
6673 must be true:
    1. We have not pushed an index condition on this handler.
6675 2. We have pushed an index condition and this is on the currently used
6676 index.
6677 3. We have pushed an index condition but this is not for the primary key.
6678 4. We have pushed an index condition and this has been transferred to
6679 the clone (h2) of the handler object.
6680 */
6681 assert(!h->pushed_idx_cond ||
6682 h->pushed_idx_cond_keyno == h->active_index ||
6683 h->pushed_idx_cond_keyno != table->s->primary_key ||
6684 (h2 && h->pushed_idx_cond_keyno == h2->active_index));
6685
6686 rowids_buf= buf->buffer;
6687
6688 is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
6689
6690 if (is_mrr_assoc)
6691 {
6692 assert(!thd->status_var_aggregated);
6693 table->in_use->status_var.ha_multi_range_read_init_count++;
6694 }
6695
6696 rowids_buf_end= buf->buffer_end;
6697 elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6698 rowids_buf_last= rowids_buf +
6699 ((rowids_buf_end - rowids_buf)/ elem_size)*
6700 elem_size;
6701 rowids_buf_end= rowids_buf_last;
6702
6703 /*
6704 The DS-MRR scan uses a second handler object (h2) for doing the
6705 index scan. Create this by cloning the primary handler
6706 object. The h2 handler object is deleted when DsMrr_impl::reset()
6707 is called.
6708 */
6709 if (!h2)
6710 {
6711 handler *new_h2;
6712 /*
6713 ::clone() takes up a lot of stack, especially on 64 bit platforms.
6714 The constant 5 is an empiric result.
6715 @todo Is this still the case? Leave it as it is for now but could
6716 likely be removed?
6717 */
6718 if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
6719 DBUG_RETURN(1);
6720
6721 if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
6722 DBUG_RETURN(1);
6723 h2= new_h2; /* Ok, now can put it into h2 */
6724 table->prepare_for_position();
6725 }
6726
6727 /*
6728 Open the index scan on h2 using the key from the primary handler.
6729 */
6730 if (h2->active_index == MAX_KEY)
6731 {
6732 assert(h->active_index != MAX_KEY);
6733 const uint mrr_keyno= h->active_index;
6734
6735 if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
6736 goto error;
6737
6738 if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
6739 goto error;
6740
6741 if ((retval= h2->ha_index_init(mrr_keyno, false)))
6742 goto error;
6743
6744 // Transfer ICP from h to h2
6745 if (mrr_keyno == h->pushed_idx_cond_keyno)
6746 {
6747 if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
6748 {
6749 retval= 1;
6750 goto error;
6751 }
6752 }
6753 else
6754 {
6755 // Cancel any potentially previously pushed index conditions
6756 h2->cancel_pushed_idx_cond();
6757 }
6758 }
6759 else
6760 {
6761 /*
6762 h2 has already an open index. This happens when the DS-MRR scan
6763 is re-started without closing it first. In this case the primary
6764 handler must be used for reading records from the table, ie. it
6765 must not be opened for doing a new range scan. In this case
6766 the active_index must either not be set or be the primary key.
6767 */
6768 assert(h->inited == handler::RND);
6769 assert(h->active_index == MAX_KEY ||
6770 h->active_index == table->s->primary_key);
6771 }
6772
6773 /*
6774 The index scan is now transferred to h2 and we can close the open
6775 index scan on the primary handler.
6776 */
6777 if (h->inited == handler::INDEX)
6778 {
6779 /*
6780 Calling h->ha_index_end() will invoke dsmrr_close() for this object,
6781 which will close the index scan on h2. We need to keep it open, so
6782 temporarily move h2 out of the DsMrr object.
6783 */
6784 handler *save_h2= h2;
6785 h2= NULL;
6786 retval= h->ha_index_end();
6787 h2= save_h2;
6788 if (retval)
6789 goto error;
6790 }
6791
6792 /*
6793 Verify consistency between h and h2.
6794 */
6795 assert(h->inited != handler::INDEX);
6796 assert(h->active_index == MAX_KEY ||
6797 h->active_index == table->s->primary_key);
6798 assert(h2->inited == handler::INDEX);
6799 assert(h2->active_index != MAX_KEY);
6800 assert(h->m_lock_type == h2->m_lock_type);
6801
6802 if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
6803 n_ranges, mode, buf)))
6804 goto error;
6805
6806 if ((retval= dsmrr_fill_buffer()))
6807 goto error;
6808
6809 /*
6810 If the above call has scanned through all intervals in *seq, then
6811 adjust *buf to indicate that the remaining buffer space will not be used.
6812 */
6813 if (dsmrr_eof)
6814 buf->end_of_used_area= rowids_buf_last;
6815
6816 /*
6817 h->inited == INDEX may occur when 'range checked for each record' is
6818 used.
6819 */
6820 if ((h->inited != handler::RND) &&
6821 ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
6822 (h->ha_rnd_init(FALSE))))
6823 {
6824 retval= 1;
6825 goto error;
6826 }
6827
6828 use_default_impl= FALSE;
6829 h->mrr_funcs= *seq_funcs;
6830
6831 DBUG_RETURN(0);
6832 error:
6833 h2->ha_index_or_rnd_end();
6834 h2->ha_external_lock(thd, F_UNLCK);
6835 h2->ha_close();
6836 delete h2;
6837 h2= NULL;
6838 assert(retval != 0);
6839 DBUG_RETURN(retval);
6840 }
6841
6842
dsmrr_close()6843 void DsMrr_impl::dsmrr_close()
6844 {
6845 DBUG_ENTER("DsMrr_impl::dsmrr_close");
6846
6847 // If there is an open index on h2, then close it
6848 if (h2 && h2->active_index != MAX_KEY)
6849 {
6850 h2->ha_index_or_rnd_end();
6851 h2->ha_external_lock(current_thd, F_UNLCK);
6852 }
6853 use_default_impl= true;
6854 DBUG_VOID_RETURN;
6855 }
6856
6857
reset()6858 void DsMrr_impl::reset()
6859 {
6860 DBUG_ENTER("DsMrr_impl::reset");
6861
6862 if (h2)
6863 {
6864 // Close any ongoing DS-MRR scan
6865 dsmrr_close();
6866
6867 // Close and delete the h2 handler
6868 h2->ha_close();
6869 delete h2;
6870 h2= NULL;
6871 }
6872 DBUG_VOID_RETURN;
6873 }
6874
6875
rowid_cmp(void * h,uchar * a,uchar * b)6876 static int rowid_cmp(void *h, uchar *a, uchar *b)
6877 {
6878 return ((handler*)h)->cmp_ref(a, b);
6879 }
6880
6881
6882 /**
6883 DS-MRR: Fill the buffer with rowids and sort it by rowid
6884
6885 {This is an internal function of DiskSweep MRR implementation}
6886 Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6887 buffer. When the buffer is full or scan is completed, sort the buffer by
6888 rowid and return.
6889
6890 The function assumes that rowids buffer is empty when it is invoked.
6891
6892 @param h Table handler
6893
6894 @retval 0 OK, the next portion of rowids is in the buffer,
6895 properly ordered
6896 @retval other Error
6897 */
6898
int DsMrr_impl::dsmrr_fill_buffer()
{
  char *range_info;
  int res= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
  // The caller must provide room for at least one buffer element
  assert(rowids_buf < rowids_buf_end);

  /*
    Set key_read to TRUE since we only read fields from the index.
    This ensures that any virtual columns are read from index and are not
    attempted to be evaluated from base columns.
    (Do not use TABLE::set_keyread() since the MRR implementation operates
    with two handler objects, and set_keyread() would manipulate the keyread
    property of the wrong handler. MRR sets the handlers' keyread properties
    when initializing the MRR operation, independent of this call).
  */
  assert(table->key_read == FALSE);
  table->key_read= TRUE;

  // Refill from the start of the buffer; scan ranges on h2 until the
  // buffer is full or multi_range_read_next() reports an error/EOF.
  rowids_buf_cur= rowids_buf;
  while ((rowids_buf_cur < rowids_buf_end) &&
         !(res= h2->handler::multi_range_read_next(&range_info)))
  {
    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
    // Skip rows rejected by the pushed-down index tuple filter, if any
    if (h2->mrr_funcs.skip_index_tuple &&
        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
      continue;

    /* Put rowid, or {rowid, range_id} pair into the buffer */
    h2->position(table->record[0]);
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
    rowids_buf_cur += h2->ref_length;

    if (is_mrr_assoc)
    {
      // Store the caller's range_id pointer right after the rowid
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
      rowids_buf_cur += sizeof(void*);
    }
  }

  // Restore key_read since the next read operation will read complete rows
  table->key_read= FALSE;

  // EOF just means the range sequence is exhausted; any other non-zero
  // result is a genuine error and is propagated to the caller.
  if (res && res != HA_ERR_END_OF_FILE)
    DBUG_RETURN(res);
  dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);

  /* Sort the buffer contents by rowid */
  // Element size is rowid plus, in associated mode, the range_id pointer
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  size_t n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;

  // Sorting by rowid turns the subsequent rnd_pos() calls into a
  // (mostly) sequential disk sweep; h provides the rowid comparator.
  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
            (void*)h);
  rowids_buf_last= rowids_buf_cur;
  rowids_buf_cur=  rowids_buf;
  DBUG_RETURN(0);
}
6956
6957
6958 /*
6959 DS-MRR implementation: multi_range_read_next() function
6960 */
6961
// Return the next {row, range_id} from the sorted rowid buffer, refilling
// the buffer from the index scan on h2 when it runs empty. The actual row
// is fetched into table->record[0] via rnd_pos() on the primary handler.
int DsMrr_impl::dsmrr_next(char **range_info)
{
  int res;
  uchar *cur_range_info= 0;
  uchar *rowid;

  // Delegate entirely when DS-MRR was not chosen at init time
  if (use_default_impl)
    return h->handler::multi_range_read_next(range_info);

  do
  {
    if (rowids_buf_cur == rowids_buf_last)
    {
      // Buffer exhausted; if the range sequence is also exhausted, report EOF
      if (dsmrr_eof)
      {
        res= HA_ERR_END_OF_FILE;
        goto end;
      }

      res= dsmrr_fill_buffer();
      if (res)
        goto end;
    }
   
    /* return eof if there are no rowids in the buffer after re-fill attempt */
    if (rowids_buf_cur == rowids_buf_last)
    {
      res= HA_ERR_END_OF_FILE;
      goto end;
    }
    rowid= rowids_buf_cur;

    // In associated mode the range_id pointer is stored after the rowid
    if (is_mrr_assoc)
      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));

    rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
    // Let the MRR user filter out records before the (expensive) row fetch
    if (h2->mrr_funcs.skip_record &&
	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
      continue;
    res= h->ha_rnd_pos(table->record[0], rowid);
    break;
  } while (true);

  if (is_mrr_assoc)
  {
    // Hand the range_id of the returned row back to the caller
    memcpy(range_info, rowid + h->ref_length, sizeof(void*));
  }
end:
  return res;
}
7012
7013
7014 /*
7015 DS-MRR implementation: multi_range_read_info() function
7016 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)7017 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
7018 uint *bufsz, uint *flags, Cost_estimate *cost)
7019 {
7020 ha_rows res MY_ATTRIBUTE((unused));
7021 uint def_flags= *flags;
7022 uint def_bufsz= *bufsz;
7023
7024 /* Get cost/flags/mem_usage of default MRR implementation */
7025 res=
7026 h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
7027 &def_flags, cost);
7028 assert(!res);
7029
7030 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
7031 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
7032 {
7033 /* Default implementation is choosen */
7034 DBUG_PRINT("info", ("Default MRR implementation choosen"));
7035 *flags= def_flags;
7036 *bufsz= def_bufsz;
7037 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
7038 }
7039 else
7040 {
7041 /* *flags and *bufsz were set by choose_mrr_impl */
7042 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
7043 }
7044 return 0;
7045 }
7046
7047
7048 /*
7049 DS-MRR Implementation: multi_range_read_info_const() function
7050 */
7051
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)7052 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
7053 void *seq_init_param, uint n_ranges,
7054 uint *bufsz, uint *flags, Cost_estimate *cost)
7055 {
7056 ha_rows rows;
7057 uint def_flags= *flags;
7058 uint def_bufsz= *bufsz;
7059 /* Get cost/flags/mem_usage of default MRR implementation */
7060 rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
7061 n_ranges, &def_bufsz,
7062 &def_flags, cost);
7063 if (rows == HA_POS_ERROR)
7064 {
7065 /* Default implementation can't perform MRR scan => we can't either */
7066 return rows;
7067 }
7068
7069 /*
7070 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
7071 use the default MRR implementation (we need it for UPDATE/DELETE).
7072 Otherwise, make a choice based on cost and mrr* flags of
7073 @@optimizer_switch.
7074 */
7075 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
7076 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
7077 {
7078 DBUG_PRINT("info", ("Default MRR implementation choosen"));
7079 *flags= def_flags;
7080 *bufsz= def_bufsz;
7081 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
7082 }
7083 else
7084 {
7085 /* *flags and *bufsz were set by choose_mrr_impl */
7086 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
7087 }
7088 return rows;
7089 }
7090
7091
7092 /**
7093 DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
7094
7095 Make the choice between using Default MRR implementation and DS-MRR.
7096 This function contains common functionality factored out of dsmrr_info()
7097 and dsmrr_info_const(). The function assumes that the default MRR
7098 implementation's applicability requirements are satisfied.
7099
7100 @param keyno Index number
7101 @param rows E(full rows to be retrieved)
7102 @param flags IN MRR flags provided by the MRR user
7103 OUT If DS-MRR is choosen, flags of DS-MRR implementation
7104 else the value is not modified
7105 @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation
7106 else the value is not modified
7107 @param cost IN Cost of default MRR implementation
7108 OUT If DS-MRR is choosen, cost of DS-MRR scan
7109 else the value is not modified
7110
7111 @retval TRUE Default MRR implementation should be used
7112 @retval FALSE DS-MRR implementation should be used
7113 */
7114
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
                                 uint *bufsz, Cost_estimate *cost)
{
  bool res;
  THD *thd= current_thd;

  // Is MRR enabled for this key via @@optimizer_switch / MRR hint?
  const bool mrr_on= hint_key_state(thd, table, keyno, MRR_HINT_ENUM,
                                    OPTIMIZER_SWITCH_MRR);
  // Explicit MRR or BKA hints force DS-MRR regardless of cost (see below)
  const bool force_dsmrr_by_hints=
    hint_key_state(thd, table, keyno, MRR_HINT_ENUM, 0) ||
    hint_table_state(thd, table, BKA_HINT_ENUM, 0);

  // Hard disqualifiers for DS-MRR: MRR switched off, index-only or sorted
  // scans, clustered-PK scans, partially-covered key columns, tmp tables.
  if (!(mrr_on || force_dsmrr_by_hints) ||
      *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
      (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
       key_uses_partial_cols(table, keyno) ||
       table->s->tmp_table != NO_TMP_TABLE)
  {
    /* Use the default implementation, don't modify args: See comments  */
    return TRUE;
  }

  /*
    If @@optimizer_switch has "mrr_cost_based" on, we should avoid
    using DS-MRR for queries where it is likely that the records are
    stored in memory. Since there is currently no way to determine
    this, we use a heuristic:
    a) if the storage engine has a memory buffer, DS-MRR is only
       considered if the table size is bigger than the buffer.
    b) if the storage engine does not have a memory buffer, DS-MRR is
       only considered if the table size is bigger than 100MB.
    c) Since there is an initial setup cost of DS-MRR, so it is only
       considered if at least 50 records will be read.
  */
  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
      !force_dsmrr_by_hints)
  {
    /*
      If the storage engine has a database buffer we use this as the
      minimum size the table should have before considering DS-MRR.
    */
    longlong min_file_size= table->file->get_memory_buffer_size();
    if (min_file_size == -1)
    {
      // No estimate for database buffer
      min_file_size= 100 * 1024 * 1024;    // 100 MB
    }

    if (table->file->stats.data_file_length <
        static_cast<ulonglong>(min_file_size) ||
        rows <= 50)
      return true;                 // Use the default implementation
  }

  // DS-MRR needs buffer room for at least one rowid; otherwise bail out
  Cost_estimate dsmrr_cost;
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
    return TRUE;

  /*
    If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
    of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
    allows one to force use of DS-MRR whenever it is applicable without
    affecting other cost-based choices. Note that if MRR or BKA hint is
    specified, DS-MRR will be used regardless of cost.
  */
  const bool force_dsmrr=
    (force_dsmrr_by_hints ||
     !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));

  // When forcing, clamp the DS-MRR cost so it never loses the comparison
  if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
    dsmrr_cost= *cost;

  if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
  {
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
    *flags &= ~HA_MRR_SUPPORT_SORTED;    /* We can't provide ordered output */
    *cost= dsmrr_cost;
    res= FALSE;
  }
  else
  {
    /* Use the default MRR implementation */
    res= TRUE;
  }
  return res;
}
7201
7202
// Forward declaration; cost of one sort-and-sweep DS-MRR step (defined below).
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
                                    Cost_estimate *cost);
7205
7206
7207 /**
7208 Get cost of DS-MRR scan
7209
7210 @param keynr Index to be used
7211 @param rows E(Number of rows to be scanned)
7212 @param flags Scan parameters (HA_MRR_* flags)
7213 @param buffer_size INOUT Buffer size
7214 @param cost OUT The cost
7215
7216 @retval FALSE OK
7217 @retval TRUE Error, DS-MRR cannot be used (the buffer is too small
7218 for even 1 rowid)
7219 */
7220
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
                                         uint *buffer_size,
                                         Cost_estimate *cost)
{
  ha_rows rows_in_last_step;
  uint n_full_steps;

  // One buffer element: rowid plus, in associated mode, a range_id pointer
  const uint elem_size= h->ref_length +
                        sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
  const ha_rows max_buff_entries= *buffer_size / elem_size;

  if (!max_buff_entries)
    return TRUE; /* Buffer has not enough space for even 1 rowid */

  /* Number of iterations we'll make with full buffer */
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);

  /*
    Get numbers of rows we'll be processing in last iteration, with
    non-full buffer
  */
  rows_in_last_step= rows % max_buff_entries;

  assert(cost->is_zero());

  if (n_full_steps)
  {
    // Cost of all full-buffer fill/sort/sweep iterations
    get_sort_and_sweep_cost(table, max_buff_entries, cost);
    cost->multiply(n_full_steps);
  }
  else
  {
    /*
      Adjust buffer size since only parts of the buffer will be used:
      1. Adjust record estimate for the last scan to reduce likelyhood
         of needing more than one scan by adding 20 percent to the
         record estimate and by ensuring this is at least 100 records.
      2. If the estimated needed buffer size is lower than suggested by
         the caller then set it to the estimated buffer size.
    */
    const ha_rows keys_in_buffer=
      max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
    *buffer_size= min<ulong>(*buffer_size,
                             static_cast<ulong>(keys_in_buffer) * elem_size);
  }

  // Cost of the final, partially-filled iteration
  Cost_estimate last_step_cost;
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
  (*cost)+= last_step_cost;

  /*
    Cost of memory is not included in the total_cost() function and
    thus will not be considered when comparing costs. Still, we
    record it in the cost estimate object for future use.
  */
  cost->add_mem(*buffer_size);

  /* Total cost of all index accesses */
  (*cost)+= h->index_scan_cost(keynr, 1, static_cast<double>(rows));

  /*
    Add CPU cost for processing records (see
    @handler::multi_range_read_info_const()).
  */
  cost->add_cpu(table->cost_model()->row_evaluate_cost(
          static_cast<double>(rows)));
  return FALSE;
}
7289
7290
7291 /*
7292 Get cost of one sort-and-sweep step
7293
7294 SYNOPSIS
7295 get_sort_and_sweep_cost()
7296 table Table being accessed
7297 nrows Number of rows to be sorted and retrieved
7298 cost OUT The cost
7299
7300 DESCRIPTION
7301 Get cost of these operations:
7302 - sort an array of #nrows ROWIDs using qsort
7303 - read #nrows records from table in a sweep.
7304 */
7305
7306 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7307 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7308 {
7309 assert(cost->is_zero());
7310 if (nrows)
7311 {
7312 get_sweep_read_cost(table, nrows, FALSE, cost);
7313
7314 /*
7315 @todo CostModel: For the old version of the cost model the
7316 following code should be used. For the new version of the cost
7317 model Cost_model::key_compare_cost() should be used. When
7318 removing support for the old cost model this code should be
7319 removed. The reason for this is that we should get rid of the
7320 ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7321 the current value returned by key_compare_cost() this would
7322 overestimate the cost for sorting.
7323 */
7324
7325 /*
7326 Constant for the cost of doing one key compare operation in the
7327 sort operation. We should have used the value returned by
7328 key_compare_cost() here but this would make the cost
7329 estimate of sorting very high for queries accessing many
7330 records. Until this constant is adjusted we introduce a constant
7331 that is more realistic. @todo: Replace this with
7332 key_compare_cost() when this has been given a realistic value.
7333 */
7334 const double ROWID_COMPARE_SORT_COST=
7335 table->cost_model()->key_compare_cost(1.0) / 10;
7336
7337 /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7338
7339 // For the old version of the cost model this cost calculations should
7340 // be used....
7341 const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7342 // .... For the new cost model something like this should be used...
7343 // cpu_sort= nrows * log2(nrows) *
7344 // table->cost_model()->rowid_compare_cost();
7345 cost->add_cpu(cpu_sort);
7346 }
7347 }
7348
7349
7350 /**
7351 Get cost of reading nrows table records in a "disk sweep"
7352
7353 A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
7354 for an ordered sequence of rowids.
7355
7356 We take into account that some of the records might be in a memory
7357 buffer while others need to be read from a secondary storage
7358 device. The model for this assumes hard disk IO. A disk read is
7359 performed as follows:
7360
7361 1. The disk head is moved to the needed cylinder
7362 2. The controller waits for the plate to rotate
7363 3. The data is transferred
7364
7365 Time to do #3 is insignificant compared to #2+#1.
7366
7367 Time to move the disk head is proportional to head travel distance.
7368
7369 Time to wait for the plate to rotate depends on whether the disk head
7370 was moved or not.
7371
7372 If disk head wasn't moved, the wait time is proportional to distance
7373 between the previous block and the block we're reading.
7374
7375 If the head was moved, we don't know how much we'll need to wait for the
7376 plate to rotate. We assume the wait time to be a variate with a mean of
7377 0.5 of full rotation time.
7378
7379 Our cost units are "random disk seeks". The cost of random disk seek is
7380 actually not a constant, it depends one range of cylinders we're going
7381 to access. We make it constant by introducing a fuzzy concept of "typical
7382 datafile length" (it's fuzzy as it's hard to tell whether it should
7383 include index file, temp.tables etc). Then random seek cost is:
7384
7385 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7386
7387 We define half_rotation_cost as disk_seek_base_cost() (see
7388 Cost_model_server::disk_seek_base_cost()).
7389
7390 @param table Table to be accessed
7391 @param nrows Number of rows to retrieve
7392 @param interrupted true <=> Assume that the disk sweep will be
7393 interrupted by other disk IO. false - otherwise.
7394 @param[out] cost the cost
7395 */
7396
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
                         Cost_estimate *cost)
{
  DBUG_ENTER("get_sweep_read_cost");

  assert(cost->is_zero());
  if(nrows > 0)
  {
    const Cost_model_table *const cost_model= table->cost_model();

    // The total number of blocks used by this table
    double n_blocks=
      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
    if (n_blocks < 1.0)                         // When data_file_length is 0
      n_blocks= 1.0;

    /*
      The number of blocks that in average need to be read given that
      the records are uniformly distribution over the table.
    */
    double busy_blocks=
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
    if (busy_blocks < 1.0)
      busy_blocks= 1.0;

    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
                       busy_blocks));
    /*
      The random access cost for reading the data pages will be the upper
      limit for the sweep_cost.
    */
    cost->add_io(cost_model->page_read_cost(busy_blocks));
    if (!interrupted)
    {
      Cost_estimate sweep_cost;
      /*
        Assume reading pages from disk is done in one 'sweep'.

        The cost model and cost estimate for pages already in a memory
        buffer will be different from pages that needed to be read from
        disk. Calculate the number of blocks that likely already are
        in memory and the number of blocks that need to be read from
        disk.
      */
      const double busy_blocks_mem=
        busy_blocks * table->file->table_in_memory_estimate();
      const double busy_blocks_disk= busy_blocks - busy_blocks_mem;
      assert(busy_blocks_disk >= 0.0);

      // Cost of accessing blocks in main memory buffer
      sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));

      // Cost of reading blocks from disk in a 'sweep'
      // Average head-travel distance between two consecutive busy blocks;
      // with <= 1 disk block to read there is no sweep, so the full file
      // length is used as the (single) seek distance.
      const double seek_distance= (busy_blocks_disk > 1.0) ?
        n_blocks / busy_blocks_disk : n_blocks;

      const double disk_cost=
        busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
      sweep_cost.add_io(disk_cost);

      /*
        For some cases, ex: when only few blocks need to be read and the
        seek distance becomes very large, the sweep cost model can produce
        a cost estimate that is larger than the cost of random access.
        To handle this case, we use the sweep cost only when it is less
        than the random access cost.
      */
      if (sweep_cost < *cost)
        *cost= sweep_cost;
    }
  }
  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
  DBUG_VOID_RETURN;
}
7471
7472
7473 /****************************************************************************
7474 * DS-MRR implementation ends
7475 ***************************************************************************/
7476
7477 /** @brief
7478 Read first row between two ranges.
7479 Store ranges for future calls to read_range_next.
7480
7481 @param start_key Start key. Is 0 if no min range
7482 @param end_key End key. Is 0 if no max range
7483 @param eq_range_arg Set to 1 if start_key == end_key
7484 @param sorted Set to 1 if result should be sorted per key
7485
7486 @note
7487 Record is read into table->record[0]
7488
7489 @retval
7490 0 Found row
7491 @retval
7492 HA_ERR_END_OF_FILE No rows in range
7493 @retval
7494 \# Error code
7495 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)7496 int handler::read_range_first(const key_range *start_key,
7497 const key_range *end_key,
7498 bool eq_range_arg,
7499 bool sorted /* ignored */)
7500 {
7501 int result;
7502 DBUG_ENTER("handler::read_range_first");
7503
7504 eq_range= eq_range_arg;
7505 set_end_range(end_key, RANGE_SCAN_ASC);
7506
7507 range_key_part= table->key_info[active_index].key_part;
7508
7509 if (!start_key) // Read first record
7510 result= ha_index_first(table->record[0]);
7511 else
7512 result= ha_index_read_map(table->record[0],
7513 start_key->key,
7514 start_key->keypart_map,
7515 start_key->flag);
7516 if (result)
7517 DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7518 ? HA_ERR_END_OF_FILE
7519 : result);
7520
7521 if (compare_key(end_range) <= 0)
7522 {
7523 DBUG_RETURN(0);
7524 }
7525 else
7526 {
7527 /*
7528 The last read row does not fall in the range. So request
7529 storage engine to release row lock if possible.
7530 */
7531 unlock_row();
7532 DBUG_RETURN(HA_ERR_END_OF_FILE);
7533 }
7534 }
7535
7536
7537 /** @brief
7538 Read next row between two endpoints.
7539
7540 @note
7541 Record is read into table->record[0]
7542
7543 @retval
7544 0 Found row
7545 @retval
7546 HA_ERR_END_OF_FILE No rows in range
7547 @retval
7548 \# Error code
7549 */
read_range_next()7550 int handler::read_range_next()
7551 {
7552 int result;
7553 DBUG_ENTER("handler::read_range_next");
7554
7555 if (eq_range)
7556 {
7557 /* We trust that index_next_same always gives a row in range */
7558 DBUG_RETURN(ha_index_next_same(table->record[0],
7559 end_range->key,
7560 end_range->length));
7561 }
7562 result= ha_index_next(table->record[0]);
7563 if (result)
7564 DBUG_RETURN(result);
7565
7566 if (compare_key(end_range) <= 0)
7567 {
7568 DBUG_RETURN(0);
7569 }
7570 else
7571 {
7572 /*
7573 The last read row does not fall in the range. So request
7574 storage engine to release row lock if possible.
7575 */
7576 unlock_row();
7577 DBUG_RETURN(HA_ERR_END_OF_FILE);
7578 }
7579 }
7580
7581 /**
7582 Check if one of the columns in a key is a virtual generated column.
7583 @param part the first part of the key to check
7584 @param length the length of the key
7585 @retval true if the key contains a virtual generated column
7586 @retval false if the key does not contain a virtual generated column
7587 */
key_has_vcol(const KEY_PART_INFO * part,uint length)7588 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7589 for (uint len = 0; len < length; len += part->store_length, ++part)
7590 if (part->field->is_virtual_gcol()) return true;
7591 return false;
7592 }
7593
set_end_range(const key_range * range,enum_range_scan_direction direction)7594 void handler::set_end_range(const key_range* range,
7595 enum_range_scan_direction direction)
7596 {
7597 if (range)
7598 {
7599 save_end_range= *range;
7600 end_range= &save_end_range;
7601 range_key_part= table->key_info[active_index].key_part;
7602 key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7603 (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7604 m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7605 }
7606 else
7607 end_range= NULL;
7608
7609 range_scan_direction= direction;
7610 }
7611
7612
7613 /**
7614 Compare if found key (in row) is over max-value.
7615
7616 @param range range to compare to row. May be 0 for no range
7617
7618 @seealso
7619 key.cc::key_cmp()
7620
7621 @return
7622 The return value is SIGN(key_in_row - range_key):
7623
7624 - 0 : Key is equal to range or 'range' == 0 (no range)
7625 - -1 : Key is less than range
7626 - 1 : Key is larger than range
7627 */
compare_key(key_range * range)7628 int handler::compare_key(key_range *range)
7629 {
7630 int cmp;
7631 if (!range || in_range_check_pushed_down)
7632 return 0; // No max range
7633 cmp= key_cmp(range_key_part, range->key, range->length);
7634 if (!cmp)
7635 cmp= key_compare_result_on_equal;
7636 return cmp;
7637 }
7638
7639
7640 /*
7641 Compare if a found key (in row) is within the range.
7642
7643 This function is similar to compare_key() but checks the range scan
7644 direction to determine if this is a descending scan. This function
7645 is used by the index condition pushdown implementation to determine
7646 if the read record is within the range scan.
7647
7648 @param range Range to compare to row. May be NULL for no range.
7649
7650 @seealso
7651 handler::compare_key()
7652
7653 @return Returns whether the key is within the range
7654
7655 - 0 : Key is equal to range or 'range' == 0 (no range)
7656 - -1 : Key is within the current range
7657 - 1 : Key is outside the current range
7658 */
7659
compare_key_icp(const key_range * range) const7660 int handler::compare_key_icp(const key_range *range) const
7661 {
7662 int cmp;
7663 if (!range)
7664 return 0; // no max range
7665 cmp= key_cmp(range_key_part, range->key, range->length);
7666 if (!cmp)
7667 cmp= key_compare_result_on_equal;
7668 if (range_scan_direction == RANGE_SCAN_DESC)
7669 cmp= -cmp;
7670 return cmp;
7671 }
7672
7673 /**
7674 Change the offsets of all the fields in a key range.
7675
7676 @param range the key range
7677 @param key_part the first key part
7678 @param diff how much to change the offsets with
7679 */
7680 static inline void
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,my_ptrdiff_t diff)7681 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7682 my_ptrdiff_t diff)
7683 {
7684 for (size_t len= 0; len < range->length;
7685 len+= key_part->store_length, ++key_part)
7686 key_part->field->move_field_offset(diff);
7687 }
7688
7689 /**
7690 Check if the key in the given buffer (which is not necessarily
7691 TABLE::record[0]) is within range. Called by the storage engine to
7692 avoid reading too many rows.
7693
7694 @param buf the buffer that holds the key
7695 @retval -1 if the key is within the range
7696 @retval 0 if the key is equal to the end_range key, and
7697 key_compare_result_on_equal is 0
7698 @retval 1 if the key is outside the range
7699 */
compare_key_in_buffer(const uchar * buf) const7700 int handler::compare_key_in_buffer(const uchar *buf) const
7701 {
7702 assert(end_range != NULL);
7703
7704 /*
7705 End range on descending scans is only checked with ICP for now, and then we
7706 check it with compare_key_icp() instead of this function.
7707 */
7708 assert(range_scan_direction == RANGE_SCAN_ASC);
7709
7710 // Make the fields in the key point into the buffer instead of record[0].
7711 const my_ptrdiff_t diff= buf - table->record[0];
7712 if (diff != 0)
7713 move_key_field_offsets(end_range, range_key_part, diff);
7714
7715 // Compare the key in buf against end_range.
7716 int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
7717 if (cmp == 0)
7718 cmp= key_compare_result_on_equal;
7719
7720 // Reset the field offsets.
7721 if (diff != 0)
7722 move_key_field_offsets(end_range, range_key_part, -diff);
7723
7724 return cmp;
7725 }
7726
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7727 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7728 key_part_map keypart_map,
7729 enum ha_rkey_function find_flag)
7730 {
7731 int error, error1;
7732 error= index_init(index, 0);
7733 if (!error)
7734 {
7735 error= index_read_map(buf, key, keypart_map, find_flag);
7736 error1= index_end();
7737 }
7738 return error ? error : error1;
7739 }
7740
7741
calculate_key_len(TABLE * table,uint key,key_part_map keypart_map)7742 uint calculate_key_len(TABLE *table, uint key,
7743 key_part_map keypart_map)
7744 {
7745 /* works only with key prefixes */
7746 assert(((keypart_map + 1) & keypart_map) == 0);
7747
7748 KEY *key_info= table->key_info + key;
7749 KEY_PART_INFO *key_part= key_info->key_part;
7750 KEY_PART_INFO *end_key_part= key_part + actual_key_parts(key_info);
7751 uint length= 0;
7752
7753 while (key_part < end_key_part && keypart_map)
7754 {
7755 length+= key_part->store_length;
7756 keypart_map >>= 1;
7757 key_part++;
7758 }
7759 return length;
7760 }
7761
7762
7763 /**
7764 Returns a list of all known extensions.
7765
7766 No mutexes, worst case race is a minor surplus memory allocation
7767 We have to recreate the extension map if mysqld is restarted (for example
7768 within libmysqld)
7769
7770 @retval
7771 pointer pointer to TYPELIB structure
7772 */
/**
  plugin_foreach() callback: collect the file extensions used by one
  storage engine into the List<char> passed via 'arg', skipping
  duplicates already present in the list.

  @param unused   unused THD argument (required by the callback signature)
  @param plugin   the storage engine plugin being visited
  @param arg      List<char>* accumulating distinct extensions

  @return FALSE always, so plugin_foreach continues with the next plugin.
*/
static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
                               void *arg)
{
  List<char> *found_exts= (List<char> *) arg;
  handlerton *hton= plugin_data<handlerton*>(plugin);
  handler *file;
  /*
    Only enabled engines that can create handler instances contribute;
    a temporary handler is created just to query bas_ext().
  */
  if (hton->state == SHOW_OPTION_YES && hton->create &&
      (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
  {
    List_iterator_fast<char> it(*found_exts);
    const char **ext, *old_ext;

    for (ext= file->bas_ext(); *ext; ext++)
    {
      // Linear duplicate scan; old_ext is NULL iff no match was found.
      while ((old_ext= it++))
      {
        if (!strcmp(old_ext, *ext))
          break;
      }
      if (!old_ext)
        found_exts->push_back((char *) *ext);

      // Reset the iterator for the next extension's scan.
      it.rewind();
    }
    delete file;
  }
  return FALSE;
}
7801
TYPELIB* ha_known_exts()
{
  // The TYPELIB and its name array are allocated on the statement arena.
  TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
  known_extensions->name= "known_exts";
  known_extensions->type_lengths= NULL;

  List<char> found_exts;
  const char **ext, *old_ext;

  // Trigger and trigger-name files are server-level, not engine-level.
  found_exts.push_back((char*) TRG_EXT);
  found_exts.push_back((char*) TRN_EXT);

  // Ask every storage engine plugin for its extensions (deduplicated).
  plugin_foreach(NULL, exts_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

  // One extra slot for the NULL terminator expected by TYPELIB users.
  size_t arr_length= sizeof(char *)* (found_exts.elements+1);
  ext= (const char **) sql_alloc(arr_length);

  assert(NULL != ext);
  known_extensions->count= found_exts.elements;
  known_extensions->type_names= ext;

  // Copy the list into the NULL-terminated array.
  List_iterator_fast<char> it(found_exts);
  while ((old_ext= it++))
    *ext++= old_ext;
  *ext= NULL;
  return known_extensions;
}
7830
7831
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)7832 static bool stat_print(THD *thd, const char *type, size_t type_len,
7833 const char *file, size_t file_len,
7834 const char *status, size_t status_len)
7835 {
7836 Protocol *protocol= thd->get_protocol();
7837 protocol->start_row();
7838 protocol->store(type, type_len, system_charset_info);
7839 protocol->store(file, file_len, system_charset_info);
7840 protocol->store(status, status_len, system_charset_info);
7841 if (protocol->end_row())
7842 return TRUE;
7843 return FALSE;
7844 }
7845
7846
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7847 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7848 void *arg)
7849 {
7850 enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7851 handlerton *hton= plugin_data<handlerton*>(plugin);
7852 if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7853 hton->show_status(hton, thd, stat_print, stat))
7854 return TRUE;
7855 return FALSE;
7856 }
7857
/**
  Implement SHOW ENGINE ... STATUS/MUTEX: send engine status rows.

  @param thd      connection descriptor
  @param db_type  specific engine, or NULL for all engines
  @param stat     which kind of status is requested

  @return TRUE on failure to send, FALSE on success.
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  List<Item> field_list;
  bool result;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (thd->send_result_metadata(&field_list,
                                Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return TRUE;

  if (db_type == NULL)
  {
    // No engine given: iterate all storage engine plugins.
    result= plugin_foreach(thd, showstat_handlerton,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
  }
  else
  {
    if (db_type->state != SHOW_OPTION_YES)
    {
      // Disabled engine: report a single DISABLED row instead of status.
      const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
      result= stat_print(thd, name->str, name->length,
                         "", 0, "DISABLED", 8) ? 1 : 0;
    }
    else
    {
      // Debug hook: force a net write failure while status is produced.
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("+d,simulate_net_write_failure"););
      result= db_type->show_status &&
              db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("-d,simulate_net_write_failure"););
    }
  }

  // Only send EOF if everything above succeeded.
  if (!result)
    my_eof(thd);
  return result;
}
7899
7900 /*
  Function to check if the conditions for row-based binlogging are
  correct for the table.
7903
7904 A row in the given table should be replicated if:
7905 - Row-based replication is enabled in the current thread
7906 - The binlog is enabled
7907 - It is not a temporary table
7908 - The binary log is open
7909 - The database the table resides in shall be binlogged (binlog_*_db rules)
7910 - table is not mysql.event
7911 */
7912
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
{
  /*
    The per-share part of the check (not a temp table, replication not
    disabled for the table, database passes the binlog filter) is
    computed once and cached on the TABLE_SHARE; -1 means "not yet
    computed".
  */
  if (table->s->cached_row_logging_check == -1)
  {
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
                    ! table->no_replicate &&
                    binlog_filter->db_ok(table->s->db.str));
    table->s->cached_row_logging_check= check;
  }

  assert(table->s->cached_row_logging_check == 0 ||
         table->s->cached_row_logging_check == 1);

  /*
    Combine the cached share-level check with the per-statement
    conditions: row format in effect, binlogging enabled for the
    session, and the binlog open (or, with wsrep, binlog emulation
    active for a non-applier thread).
  */
  return (thd->is_current_stmt_binlog_format_row() &&
          table->s->cached_row_logging_check &&
          (thd->variables.option_bits & OPTION_BIN_LOG) &&
#ifdef WITH_WSREP
          /* applier and replayer should not binlog */
          ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
           mysql_bin_log.is_open()));
#else
          mysql_bin_log.is_open());
#endif
}
7937
7938
7939 /** @brief
7940 Write table maps for all (manually or automatically) locked tables
7941 to the binary log.
7942
7943 SYNOPSIS
7944 write_locked_table_maps()
7945 thd Pointer to THD structure
7946
7947 DESCRIPTION
7948 This function will generate and write table maps for all tables
7949 that are locked by the thread 'thd'.
7950
7951 RETURN VALUE
7952 0 All OK
7953 1 Failed to write all table maps
7954
7955 SEE ALSO
7956 THD::lock
7957 */
7958
static int write_locked_table_maps(THD *thd)
{
  DBUG_ENTER("write_locked_table_maps");
  DBUG_PRINT("enter", ("thd: 0x%lx  thd->lock: 0x%lx "
                       "thd->extra_lock: 0x%lx",
                       (long) thd, (long) thd->lock, (long) thd->extra_lock));

  DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));

  // Only write the maps once per statement (before the first row event).
  if (thd->get_binlog_table_maps() == 0)
  {
    // Both the explicit lock and any extra (prelocked) lock may hold tables.
    MYSQL_LOCK *locks[2];
    locks[0]= thd->extra_lock;
    locks[1]= thd->lock;
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
    {
      MYSQL_LOCK const *const lock= locks[i];
      if (lock == NULL)
        continue;

      bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
      TABLE **const end_ptr= lock->table + lock->table_count;
      for (TABLE **table_ptr= lock->table ;
           table_ptr != end_ptr ;
           ++table_ptr)
      {
        TABLE *const table= *table_ptr;
        DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
        // Only write-locked tables that qualify for RBR get a table map.
        if (table->current_lock == F_WRLCK &&
            check_table_binlog_row_based(thd, table))
        {
          /*
            We need to have a transactional behavior for SQLCOM_CREATE_TABLE
            (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
            compatible behavior with the STMT based replication even when
            the table is not transactional. In other words, if the operation
            fails while executing the insert phase nothing is written to the
            binlog.

            Note that at this point, we check the type of a set of tables to
            create the table map events. In the function binlog_log_row(),
            which calls the current function, we check the type of the table
            of the current row.
          */
          bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                                table->file->has_transactions();
          int const error= thd->binlog_write_table_map(table, has_trans,
                                                       need_binlog_rows_query);
          /* Binlog Rows_query log event once for one statement which updates
             two or more tables.*/
          if (need_binlog_rows_query)
            need_binlog_rows_query= FALSE;
          /*
            If an error occurs, it is the responsibility of the caller to
            roll back the transaction.
          */
          if (unlikely(error))
            DBUG_RETURN(1);
        }
      }
    }
  }
  DBUG_RETURN(0);
}
8023
8024 typedef bool Log_func(THD*, TABLE*, bool,
8025 const uchar*, const uchar*);
8026
8027 /**
8028
8029 The purpose of an instance of this class is to :
8030
8031 1) Given a TABLE instance, backup the given TABLE::read_set, TABLE::write_set
8032 and restore those members upon this instance disposal.
8033
8034 2) Store a reference to a dynamically allocated buffer and dispose of it upon
8035 this instance disposal.
8036 */
8037
8038 class Binlog_log_row_cleanup
8039 {
8040 public:
8041 /**
8042 This constructor aims to create temporary copies of readset and writeset.
8043 @param table A pointer to TABLE object
8044 @param temp_read_bitmap Temporary BITMAP to store read_set.
8045 @param temp_write_bitmap Temporary BITMAP to store write_set.
8046 */
Binlog_log_row_cleanup(TABLE & table,MY_BITMAP & temp_read_bitmap,MY_BITMAP & temp_write_bitmap)8047 Binlog_log_row_cleanup(TABLE &table, MY_BITMAP &temp_read_bitmap,
8048 MY_BITMAP &temp_write_bitmap)
8049 : m_cleanup_table(table),
8050 m_cleanup_read_bitmap(temp_read_bitmap),
8051 m_cleanup_write_bitmap(temp_write_bitmap)
8052 {
8053 bitmap_copy(&this->m_cleanup_read_bitmap, this->m_cleanup_table.read_set);
8054 bitmap_copy(&this->m_cleanup_write_bitmap, this->m_cleanup_table.write_set);
8055 }
8056
8057 /**
8058 This destructor aims to restore the original readset and writeset and
8059 delete the temporary copies.
8060 */
~Binlog_log_row_cleanup()8061 virtual ~Binlog_log_row_cleanup()
8062 {
8063 bitmap_copy(this->m_cleanup_table.read_set, &this->m_cleanup_read_bitmap);
8064 bitmap_copy(this->m_cleanup_table.write_set, &this->m_cleanup_write_bitmap);
8065 bitmap_free(&this->m_cleanup_read_bitmap);
8066 bitmap_free(&this->m_cleanup_write_bitmap);
8067 }
8068
8069 private:
8070 TABLE &m_cleanup_table; // Creating a TABLE to get access to its members.
8071 MY_BITMAP &m_cleanup_read_bitmap; // Temporary bitmap to store read_set.
8072 MY_BITMAP &m_cleanup_write_bitmap; // Temporary bitmap to store write_set.
8073 };
8074
/**
  Log a row change (write/update/delete) to the binary log if row-based
  logging applies to the table, writing table maps first if needed.

  @param table          table the row belongs to
  @param before_record  old row image (NULL for inserts)
  @param after_record   new row image (NULL for deletes)
  @param log_func       the event-specific logging function to invoke

  @return 0 on success, HA_ERR_RBR_LOGGING_FAILED (or, with wsrep,
          ER_ERROR_DURING_COMMIT) on failure.
*/
int binlog_log_row(TABLE* table,
                   const uchar *before_record,
                   const uchar *after_record,
                   Log_func *log_func)
{
  bool error= 0;
  THD *const thd= table->in_use;

#ifdef WITH_WSREP
  /* only InnoDB tables will be replicated through binlog emulation */
  if (WSREP_EMULATE_BINLOG(thd) &&
      table->file->ht->db_type != DB_TYPE_INNODB &&
      !(table->file->ht->db_type == DB_TYPE_PARTITION_DB &&
        (((Partition_handler*)(table->file))->wsrep_is_innodb())))
  {
    return 0;
  }

  /* enforce wsrep_max_ws_rows */
  if (table->s->tmp_table == NO_TMP_TABLE)
  {
    thd->wsrep_affected_rows++;
    if (wsrep_max_ws_rows &&
        thd->wsrep_exec_mode != REPL_RECV &&
        thd->wsrep_affected_rows > wsrep_max_ws_rows)
    {
      // Abort the oversized write set; statement rollback first, then full.
      trans_rollback_stmt(thd) || trans_rollback(thd);
      my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
      return ER_ERROR_DURING_COMMIT;
    }
  }
#endif /* WITH_WSREP */
  if (check_table_binlog_row_based(thd, table))
  {
    /*
      When transaction write-set extraction is enabled, compute the
      primary-key equivalents (add_pke) for the changed rows so conflicts
      can be detected.
    */
    if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF)
    {
      try
      {
        MY_BITMAP save_read_set;
        MY_BITMAP save_write_set;
        if (bitmap_init(&save_read_set, NULL, table->s->fields, false) ||
            bitmap_init(&save_write_set, NULL, table->s->fields, false))
        {
          my_error(ER_OUT_OF_RESOURCES, MYF(0));
          return HA_ERR_RBR_LOGGING_FAILED;
        }

        // RAII sentry: restores read/write sets and frees the copies.
        Binlog_log_row_cleanup cleanup_sentry(*table, save_read_set,
                                              save_write_set);
        if (thd->variables.binlog_row_image == 0)
        {
          // Minimal row image: mark only columns of unique (HA_NOSAME) keys.
          for (uint key_number= 0; key_number < table->s->keys; ++key_number)
          {
            if (((table->key_info[key_number].flags & (HA_NOSAME)) ==
                 HA_NOSAME))
            {
              table->mark_columns_used_by_index_no_reset(key_number,
                                                         table->read_set);
              table->mark_columns_used_by_index_no_reset(key_number,
                                                         table->write_set);
            }
          }
        }
        const uchar *records[]= {after_record, before_record};

        for (int record= 0; record < 2; ++record)
        {
          if (records[record] != NULL)
          {
            assert(records[record] == table->record[0] ||
                   records[record] == table->record[1]);
            bool res= add_pke(table, thd, records[record]);
            if (res) return HA_ERR_RBR_LOGGING_FAILED;
          }
        }
      }
      catch (const std::bad_alloc &)
      {
        my_error(ER_OUT_OF_RESOURCES, MYF(0));
        return HA_ERR_RBR_LOGGING_FAILED;
      }
    }
    DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
              (table->s->fields + 7) / 8);

    /*
      If there are no table maps written to the binary log, this is
      the first row handled in this statement. In that case, we need
      to write table maps for all locked tables to the binary log.
    */
    if (likely(!(error= write_locked_table_maps(thd))))
    {
      /*
        We need to have a transactional behavior for SQLCOM_CREATE_TABLE
        (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
        compatible behavior with the STMT based replication even when
        the table is not transactional. In other words, if the operation
        fails while executing the insert phase nothing is written to the
        binlog.
      */
      bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                            table->file->has_transactions();
      error=
        (*log_func)(thd, table, has_trans, before_record, after_record);
    }
  }
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
}
8183
/**
  Acquire or release the storage-engine-level table lock, with
  performance-schema instrumentation around the engine call.

  @param thd        connection descriptor
  @param lock_type  F_RDLCK, F_WRLCK, or F_UNLCK

  @return 0 on success, otherwise the engine's external_lock() error.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
          lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  assert(inited == NONE || table->open_by_handler);

  // DTrace/systemtap probes: one START per lock kind; DONE fires below.
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_WRLOCK_START_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_UNLOCK_START_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }

  ha_statistic_increment(&SSV::ha_external_lock_count);

  // The engine call itself, wrapped for table-lock wait instrumentation.
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */

  if (error == 0)
  {
    /*
      The lock type is needed by MRR when creating a clone of this handler
      object.
    */
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
  }

  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_WRLOCK_DONE_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_UNLOCK_DONE_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_DONE(error);
  }
  DBUG_RETURN(error);
}
8251
8252
8253 /** @brief
8254 Check handler usage and reset state of file to after 'open'
8255
8256 @note can be called regardless of it is locked or not.
8257 */
int handler::ha_reset()
{
  DBUG_ENTER("handler::ha_reset");
  /* Check that we have called all proper deallocation functions */
  // read_set and write_set must still be the contiguous default pair.
  assert((uchar*) table->def_read_set.bitmap +
         table->s->column_bitmap_size ==
         (uchar*) table->def_write_set.bitmap);
  assert(bitmap_is_set_all(&table->s->all_set));
  assert(table->key_read == 0);
  /* ensure that ha_index_end / ha_rnd_end has been called */
  assert(inited == NONE);
  /* Free cache used by filesort */
  free_io_cache(table);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  /* Reset information about pushed engine conditions */
  pushed_cond= NULL;
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();

  // Finally let the engine reset its own per-handler state.
  const int retval= reset();
  DBUG_RETURN(retval);
}
8281
8282
/**
  Insert a row: instrument the engine's write_row() call and log the
  change to the binary log on success.

  @param buf  the row image to insert (table->record[0] layout)

  @return 0 on success, engine or binlog error code otherwise.
*/
int handler::ha_write_row(uchar *buf)
{
  int error;
  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
  // Writes require a write lock unless this is a temporary table.
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);

  DBUG_ENTER("handler::ha_write_row");
  // Debug-only fault injection points.
  DBUG_EXECUTE_IF("inject_error_ha_write_row",
                  DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
  DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
                  DBUG_RETURN(HA_ERR_SE_OUT_OF_MEMORY); );
  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
  // Mark the transaction as read-write for the involved engines.
  mark_trx_read_write();

  DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
                  my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
                  set_my_errno(HA_ERR_CRASHED);
                  DBUG_RETURN(HA_ERR_CRASHED););

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
    { error= write_row(buf); })

  MYSQL_INSERT_ROW_DONE(error);
  if (unlikely(error))
    DBUG_RETURN(error);

  // Inserts log only the after image (before_record is NULL).
  if (unlikely((error= binlog_log_row(table, 0, buf, log_func))))
    DBUG_RETURN(error); /* purecov: inspected */

  DEBUG_SYNC_C("ha_write_row_end");
  DBUG_RETURN(0);
}
8316
8317
ha_update_row(const uchar * old_data,uchar * new_data)8318 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
8319 {
8320 int error;
8321 assert(table_share->tmp_table != NO_TMP_TABLE ||
8322 m_lock_type == F_WRLCK);
8323 Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
8324
8325 /*
8326 Some storage engines require that the new record is in record[0]
8327 (and the old record is in record[1]).
8328 */
8329 assert(new_data == table->record[0]);
8330 assert(old_data == table->record[1]);
8331
8332 DBUG_ENTER("hanlder::ha_update_row");
8333 DBUG_EXECUTE_IF("inject_error_ha_update_row",
8334 DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8335
8336 MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
8337 mark_trx_read_write();
8338
8339 DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8340 my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8341 set_my_errno(HA_ERR_CRASHED);
8342 return(HA_ERR_CRASHED););
8343
8344 MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
8345 { error= update_row(old_data, new_data);})
8346
8347 MYSQL_UPDATE_ROW_DONE(error);
8348 if (unlikely(error))
8349 DBUG_RETURN(error);
8350 if (unlikely((error= binlog_log_row(table, old_data, new_data, log_func))))
8351 DBUG_RETURN(error);
8352 #ifdef WITH_WSREP
8353 THD* thd = table->in_use;
8354 if (WSREP(thd) && table->s->primary_key == MAX_KEY)
8355 {
8356 thd->wsrep_PA_safe= false;
8357 }
8358 #endif /* WITH_WSREP */
8359 DBUG_RETURN(0);
8360 }
8361
/**
  Delete a row: instrument the engine's delete_row() call and log the
  change to the binary log on success.

  @param buf  the row image being deleted (record[0] or record[1])

  @return 0 on success, engine or binlog error code otherwise.
*/
int handler::ha_delete_row(const uchar *buf)
{
  int error;
  // Deletes require a write lock unless this is a temporary table.
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
  /*
    Normally table->record[0] is used, but sometimes table->record[1] is used.
  */
  assert(buf == table->record[0] ||
         buf == table->record[1]);
  // NOTE: this function has no DBUG_ENTER, so plain returns are correct here.
  DBUG_EXECUTE_IF("inject_error_ha_delete_row",
                  return HA_ERR_INTERNAL_ERROR; );

  DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
                  my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
                  set_my_errno(HA_ERR_CRASHED);
                  return(HA_ERR_CRASHED););

  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
  // Mark the transaction as read-write for the involved engines.
  mark_trx_read_write();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
    { error= delete_row(buf);})

  MYSQL_DELETE_ROW_DONE(error);
  if (unlikely(error))
    return error;
  // Deletes log only the before image (after_record is NULL).
  if (unlikely((error= binlog_log_row(table, buf, 0, log_func))))
    return error;
#ifdef WITH_WSREP
  THD* thd = table->in_use;
  // Without a primary key, parallel applying of this change is unsafe.
  if (WSREP(thd) && table->s->primary_key == MAX_KEY)
  {
    thd->wsrep_PA_safe= false;
  }
#endif /* WITH_WSREP */
  return 0;
}
8401
8402
8403
8404 /** @brief
8405 use_hidden_primary_key() is called in case of an update/delete when
8406 (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
8407 but we don't have a primary key
8408 */
void handler::use_hidden_primary_key()
{
  /* fallback to use all columns in the table to identify row */
  // Marks every column as used; presumably so the full row image can act
  // as the row identifier when no PK exists -- see TABLE::use_all_columns().
  table->use_all_columns();
}
8414
8415
8416 /**
8417 Get an initialized ha_share.
8418
8419 @return Initialized ha_share
8420 @retval NULL ha_share is not yet initialized.
8421 @retval != NULL previous initialized ha_share.
8422
8423 @note
8424 If not a temp table, then LOCK_ha_data must be held.
8425 */
8426
Handler_share *handler::get_ha_share_ptr()
{
  DBUG_ENTER("handler::get_ha_share_ptr");
  assert(ha_share && table_share);

#ifndef NDEBUG
  // For non-temporary tables the caller must hold LOCK_ha_data.
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  // Dereference the share slot; NULL means not yet initialized.
  DBUG_RETURN(*ha_share);
}
8439
8440
8441 /**
8442 Set ha_share to be used by all instances of the same table/partition.
8443
8444 @param ha_share Handler_share to be shared.
8445
8446 @note
8447 If not a temp table, then LOCK_ha_data must be held.
8448 */
8449
void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
{
  DBUG_ENTER("handler::set_ha_share_ptr");
  assert(ha_share);
#ifndef NDEBUG
  // For non-temporary tables the caller must hold LOCK_ha_data.
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  // Publish the share for all handler instances of this table/partition.
  *ha_share= arg_ha_share;
  DBUG_VOID_RETURN;
}
8462
8463
8464 /**
8465 Take a lock for protecting shared handler data.
8466 */
8467
lock_shared_ha_data()8468 void handler::lock_shared_ha_data()
8469 {
8470 assert(table_share);
8471 if (table_share->tmp_table == NO_TMP_TABLE)
8472 mysql_mutex_lock(&table_share->LOCK_ha_data);
8473 }
8474
8475
8476 /**
8477 Release lock for protecting ha_share.
8478 */
8479
unlock_shared_ha_data()8480 void handler::unlock_shared_ha_data()
8481 {
8482 assert(table_share);
8483 if (table_share->tmp_table == NO_TMP_TABLE)
8484 mysql_mutex_unlock(&table_share->LOCK_ha_data);
8485 }
8486 #ifdef WITH_WSREP
8487 /**
8488 @details
8489 This function makes the storage engine to force the victim transaction
8490 to abort. Currently, only innodb has this functionality, but any SE
8491 implementing the wsrep API should provide this service to support
8492 multi-master operation.
8493
8494 @param bf_thd brute force THD asking for the abort
8495 @param victim_thd victim THD to be aborted
8496
8497 @return
8498 always 0
8499 */
8500
int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
{
  DBUG_ENTER("ha_wsrep_abort_transaction");
  /*
    Nothing to do unless the brute-force thread is a wsrep thread, or is
    running RSU DDL in total order (which must also be able to abort).
  */
  if (!WSREP(bf_thd) &&
      !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
        bf_thd->wsrep_exec_mode == TOTAL_ORDER)) {
    DBUG_RETURN(0);
  }

  // Only InnoDB implements the wsrep abort hook at present.
  handlerton *hton= installed_htons[DB_TYPE_INNODB];
  if (hton && hton->wsrep_abort_transaction)
  {
    hton->wsrep_abort_transaction(hton, bf_thd, victim_thd, signal);
  }
  else
  {
    WSREP_WARN("cannot abort InnoDB transaction");
  }

  DBUG_RETURN(0);
}
8522
ha_wsrep_fake_trx_id(THD * thd)8523 void ha_wsrep_fake_trx_id(THD *thd)
8524 {
8525 DBUG_ENTER("ha_wsrep_fake_trx_id");
8526 if (!WSREP(thd))
8527 {
8528 DBUG_VOID_RETURN;
8529 }
8530
8531 (void)wsrep_ws_handle_for_trx(&thd->wsrep_ws_handle, thd->query_id);
8532
8533 DBUG_VOID_RETURN;
8534 }
8535 #endif /* WITH_WSREP */
8536
8537
8538 /**
8539 This structure is a helper structure for passing the length and pointer of
8540 blob space allocated by storage engine.
8541 */
// Helper pair describing one engine-allocated blob buffer: its capacity
// and its address inside the record buffer.
struct blob_len_ptr{
  uint length;  // length of the blob buffer
  uchar *ptr;   // pointer to the value
};
8546
8547
8548 /**
8549 Get the blob length and pointer of allocated space from the record buffer.
8550
8551 During evaluating the blob virtual generated columns, the blob space will
8552 be allocated by server. In order to keep the blob data after the table is
8553 closed, we need write the data into a specified space allocated by storage
8554 engine. Here, we have to extract the space pointer and length from the
8555 record buffer.
8556 After we get the value of virtual generated columns, copy the data into
8557 the specified space and store it in the record buffer (@see copy_blob_data()).
8558
8559 @param table the pointer of table
8560 @param fields bitmap of field index of evaluated
8561 generated column
8562 @param[out] blob_len_ptr_array an array to record the length and pointer
8563 of allocated space by storage engine.
8564 @note The caller should provide the blob_len_ptr_array with a size of
8565 MAX_FIELDS.
8566 */
8567
extract_blob_space_and_length_from_record_buff(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8568 static void extract_blob_space_and_length_from_record_buff(const TABLE *table,
8569 const MY_BITMAP *const fields,
8570 blob_len_ptr *blob_len_ptr_array)
8571 {
8572 int num= 0;
8573 for (Field **vfield= table->vfield; *vfield; vfield++)
8574 {
8575 // Check if this field should be included
8576 if (bitmap_is_set(fields, (*vfield)->field_index) &&
8577 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8578 {
8579 blob_len_ptr_array[num].length= (*vfield)->data_length();
8580 // TODO: The following check is only for Innodb.
8581 assert(blob_len_ptr_array[num].length == 255 ||
8582 blob_len_ptr_array[num].length == 768 ||
8583 blob_len_ptr_array[num].length == 3073);
8584
8585 uchar *ptr;
8586 (*vfield)->get_ptr(&ptr);
8587 blob_len_ptr_array[num].ptr= ptr;
8588
8589 // Let server allocate the space for BLOB virtual generated columns
8590 (*vfield)->reset();
8591
8592 num++;
8593 assert(num <= MAX_FIELDS);
8594 }
8595 }
8596 }
8597
8598
8599 /**
8600 Copy the value of BLOB virtual generated columns into the space allocated
8601 by storage engine.
8602
8603 This is because the table is closed after evaluating the value. In order to
8604 keep the BLOB value after the table is closed, we have to copy the value into
8605 the place where storage engine prepares for.
8606
8607 @param table pointer of the table to be operated on
8608 @param fields bitmap of field index of evaluated generated column
8609 @param blob_len_ptr_array array of length and pointer of allocated space by
8610 storage engine.
8611 */
8612
copy_blob_data(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8613 static void copy_blob_data(const TABLE *table,
8614 const MY_BITMAP *const fields,
8615 blob_len_ptr *blob_len_ptr_array)
8616 {
8617 uint num= 0;
8618 for (Field **vfield= table->vfield; *vfield; vfield++)
8619 {
8620 // Check if this field should be included
8621 if (bitmap_is_set(fields, (*vfield)->field_index) &&
8622 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8623 {
8624 assert(blob_len_ptr_array[num].length > 0);
8625 assert(blob_len_ptr_array[num].ptr != NULL);
8626
8627 /*
8628 Only copy as much of the blob as the storage engine has
8629 allocated space for. This is sufficient since the only use of the
8630 blob in the storage engine is for using a prefix of it in a
8631 secondary index.
8632 */
8633 uint length= (*vfield)->data_length();
8634 const uint alloc_len= blob_len_ptr_array[num].length;
8635 length= length > alloc_len ? alloc_len : length;
8636
8637 uchar *ptr;
8638 (*vfield)->get_ptr(&ptr);
8639 memcpy(blob_len_ptr_array[num].ptr, ptr, length);
8640 (down_cast<Field_blob *>(*vfield))->store_in_allocated_space(
8641 pointer_cast<char *>(blob_len_ptr_array[num].ptr),
8642 length);
8643 num++;
8644 assert(num <= MAX_FIELDS);
8645 }
8646 }
8647 }
8648
8649
8650 /*
8651 Evaluate generated column's value. This is an internal helper reserved for
8652 handler::my_eval_gcolumn_expr().
8653
8654 @param thd pointer of THD
  @param table             The pointer of the table where the evaluated generated
                           columns are in
8657 @param fields bitmap of field index of evaluated generated column
8658 @param[in,out] record record buff of base columns generated column depends.
8659 After calling this function, it will be used to return
8660 the value of generated column.
  @param in_purge          whether the function is called by the purge thread
8662
8663 @return true in case of error, false otherwise.
8664 */
8665
static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
                                        const MY_BITMAP *const fields,
                                        uchar *record,
                                        bool in_purge)
{
  DBUG_ENTER("my_eval_gcolumn_expr_helper");
  assert(table && table->vfield);
  assert(!thd->is_error());

  /*
    Temporarily make all Field objects of the table point into the
    caller-supplied record buffer instead of table->record[0]; restored
    before returning.
  */
  uchar *old_buf= table->record[0];
  repoint_field_to_record(table, old_buf, record);

  blob_len_ptr blob_len_ptr_array[MAX_FIELDS];

  /*
    If it's purge thread, we need get the space allocated by storage engine
    for blob.
  */
  if (in_purge)
    extract_blob_space_and_length_from_record_buff(table, fields,
                                                   blob_len_ptr_array);

  bool res= false;
  // Stack-allocated bitmap of the columns that must actually be evaluated.
  MY_BITMAP fields_to_evaluate;
  my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
  bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields, 0);
  bitmap_set_all(&fields_to_evaluate);
  bitmap_intersect(&fields_to_evaluate, fields);
  /*
    In addition to evaluating the value for the columns requested by
    the caller we also need to evaluate any virtual columns that these
    depend on.
    This loop goes through the columns that should be evaluated and
    adds all the base columns. If the base column is virtual, it has
    to be evaluated.
  */
  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;
    // Validate that the field number is less than the bit map size
    assert(field->field_index < fields->n_bits);

    if (bitmap_is_set(fields, field->field_index))
      bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
  }

  /*
    Evaluate all requested columns and all base columns these depends
    on that are virtual.

    This function is called by the storage engine, which may request to
    evaluate more generated columns than read_set/write_set says.
    For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
    record from the clustered index and asks us to compute generated columns
    that match key fields in the used secondary index. So we trust that the
    engine has filled all base columns necessary to requested computations,
    and we ignore read_set/write_set.
  */

  // Temporarily mark all columns readable/writable; restored below.
  my_bitmap_map *old_maps[2];
  dbug_tmp_use_all_columns(table, old_maps,
                           table->read_set, table->write_set);

  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;

    // Check if we should evaluate this field
    if (bitmap_is_set(&fields_to_evaluate, field->field_index) &&
        field->is_virtual_gcol())
    {
      assert(field->gcol_info && field->gcol_info->expr_item->fixed);

      const type_conversion_status save_in_field_status=
        field->gcol_info->expr_item->save_in_field(field, 0);
      assert(!thd->is_error() || save_in_field_status != TYPE_OK);

      /*
        save_in_field() may return non-zero even if there was no
        error. This happens if a warning is raised, such as an
        out-of-range warning when converting the result to the target
        type of the virtual column. We should stop only if the
        non-zero return value was caused by an actual error.
      */
      if (save_in_field_status != TYPE_OK && thd->is_error())
      {
        res= true;
        break;
      }
    }
  }

  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);

  /*
    If it's a purge thread, we need copy the blob data into specified place
    allocated by storage engine so that the blob data still can be accessed
    after table is closed.
  */
  if (in_purge)
    copy_blob_data(table, fields, blob_len_ptr_array);

  // Point the fields back at the table's own record buffer.
  repoint_field_to_record(table, record, old_buf);
  DBUG_RETURN(res);
}
8771
8772
8773 /**
8774 Callback to allow InnoDB to prepare a template for generated
8775 column processing. This function will open the table without
8776 opening in the engine and call the provided function with
8777 the TABLE object made. The function will then close the TABLE.
8778
8779 @param thd Thread handle
8780 @param db_name Name of database containing the table
8781 @param table_name Name of table to open
8782 @param myc InnoDB function to call for processing TABLE
8783 @param ib_table Argument for InnoDB function
8784
8785 @return true in case of error, false otherwise.
8786 */
8787
my_prepare_gcolumn_template(THD * thd,const char * db_name,const char * table_name,my_gcolumn_template_callback_t myc,void * ib_table)8788 bool handler::my_prepare_gcolumn_template(THD *thd,
8789 const char *db_name,
8790 const char *table_name,
8791 my_gcolumn_template_callback_t myc,
8792 void* ib_table)
8793 {
8794 char path[FN_REFLEN + 1];
8795 bool was_truncated;
8796 build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8797 db_name, table_name, "", 0, &was_truncated);
8798 assert(!was_truncated);
8799 lex_start(thd);
8800 bool rc= true;
8801
8802 // Note! The last argument to open_table_uncached() must be false,
8803 // since the table already exists in the TDC. Allowing the table to
8804 // be opened in the SE in this case is dangerous as the two shares
8805 // could get conflicting SE private data.
8806 TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8807 false, false);
8808 if (table)
8809 {
8810 myc(table, ib_table);
8811 intern_close_table(table);
8812 rc= false;
8813 }
8814 lex_end(thd->lex);
8815 return rc;
8816 }
8817
8818
8819 /**
8820 Callback for generated columns processing. Will open the table, in the
8821 server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
8822 processing. This function is a variant of the other
8823 handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
8824 object already exists - e.g. from purge threads.
8825
8826 Note! The call to open_table_uncached() must be made with the last
8827 argument (open_in_engine) set to false. Failing to do so will cause
8828 deadlocks and incorrect behavior.
8829
8830 @param thd Thread handle
8831 @param db_name Database containing the table to open
8832 @param table_name Name of table to open
8833 @param fields Bitmap of field index of evaluated generated column
8834 @param record Record buffer
8835
8836 @return true in case of error, false otherwise.
8837 */
8838
my_eval_gcolumn_expr_with_open(THD * thd,const char * db_name,const char * table_name,const MY_BITMAP * const fields,uchar * record)8839 bool handler::my_eval_gcolumn_expr_with_open(THD *thd,
8840 const char *db_name,
8841 const char *table_name,
8842 const MY_BITMAP *const fields,
8843 uchar *record)
8844 {
8845 bool retval= true;
8846 lex_start(thd);
8847
8848 char path[FN_REFLEN + 1];
8849 bool was_truncated;
8850 build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8851 db_name, table_name, "", 0, &was_truncated);
8852 assert(!was_truncated);
8853
8854 TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8855 false, false);
8856 if (table)
8857 {
8858 retval= my_eval_gcolumn_expr_helper(thd, table, fields, record, true);
8859 intern_close_table(table);
8860 }
8861
8862 lex_end(thd->lex);
8863 return retval;
8864 }
8865
8866
8867 /**
8868 Evaluate generated Column's value. If the engine has to write an index entry
8869 to its UNDO log (in a DELETE or UPDATE), and the index is on a virtual
8870 generated column, engine needs to calculate the column's value. This variant
8871 of handler::my_eval_gcolumn_expr() is used by client threads which have a
8872 TABLE.
8873
8874 @param thd Thread handle
8875 @param table mysql table object
8876 @param fields bitmap of field index of evaluated
8877 generated column
8878 @param record buff of base columns generated column depends.
8879 After calling this function, it will be used to
8880 return the value of generated column.
8881
8882 @retval true in case of error
8883 @retval false on success.
8884 */
8885
my_eval_gcolumn_expr(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record)8886 bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
8887 const MY_BITMAP *const fields,
8888 uchar *record)
8889 {
8890 DBUG_ENTER("my_eval_gcolumn_expr");
8891
8892 const bool res=
8893 my_eval_gcolumn_expr_helper(thd, table, fields, record, false);
8894 DBUG_RETURN(res);
8895 }
8896
8897
8898 /**
8899 Auxiliary structure for passing information to notify_*_helper()
8900 functions.
8901 */
8902
struct HTON_NOTIFY_PARAMS
{
  HTON_NOTIFY_PARAMS(const MDL_key *mdl_key,
                     ha_notification_type mdl_type)
    : key(mdl_key), notification_type(mdl_type),
      some_htons_were_notified(false),
      victimized(false)
  {}

  // MDL key identifying the object the notification is about.
  const MDL_key *key;
  // Pre-event (permission request) or post-event notification.
  const ha_notification_type notification_type;
  // Set when at least one SE accepted a pre-event notification; used to
  // decide whether a compensating post-event round is needed on failure.
  bool some_htons_were_notified;
  // Set when lock acquisition failed because we were chosen as a deadlock
  // victim (only filled in by notify_exclusive_mdl).
  bool victimized;
};
8917
8918
8919 static my_bool
notify_exclusive_mdl_helper(THD * thd,plugin_ref plugin,void * arg)8920 notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin, void *arg)
8921 {
8922 handlerton *hton= plugin_data<handlerton*>(plugin);
8923 if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl)
8924 {
8925 HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8926
8927 if (hton->notify_exclusive_mdl(thd, params->key,
8928 params->notification_type,
8929 ¶ms->victimized))
8930 {
8931 // Ignore failures from post event notification.
8932 if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8933 return TRUE;
8934 }
8935 else
8936 params->some_htons_were_notified= true;
8937 }
8938 return FALSE;
8939 }
8940
8941
8942 /**
8943 Notify/get permission from all interested storage engines before
8944 acquisition or after release of exclusive metadata lock on object
8945 represented by key.
8946
8947 @param thd Thread context.
8948 @param mdl_key MDL key identifying object on which exclusive
8949 lock is to be acquired/was released.
8950 @param notification_type Indicates whether this is pre-acquire or
8951 post-release notification.
8952 @param victimized 'true' if locking failed as we were selected
8953 as a victim in order to avoid possible deadlocks.
8954
8955 @note @see handlerton::notify_exclusive_mdl for details about
8956 calling convention and error reporting.
8957
8958 @return False - if notification was successful/lock can be acquired,
8959 True - if it has failed/lock should not be acquired.
8960 */
8961
ha_notify_exclusive_mdl(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type,bool * victimized)8962 bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
8963 ha_notification_type notification_type,
8964 bool *victimized)
8965 {
8966 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8967 *victimized = false;
8968 if (plugin_foreach(thd, notify_exclusive_mdl_helper,
8969 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms))
8970 {
8971 *victimized = params.victimized;
8972 /*
8973 If some SE hasn't given its permission to acquire lock and some SEs
8974 has given their permissions, we need to notify the latter group about
8975 failed lock acquisition. We do this by calling post-release notification
8976 for all interested SEs unconditionally.
8977 */
8978 if (notification_type == HA_NOTIFY_PRE_EVENT &&
8979 params.some_htons_were_notified)
8980 {
8981 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8982 (void) plugin_foreach(thd, notify_exclusive_mdl_helper,
8983 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8984 }
8985 return true;
8986 }
8987 return false;
8988 }
8989
8990
8991 static my_bool
notify_alter_table_helper(THD * thd,plugin_ref plugin,void * arg)8992 notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg)
8993 {
8994 handlerton *hton= plugin_data<handlerton*>(plugin);
8995 if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table)
8996 {
8997 HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8998
8999 if (hton->notify_alter_table(thd, params->key, params->notification_type))
9000 {
9001 // Ignore failures from post event notification.
9002 if (params->notification_type == HA_NOTIFY_PRE_EVENT)
9003 return TRUE;
9004 }
9005 else
9006 params->some_htons_were_notified= true;
9007 }
9008 return FALSE;
9009 }
9010
9011
9012 /**
9013 Notify/get permission from all interested storage engines before
9014 or after executed ALTER TABLE on the table identified by key.
9015
9016 @param thd Thread context.
9017 @param mdl_key MDL key identifying table.
9018 @param notification_type Indicates whether this is pre-ALTER or
9019 post-ALTER notification.
9020
9021 @note @see handlerton::notify_alter_table for rationale,
9022 details about calling convention and error reporting.
9023
9024 @return False - if notification was successful/ALTER TABLE can
9025 proceed.
9026 True - if it has failed/ALTER TABLE should fail.
9027 */
9028
ha_notify_alter_table(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type)9029 bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
9030 ha_notification_type notification_type)
9031 {
9032 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
9033
9034 if (plugin_foreach(thd, notify_alter_table_helper,
9035 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms))
9036 {
9037 /*
9038 If some SE hasn't given its permission to do ALTER TABLE and some SEs
9039 has given their permissions, we need to notify the latter group about
9040 failed attemopt. We do this by calling post-ALTER TABLE notification
9041 for all interested SEs unconditionally.
9042 */
9043 if (notification_type == HA_NOTIFY_PRE_EVENT &&
9044 params.some_htons_were_notified)
9045 {
9046 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
9047 (void) plugin_foreach(thd, notify_alter_table_helper,
9048 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
9049 }
9050 return true;
9051 }
9052 return false;
9053 }
9054
9055 /**
9056 Set the transaction isolation level for the next transaction and update
9057 session tracker information about the transaction isolation level.
9058
9059 @param thd THD session setting the tx_isolation.
9060 @param tx_isolation The isolation level to be set.
9061 @param one_shot True if the isolation level should be restored to
9062 session default after finishing the transaction.
9063 */
set_tx_isolation(THD * thd,enum_tx_isolation tx_isolation,bool one_shot)9064 bool set_tx_isolation(THD *thd,
9065 enum_tx_isolation tx_isolation,
9066 bool one_shot)
9067 {
9068 Transaction_state_tracker *tst= NULL;
9069
9070 if (thd->variables.session_track_transaction_info > TX_TRACK_NONE)
9071 tst= (Transaction_state_tracker *)
9072 thd->session_tracker.get_tracker(TRANSACTION_INFO_TRACKER);
9073
9074 thd->tx_isolation= tx_isolation;
9075
9076 if (one_shot)
9077 {
9078 assert(!thd->in_active_multi_stmt_transaction());
9079 assert(!thd->in_sub_stmt);
9080 enum enum_tx_isol_level l;
9081 switch (thd->tx_isolation) {
9082 case ISO_READ_UNCOMMITTED:
9083 l= TX_ISOL_UNCOMMITTED;
9084 break;
9085 case ISO_READ_COMMITTED:
9086 l= TX_ISOL_COMMITTED;
9087 break;
9088 case ISO_REPEATABLE_READ:
9089 l= TX_ISOL_REPEATABLE;
9090 break;
9091 case ISO_SERIALIZABLE:
9092 l= TX_ISOL_SERIALIZABLE;
9093 break;
9094 default:
9095 assert(0);
9096 return true;
9097 }
9098 if (tst)
9099 tst->set_isol_level(thd, l);
9100 }
9101 else if (tst)
9102 {
9103 tst->set_isol_level(thd, TX_ISOL_INHERIT);
9104 }
9105 return false;
9106 }
9107
9108
9109 /**
9110 Checks if the file name is reserved word used by SE by invoking
9111 the handlerton method.
9112
9113 @param unused1 thread handler which is unused.
9114 @param plugin SE plugin.
9115 @param name Database name.
9116
9117 @retval true If the name is reserved word.
9118 @retval false If the name is not reserved word.
9119 */
is_reserved_db_name_handlerton(THD * unused1,plugin_ref plugin,void * name)9120 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
9121 void *name)
9122 {
9123 handlerton *hton= plugin_data<handlerton*>(plugin);
9124 if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
9125 return (hton->is_reserved_db_name(hton, (const char *)name));
9126 return false;
9127 }
9128
9129
9130 /**
9131 Check if the file name is reserved word used by SE.
9132
9133 @param name Database name.
9134
9135 @retval true If the name is a reserved word.
9136 @retval false If the name is not a reserved word.
9137 */
ha_check_reserved_db_name(const char * name)9138 bool ha_check_reserved_db_name(const char* name)
9139 {
9140 return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
9141 MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
9142 }
9143