1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 /** @file handler.cc
24
25 @brief
26 Handler-calling-functions
27 */
28
29 #include "binlog.h"
30 #include "sql_priv.h"
31 #include "unireg.h"
32 #include "rpl_handler.h"
33 #include "sql_cache.h" // query_cache, query_cache_*
34 #include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp
35 #include "sql_table.h" // build_table_filename
36 #include "sql_parse.h" // check_stack_overrun
37 #include "sql_acl.h" // SUPER_ACL
38 #include "sql_base.h" // free_io_cache
39 #include "discover.h" // writefrm
40 #include "log_event.h" // *_rows_log_event
41 #include "rpl_filter.h"
42 #include <myisampack.h>
43 #include "transaction.h"
44 #include <errno.h>
45 #include "probes_mysql.h"
46 #include <mysql/psi/mysql_table.h>
47 #include "debug_sync.h" // DEBUG_SYNC
48 #include <my_bit.h>
49 #include <list>
50 #include "global_threads.h"
51
52 #ifdef WITH_PARTITION_STORAGE_ENGINE
53 #include "ha_partition.h"
54 #endif
55
56 using std::min;
57 using std::max;
58 using std::list;
59
60 // This is a temporary backporting fix.
#ifndef HAVE_LOG2
/*
  Fallback definition of C99 log2() for platforms whose libm lacks it.
  Computed as ln(x) / ln(2).  This will be slightly slower and perhaps a
  tiny bit less accurate than doing it the IEEE754 way, but log2() should
  be available on C99 systems.
*/
inline double log2(double x)
{
  return (log(x) / M_LN2);
}
#endif
71
72 /*
73 While we have legacy_db_type, we have this array to
74 check for dups and to find handlerton from legacy_db_type.
75 Remove when legacy_db_type is finally gone
76 */
77 st_plugin_int *hton2plugin[MAX_HA];
78
79 static handlerton *installed_htons[128];
80
81 #define BITMAP_STACKBUF_SIZE (128/8)
82
83 KEY_CREATE_INFO default_key_create_info=
84 { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };
85
86 /* number of entries in handlertons[] */
87 ulong total_ha= 0;
88 /* number of storage engines (from handlertons[]) that support 2pc */
89 ulong total_ha_2pc= 0;
90 /* size of savepoint storage area (see ha_init) */
91 ulong savepoint_alloc_size= 0;
92
93 static const LEX_STRING sys_table_aliases[]=
94 {
95 { C_STRING_WITH_LEN("INNOBASE") }, { C_STRING_WITH_LEN("INNODB") },
96 { C_STRING_WITH_LEN("NDB") }, { C_STRING_WITH_LEN("NDBCLUSTER") },
97 { C_STRING_WITH_LEN("HEAP") }, { C_STRING_WITH_LEN("MEMORY") },
98 { C_STRING_WITH_LEN("MERGE") }, { C_STRING_WITH_LEN("MRG_MYISAM") },
99 {NullS, 0}
100 };
101
102 const char *ha_row_type[] = {
103 "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
104 /* Reserved to be "PAGE" in future versions */ "?",
105 "TOKUDB_UNCOMPRESSED", "TOKUDB_ZLIB", "TOKUDB_SNAPPY", "TOKUDB_QUICKLZ",
106 "TOKUDB_LZMA", "TOKUDB_FAST", "TOKUDB_SMALL", "TOKUDB_DEFAULT",
107 "?","?","?"
108 };
109
110 const char *tx_isolation_names[] =
111 { "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
112 NullS};
113 TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
114 tx_isolation_names, NULL};
115
116 #ifndef DBUG_OFF
117
ha_legacy_type_name(legacy_db_type legacy_type)118 const char *ha_legacy_type_name(legacy_db_type legacy_type)
119 {
120 switch (legacy_type)
121 {
122 case DB_TYPE_UNKNOWN:
123 return "DB_TYPE_UNKNOWN";
124 case DB_TYPE_DIAB_ISAM:
125 return "DB_TYPE_DIAB_ISAM";
126 case DB_TYPE_HASH:
127 return "DB_TYPE_HASH";
128 case DB_TYPE_MISAM:
129 return "DB_TYPE_MISAM";
130 case DB_TYPE_PISAM:
131 return "DB_TYPE_PISAM";
132 case DB_TYPE_RMS_ISAM:
133 return "DB_TYPE_RMS_ISAM";
134 case DB_TYPE_HEAP:
135 return "DB_TYPE_HEAP";
136 case DB_TYPE_ISAM:
137 return "DB_TYPE_ISAM";
138 case DB_TYPE_MRG_ISAM:
139 return "DB_TYPE_MRG_ISAM";
140 case DB_TYPE_MYISAM:
141 return "DB_TYPE_MYISAM";
142 case DB_TYPE_MRG_MYISAM:
143 return "DB_TYPE_MRG_MYISAM";
144 case DB_TYPE_BERKELEY_DB:
145 return "DB_TYPE_BERKELEY_DB";
146 case DB_TYPE_INNODB:
147 return "DB_TYPE_INNODB";
148 case DB_TYPE_GEMINI:
149 return "DB_TYPE_GEMINI";
150 case DB_TYPE_NDBCLUSTER:
151 return "DB_TYPE_NDBCLUSTER";
152 case DB_TYPE_EXAMPLE_DB:
153 return "DB_TYPE_EXAMPLE_DB";
154 case DB_TYPE_ARCHIVE_DB:
155 return "DB_TYPE_ARCHIVE_DB";
156 case DB_TYPE_CSV_DB:
157 return "DB_TYPE_CSV_DB";
158 case DB_TYPE_FEDERATED_DB:
159 return "DB_TYPE_FEDERATED_DB";
160 case DB_TYPE_BLACKHOLE_DB:
161 return "DB_TYPE_BLACKHOLE_DB";
162 case DB_TYPE_PARTITION_DB:
163 return "DB_TYPE_PARTITION_DB";
164 case DB_TYPE_BINLOG:
165 return "DB_TYPE_BINLOG";
166 case DB_TYPE_SOLID:
167 return "DB_TYPE_SOLID";
168 case DB_TYPE_PBXT:
169 return "DB_TYPE_PBXT";
170 case DB_TYPE_TABLE_FUNCTION:
171 return "DB_TYPE_TABLE_FUNCTION";
172 case DB_TYPE_MEMCACHE:
173 return "DB_TYPE_MEMCACHE";
174 case DB_TYPE_FALCON:
175 return "DB_TYPE_FALCON";
176 case DB_TYPE_MARIA:
177 return "DB_TYPE_MARIA";
178 case DB_TYPE_PERFORMANCE_SCHEMA:
179 return "DB_TYPE_PERFORMANCE_SCHEMA";
180 default:
181 return "DB_TYPE_DYNAMIC";
182 }
183 }
184 #endif
185
186 /**
187 Database name that hold most of mysqld system tables.
188 Current code assumes that, there exists only some
189 specific "database name" designated as system database.
190 */
191 const char* mysqld_system_database= "mysql";
192
193 // System tables that belong to mysqld_system_database.
194 st_system_tablename mysqld_system_tables[]= {
195 {mysqld_system_database, "db"},
196 {mysqld_system_database, "user"},
197 {mysqld_system_database, "host"},
198 {mysqld_system_database, "func"},
199 {mysqld_system_database, "proc"},
200 {mysqld_system_database, "event"},
201 {mysqld_system_database, "plugin"},
202 {mysqld_system_database, "servers"},
203 {mysqld_system_database, "procs_priv"},
204 {mysqld_system_database, "tables_priv"},
205 {mysqld_system_database, "proxies_priv"},
206 {mysqld_system_database, "columns_priv"},
207 {mysqld_system_database, "time_zone"},
208 {mysqld_system_database, "time_zone_name"},
209 {mysqld_system_database, "time_zone_leap_second"},
210 {mysqld_system_database, "time_zone_transition"},
211 {mysqld_system_database, "time_zone_transition_type"},
212 {mysqld_system_database, "help_category"},
213 {mysqld_system_database, "help_keyword"},
214 {mysqld_system_database, "help_relation"},
215 {mysqld_system_database, "help_topic"},
216 {(const char *)NULL, (const char *)NULL} /* This must be at the end */
217 };
218
219 /**
220 This static pointer holds list of system databases from SQL layer and
221 various SE's. The required memory is allocated once, and never freed.
222 */
223 static const char **known_system_databases= NULL;
224 static const char **ha_known_system_databases();
225
226 // Called for each SE to get SE specific system database.
227 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
228 void *arg);
229
230 // Called for each SE to check if given db.table_name is a system table.
231 static my_bool check_engine_system_table_handlerton(THD *unused,
232 plugin_ref plugin,
233 void *arg);
234 /**
235 Structure used by SE during check for system table.
236 This structure is passed to each SE handlerton and the status (OUT param)
237 is collected.
238 */
239 struct st_sys_tbl_chk_params
240 {
241 const char *db; // IN param
242 const char *table_name; // IN param
243 bool is_sql_layer_system_table; // IN param
244 legacy_db_type db_type; // IN param
245
246 enum enum_sys_tbl_chk_status
247 {
248 // db.table_name is not a supported system table.
249 NOT_KNOWN_SYSTEM_TABLE,
250 /*
251 db.table_name is a system table,
252 but may not be supported by SE.
253 */
254 KNOWN_SYSTEM_TABLE,
255 /*
256 db.table_name is a system table,
257 and is supported by SE.
258 */
259 SUPPORTED_SYSTEM_TABLE
260 } status; // OUT param
261 };
262
263
ha_default_plugin(THD * thd)264 static plugin_ref ha_default_plugin(THD *thd)
265 {
266 if (thd->variables.table_plugin)
267 return thd->variables.table_plugin;
268 return my_plugin_lock(thd, &global_system_variables.table_plugin);
269 }
270
271
272 /** @brief
273 Return the default storage engine handlerton used for non-temp tables
274 for thread
275
276 SYNOPSIS
277 ha_default_handlerton(thd)
278 thd current thread
279
280 RETURN
281 pointer to handlerton
282 */
ha_default_handlerton(THD * thd)283 handlerton *ha_default_handlerton(THD *thd)
284 {
285 plugin_ref plugin= ha_default_plugin(thd);
286 DBUG_ASSERT(plugin);
287 handlerton *hton= plugin_data(plugin, handlerton*);
288 DBUG_ASSERT(hton);
289 return hton;
290 }
291
292 /** @brief
293 Return the enforced storage engine handlerton for thread
294
295 SYNOPSIS
296 ha_enforce_handlerton(thd)
297 thd current thread
298
299 RETURN
300 pointer to handlerton
301 */
ha_enforce_handlerton(THD * thd)302 handlerton *ha_enforce_handlerton(THD* thd)
303 {
304 if (enforce_storage_engine)
305 {
306 LEX_STRING name= { enforce_storage_engine,
307 strlen(enforce_storage_engine) };
308 plugin_ref plugin= ha_resolve_by_name(thd, &name, FALSE);
309 if (plugin)
310 {
311 handlerton *hton= plugin_data(plugin, handlerton*);
312 DBUG_ASSERT(hton);
313 return hton;
314 }
315 else
316 {
317 my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), enforce_storage_engine,
318 enforce_storage_engine);
319 }
320 }
321 return NULL;
322 }
323
ha_default_temp_plugin(THD * thd)324 static plugin_ref ha_default_temp_plugin(THD *thd)
325 {
326 if (thd->variables.temp_table_plugin)
327 return thd->variables.temp_table_plugin;
328 return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
329 }
330
331
332 /** @brief
333 Return the default storage engine handlerton used for explicitly
334 created temp tables for a thread
335
336 SYNOPSIS
337 ha_default_temp_handlerton(thd)
338 thd current thread
339
340 RETURN
341 pointer to handlerton
342 */
ha_default_temp_handlerton(THD * thd)343 handlerton *ha_default_temp_handlerton(THD *thd)
344 {
345 plugin_ref plugin= ha_default_temp_plugin(thd);
346 DBUG_ASSERT(plugin);
347 handlerton *hton= plugin_data(plugin, handlerton*);
348 DBUG_ASSERT(hton);
349 return hton;
350 }
351
352
353 /** @brief
354 Return the storage engine handlerton for the supplied name
355
356 SYNOPSIS
357 ha_resolve_by_name(thd, name)
358 thd current thread
359 name name of storage engine
360
361 RETURN
362 pointer to storage engine plugin handle
363 */
ha_resolve_by_name(THD * thd,const LEX_STRING * name,bool is_temp_table)364 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
365 bool is_temp_table)
366 {
367 const LEX_STRING *table_alias;
368 plugin_ref plugin;
369
370 redo:
371 /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
372 if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
373 (const uchar *)name->str, name->length,
374 (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
375 return is_temp_table ?
376 ha_default_plugin(thd) : ha_default_temp_plugin(thd);
377
378 if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
379 {
380 handlerton *hton= plugin_data(plugin, handlerton *);
381 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
382 return plugin;
383
384 /*
385 unlocking plugin immediately after locking is relatively low cost.
386 */
387 plugin_unlock(thd, plugin);
388 }
389
390 /*
391 We check for the historical aliases.
392 */
393 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
394 {
395 if (!my_strnncoll(&my_charset_latin1,
396 (const uchar *)name->str, name->length,
397 (const uchar *)table_alias->str, table_alias->length))
398 {
399 name= table_alias + 1;
400 goto redo;
401 }
402 }
403
404 return NULL;
405 }
406
407
/**
  Obtain a locked plugin reference for the plugin implementing the given
  handlerton.

  @param thd   current thread (may be NULL)
  @param hton  handlerton whose plugin should be locked, or NULL

  @return locked plugin reference, or NULL when hton is NULL

  @note plugin_ref has a different representation in debug builds
        (pointer-to-pointer), hence the differing my_plugin_lock()
        argument under DBUG_OFF.
*/
plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
{
  if (hton)
  {
    st_plugin_int **plugin= hton2plugin + hton->slot;

#ifdef DBUG_OFF
    return my_plugin_lock(thd, plugin);
#else
    return my_plugin_lock(thd, &plugin);
#endif
  }
  return NULL;
}
422
423
/**
  Find the handlerton for a legacy_db_type code.

  @param thd      current thread
  @param db_type  legacy type code; DB_TYPE_DEFAULT yields the session
                  default engine

  @return handlerton of the engine, or NULL when the code is unknown,
          out of range, or the engine is not installed
*/
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
{
  plugin_ref plugin;
  switch (db_type) {
  case DB_TYPE_DEFAULT:
    return ha_default_handlerton(thd);
  default:
    if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
        (plugin= ha_lock_engine(thd, installed_htons[db_type])))
      return plugin_data(plugin, handlerton*);
    /* fall through */ /* out-of-range code or engine not installed */
  case DB_TYPE_UNKNOWN:
    return NULL;
  }
}
439
440
441 /**
442 Use other database handler if databasehandler is not compiled in.
443 */
ha_checktype(THD * thd,enum legacy_db_type database_type,bool no_substitute,bool report_error)444 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
445 bool no_substitute, bool report_error)
446 {
447 handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
448 if (ha_storage_engine_is_enabled(hton))
449 return hton;
450
451 if (no_substitute)
452 {
453 if (report_error)
454 {
455 const char *engine_name= ha_resolve_storage_engine_name(hton);
456 my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
457 }
458 return NULL;
459 }
460
461 (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));
462
463 switch (database_type) {
464 case DB_TYPE_MRG_ISAM:
465 return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
466 default:
467 break;
468 }
469
470 return ha_default_handlerton(thd);
471 } /* ha_checktype */
472
473
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)474 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
475 handlerton *db_type)
476 {
477 handler *file;
478 DBUG_ENTER("get_new_handler");
479 DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
480
481 if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
482 {
483 if ((file= db_type->create(db_type, share, alloc)))
484 file->init();
485 DBUG_RETURN(file);
486 }
487 /*
488 Try the default table type
489 Here the call to current_thd() is ok as we call this function a lot of
490 times but we enter this branch very seldom.
491 */
492 DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
493 }
494
495
#ifdef WITH_PARTITION_STORAGE_ENGINE
/**
  Create and initialize an ha_partition handler for the given
  partitioning definition.

  @param part_info  partitioning description of the table

  @return initialized handler, or NULL on out-of-memory
          (ER_OUTOFMEMORY is reported)
*/
handler *get_ha_partition(partition_info *part_info)
{
  ha_partition *partition;
  DBUG_ENTER("get_ha_partition");
  if ((partition= new ha_partition(partition_hton, part_info)))
  {
    /* Roll back the allocation if per-partition setup fails. */
    if (partition->initialize_partition(current_thd->mem_root))
    {
      delete partition;
      partition= 0;
    }
    else
      partition->init();
  }
  else
  {
    my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR),
             static_cast<int>(sizeof(ha_partition)));
  }
  DBUG_RETURN(((handler*) partition));
}
#endif
519
520
/* Handler error messages, indexed by (HA_ERR_* - HA_ERR_FIRST). */
static const char **handler_errmsgs;

C_MODE_START
/* C-linkage callback for my_error_register(): returns the message array. */
static const char **get_handler_errmsgs()
{
  return handler_errmsgs;
}
C_MODE_END
529
530
531 /**
532 Register handler error messages for use with my_error().
533
534 @retval
535 0 OK
536 @retval
537 !=0 Error
538 */
539
ha_init_errors(void)540 int ha_init_errors(void)
541 {
542 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
543
544 /* Allocate a pointer array for the error message strings. */
545 /* Zerofill it to avoid uninitialized gaps. */
546 if (! (handler_errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
547 MYF(MY_WME | MY_ZEROFILL))))
548 return 1;
549
550 /* Set the dedicated error messages. */
551 SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
552 SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
553 SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");
554 SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
555 SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
556 SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
557 SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
558 SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
559 SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
560 SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
561 SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
562 SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
563 SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
564 SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
565 SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
566 SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
567 SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");
568 SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
569 SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
570 SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
571 SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
572 SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
573 SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
574 SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
575 SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
576 SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
577 SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
578 SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
579 SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
580 SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
581 SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
582 SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
583 SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
584 SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
585 SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
586 SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
587 SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
588 SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
589 SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
590 SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
591 SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
592 SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
593 SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
594 SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
595 SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
596 SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
597 SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
598 SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");
599 SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
600 SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
601 SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
602 SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP, "Too many nested sub-expressions in a full-text search");
603 /* Register the error messages for use with my_error(). */
604 return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
605 }
606
607
608 /**
609 Unregister handler error messages.
610
611 @retval
612 0 OK
613 @retval
614 !=0 Error
615 */
ha_finish_errors(void)616 static int ha_finish_errors(void)
617 {
618 const char **errmsgs;
619
620 /* Allocate a pointer array for the error message strings. */
621 if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
622 return 1;
623 my_free(errmsgs);
624 return 0;
625 }
626
627
/**
  Shut down and free a storage engine's handlerton when its plugin is
  being uninstalled.

  @param plugin  plugin descriptor; plugin->data holds the handlerton
                 (may be NULL if ha_initialize_handlerton() failed)

  @return 0 (always)
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;
  DBUG_ENTER("ha_finalize_handlerton");

  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
  if (!hton)
    goto end;

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Release this engine's legacy type-code entry for future installs. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  if (plugin->plugin->deinit)
  {
    /*
      Today we have no defined/special behavior for uninstalling
      engine plugins.
    */
    DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
    if (plugin->plugin->deinit(NULL))
    {
      DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
                             plugin->name.str));
    }
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited.
  */
  if (hton->slot != HA_SLOT_UNDEF)
  {
    /* Make sure we are not unpluging another plugin */
    DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
    DBUG_ASSERT(hton->slot < MAX_HA);
    hton2plugin[hton->slot]= NULL;
  }

  my_free(hton);

 end:
  DBUG_RETURN(0);
}
683
684
/**
  Create and initialize a handlerton for a storage engine plugin that is
  being installed.

  Allocates a zero-filled handlerton, runs the plugin's init() hook,
  assigns a legacy type code and a hton2plugin slot, and accounts for
  savepoint storage and two-phase-commit capability.

  @param plugin  plugin descriptor; plugin->data receives the handlerton

  @retval 0  success
  @retval 1  failure (out of memory, init error, or no free slot/type code)
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;
  DBUG_ENTER("ha_initialize_handlerton");
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));

  hton= (handlerton *)my_malloc(sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));

  if (hton == NULL)
  {
    sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
                    plugin->name.str);
    goto err_no_hton_memory;
  }

  hton->slot= HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton))
  {
    sql_print_error("Plugin '%s' init function returned error.",
                    plugin->name.str);
    goto err;
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  DBUG_PRINT("info", ("hton->state=%d", hton->state));
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      ulong fslot;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Requested code invalid or taken: find a free dynamic code. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          goto err_deinit;
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }

      /*
        In case a plugin is uninstalled and re-installed later, it should
        reuse an array slot. Otherwise the number of uninstall/install
        cycles would be limited. So look for a free slot.
      */
      DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
      for (fslot= 0; fslot < total_ha; fslot++)
      {
        if (!hton2plugin[fslot])
          break;
      }
      if (fslot < total_ha)
        hton->slot= fslot;
      else
      {
        if (total_ha >= MAX_HA)
        {
          sql_print_error("Too many plugins loaded. Limit is %lu. "
                          "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
          goto err_deinit;
        }
        hton->slot= total_ha++;
      }
      installed_htons[hton->db_type]= hton;
      /* Reserve this engine's savepoint area within the shared block. */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      hton2plugin[hton->slot]=plugin;
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
    /* fall through */
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_PARTITION_DB:
    partition_hton= hton;
    break;
  default:
    break;
  };

  DBUG_RETURN(0);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(1);
}
817
ha_init()818 int ha_init()
819 {
820 int error= 0;
821 DBUG_ENTER("ha_init");
822
823 DBUG_ASSERT(total_ha < MAX_HA);
824 /*
825 Check if there is a transaction-capable storage engine besides the
826 binary log (which is considered a transaction-capable storage engine in
827 counting total_ha)
828 */
829 opt_using_transactions= total_ha>(ulong)opt_bin_log;
830 savepoint_alloc_size+= sizeof(SAVEPOINT);
831
832 /*
833 Initialize system database name cache.
834 This cache is used to do a quick check if a given
835 db.tablename is a system table.
836 */
837 known_system_databases= ha_known_system_databases();
838
839 DBUG_RETURN(error);
840 }
841
ha_end()842 int ha_end()
843 {
844 int error= 0;
845 DBUG_ENTER("ha_end");
846
847
848 /*
849 This should be eventualy based on the graceful shutdown flag.
850 So if flag is equal to HA_PANIC_CLOSE, the deallocate
851 the errors.
852 */
853 if (ha_finish_errors())
854 error= 1;
855
856 DBUG_RETURN(error);
857 }
858
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)859 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
860 void *path)
861 {
862 handlerton *hton= plugin_data(plugin, handlerton *);
863 if (hton->state == SHOW_OPTION_YES && hton->drop_database)
864 hton->drop_database(hton, (char *)path);
865 return FALSE;
866 }
867
868
/**
  Ask every installed storage engine to drop any private files belonging
  to the database located at the given path.
*/
void ha_drop_database(char* path)
{
  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
}
873
874
closecon_handlerton(THD * thd,plugin_ref plugin,void * unused)875 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
876 void *unused)
877 {
878 handlerton *hton= plugin_data(plugin, handlerton *);
879 /*
880 there's no need to rollback here as all transactions must
881 be rolled back already
882 */
883 if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
884 {
885 if (hton->close_connection)
886 hton->close_connection(hton, thd);
887 /* make sure ha_data is reset and ha_data_lock is released */
888 thd_set_ha_data(thd, hton, NULL);
889 }
890 return FALSE;
891 }
892
893
894 /**
895 @note
896 don't bother to rollback here, it's done already
897 */
ha_close_connection(THD * thd)898 void ha_close_connection(THD* thd)
899 {
900 plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
901 }
902
kill_handlerton(THD * thd,plugin_ref plugin,void *)903 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
904 {
905 handlerton *hton= plugin_data(plugin, handlerton *);
906
907 if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
908 {
909 if (thd_get_ha_data(thd, hton))
910 hton->kill_connection(hton, thd);
911 }
912
913 return FALSE;
914 }
915
/** Notify every installed engine that the given connection is being killed. */
void ha_kill_connection(THD *thd)
{
  plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
}
920
921 /* ========================================================================
922 ======================= TRANSACTIONS ===================================*/
923
924 /**
925 Transaction handling in the server
926 ==================================
927
928 In each client connection, MySQL maintains two transactional
929 states:
930 - a statement transaction,
931 - a standard, also called normal transaction.
932
933 Historical note
934 ---------------
935 "Statement transaction" is a non-standard term that comes
936 from the times when MySQL supported BerkeleyDB storage engine.
937
938 First of all, it should be said that in BerkeleyDB auto-commit
939 mode auto-commits operations that are atomic to the storage
940 engine itself, such as a write of a record, and are too
941 high-granular to be atomic from the application perspective
942 (MySQL). One SQL statement could involve many BerkeleyDB
943 auto-committed operations and thus BerkeleyDB auto-commit was of
944 little use to MySQL.
945
946 Secondly, instead of SQL standard savepoints, BerkeleyDB
947 provided the concept of "nested transactions". In a nutshell,
948 transactions could be arbitrarily nested, but when the parent
949 transaction was committed or aborted, all its child (nested)
950 transactions were handled committed or aborted as well.
951 Commit of a nested transaction, in turn, made its changes
952 visible, but not durable: it destroyed the nested transaction,
953 all its changes would become available to the parent and
954 currently active nested transactions of this parent.
955
956 So the mechanism of nested transactions was employed to
957 provide "all or nothing" guarantee of SQL statements
958 required by the standard.
959 A nested transaction would be created at start of each SQL
960 statement, and destroyed (committed or aborted) at statement
961 end. Such nested transaction was internally referred to as
962 a "statement transaction" and gave birth to the term.
963
964 (Historical note ends)
965
966 Since then a statement transaction is started for each statement
967 that accesses transactional tables or uses the binary log. If
968 the statement succeeds, the statement transaction is committed.
969 If the statement fails, the transaction is rolled back. Commits
970 of statement transactions are not durable -- each such
971 transaction is nested in the normal transaction, and if the
972 normal transaction is rolled back, the effects of all enclosed
973 statement transactions are undone as well. Technically,
974 a statement transaction can be viewed as a savepoint which is
975 maintained automatically in order to make effects of one
976 statement atomic.
977
978 The normal transaction is started by the user and is ended
979 usually upon a user request as well. The normal transaction
980 encloses transactions of all statements issued between
981 its beginning and its end.
982 In autocommit mode, the normal transaction is equivalent
983 to the statement transaction.
984
985 Since MySQL supports PSEA (pluggable storage engine
986 architecture), more than one transactional engine can be
987 active at a time. Hence transactions, from the server
988 point of view, are always distributed. In particular,
989 transactional state is maintained independently for each
990 engine. In order to commit a transaction the two phase
991 commit protocol is employed.
992
993 Not all statements are executed in context of a transaction.
994 Administrative and status information statements do not modify
995 engine data, and thus do not start a statement transaction and
996 also have no effect on the normal transaction. Examples of such
997 statements are SHOW STATUS and RESET SLAVE.
998
999 Similarly DDL statements are not transactional,
1000 and therefore a transaction is [almost] never started for a DDL
1001 statement. The difference between a DDL statement and a purely
1002 administrative statement though is that a DDL statement always
1003 commits the current transaction before proceeding, if there is
1004 any.
1005
1006 At last, SQL statements that work with non-transactional
1007 engines also have no effect on the transaction state of the
1008 connection. Even though they are written to the binary log,
1009 and the binary log is, overall, transactional, the writes
1010 are done in "write-through" mode, directly to the binlog
1011 file, followed by an OS cache sync, in other words,
1012 bypassing the binlog undo log (translog).
1013 They do not commit the current normal transaction.
1014 A failure of a statement that uses non-transactional tables
1015 would cause a rollback of the statement transaction, but
1016 in case no transactional tables are used,
1017 no statement transaction is started.
1018
1019 Data layout
1020 -----------
1021
1022 The server stores its transaction-related data in
1023 thd->transaction. This structure has two members of type
1024 THD_TRANS. These members correspond to the statement and
1025 normal transactions respectively:
1026
1027 - thd->transaction.stmt contains a list of engines
1028 that are participating in the given statement
1029 - thd->transaction.all contains a list of engines that
1030 have participated in any of the statement transactions started
1031 within the context of the normal transaction.
1032 Each element of the list contains a pointer to the storage
1033 engine, engine-specific transactional data, and engine-specific
1034 transaction flags.
1035
1036 In autocommit mode thd->transaction.all is empty.
1037 Instead, data of thd->transaction.stmt is
1038 used to commit/rollback the normal transaction.
1039
1040 The list of registered engines has a few important properties:
1041 - no engine is registered in the list twice
1042 - engines are present in the list in reverse temporal order --
1043 new participants are always added to the beginning of the list.
1044
1045 Transaction life cycle
1046 ----------------------
1047
1048 When a new connection is established, thd->transaction
1049 members are initialized to an empty state.
1050 If a statement uses any tables, all affected engines
1051 are registered in the statement engine list. In
1052 non-autocommit mode, the same engines are registered in
1053 the normal transaction list.
1054 At the end of the statement, the server issues a commit
1055 or a roll back for all engines in the statement list.
1056 At this point transaction flags of an engine, if any, are
1057 propagated from the statement list to the list of the normal
1058 transaction.
1059 When commit/rollback is finished, the statement list is
1060 cleared. It will be filled in again by the next statement,
1061 and emptied again at the next statement's end.
1062
1063 The normal transaction is committed in a similar way
1064 (by going over all engines in thd->transaction.all list)
1065 but at different times:
1066 - when a COMMIT SQL statement is issued by the user
1067 - implicitly, by the server, at the beginning of a DDL statement
1068 or SET AUTOCOMMIT={0|1} statement.
1069
1070 The normal transaction can be rolled back as well:
1071 - if the user has requested so, by issuing ROLLBACK SQL
1072 statement
1073 - if one of the storage engines requested a rollback
1074 by setting thd->transaction_rollback_request. This may
1075 happen in case, e.g., when the transaction in the engine was
1076 chosen a victim of the internal deadlock resolution algorithm
1077 and rolled back internally. When such a situation happens, there
1078 is little the server can do and the only option is to rollback
1079 transactions in all other participating engines. In this case
1080 the rollback is accompanied by an error sent to the user.
1081
1082 As follows from the use cases above, the normal transaction
1083 is never committed when there is an outstanding statement
1084 transaction. In most cases there is no conflict, since
1085 commits of the normal transaction are issued by a stand-alone
1086 administrative or DDL statement, thus no outstanding statement
1087 transaction of the previous statement exists. Besides,
1088 all statements that manipulate with the normal transaction
1089 are prohibited in stored functions and triggers, therefore
1090 no conflicting situation can occur in a sub-statement either.
1091 The remaining rare cases when the server explicitly has
1092 to commit the statement transaction prior to committing the normal
1093 one cover error-handling scenarios (see for example
1094 SQLCOM_LOCK_TABLES).
1095
1096 When committing a statement or a normal transaction, the server
1097 either uses the two-phase commit protocol, or issues a commit
1098 in each engine independently. The two-phase commit protocol
1099 is used only if:
1100 - all participating engines support two-phase commit (provide
1101 handlerton::prepare PSEA API call) and
1102 - transactions in at least two engines modify data (i.e. are
1103 not read-only).
1104
1105 Note that the two phase commit is used for
1106 statement transactions, even though they are not durable anyway.
1107 This is done to ensure logical consistency of data in a multiple-
1108 engine transaction.
1109 For example, imagine that some day MySQL supports unique
1110 constraint checks deferred till the end of statement. In such
1111 case a commit in one of the engines may yield ER_DUP_KEY,
1112 and MySQL should be able to gracefully abort statement
1113 transactions of other participants.
1114
1115 After the normal transaction has been committed,
1116 thd->transaction.all list is cleared.
1117
1118 When a connection is closed, the current normal transaction, if
1119 any, is rolled back.
1120
1121 Roles and responsibilities
1122 --------------------------
1123
1124 The server has no way to know that an engine participates in
1125 the statement and a transaction has been started
1126 in it unless the engine says so. Thus, in order to be
1127 a part of a transaction, the engine must "register" itself.
1128 This is done by invoking trans_register_ha() server call.
1129 Normally the engine registers itself whenever handler::external_lock()
1130 is called. trans_register_ha() can be invoked many times: if
1131 an engine is already registered, the call does nothing.
1132 In case autocommit is not set, the engine must register itself
1133 twice -- both in the statement list and in the normal transaction
1134 list.
1135 In which list to register is a parameter of trans_register_ha().
1136
1137 Note, that although the registration interface in itself is
1138 fairly clear, the current usage practice often leads to undesired
1139 effects. E.g. since a call to trans_register_ha() in most engines
1140 is embedded into implementation of handler::external_lock(), some
1141 DDL statements start a transaction (at least from the server
1142 point of view) even though they are not expected to. E.g.
1143 CREATE TABLE does not start a transaction, since
1144 handler::external_lock() is never called during CREATE TABLE. But
1145 CREATE TABLE ... SELECT does, since handler::external_lock() is
1146 called for the table that is being selected from. This has no
1147 practical effects currently, but must be kept in mind
1148 nevertheless.
1149
1150 Once an engine is registered, the server will do the rest
1151 of the work.
1152
1153 During statement execution, whenever any of data-modifying
1154 PSEA API methods is used, e.g. handler::write_row() or
1155 handler::update_row(), the read-write flag is raised in the
1156 statement transaction for the involved engine.
1157 Currently all PSEA calls are "traced", and the data can not be
1158 changed in a way other than issuing a PSEA call. Important:
1159 unless this invariant is preserved the server will not know that
1160 a transaction in a given engine is read-write and will not
1161 involve the two-phase commit protocol!
1162
1163 At the end of a statement, the server call trans_commit_stmt() is
1164 invoked. This call in turn invokes handlerton::prepare()
1165 for every involved engine. Prepare is followed by a call
1166 to handlerton::commit_one_phase(). If a one-phase commit
1167 will suffice, handlerton::prepare() is not invoked and
1168 the server only calls handlerton::commit_one_phase().
1169 At statement commit, the statement-related read-write
1170 engine flag is propagated to the corresponding flag in the
1171 normal transaction. When the commit is complete, the list
1172 of registered engines is cleared.
1173
1174 Rollback is handled in a similar fashion.
1175
1176 Additional notes on DDL and the normal transaction.
1177 ---------------------------------------------------
1178
1179 DDLs and operations with non-transactional engines
1180 do not "register" in thd->transaction lists, and thus do not
1181 modify the transaction state. Besides, each DDL in
1182 MySQL is prefixed with an implicit normal transaction commit
1183 (a call to trans_commit_implicit()), and thus leaves nothing
1184 to modify.
1185 However, as it has been pointed out with CREATE TABLE .. SELECT,
1186 some DDL statements can start a *new* transaction.
1187
1188 Behaviour of the server in this case is currently badly
1189 defined.
1190 DDL statements use a form of "semantic" logging
1191 to maintain atomicity: if CREATE TABLE .. SELECT failed,
1192 the newly created table is deleted.
1193 In addition, some DDL statements issue interim transaction
1194 commits: e.g. ALTER TABLE issues a commit after data is copied
1195 from the original table to the internal temporary table. Other
1196 statements, e.g. CREATE TABLE ... SELECT, do not always commit
1197 after themselves.
1198 And finally there is a group of DDL statements such as
1199 RENAME/DROP TABLE that doesn't start a new transaction
1200 and doesn't commit.
1201
1202 This diversity makes it hard to say what will happen if
1203 by chance a stored function is invoked during a DDL --
1204 whether any modifications it makes will be committed or not
1205 is not clear. Fortunately, SQL grammar of few DDLs allows
1206 invocation of a stored function.
1207
1208 A consistent behaviour is perhaps to always commit the normal
1209 transaction after all DDLs, just like the statement transaction
1210 is always committed at the end of all statements.
1211 */
1212
1213 /**
1214 Register a storage engine for a transaction.
1215
1216 Every storage engine MUST call this function when it starts
1217 a transaction or a statement (that is it must be called both for the
1218 "beginning of transaction" and "beginning of statement").
1219 Only storage engines registered for the transaction/statement
1220 will know when to commit/rollback it.
1221
1222 @note
1223 trans_register_ha is idempotent - storage engine may register many
1224 times per transaction.
1225
1226 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg)1227 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
1228 {
1229 THD_TRANS *trans;
1230 Ha_trx_info *ha_info;
1231 DBUG_ENTER("trans_register_ha");
1232 DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1233
1234 if (all)
1235 {
1236 trans= &thd->transaction.all;
1237 thd->server_status|= SERVER_STATUS_IN_TRANS;
1238 if (thd->tx_read_only)
1239 thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1240 DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1241 }
1242 else
1243 trans= &thd->transaction.stmt;
1244
1245 ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1246
1247 if (ha_info->is_started())
1248 DBUG_VOID_RETURN; /* already registered, return */
1249
1250 ha_info->register_ha(trans, ht_arg);
1251
1252 trans->no_2pc|=(ht_arg->prepare==0);
1253 if (thd->transaction.xid_state.xid.is_null())
1254 thd->transaction.xid_state.xid.set(thd->query_id);
1255 DBUG_VOID_RETURN;
1256 }
1257
1258 /**
1259 @retval
1260 0 ok
1261 @retval
1262 1 error, transaction was rolled back
1263 */
ha_prepare(THD * thd)1264 int ha_prepare(THD *thd)
1265 {
1266 int error=0, all=1;
1267 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1268 Ha_trx_info *ha_info= trans->ha_list;
1269 DBUG_ENTER("ha_prepare");
1270
1271 if (ha_info)
1272 {
1273 for (; ha_info; ha_info= ha_info->next())
1274 {
1275 int err;
1276 handlerton *ht= ha_info->ht();
1277 DBUG_ASSERT(!thd->status_var_aggregated);
1278 status_var_increment(thd->status_var.ha_prepare_count);
1279 if (ht->prepare)
1280 {
1281 if ((err= ht->prepare(ht, thd, all)))
1282 {
1283 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1284 ha_rollback_trans(thd, all);
1285 error=1;
1286 break;
1287 }
1288 }
1289 else
1290 {
1291 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1292 ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
1293 ha_resolve_storage_engine_name(ht));
1294 }
1295 }
1296 }
1297
1298 DBUG_RETURN(error);
1299 }
1300
1301 /**
1302 Check if we can skip the two-phase commit.
1303
1304 A helper function to evaluate if two-phase commit is mandatory.
1305 As a side effect, propagates the read-only/read-write flags
1306 of the statement transaction to its enclosing normal transaction.
1307
1308 If we have at least two engines with read-write changes we must
1309 run a two-phase commit. Otherwise we can run several independent
1310 commits as the only transactional engine has read-write changes
1311 and others are read-only.
1312
1313 @retval 0 All engines are read-only.
1314 @retval 1 We have the only engine with read-write changes.
1315 @retval >1 More than one engine have read-write changes.
1316 Note: return value might NOT be the exact number of
1317 engines with read-write changes.
1318 */
1319
1320 static
1321 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1322 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1323 bool all)
1324 {
1325 /* The number of storage engines that have actual changes. */
1326 unsigned rw_ha_count= 0;
1327 Ha_trx_info *ha_info;
1328
1329 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1330 {
1331 if (ha_info->is_trx_read_write())
1332 ++rw_ha_count;
1333 else
1334 {
1335 /*
1336 If we have any fake changes handlertons, they will not be marked as
1337 read-write, potentially skipping 2PC and causing the fake transaction
1338 to be binlogged. Force using 2PC in this case by bumping rw_ha_count
1339 for each fake changes handlerton.
1340 */
1341 handlerton *ht= ha_info->ht();
1342 if (unlikely(ht->is_fake_change && ht->is_fake_change(ht, thd)))
1343 ++rw_ha_count;
1344 }
1345
1346 if (! all)
1347 {
1348 Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1349 DBUG_ASSERT(ha_info != ha_info_all);
1350 /*
1351 Merge read-only/read-write information about statement
1352 transaction to its enclosing normal transaction. Do this
1353 only if in a real transaction -- that is, if we know
1354 that ha_info_all is registered in thd->transaction.all.
1355 Since otherwise we only clutter the normal transaction flags.
1356 */
1357 if (ha_info_all->is_started()) /* FALSE if autocommit. */
1358 ha_info_all->coalesce_trx_with(ha_info);
1359 }
1360 else if (rw_ha_count > 1)
1361 {
1362 /*
1363 It is a normal transaction, so we don't need to merge read/write
1364 information up, and the need for two-phase commit has been
1365 already established. Break the loop prematurely.
1366 */
1367 break;
1368 }
1369 }
1370 return rw_ha_count;
1371 }
1372
1373
1374 /**
1375 @param[in] ignore_global_read_lock Allow commit to complete even if a
1376 global read lock is active. This can be
1377 used to allow changes to internal tables
1378 (e.g. slave status tables).
1379
1380 @retval
1381 0 ok
1382 @retval
1383 1 transaction was rolled back
1384 @retval
1385 2 error during commit, data may be inconsistent
1386
1387 @todo
1388 Since we don't support nested statement transactions in 5.0,
1389 we can't commit or rollback stmt transactions while we are inside
1390 stored functions or triggers. So we simply do nothing now.
1391 TODO: This should be fixed in later ( >= 5.1) releases.
1392 */
1393
int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
{
  int error= 0;
  /*
    'all' means that this is either an explicit commit issued by
    user, or an implicit commit issued by a DDL.
  */
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside an 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
  */
  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
  Ha_trx_info *ha_info= trans->ha_list;
  DBUG_ENTER("ha_commit_trans");

  DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
                      all, thd->in_sub_stmt, ha_info, is_real_trans));
  /*
    We must not commit the normal transaction if a statement
    transaction is pending. Otherwise statement transaction
    flags will not get propagated to its normal transaction's
    counterpart.
  */
  DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
              trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /* Should never be reached: see the explanation below. */
    DBUG_ASSERT(0);
    /*
      Since we don't support nested statement transactions in 5.0,
      we can't commit or rollback stmt transactions while we are inside
      stored functions or triggers. So we simply do nothing now.
      TODO: This should be fixed in later ( >= 5.1) releases.
    */
    if (!all)
      DBUG_RETURN(0);
    /*
      We assume that all statements which commit or rollback main transaction
      are prohibited inside of stored functions or triggers. So they should
      bail out with error even before ha_commit_trans() call. To be 100% safe
      let us throw error in non-debug builds.
    */
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(2);
  }

  MDL_request mdl_request;
  bool release_mdl= false;  /* true once the COMMIT MDL lock is acquired */

  if (ha_info)
  {
    uint rw_ha_count;
    bool rw_trans;

    DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););

    /* Close all cursors that can not survive COMMIT */
    if (is_real_trans)                          /* not a statement commit */
      thd->stmt_map.close_transient_cursors();

    /* Count writers; also merges stmt read-write flags into 'all'. */
    rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
    trans->rw_ha_count= rw_ha_count;
    /* rw_trans is TRUE when we in a transaction changing data */
    rw_trans= is_real_trans && (rw_ha_count > 0);

    DBUG_EXECUTE_IF("dbug.enabled_commit",
                    {
                      const char act[]= "now signal Reached wait_for signal.commit_continue";
                      DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                         STRING_WITH_LEN(act)));
                    };);
    if (rw_trans && !ignore_global_read_lock)
    {
      /*
        Acquire a metadata lock which will ensure that COMMIT is blocked
        by an active FLUSH TABLES WITH READ LOCK (and vice versa:
        COMMIT in progress blocks FTWRL).

        We allow the owner of FTWRL to COMMIT; we assume that it knows
        what it does.
      */
      mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
                       MDL_EXPLICIT);

      DBUG_PRINT("debug", ("Acquire MDL commit lock"));
      if (thd->mdl_context.acquire_lock(&mdl_request,
                                        thd->variables.lock_wait_timeout))
      {
        /* FTWRL in progress (or timeout): the transaction cannot commit. */
        ha_rollback_trans(thd, all);
        DBUG_RETURN(1);
      }
      release_mdl= true;

      DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
    }

    /* Decide whether --read-only should block this commit. */
    bool enforce_ro= true;
    if (!opt_super_readonly)
      enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);
    /*
      Ignore super_read_only when ignore_global_read_lock is set.
      ignore_global_read_lock is set for transactions on replication
      repository tables.
    */
    if (ignore_global_read_lock)
      enforce_ro= false;
    if (rw_trans && stmt_has_updated_trans_table(ha_info) &&
        opt_readonly &&
        enforce_ro &&
        !thd->slave_thread)
    {
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
               opt_super_readonly ? "--read-only (super)" : "--read-only");
      ha_rollback_trans(thd, all);
      error= 1;
      goto end;
    }

    /* Two-phase commit only when >1 writer and every engine can prepare. */
    if (!trans->no_2pc && (rw_ha_count > 1))
      error= tc_log->prepare(thd, all);
  }
  /* Commit through the transaction coordinator; roll back on any failure. */
  if (error || (error= tc_log->commit(thd, all)))
  {
    ha_rollback_trans(thd, all);
    error= 1;
    goto end;
  }
  DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
end:
  if (release_mdl && mdl_request.ticket)
  {
    /*
      We do not always immediately release transactional locks
      after ha_commit_trans() (see uses of ha_enable_transaction()),
      thus we release the commit blocker lock as soon as it's
      not needed.
    */
    DBUG_PRINT("debug", ("Releasing MDL commit lock"));
    thd->mdl_context.release_lock(mdl_request.ticket);
  }
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (is_real_trans)
    thd->transaction.cleanup();

  if (!error)
    thd->diff_commit_trans++;

  DBUG_RETURN(error);
}
1548
1549 /**
1550 Commit the sessions outstanding transaction.
1551
1552 @pre thd->transaction.flags.commit_low == true
1553 @post thd->transaction.flags.commit_low == false
1554
1555 @note This function does not care about global read lock; the caller
1556 should.
1557
1558 @param[in] all Is set in case of explicit commit
1559 (COMMIT statement), or implicit commit
1560 issued by DDL. Is not set when called
1561 at the end of statement, even if
1562 autocommit=1.
1563 @param[in] run_after_commit
1564 True by default, otherwise, does not execute
1565 the after_commit hook in the function.
1566 */
1567
ha_commit_low(THD * thd,bool all,bool run_after_commit)1568 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1569 {
1570 int error=0;
1571 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1572 Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1573 DBUG_ENTER("ha_commit_low");
1574
1575 if (ha_info)
1576 {
1577 for (; ha_info; ha_info= ha_info_next)
1578 {
1579 int err;
1580 handlerton *ht= ha_info->ht();
1581 if ((err= ht->commit(ht, thd, all)))
1582 {
1583 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1584 error=1;
1585 }
1586 DBUG_ASSERT(!thd->status_var_aggregated);
1587 status_var_increment(thd->status_var.ha_commit_count);
1588 ha_info_next= ha_info->next();
1589 ha_info->reset(); /* keep it conveniently zero-filled */
1590 }
1591 trans->ha_list= 0;
1592 trans->no_2pc=0;
1593 trans->rw_ha_count= 0;
1594 if (all)
1595 {
1596 #ifdef HAVE_QUERY_CACHE
1597 if (thd->transaction.changed_tables)
1598 query_cache.invalidate(thd->transaction.changed_tables);
1599 #endif
1600 }
1601 }
1602 /* Free resources and perform other cleanup even for 'empty' transactions. */
1603 if (all)
1604 thd->transaction.cleanup();
1605 /*
1606 When the transaction has been committed, we clear the commit_low
1607 flag. This allow other parts of the system to check if commit_low
1608 was called.
1609 */
1610 thd->transaction.flags.commit_low= false;
1611 if (run_after_commit && thd->transaction.flags.run_hooks)
1612 {
1613 /*
1614 If commit succeeded, we call the after_commit hook.
1615
1616 TODO: Investigate if this can be refactored so that there is
1617 only one invocation of this hook in the code (in
1618 MYSQL_LOG_BIN::finish_commit).
1619 */
1620 if (!error)
1621 (void) RUN_HOOK(transaction, after_commit, (thd, all));
1622 thd->transaction.flags.run_hooks= false;
1623 }
1624 DBUG_RETURN(error);
1625 }
1626
1627
ha_rollback_low(THD * thd,bool all)1628 int ha_rollback_low(THD *thd, bool all)
1629 {
1630 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1631 Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1632 int error= 0;
1633
1634 if (ha_info)
1635 {
1636 /* Close all cursors that can not survive ROLLBACK */
1637 if (all) /* not a statement commit */
1638 thd->stmt_map.close_transient_cursors();
1639
1640 for (; ha_info; ha_info= ha_info_next)
1641 {
1642 int err;
1643 handlerton *ht= ha_info->ht();
1644 if ((err= ht->rollback(ht, thd, all)))
1645 { // cannot happen
1646 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1647 error= 1;
1648 }
1649 DBUG_ASSERT(!thd->status_var_aggregated);
1650 status_var_increment(thd->status_var.ha_rollback_count);
1651 ha_info_next= ha_info->next();
1652 ha_info->reset(); /* keep it conveniently zero-filled */
1653 }
1654 trans->ha_list= 0;
1655 trans->no_2pc=0;
1656 trans->rw_ha_count= 0;
1657 }
1658
1659 /*
1660 Thanks to possibility of MDL deadlock rollback request can come even if
1661 transaction hasn't been started in any transactional storage engine.
1662 */
1663 if (all && thd->transaction_rollback_request &&
1664 thd->transaction.xid_state.xa_state != XA_NOTR)
1665 thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno();
1666
1667 (void) RUN_HOOK(transaction, after_rollback, (thd, all));
1668 return error;
1669 }
1670
1671
int ha_rollback_trans(THD *thd, bool all)
{
  int error=0;
#ifndef DBUG_OFF
  /* Only needed for the sanity assertion below. */
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
#endif
  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or implicit
    commit issued by DDL (in these cases all == TRUE),
    or if we're running in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() is called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans= all || thd->transaction.all.ha_list == NULL;
  DBUG_ENTER("ha_rollback_trans");

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
              trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /* Should never be reached: see the explanation below. */
    DBUG_ASSERT(0);
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
    */
    if (!all)
      DBUG_RETURN(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(1);
  }

  /* Delegate the engine-level rollback to the transaction coordinator. */
  if (tc_log)
    tc_log->rollback(thd, all);

  /* Always cleanup. Even if nht==0. There may be savepoints. */
  if (is_real_trans)
    thd->transaction.cleanup();

  thd->diff_rollback_trans++;
  if (all)
    thd->transaction_rollback_request= FALSE;

  /*
    Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
    complete transaction is being rollback or autocommit=1.
  */
  if (is_real_trans)
    gtid_rollback(thd);

  /*
    If the transaction cannot be rolled back safely, warn; don't warn if this
    is a slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.
  */
#ifndef DBUG_OFF
  thd->transaction.stmt.dbug_unsafe_rollback_flags("stmt");
  thd->transaction.all.dbug_unsafe_rollback_flags("all");
#endif
  if (is_real_trans && thd->transaction.all.cannot_safely_rollback() &&
      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
    thd->transaction.push_unsafe_rollback_warnings(thd);
  DBUG_RETURN(error);
}
1751
1752
/*
  Carries the target XID and the outcome across the plugin_foreach()
  callbacks used by ha_commit_or_rollback_by_xid().
*/
struct xahton_st {
  XID *xid;    /* XA transaction identifier to commit or roll back */
  int result;  /* set to 0 by a callback once some engine handled the XID */
};
1757
xacommit_handlerton(THD * unused1,plugin_ref plugin,void * arg)1758 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1759 void *arg)
1760 {
1761 handlerton *hton= plugin_data(plugin, handlerton *);
1762 if (hton->state == SHOW_OPTION_YES && hton->recover)
1763 {
1764 hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1765 ((struct xahton_st *)arg)->result= 0;
1766 }
1767 return FALSE;
1768 }
1769
xarollback_handlerton(THD * unused1,plugin_ref plugin,void * arg)1770 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1771 void *arg)
1772 {
1773 handlerton *hton= plugin_data(plugin, handlerton *);
1774 if (hton->state == SHOW_OPTION_YES && hton->recover)
1775 {
1776 hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1777 ((struct xahton_st *)arg)->result= 0;
1778 }
1779 return FALSE;
1780 }
1781
1782
ha_commit_or_rollback_by_xid(THD * thd,XID * xid,bool commit)1783 int ha_commit_or_rollback_by_xid(THD *thd, XID *xid, bool commit)
1784 {
1785 struct xahton_st xaop;
1786 xaop.xid= xid;
1787 xaop.result= 1;
1788
1789 plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1790 MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1791
1792 gtid_rollback(thd);
1793
1794 return xaop.result;
1795 }
1796
1797
1798 #ifndef DBUG_OFF
1799 /**
1800 @note
1801 This does not need to be multi-byte safe or anything
1802 */
xid_to_str(char * buf,XID * xid)1803 static char* xid_to_str(char *buf, XID *xid)
1804 {
1805 int i;
1806 char *s=buf;
1807 *s++='\'';
1808 for (i=0; i < xid->gtrid_length+xid->bqual_length; i++)
1809 {
1810 uchar c=(uchar)xid->data[i];
1811 /* is_next_dig is set if next character is a number */
1812 bool is_next_dig= FALSE;
1813 if (i < XIDDATASIZE)
1814 {
1815 char ch= xid->data[i+1];
1816 is_next_dig= (ch >= '0' && ch <='9');
1817 }
1818 if (i == xid->gtrid_length)
1819 {
1820 *s++='\'';
1821 if (xid->bqual_length)
1822 {
1823 *s++='.';
1824 *s++='\'';
1825 }
1826 }
1827 if (c < 32 || c > 126)
1828 {
1829 *s++='\\';
1830 /*
1831 If next character is a number, write current character with
1832 3 octal numbers to ensure that the next number is not seen
1833 as part of the octal number
1834 */
1835 if (c > 077 || is_next_dig)
1836 *s++=_dig_vec_lower[c >> 6];
1837 if (c > 007 || is_next_dig)
1838 *s++=_dig_vec_lower[(c >> 3) & 7];
1839 *s++=_dig_vec_lower[c & 7];
1840 }
1841 else
1842 {
1843 if (c == '\'' || c == '\\')
1844 *s++='\\';
1845 *s++=c;
1846 }
1847 }
1848 *s++='\'';
1849 *s=0;
1850 return buf;
1851 }
1852 #endif
1853
1854 /**
1855 recover() step of xa.
1856
1857 @note
1858 there are three modes of operation:
1859 - automatic recover after a crash
1860 in this case commit_list != 0, tc_heuristic_recover==0
1861 all xids from commit_list are committed, others are rolled back
1862 - manual (heuristic) recover
1863 in this case commit_list==0, tc_heuristic_recover != 0
1864 DBA has explicitly specified that all prepared transactions should
1865 be committed (or rolled back).
1866 - no recovery (MySQL did not detect a crash)
1867 in this case commit_list==0, tc_heuristic_recover == 0
1868 there should be no prepared transactions in this case.
1869 */
/* State shared between ha_recover() and the xarecover_handlerton() callback. */
struct xarecover_st
{
  int len, found_foreign_xids, found_my_xids; /* list capacity; XID counters */
  XID *list;          /* scratch buffer filled by handlerton::recover() */
  HASH *commit_list;  /* XIDs known to be committed, or NULL (heuristic mode) */
  bool dry_run;       /* count prepared transactions only; don't resolve them */
};
1877
/*
  plugin_foreach() callback for ha_recover(): ask one storage engine for its
  prepared XA transactions and commit/roll back each (or only count them in
  dry-run mode).  'arg' is the shared xarecover_st.  Always returns FALSE so
  that iteration continues over all engines.
*/
static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
                                    void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  struct xarecover_st *info= (struct xarecover_st *) arg;
  int got;

  if (hton->state == SHOW_OPTION_YES && hton->recover)
  {
    /* Drain prepared XIDs in batches of at most info->len entries. */
    while ((got= hton->recover(hton, info->list, info->len)) > 0 )
    {
      sql_print_information("Found %d prepared transaction(s) in %s",
                            got, ha_resolve_storage_engine_name(hton));
      for (int i=0; i < got; i ++)
      {
        my_xid x=info->list[i].get_my_xid();
        if (!x) // not "mine" - that is generated by external TM
        {
#ifndef DBUG_OFF
          char buf[XIDDATASIZE*4+6]; // see xid_to_str
          sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i));
#endif
          /* Keep foreign XIDs prepared; an external TM must resolve them. */
          xid_cache_insert(info->list+i, XA_PREPARED);
          info->found_foreign_xids++;
          continue;
        }
        if (info->dry_run)
        {
          info->found_my_xids++;
          continue;
        }
        // recovery mode
        /*
          Automatic crash recovery: commit the XIDs present in commit_list,
          roll back the rest.  Heuristic recovery (no commit_list): obey
          tc_heuristic_recover for every XID.
        */
        if (info->commit_list ?
            my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
            tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
        {
#ifndef DBUG_OFF
          char buf[XIDDATASIZE*4+6]; // see xid_to_str
          sql_print_information("commit xid %s", xid_to_str(buf, info->list+i));
#endif
          hton->commit_by_xid(hton, info->list+i);
        }
        else
        {
#ifndef DBUG_OFF
          char buf[XIDDATASIZE*4+6]; // see xid_to_str
          sql_print_information("rollback xid %s",
                                xid_to_str(buf, info->list+i));
#endif
          hton->rollback_by_xid(hton, info->list+i);
        }
      }
      /* A short batch means the engine has no more prepared XIDs. */
      if (got < info->len)
        break;
    }
  }
  return FALSE;
}
1936
ha_recover(HASH * commit_list)1937 int ha_recover(HASH *commit_list)
1938 {
1939 struct xarecover_st info;
1940 DBUG_ENTER("ha_recover");
1941 info.found_foreign_xids= info.found_my_xids= 0;
1942 info.commit_list= commit_list;
1943 info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
1944 info.list= NULL;
1945
1946 /* commit_list and tc_heuristic_recover cannot be set both */
1947 DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
1948 /* if either is set, total_ha_2pc must be set too */
1949 DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log);
1950
1951 if (total_ha_2pc <= (ulong)opt_bin_log)
1952 DBUG_RETURN(0);
1953
1954 if (info.commit_list)
1955 sql_print_information("Starting crash recovery...");
1956
1957 #if 0
1958 /*
1959 for now, only InnoDB supports 2pc. It means we can always safely
1960 rollback all pending transactions, without risking inconsistent data
1961 */
1962 DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
1963 tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
1964 info.dry_run=FALSE;
1965 #endif
1966
1967 for (info.len= MAX_XID_LIST_SIZE ;
1968 info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
1969 {
1970 info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
1971 }
1972 if (!info.list)
1973 {
1974 sql_print_error(ER(ER_OUTOFMEMORY),
1975 static_cast<int>(info.len*sizeof(XID)));
1976 DBUG_RETURN(1);
1977 }
1978
1979 plugin_foreach(NULL, xarecover_handlerton,
1980 MYSQL_STORAGE_ENGINE_PLUGIN, &info);
1981
1982 my_free(info.list);
1983 if (info.found_foreign_xids)
1984 sql_print_warning("Found %d prepared XA transactions",
1985 info.found_foreign_xids);
1986 if (info.dry_run && info.found_my_xids)
1987 {
1988 sql_print_error("Found %d prepared transactions! It means that mysqld was "
1989 "not shut down properly last time and critical recovery "
1990 "information (last binlog or %s file) was manually deleted "
1991 "after a crash. You have to start mysqld with "
1992 "--tc-heuristic-recover switch to commit or rollback "
1993 "pending transactions.",
1994 info.found_my_xids, opt_tc_log_file);
1995 DBUG_RETURN(1);
1996 }
1997 if (info.commit_list)
1998 sql_print_information("Crash recovery finished.");
1999 DBUG_RETURN(0);
2000 }
2001
2002 /**
2003 return the list of XID's to a client, the same way SHOW commands do.
2004
2005 @note
2006 I didn't find in XA specs that an RM cannot return the same XID twice,
2007 so mysql_xa_recover does not filter XID's to ensure uniqueness.
2008 It can be easily fixed later, if necessary.
2009 */
bool mysql_xa_recover(THD *thd)
{
  List<Item> field_list;
  Protocol *protocol= thd->protocol;
  int i=0;
  XID_STATE *xs;
  DBUG_ENTER("mysql_xa_recover");

  /* Result set layout of XA RECOVER: formatID, the two lengths, raw data. */
  field_list.push_back(new Item_int(NAME_STRING("formatID"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
  field_list.push_back(new Item_int(NAME_STRING("gtrid_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
  field_list.push_back(new Item_int(NAME_STRING("bqual_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));

  if (protocol->send_result_set_metadata(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    DBUG_RETURN(1);

  /*
    The cache lock is held for the whole scan (including network writes)
    so the element indices stay stable while iterating by position.
  */
  mysql_mutex_lock(&LOCK_xid_cache);
  while ((xs= (XID_STATE*) my_hash_element(&xid_cache, i++)))
  {
    /* Only transactions still in PREPARED state are reported. */
    if (xs->xa_state==XA_PREPARED)
    {
      protocol->prepare_for_resend();
      protocol->store_longlong((longlong)xs->xid.formatID, FALSE);
      protocol->store_longlong((longlong)xs->xid.gtrid_length, FALSE);
      protocol->store_longlong((longlong)xs->xid.bqual_length, FALSE);
      protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
                      &my_charset_bin);
      if (protocol->write())
      {
        /* Network error: release the lock before bailing out. */
        mysql_mutex_unlock(&LOCK_xid_cache);
        DBUG_RETURN(1);
      }
    }
  }

  mysql_mutex_unlock(&LOCK_xid_cache);
  my_eof(thd);
  DBUG_RETURN(0);
}
2050
2051 /**
2052 @details
2053 This function should be called when MySQL sends rows of a SELECT result set
2054 or the EOF mark to the client. It releases a possible adaptive hash index
2055 S-latch held by thd in InnoDB and also releases a possible InnoDB query
2056 FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2057 keep them over several calls of the InnoDB handler interface when a join
2058 is executed. But when we let the control to pass to the client they have
2059 to be released because if the application program uses mysql_use_result(),
2060 it may deadlock on the S-latch if the application on another connection
2061 performs another SQL query. In MySQL-4.1 this is even more important because
2062 there a connection can have several SELECT queries open at the same time.
2063
2064 @param thd the thread handle of the current connection
2065
2066 @return
2067 always 0
2068 */
2069
ha_release_temporary_latches(THD * thd)2070 int ha_release_temporary_latches(THD *thd)
2071 {
2072 Ha_trx_info *info;
2073
2074 /*
2075 Note that below we assume that only transactional storage engines
2076 may need release_temporary_latches(). If this will ever become false,
2077 we could iterate on thd->open_tables instead (and remove duplicates
2078 as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2079 */
2080 for (info= thd->transaction.stmt.ha_list; info; info= info->next())
2081 {
2082 handlerton *hton= info->ht();
2083 if (hton && hton->release_temporary_latches)
2084 hton->release_temporary_latches(hton, thd);
2085 }
2086 return 0;
2087 }
2088
2089 /**
2090 Check if all storage engines used in transaction agree that after
2091 rollback to savepoint it is safe to release MDL locks acquired after
2092 savepoint creation.
2093
2094 @param thd The client thread that executes the transaction.
2095
2096 @return true - It is safe to release MDL locks.
2097 false - If it is not.
2098 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2099 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2100 {
2101 Ha_trx_info *ha_info;
2102 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2103 &thd->transaction.all);
2104
2105 DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2106
2107 /**
2108 Checking whether it is safe to release metadata locks after rollback to
2109 savepoint in all the storage engines that are part of the transaction.
2110 */
2111 for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2112 {
2113 handlerton *ht= ha_info->ht();
2114 DBUG_ASSERT(ht);
2115
2116 if (ht->savepoint_rollback_can_release_mdl == 0 ||
2117 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2118 DBUG_RETURN(false);
2119 }
2120
2121 DBUG_RETURN(true);
2122 }
2123
/*
  Roll the transaction back to a previously set savepoint: engines that
  existed at savepoint time roll back to it; engines registered afterwards
  roll back their whole transaction and are detached from the trx list.
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error=0;
  /* In a sub-statement only the statement transaction is affected. */
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
                                        &thd->transaction.all);
  Ha_trx_info *ha_info, *ha_info_next;

  DBUG_ENTER("ha_rollback_to_savepoint");

  /* Recomputed below from the engines that survive the rollback. */
  trans->no_2pc=0;
  trans->rw_ha_count= 0;
  /*
    rolling back to savepoint in all storage engines that were part of the
    transaction when the savepoint was set
  */
  for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
  {
    int err;
    handlerton *ht= ha_info->ht();
    DBUG_ASSERT(ht);
    DBUG_ASSERT(ht->savepoint_set != 0);
    /* Engine-private savepoint data is stored right after the SAVEPOINT. */
    if ((err= ht->savepoint_rollback(ht, thd,
                                     (uchar *)(sv+1)+ht->savepoint_offset)))
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error=1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    status_var_increment(thd->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= ht->prepare == 0;
  }
  /*
    rolling back the transaction in all storage engines that were not part of
    the transaction when the savepoint was set
  */
  for (ha_info= trans->ha_list; ha_info != sv->ha_list;
       ha_info= ha_info_next)
  {
    int err;
    handlerton *ht= ha_info->ht();
    if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error=1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    status_var_increment(thd->status_var.ha_rollback_count);
    /* Save the link before reset() wipes the node. */
    ha_info_next= ha_info->next();
    ha_info->reset(); /* keep it conveniently zero-filled */
  }
  /* Shrink the trx list back to the engines present at savepoint time. */
  trans->ha_list= sv->ha_list;
  thd->diff_rollback_trans++;
  DBUG_RETURN(error);
}
2178
ha_prepare_low(THD * thd,bool all)2179 int ha_prepare_low(THD *thd, bool all)
2180 {
2181 int error= 0;
2182 THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
2183 Ha_trx_info *ha_info= trans->ha_list;
2184 DBUG_ENTER("ha_prepare_low");
2185
2186 if (ha_info)
2187 {
2188 for (; ha_info && !error; ha_info= ha_info->next())
2189 {
2190 int err= 0;
2191 handlerton *ht= ha_info->ht();
2192 /*
2193 Do not call two-phase commit if this particular
2194 transaction is read-only. This allows for simpler
2195 implementation in engines that are always read-only.
2196 */
2197 /*
2198 But do call two-phase commit if the handlerton has fake changes
2199 enabled even if it's not marked as read-write. This will ensure that
2200 the fake changes handlerton prepare will fail, preventing binlogging
2201 and committing the transaction in other engines.
2202 */
2203 if (!ha_info->is_trx_read_write()
2204 && likely(!(ht->is_fake_change && ht->is_fake_change(ht, thd))))
2205 continue;
2206 if ((err= ht->prepare(ht, thd, all)))
2207 {
2208 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
2209 error= 1;
2210 }
2211 DBUG_ASSERT(!thd->status_var_aggregated);
2212 status_var_increment(thd->status_var.ha_prepare_count);
2213 }
2214 DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2215 }
2216
2217 DBUG_RETURN(error);
2218 }
2219
2220 /**
2221 @note
2222 according to the sql standard (ISO/IEC 9075-2:2003)
2223 section "4.33.4 SQL-statements and transaction states",
2224 SAVEPOINT is *not* transaction-initiating SQL-statement
2225 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2226 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2227 {
2228 int error=0;
2229 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2230 &thd->transaction.all);
2231 Ha_trx_info *ha_info= trans->ha_list;
2232 DBUG_ENTER("ha_savepoint");
2233
2234 for (; ha_info; ha_info= ha_info->next())
2235 {
2236 int err;
2237 handlerton *ht= ha_info->ht();
2238 DBUG_ASSERT(ht);
2239 if (! ht->savepoint_set)
2240 {
2241 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2242 error=1;
2243 break;
2244 }
2245 if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2246 { // cannot happen
2247 my_error(ER_GET_ERRNO, MYF(0), err);
2248 error=1;
2249 }
2250 DBUG_ASSERT(!thd->status_var_aggregated);
2251 status_var_increment(thd->status_var.ha_savepoint_count);
2252 }
2253 /*
2254 Remember the list of registered storage engines. All new
2255 engines are prepended to the beginning of the list.
2256 */
2257 sv->ha_list= trans->ha_list;
2258
2259 DBUG_RETURN(error);
2260 }
2261
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2262 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2263 {
2264 int error=0;
2265 Ha_trx_info *ha_info= sv->ha_list;
2266 DBUG_ENTER("ha_release_savepoint");
2267
2268 for (; ha_info; ha_info= ha_info->next())
2269 {
2270 int err;
2271 handlerton *ht= ha_info->ht();
2272 /* Savepoint life time is enclosed into transaction life time. */
2273 DBUG_ASSERT(ht);
2274 if (!ht->savepoint_release)
2275 continue;
2276 if ((err= ht->savepoint_release(ht, thd,
2277 (uchar *)(sv+1) + ht->savepoint_offset)))
2278 { // cannot happen
2279 my_error(ER_GET_ERRNO, MYF(0), err);
2280 error=1;
2281 }
2282 }
2283 DBUG_RETURN(error);
2284 }
2285
2286
clone_snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2287 static my_bool clone_snapshot_handlerton(THD *thd, plugin_ref plugin,
2288 void *arg)
2289 {
2290 handlerton *hton= plugin_data(plugin, handlerton *);
2291
2292 if (hton->state == SHOW_OPTION_YES &&
2293 hton->clone_consistent_snapshot)
2294 hton->clone_consistent_snapshot(hton, thd, (THD *) arg);
2295
2296 return FALSE;
2297 }
2298
2299
/*
  START TRANSACTION WITH CONSISTENT SNAPSHOT FROM SESSION <id>:
  clone the snapshot of an existing session (identified by the value in
  thd->lex->value_list) into the current one.
  Returns 0 on success, 1 on error (error already reported).
*/
static int ha_clone_consistent_snapshot(THD *thd)
{
  std::set<THD*> global_thread_list_copy;
  THD *from_thd;
  ulong id;
  Item *val;
  Thread_iterator it;
  Thread_iterator end;

  DBUG_ASSERT(!thd->lex->value_list.is_empty());

  val= (Item *) thd->lex->value_list.head();

  if (thd->lex->table_or_sp_used())
  {
    my_error(ER_NOT_SUPPORTED_YET, MYF(0), "Usage of subqueries or stored "
             "function calls as part of this statement");
    goto error;
  }

  /* The session id must be a plain constant expression. */
  if ((!val->fixed && val->fix_fields(thd, &val)) || val->check_cols(1))
  {
    my_error(ER_SET_CONSTANTS_ONLY, MYF(0));
    goto error;
  }

  id= val->val_int();

  /*
    LOCK_thd_remove keeps the donor THD from being destroyed while we
    search the (copied) thread list and take its LOCK_thd_data.
  */
  mysql_mutex_lock(&LOCK_thd_remove);
  copy_global_thread_list(&global_thread_list_copy);

  it= global_thread_list_copy.begin();
  end= global_thread_list_copy.end();
  from_thd= NULL;

  for (; it != end; ++it)
  {
    /* A session cannot clone from itself. */
    if ((*it)->thread_id == id && *it != thd)
    {
      from_thd= *it;
      /* Hold the donor's data lock until the clone is complete. */
      mysql_mutex_lock(&from_thd->LOCK_thd_data);
      break;
    }
  }

  mysql_mutex_unlock(&LOCK_thd_remove);

  if (!from_thd)
  {
    my_error(ER_NO_SUCH_THREAD, MYF(0), id);
    goto error;
  }

  /*
    Blocking commits and binlog updates ensures that we get the same snapshot
    for all engines (including the binary log). This allows us among other
    things to do backups with START TRANSACTION WITH CONSISTENT SNAPSHOT and
    have a consistent binlog position.
  */
  tc_log->xlock();

  plugin_foreach(thd, clone_snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 from_thd);

  tc_log->xunlock();

  mysql_mutex_unlock(&from_thd->LOCK_thd_data);

  return 0;

error:

  return 1;
}
2374
2375
start_snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2376 static my_bool start_snapshot_handlerton(THD *thd, plugin_ref plugin,
2377 void *arg)
2378 {
2379 handlerton *hton= plugin_data(plugin, handlerton *);
2380 if (hton->state == SHOW_OPTION_YES &&
2381 hton->start_consistent_snapshot)
2382 {
2383 hton->start_consistent_snapshot(hton, thd);
2384 *((bool *)arg)= false;
2385 }
2386 return FALSE;
2387 }
2388
ha_start_consistent_snapshot(THD * thd)2389 int ha_start_consistent_snapshot(THD *thd)
2390 {
2391
2392 if (!thd->lex->value_list.is_empty())
2393 return ha_clone_consistent_snapshot(thd);
2394
2395 bool warn= true;
2396
2397 /*
2398 Blocking commits and binlog updates ensures that we get the same snapshot
2399 for all engines (including the binary log). This allows us among other
2400 things to do backups with START TRANSACTION WITH CONSISTENT SNAPSHOT and
2401 have a consistent binlog position.
2402 */
2403 tc_log->xlock();
2404
2405 plugin_foreach(thd, start_snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2406 &warn);
2407
2408 tc_log->xunlock();
2409
2410 /*
2411 Same idea as when one wants to CREATE TABLE in one engine which does not
2412 exist:
2413 */
2414 if (warn)
2415 push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2416 "This MySQL server does not support any "
2417 "consistent-read capable storage engine");
2418 return 0;
2419 }
2420
2421
store_binlog_info_handlerton(THD * thd,plugin_ref plugin,void * arg)2422 static my_bool store_binlog_info_handlerton(THD *thd, plugin_ref plugin,
2423 void *arg)
2424 {
2425 handlerton *hton= plugin_data(plugin, handlerton *);
2426
2427 if (hton->state == SHOW_OPTION_YES &&
2428 hton->store_binlog_info)
2429 {
2430 hton->store_binlog_info(hton, thd);
2431 *((bool *)arg)= false;
2432 }
2433
2434 return FALSE;
2435 }
2436
2437
ha_store_binlog_info(THD * thd)2438 int ha_store_binlog_info(THD *thd)
2439 {
2440 LOG_INFO li;
2441 bool warn= true;
2442
2443 if (!mysql_bin_log.is_open())
2444 return 0;
2445
2446 DBUG_ASSERT(tc_log == &mysql_bin_log);
2447
2448 /* Block commits to get consistent binlog coordinates */
2449 tc_log->xlock();
2450
2451 mysql_bin_log.raw_get_current_log(&li);
2452 thd->set_trans_pos(li.log_file_name, li.pos);
2453
2454 plugin_foreach(thd, store_binlog_info_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2455 &warn);
2456
2457 tc_log->xunlock();
2458
2459 if (warn)
2460 push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2461 "No support for storing binlog coordinates in any storage");
2462 return 0;
2463 }
2464
2465
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2466 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2467 void *arg)
2468 {
2469 handlerton *hton= plugin_data(plugin, handlerton *);
2470 if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2471 hton->flush_logs(hton))
2472 return TRUE;
2473 return FALSE;
2474 }
2475
2476
ha_flush_logs(handlerton * db_type)2477 bool ha_flush_logs(handlerton *db_type)
2478 {
2479 if (db_type == NULL)
2480 {
2481 if (plugin_foreach(NULL, flush_handlerton,
2482 MYSQL_STORAGE_ENGINE_PLUGIN, 0))
2483 return TRUE;
2484 }
2485 else
2486 {
2487 if (db_type->state != SHOW_OPTION_YES ||
2488 (db_type->flush_logs && db_type->flush_logs(db_type)))
2489 return TRUE;
2490 }
2491 return FALSE;
2492 }
2493
2494
2495 /**
2496 @brief make canonical filename
2497
2498 @param[in] file table handler
2499 @param[in] path original path
2500 @param[out] tmp_path buffer for canonized path
2501
2502 @details Lower case db name and table name path parts for
2503 non file based tables when lower_case_table_names
2504 is 2 (store as is, compare in lower case).
2505 Filesystem path prefix (mysql_data_home or tmpdir)
2506 is left intact.
2507
2508 @note tmp_path may be left intact if no conversion was
2509 performed.
2510
2511 @retval canonized path
2512
2513 @todo This may be done more efficiently when table path
2514 gets built. Convert this function to something like
2515 ASSERT_CANONICAL_FILENAME.
2516 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2517 const char *get_canonical_filename(handler *file, const char *path,
2518 char *tmp_path)
2519 {
2520 uint i;
2521 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2522 return path;
2523
2524 for (i= 0; i <= mysql_tmpdir_list.max; i++)
2525 {
2526 if (is_prefix(path, mysql_tmpdir_list.list[i]))
2527 return path;
2528 }
2529
2530 /* Ensure that table handler get path in lower case */
2531 if (tmp_path != path)
2532 strmov(tmp_path, path);
2533
2534 /*
2535 we only should turn into lowercase database/table part
2536 so start the process after homedirectory
2537 */
2538 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2539 return tmp_path;
2540 }
2541
2542
2543 /**
2544 An interceptor to hijack the text of the error message without
2545 setting an error in the thread. We need the text to present it
2546 in the form of a warning to the user.
2547 */
2548
struct Ha_delete_table_error_handler: public Internal_error_handler
{
public:
  /* Intercepts the condition and copies its message text into 'buff'. */
  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sqlstate,
                                Sql_condition::enum_warning_level level,
                                const char* msg,
                                Sql_condition ** cond_hdl);
  char buff[MYSQL_ERRMSG_SIZE]; /* text of the last intercepted error */
};
2560
2561
2562 bool
2563 Ha_delete_table_error_handler::
handle_condition(THD *,uint,const char *,Sql_condition::enum_warning_level,const char * msg,Sql_condition ** cond_hdl)2564 handle_condition(THD *,
2565 uint,
2566 const char*,
2567 Sql_condition::enum_warning_level,
2568 const char* msg,
2569 Sql_condition ** cond_hdl)
2570 {
2571 *cond_hdl= NULL;
2572 /* Grab the error message */
2573 strmake(buff, msg, sizeof(buff)-1);
2574 return TRUE;
2575 }
2576
2577
2578 /** @brief
2579 This should return ENOENT if the file doesn't exists.
2580 The .frm file will be deleted only if we return 0 or ENOENT
2581 */
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
                    const char *db, const char *alias, bool generate_warning)
{
  handler *file;
  char tmp_path[FN_REFLEN];
  int error;
  /* Minimal TABLE/TABLE_SHARE pair so print_error() has something to read. */
  TABLE dummy_table;
  TABLE_SHARE dummy_share;
  DBUG_ENTER("ha_delete_table");

  memset(static_cast<void*>(&dummy_table), 0, sizeof(dummy_table));
  memset(static_cast<void*>(&dummy_share), 0, sizeof(dummy_share));
  dummy_table.s= &dummy_share;

  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
  if (table_type == NULL ||
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
    DBUG_RETURN(ENOENT);

  path= get_canonical_filename(file, path, tmp_path);
  if ((error= file->ha_delete_table(path)) && generate_warning)
  {
    /*
      Because file->print_error() use my_error() to generate the error message
      we use an internal error handler to intercept it and store the text
      in a temporary buffer. Later the message will be presented to user
      as a warning.
    */
    Ha_delete_table_error_handler ha_delete_table_error_handler;

    /* Fill up strucutures that print_error may need */
    dummy_share.path.str= (char*) path;
    dummy_share.path.length= strlen(path);
    dummy_share.db.str= (char*) db;
    dummy_share.db.length= strlen(db);
    dummy_share.table_name.str= (char*) alias;
    dummy_share.table_name.length= strlen(alias);
    dummy_table.alias= alias;

    file->change_table_ptr(&dummy_table, &dummy_share);

    /* Push before print_error() so the message is intercepted, not raised. */
    thd->push_internal_handler(&ha_delete_table_error_handler);
    file->print_error(error, 0);

    thd->pop_internal_handler();

    /*
      XXX: should we convert *all* errors to warnings here?
      What if the error is fatal?
    */
    push_warning(thd, Sql_condition::WARN_LEVEL_WARN, error,
                 ha_delete_table_error_handler.buff);
  }
  delete file;

#ifdef HAVE_PSI_TABLE_INTERFACE
  if (likely(error == 0))
  {
    /* Table share not available, so check path for temp table prefix. */
    bool temp_table = (strstr(path, tmp_file_prefix) != NULL);
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), alias, strlen(alias));
  }
#endif

  DBUG_RETURN(error);
}
2649
2650 /****************************************************************************
2651 ** General handler functions
2652 ****************************************************************************/
clone(const char * name,MEM_ROOT * mem_root)2653 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2654 {
2655 handler *new_handler= table ? get_new_handler(table->s, mem_root, ht) : NULL;
2656
2657 if (!new_handler)
2658 return NULL;
2659 if (new_handler->set_ha_share_ref(ha_share))
2660 goto err;
2661
2662 /*
2663 Allocate handler->ref here because otherwise ha_open will allocate it
2664 on this->table->mem_root and we will not be able to reclaim that memory
2665 when the clone handler object is destroyed.
2666 */
2667 if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2668 ALIGN_SIZE(ref_length)*2)))
2669 goto err;
2670
2671 new_handler->cloned= true;
2672
2673 /*
2674 TODO: Implement a more efficient way to have more than one index open for
2675 the same table instance. The ha_open call is not cachable for clone.
2676 */
2677 if (new_handler->ha_open(table, name, table->db_stat,
2678 HA_OPEN_IGNORE_IF_LOCKED))
2679 goto err;
2680
2681 return new_handler;
2682
2683 err:
2684 delete new_handler;
2685 return NULL;
2686 }
2687
2688
2689
ha_statistic_increment(ulonglong SSV::* offset) const2690 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2691 {
2692 if (table && table->in_use)
2693 status_var_increment(table->in_use->status_var.*offset);
2694 }
2695
ha_data(THD * thd) const2696 void **handler::ha_data(THD *thd) const
2697 {
2698 return thd_ha_data(thd, ht);
2699 }
2700
ha_thd(void) const2701 THD *handler::ha_thd(void) const
2702 {
2703 if (unlikely(cloned))
2704 return current_thd;
2705 DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2706 return (table && table->in_use) ? table->in_use : current_thd;
2707 }
2708
/* Detach the P_S table instrumentation from the current thread. */
void handler::unbind_psi()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Only valid while the table is unlocked and no scan is in progress. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_TABLE_CALL(unbind_table)(m_psi);
#endif
}
2721
/* Re-attach the P_S table instrumentation to the current thread. */
void handler::rebind_psi()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Only valid while the table is unlocked and no scan is in progress. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  /*
    Notify the instrumentation that this table is now owned
    by this thread.
  */
  PSI_table_share *share_psi= ha_table_share_psi(table_share);
  m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
#endif
}
2735
/* Instrumentation share for this table; overridable by subclasses. */
PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
{
  return share->m_psi;
}
2740
2741 /** @brief
2742 Open database-handler.
2743
2744 IMPLEMENTATION
2745 Try O_RDONLY if cannot open as O_RDWR
2746 Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
2747 */
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  DBUG_ASSERT(alloc_root_inited(&table->mem_root));

  if (cloned) {
    DEBUG_SYNC(ha_thd(), "start_handler_ha_open_cloned");
  }

  if ((error=open(name,mode,test_if_locked)))
  {
    /* Fall back to read-only open when the table allows it. */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
        (table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (error)
  {
    my_errno= error;                            /* Safeguard */
    DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
  }
  else
  {
    DBUG_ASSERT(m_psi == NULL);
    DBUG_ASSERT(table_share != NULL);
#ifdef HAVE_PSI_TABLE_INTERFACE
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      PSI_table_share *share_psi= ha_table_share_psi(table_share);
      m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
    }
#endif

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK);        // Not needed in SQL

    /* ref is already allocated for us if we're called from handler::clone() */
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
                                           ALIGN_SIZE(ref_length)*2)))
    {
      /* Could not allocate row-position buffers: undo the open. */
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      dup_ref=ref+ALIGN_SIZE(ref_length);
    cached_table_flags= table_flags();
  }

  /* Reset per-handler user statistics counters on (re)open. */
  if (unlikely(opt_userstat))
  {
    rows_read= rows_changed= 0;
    memset(index_rows_read, 0, sizeof(index_rows_read));
  }

  DBUG_RETURN(error);
}
2823
2824
2825 /**
2826 Close handler.
2827 */
2828
int handler::ha_close(void)
{
  DBUG_ENTER("handler::ha_close");
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Release the performance-schema instrumentation before closing. */
  PSI_TABLE_CALL(close_table)(m_psi);
  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
#endif
  // TODO: set table= NULL to mark the handler as closed?
  DBUG_ASSERT(m_psi == NULL);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  DBUG_RETURN(close());
}
2842
2843
2844 /**
2845 Initialize use of index.
2846
2847 @param idx Index to use
2848 @param sorted Use sorted order
2849
2850 @return Operation status
2851 @retval 0 Success
2852 @retval != 0 Error (error code returned)
2853 */
2854
ha_index_init(uint idx,bool sorted)2855 int handler::ha_index_init(uint idx, bool sorted)
2856 {
2857 DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2858 int result;
2859 DBUG_ENTER("ha_index_init");
2860 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2861 m_lock_type != F_UNLCK);
2862 DBUG_ASSERT(inited == NONE);
2863 if (!(result= index_init(idx, sorted)))
2864 inited= INDEX;
2865 end_range= NULL;
2866 DBUG_RETURN(result);
2867 }
2868
2869
2870 /**
2871 End use of index.
2872
2873 @return Operation status
2874 @retval 0 Success
2875 @retval != 0 Error (error code returned)
2876 */
2877
ha_index_end()2878 int handler::ha_index_end()
2879 {
2880 DBUG_ENTER("ha_index_end");
2881 /* SQL HANDLER function can call this without having it locked. */
2882 DBUG_ASSERT(table->open_by_handler ||
2883 table_share->tmp_table != NO_TMP_TABLE ||
2884 m_lock_type != F_UNLCK);
2885 DBUG_ASSERT(inited == INDEX);
2886 inited= NONE;
2887 end_range= NULL;
2888 DBUG_RETURN(index_end());
2889 }
2890
2891
2892 /**
2893 Initialize table for random read or scan.
2894
2895 @param scan if true: Initialize for random scans through rnd_next()
2896 if false: Initialize for random reads through rnd_pos()
2897
2898 @return Operation status
2899 @retval 0 Success
2900 @retval != 0 Error (error code returned)
2901 */
2902
ha_rnd_init(bool scan)2903 int handler::ha_rnd_init(bool scan)
2904 {
2905 DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2906 int result;
2907 DBUG_ENTER("ha_rnd_init");
2908 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2909 m_lock_type != F_UNLCK);
2910 DBUG_ASSERT(inited == NONE || (inited == RND && scan));
2911 if (scan && is_using_prohibited_gap_locks(table, false))
2912 {
2913 DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
2914 }
2915
2916 inited= (result= rnd_init(scan)) ? NONE : RND;
2917 end_range= NULL;
2918 DBUG_RETURN(result);
2919 }
2920
2921
/**
  End use of random access.

  @return Operation status
  @retval 0    Success
  @retval != 0 Error (error code returned)
*/

int handler::ha_rnd_end()
{
  DBUG_ENTER("ha_rnd_end");
  /* SQL HANDLER function can call this without having it locked. */
  DBUG_ASSERT(table->open_by_handler ||
              table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == RND);
  /* Mark the cursor closed and clear range state before calling the engine. */
  inited= NONE;
  end_range= NULL;
  DBUG_RETURN(rnd_end());
}
2942
2943
/**
  Read next row via random scan.

  @param buf  Buffer to read the row into

  @return Operation status
  @retval 0    Success
  @retval != 0 Error (error code returned)
*/

int handler::ha_rnd_next(uchar *buf)
{
  int result;
  DBUG_ENTER("handler::ha_rnd_next");
  /* Locked, or a temporary table (private to this connection). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == RND);

  /* Engine call, timed by the performance schema table-I/O instrumentation. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
    { result= rnd_next(buf); })

  if (likely(!result))
  {
    /* MAX_KEY denotes "no index": full-scan statistics bucket. */
    update_index_stats(MAX_KEY);
  }

  DBUG_RETURN(result);
}
2972
2973
/**
  Read row via random scan from position.

  @param[out] buf  Buffer to read the row into
  @param      pos  Position from position() call

  @return Operation status
  @retval 0    Success
  @retval != 0 Error (error code returned)
*/

int handler::ha_rnd_pos(uchar *buf, uchar *pos)
{
  int result;
  DBUG_ENTER("handler::ha_rnd_pos");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
  /* DBUG_ASSERT(inited == RND); */

  /* Fetch the row previously saved with position(); instrumented as table I/O. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
    { result= rnd_pos(buf, pos); })

  if (likely(!result))
  {
    /* MAX_KEY: account the fetch against the "no index" bucket. */
    update_index_stats(MAX_KEY);
  }

  DBUG_RETURN(result);
}
3004
3005
/**
  Read [part of] row via [part of] index.
  @param[out] buf          buffer where store the data
  @param      key          Key to search for
  @param      keypart_map  Which part of key to use
  @param      find_flag    Direction/condition on key usage

  @returns Operation status
    @retval  0                  Success (found a record, and function has
                                set table->status to 0)
    @retval  HA_ERR_END_OF_FILE Row not found (function has set table->status
                                to STATUS_NOT_FOUND)
    @retval  != 0               Error

  @note Positions an index cursor to the index specified in the handle.
  Fetches the row if available. If the key value is null,
  begin at the first key of the index.
  ha_index_read_map can be restarted without calling index_end on the previous
  index scan and without calling ha_index_init. In this case the
  ha_index_read_map is on the same index as the previous ha_index_scan.
  This is particularly used in conjunction with multi read ranges.
*/

int handler::ha_index_read_map(uchar *buf, const uchar *key,
                               key_part_map keypart_map,
                               enum ha_rkey_function find_flag)
{
  int result;
  DBUG_ENTER("handler::ha_index_read_map");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  /*
    Gap-lock safety check: the second argument reports whether this read is
    an exact match on a full unique key (which needs no gap lock).  If the
    read would require prohibited gap locks, fail the statement.
  */
  if (is_using_prohibited_gap_locks(table, is_using_full_unique_key(
      active_index, keypart_map, find_flag)))
  {
    DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
  }

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_read_map(buf, key, keypart_map, find_flag); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  DBUG_RETURN(result);
}
3054
/**
  Read the last row matching [part of] key.

  @param[out] buf          Buffer to store the row in
  @param      key          Key to search for
  @param      keypart_map  Which parts of the key to use

  @return Operation status
  @retval 0    Success
  @retval != 0 Error (error code returned)
*/

int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
                                    key_part_map keypart_map)
{
  int result;
  DBUG_ENTER("handler::ha_index_read_last_map");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  /* A "last matching" read always scans a range, so gap locks may be needed. */
  if (is_using_prohibited_gap_locks(table, false))
  {
    DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
  }

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_read_last_map(buf, key, keypart_map); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  DBUG_RETURN(result);
}
3078
3079
/**
  Initializes an index and read it.

  Unlike ha_index_read_map(), the index is passed explicitly and no prior
  ha_index_init() call is required.

  @see handler::ha_index_read_map.
*/

int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
                                   key_part_map keypart_map,
                                   enum ha_rkey_function find_flag)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(end_range == NULL);

  /*
    Gap-lock safety check: an exact match on a full unique key needs no
    gap lock; other reads fail if gap locks are prohibited for the session.
  */
  if (is_using_prohibited_gap_locks(table, is_using_full_unique_key(
      index, keypart_map, find_flag)))
  {
    return HA_ERR_LOCK_DEADLOCK;
  }

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, index, 0,
    { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })

  if (likely(!result))
  {
    update_index_stats(index);
  }
  return result;
}
3110
3111
/**
  Reads the next row via index.

  @param[out] buf  Row data

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_next(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_next");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Instrumented engine call on the currently active index. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_next(buf); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  /* Debug sync point: lets tests pause after each index_next() call. */
  DEBUG_SYNC(ha_thd(), "handler_ha_index_next_end");

  DBUG_RETURN(result);
}
3143
3144
/**
  Reads the previous row via index.

  @param[out] buf  Row data

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_prev(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_prev");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Instrumented engine call on the currently active index. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_prev(buf); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  DBUG_RETURN(result);
}
3174
3175
/**
  Reads the first row via index.

  @param[out] buf  Row data

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_first(uchar * buf)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Positioning at index start is a range read; refuse under gap-lock ban. */
  if (is_using_prohibited_gap_locks(table, false))
  {
    return HA_ERR_LOCK_DEADLOCK;
  }

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_first(buf); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  return result;
}
3209
is_using_full_key(key_part_map keypart_map,uint actual_key_parts)3210 bool handler::is_using_full_key(key_part_map keypart_map,
3211 uint actual_key_parts)
3212 {
3213 return (keypart_map == HA_WHOLE_KEY) ||
3214 (keypart_map == ((key_part_map(1) << actual_key_parts)
3215 - 1));
3216 }
3217
is_using_full_unique_key(uint index,key_part_map keypart_map,enum ha_rkey_function find_flag) const3218 bool handler::is_using_full_unique_key(uint index,
3219 key_part_map keypart_map,
3220 enum ha_rkey_function find_flag) const
3221 {
3222 return (is_using_full_key(keypart_map,
3223 table->key_info[index].actual_key_parts)
3224 && find_flag == HA_READ_KEY_EXACT
3225 && (index == table->s->primary_key
3226 || (table->key_info[index].flags & HA_NOSAME)));
3227 }
3228
/**
  Reads the last row via index.

  @param[out] buf  Row data

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_last(uchar * buf)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Positioning at index end is a range read; refuse under gap-lock ban. */
  if (is_using_prohibited_gap_locks(table, false))
  {
    return HA_ERR_LOCK_DEADLOCK;
  }

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_last(buf); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  return result;
}
3262
3263
/**
  Reads the next same row via index.

  @param[out] buf     Row data
  @param      key     Key to search for
  @param      keylen  Length of key

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Instrumented engine call: next row with the same key value. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_next_same(buf, key, keylen); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  return result;
}
3294
3295
/**
  Read one row via index.

  @param[out] buf        Row data
  @param      key        Key to search for
  @param      key_len    Length of key
  @param      find_flag  Direction/condition on key usage

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_read(uchar *buf, const uchar *key, uint key_len,
                           enum ha_rkey_function find_flag)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Instrumented engine call on the currently active index. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_read(buf, key, key_len, find_flag); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  return result;
}
3328
3329
/**
  Reads the last row matching the given key.

  @param[out] buf      Row data
  @param      key      Key to search for
  @param      key_len  Length of key

  @return Operation status.
  @retval 0                   Success
  @retval HA_ERR_END_OF_FILE  Row not found
  @retval != 0                Error
*/

int handler::ha_index_read_last(uchar *buf, const uchar *key, uint key_len)
{
  int result;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);

  /* Instrumented engine call on the currently active index. */
  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
    { result= index_read_last(buf, key, key_len); })

  if (likely(!result))
  {
    update_index_stats(active_index);
  }

  return result;
}
3360
3361
/**
  Read first row (only) from a table.

  This is never called for InnoDB tables, as these table types
  has the HA_STATS_RECORDS_IS_EXACT set.

  @param[out] buf          Buffer for the row read
  @param      primary_key  Primary key index number, or >= MAX_KEY if none

  @return 0 on success, otherwise the first error encountered
*/
int handler::read_first_row(uchar * buf, uint primary_key)
{
  int error;
  DBUG_ENTER("handler::read_first_row");

  ha_statistic_increment(&SSV::ha_read_first_count);

  /*
    If there is very few deleted rows in the table, find the first row by
    scanning the table.
    TODO remove the test for HA_READ_ORDER
  */
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
  {
    if (!(error= ha_rnd_init(1)))
    {
      while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED)
        /* skip deleted row */;
      /* The read error (if any) takes precedence over the end error. */
      const int end_error= ha_rnd_end();
      if (!error)
        error= end_error;
    }
  }
  else
  {
    /* Find the first row through the primary key */
    if (!(error= ha_index_init(primary_key, 0)))
    {
      error= ha_index_first(buf);
      const int end_error= ha_index_end();
      if (!error)
        error= end_error;
    }
  }
  DBUG_RETURN(error);
}
3405
3406 /**
3407 Generate the next auto-increment number based on increment and offset.
3408 computes the lowest number
3409 - strictly greater than "nr"
3410 - of the form: auto_increment_offset + N * auto_increment_increment
3411 If overflow happened then return MAX_ULONGLONG value as an
3412 indication of overflow.
3413 In most cases increment= offset= 1, in which case we get:
3414 @verbatim 1,2,3,4,5,... @endverbatim
3415 If increment=10 and offset=5 and previous number is 1, we get:
3416 @verbatim 1,5,15,25,35,... @endverbatim
3417 */
3418 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3419 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3420 {
3421 const ulonglong save_nr= nr;
3422
3423 if (variables->auto_increment_increment == 1)
3424 nr= nr + 1; // optimization of the formula below
3425 else
3426 {
3427 nr= (((nr+ variables->auto_increment_increment -
3428 variables->auto_increment_offset)) /
3429 (ulonglong) variables->auto_increment_increment);
3430 nr= (nr* (ulonglong) variables->auto_increment_increment +
3431 variables->auto_increment_offset);
3432 }
3433
3434 if (unlikely(nr <= save_nr))
3435 return ULONGLONG_MAX;
3436
3437 return nr;
3438 }
3439
3440
adjust_next_insert_id_after_explicit_value(ulonglong nr)3441 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3442 {
3443 /*
3444 If we have set THD::next_insert_id previously and plan to insert an
3445 explicitely-specified value larger than this, we need to increase
3446 THD::next_insert_id to be greater than the explicit value.
3447 */
3448 if ((next_insert_id > 0) && (nr >= next_insert_id))
3449 set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3450 }
3451
3452
3453 /** @brief
3454 Computes the largest number X:
3455 - smaller than or equal to "nr"
3456 - of the form: auto_increment_offset + N * auto_increment_increment
3457 where N>=0.
3458
3459 SYNOPSIS
3460 prev_insert_id
3461 nr Number to "round down"
3462 variables variables struct containing auto_increment_increment and
3463 auto_increment_offset
3464
3465 RETURN
3466 The number X if it exists, "nr" otherwise.
3467 */
3468 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3469 prev_insert_id(ulonglong nr, struct system_variables *variables)
3470 {
3471 if (unlikely(nr < variables->auto_increment_offset))
3472 {
3473 /*
3474 There's nothing good we can do here. That is a pathological case, where
3475 the offset is larger than the column's max possible value, i.e. not even
3476 the first sequence value may be inserted. User will receive warning.
3477 */
3478 DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3479 "auto_increment_offset: %lu",
3480 (ulong) nr, variables->auto_increment_offset));
3481 return nr;
3482 }
3483 if (variables->auto_increment_increment == 1)
3484 return nr; // optimization of the formula below
3485 nr= (((nr - variables->auto_increment_offset)) /
3486 (ulonglong) variables->auto_increment_increment);
3487 return (nr * (ulonglong) variables->auto_increment_increment +
3488 variables->auto_increment_offset);
3489 }
3490
3491
3492 /**
3493 Update the auto_increment field if necessary.
3494
3495 Updates columns with type NEXT_NUMBER if:
3496
3497 - If column value is set to NULL (in which case
3498 auto_increment_field_not_null is 0)
3499 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3500 set. In the future we will only set NEXT_NUMBER fields if one sets them
3501 to NULL (or they are not included in the insert list).
3502
3503 In those cases, we check if the currently reserved interval still has
3504 values we have not used. If yes, we pick the smallest one and use it.
3505 Otherwise:
3506
3507 - If a list of intervals has been provided to the statement via SET
3508 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3509 first unused interval from this list, consider it as reserved.
3510
3511 - Otherwise we set the column for the first row to the value
3512 next_insert_id(get_auto_increment(column))) which is usually
3513 max-used-column-value+1.
3514 We call get_auto_increment() for the first row in a multi-row
3515 statement. get_auto_increment() will tell us the interval of values it
3516 reserved for us.
3517
3518 - In both cases, for the following rows we use those reserved values without
3519 calling the handler again (we just progress in the interval, computing
3520 each new value from the previous one). Until we have exhausted them, then
3521 we either take the next provided interval or call get_auto_increment()
3522 again to reserve a new interval.
3523
3524 - In both cases, the reserved intervals are remembered in
3525 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3526 binlogging; the last reserved interval is remembered in
3527 auto_inc_interval_for_cur_row. The number of reserved intervals is
3528 remembered in auto_inc_intervals_count. It differs from the number of
3529 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3530 latter list is cumulative over all statements forming one binlog event
3531 (when stored functions and triggers are used), and collapses two
3532 contiguous intervals in one (see its append() method).
3533
3534 The idea is that generated auto_increment values are predictable and
3535 independent of the column values in the table. This is needed to be
3536 able to replicate into a table that already has rows with a higher
3537 auto-increment value than the one that is inserted.
3538
3539 After we have already generated an auto-increment number and the user
3540 inserts a column with a higher value than the last used one, we will
3541 start counting from the inserted value.
3542
3543 This function's "outputs" are: the table's auto_increment field is filled
3544 with a value, thd->next_insert_id is filled with the value to use for the
3545 next row, if a value was autogenerated for the current row it is stored in
3546 thd->insert_id_for_cur_row, if get_auto_increment() was called
3547 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3548 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3549 this list.
3550
3551 @todo
3552 Replace all references to "next number" or NEXT_NUMBER to
3553 "auto_increment", everywhere (see below: there is
3554 table->auto_increment_field_not_null, and there also exists
3555 table->next_number_field, it's not consistent).
3556
3557 @retval
3558 0 ok
3559 @retval
3560 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3561 returned ~(ulonglong) 0
3562 @retval
3563 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3564 failure.
3565 */
3566
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)

/* See the large comment above for the full contract of this function. */
int handler::update_auto_increment()
{
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /* An explicit non-NULL, non-zero value was supplied for the column. */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
    */
    adjust_next_insert_id_after_explicit_value(nr);
    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /* Need to generate a value; reserve more if the interval is exhausted. */
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      /* SET INSERT_ID / replicated Intvar event dictates the value. */
      nr= forced->minimum();
      /*
        In a multi insert statement when the number of affected rows is known
        then reserve those many number of auto increment values. So that
        interval will be starting value to starting value + number of affected
        rows * increment of auto increment.
      */
      nb_reserved_values= (estimation_rows_to_insert > 0) ?
        estimation_rows_to_insert : forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->many_values.elements > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->many_values.elements;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      /* This call ignores all its parameters but nr, currently */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULONGLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  if (unlikely(nr == ULONGLONG_MAX))
    DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));

  /* Store the generated value into the column; handle range overflow. */
  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
  {
    /*
      first test if the query was aborted due to strict mode constraints
    */
    if (thd->killed == THD::KILL_BAD_DATA)
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

    /*
      field refused this value (overflow) and truncated it, use the result of
      the truncation (which is going to be inserted); however we try to
      decrease it to honour auto_increment_* variables.
      That will shift the left bound of the reserved interval, we don't
      bother shifting the right bound (anyway any other value from this
      interval will cause a duplicate key).
    */
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
    if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
      nr= table->next_number_field->val_int();
  }
  if (append)
  {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
    if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
                                                            auto_inc_interval_for_cur_row.values(),
                                                            variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;
  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3752
3753
/** @brief
  MySQL signal that it changed the column bitmap

  USAGE
    This is for handlers that needs to setup their own column bitmaps.
    Normally the handler should set up their own column bitmaps in
    index_init() or rnd_init() and in any column_bitmaps_signal() call after
    this.

    The handler is allowd to do changes to the bitmap after a index_init or
    rnd_init() call is made as after this, MySQL will not use the bitmap
    for any program logic checking.
*/
void handler::column_bitmaps_signal()
{
  /* This implementation only traces the bitmap pointers in debug builds. */
  DBUG_ENTER("column_bitmaps_signal");
  DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx", (long) table->read_set,
                      (long)table->write_set));
  DBUG_VOID_RETURN;
}
3774
3775
/**
  Reserves an interval of auto_increment values from the handler.

  @param offset              offset (modulus increment)
  @param increment           increment between calls
  @param nb_desired_values   how many values we want
  @param[out] first_value    the first value reserved by the handler
  @param[out] nb_reserved_values  how many values the handler reserved

  offset and increment means that we want values to be of the form
  offset + N * increment, where N>=0 is integer.
  If the function sets *first_value to ULONGLONG_MAX it means an error.
  If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
  reserved to "positive infinite".

  @note This implementation ignores offset, increment and nb_desired_values;
  it finds max-used-value+1 by reading the last entry of the auto-inc index.
*/

void handler::get_auto_increment(ulonglong offset, ulonglong increment,
                                 ulonglong nb_desired_values,
                                 ulonglong *first_value,
                                 ulonglong *nb_reserved_values)
{
  ulonglong nr;
  int error;
  DBUG_ENTER("handler::get_auto_increment");

  /* Read only the index columns while probing for the current maximum. */
  (void) extra(HA_EXTRA_KEYREAD);
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                             table->read_set);
  column_bitmaps_signal();

  if (ha_index_init(table->s->next_number_index, 1))
  {
    /* This should never happen, assert in debug, and fail in release build */
    DBUG_ASSERT(0);
    *first_value= ULONGLONG_MAX;
    DBUG_VOID_RETURN;
  }

  if (table->s->next_number_keypart == 0)
  { // Autoincrement at key-start
    error= ha_index_last(table->record[1]);
    /*
      MySQL implicitely assumes such method does locking (as MySQL decides to
      use nr+increment without checking again with the handler, in
      handler::update_auto_increment()), so reserves to infinite.
    */
    *nb_reserved_values= ULONGLONG_MAX;
  }
  else
  {
    /* Auto-inc is a suffix key part: find the max within the key prefix. */
    uchar key[MAX_KEY_LENGTH];
    key_copy(key, table->record[0],
             table->key_info + table->s->next_number_index,
             table->s->next_number_key_offset);
    error= ha_index_read_map(table->record[1], key,
                             make_prev_keypart_map(table->s->next_number_keypart),
                             HA_READ_PREFIX_LAST);
    /*
      MySQL needs to call us for next row: assume we are inserting ("a",null)
      here, we return 3, and next this statement will want to insert
      ("b",null): there is no reason why ("b",3+1) would be the good row to
      insert: maybe it already exists, maybe 3+1 is too large...
    */
    *nb_reserved_values= 1;
  }

  if (error)
  {
    if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
    {
      /* No entry found, start with 1. */
      nr= 1;
    }
    else
    {
      /* Unexpected read error: report failure via ULONGLONG_MAX. */
      DBUG_ASSERT(0);
      nr= ULONGLONG_MAX;
    }
  }
  else
    nr= ((ulonglong) table->next_number_field->
         val_int_offset(table->s->rec_buff_length)+1);
  ha_index_end();
  (void) extra(HA_EXTRA_NO_KEYREAD);
  *first_value= nr;
  DBUG_VOID_RETURN;
}
3863
3864
/**
  Release auto-increment values reserved but not used by this statement,
  and reset the handler's per-statement auto-increment bookkeeping.
*/
void handler::ha_release_auto_increment()
{
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK ||
              (!next_insert_id && !insert_id_for_cur_row));
  release_auto_increment();
  /* Forget the interval reserved for the current row. */
  insert_id_for_cur_row= 0;
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
  auto_inc_intervals_count= 0;
  if (next_insert_id > 0)
  {
    next_insert_id= 0;
    /*
      this statement used forced auto_increment values if there were some,
      wipe them away for other statements.
    */
    table->in_use->auto_inc_intervals_forced.empty();
  }
}
3884
3885
3886 /**
3887 Construct and emit duplicate key error message using information
3888 from table's record buffer.
3889
3890 @param table TABLE object which record buffer should be used as
3891 source for column values.
3892 @param key Key description.
3893 @param msg Error message template to which key value should be
3894 added.
3895 @param errflag Flags for my_error() call.
3896 */
3897
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3898 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3899 {
3900 /* Write the duplicated key in the error message */
3901 char key_buff[MAX_KEY_LENGTH];
3902 String str(key_buff,sizeof(key_buff),system_charset_info);
3903
3904 if (key == NULL)
3905 {
3906 /* Key is unknown */
3907 str.copy("", 0, system_charset_info);
3908 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3909 }
3910 else
3911 {
3912 /* Table is opened and defined at this point */
3913 key_unpack(&str,table, key);
3914 uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3915 if (str.length() >= max_length)
3916 {
3917 str.length(max_length-4);
3918 str.append(STRING_WITH_LEN("..."));
3919 }
3920 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3921 }
3922 }
3923
3924
3925 /**
3926 Construct and emit duplicate key error message using information
3927 from table's record buffer.
3928
3929 @sa print_keydup_error(table, key, msg, errflag).
3930 */
3931
void print_keydup_error(TABLE *table, KEY *key, myf errflag)
{
  /* Delegate using the standard ER_DUP_ENTRY_WITH_KEY_NAME template. */
  print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
}
3936
3937
3938 /**
3939 Print error that we got from handler function.
3940
3941 @note
3942 In case of delete table it's only safe to use the following parts of
3943 the 'table' structure:
3944 - table->s->path
3945 - table->alias
3946 */
void handler::print_error(int error, myf errflag)
{
  DBUG_ENTER("handler::print_error");
  DBUG_PRINT("enter",("error: %d",error));

  /*
    Map the handler/OS error code to a server error message number.
    Cases that need custom arguments call my_error() themselves and
    return; the rest fall through to the common my_error() at the end.
  */
  int textno=ER_GET_ERRNO;
  switch (error) {
  case EACCES:
    textno=ER_OPEN_AS_READONLY;
    break;
  case EAGAIN:
    textno=ER_FILE_USED;
    break;
  case ENOENT:
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      textno=ER_FILE_NOT_FOUND;
      /* Reported here with the OS message; the guard at the end of this
         function prevents a duplicate report. */
      my_error(textno, errflag, table_share->table_name.str,
               error, my_strerror(errbuf, sizeof(errbuf), error));
    }
    break;
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_NO_ACTIVE_RECORD:
  case HA_ERR_RECORD_DELETED:
  case HA_ERR_END_OF_FILE:
    textno=ER_KEY_NOT_FOUND;
    break;
  case HA_ERR_WRONG_MRG_TABLE_DEF:
    textno=ER_WRONG_MRG_TABLE;
    break;
  case HA_ERR_FOUND_DUPP_KEY:
    {
      /* (uint) -1 is used as "no key" when the TABLE object is absent. */
      uint key_nr= table ? get_dup_key(error) : -1;
      if ((int) key_nr >= 0)
      {
        print_keydup_error(table,
                           key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
                           errflag);
        DBUG_VOID_RETURN;
      }
      textno=ER_DUP_KEY;
      break;
    }
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
    {
      DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
                  m_lock_type != F_UNLCK);

      char rec_buf[MAX_KEY_LENGTH];
      String rec(rec_buf, sizeof(rec_buf), system_charset_info);
      /* Table is opened and defined at this point */

      /*
        Just print the subset of fields that are part of the first index,
        printing the whole row from there is not easy.
      */
      key_unpack(&rec, table, &table->key_info[0]);

      char child_table_name[NAME_LEN + 1];
      char child_key_name[NAME_LEN + 1];
      if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
                              child_key_name, sizeof(child_key_name)))
      {
        my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
                 table_share->table_name.str, rec.c_ptr_safe(),
                 child_table_name, child_key_name);
      }
      else
      {
        my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
                 table_share->table_name.str, rec.c_ptr_safe());
      }
      DBUG_VOID_RETURN;
    }
  case HA_ERR_NULL_IN_SPATIAL:
    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
    DBUG_VOID_RETURN;
  case HA_ERR_FOUND_DUPP_UNIQUE:
    textno=ER_DUP_UNIQUE;
    break;
  case HA_ERR_RECORD_CHANGED:
    textno=ER_CHECKREAD;
    break;
  case HA_ERR_CRASHED:
    textno=ER_NOT_KEYFILE;
    break;
  case HA_ERR_WRONG_IN_RECORD:
    textno= ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_CRASHED_ON_USAGE:
    textno=ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_NOT_A_TABLE:
    textno= error;
    break;
  case HA_ERR_CRASHED_ON_REPAIR:
    textno=ER_CRASHED_ON_REPAIR;
    break;
  case HA_ERR_OUT_OF_MEM:
    textno=ER_OUT_OF_RESOURCES;
    break;
  case HA_ERR_WRONG_COMMAND:
    textno=ER_ILLEGAL_HA;
    break;
  case HA_ERR_OLD_FILE:
    textno=ER_OLD_KEYFILE;
    break;
  case HA_ERR_UNSUPPORTED:
    textno=ER_UNSUPPORTED_EXTENSION;
    break;
  case HA_ERR_RECORD_FILE_FULL:
  case HA_ERR_INDEX_FILE_FULL:
    {
      textno=ER_RECORD_FILE_FULL;
      /* Write the error message to error log */
      errflag|= ME_NOREFRESH;
      break;
    }
  case HA_ERR_LOCK_WAIT_TIMEOUT:
    textno=ER_LOCK_WAIT_TIMEOUT;
    break;
  case HA_ERR_LOCK_TABLE_FULL:
    textno=ER_LOCK_TABLE_FULL;
    break;
  case HA_ERR_LOCK_DEADLOCK:
    textno=ER_LOCK_DEADLOCK;
    break;
  case HA_ERR_READ_ONLY_TRANSACTION:
    textno=ER_READ_ONLY_TRANSACTION;
    break;
  case HA_ERR_CANNOT_ADD_FOREIGN:
    textno=ER_CANNOT_ADD_FOREIGN;
    break;
  case HA_ERR_ROW_IS_REFERENCED:
    {
      /* The engine supplies the foreign key details in the message. */
      String str;
      get_error_message(error, &str);
      my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
      DBUG_VOID_RETURN;
    }
  case HA_ERR_NO_REFERENCED_ROW:
    {
      String str;
      get_error_message(error, &str);
      my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
      DBUG_VOID_RETURN;
    }
  case HA_ERR_TABLE_DEF_CHANGED:
    textno=ER_TABLE_DEF_CHANGED;
    break;
  case HA_ERR_NO_SUCH_TABLE:
    my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_RBR_LOGGING_FAILED:
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
    break;
  case HA_ERR_DROP_INDEX_FK:
    {
      const char *ptr= "???";
      uint key_nr= table ? get_dup_key(error) : -1;
      if ((int) key_nr >= 0)
        ptr= table->key_info[key_nr].name;
      my_error(ER_DROP_INDEX_FK, errflag, ptr);
      DBUG_VOID_RETURN;
    }
  case HA_ERR_TABLE_NEEDS_UPGRADE:
    textno=ER_TABLE_NEEDS_UPGRADE;
    break;
  case HA_ERR_NO_PARTITION_FOUND:
    textno=ER_WRONG_PARTITION_NAME;
    break;
  case HA_ERR_TABLE_READONLY:
    textno= ER_OPEN_AS_READONLY;
    break;
  case HA_ERR_AUTOINC_READ_FAILED:
    textno= ER_AUTOINC_READ_FAILED;
    break;
  case HA_ERR_AUTOINC_ERANGE:
    textno= ER_WARN_DATA_OUT_OF_RANGE;
    break;
  case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
    textno= ER_TOO_MANY_CONCURRENT_TRXS;
    break;
  case HA_ERR_INDEX_COL_TOO_LONG:
    textno= ER_INDEX_COLUMN_TOO_LONG;
    break;
  case HA_ERR_NOT_IN_LOCK_PARTITIONS:
    textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
    break;
  case HA_ERR_INDEX_CORRUPT:
    textno= ER_INDEX_CORRUPT;
    break;
  case HA_ERR_UNDO_REC_TOO_BIG:
    textno= ER_UNDO_RECORD_TOO_BIG;
    break;
  case HA_ERR_TABLE_IN_FK_CHECK:
    textno= ER_TABLE_IN_FK_CHECK;
    break;
  case HA_WRONG_CREATE_OPTION:
    textno= ER_ILLEGAL_HA;
    break;
  case HA_ERR_TOO_MANY_FIELDS:
    textno= ER_TOO_MANY_FIELDS;
    break;
  case HA_ERR_INNODB_READ_ONLY:
    textno= ER_INNODB_READ_ONLY;
    break;
  case HA_ERR_TEMP_FILE_WRITE_FAILURE:
    textno= ER_TEMP_FILE_WRITE_FAILURE;
    break;
  case HA_ERR_INNODB_FORCED_RECOVERY:
    textno= ER_INNODB_FORCED_RECOVERY;
    break;
  default:
    {
      /* The error was "unknown" to this function.
         Ask handler if it has got a message for this error */
      bool temporary= FALSE;
      String str;
      temporary= get_error_message(error, &str);
      if (!str.is_empty())
      {
        const char* engine= table_type();
        if (temporary)
          my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
        else
          my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
      }
      else
        my_error(ER_GET_ERRNO,errflag,error);
      DBUG_VOID_RETURN;
    }
  }
  /* ENOENT was already reported above; avoid emitting it twice. */
  if (textno != ER_FILE_NOT_FOUND)
    my_error(textno, errflag, table_share->table_name.str, error);
  DBUG_VOID_RETURN;
}
4185
4186
4187 /**
4188 Return an error message specific to this handler.
4189
4190 @param error error code previously returned by handler
4191 @param buf pointer to String where to add error message
4192
4193 @return
4194 Returns true if this is a temporary error
4195 */
bool handler::get_error_message(int error, String* buf)
{
  /*
    Default implementation: no engine-specific message is added to 'buf'
    and the error is not considered temporary.
  */
  return FALSE;
}
4200
4201
4202 /**
4203 Check for incompatible collation changes.
4204
4205 @retval
4206 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
4207 @retval
4208 0 No upgrade required.
4209 */
4210
check_collation_compatibility()4211 int handler::check_collation_compatibility()
4212 {
4213 ulong mysql_version= table->s->mysql_version;
4214
4215 if (mysql_version < 50124)
4216 {
4217 KEY *key= table->key_info;
4218 KEY *key_end= key + table->s->keys;
4219 for (; key < key_end; key++)
4220 {
4221 KEY_PART_INFO *key_part= key->key_part;
4222 KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4223 for (; key_part < key_part_end; key_part++)
4224 {
4225 if (!key_part->fieldnr)
4226 continue;
4227 Field *field= table->field[key_part->fieldnr - 1];
4228 uint cs_number= field->charset()->number;
4229 if ((mysql_version < 50048 &&
4230 (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4231 cs_number == 41 || /* latin7_general_ci - bug #29461 */
4232 cs_number == 42 || /* latin7_general_cs - bug #29461 */
4233 cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4234 cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4235 cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4236 cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4237 cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4238 (mysql_version < 50124 &&
4239 (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4240 cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4241 return HA_ADMIN_NEEDS_UPGRADE;
4242 }
4243 }
4244 }
4245 return 0;
4246 }
4247
4248
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4249 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4250 {
4251 int error;
4252 KEY *keyinfo, *keyend;
4253 KEY_PART_INFO *keypart, *keypartend;
4254
4255 if (!table->s->mysql_version)
4256 {
4257 /* check for blob-in-key error */
4258 keyinfo= table->key_info;
4259 keyend= table->key_info + table->s->keys;
4260 for (; keyinfo < keyend; keyinfo++)
4261 {
4262 keypart= keyinfo->key_part;
4263 keypartend= keypart + keyinfo->user_defined_key_parts;
4264 for (; keypart < keypartend; keypart++)
4265 {
4266 if (!keypart->fieldnr)
4267 continue;
4268 Field *field= table->field[keypart->fieldnr-1];
4269 if (field->type() == MYSQL_TYPE_BLOB)
4270 {
4271 if (check_opt->sql_flags & TT_FOR_UPGRADE)
4272 check_opt->flags= T_MEDIUM;
4273 return HA_ADMIN_NEEDS_CHECK;
4274 }
4275 }
4276 }
4277 }
4278 if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
4279 return HA_ADMIN_NEEDS_ALTER;
4280
4281 if ((error= check_collation_compatibility()))
4282 return error;
4283
4284 return check_for_upgrade(check_opt);
4285 }
4286
4287
check_old_types()4288 int handler::check_old_types()
4289 {
4290 Field** field;
4291
4292 for (field= table->field; (*field); field++)
4293 {
4294 if (table->s->mysql_version == 0) // prior to MySQL 5.0
4295 {
4296 /* check for bad DECIMAL field */
4297 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) // TODO: error? MYSQL_TYPE_DECIMAL?
4298 {
4299 return HA_ADMIN_NEEDS_ALTER;
4300 }
4301 if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4302 {
4303 return HA_ADMIN_NEEDS_ALTER;
4304 }
4305 }
4306 if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4307 return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4308 }
4309 return 0;
4310 }
4311
4312
update_frm_version(TABLE * table)4313 static bool update_frm_version(TABLE *table)
4314 {
4315 char path[FN_REFLEN];
4316 File file;
4317 int result= 1;
4318 DBUG_ENTER("update_frm_version");
4319
4320 /*
4321 No need to update frm version in case table was created or checked
4322 by server with the same version. This also ensures that we do not
4323 update frm version for temporary tables as this code doesn't support
4324 temporary tables.
4325 */
4326 if (table->s->mysql_version == MYSQL_VERSION_ID)
4327 DBUG_RETURN(0);
4328
4329 strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4330
4331 if ((file= mysql_file_open(key_file_frm,
4332 path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4333 {
4334 uchar version[4];
4335
4336 int4store(version, MYSQL_VERSION_ID);
4337
4338 if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
4339 goto err;
4340
4341 table->s->mysql_version= MYSQL_VERSION_ID;
4342 }
4343 err:
4344 if (file >= 0)
4345 (void) mysql_file_close(file, MYF(MY_WME));
4346 DBUG_RETURN(result);
4347 }
4348
4349
4350
4351 /**
4352 @return
4353 key if error because of duplicated keys
4354 */
get_dup_key(int error)4355 uint handler::get_dup_key(int error)
4356 {
4357 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4358 m_lock_type != F_UNLCK);
4359 DBUG_ENTER("handler::get_dup_key");
4360 table->file->errkey = (uint) -1;
4361 if (error == HA_ERR_FOUND_DUPP_KEY ||
4362 error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4363 error == HA_ERR_DROP_INDEX_FK)
4364 table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4365 DBUG_RETURN(table->file->errkey);
4366 }
4367
4368
4369 /**
4370 Delete all files with extension from bas_ext().
4371
4372 @param name Base name of table
4373
4374 @note
4375 We assume that the handler may return more extensions than
4376 was actually used for the file.
4377
4378 @retval
4379 0 If we successfully deleted at least one file from base_ext and
4380 didn't get any other errors than ENOENT
4381 @retval
4382 !0 Error
4383 */
delete_table(const char * name)4384 int handler::delete_table(const char *name)
4385 {
4386 int saved_error= 0;
4387 int error= 0;
4388 int enoent_or_zero= ENOENT; // Error if no file was deleted
4389 char buff[FN_REFLEN];
4390 DBUG_ASSERT(m_lock_type == F_UNLCK);
4391
4392 for (const char **ext=bas_ext(); *ext ; ext++)
4393 {
4394 fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
4395 if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
4396 {
4397 if (my_errno != ENOENT)
4398 {
4399 /*
4400 If error on the first existing file, return the error.
4401 Otherwise delete as much as possible.
4402 */
4403 if (enoent_or_zero)
4404 return my_errno;
4405 saved_error= my_errno;
4406 }
4407 }
4408 else
4409 enoent_or_zero= 0; // No error for ENOENT
4410 error= enoent_or_zero;
4411 }
4412 return saved_error ? saved_error : error;
4413 }
4414
4415
rename_table(const char * from,const char * to)4416 int handler::rename_table(const char * from, const char * to)
4417 {
4418 int error= 0;
4419 const char **ext, **start_ext;
4420 start_ext= bas_ext();
4421 for (ext= start_ext; *ext ; ext++)
4422 {
4423 if (rename_file_ext(from, to, *ext))
4424 {
4425 if ((error=my_errno) != ENOENT)
4426 break;
4427 error= 0;
4428 }
4429 }
4430 if (error)
4431 {
4432 /* Try to revert the rename. Ignore errors. */
4433 for (; ext >= start_ext; ext--)
4434 rename_file_ext(to, from, *ext);
4435 }
4436 return error;
4437 }
4438
4439
void handler::drop_table(const char *name)
{
  /* Close the handler before removing its files from disk. */
  close();
  delete_table(name);
}
4445
4446
4447 /**
4448 Performs checks upon the table.
4449
4450 @param thd thread doing CHECK TABLE operation
4451 @param check_opt options from the parser
4452
4453 @retval
4454 HA_ADMIN_OK Successful upgrade
4455 @retval
4456 HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4457 @retval
4458 HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4459 @retval
4460 HA_ADMIN_NOT_IMPLEMENTED
4461 */
ha_check(THD * thd,HA_CHECK_OPT * check_opt)4462 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4463 {
4464 int error;
4465 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4466 m_lock_type != F_UNLCK);
4467
4468 if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4469 (check_opt->sql_flags & TT_FOR_UPGRADE))
4470 return 0;
4471
4472 if (table->s->mysql_version < MYSQL_VERSION_ID)
4473 {
4474 if ((error= check_old_types()))
4475 return error;
4476 error= ha_check_for_upgrade(check_opt);
4477 if (error && (error != HA_ADMIN_NEEDS_CHECK))
4478 return error;
4479 if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
4480 return 0;
4481 }
4482 if ((error= check(thd, check_opt)))
4483 return error;
4484 /* Skip updating frm version if not main handler. */
4485 if (table->file != this)
4486 return error;
4487 return update_frm_version(table);
4488 }
4489
4490 /**
4491 A helper function to mark a transaction read-write,
4492 if it is started.
4493 */
4494
4495 inline
4496 void
mark_trx_read_write()4497 handler::mark_trx_read_write()
4498 {
4499 Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4500 /*
4501 When a storage engine method is called, the transaction must
4502 have been started, unless it's a DDL call, for which the
4503 storage engine starts the transaction internally, and commits
4504 it internally, without registering in the ha_list.
4505 Unfortunately here we can't know know for sure if the engine
4506 has registered the transaction or not, so we must check.
4507 */
4508 if (ha_info->is_started())
4509 {
4510 DBUG_ASSERT(has_transactions());
4511 /*
4512 table_share can be NULL in ha_delete_table(). See implementation
4513 of standalone function ha_delete_table() in sql_base.cc.
4514 */
4515 if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4516 ha_info->set_trx_read_write();
4517 }
4518 }
4519
4520
4521 /**
4522 Repair table: public interface.
4523
4524 @sa handler::repair()
4525 */
4526
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4527 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4528 {
4529 int result;
4530 mark_trx_read_write();
4531
4532 result= repair(thd, check_opt);
4533 DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4534 ha_table_flags() & HA_CAN_REPAIR);
4535
4536 if (result == HA_ADMIN_OK)
4537 result= update_frm_version(table);
4538 return result;
4539 }
4540
4541
4542 /**
4543 Start bulk insert.
4544
4545 Allow the handler to optimize for multiple row insert.
4546
4547 @param rows Estimated rows to insert
4548 */
4549
void handler::ha_start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("handler::ha_start_bulk_insert");
  /* Inserting rows: needs a write lock unless the table is temporary. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* Remember the caller's estimate of how many rows will follow. */
  estimation_rows_to_insert= rows;
  start_bulk_insert(rows);
  DBUG_VOID_RETURN;
}
4559
4560
4561 /**
4562 End bulk insert.
4563
4564 @return Operation status
4565 @retval 0 Success
4566 @retval != 0 Failure (error code returned)
4567 */
4568
int handler::ha_end_bulk_insert()
{
  DBUG_ENTER("handler::ha_end_bulk_insert");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* The bulk insert is over; clear the row estimate before finishing. */
  estimation_rows_to_insert= 0;
  DBUG_RETURN(end_bulk_insert());
}
4577
4578
4579 /**
4580 Bulk update row: public interface.
4581
4582 @sa handler::bulk_update_row()
4583 */
4584
int
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
                            uint *dup_key_found)
{
  /* Updating rows requires a write lock (or a temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* The statement modifies data: mark the transaction read-write. */
  mark_trx_read_write();

  return bulk_update_row(old_data, new_data, dup_key_found);
}
4595
4596
4597 /**
4598 Delete all rows: public interface.
4599
4600 @sa handler::delete_all_rows()
4601 */
4602
int
handler::ha_delete_all_rows()
{
  /* Deleting rows requires a write lock (or a temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* The statement modifies data: mark the transaction read-write. */
  mark_trx_read_write();

  return delete_all_rows();
}
4612
4613
4614 /**
4615 Truncate table: public interface.
4616
4617 @sa handler::truncate()
4618 */
4619
int
handler::ha_truncate()
{
  /* Truncation requires a write lock (or a temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* The statement modifies data: mark the transaction read-write. */
  mark_trx_read_write();

  return truncate();
}
4629
4630
4631 /**
4632 Reset auto increment: public interface.
4633
4634 @sa handler::reset_auto_increment()
4635 */
4636
int
handler::ha_reset_auto_increment(ulonglong value)
{
  /* Resetting the counter requires a write lock (or a temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* The statement modifies table state: mark the transaction read-write. */
  mark_trx_read_write();

  return reset_auto_increment(value);
}
4646
4647
4648 /**
4649 Optimize table: public interface.
4650
4651 @sa handler::optimize()
4652 */
4653
int
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* OPTIMIZE rewrites data: requires a write lock (or temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return optimize(thd, check_opt);
}
4663
4664
4665 /**
4666 Analyze table: public interface.
4667
4668 @sa handler::analyze()
4669 */
4670
int
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* ANALYZE needs the table locked (any lock) or temporary. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return analyze(thd, check_opt);
}
4680
4681
4682 /**
4683 Check and repair table: public interface.
4684
4685 @sa handler::check_and_repair()
4686 */
4687
bool
handler::ha_check_and_repair(THD *thd)
{
  /* Runs on an unlocked table (or a temporary one). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return check_and_repair(thd);
}
4697
4698
4699 /**
4700 Disable indexes: public interface.
4701
4702 @sa handler::disable_indexes()
4703 */
4704
int
handler::ha_disable_indexes(uint mode)
{
  /* Requires the table locked (any lock) or temporary. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return disable_indexes(mode);
}
4714
4715
4716 /**
4717 Enable indexes: public interface.
4718
4719 @sa handler::enable_indexes()
4720 */
4721
int
handler::ha_enable_indexes(uint mode)
{
  /* Requires the table locked (any lock) or temporary. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return enable_indexes(mode);
}
4731
4732
4733 /**
4734 Discard or import tablespace: public interface.
4735
4736 @sa handler::discard_or_import_tablespace()
4737 */
4738
int
handler::ha_discard_or_import_tablespace(my_bool discard)
{
  /* Tablespace manipulation requires a write lock (or temporary table). */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return discard_or_import_tablespace(discard);
}
4748
4749
bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
                                             Alter_inplace_info *ha_alter_info)
{
  /* Preparation phase: table must be locked (any lock) or temporary. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return prepare_inplace_alter_table(altered_table, ha_alter_info);
}
4759
4760
/**
  Commit (or roll back) an in-place ALTER TABLE: public interface.

  @sa handler::commit_inplace_alter_table()
*/
bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
                                            Alter_inplace_info *ha_alter_info,
                                            bool commit)
{
  /*
    At this point we should have an exclusive metadata lock on the table.
    The exception is if we're about to roll back changes (commit= false).
    In this case, we might be rolling back after a failed lock upgrade,
    so we could be holding the same lock level as for inplace_alter_table().
    TABLE::mdl_ticket is 0 for temporary tables.
  */
  DBUG_ASSERT((table->s->tmp_table != NO_TMP_TABLE && !table->mdl_ticket) ||
              (ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
                                                   table->s->db.str,
                                                   table->s->table_name.str,
                                                   MDL_EXCLUSIVE) ||
               !commit));

  return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
}
4781
4782
4783 /*
4784 Default implementation to support in-place alter table
4785 and old online add/drop index API
4786 */
4787
4788 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4789 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4790 Alter_inplace_info *ha_alter_info)
4791 {
4792 DBUG_ENTER("check_if_supported_alter");
4793
4794 HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4795
4796 Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4797 Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4798 Alter_inplace_info::ALTER_COLUMN_NAME |
4799 Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4800 Alter_inplace_info::CHANGE_CREATE_OPTION |
4801 Alter_inplace_info::ALTER_RENAME;
4802
4803 /* Is there at least one operation that requires copy algorithm? */
4804 if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4805 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4806
4807 /*
4808 ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4809 ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4810 change column charsets and so not supported in-place through
4811 old API.
4812
4813 Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4814 not supported as in-place operations in old API either.
4815 */
4816 if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4817 HA_CREATE_USED_DEFAULT_CHARSET |
4818 HA_CREATE_USED_PACK_KEYS |
4819 HA_CREATE_USED_MAX_ROWS) ||
4820 (table->s->row_type != create_info->row_type))
4821 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4822
4823 uint table_changes= (ha_alter_info->handler_flags &
4824 Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
4825 IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4826 if (table->file->check_if_incompatible_data(create_info, table_changes)
4827 == COMPATIBLE_DATA_YES)
4828 DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
4829
4830 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4831 }
4832
4833
4834 /*
4835 Default implementation to support in-place alter table
4836 and old online add/drop index API
4837 */
4838
void handler::notify_table_changed()
{
  /*
    Default implementation for the old online add/drop index API:
    forward to ha_create_handler_files() with CHF_INDEX_FLAG.
  */
  ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
}
4843
4844
report_unsupported_error(const char * not_supported,const char * try_instead)4845 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4846 const char *try_instead)
4847 {
4848 if (unsupported_reason == NULL)
4849 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4850 not_supported, try_instead);
4851 else
4852 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4853 not_supported, unsupported_reason, try_instead);
4854 }
4855
4856
4857 /**
4858 Rename table: public interface.
4859
4860 @sa handler::rename_table()
4861 */
4862
int
handler::ha_rename_table(const char *from, const char *to)
{
  /* Renaming is only legal on an unlocked table. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return rename_table(from, to);
}
4871
4872
4873 /**
4874 Delete table: public interface.
4875
4876 @sa handler::delete_table()
4877 */
4878
int
handler::ha_delete_table(const char *name)
{
  /* Deleting table files is only legal on an unlocked table. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return delete_table(name);
}
4887
4888
4889 /**
4890 Drop table in the engine: public interface.
4891
4892 @sa handler::drop_table()
4893 */
4894
void
handler::ha_drop_table(const char *name)
{
  /* Dropping is only legal on an unlocked table. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return drop_table(name);
}
4903
4904
4905 /**
4906 Create a table in the engine: public interface.
4907
4908 @sa handler::create()
4909 */
4910
int
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
{
  /* Creation happens before any lock is taken on the new table. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return create(name, form, info);
}
4919
4920
4921 /**
4922 Create handler files for CREATE TABLE: public interface.
4923
4924 @sa handler::create_handler_files()
4925 */
4926
int
handler::ha_create_handler_files(const char *name, const char *old_name,
                                 int action_flag, HA_CREATE_INFO *info)
{
  /*
    Normally this is done when unlocked, but in fast_alter_partition_table,
    it is done on an already locked handler when preparing to alter/rename
    partitions.
  */
  DBUG_ASSERT(m_lock_type == F_UNLCK ||
              (!old_name && strcmp(name, table_share->path.str)));
  mark_trx_read_write();

  return create_handler_files(name, old_name, action_flag, info);
}
4942
4943
4944 /**
4945 Change partitions: public interface.
4946
4947 @sa handler::change_partitions()
4948 */
4949
int
handler::ha_change_partitions(HA_CREATE_INFO *create_info,
                              const char *path,
                              ulonglong * const copied,
                              ulonglong * const deleted,
                              const uchar *pack_frm_data,
                              size_t pack_frm_len)
{
  /*
    Must have at least RDLCK or be a TMP table. Read lock is needed to read
    from current partitions and write lock will be taken on new partitions.
  */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return change_partitions(create_info, path, copied, deleted,
                           pack_frm_data, pack_frm_len);
}
4969
4970
4971 /**
4972 Drop partitions: public interface.
4973
4974 @sa handler::drop_partitions()
4975 */
4976
int
handler::ha_drop_partitions(const char *path)
{
  /* Requires that the table is not open (db_stat == 0). */
  DBUG_ASSERT(!table->db_stat);

  mark_trx_read_write();

  return drop_partitions(path);
}
4986
4987
4988 /**
4989 Rename partitions: public interface.
4990
4991 @sa handler::rename_partitions()
4992 */
4993
int
handler::ha_rename_partitions(const char *path)
{
  /* Requires that the table is not open (db_stat == 0). */
  DBUG_ASSERT(!table->db_stat);
  mark_trx_read_write();

  return rename_partitions(path);
}
5002
5003
5004 /**
5005 Tell the storage engine that it is allowed to "disable transaction" in the
5006 handler. It is a hint that ACID is not required - it is used in NDB for
5007 ALTER TABLE, for example, when data are copied to temporary table.
5008 A storage engine may treat this hint any way it likes. NDB for example
5009 starts to commit every now and then automatically.
5010 This hint can be safely ignored.
5011 */
ha_enable_transaction(THD * thd,bool on)5012 int ha_enable_transaction(THD *thd, bool on)
5013 {
5014 int error=0;
5015 DBUG_ENTER("ha_enable_transaction");
5016 DBUG_PRINT("enter", ("on: %d", (int) on));
5017
5018 if ((thd->transaction.flags.enabled= on))
5019 {
5020 /*
5021 Now all storage engines should have transaction handling enabled.
5022 But some may have it enabled all the time - "disabling" transactions
5023 is an optimization hint that storage engine is free to ignore.
5024 So, let's commit an open transaction (if any) now.
5025 */
5026 if (!(error= ha_commit_trans(thd, 0)))
5027 error= trans_commit_implicit(thd);
5028 }
5029 DBUG_RETURN(error);
5030 }
5031
/**
  Read the next row through the active index, but only accept it if it has
  the same key value as the previously read row.

  @param buf     Destination record buffer; may differ from table->record[0].
  @param key     Key value the next row must match.
  @param keylen  Length of 'key' in bytes.

  @return 0 on success, HA_ERR_END_OF_FILE when the next row carries a
          different key, otherwise the error code from index_next().
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error;
  DBUG_ENTER("index_next_same");
  if (!(error=index_next(buf)))
  {
    my_ptrdiff_t ptrdiff= buf - table->record[0];
    uchar *UNINIT_VAR(save_record_0);
    KEY *UNINIT_VAR(key_info);
    KEY_PART_INFO *UNINIT_VAR(key_part);
    KEY_PART_INFO *UNINIT_VAR(key_part_end);

    /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In parts it uses table->record[0] directly, in parts it uses
      field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references. This is table->record[0] itself and
      the field pointers of the fields used in this key.
    */
    if (ptrdiff)
    {
      save_record_0= table->record[0];
      table->record[0]= buf;
      key_info= table->key_info + active_index;
      key_part= key_info->key_part;
      key_part_end= key_part + key_info->user_defined_key_parts;
      for (; key_part < key_part_end; key_part++)
      {
        /* Every key part must have an associated field in the record. */
        DBUG_ASSERT(key_part->field);
        key_part->field->move_field_offset(ptrdiff);
      }
    }

    if (key_cmp_if_same(table, key, active_index, keylen))
    {
      /* Different key value: signal end of the matching range. */
      table->status=STATUS_NOT_FOUND;
      error=HA_ERR_END_OF_FILE;
    }

    /* Move back if necessary. */
    if (ptrdiff)
    {
      table->record[0]= save_record_0;
      for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
        key_part->field->move_field_offset(-ptrdiff);
    }
  }
  DBUG_RETURN(error);
}
5082
5083
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)5084 void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
5085 uint part_id)
5086 {
5087 info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
5088 HA_STATUS_NO_LOCK);
5089 stat_info->records= stats.records;
5090 stat_info->mean_rec_length= stats.mean_rec_length;
5091 stat_info->data_file_length= stats.data_file_length;
5092 stat_info->max_data_file_length= stats.max_data_file_length;
5093 stat_info->index_file_length= stats.index_file_length;
5094 stat_info->delete_length= stats.delete_length;
5095 stat_info->create_time= stats.create_time;
5096 stat_info->update_time= stats.update_time;
5097 stat_info->check_time= stats.check_time;
5098 stat_info->check_sum= 0;
5099 if (table_flags() & (ulong) HA_HAS_CHECKSUM)
5100 stat_info->check_sum= checksum();
5101 return;
5102 }
5103
5104 // Updates the global table stats with the TABLE this handler represents.
update_global_table_stats()5105 void handler::update_global_table_stats()
5106 {
5107 if (!rows_read && !rows_changed)
5108 return; // Nothing to update.
5109 // table_cache_key is db_name + '\0' + table_name + '\0'.
5110 if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
5111 return;
5112
5113 TABLE_STATS* table_stats;
5114 char key[NAME_LEN * 2 + 2];
5115 // [db] + '.' + [table]
5116 sprintf(key, "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
5117
5118 mysql_mutex_lock(&LOCK_global_table_stats);
5119 // Gets the global table stats, creating one if necessary.
5120 if (!(table_stats = (TABLE_STATS *) my_hash_search(&global_table_stats,
5121 (uchar*)key,
5122 strlen(key))))
5123 {
5124 if (!(table_stats = ((TABLE_STATS *)
5125 my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL)))))
5126 {
5127 // Out of memory.
5128 sql_print_error("Allocating table stats failed.");
5129 goto end;
5130 }
5131 strncpy(table_stats->table, key, sizeof(table_stats->table));
5132 table_stats->table_len= strlen(table_stats->table);
5133 table_stats->rows_read= 0;
5134 table_stats->rows_changed= 0;
5135 table_stats->rows_changed_x_indexes= 0;
5136 table_stats->engine_type= (int) ht->db_type;
5137
5138 if (my_hash_insert(&global_table_stats, (uchar *) table_stats))
5139 {
5140 // Out of memory.
5141 sql_print_error("Inserting table stats failed.");
5142 my_free((char *) table_stats);
5143 goto end;
5144 }
5145 }
5146 // Updates the global table stats.
5147 table_stats->rows_read+= rows_read;
5148 table_stats->rows_changed+= rows_changed;
5149 table_stats->rows_changed_x_indexes+=
5150 rows_changed * (table->s->keys ? table->s->keys : 1);
5151 ha_thd()->diff_total_read_rows+= rows_read;
5152 rows_read= rows_changed= 0;
5153 end:
5154 mysql_mutex_unlock(&LOCK_global_table_stats);
5155 }
5156
5157 // Updates the global index stats with this handler's accumulated index reads.
update_global_index_stats()5158 void handler::update_global_index_stats()
5159 {
5160 // table_cache_key is db_name + '\0' + table_name + '\0'.
5161 if (!table || !table->s || !table->s->table_cache_key.str ||
5162 !table->s->table_name.str)
5163 return;
5164
5165 for (uint x = 0; x < table->s->keys; ++x)
5166 {
5167 if (index_rows_read[x])
5168 {
5169 // Rows were read using this index.
5170 KEY* key_info = &table->key_info[x];
5171
5172 if (!key_info->name) continue;
5173
5174 INDEX_STATS* index_stats;
5175 char key[NAME_LEN * 3 + 3];
5176 // [db] + '.' + [table] + '.' + [index]
5177 sprintf(key, "%s.%s.%s", table->s->table_cache_key.str,
5178 table->s->table_name.str, key_info->name);
5179
5180 mysql_mutex_lock(&LOCK_global_index_stats);
5181 // Gets the global index stats, creating one if necessary.
5182 if (!(index_stats = (INDEX_STATS *) my_hash_search(&global_index_stats,
5183 (uchar *) key,
5184 strlen(key))))
5185 {
5186 if (!(index_stats = ((INDEX_STATS *)
5187 my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL)))))
5188 {
5189 // Out of memory.
5190 sql_print_error("Allocating index stats failed.");
5191 goto end;
5192 }
5193 strncpy(index_stats->index, key, sizeof(index_stats->index));
5194 index_stats->index_len= strlen(index_stats->index);
5195 index_stats->rows_read= 0;
5196
5197 if (my_hash_insert(&global_index_stats, (uchar *) index_stats))
5198 {
5199 // Out of memory.
5200 sql_print_error("Inserting index stats failed.");
5201 my_free((char *) index_stats);
5202 goto end;
5203 }
5204 }
5205 // Updates the global index stats.
5206 index_stats->rows_read+= index_rows_read[x];
5207 index_rows_read[x]= 0;
5208 end:
5209 mysql_mutex_unlock(&LOCK_global_index_stats);
5210 }
5211 }
5212 }
5213
5214 /****************************************************************************
** Some general functions that aren't in the handler class
5216 ****************************************************************************/
5217
5218 /**
5219 Initiates table-file and calls appropriate database-creator.
5220
5221 @retval
5222 0 ok
5223 @retval
5224 1 error
5225 */
int ha_create_table(THD *thd, const char *path,
                    const char *db, const char *table_name,
                    HA_CREATE_INFO *create_info,
                    const List<Create_field> *create_fields,
                    bool update_create_info,
                    bool is_temp_table)
{
  int error= 1;
  TABLE table;
  char name_buff[FN_REFLEN];
  const char *name;
  TABLE_SHARE share;
  bool saved_abort_on_warning;
#ifdef HAVE_PSI_TABLE_INTERFACE
  /*
    Instrument as a temporary table if the caller says so, if CREATE
    specified TEMPORARY, or if the path carries the temp-file prefix.
  */
  bool temp_table = is_temp_table ||
    (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
    (strstr(path, tmp_file_prefix) != NULL);
#endif
  DBUG_ENTER("ha_create_table");

  /* Build a temporary TABLE_SHARE from the table definition at 'path'. */
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
  if (open_table_def(thd, &share, 0))
    goto err;

#ifdef HAVE_PSI_TABLE_INTERFACE
  share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
#endif

  if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
                            TRUE))
  {
#ifdef HAVE_PSI_TABLE_INTERFACE
    /* Roll back the instrumentation share registered above. */
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
    goto err;
  }

  if (update_create_info)
    update_create_info_from_table(create_info, &table);

  /*
    Updating field definitions in 'table' with zip_dict_name values
    from 'create_fields'
  */
  if (create_fields != 0)
  {
    table.update_compressed_columns_info(*create_fields);
  }

  name= get_canonical_filename(table.file, share.path.str, name_buff);

  /* Engine warnings during CREATE must not be escalated to errors. */
  saved_abort_on_warning = thd->abort_on_warning;
  thd->abort_on_warning = false;
  error= table.file->ha_create(name, &table, create_info);
  thd->abort_on_warning = saved_abort_on_warning;
  if (error)
  {
    table.file->print_error(error, MYF(0));
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
  }
  (void) closefrm(&table, 0);
err:
  free_table_share(&share);
  /* Collapse any engine-specific error code to the documented 0/1 result. */
  DBUG_RETURN(error != 0);
}
5295
5296 /**
5297 Try to discover table from engine.
5298
5299 @note
5300 If found, write the frm file to disk.
5301
5302 @retval
  -1  Table did not exist
5304 @retval
5305 0 Table created ok
5306 @retval
5307 > 0 Error, table existed but could not be created
5308 */
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
{
  int error;
  uchar *frmblob;
  size_t frmlen;
  char path[FN_REFLEN + 1];
  HA_CREATE_INFO create_info;
  TABLE table;
  TABLE_SHARE share;
  DBUG_ENTER("ha_create_table_from_engine");
  DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));

  memset(static_cast<void*>(&create_info), 0, sizeof(create_info));
  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
  {
    /* Table could not be discovered and thus not created */
    DBUG_RETURN(error);
  }

  /*
    Table exists in handler and could be discovered
    frmblob and frmlen are set, write the frm to disk
  */

  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
  // Save the frm file
  error= writefrm(path, frmblob, frmlen);
  my_free(frmblob);
  if (error)
    DBUG_RETURN(2);

  /* Open the definition we just wrote so the engine can (re)create it. */
  init_tmp_table_share(thd, &share, db, 0, name, path);
  if (open_table_def(thd, &share, 0))
  {
    DBUG_RETURN(3);
  }

#ifdef HAVE_PSI_TABLE_INTERFACE
  /*
    Table discovery is not instrumented.
    Once discovered, the table will be opened normally,
    and instrumented normally.
  */
#endif

  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
  {
    free_table_share(&share);
    DBUG_RETURN(3);
  }

  update_create_info_from_table(&create_info, &table);
  /* Mark this CREATE as originating from engine discovery. */
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;

  get_canonical_filename(table.file, path, path);
  error=table.file->ha_create(path, &table, &create_info);
  /* closefrm() also frees the share attached to 'table'. */
  (void) closefrm(&table, 1);

  DBUG_RETURN(error != 0);
}
5369
5370
5371 /**
5372 Try to find a table in a storage engine.
5373
5374 @param db Normalized table schema name
5375 @param name Normalized table name.
5376 @param[out] exists Only valid if the function succeeded.
5377
5378 @retval TRUE An error is found
5379 @retval FALSE Success, check *exists
5380 */
5381
5382 bool
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5383 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
5384 bool *exists)
5385 {
5386 uchar *frmblob= NULL;
5387 size_t frmlen;
5388 DBUG_ENTER("ha_check_if_table_exists");
5389
5390 *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
5391 if (*exists)
5392 my_free(frmblob);
5393
5394 DBUG_RETURN(FALSE);
5395 }
5396
5397 /**
5398 @brief Check if a given table is a system table.
5399
  @details The primary purpose of introducing this function is to stop system
  tables from being created in, or moved to, undesired storage engines.
5402
5403 @todo There is another function called is_system_table_name() used by
5404 get_table_category(), which is used to set TABLE_SHARE table_category.
5405 It checks only a subset of table name like proc, event and time*.
5406 We cannot use below function in get_table_category(),
5407 as that affects locking mechanism. If we need to
5408 unify these functions, we need to fix locking issues generated.
5409
5410 @param hton Handlerton of new engine.
5411 @param db Database name.
5412 @param table_name Table name to be checked.
5413
5414 @return Operation status
5415 @retval true If the table name is a valid system table
5416 or if its a valid user table.
5417
5418 @retval false If the table name is a system table name
5419 and does not belong to engine specified
5420 in the command.
5421 */
bool ha_check_if_supported_system_table(handlerton *hton, const char *db,
                                        const char *table_name)
{
  DBUG_ENTER("ha_check_if_supported_system_table");
  st_sys_tbl_chk_params check_params;
  bool is_system_database= false;
  const char **names;
  st_system_tablename *systab;

  // Check if we have a system database name in the command.
  DBUG_ASSERT(known_system_databases != NULL);
  names= known_system_databases;
  while (names && *names)
  {
    if (strcmp(*names, db) == 0)
    {
      /*
        Used to compare later, will be faster: storing the interned pointer
        lets the loop below compare by pointer identity instead of strcmp().
      */
      check_params.db= *names;
      is_system_database= true;
      break;
    }
    names++;
  }
  if (!is_system_database)
    DBUG_RETURN(true); // It's a user table name.

  // Check if this is SQL layer system tables.
  systab= mysqld_system_tables;
  check_params.is_sql_layer_system_table= false;
  while (systab && systab->db)
  {
    /* Pointer comparison is valid: both come from known_system_databases. */
    if (systab->db == check_params.db &&
        strcmp(systab->tablename, table_name) == 0)
    {
      check_params.is_sql_layer_system_table= true;
      break;
    }
    systab++;
  }

  // Check if this is a system table and if some engine supports it.
  check_params.status= check_params.is_sql_layer_system_table ?
    st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE :
    st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
  check_params.db_type= hton->db_type;
  check_params.table_name= table_name;
  plugin_foreach(NULL, check_engine_system_table_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);

  // SE does not support this system table.
  if (check_params.status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
    DBUG_RETURN(false);

  // It's a system table or a valid user table.
  DBUG_RETURN(true);
}
5478
5479 /**
5480 @brief Called for each SE to check if given db, tablename is a system table.
5481
5482 @details The primary purpose of introducing this function is to stop system
5483 tables to be created or being moved to undesired storage engines.
5484
5485 @param unused unused THD*
5486 @param plugin Points to specific SE.
5487 @param arg Is of type struct st_sys_tbl_chk_params.
5488
5489 @note
5490 args->status Indicates OUT param,
5491 see struct st_sys_tbl_chk_params definition for more info.
5492
5493 @return Operation status
5494 @retval true There was a match found.
5495 This will stop doing checks with other SE's.
5496
5497 @retval false There was no match found.
5498 Other SE's will be checked to find a match.
5499 */
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg)
{
  st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
  handlerton *hton= plugin_data(plugin, handlerton *);

  // Do we already know that the table is a system table?
  if (check_params->status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
  {
    /*
      If this is the same SE specified in the command, we can
      simply ask the SE if it supports it, and stop the search regardless.
    */
    if (hton->db_type == check_params->db_type)
    {
      if (hton->is_supported_system_table &&
          hton->is_supported_system_table(check_params->db,
                                          check_params->table_name,
                                          check_params->is_sql_layer_system_table))
        check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
      return TRUE;
    }
    /*
      If this is a different SE, there is no point in asking the SE
      since we already know it's a system table and we don't care
      if it is supported or not.
    */
    return FALSE;
  }

  /*
    We don't yet know if the table is a system table or not.
    We therefore must always ask the SE.
  */
  if (hton->is_supported_system_table &&
      hton->is_supported_system_table(check_params->db,
                                      check_params->table_name,
                                      check_params->is_sql_layer_system_table))
  {
    /*
      If this is the same SE specified in the command, we know it's a
      supported system table and can stop the search.
    */
    if (hton->db_type == check_params->db_type)
    {
      check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
      return TRUE;
    }
    else
      check_params->status= st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE;
  }

  return FALSE;
}
5555
5556 /*
5557 Prepare list of all known system database names
  currently we just have 'mysql' as the system database name.
5559
5560 Later ndbcluster, innodb SE's can define some new database
5561 name which can store system tables specific to SE.
5562 */
ha_known_system_databases(void)5563 const char** ha_known_system_databases(void)
5564 {
5565 list<const char*> found_databases;
5566 const char **databases, **database;
5567
5568 // Get mysqld system database name.
5569 found_databases.push_back((char*) mysqld_system_database);
5570
5571 // Get system database names from every specific storage engine.
5572 plugin_foreach(NULL, system_databases_handlerton,
5573 MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5574
5575 databases= (const char **) my_once_alloc(sizeof(char *)*
5576 (found_databases.size()+1),
5577 MYF(MY_WME | MY_FAE));
5578 DBUG_ASSERT(databases != NULL);
5579
5580 list<const char*>::iterator it;
5581 database= databases;
5582 for (it= found_databases.begin(); it != found_databases.end(); it++)
5583 *database++= *it;
5584 *database= 0; // Last element.
5585
5586 return databases;
5587 }
5588
5589 /**
5590 @brief Fetch system database name specific to SE.
5591
5592 @details This function is invoked by plugin_foreach() from
5593 ha_known_system_databases(), for each storage engine.
5594 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5595 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5596 void *arg)
5597 {
5598 list<const char*> *found_databases= (list<const char*> *) arg;
5599 const char *db;
5600
5601 handlerton *hton= plugin_data(plugin, handlerton *);
5602 if (hton->system_database)
5603 {
5604 db= hton->system_database();
5605 if (db)
5606 found_databases->push_back(db);
5607 }
5608
5609 return FALSE;
5610 }
5611
init()5612 void st_ha_check_opt::init()
5613 {
5614 flags= sql_flags= 0;
5615 }
5616
5617
5618 /*****************************************************************************
5619 Key cache handling.
5620
5621 This code is only relevant for ISAM/MyISAM tables
5622
5623 key_cache->cache may be 0 only in the case where a key cache is not
5624 initialized or when we where not able to init the key cache in a previous
5625 call to ha_init_key_cache() (probably out of memory)
5626 *****************************************************************************/
5627
5628 /**
  Init a key cache if it has not been initialized before.
5630 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5631 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5632 {
5633 DBUG_ENTER("ha_init_key_cache");
5634
5635 if (!key_cache->key_cache_inited)
5636 {
5637 mysql_mutex_lock(&LOCK_global_system_variables);
5638 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5639 uint tmp_block_size= (uint) key_cache->param_block_size;
5640 uint division_limit= key_cache->param_division_limit;
5641 uint age_threshold= key_cache->param_age_threshold;
5642 mysql_mutex_unlock(&LOCK_global_system_variables);
5643 DBUG_RETURN(!init_key_cache(key_cache,
5644 tmp_block_size,
5645 tmp_buff_size,
5646 division_limit, age_threshold));
5647 }
5648 DBUG_RETURN(0);
5649 }
5650
5651
5652 /**
5653 Resize key cache.
5654 */
ha_resize_key_cache(KEY_CACHE * key_cache)5655 int ha_resize_key_cache(KEY_CACHE *key_cache)
5656 {
5657 DBUG_ENTER("ha_resize_key_cache");
5658
5659 if (key_cache->key_cache_inited)
5660 {
5661 mysql_mutex_lock(&LOCK_global_system_variables);
5662 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5663 long tmp_block_size= (long) key_cache->param_block_size;
5664 uint division_limit= key_cache->param_division_limit;
5665 uint age_threshold= key_cache->param_age_threshold;
5666 mysql_mutex_unlock(&LOCK_global_system_variables);
5667 DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5668 tmp_buff_size,
5669 division_limit, age_threshold));
5670 }
5671 DBUG_RETURN(0);
5672 }
5673
5674
5675 /**
5676 Change parameters for key cache (like size)
5677 */
ha_change_key_cache_param(KEY_CACHE * key_cache)5678 int ha_change_key_cache_param(KEY_CACHE *key_cache)
5679 {
5680 if (key_cache->key_cache_inited)
5681 {
5682 mysql_mutex_lock(&LOCK_global_system_variables);
5683 uint division_limit= key_cache->param_division_limit;
5684 uint age_threshold= key_cache->param_age_threshold;
5685 mysql_mutex_unlock(&LOCK_global_system_variables);
5686 change_key_cache_param(key_cache, division_limit, age_threshold);
5687 }
5688 return 0;
5689 }
5690
5691 /**
5692 Move all tables from one key cache to another one.
5693 */
int ha_change_key_cache(KEY_CACHE *old_key_cache,
                        KEY_CACHE *new_key_cache)
{
  /* Key caches are a MyISAM feature; delegate to the MyISAM routine. */
  mi_change_key_cache(old_key_cache, new_key_cache);
  return 0;
}
5700
5701
5702 /**
5703 Try to discover one table from handler(s).
5704
5705 @retval
   -1   Table did not exist
5707 @retval
5708 0 OK. In this case *frmblob and *frmlen are set
5709 @retval
5710 >0 error. frmblob and frmlen may not be set
5711 */
5712 struct st_discover_args
5713 {
5714 const char *db;
5715 const char *name;
5716 uchar **frmblob;
5717 size_t *frmlen;
5718 };
5719
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5720 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5721 void *arg)
5722 {
5723 st_discover_args *vargs= (st_discover_args *)arg;
5724 handlerton *hton= plugin_data(plugin, handlerton *);
5725 if (hton->state == SHOW_OPTION_YES && hton->discover &&
5726 (!(hton->discover(hton, thd, vargs->db, vargs->name,
5727 vargs->frmblob,
5728 vargs->frmlen))))
5729 return TRUE;
5730
5731 return FALSE;
5732 }
5733
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5734 int ha_discover(THD *thd, const char *db, const char *name,
5735 uchar **frmblob, size_t *frmlen)
5736 {
5737 int error= -1; // Table does not exist in any handler
5738 DBUG_ENTER("ha_discover");
5739 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5740 st_discover_args args= {db, name, frmblob, frmlen};
5741
5742 if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5743 DBUG_RETURN(error);
5744
5745 if (plugin_foreach(thd, discover_handlerton,
5746 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5747 error= 0;
5748
5749 if (!error)
5750 {
5751 DBUG_ASSERT(!thd->status_var_aggregated);
5752 status_var_increment(thd->status_var.ha_discover_count);
5753 }
5754 DBUG_RETURN(error);
5755 }
5756
5757
5758 /**
  Call this function in order to give the handler the possibility
5760 to ask engine if there are any new tables that should be written to disk
5761 or any dropped tables that need to be removed from disk
5762 */
5763 struct st_find_files_args
5764 {
5765 const char *db;
5766 const char *path;
5767 const char *wild;
5768 bool dir;
5769 List<LEX_STRING> *files;
5770 };
5771
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5772 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5773 void *arg)
5774 {
5775 st_find_files_args *vargs= (st_find_files_args *)arg;
5776 handlerton *hton= plugin_data(plugin, handlerton *);
5777
5778
5779 if (hton->state == SHOW_OPTION_YES && hton->find_files)
5780 if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5781 vargs->dir, vargs->files))
5782 return TRUE;
5783
5784 return FALSE;
5785 }
5786
5787 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5788 ha_find_files(THD *thd,const char *db,const char *path,
5789 const char *wild, bool dir, List<LEX_STRING> *files)
5790 {
5791 int error= 0;
5792 DBUG_ENTER("ha_find_files");
5793 DBUG_PRINT("enter", ("db: '%s' path: '%s' wild: '%s' dir: %d",
5794 db, path, wild ? wild : "NULL", dir));
5795 st_find_files_args args= {db, path, wild, dir, files};
5796
5797 plugin_foreach(thd, find_files_handlerton,
5798 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5799 /* The return value is not currently used */
5800 DBUG_RETURN(error);
5801 }
5802
5803 /**
5804 Ask handler if the table exists in engine.
5805 @retval
5806 HA_ERR_NO_SUCH_TABLE Table does not exist
5807 @retval
5808 HA_ERR_TABLE_EXIST Table exists
5809 @retval
5810 \# Error code
5811 */
5812 struct st_table_exists_in_engine_args
5813 {
5814 const char *db;
5815 const char *name;
5816 int err;
5817 };
5818
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5819 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5820 void *arg)
5821 {
5822 st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5823 handlerton *hton= plugin_data(plugin, handlerton *);
5824
5825 int err= HA_ERR_NO_SUCH_TABLE;
5826
5827 if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5828 err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5829
5830 vargs->err = err;
5831 if (vargs->err == HA_ERR_TABLE_EXIST)
5832 return TRUE;
5833
5834 return FALSE;
5835 }
5836
/**
  Ask every storage engine whether the table exists in the engine.

  @return HA_ERR_TABLE_EXIST as soon as one engine reports the table,
          otherwise the answer recorded by the last engine visited
          (HA_ERR_NO_SUCH_TABLE when none report it).
*/
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
{
  DBUG_ENTER("ha_table_exists_in_engine");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
  st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
  plugin_foreach(thd, table_exists_in_engine_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
  DBUG_PRINT("exit", ("error: %d", args.err));
  DBUG_RETURN(args.err);
}
5847
5848 /**
5849 Prepare (sub-) sequences of joins in this statement
5850 which may be pushed to each storage engine for execution.
5851 */
5852 struct st_make_pushed_join_args
5853 {
5854 const AQP::Join_plan* plan; // Query plan provided by optimizer
5855 int err; // Error code to return.
5856 };
5857
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5858 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5859 void *arg)
5860 {
5861 st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5862 handlerton *hton= plugin_data(plugin, handlerton *);
5863
5864 if (hton && hton->make_pushed_join)
5865 {
5866 const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5867 if (unlikely(error))
5868 {
5869 vargs->err = error;
5870 return TRUE;
5871 }
5872 }
5873 return FALSE;
5874 }
5875
/**
  Offer the optimizer's query plan to every storage engine so each can
  prepare pushed-down join execution.

  @return 0 on success, or the first non-zero error reported by an engine.
*/
int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
{
  DBUG_ENTER("ha_make_pushed_joins");
  st_make_pushed_join_args args= {plan, 0};
  plugin_foreach(thd, make_pushed_join_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
  DBUG_PRINT("exit", ("error: %d", args.err));
  DBUG_RETURN(args.err);
}
5885
5886 /*
5887 TODO: change this into a dynamic struct
5888 List<handlerton> does not work as
5889 1. binlog_end is called when MEM_ROOT is gone
5890 2. cannot work with thd MEM_ROOT as memory should be freed
5891 */
5892 #define MAX_HTON_LIST_ST 63
5893 struct hton_list_st
5894 {
5895 handlerton *hton[MAX_HTON_LIST_ST];
5896 uint sz;
5897 };
5898
5899 struct binlog_func_st
5900 {
5901 enum_binlog_func fn;
5902 void *arg;
5903 };
5904
5905 /** @brief
5906 Listing handlertons first to avoid recursive calls and deadlock
5907 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5908 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5909 {
5910 hton_list_st *hton_list= (hton_list_st *)arg;
5911 handlerton *hton= plugin_data(plugin, handlerton *);
5912 if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5913 {
5914 uint sz= hton_list->sz;
5915 if (sz == MAX_HTON_LIST_ST-1)
5916 {
5917 /* list full */
5918 return FALSE;
5919 }
5920 hton_list->hton[sz]= hton;
5921 hton_list->sz= sz+1;
5922 }
5923 return FALSE;
5924 }
5925
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5926 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5927 {
5928 hton_list_st hton_list;
5929 uint i, sz;
5930
5931 hton_list.sz= 0;
5932 plugin_foreach(thd, binlog_func_list,
5933 MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5934
5935 for (i= 0, sz= hton_list.sz; i < sz ; i++)
5936 hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5937 return FALSE;
5938 }
5939
5940 #ifdef HAVE_NDB_BINLOG
5941
/** Dispatch BFN_RESET_LOGS to every engine with a binlog_func hook.
    @return Always 0. */
int ha_reset_logs(THD *thd)
{
  binlog_func_st bfn= {BFN_RESET_LOGS, 0};
  binlog_func_foreach(thd, &bfn);
  return 0;
}

/** Dispatch BFN_RESET_SLAVE to every engine with a binlog_func hook. */
void ha_reset_slave(THD* thd)
{
  binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
  binlog_func_foreach(thd, &bfn);
}

/** Dispatch BFN_BINLOG_WAIT to every engine with a binlog_func hook. */
void ha_binlog_wait(THD* thd)
{
  binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
  binlog_func_foreach(thd, &bfn);
}

/** Dispatch BFN_BINLOG_PURGE_FILE, passing the binlog file name.
    @return Always 0. */
int ha_binlog_index_purge_file(THD *thd, const char *file)
{
  binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
  binlog_func_foreach(thd, &bfn);
  return 0;
}
5967
5968 struct binlog_log_query_st
5969 {
5970 enum_binlog_command binlog_command;
5971 const char *query;
5972 uint query_length;
5973 const char *db;
5974 const char *table_name;
5975 };
5976
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)5977 static my_bool binlog_log_query_handlerton2(THD *thd,
5978 handlerton *hton,
5979 void *args)
5980 {
5981 struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
5982 if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5983 hton->binlog_log_query(hton, thd,
5984 b->binlog_command,
5985 b->query,
5986 b->query_length,
5987 b->db,
5988 b->table_name);
5989 return FALSE;
5990 }
5991
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)5992 static my_bool binlog_log_query_handlerton(THD *thd,
5993 plugin_ref plugin,
5994 void *args)
5995 {
5996 return binlog_log_query_handlerton2(thd, plugin_data(plugin, handlerton *), args);
5997 }
5998
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,uint query_length,const char * db,const char * table_name)5999 void ha_binlog_log_query(THD *thd, handlerton *hton,
6000 enum_binlog_command binlog_command,
6001 const char *query, uint query_length,
6002 const char *db, const char *table_name)
6003 {
6004 struct binlog_log_query_st b;
6005 b.binlog_command= binlog_command;
6006 b.query= query;
6007 b.query_length= query_length;
6008 b.db= db;
6009 b.table_name= table_name;
6010 if (hton == 0)
6011 plugin_foreach(thd, binlog_log_query_handlerton,
6012 MYSQL_STORAGE_ENGINE_PLUGIN, &b);
6013 else
6014 binlog_log_query_handlerton2(thd, hton, &b);
6015 }
6016 #endif
6017
ha_binlog_end(THD * thd)6018 int ha_binlog_end(THD* thd)
6019 {
6020 binlog_func_st bfn= {BFN_BINLOG_END, 0};
6021 binlog_func_foreach(thd, &bfn);
6022 return 0;
6023 }
6024
6025 /**
6026 Calculate cost of 'index only' scan for given index and number of records
6027
6028 @param keynr Index number
6029 @param records Estimated number of records to be retrieved
6030
6031 @note
6032 It is assumed that we will read trough the whole key range and that all
6033 key blocks are half full (normally things are much better). It is also
6034 assumed that each time we read the next key from the index, the handler
6035 performs a random seek, thus the cost is proportional to the number of
6036 blocks read.
6037
6038 @todo
6039 Consider joining this function and handler::read_time() into one
6040 handler::read_time(keynr, records, ranges, bool index_only) function.
6041
6042 @return
6043 Estimated cost of 'index only' scan
6044 */
6045
index_only_read_time(uint keynr,double records)6046 double handler::index_only_read_time(uint keynr, double records)
6047 {
6048 double read_time;
6049 uint keys_per_block= (stats.block_size/2/
6050 (table_share->key_info[keynr].key_length + ref_length) +
6051 1);
6052 read_time=((double) (records + keys_per_block-1) /
6053 (double) keys_per_block);
6054 return read_time;
6055 }
6056
6057
6058 /**
6059 Check if key has partially-covered columns
6060
6061 We can't use DS-MRR to perform range scans when the ranges are over
6062 partially-covered keys, because we'll not have full key part values
6063 (we'll have their prefixes from the index) and will not be able to check
6064 if we've reached the end the range.
6065
6066 @param keyno Key to check
6067
6068 @todo
6069 Allow use of DS-MRR in cases where the index has partially-covered
6070 components but they are not used for scanning.
6071
6072 @retval TRUE Yes
6073 @retval FALSE No
6074 */
6075
key_uses_partial_cols(TABLE * table,uint keyno)6076 bool key_uses_partial_cols(TABLE *table, uint keyno)
6077 {
6078 KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6079 KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6080 for (; kp != kp_end; kp++)
6081 {
6082 if (!kp->field->part_of_key.is_set(keyno))
6083 return TRUE;
6084 }
6085 return FALSE;
6086 }
6087
6088 /****************************************************************************
6089 * Default MRR implementation (MRR to non-MRR converter)
6090 ***************************************************************************/
6091
6092 /**
6093 Get cost and other information about MRR scan over a known list of ranges
6094
6095 Calculate estimated cost and other information about an MRR scan for given
6096 sequence of ranges.
6097
6098 @param keyno Index number
6099 @param seq Range sequence to be traversed
6100 @param seq_init_param First parameter for seq->init()
6101 @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
6102 can't efficiently determine it
6103 @param bufsz INOUT IN: Size of the buffer available for use
6104 OUT: Size of the buffer that is expected to be actually
6105 used, or 0 if buffer is not needed.
6106 @param flags INOUT A combination of HA_MRR_* flags
6107 @param cost OUT Estimated cost of MRR access
6108
6109 @note
6110 This method (or an overriding one in a derived class) must check for
6111 thd->killed and return HA_POS_ERROR if it is not zero. This is required
6112 for a user to be able to interrupt the calculation by killing the
6113 connection/query.
6114
6115 @retval
6116 HA_POS_ERROR Error or the engine is unable to perform the requested
6117 scan. Values of OUT parameters are undefined.
6118 @retval
6119 other OK, *cost contains cost of the scan, *bufsz and *flags
6120 contain scan parameters.
6121 */
6122
ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges_arg,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows= 0;
  uint n_ranges=0;
  THD *thd= current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);

  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range))
  {
    /* Allow a KILL to interrupt a potentially long estimation loop. */
    if (unlikely(thd->killed != 0))
      return HA_POS_ERROR;

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG)
    {
      /* In this case tmp_min_flag contains the handler-read-function */
      range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
      min_endp= &range.start_key;
      max_endp= NULL;
    }
    else
    {
      /* A zero-length endpoint means the range is open on that side. */
      min_endp= range.start_key.length? &range.start_key : NULL;
      max_endp= range.end_key.length? &range.end_key : NULL;
    }
    /*
      Get the number of rows in the range. This is done by calling
      records_in_range() unless:

        1) The range is an equality range and the index is unique.
           There cannot be more than one matching row, so 1 is
           assumed. Note that it is possible that the correct number
           is actually 0, so the row estimate may be too high in this
           case. Also note: ranges of the form "x IS NULL" may have more
           than 1 mathing row so records_in_range() is called for these.
        2) a) The range is an equality range but the index is either
              not unique or all of the keyparts are not used.
           b) The user has requested that index statistics should be used
              for equality ranges to avoid the incurred overhead of
              index dives in records_in_range().
           c) Index statistics is available.
           Ranges of the form "x IS NULL" will not use index statistics
           because the number of rows with this value are likely to be
           very different than the values in the index statistics.
    */
    int keyparts_used= 0;
    if ((range.range_flag & UNIQUE_RANGE) &&                        // 1)
        !(range.range_flag & NULL_RANGE))
      rows= 1; /* there can be at most one row */
    else if ((range.range_flag & EQ_RANGE) &&                       // 2a)
             (range.range_flag & USE_INDEX_STATISTICS) &&           // 2b)
             (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
             table->key_info[keyno].rec_per_key[keyparts_used-1] && // 2c)
             !(range.range_flag & NULL_RANGE))
      rows= table->key_info[keyno].rec_per_key[keyparts_used-1];
    else
    {
      /* Fall back to an index dive for this range. */
      DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
      DBUG_ASSERT(min_endp || max_endp);
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
                                                        max_endp)))
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  if (total_rows != HA_POS_ERROR)
  {
    /* The following calculation is the same as in multi_range_read_info(): */
    *flags|= HA_MRR_USE_DEFAULT_IMPL;
    *flags|= HA_MRR_SUPPORT_SORTED;

    DBUG_ASSERT(cost->is_zero());
    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
      cost->add_io(index_only_read_time(keyno, total_rows) *
                   Cost_estimate::IO_BLOCK_READ_COST());
    else
      cost->add_io(read_time(keyno, n_ranges, total_rows) *
                   Cost_estimate::IO_BLOCK_READ_COST());
    /* Small constant keeps the cost non-zero even for empty ranges. */
    cost->add_cpu(total_rows * ROW_EVALUATE_COST + 0.01);
  }
  return total_rows;
}
6222
6223
6224 /**
6225 Get cost and other information about MRR scan over some sequence of ranges
6226
6227 Calculate estimated cost and other information about an MRR scan for some
6228 sequence of ranges.
6229
6230 The ranges themselves will be known only at execution phase. When this
6231 function is called we only know number of ranges and a (rough) E(#records)
6232 within those ranges.
6233
6234 Currently this function is only called for "n-keypart singlepoint" ranges,
6235 i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6236
6237 The flags parameter is a combination of those flags: HA_MRR_SORTED,
6238 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6239
6240 @param keyno Index number
6241 @param n_ranges Estimated number of ranges (i.e. intervals) in the
6242 range sequence.
6243 @param n_rows Estimated total number of records contained within all
6244 of the ranges
6245 @param bufsz INOUT IN: Size of the buffer available for use
6246 OUT: Size of the buffer that will be actually used, or
6247 0 if buffer is not needed.
6248 @param flags INOUT A combination of HA_MRR_* flags
6249 @param cost OUT Estimated cost of MRR access
6250
6251 @retval
6252 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6253 parameters.
6254 @retval
6255 other Error or can't perform the requested scan
6256 */
6257
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6258 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6259 uint *bufsz, uint *flags,
6260 Cost_estimate *cost)
6261 {
6262 *bufsz= 0; /* Default implementation doesn't need a buffer */
6263
6264 *flags|= HA_MRR_USE_DEFAULT_IMPL;
6265 *flags|= HA_MRR_SUPPORT_SORTED;
6266
6267 DBUG_ASSERT(cost->is_zero());
6268
6269 /* Produce the same cost as non-MRR code does */
6270 if (*flags & HA_MRR_INDEX_ONLY)
6271 cost->add_io(index_only_read_time(keyno, n_rows) *
6272 Cost_estimate::IO_BLOCK_READ_COST());
6273 else
6274 cost->add_io(read_time(keyno, n_ranges, n_rows) *
6275 Cost_estimate::IO_BLOCK_READ_COST());
6276 return 0;
6277 }
6278
6279
6280 /**
6281 Initialize the MRR scan
6282
6283 Initialize the MRR scan. This function may do heavyweight scan
6284 initialization like row prefetching/sorting/etc (NOTE: but better not do
6285 it here as we may not need it, e.g. if we never satisfy WHERE clause on
6286 previous tables. For many implementations it would be natural to do such
6287 initializations in the first multi_read_range_next() call)
6288
6289 mode is a combination of the following flags: HA_MRR_SORTED,
6290 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6291
6292 @param seq Range sequence to be traversed
6293 @param seq_init_param First parameter for seq->init()
6294 @param n_ranges Number of ranges in the sequence
6295 @param mode Flags, see the description section for the details
6296 @param buf INOUT: memory buffer to be used
6297
6298 @note
6299 One must have called index_init() before calling this function. Several
6300 multi_range_read_init() calls may be made in course of one query.
6301
6302 Until WL#2623 is done (see its text, section 3.2), the following will
6303 also hold:
6304 The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6305 then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6306 This property will only be used by NDB handler until WL#2623 is done.
6307
6308 Buffer memory management is done according to the following scenario:
6309 The caller allocates the buffer and provides it to the callee by filling
6310 the members of HANDLER_BUFFER structure.
6311 The callee consumes all or some fraction of the provided buffer space, and
6312 sets the HANDLER_BUFFER members accordingly.
6313 The callee may use the buffer memory until the next multi_range_read_init()
6314 call is made, all records have been read, or until index_end() call is
6315 made, whichever comes first.
6316
6317 @retval 0 OK
6318 @retval 1 Error
6319 */
6320
6321 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6322 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6323 uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6324 {
6325 DBUG_ENTER("handler::multi_range_read_init");
6326 mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6327 mrr_funcs= *seq_funcs;
6328 mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6329 mrr_have_range= FALSE;
6330 DBUG_RETURN(0);
6331 }
6332
6333
6334 /**
6335 Get next record in MRR scan
6336
6337 Default MRR implementation: read the next record
6338
6339 @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6340 Otherwise, the opaque value associated with the range
6341 that contains the returned record.
6342
6343 @retval 0 OK
6344 @retval other Error code
6345 */
6346
int handler::multi_range_read_next(char **range_info)
{
  int result= HA_ERR_END_OF_FILE;
  int range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  if (!mrr_have_range)
  {
    /* First call after multi_range_read_init(): fetch the first range. */
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /*
        Unique equality range: at most one row exists, so skip
        read_range_next(). If the engine reports a semi-consistent
        read, the same range must be scanned again.
      */
      if (was_semi_consistent_read())
        goto scan_it_again;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                               &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                               &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  /* Hand back the opaque id of the range the returned record belongs to. */
  *range_info= mrr_cur_range.ptr;
  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}
6396
6397
6398 /****************************************************************************
6399 * DS-MRR implementation
6400 ***************************************************************************/
6401
6402 /**
6403 DS-MRR: Initialize and start MRR scan
6404
6405 Initialize and start the MRR scan. Depending on the mode parameter, this
6406 may use default or DS-MRR implementation.
6407
6408 The DS-MRR implementation will use a second handler object (h2) for
6409 doing scan on the index:
6410 - on the first call to this function the h2 handler will be created
6411 and h2 will be opened using the same index as the main handler
6412 is set to use. The index scan on the main index will be closed
6413 and it will be re-opened to read records from the table using either
6414 no key or the primary key. The h2 handler will be deleted when
6415 reset() is called (which should happen on the end of the statement).
6416 - when dsmrr_close() is called the index scan on h2 is closed.
6417 - on following calls to this function one of the following must be valid:
6418 a. if dsmrr_close has been called:
6419 the main handler (h) must be open on an index, h2 will be opened
6420 using this index, and the index on h will be closed and
6421 h will be re-opened to read reads from the table using either
6422 no key or the primary key.
6423 b. dsmrr_close has not been called:
6424 h2 will already be open, the main handler h must be set up
6425 to read records from the table (handler->inited is RND) either
6426 using the primary index or using no index at all.
6427
6428 @param h_arg Table handler to be used
6429 @param seq_funcs Interval sequence enumeration functions
6430 @param seq_init_param Interval sequence enumeration parameter
6431 @param n_ranges Number of ranges in the sequence.
6432 @param mode HA_MRR_* modes to use
6433 @param buf INOUT Buffer to use
6434
6435 @retval 0 Ok, Scan started.
6436 @retval other Error
6437 */
6438
int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                           void *seq_init_param, uint n_ranges, uint mode,
                           HANDLER_BUFFER *buf)
{
  uint elem_size;
  int retval= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_init");
  THD *thd= h_arg->table->in_use;          // current THD

  /*
    index_merge may invoke a scan on an object for which dsmrr_info[_const]
    has not been called, so set the owner handler here as well.
  */
  h= h_arg;
  /* Fall back to the default MRR implementation if DS-MRR is disabled by
     @@optimizer_switch, explicitly requested, or sorted output is needed. */
  if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
      mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))  // DS-MRR doesn't sort
  {
    use_default_impl= TRUE;
    retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                              n_ranges, mode, buf);
    DBUG_RETURN(retval);
  }

  /*
    This assert will hit if we have pushed an index condition to the
    primary key index and then "change our mind" and use a different
    index for retrieving data with MRR. One of the following criteria
    must be true:
      1. We have not pushed an index conditon on this handler.
      2. We have pushed an index condition and this is on the currently used
         index.
      3. We have pushed an index condition but this is not for the primary key.
      4. We have pushed an index condition and this has been transferred to
         the clone (h2) of the handler object.
  */
  DBUG_ASSERT(!h->pushed_idx_cond ||
              h->pushed_idx_cond_keyno == h->active_index ||
              h->pushed_idx_cond_keyno != table->s->primary_key ||
              (h2 && h->pushed_idx_cond_keyno == h2->active_index));

  rowids_buf= buf->buffer;

  /* With association, each buffer element carries a range id next to the
     rowid. */
  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

  if (is_mrr_assoc)
  {
    DBUG_ASSERT(!thd->status_var_aggregated);
    status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
  }

  /* Round the buffer end down to a whole number of elements. */
  rowids_buf_end= buf->buffer_end;
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  rowids_buf_last= rowids_buf +
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
                      elem_size;
  rowids_buf_end= rowids_buf_last;

  /*
    The DS-MRR scan uses a second handler object (h2) for doing the
    index scan. Create this by cloning the primary handler
    object. The h2 handler object is deleted when DsMrr_impl::reset()
    is called.
  */
  if (!h2)
  {
    handler *new_h2;
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
      @todo Is this still the case? Leave it as it is for now but could
      likely be removed?
    */
    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
      DBUG_RETURN(1);

    if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
      DBUG_RETURN(1);
    h2= new_h2; /* Ok, now can put it into h2 */
    table->prepare_for_position();
  }

  /*
    Open the index scan on h2 using the key from the primary handler.
  */
  if (h2->active_index == MAX_KEY)
  {
    DBUG_ASSERT(h->active_index != MAX_KEY);
    const uint mrr_keyno= h->active_index;

    if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
      goto error;

    /* h2 only reads the index; rows are fetched via h->rnd_pos(). */
    if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
      goto error;

    if ((retval= h2->ha_index_init(mrr_keyno, false)))
      goto error;

    // Transfer ICP from h to h2
    if (mrr_keyno == h->pushed_idx_cond_keyno)
    {
      if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
      {
        retval= 1;
        goto error;
      }
    }
    else
    {
      // Cancel any potentially previously pushed index conditions
      h2->cancel_pushed_idx_cond();
    }
  }
  else
  {
    /*
      h2 has already an open index. This happens when the DS-MRR scan
      is re-started without closing it first. In this case the primary
      handler must be used for reading records from the table, ie. it
      must not be opened for doing a new range scan. In this case
      the active_index must either not be set or be the primary key.
    */
    DBUG_ASSERT(h->inited == handler::RND);
    DBUG_ASSERT(h->active_index == MAX_KEY ||
                h->active_index == table->s->primary_key);
  }

  /*
    The index scan is now transferred to h2 and we can close the open
    index scan on the primary handler.
  */
  if (h->inited == handler::INDEX)
  {
    /*
      Calling h->ha_index_end() will invoke dsmrr_close() for this object,
      which will close the index scan on h2. We need to keep it open, so
      temporarily move h2 out of the DsMrr object.
    */
    handler *save_h2= h2;
    h2= NULL;
    retval= h->ha_index_end();
    h2= save_h2;
    if (retval)
      goto error;
  }

  /*
    Verify consistency between h and h2.
  */
  DBUG_ASSERT(h->inited != handler::INDEX);
  DBUG_ASSERT(h->active_index == MAX_KEY ||
              h->active_index == table->s->primary_key);
  DBUG_ASSERT(h2->inited == handler::INDEX);
  DBUG_ASSERT(h2->active_index != MAX_KEY);
  DBUG_ASSERT(h->m_lock_type == h2->m_lock_type);

  /* The actual range enumeration is done by h2's default MRR code. */
  if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                  n_ranges, mode, buf)))
    goto error;

  /* Collect the first portion of rowids into the buffer and sort them. */
  if ((retval= dsmrr_fill_buffer()))
    goto error;

  /*
    If the above call has scanned through all intervals in *seq, then
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof)
    buf->end_of_used_area= rowids_buf_last;

  /*
    h->inited == INDEX may occur when 'range checked for each record' is
    used.
  */
  if ((h->inited != handler::RND) &&
      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
       (h->ha_rnd_init(FALSE))))
  {
    retval= 1;
    goto error;
  }

  use_default_impl= FALSE;
  h->mrr_funcs= *seq_funcs;

  DBUG_RETURN(0);
error:
  /* Undo the h2 setup; the clone is fully torn down on any failure. */
  h2->ha_index_or_rnd_end();
  h2->ha_external_lock(thd, F_UNLCK);
  h2->close();
  delete h2;
  h2= NULL;
  DBUG_ASSERT(retval != 0);
  DBUG_RETURN(retval);
}
6634
6635
dsmrr_close()6636 void DsMrr_impl::dsmrr_close()
6637 {
6638 DBUG_ENTER("DsMrr_impl::dsmrr_close");
6639
6640 // If there is an open index on h2, then close it
6641 if (h2 && h2->active_index != MAX_KEY)
6642 {
6643 h2->ha_index_or_rnd_end();
6644 h2->ha_external_lock(current_thd, F_UNLCK);
6645 }
6646 use_default_impl= true;
6647 DBUG_VOID_RETURN;
6648 }
6649
6650
reset()6651 void DsMrr_impl::reset()
6652 {
6653 DBUG_ENTER("DsMrr_impl::reset");
6654
6655 if (h2)
6656 {
6657 // Close any ongoing DS-MRR scan
6658 dsmrr_close();
6659
6660 // Close and delete the h2 handler
6661 h2->close();
6662 delete h2;
6663 h2= NULL;
6664 }
6665 DBUG_VOID_RETURN;
6666 }
6667
6668
rowid_cmp(void * h,uchar * a,uchar * b)6669 static int rowid_cmp(void *h, uchar *a, uchar *b)
6670 {
6671 return ((handler*)h)->cmp_ref(a, b);
6672 }
6673
6674
6675 /**
6676 DS-MRR: Fill the buffer with rowids and sort it by rowid
6677
6678 {This is an internal function of DiskSweep MRR implementation}
6679 Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6680 buffer. When the buffer is full or scan is completed, sort the buffer by
6681 rowid and return.
6682
6683 The function assumes that rowids buffer is empty when it is invoked.
6684
6685 @param h Table handler
6686
6687 @retval 0 OK, the next portion of rowids is in the buffer,
6688 properly ordered
6689 @retval other Error
6690 */
6691
int DsMrr_impl::dsmrr_fill_buffer()
{
  char *range_info;
  int res= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
  DBUG_ASSERT(rowids_buf < rowids_buf_end);

  /* Scan the index on h2 until the buffer is full or the ranges run out. */
  rowids_buf_cur= rowids_buf;
  while ((rowids_buf_cur < rowids_buf_end) &&
         !(res= h2->handler::multi_range_read_next(&range_info)))
  {
    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
    /* Let the MRR user filter out index tuples before rowids are stored. */
    if (h2->mrr_funcs.skip_index_tuple &&
        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
      continue;

    /* Put rowid, or {rowid, range_id} pair into the buffer */
    h2->position(table->record[0]);
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
    rowids_buf_cur += h2->ref_length;

    if (is_mrr_assoc)
    {
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
      rowids_buf_cur += sizeof(void*);
    }
  }

  /* Only EOF is expected here; any other non-zero result is an error. */
  if (res && res != HA_ERR_END_OF_FILE)
    DBUG_RETURN(res);
  dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);

  /* Sort the buffer contents by rowid */
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;

  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
            (void*)h);
  /* Prepare for consumption by dsmrr_next(): [rowids_buf, rowids_buf_last). */
  rowids_buf_last= rowids_buf_cur;
  rowids_buf_cur=  rowids_buf;
  DBUG_RETURN(0);
}
6734
6735
6736 /*
6737 DS-MRR implementation: multi_range_read_next() function
6738 */
6739
int DsMrr_impl::dsmrr_next(char **range_info)
{
  int res;
  uchar *cur_range_info= 0;
  uchar *rowid;

  if (use_default_impl)
    return h->handler::multi_range_read_next(range_info);

  do
  {
    /* Buffer exhausted: refill it from the index scan unless at EOF. */
    if (rowids_buf_cur == rowids_buf_last)
    {
      if (dsmrr_eof)
      {
        res= HA_ERR_END_OF_FILE;
        goto end;
      }

      res= dsmrr_fill_buffer();
      if (res)
        goto end;
    }

    /* return eof if there are no rowids in the buffer after re-fill attempt */
    if (rowids_buf_cur == rowids_buf_last)
    {
      res= HA_ERR_END_OF_FILE;
      goto end;
    }
    rowid= rowids_buf_cur;

    /* With association, the range id is stored right after the rowid. */
    if (is_mrr_assoc)
      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));

    rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
    /* Let the MRR user reject this record before the table row is fetched. */
    if (h2->mrr_funcs.skip_record &&
        h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
      continue;
    /* Fetch the full row via the primary handler in rowid (disk) order. */
    res= h->rnd_pos(table->record[0], rowid);
    break;
  } while (true);

  if (is_mrr_assoc)
  {
    memcpy(range_info, rowid + h->ref_length, sizeof(void*));
  }
end:
  return res;
}
6790
6791
6792 /*
6793 DS-MRR implementation: multi_range_read_info() function
6794 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)6795 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6796 uint *bufsz, uint *flags, Cost_estimate *cost)
6797 {
6798 ha_rows res MY_ATTRIBUTE((unused));
6799 uint def_flags= *flags;
6800 uint def_bufsz= *bufsz;
6801
6802 /* Get cost/flags/mem_usage of default MRR implementation */
6803 res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6804 &def_flags, cost);
6805 DBUG_ASSERT(!res);
6806
6807 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6808 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6809 {
6810 /* Default implementation is choosen */
6811 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6812 *flags= def_flags;
6813 *bufsz= def_bufsz;
6814 DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6815 }
6816 else
6817 {
6818 /* *flags and *bufsz were set by choose_mrr_impl */
6819 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6820 }
6821 return 0;
6822 }
6823
6824
6825 /*
6826 DS-MRR Implementation: multi_range_read_info_const() function
6827 */
6828
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)6829 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6830 void *seq_init_param, uint n_ranges,
6831 uint *bufsz, uint *flags, Cost_estimate *cost)
6832 {
6833 ha_rows rows;
6834 uint def_flags= *flags;
6835 uint def_bufsz= *bufsz;
6836 /* Get cost/flags/mem_usage of default MRR implementation */
6837 rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
6838 n_ranges, &def_bufsz,
6839 &def_flags, cost);
6840 if (rows == HA_POS_ERROR)
6841 {
6842 /* Default implementation can't perform MRR scan => we can't either */
6843 return rows;
6844 }
6845
6846 /*
6847 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6848 use the default MRR implementation (we need it for UPDATE/DELETE).
6849 Otherwise, make a choice based on cost and mrr* flags of
6850 @@optimizer_switch.
6851 */
6852 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6853 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6854 {
6855 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6856 *flags= def_flags;
6857 *bufsz= def_bufsz;
6858 DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6859 }
6860 else
6861 {
6862 /* *flags and *bufsz were set by choose_mrr_impl */
6863 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6864 }
6865 return rows;
6866 }
6867
6868
6869 /**
6870 DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6871
6872 Make the choice between using Default MRR implementation and DS-MRR.
6873 This function contains common functionality factored out of dsmrr_info()
6874 and dsmrr_info_const(). The function assumes that the default MRR
6875 implementation's applicability requirements are satisfied.
6876
6877 @param keyno Index number
6878 @param rows E(full rows to be retrieved)
6879 @param flags IN MRR flags provided by the MRR user
6880 OUT If DS-MRR is choosen, flags of DS-MRR implementation
6881 else the value is not modified
6882 @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation
6883 else the value is not modified
6884 @param cost IN Cost of default MRR implementation
6885 OUT If DS-MRR is choosen, cost of DS-MRR scan
6886 else the value is not modified
6887
6888 @retval TRUE Default MRR implementation should be used
6889 @retval FALSE DS-MRR implementation should be used
6890 */
6891
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
                                 uint *bufsz, Cost_estimate *cost)
{
  bool res;
  THD *thd= current_thd;
  /*
    DS-MRR is not applicable when:
    - it is disabled by @@optimizer_switch, or
    - the caller needs index-only or key-ordered output (DS-MRR returns
      rows in rowid order), or
    - the scan is on a clustered primary key (rowids are already in key
      order, so sorting rowids gains nothing), or
    - the used key prefix ends in the middle of a column value.
  */
  if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
      *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
      (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
      key_uses_partial_cols(table, keyno))
  {
    /* Use the default implementation, don't modify args: See comments */
    return TRUE;
  }

  /*
    If @@optimizer_switch has "mrr_cost_based" on, we should avoid
    using DS-MRR for queries where it is likely that the records are
    stored in memory. Since there is currently no way to determine
    this, we use a heuristic:
    a) if the storage engine has a memory buffer, DS-MRR is only
       considered if the table size is bigger than the buffer.
    b) if the storage engine does not have a memory buffer, DS-MRR is
       only considered if the table size is bigger than 100MB.
    c) Since there is an initial setup cost of DS-MRR, so it is only
       considered if at least 50 records will be read.
  */
  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))
  {
    /*
      If the storage engine has a database buffer we use this as the
      minimum size the table should have before considering DS-MRR.
    */
    longlong min_file_size= table->file->get_memory_buffer_size();
    if (min_file_size == -1)
    {
      // No estimate for database buffer
      min_file_size= 100 * 1024 * 1024;        // 100 MB
    }

    if (table->file->stats.data_file_length <
        static_cast<ulonglong>(min_file_size) ||
        rows <= 50)
      return true;                 // Use the default implementation
  }

  Cost_estimate dsmrr_cost;
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
    return TRUE;                   // Buffer too small for even one rowid

  bool force_dsmrr;
  /*
    If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
    of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
    allows one to force use of DS-MRR whenever it is applicable without
    affecting other cost-based choices.
  */
  if ((force_dsmrr=
       (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) &&
        !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))) &&
      dsmrr_cost.total_cost() > cost->total_cost())
    dsmrr_cost= *cost;

  if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
  {
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
    *flags &= ~HA_MRR_SUPPORT_SORTED;    /* We can't provide ordered output */
    *cost= dsmrr_cost;
    res= FALSE;
  }
  else
  {
    /* Use the default MRR implementation */
    res= TRUE;
  }
  return res;
}
6968
6969
/* Forward declaration: cost of one sort-rowids-then-sweep-read step. */
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
                                    Cost_estimate *cost);
6972
6973
6974 /**
6975 Get cost of DS-MRR scan
6976
6977 @param keynr Index to be used
6978 @param rows E(Number of rows to be scanned)
6979 @param flags Scan parameters (HA_MRR_* flags)
6980 @param buffer_size INOUT Buffer size
6981 @param cost OUT The cost
6982
6983 @retval FALSE OK
6984 @retval TRUE Error, DS-MRR cannot be used (the buffer is too small
6985 for even 1 rowid)
6986 */
6987
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
                                         uint *buffer_size,
                                         Cost_estimate *cost)
{
  ha_rows rows_in_last_step;
  uint n_full_steps;
  double index_read_cost;

  /*
    Size of one rowid buffer element: the rowid itself, plus a range
    association pointer unless the caller opted out of associations.
  */
  const uint elem_size= h->ref_length +
    sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
  const ha_rows max_buff_entries= *buffer_size / elem_size;

  if (!max_buff_entries)
    return TRUE; /* Buffer has not enough space for even 1 rowid */

  /* Number of iterations we'll make with full buffer */
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);

  /*
    Get numbers of rows we'll be processing in last iteration, with
    non-full buffer
  */
  rows_in_last_step= rows % max_buff_entries;

  DBUG_ASSERT(cost->is_zero());

  if (n_full_steps)
  {
    /* Each full step sorts and sweeps a full buffer of rowids. */
    get_sort_and_sweep_cost(table, max_buff_entries, cost);
    cost->multiply(n_full_steps);
  }
  else
  {
    /*
      Adjust buffer size since only parts of the buffer will be used:
      1. Adjust record estimate for the last scan to reduce likelihood
         of needing more than one scan by adding 20 percent to the
         record estimate and by ensuring this is at least 100 records.
      2. If the estimated needed buffer size is lower than suggested by
         the caller then set it to the estimated buffer size.
    */
    const ha_rows keys_in_buffer=
      max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
    *buffer_size= min<ulong>(*buffer_size,
                             static_cast<ulong>(keys_in_buffer) * elem_size);
  }

  /* Cost of the final (possibly only) partial-buffer step. */
  Cost_estimate last_step_cost;
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
  (*cost)+= last_step_cost;

  /*
    Cost of memory is not included in the total_cost() function and
    thus will not be considered when comparing costs. Still, we
    record it in the cost estimate object for future use.
  */
  cost->add_mem(*buffer_size);

  /* Total cost of all index accesses */
  index_read_cost= h->index_only_read_time(keynr, rows);
  cost->add_io(index_read_cost * Cost_estimate::IO_BLOCK_READ_COST());

  /*
    Add CPU cost for processing records (see
    @handler::multi_range_read_info_const()).
  */
  cost->add_cpu(rows * ROW_EVALUATE_COST);
  return FALSE;
}
7057
7058
7059 /*
7060 Get cost of one sort-and-sweep step
7061
7062 SYNOPSIS
7063 get_sort_and_sweep_cost()
7064 table Table being accessed
7065 nrows Number of rows to be sorted and retrieved
7066 cost OUT The cost
7067
7068 DESCRIPTION
7069 Get cost of these operations:
7070 - sort an array of #nrows ROWIDs using qsort
7071 - read #nrows records from table in a sweep.
7072 */
7073
7074 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7075 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7076 {
7077 DBUG_ASSERT(cost->is_zero());
7078 if (nrows)
7079 {
7080 get_sweep_read_cost(table, nrows, FALSE, cost);
7081
7082 /*
7083 Constant for the cost of doing one key compare operation in the
7084 sort operation. We should have used the existing
7085 ROWID_COMPARE_COST constant here but this would make the cost
7086 estimate of sorting very high for queries accessing many
7087 records. Until this constant is adjusted we introduce a constant
7088 that is more realistic. @todo: Replace this with
7089 ROWID_COMPARE_COST when this have been given a realistic value.
7090 */
7091 const double ROWID_COMPARE_SORT_COST = 0.01;
7092
7093 /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7094 const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7095 cost->add_cpu(cpu_sort);
7096 }
7097 }
7098
7099
7100 /**
7101 Get cost of reading nrows table records in a "disk sweep"
7102
7103 A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
7104 for an ordered sequence of rowids.
7105
7106 We assume hard disk IO. The read is performed as follows:
7107
7108 1. The disk head is moved to the needed cylinder
7109 2. The controller waits for the plate to rotate
7110 3. The data is transferred
7111
7112 Time to do #3 is insignificant compared to #2+#1.
7113
7114 Time to move the disk head is proportional to head travel distance.
7115
7116 Time to wait for the plate to rotate depends on whether the disk head
7117 was moved or not.
7118
7119 If disk head wasn't moved, the wait time is proportional to distance
7120 between the previous block and the block we're reading.
7121
7122 If the head was moved, we don't know how much we'll need to wait for the
7123 plate to rotate. We assume the wait time to be a variate with a mean of
7124 0.5 of full rotation time.
7125
  Our cost units are "random disk seeks". The cost of a random disk seek is
  actually not a constant: it depends on the range of cylinders we're going
  to access. We make it constant by introducing a fuzzy concept of "typical
  datafile length" (it's fuzzy as it's hard to tell whether it should
  include the index file, temporary tables, etc). Then the random seek cost is:
7131
7132 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7133
7134 We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
7135
7136 @param table Table to be accessed
7137 @param nrows Number of rows to retrieve
7138 @param interrupted TRUE <=> Assume that the disk sweep will be
7139 interrupted by other disk IO. FALSE - otherwise.
7140 @param cost OUT The cost.
7141 */
7142
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
                         Cost_estimate *cost)
{
  DBUG_ENTER("get_sweep_read_cost");

  DBUG_ASSERT(cost->is_zero());
  if (nrows > 0)
  {
    /* Number of IO_SIZE blocks in the table's data file, at least one. */
    double n_blocks=
      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
    if (n_blocks < 1.0)                        // When data_file_length is 0
      n_blocks= 1.0;
    /*
      Expected number of distinct blocks hit by nrows random rows
      ("balls into bins" estimate), clamped to at least one.
    */
    double busy_blocks=
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
    if (busy_blocks < 1.0)
      busy_blocks= 1.0;

    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
                       busy_blocks));
    /*
      The random access cost for reading the data pages will be the
      upper limit for the sweep_cost.
    */
    cost->add_io(busy_blocks * Cost_estimate::IO_BLOCK_READ_COST());

    if (!interrupted)
    {
      /* Assume reading is done in one 'sweep' */
      Cost_estimate sweep_cost;
      sweep_cost.add_io(busy_blocks *
                        (DISK_SEEK_BASE_COST +
                         DISK_SEEK_PROP_COST * n_blocks / busy_blocks));
      /*
        For some cases, ex: when only few blocks need to be read
        and the seek distance becomes very large, the sweep cost
        model can produce a cost estimate that is larger than the
        cost of random access. To handle this case, we use the
        sweep cost only when it is less than the random access
        cost.
      */
      if (sweep_cost.get_io_cost() < cost->get_io_cost())
        *cost= sweep_cost;
    }
  }
  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
  DBUG_VOID_RETURN;
}
7190
7191
7192 /****************************************************************************
7193 * DS-MRR implementation ends
7194 ***************************************************************************/
7195
7196 /** @brief
7197 Read first row between two ranges.
7198 Store ranges for future calls to read_range_next.
7199
7200 @param start_key Start key. Is 0 if no min range
7201 @param end_key End key. Is 0 if no max range
7202 @param eq_range_arg Set to 1 if start_key == end_key
7203 @param sorted Set to 1 if result should be sorted per key
7204
7205 @note
7206 Record is read into table->record[0]
7207
7208 @retval
7209 0 Found row
7210 @retval
7211 HA_ERR_END_OF_FILE No rows in range
7212 @retval
7213 \# Error code
7214 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)7215 int handler::read_range_first(const key_range *start_key,
7216 const key_range *end_key,
7217 bool eq_range_arg,
7218 bool sorted /* ignored */)
7219 {
7220 int result;
7221 DBUG_ENTER("handler::read_range_first");
7222
7223 eq_range= eq_range_arg;
7224 set_end_range(end_key, RANGE_SCAN_ASC);
7225
7226 range_key_part= table->key_info[active_index].key_part;
7227
7228 if (!start_key) // Read first record
7229 result= ha_index_first(table->record[0]);
7230 else
7231 result= ha_index_read_map(table->record[0],
7232 start_key->key,
7233 start_key->keypart_map,
7234 start_key->flag);
7235 if (result)
7236 DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7237 ? HA_ERR_END_OF_FILE
7238 : result);
7239
7240 if (compare_key(end_range) <= 0)
7241 {
7242 DBUG_RETURN(0);
7243 }
7244 else
7245 {
7246 /*
7247 The last read row does not fall in the range. So request
7248 storage engine to release row lock if possible.
7249 */
7250 unlock_row();
7251 DBUG_RETURN(HA_ERR_END_OF_FILE);
7252 }
7253 }
7254
7255
7256 /** @brief
7257 Read next row between two endpoints.
7258
7259 @note
7260 Record is read into table->record[0]
7261
7262 @retval
7263 0 Found row
7264 @retval
7265 HA_ERR_END_OF_FILE No rows in range
7266 @retval
7267 \# Error code
7268 */
read_range_next()7269 int handler::read_range_next()
7270 {
7271 int result;
7272 DBUG_ENTER("handler::read_range_next");
7273
7274 if (eq_range)
7275 {
7276 /* We trust that index_next_same always gives a row in range */
7277 DBUG_RETURN(ha_index_next_same(table->record[0],
7278 end_range->key,
7279 end_range->length));
7280 }
7281 result= ha_index_next(table->record[0]);
7282 if (result)
7283 DBUG_RETURN(result);
7284
7285 if (compare_key(end_range) <= 0)
7286 {
7287 DBUG_RETURN(0);
7288 }
7289 else
7290 {
7291 /*
7292 The last read row does not fall in the range. So request
7293 storage engine to release row lock if possible.
7294 */
7295 unlock_row();
7296 DBUG_RETURN(HA_ERR_END_OF_FILE);
7297 }
7298 }
7299
7300
set_end_range(const key_range * range,enum_range_scan_direction direction)7301 void handler::set_end_range(const key_range* range,
7302 enum_range_scan_direction direction)
7303 {
7304 if (range)
7305 {
7306 save_end_range= *range;
7307 end_range= &save_end_range;
7308 range_key_part= table->key_info[active_index].key_part;
7309 key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7310 (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7311 }
7312 else
7313 end_range= NULL;
7314
7315 range_scan_direction= direction;
7316 }
7317
7318
7319 /**
7320 Compare if found key (in row) is over max-value.
7321
7322 @param range range to compare to row. May be 0 for no range
7323
7324 @seealso
7325 key.cc::key_cmp()
7326
7327 @return
7328 The return value is SIGN(key_in_row - range_key):
7329
7330 - 0 : Key is equal to range or 'range' == 0 (no range)
7331 - -1 : Key is less than range
7332 - 1 : Key is larger than range
7333 */
compare_key(key_range * range)7334 int handler::compare_key(key_range *range)
7335 {
7336 int cmp;
7337 if (!range || in_range_check_pushed_down)
7338 return 0; // No max range
7339 cmp= key_cmp(range_key_part, range->key, range->length);
7340 if (!cmp)
7341 cmp= key_compare_result_on_equal;
7342 return cmp;
7343 }
7344
7345
7346 /*
7347 Compare if a found key (in row) is within the range.
7348
7349 This function is similar to compare_key() but checks the range scan
7350 direction to determine if this is a descending scan. This function
7351 is used by the index condition pushdown implementation to determine
7352 if the read record is within the range scan.
7353
7354 @param range Range to compare to row. May be NULL for no range.
7355
7356 @seealso
7357 handler::compare_key()
7358
7359 @return Returns whether the key is within the range
7360
7361 - 0 : Key is equal to range or 'range' == 0 (no range)
7362 - -1 : Key is within the current range
7363 - 1 : Key is outside the current range
7364 */
7365
compare_key_icp(const key_range * range) const7366 int handler::compare_key_icp(const key_range *range) const
7367 {
7368 int cmp;
7369 if (!range)
7370 return 0; // no max range
7371 cmp= key_cmp(range_key_part, range->key, range->length);
7372 if (!cmp)
7373 cmp= key_compare_result_on_equal;
7374 if (range_scan_direction == RANGE_SCAN_DESC)
7375 cmp= -cmp;
7376 return cmp;
7377 }
7378
7379 /**
7380 Change the offsets of all the fields in a key range.
7381
7382 @param range the key range
7383 @param key_part the first key part
7384 @param diff how much to change the offsets with
7385 */
7386 static inline void
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,my_ptrdiff_t diff)7387 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7388 my_ptrdiff_t diff)
7389 {
7390 for (size_t len= 0; len < range->length;
7391 len+= key_part->store_length, ++key_part)
7392 key_part->field->move_field_offset(diff);
7393 }
7394
7395 /**
7396 Check if the key in the given buffer (which is not necessarily
7397 TABLE::record[0]) is within range. Called by the storage engine to
7398 avoid reading too many rows.
7399
7400 @param buf the buffer that holds the key
7401 @retval -1 if the key is within the range
7402 @retval 0 if the key is equal to the end_range key, and
7403 key_compare_result_on_equal is 0
7404 @retval 1 if the key is outside the range
7405 */
int handler::compare_key_in_buffer(const uchar *buf) const
{
  DBUG_ASSERT(end_range != NULL);

  /*
    End range on descending scans is only checked with ICP for now, and then we
    check it with compare_key_icp() instead of this function.
  */
  DBUG_ASSERT(range_scan_direction == RANGE_SCAN_ASC);

  // Make the fields in the key point into the buffer instead of record[0].
  const my_ptrdiff_t diff= buf - table->record[0];
  if (diff != 0)
    move_key_field_offsets(end_range, range_key_part, diff);

  // Compare the key in buf against end_range.
  int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
  if (cmp == 0)
    cmp= key_compare_result_on_equal;

  // Reset the field offsets (must mirror the shift above exactly).
  if (diff != 0)
    move_key_field_offsets(end_range, range_key_part, -diff);

  return cmp;
}
7432
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7433 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7434 key_part_map keypart_map,
7435 enum ha_rkey_function find_flag)
7436 {
7437 int error, error1;
7438 error= index_init(index, 0);
7439 if (!error)
7440 {
7441 error= index_read_map(buf, key, keypart_map, find_flag);
7442 error1= index_end();
7443 }
7444 return error ? error : error1;
7445 }
7446
7447
7448 /**
7449 Returns a list of all known extensions.
7450
7451 No mutexes, worst case race is a minor surplus memory allocation
7452 We have to recreate the extension map if mysqld is restarted (for example
7453 within libmysqld)
7454
7455 @retval
7456 pointer pointer to TYPELIB structure
7457 */
static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
                               void *arg)
{
  List<char> *found_exts= (List<char> *) arg;
  handlerton *hton= plugin_data(plugin, handlerton *);
  handler *file;
  /* Only query engines that are enabled and can create a handler object. */
  if (hton->state == SHOW_OPTION_YES && hton->create &&
      (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
  {
    List_iterator_fast<char> it(*found_exts);
    const char **ext, *old_ext;

    for (ext= file->bas_ext(); *ext; ext++)
    {
      /* De-duplicate: add the extension only if not already collected. */
      while ((old_ext= it++))
      {
        if (!strcmp(old_ext, *ext))
          break;
      }
      if (!old_ext)
        found_exts->push_back((char *) *ext);

      it.rewind();
    }
    delete file;
  }
  return FALSE;                 // FALSE: keep iterating over plugins
}
7486
TYPELIB* ha_known_exts()
{
  TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
  known_extensions->name= "known_exts";
  known_extensions->type_lengths= NULL;

  List<char> found_exts;
  const char **ext, *old_ext;

  /* Extensions that exist regardless of engines: trigger files. */
  found_exts.push_back((char*) TRG_EXT);
  found_exts.push_back((char*) TRN_EXT);

  /* Collect file extensions from every enabled storage engine. */
  plugin_foreach(NULL, exts_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

  /* One extra slot for the NULL terminator of the type_names array. */
  size_t arr_length= sizeof(char *)* (found_exts.elements+1);
  ext= (const char **) sql_alloc(arr_length);

  DBUG_ASSERT(NULL != ext);
  known_extensions->count= found_exts.elements;
  known_extensions->type_names= ext;

  /* Copy the list into the NULL-terminated TYPELIB array. */
  List_iterator_fast<char> it(found_exts);
  while ((old_ext= it++))
    *ext++= old_ext;
  *ext= NULL;
  return known_extensions;
}
7515
7516
stat_print(THD * thd,const char * type,uint type_len,const char * file,uint file_len,const char * status,uint status_len)7517 static bool stat_print(THD *thd, const char *type, uint type_len,
7518 const char *file, uint file_len,
7519 const char *status, uint status_len)
7520 {
7521 Protocol *protocol= thd->protocol;
7522 protocol->prepare_for_resend();
7523 protocol->store(type, type_len, system_charset_info);
7524 protocol->store(file, file_len, system_charset_info);
7525 protocol->store(status, status_len, system_charset_info);
7526 if (protocol->write())
7527 return TRUE;
7528 return FALSE;
7529 }
7530
7531
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7532 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7533 void *arg)
7534 {
7535 enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7536 handlerton *hton= plugin_data(plugin, handlerton *);
7537 if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7538 hton->show_status(hton, thd, stat_print, stat))
7539 return TRUE;
7540 return FALSE;
7541 }
7542
/*
  Implements SHOW ENGINE ... STATUS/MUTEX. When db_type is NULL, every
  enabled engine is asked for status; otherwise only the given engine.
  Returns TRUE on error (metadata or row write failure).
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  List<Item> field_list;
  Protocol *protocol= thd->protocol;
  bool result;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (protocol->send_result_set_metadata(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return TRUE;

  if (db_type == NULL)
  {
    /* No engine given: iterate over all storage engine plugins. */
    result= plugin_foreach(thd, showstat_handlerton,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
  }
  else
  {
    if (db_type->state != SHOW_OPTION_YES)
    {
      /* Engine is installed but disabled: report that as a status row. */
      const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
      result= stat_print(thd, name->str, name->length,
                         "", 0, "DISABLED", 8) ? 1 : 0;
    }
    else
    {
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("+d,simulate_net_write_failure"););
      result= db_type->show_status &&
              db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("-d,simulate_net_write_failure"););
    }
  }

  /* Only send EOF if the result set was written without error. */
  if (!result)
    my_eof(thd);
  return result;
}
7585
flush_changed_page_bitmaps_handlerton(THD * unused1,plugin_ref plugin,void * unused2)7586 static my_bool flush_changed_page_bitmaps_handlerton(THD *unused1,
7587 plugin_ref plugin,
7588 void *unused2)
7589 {
7590 handlerton *hton= plugin_data(plugin, handlerton *);
7591
7592 if (hton->flush_changed_page_bitmaps == NULL)
7593 return FALSE;
7594
7595 return hton->flush_changed_page_bitmaps();
7596 }
7597
/* Ask every storage engine to flush its changed-page bitmaps to disk. */
bool ha_flush_changed_page_bitmaps()
{
  return plugin_foreach(NULL, flush_changed_page_bitmaps_handlerton,
                        MYSQL_STORAGE_ENGINE_PLUGIN, NULL);
}
7603
purge_changed_page_bitmaps_handlerton(THD * unused1,plugin_ref plugin,void * lsn)7604 static my_bool purge_changed_page_bitmaps_handlerton(THD *unused1,
7605 plugin_ref plugin,
7606 void *lsn)
7607 {
7608 handlerton *hton= plugin_data(plugin, handlerton *);
7609
7610 if (hton->purge_changed_page_bitmaps == NULL)
7611 return FALSE;
7612
7613 return hton->purge_changed_page_bitmaps(*(ulonglong *)lsn);
7614 }
7615
/* Purge changed-page bitmap data below the given LSN in all engines. */
bool ha_purge_changed_page_bitmaps(ulonglong lsn)
{
  return plugin_foreach(NULL, purge_changed_page_bitmaps_handlerton,
                        MYSQL_STORAGE_ENGINE_PLUGIN, &lsn);
}
7621
purge_archive_logs_handlerton(THD * thd,plugin_ref plugin,void * arg)7622 static my_bool purge_archive_logs_handlerton(THD *thd, plugin_ref plugin,
7623 void *arg)
7624 {
7625 ulong before_timestamp= *(ulong*) arg;
7626 handlerton *hton= plugin_data(plugin, handlerton *);
7627
7628 if (hton->purge_archive_logs == NULL)
7629 return FALSE;
7630
7631 return hton->purge_archive_logs(hton, before_timestamp, NULL);
7632 }
7633
ha_purge_archive_logs(THD * thd,handlerton * db_type,void * args)7634 bool ha_purge_archive_logs(THD *thd, handlerton *db_type, void* args)
7635 {
7636 if (db_type == NULL)
7637 return plugin_foreach(thd, purge_archive_logs_handlerton,
7638 MYSQL_STORAGE_ENGINE_PLUGIN, args);
7639
7640 return false;
7641 }
7642
purge_archive_logs_to_handlerton(THD * thd,plugin_ref plugin,void * arg)7643 static my_bool purge_archive_logs_to_handlerton(THD *thd, plugin_ref plugin,
7644 void *arg)
7645 {
7646 const char* to_filename= (const char*) arg;
7647 handlerton *hton= plugin_data(plugin, handlerton *);
7648
7649 if (hton->purge_archive_logs == NULL)
7650 return FALSE;
7651
7652 return hton->purge_archive_logs(hton, 0, to_filename);
7653 }
7654
ha_purge_archive_logs_to(THD * thd,handlerton * db_type,void * args)7655 bool ha_purge_archive_logs_to(THD *thd, handlerton *db_type, void* args)
7656 {
7657 if (db_type == NULL)
7658 return plugin_foreach(thd, purge_archive_logs_to_handlerton,
7659 MYSQL_STORAGE_ENGINE_PLUGIN, args);
7660
7661 return false;
7662 }
7663
7664 /*
  Check whether the conditions for row-based binary logging are
  met for the table.
7667
7668 A row in the given table should be replicated if:
7669 - Row-based replication is enabled in the current thread
7670 - The binlog is enabled
7671 - It is not a temporary table
7672 - The binary log is open
7673 - The database the table resides in shall be binlogged (binlog_*_db rules)
7674 - table is not mysql.event
7675 */
7676
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
{
  /*
    The per-table part of the check is cached in the TABLE_SHARE;
    -1 means "not computed yet".
  */
  if (table->s->cached_row_logging_check == -1)
  {
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
                    ! table->no_replicate &&
                    binlog_filter->db_ok(table->s->db.str));
    table->s->cached_row_logging_check= check;
  }

  DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
              table->s->cached_row_logging_check == 1);

  /* Combine the cached table property with per-statement conditions. */
  return (thd->is_current_stmt_binlog_format_row() &&
          table->s->cached_row_logging_check &&
          (thd->variables.option_bits & OPTION_BIN_LOG) &&
          mysql_bin_log.is_open());
}
7695
7696
7697 /** @brief
7698 Write table maps for all (manually or automatically) locked tables
7699 to the binary log.
7700
7701 SYNOPSIS
7702 write_locked_table_maps()
7703 thd Pointer to THD structure
7704
7705 DESCRIPTION
7706 This function will generate and write table maps for all tables
7707 that are locked by the thread 'thd'.
7708
7709 RETURN VALUE
7710 0 All OK
7711 1 Failed to write all table maps
7712
7713 SEE ALSO
7714 THD::lock
7715 */
7716
static int write_locked_table_maps(THD *thd)
{
  DBUG_ENTER("write_locked_table_maps");
  DBUG_PRINT("enter", ("thd: 0x%lx  thd->lock: 0x%lx "
                       "thd->extra_lock: 0x%lx",
                       (long) thd, (long) thd->lock, (long) thd->extra_lock));

  DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));

  /* Only write table maps if none has been written for this statement yet. */
  if (thd->get_binlog_table_maps() == 0)
  {
    /* Scan both the normal lock and the extra (pre-locked) lock. */
    MYSQL_LOCK *locks[2];
    locks[0]= thd->extra_lock;
    locks[1]= thd->lock;
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
    {
      MYSQL_LOCK const *const lock= locks[i];
      if (lock == NULL)
        continue;

      bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
      TABLE **const end_ptr= lock->table + lock->table_count;
      for (TABLE **table_ptr= lock->table ;
           table_ptr != end_ptr ;
           ++table_ptr)
      {
        TABLE *const table= *table_ptr;
        DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
        /* Only write-locked tables that qualify for row logging get a map. */
        if (table->current_lock == F_WRLCK &&
            check_table_binlog_row_based(thd, table))
        {
          /*
            We need to have a transactional behavior for SQLCOM_CREATE_TABLE
            (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
            compatible behavior with the STMT based replication even when
            the table is not transactional. In other words, if the operation
            fails while executing the insert phase nothing is written to the
            binlog.

            Note that at this point, we check the type of a set of tables to
            create the table map events. In the function binlog_log_row(),
            which calls the current function, we check the type of the table
            of the current row.
          */
          bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                                table->file->has_transactions();
          int const error= thd->binlog_write_table_map(table, has_trans,
                                                       need_binlog_rows_query);
          /* Binlog Rows_query log event once for one statement which updates
             two or more tables.*/
          if (need_binlog_rows_query)
            need_binlog_rows_query= FALSE;
          /*
            If an error occurs, it is the responsibility of the caller to
            roll back the transaction.
          */
          if (unlikely(error))
            DBUG_RETURN(1);
        }
      }
    }
  }
  DBUG_RETURN(0);
}
7781
7782
/* Row-logging callback signature: (thd, table, has_trans, before, after). */
typedef bool Log_func(THD*, TABLE*, bool,
                      const uchar*, const uchar*);
7785
/*
  Write one row change to the binary log in row format (if the table
  qualifies), emitting the table map events first when needed.
  Returns 0 on success, HA_ERR_RBR_LOGGING_FAILED on any logging failure.
*/
int binlog_log_row(TABLE* table,
                   const uchar *before_record,
                   const uchar *after_record,
                   Log_func *log_func)
{
  bool error= 0;
  THD *const thd= table->in_use;

  if (check_table_binlog_row_based(thd, table))
  {
    DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
              (table->s->fields + 7) / 8);

    /*
      If there are no table maps written to the binary log, this is
      the first row handled in this statement. In that case, we need
      to write table maps for all locked tables to the binary log.
    */
    if (likely(!(error= write_locked_table_maps(thd))))
    {
      /*
        We need to have a transactional behavior for SQLCOM_CREATE_TABLE
        (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
        compatible behavior with the STMT based replication even when
        the table is not transactional. In other words, if the operation
        fails while executing the insert phase nothing is written to the
        binlog.
      */
      bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                            table->file->has_transactions();
      error=
        (*log_func)(thd, table, has_trans, before_record, after_record);
    }
  }
  /* Any failure in row logging maps to one generic error code. */
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
}
7822
/**
  Lock (F_RDLCK/F_WRLCK) or unlock (F_UNLCK) the table in the storage
  engine, with performance-schema and DTrace instrumentation. On success
  the lock type and the table flags are cached on the handler.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  DBUG_ASSERT(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
               lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  DBUG_ASSERT(inited == NONE || table->open_by_handler);

  /* Fire the DTrace "lock start" probe matching the requested lock type. */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_START_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
  }

  ha_statistic_increment(&SSV::ha_external_lock_count);

  /* Delegate to the storage engine, timed by the performance schema. */
  MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */

  if (error == 0)
  {
    /*
      The lock type is needed by MRR when creating a clone of this handler
      object.
    */
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
  }

  /* Fire the matching DTrace "lock done" probe with the result. */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_DONE(error);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_DONE(error);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_DONE(error);
    }
  }
  DBUG_RETURN(error);
}
7900
7901
7902 /** @brief
7903 Check handler usage and reset state of file to after 'open'
7904
7905 @note can be called regardless of it is locked or not.
7906 */
int handler::ha_reset()
{
  DBUG_ENTER("handler::ha_reset");
  /* Check that we have called all proper deallocation functions */
  DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
              table->s->column_bitmap_size ==
              (uchar*) table->def_write_set.bitmap);
  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
  DBUG_ASSERT(table->key_read == 0);
  /* ensure that ha_index_end / ha_rnd_end has been called */
  DBUG_ASSERT(inited == NONE);
  /* Free cache used by filesort */
  free_io_cache(table);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  /* Reset information about pushed engine conditions */
  pushed_cond= NULL;
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();

  /* Finally let the storage engine reset its own state. */
  const int retval= reset();
  DBUG_RETURN(retval);
}
7930
7931
/**
  Insert one row, with instrumentation and row-based binary logging.

  @param buf  Row image to insert (storage-engine row format).

  @return 0 on success, or a handler error code from write_row() /
          binlog_log_row().
*/
int handler::ha_write_row(uchar *buf)
{
  int error;
  /* Logging callback used if this insert must go to the row-based binlog. */
  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
  /* Writes require a write lock, except on temporary tables. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);

  DBUG_ENTER("handler::ha_write_row");
  DEBUG_SYNC(ha_thd(), "start_ha_write_row");
  DBUG_EXECUTE_IF("inject_error_ha_write_row",
                  DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );

  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
  /* The statement may change data: mark the transaction as read-write. */
  mark_trx_read_write();

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
                      { error= write_row(buf); })

  MYSQL_INSERT_ROW_DONE(error);
  if (unlikely(error))
    DBUG_RETURN(error);

  /* Only a successful engine write is logged to the binlog. */
  if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
    DBUG_RETURN(error); /* purecov: inspected */

  /* NOTE(review): rows_changed presumably feeds per-table change
     statistics; fake-changes mode skips the bump -- confirm semantics. */
  if (likely(!is_fake_change_enabled(ha_thd())))
    rows_changed++;

  DEBUG_SYNC_C("ha_write_row_end");
  DBUG_RETURN(0);
}
7963
7964
/**
  Update one row, with instrumentation and row-based binary logging.

  @param old_data  Previous row image; must be table->record[1].
  @param new_data  New row image; must be table->record[0].

  @return 0 on success, or a handler error code from update_row() /
          binlog_log_row().
*/
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
{
  int error;
  /* Updates require a write lock, except on temporary tables. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* Logging callback used if this update must go to the row-based binlog. */
  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;

  /*
    Some storage engines require that the new record is in record[0]
    (and the old record is in record[1]).
  */
  DBUG_ASSERT(new_data == table->record[0]);
  DBUG_ASSERT(old_data == table->record[1]);

  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
  /* The statement may change data: mark the transaction as read-write. */
  mark_trx_read_write();

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
                      { error= update_row(old_data, new_data);})

  MYSQL_UPDATE_ROW_DONE(error);
  if (unlikely(error))
    return error;
  /* Only a successful engine update is logged to the binlog. */
  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
    return error;

  /* NOTE(review): skipped in fake-changes mode -- confirm semantics. */
  if (likely(!is_fake_change_enabled(ha_thd())))
    rows_changed++;

  return 0;
}
7996
/**
  Delete one row, with instrumentation and row-based binary logging.

  @param buf  Row image to delete; must be table->record[0] or
              table->record[1].

  @return 0 on success, or a handler error code from delete_row() /
          binlog_log_row().
*/
int handler::ha_delete_row(const uchar *buf)
{
  int error;
  /* Deletes require a write lock, except on temporary tables. */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /* Logging callback used if this delete must go to the row-based binlog. */
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
  /*
    Normally table->record[0] is used, but sometimes table->record[1] is used.
  */
  DBUG_ASSERT(buf == table->record[0] ||
              buf == table->record[1]);
  DBUG_EXECUTE_IF("inject_error_ha_delete_row",
                  return HA_ERR_INTERNAL_ERROR; );

  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
  /* The statement may change data: mark the transaction as read-write. */
  mark_trx_read_write();

  MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_DELETE_ROW, active_index, 0,
                      { error= delete_row(buf);})

  MYSQL_DELETE_ROW_DONE(error);
  if (unlikely(error))
    return error;
  /* Only a successful engine delete is logged to the binlog. */
  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
    return error;

  /* NOTE(review): skipped in fake-changes mode -- confirm semantics. */
  if (likely(!is_fake_change_enabled(ha_thd())))
    rows_changed++;

  return 0;
}
8028
8029 /**
8030 @brief Offload an update to the storage engine. See handler::fast_update()
8031 for details.
8032 */
ha_fast_update(THD * thd,List<Item> & update_fields,List<Item> & update_values,Item * conds)8033 int handler::ha_fast_update(THD *thd,
8034 List<Item> &update_fields,
8035 List<Item> &update_values,
8036 Item *conds)
8037 {
8038 int error= fast_update(thd, update_fields, update_values, conds);
8039 if (error == 0)
8040 mark_trx_read_write();
8041 return error;
8042 }
8043
8044 /**
8045 @brief Offload an upsert to the storage engine. See handler::upsert()
8046 for details.
8047 */
ha_upsert(THD * thd,List<Item> & update_fields,List<Item> & update_values)8048 int handler::ha_upsert(THD *thd,
8049 List<Item> &update_fields,
8050 List<Item> &update_values)
8051 {
8052 int error= upsert(thd, update_fields, update_values);
8053 if (error == 0)
8054 mark_trx_read_write();
8055 return error;
8056 }
8057
8058 /** @brief
8059 use_hidden_primary_key() is called in case of an update/delete when
8060 (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
8061 but we don't have a primary key
8062 */
void handler::use_hidden_primary_key()
{
  /* fallback to use all columns in the table to identify row */
  table->use_all_columns();
}
8068
8069
8070 /**
8071 Get an initialized ha_share.
8072
8073 @return Initialized ha_share
8074 @retval NULL ha_share is not yet initialized.
8075 @retval != NULL previous initialized ha_share.
8076
8077 @note
8078 If not a temp table, then LOCK_ha_data must be held.
8079 */
8080
Handler_share *handler::get_ha_share_ptr()
{
  DBUG_ENTER("handler::get_ha_share_ptr");
  DBUG_ASSERT(ha_share && table_share);

#ifndef DBUG_OFF
  /* Shared (non-temporary) tables require LOCK_ha_data to be held here. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  /* Dereference: ha_share points at the slot holding the shared object. */
  DBUG_RETURN(*ha_share);
}
8093
8094
8095 /**
8096 Set ha_share to be used by all instances of the same table/partition.
8097
8098 @param ha_share Handler_share to be shared.
8099
8100 @note
8101 If not a temp table, then LOCK_ha_data must be held.
8102 */
8103
void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
{
  DBUG_ENTER("handler::set_ha_share_ptr");
  DBUG_ASSERT(ha_share);
#ifndef DBUG_OFF
  /* Shared (non-temporary) tables require LOCK_ha_data to be held here. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  /* Store into the slot all handler instances of this share point at. */
  *ha_share= arg_ha_share;
  DBUG_VOID_RETURN;
}
8116
8117
8118 /**
8119 Take a lock for protecting shared handler data.
8120 */
8121
lock_shared_ha_data()8122 void handler::lock_shared_ha_data()
8123 {
8124 DBUG_ASSERT(table_share);
8125 if (table_share->tmp_table == NO_TMP_TABLE)
8126 mysql_mutex_lock(&table_share->LOCK_ha_data);
8127 }
8128
8129
8130 /**
8131 Release lock for protecting ha_share.
8132 */
8133
unlock_shared_ha_data()8134 void handler::unlock_shared_ha_data()
8135 {
8136 DBUG_ASSERT(table_share);
8137 if (table_share->tmp_table == NO_TMP_TABLE)
8138 mysql_mutex_unlock(&table_share->LOCK_ha_data);
8139 }
8140
8141
8142 /** @brief
8143 Dummy function which accept information about log files which is not need
8144 by handlers
8145 */
/* Note: the handlerton parameter is deliberately unnamed and unused. */
void signal_log_not_needed(struct handlerton, char *log_file)
{
  DBUG_ENTER("signal_log_not_needed");
  DBUG_PRINT("enter", ("logfile '%s'", log_file));
  DBUG_VOID_RETURN;
}
8152
/**
  Detect statements that would need gap locks on an engine without them.

  Raises an error (and returns true) when a locking scan that is not keyed
  on a full unique key runs at REPEATABLE READ or above in a multi-table
  statement or a multi-statement transaction, on a transactional engine
  that does not support gap locks.

  @param table                  Table being accessed.
  @param using_full_primary_key true if the access uses all columns of a
                                unique key with equality conditions.

  @retval true   Prohibited access: error already reported via
                 my_printf_error().
  @retval false  Access is allowed.
*/
bool handler::is_using_prohibited_gap_locks(TABLE* table,
                                            bool using_full_primary_key) const
{
  THD* thd = table->in_use;
  thr_lock_type lock_type = table->reginfo.lock_type;

  /*
    All of the following must hold for the access to be rejected:
    - the row is not pinned down by a full unique key lookup,
    - the engine is transactional but has no gap locks,
    - isolation level is REPEATABLE READ or stricter,
    - not running as a replication slave thread,
    - more than one table, or inside a multi-statement transaction,
    - and the lock requested is a write lock, a shared-read lock,
      a no-insert read lock, or any non-ignored lock taken by a
      non-SELECT statement.
  */
  if (!using_full_primary_key
      && has_transactions()
      && !has_gap_locks()
      && thd_tx_isolation(thd) >= ISO_REPEATABLE_READ
      && !thd->rli_slave
      && (thd->lex->table_count >= 2 || thd->in_multi_stmt_transaction_mode())
      && (lock_type >= TL_WRITE_ALLOW_WRITE ||
          lock_type == TL_READ_WITH_SHARED_LOCKS ||
          lock_type == TL_READ_NO_INSERT ||
          (lock_type != TL_IGNORE && thd->lex->sql_command != SQLCOM_SELECT)))
  {
    my_printf_error(ER_UNKNOWN_ERROR,
                    "Using Gap Lock without full unique key in multi-table "
                    "or multi-statement transactions is not "
                    "allowed. You need to either rewrite queries to use "
                    "all unique key columns in WHERE equal conditions, or "
                    "rewrite to single-table, single-statement "
                    "transaction.  Query: %s",
                    MYF(0), thd->query());
    return true;
  }
  return false;
}
8182
8183
8184 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
8185 /*
8186 Example of transaction log management functions based on assumption that logs
8187 placed into a directory
8188 */
8189 #include <my_dir.h>
8190 #include <my_sys.h>
example_of_iterator_using_for_logs_cleanup(handlerton * hton)8191 int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
8192 {
8193 void *buffer;
8194 int res= 1;
8195 struct handler_iterator iterator;
8196 struct handler_log_file_data data;
8197
8198 if (!hton->create_iterator)
8199 return 1; /* iterator creator is not supported */
8200
8201 if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
8202 HA_ITERATOR_OK)
8203 {
8204 /* error during creation of log iterator or iterator is not supported */
8205 return 1;
8206 }
8207 while((*iterator.next)(&iterator, (void*)&data) == 0)
8208 {
8209 printf("%s\n", data.filename.str);
8210 if (data.status == HA_LOG_STATUS_FREE &&
8211 mysql_file_delete(INSTRUMENT_ME,
8212 data.filename.str, MYF(MY_WME)))
8213 goto err;
8214 }
8215 res= 0;
8216 err:
8217 (*iterator.destroy)(&iterator);
8218 return res;
8219 }
8220
8221
8222 /*
8223 Here we should get info from handler where it save logs but here is
8224 just example, so we use constant.
8225 IMHO FN_ROOTDIR ("/") is safe enough for example, because nobody has
8226 rights on it except root and it consist of directories only at lest for
8227 *nix (sorry, can't find windows-safe solution here, but it is only example).
8228 */
8229 #define fl_dir FN_ROOTDIR
8230
8231
8232 /** @brief
8233 Dummy function to return log status should be replaced by function which
8234 really detect the log status and check that the file is a log of this
8235 handler.
8236 */
fl_get_log_status(char * log)8237 enum log_status fl_get_log_status(char *log)
8238 {
8239 MY_STAT stat_buff;
8240 if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
8241 return HA_LOG_STATUS_INUSE;
8242 return HA_LOG_STATUS_NOSUCHLOG;
8243 }
8244
8245
/* Backing store for the example log iterator: parallel name/status arrays. */
struct fl_buff
{
  LEX_STRING *names;            /* log file names, entries elements */
  enum log_status *statuses;    /* status per log file, entries elements */
  uint32 entries;               /* number of valid elements */
  uint32 current;               /* next element fl_log_iterator_next returns */
};
8253
8254
fl_log_iterator_next(struct handler_iterator * iterator,void * iterator_object)8255 int fl_log_iterator_next(struct handler_iterator *iterator,
8256 void *iterator_object)
8257 {
8258 struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
8259 struct handler_log_file_data *data=
8260 (struct handler_log_file_data *) iterator_object;
8261 if (buff->current >= buff->entries)
8262 return 1;
8263 data->filename= buff->names[buff->current];
8264 data->status= buff->statuses[buff->current];
8265 buff->current++;
8266 return 0;
8267 }
8268
8269
/* Release the single allocation holding fl_buff, names and statuses. */
void fl_log_iterator_destroy(struct handler_iterator *iterator)
{
  my_free(iterator->buffer);
}
8274
8275
8276 /** @brief
8277 returns buffer, to be assigned in handler_iterator struct
8278 */
8279 enum handler_create_iterator_result
fl_log_iterator_buffer_init(struct handler_iterator * iterator)8280 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
8281 {
8282 MY_DIR *dirp;
8283 struct fl_buff *buff;
8284 char *name_ptr;
8285 uchar *ptr;
8286 FILEINFO *file;
8287 uint32 i;
8288
8289 /* to be able to make my_free without crash in case of error */
8290 iterator->buffer= 0;
8291
8292 if (!(dirp = my_dir(fl_dir, MYF(0))))
8293 {
8294 return HA_ITERATOR_ERROR;
8295 }
8296 if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
8297 ((ALIGN_SIZE(sizeof(LEX_STRING)) +
8298 sizeof(enum log_status) +
8299 + FN_REFLEN + 1) *
8300 (uint) dirp->number_off_files),
8301 MYF(0))) == 0)
8302 {
8303 return HA_ITERATOR_ERROR;
8304 }
8305 buff= (struct fl_buff *)ptr;
8306 buff->entries= buff->current= 0;
8307 ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
8308 buff->names= (LEX_STRING*) (ptr);
8309 ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
8310 (uint) dirp->number_off_files));
8311 buff->statuses= (enum log_status *)(ptr);
8312 name_ptr= (char *)(ptr + (sizeof(enum log_status) *
8313 (uint) dirp->number_off_files));
8314 for (i=0 ; i < (uint) dirp->number_off_files ; i++)
8315 {
8316 enum log_status st;
8317 file= dirp->dir_entry + i;
8318 if ((file->name[0] == '.' &&
8319 ((file->name[1] == '.' && file->name[2] == '\0') ||
8320 file->name[1] == '\0')))
8321 continue;
8322 if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
8323 continue;
8324 name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
8325 FN_REFLEN, fl_dir, file->name, NullS);
8326 buff->names[buff->entries].length= (name_ptr -
8327 buff->names[buff->entries].str);
8328 buff->statuses[buff->entries]= st;
8329 buff->entries++;
8330 }
8331
8332 iterator->buffer= buff;
8333 iterator->next= &fl_log_iterator_next;
8334 iterator->destroy= &fl_log_iterator_destroy;
8335 return HA_ITERATOR_OK;
8336 }
8337
8338
8339 /* An example of a iterator creator */
8340 enum handler_create_iterator_result
fl_create_iterator(enum handler_iterator_type type,struct handler_iterator * iterator)8341 fl_create_iterator(enum handler_iterator_type type,
8342 struct handler_iterator *iterator)
8343 {
8344 switch(type) {
8345 case HA_TRANSACTLOG_ITERATOR:
8346 return fl_log_iterator_buffer_init(iterator);
8347 default:
8348 return HA_ITERATOR_UNSUPPORTED;
8349 }
8350 }
8351 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
8352
8353
8354 /**
8355 Report a warning for FK constraint violation.
8356
8357 @param thd Thread handle.
8358 @param table table on which the operation is performed.
8359 @param error handler error number.
8360 */
warn_fk_constraint_violation(THD * thd,TABLE * table,int error)8361 void warn_fk_constraint_violation(THD *thd,TABLE *table, int error)
8362 {
8363 String str;
8364 switch(error) {
8365 case HA_ERR_ROW_IS_REFERENCED:
8366 table->file->get_error_message(error, &str);
8367 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
8368 ER_ROW_IS_REFERENCED_2, str.c_ptr_safe());
8369 break;
8370 case HA_ERR_NO_REFERENCED_ROW:
8371 table->file->get_error_message(error, &str);
8372 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
8373 ER_NO_REFERENCED_ROW_2, str.c_ptr_safe());
8374 break;
8375 }
8376 }
8377
8378
8379 /**
8380 Checks if the file name is reserved word used by SE by invoking
8381 the handlerton method.
8382
8383 @param unused1 thread handler which is unused.
8384 @param plugin SE plugin.
8385 @param name Database name.
8386
8387 @retval true If the name is reserved word.
8388 @retval false If the name is not reserved word.
8389 */
is_reserved_db_name_handlerton(THD * unused1,plugin_ref plugin,void * name)8390 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
8391 void *name)
8392 {
8393 handlerton *hton= plugin_data(plugin, handlerton *);
8394 if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
8395 return (hton->is_reserved_db_name(hton, (const char *)name));
8396 return false;
8397 }
8398
8399
8400 /**
8401 Check if the file name is reserved word used by SE.
8402
8403 @param name Database name.
8404
8405 @retval true If the name is a reserved word.
8406 @retval false If the name is not a reserved word.
8407 */
bool ha_check_reserved_db_name(const char* name)
{
  /* True as soon as any storage engine claims the name as reserved. */
  return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
                         MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
}
8413