1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 /** @file handler.cc
24 
25     @brief
26   Handler-calling-functions
27 */
28 
29 #include "handler.h"
30 
31 #include "my_bit.h"                   // my_count_bits
32 #include "myisam.h"                   // TT_FOR_UPGRADE
33 #include "mysql_version.h"            // MYSQL_VERSION_ID
34 
35 #include "binlog.h"                   // mysql_bin_log
36 #include "debug_sync.h"               // DEBUG_SYNC
37 #include "discover.h"                 // writefrm
38 #include "log.h"                      // sql_print_error
39 #include "log_event.h"                // Write_rows_log_event
40 #include "my_bitmap.h"                // MY_BITMAP
41 #include "probes_mysql.h"             // MYSQL_HANDLER_WRLOCK_START
42 #include "opt_costconstantcache.h"    // reload_optimizer_cost_constants
43 #include "rpl_handler.h"              // RUN_HOOK
44 #include "sql_base.h"                 // free_io_cache
45 #include "sql_parse.h"                // check_stack_overrun
46 #include "sql_plugin.h"               // plugin_foreach
47 #include "sql_table.h"                // build_table_filename
48 #include "transaction.h"              // trans_commit_implicit
49 #include "trigger_def.h"              // TRG_EXT
50 #include "sql_select.h"               // actual_key_parts
51 #include "rpl_write_set_handler.h"    // add_pke
52 #include "auth_common.h"              // check_readonly() and SUPER_ACL
53 
54 
55 #include "pfs_file_provider.h"
56 #include "mysql/psi/mysql_file.h"
57 
58 #include <pfs_table_provider.h>
59 #include <mysql/psi/mysql_table.h>
60 
61 #include <pfs_transaction_provider.h>
62 #include <mysql/psi/mysql_transaction.h>
63 #include "opt_hints.h"
64 
65 #ifdef WITH_WSREP
66 #include "partitioning/partition_handler.h"
67 #endif
68 #include <list>
69 #include <cstring>
70 #include <string>
71 #include <boost/foreach.hpp>
72 #include <boost/tokenizer.hpp>
73 #include <boost/algorithm/string.hpp>
74 
/**
  @def MYSQL_TABLE_IO_WAIT
  Instrumentation helper for table io_waits.
  Note that this helper is intended to be used from
  within the handler class only, as it uses members
  from @c handler
  (@c m_psi, @c m_psi_batch_mode, @c m_psi_locker, @c m_psi_locker_state
  and @c m_psi_numrows).
  Performance schema events are instrumented as follows:
  - in non batch mode, one event is generated per call
  - in batch mode, the number of rows affected is saved
  in @c m_psi_numrows, so that @c end_psi_batch_mode()
  generates a single event for the batch.
  When @c m_psi is NULL, or when HAVE_PSI_TABLE_INTERFACE is not defined,
  PAYLOAD is executed without any instrumentation.
  @param OP the table operation to be performed
  @param INDEX the table index used if any, or MAX_KEY.
  @param RESULT expression evaluated after PAYLOAD in batch mode only;
                @c m_psi_numrows is incremented when it evaluates to zero.
                It is not evaluated in non batch mode.
  @param PAYLOAD instrumented code to execute
  @sa handler::end_psi_batch_mode.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
  #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD)     \
    {                                                         \
      if (m_psi != NULL)                                      \
      {                                                       \
        switch (m_psi_batch_mode)                             \
        {                                                     \
          case PSI_BATCH_MODE_NONE:                           \
          {                                                   \
            PSI_table_locker *sub_locker= NULL;               \
            PSI_table_locker_state reentrant_safe_state;      \
            sub_locker= PSI_TABLE_CALL(start_table_io_wait)   \
              (& reentrant_safe_state, m_psi, OP, INDEX,      \
               __FILE__, __LINE__);                           \
            PAYLOAD                                           \
            if (sub_locker != NULL)                           \
              PSI_TABLE_CALL(end_table_io_wait)               \
                (sub_locker, 1);                              \
            break;                                            \
          }                                                   \
          case PSI_BATCH_MODE_STARTING:                       \
          {                                                   \
            m_psi_locker= PSI_TABLE_CALL(start_table_io_wait) \
              (& m_psi_locker_state, m_psi, OP, INDEX,        \
               __FILE__, __LINE__);                           \
            PAYLOAD                                           \
            if (!RESULT)                                      \
              m_psi_numrows++;                                \
            m_psi_batch_mode= PSI_BATCH_MODE_STARTED;         \
            break;                                            \
          }                                                   \
          case PSI_BATCH_MODE_STARTED:                        \
          default:                                            \
          {                                                   \
            assert(m_psi_batch_mode                           \
                   == PSI_BATCH_MODE_STARTED);                \
            PAYLOAD                                           \
            if (!RESULT)                                      \
              m_psi_numrows++;                                \
            break;                                            \
          }                                                   \
        }                                                     \
      }                                                       \
      else                                                    \
      {                                                       \
        PAYLOAD                                               \
      }                                                       \
    }
#else
  #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \
    PAYLOAD
#endif
143 
/**
  @def MYSQL_TABLE_LOCK_WAIT
  Instrumentation helper for table lock waits.
  Uses the @c m_psi member of the enclosing object: when it is not NULL
  the PAYLOAD is bracketed by start_table_lock_wait / end_table_lock_wait
  calls, otherwise (or when HAVE_PSI_TABLE_INTERFACE is not defined) the
  PAYLOAD is executed without instrumentation.
  @param OP the table operation to be performed
  @param FLAGS per table operation flags.
  @param PAYLOAD the code to instrument.
  @sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)    \
    {                                                  \
      if (m_psi != NULL)                               \
      {                                                \
        PSI_table_locker *locker;                      \
        PSI_table_locker_state state;                  \
        locker= PSI_TABLE_CALL(start_table_lock_wait)  \
          (& state, m_psi, OP, FLAGS,                  \
          __FILE__, __LINE__);                         \
        PAYLOAD                                        \
        if (locker != NULL)                            \
          PSI_TABLE_CALL(end_table_lock_wait)(locker); \
      }                                                \
      else                                             \
      {                                                \
        PAYLOAD                                        \
      }                                                \
    }
#else
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
    PAYLOAD
#endif
175 
176 using std::min;
177 using std::max;
178 using std::list;
179 
180 // This is a temporary backporting fix.
181 #ifndef HAVE_LOG2
/**
  Fallback base-2 logarithm, compiled only when HAVE_LOG2 is not defined.

  This will be slightly slower and perhaps a tiny bit less accurate than
  doing it the IEEE754 way but log2() should be available on C99 systems.

  @param x  value whose logarithm is wanted

  @return natural logarithm of x divided by M_LN2
*/
inline double log2(double x)
{
  const double natural_log= log(x);
  return natural_log / M_LN2;
}
190 #endif
191 #ifdef WITH_WSREP
192 #include "wsrep_mysqld.h"
193 #include "wsrep_xid.h"
194 #endif
/*
  Maps a handlerton slot to the plugin that owns that handlerton.
  In this file the array is indexed by handlerton::slot (see
  ha_resolve_storage_engine_name() and ha_lock_engine()) and an entry is
  reset to NULL by ha_finalize_handlerton().

  Original comment, kept for reference:
  While we have legacy_db_type, we have this array to
  check for dups and to find handlerton from legacy_db_type.
  Remove when legacy_db_type is finally gone

  NOTE(review): the original text reads like a description of
  installed_htons[] (which is indexed by legacy_db_type) rather than of
  this array -- confirm and move it if so.
*/
st_plugin_int *hton2plugin[MAX_HA];

/**
  Array allowing to check if handlerton is builtin without
  acquiring LOCK_plugin. Indexed by handlerton::slot, like hton2plugin[].
*/
static bool builtin_htons[MAX_HA];
207 
ha_resolve_storage_engine_name(const handlerton * db_type)208 const char *ha_resolve_storage_engine_name(const handlerton *db_type)
209 {
210   return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
211 }
212 
/*
  Handlertons looked up by legacy type: in this file the array is indexed
  by legacy_db_type / handlerton::db_type (see ha_resolve_by_legacy_type()
  and ha_finalize_handlerton()).
*/
static handlerton *installed_htons[128];

/* 128 bits expressed as a number of bytes. */
#define BITMAP_STACKBUF_SIZE (128/8)

/* Default-initialized key creation attributes (HA_KEY_ALG_UNDEF algorithm). */
KEY_CREATE_INFO default_key_create_info=
  { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };

/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;
226 
/*
  Historical storage engine aliases.

  The table is laid out as consecutive pairs: an alias immediately
  followed by the engine name it resolves to. Readers in this file walk
  it two entries at a time and stop at the {NullS, 0} terminator, so
  entries must always be added in pairs and the terminator must stay last.
*/
static const LEX_STRING sys_table_aliases[]=
{
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
  { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
  { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
  {NullS, 0}
};
235 
/*
  Printable row format names.
  NOTE(review): presumably indexed by enum row_type -- confirm against
  the enum definition before reordering.
*/
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
  /* Reserved to be "PAGE" in future versions */ "?",
  "?","?","?"
};

/* Transaction isolation level names, terminated by NullS. */
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
/* The element count excludes the NullS terminator, hence the "-1". */
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
			       tx_isolation_names, NULL};
247 
248 #ifndef NDEBUG
249 
ha_legacy_type_name(legacy_db_type legacy_type)250 const char *ha_legacy_type_name(legacy_db_type legacy_type)
251 {
252   switch (legacy_type)
253   {
254   case DB_TYPE_UNKNOWN:
255     return "DB_TYPE_UNKNOWN";
256   case DB_TYPE_DIAB_ISAM:
257     return "DB_TYPE_DIAB_ISAM";
258   case DB_TYPE_HASH:
259     return "DB_TYPE_HASH";
260   case DB_TYPE_MISAM:
261     return "DB_TYPE_MISAM";
262   case DB_TYPE_PISAM:
263     return "DB_TYPE_PISAM";
264   case DB_TYPE_RMS_ISAM:
265     return "DB_TYPE_RMS_ISAM";
266   case DB_TYPE_HEAP:
267     return "DB_TYPE_HEAP";
268   case DB_TYPE_ISAM:
269     return "DB_TYPE_ISAM";
270   case DB_TYPE_MRG_ISAM:
271     return "DB_TYPE_MRG_ISAM";
272   case DB_TYPE_MYISAM:
273     return "DB_TYPE_MYISAM";
274   case DB_TYPE_MRG_MYISAM:
275     return "DB_TYPE_MRG_MYISAM";
276   case DB_TYPE_BERKELEY_DB:
277     return "DB_TYPE_BERKELEY_DB";
278   case DB_TYPE_INNODB:
279     return "DB_TYPE_INNODB";
280   case DB_TYPE_GEMINI:
281     return "DB_TYPE_GEMINI";
282   case DB_TYPE_NDBCLUSTER:
283     return "DB_TYPE_NDBCLUSTER";
284   case DB_TYPE_EXAMPLE_DB:
285     return "DB_TYPE_EXAMPLE_DB";
286   case DB_TYPE_ARCHIVE_DB:
287     return "DB_TYPE_ARCHIVE_DB";
288   case DB_TYPE_CSV_DB:
289     return "DB_TYPE_CSV_DB";
290   case DB_TYPE_FEDERATED_DB:
291     return "DB_TYPE_FEDERATED_DB";
292   case DB_TYPE_BLACKHOLE_DB:
293     return "DB_TYPE_BLACKHOLE_DB";
294   case DB_TYPE_PARTITION_DB:
295     return "DB_TYPE_PARTITION_DB";
296   case DB_TYPE_BINLOG:
297     return "DB_TYPE_BINLOG";
298   case DB_TYPE_SOLID:
299     return "DB_TYPE_SOLID";
300   case DB_TYPE_PBXT:
301     return "DB_TYPE_PBXT";
302   case DB_TYPE_TABLE_FUNCTION:
303     return "DB_TYPE_TABLE_FUNCTION";
304   case DB_TYPE_MEMCACHE:
305     return "DB_TYPE_MEMCACHE";
306   case DB_TYPE_FALCON:
307     return "DB_TYPE_FALCON";
308   case DB_TYPE_MARIA:
309     return "DB_TYPE_MARIA";
310   case DB_TYPE_PERFORMANCE_SCHEMA:
311     return "DB_TYPE_PERFORMANCE_SCHEMA";
312   default:
313     return "DB_TYPE_DYNAMIC";
314   }
315 }
316 #endif
317 
/**
  Database name that hold most of mysqld system tables.
  Current code assumes that, there exists only some
  specific "database name" designated as system database.
*/
const char* mysqld_system_database= "mysql";

/*
  System tables that belong to mysqld_system_database.
  Each entry is a {database name, table name} pair; the list is closed by
  an entry whose two members are NULL, which must remain the last one.
*/
st_handler_tablename mysqld_system_tables[]= {
  {mysqld_system_database, "db"},
  {mysqld_system_database, "user"},
  {mysqld_system_database, "host"},
  {mysqld_system_database, "func"},
  {mysqld_system_database, "proc"},
  {mysqld_system_database, "event"},
  {mysqld_system_database, "plugin"},
  {mysqld_system_database, "servers"},
  {mysqld_system_database, "procs_priv"},
  {mysqld_system_database, "tables_priv"},
  {mysqld_system_database, "proxies_priv"},
  {mysqld_system_database, "columns_priv"},
  {mysqld_system_database, "time_zone"},
  {mysqld_system_database, "time_zone_name"},
  {mysqld_system_database, "time_zone_leap_second"},
  {mysqld_system_database, "time_zone_transition"},
  {mysqld_system_database, "time_zone_transition_type"},
  {mysqld_system_database, "help_category"},
  {mysqld_system_database, "help_keyword"},
  {mysqld_system_database, "help_relation"},
  {mysqld_system_database, "help_topic"},
  {mysqld_system_database, "innodb_table_stats"},
  {mysqld_system_database, "innodb_index_stats"},
  {(const char *)NULL, (const char *)NULL} /* This must be at the end */
};
352 
/**
  This static pointer holds list of system databases from SQL layer and
  various SE's. The required memory is allocated once, and never freed.
  It starts out as NULL.
*/
static const char **known_system_databases= NULL;
/* Forward declaration; the definition is not part of this section. */
static const char **ha_known_system_databases();

/*
  Called for each SE to get SE specific system database.
  The three-argument (THD, plugin_ref, void*) shape is shared with
  check_engine_system_table_handlerton() below.
*/
static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
                                           void *arg);

// Called for each SE to check if given db.table_name is a system table.
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg);
/**
  Structure used by SE during check for system table.
  This structure is passed to each SE handlerton and the status (OUT param)
  is collected.

  The first four members are inputs describing the table being checked;
  @c status is the only member meant to be written by the check.
*/
struct st_sys_tbl_chk_params
{
  const char *db;                             // IN param
  const char *table_name;                     // IN param
  bool is_sql_layer_system_table;             // IN param
  legacy_db_type db_type;                     // IN param

  /* Outcome of the system table check. */
  enum enum_status
  {
    // db.table_name is user table.
    USER_TABLE,
    /*
      db.table_name is a system table,
      but may not be supported by SE.
    */
    SYSTEM_TABLE,
    /*
      db.table_name is a system table,
      and is supported by SE.
    */
    SE_SUPPORTED_SYSTEM_TABLE
  } status;                                    // OUT param
};
396 
397 
ha_default_plugin(THD * thd)398 static plugin_ref ha_default_plugin(THD *thd)
399 {
400   if (thd->variables.table_plugin)
401     return thd->variables.table_plugin;
402   return my_plugin_lock(thd, &global_system_variables.table_plugin);
403 }
404 
405 
406 /** @brief
407   Return the default storage engine handlerton used for non-temp tables
408   for thread
409 
410   SYNOPSIS
411     ha_default_handlerton(thd)
412     thd         current thread
413 
414   RETURN
415     pointer to handlerton
416 */
ha_default_handlerton(THD * thd)417 handlerton *ha_default_handlerton(THD *thd)
418 {
419   plugin_ref plugin= ha_default_plugin(thd);
420   assert(plugin);
421   handlerton *hton= plugin_data<handlerton*>(plugin);
422   assert(hton);
423   return hton;
424 }
425 
426 
ha_default_temp_plugin(THD * thd)427 static plugin_ref ha_default_temp_plugin(THD *thd)
428 {
429   if (thd->variables.temp_table_plugin)
430     return thd->variables.temp_table_plugin;
431   return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
432 }
433 
434 
435 /** @brief
436   Return the default storage engine handlerton used for explicitly
437   created temp tables for a thread
438 
439   SYNOPSIS
440     ha_default_temp_handlerton(thd)
441     thd         current thread
442 
443   RETURN
444     pointer to handlerton
445 */
ha_default_temp_handlerton(THD * thd)446 handlerton *ha_default_temp_handlerton(THD *thd)
447 {
448   plugin_ref plugin= ha_default_temp_plugin(thd);
449   assert(plugin);
450   handlerton *hton= plugin_data<handlerton*>(plugin);
451   assert(hton);
452   return hton;
453 }
454 
455 
456 /**
457   Resolve handlerton plugin by name, without checking for "DEFAULT" or
458   HTON_NOT_USER_SELECTABLE.
459 
460   @param thd  Thread context.
461   @param name Plugin name.
462 
463   @return plugin or NULL if not found.
464 */
ha_resolve_by_name_raw(THD * thd,const LEX_CSTRING & name)465 plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name)
466 {
467   return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
468 }
469 
470 /** @brief
471   Return the storage engine handlerton for the supplied name
472 
473   SYNOPSIS
474     ha_resolve_by_name(thd, name)
475     thd         current thread
476     name        name of storage engine
477 
478   RETURN
479     pointer to storage engine plugin handle
480 */
ha_resolve_by_name(THD * thd,const LEX_STRING * name,bool is_temp_table)481 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
482                               bool is_temp_table)
483 {
484   const LEX_STRING *table_alias;
485   plugin_ref plugin;
486 
487 redo:
488   /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
489   if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
490                            (const uchar *)name->str, name->length,
491                            (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
492     return is_temp_table ?
493       ha_default_plugin(thd) : ha_default_temp_plugin(thd);
494 
495   LEX_CSTRING cstring_name= {name->str, name->length};
496   if ((plugin= ha_resolve_by_name_raw(thd, cstring_name)))
497   {
498     handlerton *hton= plugin_data<handlerton*>(plugin);
499     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
500       return plugin;
501 
502     /*
503       unlocking plugin immediately after locking is relatively low cost.
504     */
505     plugin_unlock(thd, plugin);
506   }
507 
508   /*
509     We check for the historical aliases.
510   */
511   for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
512   {
513     if (!my_strnncoll(&my_charset_latin1,
514                       (const uchar *)name->str, name->length,
515                       (const uchar *)table_alias->str, table_alias->length))
516     {
517       name= table_alias + 1;
518       goto redo;
519     }
520   }
521 
522   return NULL;
523 }
524 
525 std::string normalized_se_str= "";
526 
527 /*
528   Parse comma separated list of disabled storage engine names
529   and create a normalized string by appending storage names that
530   have aliases. This normalized string is used to disallow
531   table/tablespace creation under the storage engines specified.
532 */
ha_set_normalized_disabled_se_str(const std::string & disabled_se)533 void ha_set_normalized_disabled_se_str(const std::string &disabled_se)
534 {
535   boost::char_separator<char> sep(",");
536   boost::tokenizer< boost::char_separator<char> > tokens(disabled_se, sep);
537   normalized_se_str.append(",");
538   BOOST_FOREACH (std::string se_name, tokens)
539   {
540     const LEX_STRING *table_alias;
541     boost::algorithm::to_upper(se_name);
542     for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
543     {
544       if (!strcasecmp(se_name.c_str(), table_alias->str) ||
545           !strcasecmp(se_name.c_str(), (table_alias+1)->str))
546       {
547         normalized_se_str.append(std::string(table_alias->str) + "," +
548                                  std::string((table_alias+1)->str) + ",");
549         break;
550       }
551     }
552 
553     if (table_alias->str == NULL)
554       normalized_se_str.append(se_name+",");
555   }
556 }
557 
558 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)559 bool ha_is_storage_engine_disabled(handlerton *se_handle)
560 {
561   if (normalized_se_str.size())
562   {
563     std::string se_name(",");
564     se_name.append(ha_resolve_storage_engine_name(se_handle));
565     se_name.append(",");
566     boost::algorithm::to_upper(se_name);
567     if(strstr(normalized_se_str.c_str(), se_name.c_str()))
568       return true;
569   }
570   return false;
571 }
572 
573 
ha_lock_engine(THD * thd,const handlerton * hton)574 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
575 {
576   if (hton)
577   {
578     st_plugin_int **plugin= hton2plugin + hton->slot;
579 
580 #ifdef NDEBUG
581     /*
582       Take a shortcut for builtin engines -- return pointer to plugin
583       without acquiring LOCK_plugin mutex. This is safe safe since such
584       plugins are not deleted until shutdown and we don't do reference
585       counting in non-debug builds for them.
586 
587       Since we have reference to handlerton on our hands, this method
588       can't be called concurrently to non-builtin handlerton initialization/
589       deinitialization. So it is safe to access builtin_htons[] without
590       additional locking.
591      */
592     if (builtin_htons[hton->slot])
593       return *plugin;
594 
595     return my_plugin_lock(thd, plugin);
596 #else
597     /*
598       We can't take shortcut in debug builds.
599       At least assert that builtin_htons[slot] is set correctly.
600     */
601     assert(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == NULL));
602     return my_plugin_lock(thd, &plugin);
603 #endif
604   }
605   return NULL;
606 }
607 
608 
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)609 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
610 {
611   plugin_ref plugin;
612   switch (db_type) {
613   case DB_TYPE_DEFAULT:
614     return ha_default_handlerton(thd);
615   default:
616     if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
617         (plugin= ha_lock_engine(thd, installed_htons[db_type])))
618       return plugin_data<handlerton*>(plugin);
619     /* fall through */
620   case DB_TYPE_UNKNOWN:
621     return NULL;
622   }
623 }
624 
625 
626 /**
627   Use other database handler if databasehandler is not compiled in.
628 */
ha_checktype(THD * thd,enum legacy_db_type database_type,bool no_substitute,bool report_error)629 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
630                           bool no_substitute, bool report_error)
631 {
632   handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
633   if (ha_storage_engine_is_enabled(hton))
634     return hton;
635 
636   if (no_substitute)
637   {
638     if (report_error)
639     {
640       const char *engine_name= ha_resolve_storage_engine_name(hton);
641       my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
642     }
643     return NULL;
644   }
645 
646   (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));
647 
648   switch (database_type) {
649   case DB_TYPE_MRG_ISAM:
650     return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
651   default:
652     break;
653   }
654 
655   return ha_default_handlerton(thd);
656 } /* ha_checktype */
657 
658 
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)659 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
660                          handlerton *db_type)
661 {
662   handler *file;
663   DBUG_ENTER("get_new_handler");
664   DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
665 
666   if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
667   {
668     if ((file= db_type->create(db_type, share, alloc)))
669       file->init();
670     DBUG_RETURN(file);
671   }
672   /*
673     Try the default table type
674     Here the call to current_thd() is ok as we call this function a lot of
675     times but we enter this branch very seldom.
676   */
677   DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
678 }
679 
680 
681 static const char **handler_errmsgs;
682 
683 C_MODE_START
get_handler_errmsg(int nr)684 static const char *get_handler_errmsg(int nr)
685 {
686   return handler_errmsgs[nr - HA_ERR_FIRST];
687 }
688 C_MODE_END
689 
690 
691 /**
692   Register handler error messages for use with my_error().
693 
694   @retval
695     0           OK
696   @retval
697     !=0         Error
698 */
699 
ha_init_errors(void)700 int ha_init_errors(void)
701 {
702 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
703 
704   /* Allocate a pointer array for the error message strings. */
705   /* Zerofill it to avoid uninitialized gaps. */
706   if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
707                                                    HA_ERR_ERRORS * sizeof(char*),
708                                                    MYF(MY_WME | MY_ZEROFILL))))
709     return 1;
710 
711   /* Set the dedicated error messages. */
712   SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
713   SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
714   SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
715   SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
716   SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
717   SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
718   SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
719   SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
720   SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
721   SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
722   SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
723   SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
724   SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
725   SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
726   SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
727   SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
728   SETMSG(HA_ERR_TOO_BIG_ROW,            "Too big row");
729   SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
730   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
731   SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
732   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
733   SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
734   SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
735   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
736   SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
737   SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
738   SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
739   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
740   SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
741   SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
742   SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
743   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
744   SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
745   SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
746   SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
747   SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
748   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
749   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
750   SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
751   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
752   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
753   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
754   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,     ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
755   SETMSG(HA_ERR_INDEX_CORRUPT,          ER_DEFAULT(ER_INDEX_CORRUPT));
756   SETMSG(HA_FTS_INVALID_DOCID,          "Invalid InnoDB FTS Doc ID");
757   SETMSG(HA_ERR_TABLE_IN_FK_CHECK,	ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
758   SETMSG(HA_ERR_TABLESPACE_EXISTS,      "Tablespace already exists");
759   SETMSG(HA_ERR_TABLESPACE_MISSING,     ER_DEFAULT(ER_TABLESPACE_MISSING));
760   SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,  "FTS query exceeds result cache limit");
761   SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,	ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
762   SETMSG(HA_ERR_INNODB_FORCED_RECOVERY,	ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
763   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
764   SETMSG(HA_ERR_TABLE_CORRUPT,		ER_DEFAULT(ER_TABLE_CORRUPT));
765   SETMSG(HA_ERR_TABLESPACE_MISSING,	ER_DEFAULT(ER_TABLESPACE_MISSING));
766   SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY,	ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
767   SETMSG(HA_ERR_WRONG_FILE_NAME,		ER_DEFAULT(ER_WRONG_FILE_NAME));
768   SETMSG(HA_ERR_NOT_ALLOWED_COMMAND,		ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
769   SETMSG(HA_ERR_COMPUTE_FAILED,		"Compute virtual column value failed");
770   SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP,  "Too many nested sub-expressions in a full-text search");
771   /* Register the error messages for use with my_error(). */
772   return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
773 }
774 
775 
ha_finalize_handlerton(st_plugin_int * plugin)776 int ha_finalize_handlerton(st_plugin_int *plugin)
777 {
778   handlerton *hton= (handlerton *)plugin->data;
779   DBUG_ENTER("ha_finalize_handlerton");
780 
781   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
782   if (!hton)
783     goto end;
784 
785   switch (hton->state)
786   {
787   case SHOW_OPTION_NO:
788   case SHOW_OPTION_DISABLED:
789     break;
790   case SHOW_OPTION_YES:
791     if (installed_htons[hton->db_type] == hton)
792       installed_htons[hton->db_type]= NULL;
793     break;
794   };
795 
796   if (hton->panic)
797     hton->panic(hton, HA_PANIC_CLOSE);
798 
799   if (plugin->plugin->deinit)
800   {
801     /*
802       Today we have no defined/special behavior for uninstalling
803       engine plugins.
804     */
805     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
806     if (plugin->plugin->deinit(NULL))
807     {
808       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
809                              plugin->name.str));
810     }
811   }
812 
813   /*
814     In case a plugin is uninstalled and re-installed later, it should
815     reuse an array slot. Otherwise the number of uninstall/install
816     cycles would be limited.
817   */
818   if (hton->slot != HA_SLOT_UNDEF)
819   {
820     /* Make sure we are not unpluging another plugin */
821     assert(hton2plugin[hton->slot] == plugin);
822     assert(hton->slot < MAX_HA);
823     hton2plugin[hton->slot]= NULL;
824     builtin_htons[hton->slot]= false; /* Extra correctness. */
825   }
826 
827   my_free(hton);
828 
829  end:
830   DBUG_RETURN(0);
831 }
832 
833 
ha_initialize_handlerton(st_plugin_int * plugin)834 int ha_initialize_handlerton(st_plugin_int *plugin)
835 {
836   handlerton *hton;
837   DBUG_ENTER("ha_initialize_handlerton");
838   DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
839 
840   hton= (handlerton *)my_malloc(key_memory_handlerton,
841                                 sizeof(handlerton),
842                                 MYF(MY_WME | MY_ZEROFILL));
843 
844   if (hton == NULL)
845   {
846     sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
847                     plugin->name.str);
848     goto err_no_hton_memory;
849   }
850 
851   hton->slot= HA_SLOT_UNDEF;
852   /* Historical Requirement */
853   plugin->data= hton; // shortcut for the future
854   if (plugin->plugin->init && plugin->plugin->init(hton))
855   {
856     sql_print_error("Plugin '%s' init function returned error.",
857                     plugin->name.str);
858     goto err;
859   }
860 
861   /*
862     the switch below and hton->state should be removed when
863     command-line options for plugins will be implemented
864   */
865   DBUG_PRINT("info", ("hton->state=%d", hton->state));
866   switch (hton->state) {
867   case SHOW_OPTION_NO:
868     break;
869   case SHOW_OPTION_YES:
870     {
871       uint tmp;
872       ulong fslot;
873       /* now check the db_type for conflict */
874       if (hton->db_type <= DB_TYPE_UNKNOWN ||
875           hton->db_type >= DB_TYPE_DEFAULT ||
876           installed_htons[hton->db_type])
877       {
878         int idx= (int) DB_TYPE_FIRST_DYNAMIC;
879 
880         while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
881           idx++;
882 
883         if (idx == (int) DB_TYPE_DEFAULT)
884         {
885           sql_print_warning("Too many storage engines!");
886           goto err_deinit;
887         }
888         if (hton->db_type != DB_TYPE_UNKNOWN)
889           sql_print_warning("Storage engine '%s' has conflicting typecode. "
890                             "Assigning value %d.", plugin->plugin->name, idx);
891         hton->db_type= (enum legacy_db_type) idx;
892       }
893 
894       /*
895         In case a plugin is uninstalled and re-installed later, it should
896         reuse an array slot. Otherwise the number of uninstall/install
897         cycles would be limited. So look for a free slot.
898       */
899       DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
900       for (fslot= 0; fslot < total_ha; fslot++)
901       {
902         if (!hton2plugin[fslot])
903           break;
904       }
905       if (fslot < total_ha)
906         hton->slot= fslot;
907       else
908       {
909         if (total_ha >= MAX_HA)
910         {
911           sql_print_error("Too many plugins loaded. Limit is %lu. "
912                           "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
913           goto err_deinit;
914         }
915         hton->slot= total_ha++;
916       }
917       installed_htons[hton->db_type]= hton;
918       tmp= hton->savepoint_offset;
919       hton->savepoint_offset= savepoint_alloc_size;
920       savepoint_alloc_size+= tmp;
921       hton2plugin[hton->slot]=plugin;
922       builtin_htons[hton->slot]= (plugin->plugin_dl == NULL);
923       if (hton->prepare)
924         total_ha_2pc++;
925       break;
926     }
927     /* fall through */
928   default:
929     hton->state= SHOW_OPTION_DISABLED;
930     break;
931   }
932 
933   /*
934     This is entirely for legacy. We will create a new "disk based" hton and a
935     "memory" hton which will be configurable longterm. We should be able to
936     remove partition and myisammrg.
937   */
938   switch (hton->db_type) {
939   case DB_TYPE_HEAP:
940     heap_hton= hton;
941     break;
942   case DB_TYPE_MYISAM:
943     myisam_hton= hton;
944     break;
945   case DB_TYPE_INNODB:
946     innodb_hton= hton;
947     break;
948   default:
949     break;
950   };
951 
952   /*
953     Re-load the optimizer cost constants since this storage engine can
954     have non-default cost constants.
955   */
956   reload_optimizer_cost_constants();
957 
958   DBUG_RETURN(0);
959 
960 err_deinit:
961   /*
962     Let plugin do its inner deinitialization as plugin->init()
963     was successfully called before.
964   */
965   if (plugin->plugin->deinit)
966     (void) plugin->plugin->deinit(NULL);
967 
968 err:
969   my_free(hton);
970 err_no_hton_memory:
971   plugin->data= NULL;
972   DBUG_RETURN(1);
973 }
974 
ha_init()975 int ha_init()
976 {
977   int error= 0;
978   DBUG_ENTER("ha_init");
979 
980   assert(total_ha < MAX_HA);
981   /*
982     Check if there is a transaction-capable storage engine besides the
983     binary log (which is considered a transaction-capable storage engine in
984     counting total_ha)
985   */
986   opt_using_transactions= total_ha>(ulong)opt_bin_log;
987   savepoint_alloc_size+= sizeof(SAVEPOINT);
988 
989   /*
990     Initialize system database name cache.
991     This cache is used to do a quick check if a given
992     db.tablename is a system table.
993   */
994   known_system_databases= ha_known_system_databases();
995 
996   DBUG_RETURN(error);
997 }
998 
ha_end()999 void ha_end()
1000 {
1001   // Unregister handler error messages.
1002   my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
1003   my_free(handler_errmsgs);
1004 }
1005 
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)1006 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
1007                                  void *path)
1008 {
1009   handlerton *hton= plugin_data<handlerton*>(plugin);
1010   if (hton->state == SHOW_OPTION_YES && hton->drop_database)
1011     hton->drop_database(hton, (char *)path);
1012   return FALSE;
1013 }
1014 
1015 
ha_drop_database(char * path)1016 void ha_drop_database(char* path)
1017 {
1018   plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
1019 }
1020 
1021 
closecon_handlerton(THD * thd,plugin_ref plugin,void * unused)1022 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
1023                                    void *unused)
1024 {
1025   handlerton *hton= plugin_data<handlerton*>(plugin);
1026   /*
1027     there's no need to rollback here as all transactions must
1028     be rolled back already
1029   */
1030   if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
1031   {
1032     if (hton->close_connection)
1033       hton->close_connection(hton, thd);
1034     /* make sure ha_data is reset and ha_data_lock is released */
1035     thd_set_ha_data(thd, hton, NULL);
1036   }
1037   return FALSE;
1038 }
1039 
1040 
1041 /**
1042   @note
1043     don't bother to rollback here, it's done already
1044 */
ha_close_connection(THD * thd)1045 void ha_close_connection(THD* thd)
1046 {
1047   plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1048 }
1049 
1050 
kill_handlerton(THD * thd,plugin_ref plugin,void *)1051 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
1052 {
1053   handlerton *hton= plugin_data<handlerton*>(plugin);
1054 
1055   if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
1056   {
1057     if (thd_get_ha_data(thd, hton))
1058       hton->kill_connection(hton, thd);
1059   }
1060 
1061   return FALSE;
1062 }
1063 
ha_kill_connection(THD * thd)1064 void ha_kill_connection(THD *thd)
1065 {
1066   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1067 }
1068 
1069 
1070 /* ========================================================================
1071  ======================= TRANSACTIONS ===================================*/
1072 
1073 /**
1074   Transaction handling in the server
1075   ==================================
1076 
1077   In each client connection, MySQL maintains two transactional
1078   states:
1079   - a statement transaction,
1080   - a standard, also called normal transaction.
1081 
1082   Historical note
1083   ---------------
1084   "Statement transaction" is a non-standard term that comes
1085   from the times when MySQL supported BerkeleyDB storage engine.
1086 
1087   First of all, it should be said that in BerkeleyDB auto-commit
1088   mode auto-commits operations that are atomic to the storage
1089   engine itself, such as a write of a record, and are too
1090   high-granular to be atomic from the application perspective
1091   (MySQL). One SQL statement could involve many BerkeleyDB
1092   auto-committed operations and thus BerkeleyDB auto-commit was of
1093   little use to MySQL.
1094 
1095   Secondly, instead of SQL standard savepoints, BerkeleyDB
1096   provided the concept of "nested transactions". In a nutshell,
1097   transactions could be arbitrarily nested, but when the parent
1098   transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
1100   Commit of a nested transaction, in turn, made its changes
1101   visible, but not durable: it destroyed the nested transaction,
1102   all its changes would become available to the parent and
1103   currently active nested transactions of this parent.
1104 
1105   So the mechanism of nested transactions was employed to
1106   provide "all or nothing" guarantee of SQL statements
1107   required by the standard.
1108   A nested transaction would be created at start of each SQL
1109   statement, and destroyed (committed or aborted) at statement
1110   end. Such nested transaction was internally referred to as
1111   a "statement transaction" and gave birth to the term.
1112 
1113   (Historical note ends)
1114 
1115   Since then a statement transaction is started for each statement
1116   that accesses transactional tables or uses the binary log.  If
1117   the statement succeeds, the statement transaction is committed.
1118   If the statement fails, the transaction is rolled back. Commits
1119   of statement transactions are not durable -- each such
1120   transaction is nested in the normal transaction, and if the
1121   normal transaction is rolled back, the effects of all enclosed
1122   statement transactions are undone as well.  Technically,
1123   a statement transaction can be viewed as a savepoint which is
1124   maintained automatically in order to make effects of one
1125   statement atomic.
1126 
1127   The normal transaction is started by the user and is ended
1128   usually upon a user request as well. The normal transaction
1129   encloses transactions of all statements issued between
1130   its beginning and its end.
1131   In autocommit mode, the normal transaction is equivalent
1132   to the statement transaction.
1133 
1134   Since MySQL supports PSEA (pluggable storage engine
1135   architecture), more than one transactional engine can be
1136   active at a time. Hence transactions, from the server
1137   point of view, are always distributed. In particular,
1138   transactional state is maintained independently for each
1139   engine. In order to commit a transaction the two phase
1140   commit protocol is employed.
1141 
1142   Not all statements are executed in context of a transaction.
1143   Administrative and status information statements do not modify
1144   engine data, and thus do not start a statement transaction and
1145   also have no effect on the normal transaction. Examples of such
1146   statements are SHOW STATUS and RESET SLAVE.
1147 
1148   Similarly DDL statements are not transactional,
1149   and therefore a transaction is [almost] never started for a DDL
1150   statement. The difference between a DDL statement and a purely
1151   administrative statement though is that a DDL statement always
1152   commits the current transaction before proceeding, if there is
1153   any.
1154 
1155   At last, SQL statements that work with non-transactional
1156   engines also have no effect on the transaction state of the
1157   connection. Even though they are written to the binary log,
1158   and the binary log is, overall, transactional, the writes
1159   are done in "write-through" mode, directly to the binlog
  file, followed by an OS cache sync, in other words,
1161   bypassing the binlog undo log (translog).
1162   They do not commit the current normal transaction.
1163   A failure of a statement that uses non-transactional tables
1164   would cause a rollback of the statement transaction, but
  in case no transactional tables are used,
1166   no statement transaction is started.
1167 
1168   Data layout
1169   -----------
1170 
1171   The server stores its transaction-related data in
1172   thd->transaction. This structure has two members of type
1173   THD_TRANS. These members correspond to the statement and
1174   normal transactions respectively:
1175 
1176   - thd->transaction.stmt contains a list of engines
1177   that are participating in the given statement
1178   - thd->transaction.all contains a list of engines that
1179   have participated in any of the statement transactions started
1180   within the context of the normal transaction.
1181   Each element of the list contains a pointer to the storage
1182   engine, engine-specific transactional data, and engine-specific
1183   transaction flags.
1184 
1185   In autocommit mode thd->transaction.all is empty.
1186   Instead, data of thd->transaction.stmt is
1187   used to commit/rollback the normal transaction.
1188 
1189   The list of registered engines has a few important properties:
1190   - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
1192   new participants are always added to the beginning of the list.
1193 
1194   Transaction life cycle
1195   ----------------------
1196 
1197   When a new connection is established, thd->transaction
1198   members are initialized to an empty state.
1199   If a statement uses any tables, all affected engines
1200   are registered in the statement engine list. In
1201   non-autocommit mode, the same engines are registered in
1202   the normal transaction list.
1203   At the end of the statement, the server issues a commit
1204   or a roll back for all engines in the statement list.
1205   At this point transaction flags of an engine, if any, are
1206   propagated from the statement list to the list of the normal
1207   transaction.
1208   When commit/rollback is finished, the statement list is
1209   cleared. It will be filled in again by the next statement,
1210   and emptied again at the next statement's end.
1211 
1212   The normal transaction is committed in a similar way
1213   (by going over all engines in thd->transaction.all list)
1214   but at different times:
  - when a COMMIT SQL statement is issued by the user
1216   - implicitly, by the server, at the beginning of a DDL statement
1217   or SET AUTOCOMMIT={0|1} statement.
1218 
1219   The normal transaction can be rolled back as well:
1220   - if the user has requested so, by issuing ROLLBACK SQL
1221   statement
1222   - if one of the storage engines requested a rollback
1223   by setting thd->transaction_rollback_request. This may
1224   happen in case, e.g., when the transaction in the engine was
  chosen as a victim of the internal deadlock resolution algorithm
1226   and rolled back internally. When such a situation happens, there
1227   is little the server can do and the only option is to rollback
1228   transactions in all other participating engines.  In this case
1229   the rollback is accompanied by an error sent to the user.
1230 
1231   As follows from the use cases above, the normal transaction
1232   is never committed when there is an outstanding statement
1233   transaction. In most cases there is no conflict, since
1234   commits of the normal transaction are issued by a stand-alone
1235   administrative or DDL statement, thus no outstanding statement
1236   transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
1238   are prohibited in stored functions and triggers, therefore
1239   no conflicting situation can occur in a sub-statement either.
1240   The remaining rare cases when the server explicitly has
1241   to commit the statement transaction prior to committing the normal
1242   one cover error-handling scenarios (see for example
1243   SQLCOM_LOCK_TABLES).
1244 
1245   When committing a statement or a normal transaction, the server
1246   either uses the two-phase commit protocol, or issues a commit
1247   in each engine independently. The two-phase commit protocol
1248   is used only if:
1249   - all participating engines support two-phase commit (provide
1250     handlerton::prepare PSEA API call) and
1251   - transactions in at least two engines modify data (i.e. are
1252   not read-only).
1253 
1254   Note that the two phase commit is used for
1255   statement transactions, even though they are not durable anyway.
1256   This is done to ensure logical consistency of data in a multiple-
1257   engine transaction.
1258   For example, imagine that some day MySQL supports unique
1259   constraint checks deferred till the end of statement. In such
1260   case a commit in one of the engines may yield ER_DUP_KEY,
1261   and MySQL should be able to gracefully abort statement
1262   transactions of other participants.
1263 
1264   After the normal transaction has been committed,
1265   thd->transaction.all list is cleared.
1266 
1267   When a connection is closed, the current normal transaction, if
1268   any, is rolled back.
1269 
1270   Roles and responsibilities
1271   --------------------------
1272 
1273   The server has no way to know that an engine participates in
1274   the statement and a transaction has been started
1275   in it unless the engine says so. Thus, in order to be
1276   a part of a transaction, the engine must "register" itself.
1277   This is done by invoking trans_register_ha() server call.
1278   Normally the engine registers itself whenever handler::external_lock()
1279   is called. trans_register_ha() can be invoked many times: if
1280   an engine is already registered, the call does nothing.
1281   In case autocommit is not set, the engine must register itself
1282   twice -- both in the statement list and in the normal transaction
1283   list.
1284   In which list to register is a parameter of trans_register_ha().
1285 
1286   Note, that although the registration interface in itself is
1287   fairly clear, the current usage practice often leads to undesired
1288   effects. E.g. since a call to trans_register_ha() in most engines
1289   is embedded into implementation of handler::external_lock(), some
1290   DDL statements start a transaction (at least from the server
1291   point of view) even though they are not expected to. E.g.
1292   CREATE TABLE does not start a transaction, since
1293   handler::external_lock() is never called during CREATE TABLE. But
1294   CREATE TABLE ... SELECT does, since handler::external_lock() is
1295   called for the table that is being selected from. This has no
1296   practical effects currently, but must be kept in mind
1297   nevertheless.
1298 
1299   Once an engine is registered, the server will do the rest
1300   of the work.
1301 
1302   During statement execution, whenever any of data-modifying
1303   PSEA API methods is used, e.g. handler::write_row() or
1304   handler::update_row(), the read-write flag is raised in the
1305   statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data can not be
1307   changed in a way other than issuing a PSEA call. Important:
1308   unless this invariant is preserved the server will not know that
1309   a transaction in a given engine is read-write and will not
1310   involve the two-phase commit protocol!
1311 
1312   At the end of a statement, server call trans_commit_stmt is
1313   invoked. This call in turn invokes handlerton::prepare()
1314   for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
1316   will suffice, handlerton::prepare() is not invoked and
1317   the server only calls handlerton::commit_one_phase().
1318   At statement commit, the statement-related read-write
1319   engine flag is propagated to the corresponding flag in the
1320   normal transaction.  When the commit is complete, the list
1321   of registered engines is cleared.
1322 
1323   Rollback is handled in a similar fashion.
1324 
1325   Additional notes on DDL and the normal transaction.
1326   ---------------------------------------------------
1327 
1328   DDLs and operations with non-transactional engines
1329   do not "register" in thd->transaction lists, and thus do not
1330   modify the transaction state. Besides, each DDL in
1331   MySQL is prefixed with an implicit normal transaction commit
1332   (a call to trans_commit_implicit()), and thus leaves nothing
1333   to modify.
1334   However, as it has been pointed out with CREATE TABLE .. SELECT,
1335   some DDL statements can start a *new* transaction.
1336 
1337   Behaviour of the server in this case is currently badly
1338   defined.
1339   DDL statements use a form of "semantic" logging
1340   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1341   the newly created table is deleted.
1342   In addition, some DDL statements issue interim transaction
1343   commits: e.g. ALTER TABLE issues a commit after data is copied
1344   from the original table to the internal temporary table. Other
1345   statements, e.g. CREATE TABLE ... SELECT do not always commit
  after themselves.
1347   And finally there is a group of DDL statements such as
1348   RENAME/DROP TABLE that doesn't start a new transaction
1349   and doesn't commit.
1350 
1351   This diversity makes it hard to say what will happen if
1352   by chance a stored function is invoked during a DDL --
1353   whether any modifications it makes will be committed or not
1354   is not clear. Fortunately, SQL grammar of few DDLs allows
1355   invocation of a stored function.
1356 
1357   A consistent behaviour is perhaps to always commit the normal
1358   transaction after all DDLs, just like the statement transaction
1359   is always committed at the end of all statements.
1360 */
1361 
1362 /**
1363   Register a storage engine for a transaction.
1364 
1365   Every storage engine MUST call this function when it starts
1366   a transaction or a statement (that is it must be called both for the
1367   "beginning of transaction" and "beginning of statement").
1368   Only storage engines registered for the transaction/statement
1369   will know when to commit/rollback it.
1370 
1371   @note
1372     trans_register_ha is idempotent - storage engine may register many
1373     times per transaction.
1374 
1375 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,const ulonglong * trxid)1376 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
1377                        const ulonglong *trxid)
1378 {
1379   Ha_trx_info *ha_info;
1380   Transaction_ctx *trn_ctx= thd->get_transaction();
1381   Transaction_ctx::enum_trx_scope trx_scope=
1382     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1383 
1384   DBUG_ENTER("trans_register_ha");
1385   DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1386 
1387   Ha_trx_info *knownn_trans= trn_ctx->ha_trx_info(trx_scope);
1388   if (all)
1389   {
1390     /*
1391       Ensure no active backup engine data exists, unless the current transaction
1392       is from replication and in active xa state.
1393     */
1394     assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1395            (thd->get_transaction()->xid_state()->
1396             has_state(XID_STATE::XA_ACTIVE)));
1397     assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1398            (thd->is_binlog_applier() || thd->slave_thread));
1399 
1400     thd->server_status|= SERVER_STATUS_IN_TRANS;
1401     if (thd->tx_read_only)
1402       thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1403     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1404   }
1405 
1406   ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1407 
1408   if (ha_info->is_started())
1409     DBUG_VOID_RETURN; /* already registered, return */
1410 
1411   ha_info->register_ha(knownn_trans, ht_arg);
1412   trn_ctx->set_ha_trx_info(trx_scope, ha_info);
1413 
1414   if (ht_arg->prepare == 0)
1415     trn_ctx->set_no_2pc(trx_scope, true);
1416 
1417   trn_ctx->xid_state()->set_query_id(thd->query_id);
1418 /*
1419   Register transaction start in performance schema if not done already.
1420   By doing this, we handle cases when the transaction is started implicitly in
1421   autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1422   executed statement is a single-statement transaction.
1423 
1424   Explicitly started transactions are handled in trans_begin().
1425 
1426   Do not register transactions in which binary log is the only participating
1427   transactional storage engine.
1428 */
1429 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1430   if (thd->m_transaction_psi == NULL &&
1431       ht_arg->db_type != DB_TYPE_BINLOG)
1432   {
1433     const XID *xid= trn_ctx->xid_state()->get_xid();
1434     my_bool autocommit= !thd->in_multi_stmt_transaction_mode();
1435     thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
1436                                          xid, trxid, thd->tx_isolation,
1437                                          thd->tx_read_only, autocommit);
1438     DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1439     gtid_set_performance_schema_values(thd);
1440   }
1441 #endif
1442   DBUG_VOID_RETURN;
1443 }
1444 
1445 /**
1446   @retval
1447     0   ok
1448   @retval
1449     1   error, transaction was rolled back
1450 */
ha_prepare(THD * thd)1451 int ha_prepare(THD *thd)
1452 {
1453   int error=0;
1454   Transaction_ctx *trn_ctx= thd->get_transaction();
1455   DBUG_ENTER("ha_prepare");
1456 
1457   if (trn_ctx->is_active(Transaction_ctx::SESSION))
1458   {
1459     const Ha_trx_info *ha_info= trn_ctx->ha_trx_info(
1460       Transaction_ctx::SESSION);
1461     bool gtid_error= false, need_clear_owned_gtid= false;
1462 
1463     if ((gtid_error=
1464          MY_TEST(commit_owned_gtids(thd, true, &need_clear_owned_gtid))))
1465     {
1466       assert(need_clear_owned_gtid);
1467 
1468       ha_rollback_trans(thd, true);
1469       error= 1;
1470       goto err;
1471     }
1472 
1473     while (ha_info)
1474     {
1475       handlerton *ht= ha_info->ht();
1476       assert(!thd->status_var_aggregated);
1477       thd->status_var.ha_prepare_count++;
1478       if (ht->prepare)
1479       {
1480         DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
1481           ha_rollback_trans(thd, true);
1482           DBUG_RETURN(1);
1483         });
1484         if (ht->prepare(ht, thd, true))
1485         {
1486 #ifdef WITH_WSREP
1487           if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
1488           {
1489 	    error= 1;
1490 	    /* avoid sending error, if we need to replay */
1491             if (thd->wsrep_conflict_state!= MUST_REPLAY)
1492             {
1493               my_error(ER_LOCK_DEADLOCK, MYF(0));
1494             }
1495           }
1496           else
1497           {
1498             /* not wsrep hton, bail to native mysql behavior */
1499 #endif
1500           ha_rollback_trans(thd, true);
1501           error=1;
1502           break;
1503 #ifdef WITH_WSREP
1504           }
1505 #endif
1506         }
1507       }
1508       else
1509       {
1510         push_warning_printf(thd, Sql_condition::SL_WARNING,
1511                             ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
1512                             ha_resolve_storage_engine_name(ht));
1513       }
1514       ha_info= ha_info->next();
1515     }
1516 
1517     assert(thd->get_transaction()->xid_state()->
1518            has_state(XID_STATE::XA_IDLE));
1519 
1520 err:
1521     gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
1522   }
1523   DBUG_RETURN(error);
1524 }
1525 
1526 /**
1527   Check if we can skip the two-phase commit.
1528 
1529   A helper function to evaluate if two-phase commit is mandatory.
1530   As a side effect, propagates the read-only/read-write flags
1531   of the statement transaction to its enclosing normal transaction.
1532 
1533   If we have at least two engines with read-write changes we must
1534   run a two-phase commit. Otherwise we can run several independent
1535   commits as the only transactional engine has read-write changes
1536   and others are read-only.
1537 
1538   @retval   0   All engines are read-only.
1539   @retval   1   We have the only engine with read-write changes.
1540   @retval   >1  More than one engine have read-write changes.
1541                 Note: return value might NOT be the exact number of
1542                 engines with read-write changes.
1543 */
1544 
1545 static
1546 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1547 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1548                                     bool all)
1549 {
1550   /* The number of storage engines that have actual changes. */
1551   unsigned rw_ha_count= 0;
1552   Ha_trx_info *ha_info;
1553 
1554   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1555   {
1556     if (ha_info->is_trx_read_write())
1557       ++rw_ha_count;
1558 
1559     if (! all)
1560     {
1561       Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1562       assert(ha_info != ha_info_all);
1563       /*
1564         Merge read-only/read-write information about statement
1565         transaction to its enclosing normal transaction. Do this
1566         only if in a real transaction -- that is, if we know
1567         that ha_info_all is registered in thd->transaction.all.
1568         Since otherwise we only clutter the normal transaction flags.
1569       */
1570       if (ha_info_all->is_started()) /* FALSE if autocommit. */
1571         ha_info_all->coalesce_trx_with(ha_info);
1572     }
1573     else if (rw_ha_count > 1)
1574     {
1575       /*
1576         It is a normal transaction, so we don't need to merge read/write
1577         information up, and the need for two-phase commit has been
1578         already established. Break the loop prematurely.
1579       */
1580       break;
1581     }
1582   }
1583   return rw_ha_count;
1584 }
1585 
1586 
1587 /**
1588   The function computes condition to call gtid persistor wrapper,
1589   and executes it.
1590   It is invoked at committing a statement or transaction, including XA,
1591   and also at XA prepare handling.
1592 
1593   @param thd  Thread context.
1594   @param all  The execution scope, true for the transaction one, false
1595               for the statement one.
1596   @param[out] need_clear_owned_gtid_ptr
1597               A pointer to bool variable to return the computed decision
1598               value.
1599   @return zero as no error indication, non-zero otherwise
1600 */
1601 
commit_owned_gtids(THD * thd,bool all,bool * need_clear_owned_gtid_ptr)1602 int commit_owned_gtids(THD *thd, bool all, bool *need_clear_owned_gtid_ptr)
1603 {
1604   DBUG_ENTER("commit_owned_gtids(...)");
1605   int error= 0;
1606 
1607   if ((!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates)) &&
1608       (all || !thd->in_multi_stmt_transaction_mode()) &&
1609       !thd->is_operating_gtid_table_implicitly &&
1610       !thd->is_operating_substatement_implicitly)
1611   {
1612     /*
1613       If the binary log is disabled for this thread (either by
1614       log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1615       slave thread), then the statement will not be written to
1616       the binary log. In this case, we should save its GTID into
1617       mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it
1618       did when binlog is enabled.
1619     */
1620     if (thd->owned_gtid.sidno > 0)
1621     {
1622       error= gtid_state->save(thd);
1623       *need_clear_owned_gtid_ptr= true;
1624     }
1625     else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)
1626       *need_clear_owned_gtid_ptr= true;
1627   }
1628   else
1629   {
1630     *need_clear_owned_gtid_ptr= false;
1631   }
1632 
1633   DBUG_RETURN(error);
1634 }
1635 
1636 
1637 /**
1638   The function is a wrapper of commit_owned_gtids(...). It is invoked
1639   at committing a partially failed statement or transaction.
1640 
1641   @param thd  Thread context.
1642 
1643   @retval -1 if error when persisting owned gtid.
1644   @retval 0 if succeed to commit owned gtid.
1645   @retval 1 if do not meet conditions to commit owned gtid.
1646 */
commit_owned_gtid_by_partial_command(THD * thd)1647 int commit_owned_gtid_by_partial_command(THD *thd)
1648 {
1649   DBUG_ENTER("commit_owned_gtid_by_partial_command(THD *thd)");
1650   bool need_clear_owned_gtid_ptr= false;
1651   int ret= 0;
1652 
1653   if (commit_owned_gtids(thd, true, &need_clear_owned_gtid_ptr))
1654   {
1655     /* Error when saving gtid into mysql.gtid_executed table. */
1656     gtid_state->update_on_rollback(thd);
1657     ret= -1;
1658   }
1659   else if (need_clear_owned_gtid_ptr)
1660   {
1661     gtid_state->update_on_commit(thd);
1662     ret= 0;
1663   }
1664   else
1665   {
1666     ret= 1;
1667   }
1668 
1669   DBUG_RETURN(ret);
1670 }
1671 
1672 
/**
  Commit the statement or the normal transaction of a session.

  The function first lets commit_owned_gtids() persist the owned GTID when
  required, then (if any storage engine is registered in the scope)
  coalesces read-write information, takes the MDL commit lock, checks the
  read-only state, runs tc_log->prepare() when two-phase commit is needed,
  and finally calls tc_log->commit(). Any failure leads to
  ha_rollback_trans().

  @param[in] thd                       Thread context.
  @param[in] all                       true to commit the normal
                                       transaction (explicit commit issued
                                       by the user or implicit commit
                                       issued by a DDL), false to commit
                                       the statement transaction.
  @param[in] ignore_global_read_lock   Allow commit to complete even if a
                                       global read lock is active. This can be
                                       used to allow changes to internal tables
                                       (e.g. slave status tables).

  @retval
    0   ok (also returned, without doing anything, for a statement commit
        requested from within a sub-statement)
  @retval
    1   transaction was rolled back
  @retval
    2   commit of the normal transaction was requested from within a
        stored function or trigger; ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG is
        raised and the function returns before any engine is asked to
        prepare or commit

  @todo
    Since we don't support nested statement transactions in 5.0,
    we can't commit or rollback stmt transactions while we are inside
    stored functions or triggers. So we simply do nothing now.
    TODO: This should be fixed in later ( >= 5.1) releases.
*/

int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
{
  int error= 0;
  bool need_clear_owned_gtid= false;
  /*
    Save transaction owned gtid into table before transaction prepare
    if binlog is disabled, or binlog is enabled and log_slave_updates
    is disabled with slave SQL thread or slave worker thread.
    A failure here is kept in 'error' and, unless overwritten by
    tc_log->prepare() below, makes the commit step roll back instead.
  */
  error= commit_owned_gtids(thd, all, &need_clear_owned_gtid);

  /*
    'all' means that this is either an explicit commit issued by
    user, or an implicit commit issued by a DDL.
  */
  Transaction_ctx *trn_ctx= thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;

  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
  */
  bool is_real_trans=
    all || !trn_ctx->is_active(Transaction_ctx::SESSION);

  Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope);
  XID_STATE *xid_state= trn_ctx->xid_state();

  DBUG_ENTER("ha_commit_trans");

  DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
                      all, thd->in_sub_stmt, ha_info, is_real_trans));
  /*
    We must not commit the normal transaction if a statement
    transaction is pending. Otherwise statement transaction
    flags will not get propagated to its normal transaction's
    counterpart.
  */
  assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
         !all);

  if (thd->in_sub_stmt)
  {
    /* Not expected to be reached; the code below is for non-debug builds. */
    assert(0);
    /*
      Since we don't support nested statement transactions in 5.0,
      we can't commit or rollback stmt transactions while we are inside
      stored functions or triggers. So we simply do nothing now.
      TODO: This should be fixed in later ( >= 5.1) releases.
    */
    if (!all)
      DBUG_RETURN(0);
    /*
      We assume that all statements which commit or rollback main transaction
      are prohibited inside of stored functions or triggers. So they should
      bail out with error even before ha_commit_trans() call. To be 100% safe
      let us throw error in non-debug builds.
    */
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(2);
  }

  MDL_request mdl_request;
  /* Set once the commit-lock acquisition step has been passed. */
  bool release_mdl= false;
  if (ha_info)
  {
    uint rw_ha_count;
    bool rw_trans;

    DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););

    rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
    trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
    /* rw_trans is TRUE when we are in a real transaction changing data */
    rw_trans= is_real_trans && (rw_ha_count > 0);

    DBUG_EXECUTE_IF("dbug.enabled_commit",
                    {
                      const char act[]= "now signal Reached wait_for signal.commit_continue";
                      assert(!debug_sync_set_action(current_thd,
                                                    STRING_WITH_LEN(act)));
                    };);
    if (rw_trans && !ignore_global_read_lock)
    {
      /*
        Acquire a metadata lock which will ensure that COMMIT is blocked
        by an active FLUSH TABLES WITH READ LOCK (and vice versa:
        COMMIT in progress blocks FTWRL).

        We allow the owner of FTWRL to COMMIT; we assume that it knows
        what it does.
      */
      MDL_REQUEST_INIT(&mdl_request,
                       MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
                       MDL_EXPLICIT);

      DBUG_PRINT("debug", ("Acquire MDL commit lock"));
      /*
        With WSREP the lock is not requested when WSREP(thd) is true; the
        release at 'end' is guarded by mdl_request.ticket for that reason.
      */
#ifdef WITH_WSREP
      if (!WSREP(thd) &&
          thd->mdl_context.acquire_lock(&mdl_request,
#else
      if (thd->mdl_context.acquire_lock(&mdl_request,
#endif /* WITH_WSREP */
                                        thd->variables.lock_wait_timeout))
      {
        /*
          Note: this path returns directly and does not go through the
          'end' label below.
        */
        ha_rollback_trans(thd, all);
        DBUG_RETURN(1);
      }
      release_mdl= true;

      DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
    }

    /*
      Refuse to commit changes to transactional tables (or no-op DML)
      when check_readonly() reports that the server is read-only.
    */
    if (rw_trans && (stmt_has_updated_trans_table(ha_info)
        || trans_has_noop_dml(ha_info)) && check_readonly(thd, true))
    {
      ha_rollback_trans(thd, all);
      error= 1;
      goto end;
    }

    /*
      Run the prepare phase when two-phase commit is possible and more
      than one engine has read-write changes. The WSREP build additionally
      prepares for CREATE TABLE statements that have not updated a
      transactional table.
    */
#ifdef WITH_WSREP
    if ((!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1)) ||
        (WSREP(thd) && thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
         !trans_has_updated_trans_table(thd)))
    {
      WSREP_DEBUG("handler prepare for CTAS");
#else
    if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
#endif /* WITH_WSREP */
      error= tc_log->prepare(thd, all);
#ifdef WITH_WSREP
    }
#endif /* WITH_WSREP */
  }
  /*
    The state of XA transaction is changed to Prepared, intermediately.
    It's going to change to the regular NOTR at the end.
    The fact of the Prepared state is of interest to binary logger.
  */
  if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE))
  {
    assert(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
           static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
           get_xa_opt() == XA_ONE_PHASE);

    xid_state->set_state(XID_STATE::XA_PREPARED);
  }
#ifdef WITH_WSREP
  DEBUG_SYNC(thd, "wsrep_before_commit");
#endif /* WITH_WSREP */
  /*
    Commit through the transaction coordinator unless an earlier step
    already failed; in both failure cases the transaction is rolled back.
  */
  if (error || (error= tc_log->commit(thd, all)))
  {
    ha_rollback_trans(thd, all);
    error= 1;
    goto end;
  }
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as committed
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (is_real_trans && thd->m_transaction_psi != NULL)
  {
    MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi= NULL;
  }
#endif
  DBUG_EXECUTE_IF("crash_commit_after",
                  if (!thd->is_operating_gtid_table_implicitly)
                    DBUG_SUICIDE(););
end:
  if (release_mdl && mdl_request.ticket)
  {
    /*
      We do not always immediately release transactional locks
      after ha_commit_trans() (see uses of ha_enable_transaction()),
      thus we release the commit blocker lock as soon as it's
      not needed.
    */
    DBUG_PRINT("debug", ("Releasing MDL commit lock"));
    thd->mdl_context.release_lock(mdl_request.ticket);
  }
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (is_real_trans)
  {
    trn_ctx->cleanup();
    thd->tx_priority= 0;
  }

  if (need_clear_owned_gtid)
  {
    thd->server_status&= ~SERVER_STATUS_IN_TRANS;
    /*
      Release the owned GTID when binlog is disabled, or binlog is
      enabled and log_slave_updates is disabled with slave SQL thread
      or slave worker thread.
    */
    if (error)
      gtid_state->update_on_rollback(thd);
    else
      gtid_state->update_on_commit(thd);
  }

  DBUG_RETURN(error);
}
1903 
1904 /**
1905   Commit the sessions outstanding transaction.
1906 
1907   @pre thd->transaction.flags.commit_low == true
1908   @post thd->transaction.flags.commit_low == false
1909 
1910   @note This function does not care about global read lock; the caller
1911   should.
1912 
1913   @param[in]  all  Is set in case of explicit commit
1914                    (COMMIT statement), or implicit commit
1915                    issued by DDL. Is not set when called
1916                    at the end of statement, even if
1917                    autocommit=1.
1918   @param[in]  run_after_commit
1919                    True by default, otherwise, does not execute
1920                    the after_commit hook in the function.
1921 */
1922 
ha_commit_low(THD * thd,bool all,bool run_after_commit)1923 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1924 {
1925   int error=0;
1926   Transaction_ctx *trn_ctx= thd->get_transaction();
1927   Transaction_ctx::enum_trx_scope trx_scope=
1928     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1929   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1930 
1931   DBUG_ENTER("ha_commit_low");
1932 #ifdef WITH_WSREP
1933 #ifdef WSREP_PROC_INFO
1934   char info[64]= { 0, };
1935   snprintf (info, sizeof(info) - 1, "ha_commit_one_phase(%lld)",
1936             (long long)wsrep_thd_trx_seqno(thd));
1937 #else
1938   const char info[]="ha_commit_one_phase()";
1939 #endif /* WSREP_PROC_INFO */
1940   char* tmp_info= NULL;
1941   if (WSREP(thd)) tmp_info= (char *)thd_proc_info(thd, info);
1942 #endif /* WITH_WSREP */
1943 
1944   if (ha_info)
1945   {
1946     bool restore_backup_ha_data= false;
1947     /*
1948       At execution of XA COMMIT ONE PHASE binlog or slave applier
1949       reattaches the engine ha_data to THD, previously saved at XA START.
1950     */
1951     if (all && thd->rpl_unflag_detached_engine_ha_data())
1952     {
1953       assert(thd->lex->sql_command == SQLCOM_XA_COMMIT);
1954       assert(static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
1955              get_xa_opt() == XA_ONE_PHASE);
1956       restore_backup_ha_data= true;
1957     }
1958 
1959     for (; ha_info; ha_info= ha_info_next)
1960     {
1961       int err;
1962       handlerton *ht= ha_info->ht();
1963       if ((err= ht->commit(ht, thd, all)))
1964       {
1965         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1966         error=1;
1967       }
1968       assert(!thd->status_var_aggregated);
1969       thd->status_var.ha_commit_count++;
1970       ha_info_next= ha_info->next();
1971       if (restore_backup_ha_data)
1972         reattach_engine_ha_data_to_thd(thd, ht);
1973       ha_info->reset(); /* keep it conveniently zero-filled */
1974     }
1975     trn_ctx->reset_scope(trx_scope);
1976     if (all)
1977     {
1978       trn_ctx->invalidate_changed_tables_in_cache();
1979     }
1980   }
1981   /* Free resources and perform other cleanup even for 'empty' transactions. */
1982   if (all)
1983     trn_ctx->cleanup();
1984 #ifdef WITH_WSREP
1985   if (WSREP(thd)) thd_proc_info(thd, tmp_info);
1986 #endif /* WITH_WSREP */
1987   /*
1988     When the transaction has been committed, we clear the commit_low
1989     flag. This allow other parts of the system to check if commit_low
1990     was called.
1991   */
1992   trn_ctx->m_flags.commit_low= false;
1993   if (run_after_commit && thd->get_transaction()->m_flags.run_hooks)
1994   {
1995     /*
1996        If commit succeeded, we call the after_commit hook.
1997 
1998        TODO: Investigate if this can be refactored so that there is
1999              only one invocation of this hook in the code (in
2000              MYSQL_LOG_BIN::finish_commit).
2001     */
2002     if (!error)
2003       (void) RUN_HOOK(transaction, after_commit, (thd, all));
2004     trn_ctx->m_flags.run_hooks= false;
2005   }
2006   DBUG_RETURN(error);
2007 }
2008 
2009 
ha_rollback_low(THD * thd,bool all)2010 int ha_rollback_low(THD *thd, bool all)
2011 {
2012   Transaction_ctx *trn_ctx= thd->get_transaction();
2013   int error= 0;
2014   Transaction_ctx::enum_trx_scope trx_scope=
2015     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2016   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
2017 
2018   (void) RUN_HOOK(transaction, before_rollback, (thd, all));
2019 
2020   if (ha_info)
2021   {
2022     bool restore_backup_ha_data= false;
2023     /*
2024       Similarly to the commit case, the binlog or slave applier
2025       reattaches the engine ha_data to THD.
2026     */
2027     if (all && thd->rpl_unflag_detached_engine_ha_data())
2028     {
2029       assert(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
2030              thd->killed == THD::KILL_CONNECTION);
2031 
2032       restore_backup_ha_data= true;
2033     }
2034 
2035     for (; ha_info; ha_info= ha_info_next)
2036     {
2037       int err;
2038       handlerton *ht= ha_info->ht();
2039       if ((err= ht->rollback(ht, thd, all)))
2040       { // cannot happen
2041 #ifdef WITH_WSREP
2042         WSREP_INFO("rollback failed for ht: %d, conf: %d SQL %s",
2043                    ht->db_type, thd->wsrep_conflict_state, thd->query().str);
2044         Diagnostics_area *da= thd->get_stmt_da();
2045         if (da)
2046         {
2047           WSREP_INFO("stmt DA %d %s",
2048                      da->status(), (da->is_error()) ? da->message_text() : "void");
2049         }
2050 #endif /* WITH_WSREP */
2051         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2052         error= 1;
2053       }
2054       assert(!thd->status_var_aggregated);
2055       thd->status_var.ha_rollback_count++;
2056       ha_info_next= ha_info->next();
2057       if (restore_backup_ha_data)
2058         reattach_engine_ha_data_to_thd(thd, ht);
2059       ha_info->reset(); /* keep it conveniently zero-filled */
2060     }
2061     trn_ctx->reset_scope(trx_scope);
2062   }
2063 
2064   /*
2065     Thanks to possibility of MDL deadlock rollback request can come even if
2066     transaction hasn't been started in any transactional storage engine.
2067 
2068     It is possible to have a call of ha_rollback_low() while handling
2069     failure from ha_prepare() and an error in Daignostics_area still
2070     wasn't set. Therefore it is required to check that an error in
2071     Diagnostics_area is set before calling the method XID_STATE::set_error().
2072 
2073     If it wasn't done it would lead to failure of the assertion
2074     assert(m_status == DA_ERROR)
2075     in the method Diagnostics_area::mysql_errno().
2076 
2077     In case ha_prepare is failed and an error wasn't set in Diagnostics_area
2078     the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
2079     the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
2080     returned by ha_prepare() is handled.
2081   */
2082   if (all && thd->transaction_rollback_request && thd->is_error())
2083     trn_ctx->xid_state()->set_error(thd);
2084 
2085   (void) RUN_HOOK(transaction, after_rollback, (thd, all));
2086   return error;
2087 }
2088 
2089 
ha_rollback_trans(THD * thd,bool all)2090 int ha_rollback_trans(THD *thd, bool all)
2091 {
2092   int error=0;
2093   Transaction_ctx *trn_ctx= thd->get_transaction();
2094   bool is_xa_rollback= trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);
2095 
2096   /*
2097     "real" is a nick name for a transaction for which a commit will
2098     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
2099     transaction is not 'real': even though it's possible to commit it,
2100     the changes are not durable as they might be rolled back if the
2101     enclosing 'all' transaction is rolled back.
2102     We establish the value of 'is_real_trans' by checking
2103     if it's an explicit COMMIT or BEGIN statement, or implicit
2104     commit issued by DDL (in these cases all == TRUE),
2105     or if we're running in autocommit mode (it's only in the autocommit mode
2106     ha_commit_one_phase() is called with an empty
2107     transaction.all.ha_list, see why in trans_register_ha()).
2108   */
2109   bool is_real_trans=
2110     all || !trn_ctx->is_active(Transaction_ctx::SESSION);
2111 
2112   DBUG_ENTER("ha_rollback_trans");
2113 
2114   /*
2115     We must not rollback the normal transaction if a statement
2116     transaction is pending.
2117   */
2118   assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
2119          !all);
2120 
2121   if (thd->in_sub_stmt)
2122   {
2123     assert(0);
2124     /*
2125       If we are inside stored function or trigger we should not commit or
2126       rollback current statement transaction. See comment in ha_commit_trans()
2127       call for more information.
2128     */
2129     if (!all)
2130       DBUG_RETURN(0);
2131     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
2132     DBUG_RETURN(1);
2133   }
2134 
2135   if (tc_log)
2136     error= tc_log->rollback(thd, all);
2137   /*
2138     Mark multi-statement (any autocommit mode) or single-statement
2139     (autocommit=1) transaction as rolled back
2140   */
2141 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2142   if (all || !thd->in_active_multi_stmt_transaction())
2143   {
2144     MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
2145     thd->m_transaction_psi= NULL;
2146   }
2147 #endif
2148 
2149   /* Always cleanup. Even if nht==0. There may be savepoints. */
2150   if (is_real_trans)
2151   {
2152     trn_ctx->cleanup();
2153     thd->tx_priority= 0;
2154   }
2155 
2156   if (all)
2157     thd->transaction_rollback_request= FALSE;
2158 
2159   /*
2160     Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
2161     complete transaction is being rollback or autocommit=1.
2162     Notice, XA rollback has just invoked update_on_commit() through
2163     tc_log->*rollback* stack.
2164   */
2165   if (is_real_trans && !is_xa_rollback)
2166     gtid_state->update_on_rollback(thd);
2167 
2168   /*
2169     If the transaction cannot be rolled back safely, warn; don't warn if this
2170     is a slave thread (because when a slave thread executes a ROLLBACK, it has
2171     been read from the binary log, so it's 100% sure and normal to produce
2172     error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
2173     slave SQL thread, it would not stop the thread but just be printed in
2174     the error log; but we don't want users to wonder why they have this
2175     message in the error log, so we don't send it.
2176   */
2177   if (is_real_trans &&
2178       trn_ctx->cannot_safely_rollback(
2179         Transaction_ctx::SESSION) &&
2180       !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
2181     trn_ctx->push_unsafe_rollback_warnings(thd);
2182 
2183   DBUG_RETURN(error);
2184 }
2185 
2186 
2187 /**
2188   Commit the attachable transaction in storage engines.
2189 
2190   @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2191         which commits attachable transaction but skips code which is
2192         unnecessary and unsafe for them (like dealing with GTIDs).
2193         Since attachable transactions are read-only their commit only
2194         needs to release resources and cleanup state in SE.
2195 
2196   @param thd     Current thread
2197 
2198   @retval 0      - Success
2199   @retval non-0  - Failure
2200 */
ha_commit_attachable(THD * thd)2201 int ha_commit_attachable(THD *thd)
2202 {
2203   int error= 0;
2204   Transaction_ctx *trn_ctx= thd->get_transaction();
2205   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2206   Ha_trx_info *ha_info_next;
2207 
2208   /* This function only handles attachable transactions. */
2209   assert(thd->is_attachable_ro_transaction_active());
2210   /*
2211     Since the attachable transaction is AUTOCOMMIT we only need
2212     to care about statement transaction.
2213   */
2214   assert(! trn_ctx->is_active(Transaction_ctx::SESSION));
2215 
2216   if (ha_info)
2217   {
2218     for (; ha_info; ha_info= ha_info_next)
2219     {
2220       /* Attachable transaction is not supposed to modify anything. */
2221       assert(! ha_info->is_trx_read_write());
2222 
2223       handlerton *ht= ha_info->ht();
2224       if (ht->commit(ht, thd, false))
2225       {
2226         /*
2227           In theory this should not happen since attachable transactions
2228           are read only and therefore commit is supposed to only release
2229           resources/cleanup state. Even if this happens we will simply
2230           continue committing attachable transaction in other SEs.
2231         */
2232         assert(false);
2233         error= 1;
2234       }
2235       assert(!thd->status_var_aggregated);
2236       thd->status_var.ha_commit_count++;
2237       ha_info_next= ha_info->next();
2238 
2239       ha_info->reset(); /* keep it conveniently zero-filled */
2240     }
2241     trn_ctx->reset_scope(Transaction_ctx::STMT);
2242   }
2243 
2244   /*
2245     Mark transaction as commited in PSI.
2246   */
2247 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2248   if (thd->m_transaction_psi != NULL)
2249   {
2250     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
2251     thd->m_transaction_psi= NULL;
2252   }
2253 #endif
2254 
2255   /* Free resources and perform other cleanup even for 'empty' transactions. */
2256   trn_ctx->cleanup();
2257 
2258   return (error);
2259 }
2260 
2261 
2262 /**
2263   @details
2264   This function should be called when MySQL sends rows of a SELECT result set
2265   or the EOF mark to the client. It releases a possible adaptive hash index
2266   S-latch held by thd in InnoDB and also releases a possible InnoDB query
2267   FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2268   keep them over several calls of the InnoDB handler interface when a join
2269   is executed. But when we let the control to pass to the client they have
2270   to be released because if the application program uses mysql_use_result(),
2271   it may deadlock on the S-latch if the application on another connection
2272   performs another SQL query. In MySQL-4.1 this is even more important because
2273   there a connection can have several SELECT queries open at the same time.
2274 
2275   @param thd           the thread handle of the current connection
2276 
2277   @return
2278     always 0
2279 */
2280 
ha_release_temporary_latches(THD * thd)2281 int ha_release_temporary_latches(THD *thd)
2282 {
2283   const Ha_trx_info *info;
2284   Transaction_ctx *trn_ctx= thd->get_transaction();
2285 
2286   /*
2287     Note that below we assume that only transactional storage engines
2288     may need release_temporary_latches(). If this will ever become false,
2289     we could iterate on thd->open_tables instead (and remove duplicates
2290     as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2291   */
2292   for (info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2293        info; info= info->next())
2294   {
2295     handlerton *hton= info->ht();
2296     if (hton && hton->release_temporary_latches)
2297         hton->release_temporary_latches(hton, thd);
2298   }
2299   return 0;
2300 }
2301 
2302 /**
2303   Check if all storage engines used in transaction agree that after
2304   rollback to savepoint it is safe to release MDL locks acquired after
2305   savepoint creation.
2306 
2307   @param thd   The client thread that executes the transaction.
2308 
2309   @return true  - It is safe to release MDL locks.
2310           false - If it is not.
2311 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2312 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2313 {
2314   Ha_trx_info *ha_info;
2315   Transaction_ctx *trn_ctx= thd->get_transaction();
2316   Transaction_ctx::enum_trx_scope trx_scope=
2317     thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2318 
2319   DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2320 
2321   /**
2322     Checking whether it is safe to release metadata locks after rollback to
2323     savepoint in all the storage engines that are part of the transaction.
2324   */
2325   for (ha_info= trn_ctx->ha_trx_info(trx_scope);
2326        ha_info; ha_info= ha_info->next())
2327   {
2328     handlerton *ht= ha_info->ht();
2329     assert(ht);
2330 
2331     if (ht->savepoint_rollback_can_release_mdl == 0 ||
2332         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2333       DBUG_RETURN(false);
2334   }
2335 
2336   DBUG_RETURN(true);
2337 }
2338 
ha_rollback_to_savepoint(THD * thd,SAVEPOINT * sv)2339 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2340 {
2341   int error=0;
2342   Transaction_ctx *trn_ctx= thd->get_transaction();
2343   Transaction_ctx::enum_trx_scope trx_scope=
2344     !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2345 
2346   Ha_trx_info *ha_info, *ha_info_next;
2347 
2348   DBUG_ENTER("ha_rollback_to_savepoint");
2349 
2350   trn_ctx->set_rw_ha_count(trx_scope, 0);
2351   trn_ctx->set_no_2pc(trx_scope, 0);
2352   /*
2353     rolling back to savepoint in all storage engines that were part of the
2354     transaction when the savepoint was set
2355   */
2356   for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2357   {
2358     int err;
2359     handlerton *ht= ha_info->ht();
2360     assert(ht);
2361     assert(ht->savepoint_set != 0);
2362     if ((err= ht->savepoint_rollback(ht, thd,
2363                                      (uchar *)(sv+1)+ht->savepoint_offset)))
2364     { // cannot happen
2365       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2366       error=1;
2367     }
2368     assert(!thd->status_var_aggregated);
2369     thd->status_var.ha_savepoint_rollback_count++;
2370     if (ht->prepare == 0)
2371       trn_ctx->set_no_2pc(trx_scope, true);
2372   }
2373 
2374   /*
2375     rolling back the transaction in all storage engines that were not part of
2376     the transaction when the savepoint was set
2377   */
2378   for (ha_info= trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
2379        ha_info= ha_info_next)
2380   {
2381     int err;
2382     handlerton *ht= ha_info->ht();
2383     if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2384     { // cannot happen
2385       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2386       error=1;
2387     }
2388     assert(!thd->status_var_aggregated);
2389     thd->status_var.ha_rollback_count++;
2390     ha_info_next= ha_info->next();
2391     ha_info->reset(); /* keep it conveniently zero-filled */
2392   }
2393   trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);
2394 
2395 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2396   if (thd->m_transaction_psi != NULL)
2397     MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2398 #endif
2399 
2400   DBUG_RETURN(error);
2401 }
2402 
/**
  Run the prepare phase of two-phase commit in the storage engines
  registered for the transaction.

  @param thd  Session whose transaction is being prepared.
  @param all  true to use the session-scope engine list
              (Transaction_ctx::SESSION), false to use the
              statement-scope list (Transaction_ctx::STMT).

  @retval 0  Every read-write participant prepared successfully, or no
             engine was registered.
  @retval 1  An engine's prepare() returned non-zero. The remaining
             engines in the list are not prepared.

  @note Engines whose part of the transaction is read-only are skipped
        and are not counted in ha_prepare_count.
*/
int ha_prepare_low(THD *thd, bool all)
{
  int error= 0;
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);

  DBUG_ENTER("ha_prepare_low");

  if (ha_info)
  {
    /* Walk the engine list; the loop condition stops at the first failure. */
    for (; ha_info && !error; ha_info= ha_info->next())
    {
      int err= 0;
      handlerton *ht= ha_info->ht();
      /*
        Do not call two-phase commit if this particular
        transaction is read-only. This allows for simpler
        implementation in engines that are always read-only.
      */
      if (!ha_info->is_trx_read_write())
        continue;
      if ((err= ht->prepare(ht, thd, all)))
      {
#ifdef WITH_WSREP
	if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
        {
	  /* The failure is always recorded; only the message depends on err. */
	  error= 1;
	  switch (err)
          {
	  case WSREP_TRX_SIZE_EXCEEDED:
	    /* give the user the size-exceeded error from wsrep_api.h */
	    my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED);
	    break;
	  case WSREP_TRX_CERT_FAIL:
	  case WSREP_TRX_ERROR:
	    /* avoid sending error, if we need to replay */
	    if (thd->wsrep_conflict_state!= MUST_REPLAY)
            {
	      my_error(ER_LOCK_DEADLOCK, MYF(0), err);
	    }
	  /* Any other code: error is set above but no message is raised here. */
	  }
	}

        else
        {
          /* not wsrep hton, bail to native mysql behavior */
#endif
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
        error= 1;
#ifdef WITH_WSREP
        }
#endif
      }
      /* Counted for every read-write engine, including one that just failed. */
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_prepare_count++;
    }
    /*
      Debug hook keyed on "crash_commit_after_prepare"; it is only reached
      when at least one engine was registered for the transaction.
    */
    DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
  }

  DBUG_RETURN(error);
}
2465 
2466 /**
2467   @note
2468   according to the sql standard (ISO/IEC 9075-2:2003)
2469   section "4.33.4 SQL-statements and transaction states",
2470   SAVEPOINT is *not* transaction-initiating SQL-statement
2471 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2472 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2473 {
2474   int error=0;
2475   Transaction_ctx::enum_trx_scope trx_scope=
2476     !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2477   Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2478   Ha_trx_info *begin_ha_info= ha_info;
2479 
2480   DBUG_ENTER("ha_savepoint");
2481 
2482   for (; ha_info; ha_info= ha_info->next())
2483   {
2484     int err;
2485     handlerton *ht= ha_info->ht();
2486     assert(ht);
2487     if (! ht->savepoint_set)
2488     {
2489       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2490       error=1;
2491       break;
2492     }
2493     if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2494     { // cannot happen
2495       my_error(ER_GET_ERRNO, MYF(0), err);
2496       error=1;
2497     }
2498     assert(!thd->status_var_aggregated);
2499     thd->status_var.ha_savepoint_count++;
2500   }
2501   /*
2502     Remember the list of registered storage engines. All new
2503     engines are prepended to the beginning of the list.
2504   */
2505   sv->ha_list= begin_ha_info;
2506 
2507 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2508   if (!error && thd->m_transaction_psi != NULL)
2509     MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2510 #endif
2511 
2512   DBUG_RETURN(error);
2513 }
2514 
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2515 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2516 {
2517   int error=0;
2518   Ha_trx_info *ha_info= sv->ha_list;
2519   DBUG_ENTER("ha_release_savepoint");
2520 
2521   for (; ha_info; ha_info= ha_info->next())
2522   {
2523     int err;
2524     handlerton *ht= ha_info->ht();
2525     /* Savepoint life time is enclosed into transaction life time. */
2526     assert(ht);
2527     if (!ht->savepoint_release)
2528       continue;
2529     if ((err= ht->savepoint_release(ht, thd,
2530                                     (uchar *)(sv+1) + ht->savepoint_offset)))
2531     { // cannot happen
2532       my_error(ER_GET_ERRNO, MYF(0), err);
2533       error=1;
2534     }
2535   }
2536 
2537 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2538   if (thd->m_transaction_psi != NULL)
2539     MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2540 #endif
2541   DBUG_RETURN(error);
2542 }
2543 
2544 
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2545 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2546                                    void *arg)
2547 {
2548   handlerton *hton= plugin_data<handlerton*>(plugin);
2549   if (hton->state == SHOW_OPTION_YES &&
2550       hton->start_consistent_snapshot)
2551   {
2552     hton->start_consistent_snapshot(hton, thd);
2553     *((bool *)arg)= false;
2554   }
2555   return FALSE;
2556 }
2557 
ha_start_consistent_snapshot(THD * thd)2558 int ha_start_consistent_snapshot(THD *thd)
2559 {
2560   bool warn= true;
2561 
2562   plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2563 
2564   /*
2565     Same idea as when one wants to CREATE TABLE in one engine which does not
2566     exist:
2567   */
2568   if (warn)
2569     push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2570                  "This MySQL server does not support any "
2571                  "consistent-read capable storage engine");
2572   return 0;
2573 }
2574 
2575 
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2576 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2577                                 void *arg)
2578 {
2579   handlerton *hton= plugin_data<handlerton*>(plugin);
2580   if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2581       hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2582     return TRUE;
2583   return FALSE;
2584 }
2585 
2586 
ha_flush_logs(handlerton * db_type,bool binlog_group_flush)2587 bool ha_flush_logs(handlerton *db_type, bool binlog_group_flush)
2588 {
2589   if (db_type == NULL)
2590   {
2591     if (plugin_foreach(NULL, flush_handlerton,
2592                        MYSQL_STORAGE_ENGINE_PLUGIN,
2593                        static_cast<void *>(&binlog_group_flush)))
2594       return TRUE;
2595   }
2596   else
2597   {
2598     if (db_type->state != SHOW_OPTION_YES ||
2599         (db_type->flush_logs &&
2600          db_type->flush_logs(db_type, binlog_group_flush)))
2601       return TRUE;
2602   }
2603   return FALSE;
2604 }
2605 
2606 
2607 /**
2608   @brief make canonical filename
2609 
2610   @param[in]  file     table handler
2611   @param[in]  path     original path
2612   @param[out] tmp_path buffer for canonized path
2613 
2614   @details Lower case db name and table name path parts for
2615            non file based tables when lower_case_table_names
2616            is 2 (store as is, compare in lower case).
2617            Filesystem path prefix (mysql_data_home or tmpdir)
2618            is left intact.
2619 
2620   @note tmp_path may be left intact if no conversion was
2621         performed.
2622 
2623   @retval canonized path
2624 
2625   @todo This may be done more efficiently when table path
2626         gets built. Convert this function to something like
2627         ASSERT_CANONICAL_FILENAME.
2628 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2629 const char *get_canonical_filename(handler *file, const char *path,
2630                                    char *tmp_path)
2631 {
2632   uint i;
2633   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2634     return path;
2635 
2636   for (i= 0; i <= mysql_tmpdir_list.max; i++)
2637   {
2638     if (is_prefix(path, mysql_tmpdir_list.list[i]))
2639       return path;
2640   }
2641 
2642   /* Ensure that table handler get path in lower case */
2643   if (tmp_path != path)
2644     my_stpcpy(tmp_path, path);
2645 
2646   /*
2647     we only should turn into lowercase database/table part
2648     so start the process after homedirectory
2649   */
2650   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2651   return tmp_path;
2652 }
2653 
2654 
2655 class Ha_delete_table_error_handler: public Internal_error_handler
2656 {
2657 public:
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)2658   virtual bool handle_condition(THD *thd,
2659                                 uint sql_errno,
2660                                 const char* sqlstate,
2661                                 Sql_condition::enum_severity_level *level,
2662                                 const char* msg)
2663   {
2664     /* Downgrade errors to warnings. */
2665     if (*level == Sql_condition::SL_ERROR)
2666       *level= Sql_condition::SL_WARNING;
2667     return false;
2668   }
2669 };
2670 
2671 
/** @brief
  Delete a table in the given storage engine.

  This should return ENOENT if the file doesn't exist.
  The .frm file will be deleted only if we return 0 or ENOENT.

  @param thd               Session; its mem_root is used for the temporary
                           handler and it receives any generated warning.
  @param table_type        Engine owning the table; NULL yields ENOENT.
  @param path              Table path, canonized before it is passed to
                           the engine.
  @param db                Database name, used for error reporting and
                           instrumentation.
  @param alias             Table name, used for error reporting and
                           instrumentation.
  @param generate_warning  If true, an engine error is reported through
                           the handler's print_error() with errors
                           downgraded to warnings.

  @retval 0       Success.
  @retval ENOENT  table_type is NULL or no handler could be created.
  @retval other   Error code returned by the engine's ha_delete_table().
*/
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
                    const char *db, const char *alias, bool generate_warning)
{
  handler *file;
  char tmp_path[FN_REFLEN];
  int error;
  TABLE dummy_table;
  TABLE_SHARE dummy_share;
  DBUG_ENTER("ha_delete_table");

  dummy_table.s= &dummy_share;

  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
  if (table_type == NULL ||
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
    DBUG_RETURN(ENOENT);

  /* From here on path may point into tmp_path. */
  path= get_canonical_filename(file, path, tmp_path);
  if ((error= file->ha_delete_table(path)) && generate_warning)
  {
    /*
      Because file->print_error() uses my_error() to generate the error
      message, we install an internal error handler that intercepts it
      and downgrades it, so the message is presented to the user
      as a warning.
    */
    Ha_delete_table_error_handler ha_delete_table_error_handler;

    /* Fill up structures that print_error may need */
    dummy_share.path.str= (char*) path;
    dummy_share.path.length= strlen(path);
    dummy_share.db.str= (char*) db;
    dummy_share.db.length= strlen(db);
    dummy_share.table_name.str= (char*) alias;
    dummy_share.table_name.length= strlen(alias);
    dummy_table.alias= alias;

    file->change_table_ptr(&dummy_table, &dummy_share);

    /*
      XXX: should we convert *all* errors to warnings here?
      What if the error is fatal?
    */
    thd->push_internal_handler(&ha_delete_table_error_handler);
    file->print_error(error, 0);

    thd->pop_internal_handler();
  }
  delete file;

#ifdef HAVE_PSI_TABLE_INTERFACE
  if (likely(error == 0))
  {
    /* Table share not available, so check path for temp_table prefix. */
    bool temp_table= (strstr(path, tmp_file_prefix) != NULL);
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), alias, strlen(alias));
  }
#endif

  DBUG_RETURN(error);
}
2738 
2739 /****************************************************************************
2740 ** General handler functions
2741 ****************************************************************************/
clone(const char * name,MEM_ROOT * mem_root)2742 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2743 {
2744   DBUG_ENTER("handler::clone");
2745   handler *new_handler= get_new_handler(table->s, mem_root, ht);
2746 
2747   if (!new_handler)
2748     DBUG_RETURN(NULL);
2749   if (new_handler->set_ha_share_ref(ha_share))
2750     goto err;
2751 
2752   /*
2753     Allocate handler->ref here because otherwise ha_open will allocate it
2754     on this->table->mem_root and we will not be able to reclaim that memory
2755     when the clone handler object is destroyed.
2756   */
2757   if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2758                                               ALIGN_SIZE(ref_length)*2)))
2759     goto err;
2760   /*
2761     TODO: Implement a more efficient way to have more than one index open for
2762     the same table instance. The ha_open call is not cachable for clone.
2763   */
2764   if (new_handler->ha_open(table, name, table->db_stat,
2765                            HA_OPEN_IGNORE_IF_LOCKED))
2766     goto err;
2767 
2768   DBUG_RETURN(new_handler);
2769 
2770 err:
2771   delete new_handler;
2772   DBUG_RETURN(NULL);
2773 }
2774 
2775 
ha_statistic_increment(ulonglong SSV::* offset) const2776 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2777 {
2778   if (table && table->in_use) (table->in_use->status_var.*offset)++;
2779 }
2780 
2781 
ha_thd(void) const2782 THD *handler::ha_thd(void) const
2783 {
2784   assert(!table || !table->in_use || table->in_use == current_thd);
2785   return (table && table->in_use) ? table->in_use : current_thd;
2786 }
2787 
/**
  Tell the table instrumentation that this thread no longer owns the table.

  Compiles to an empty function without HAVE_PSI_TABLE_INTERFACE. The
  handler must hold no lock and have no index or random scan initialized.
*/
void handler::unbind_psi()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_TABLE_CALL(unbind_table)(m_psi);
#endif
}
2800 
rebind_psi()2801 void handler::rebind_psi()
2802 {
2803 #ifdef HAVE_PSI_TABLE_INTERFACE
2804   assert(m_lock_type == F_UNLCK);
2805   assert(inited == NONE);
2806   /*
2807     Notify the instrumentation that this table is now owned
2808     by this thread.
2809   */
2810   PSI_table_share *share_psi= ha_table_share_psi(table_share);
2811   m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2812 #endif
2813 }
2814 
start_psi_batch_mode()2815 void handler::start_psi_batch_mode()
2816 {
2817 #ifdef HAVE_PSI_TABLE_INTERFACE
2818   assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2819   assert(m_psi_locker == NULL);
2820   m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
2821   m_psi_numrows= 0;
2822 #endif
2823 }
2824 
end_psi_batch_mode()2825 void handler::end_psi_batch_mode()
2826 {
2827 #ifdef HAVE_PSI_TABLE_INTERFACE
2828   assert(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2829   if (m_psi_locker != NULL)
2830   {
2831     assert(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2832     PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2833     m_psi_locker= NULL;
2834   }
2835   m_psi_batch_mode= PSI_BATCH_MODE_NONE;
2836 #endif
2837 }
2838 
/**
  Return the instrumentation handle stored in a table share.

  @param share  Table share to read; must not be NULL.

  @return share->m_psi.
*/
PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
{
  return share->m_psi;
}
2843 
/** @brief
  Open database-handler.

  IMPLEMENTATION
    Try O_RDONLY if cannot open as O_RDWR
    Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set

  @param table_arg       Table this handler is attached to; its share must
                         be the handler's table_share.
  @param name            Table name passed to the engine's open().
  @param mode            Open mode (O_RDWR or O_RDONLY).
  @param test_if_locked  HA_OPEN_* flags passed to the engine's open().
                         HA_OPEN_NO_PSI_CALL suppresses the
                         instrumentation open_table call.

  @retval 0                  Success.
  @retval HA_ERR_OUT_OF_MEM  The 'ref' buffer could not be allocated; the
                             handler has been closed again.
  @retval other              Error code returned by the engine's open().
*/
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  assert(table->s == table_share);
  assert(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  assert(alloc_root_inited(&table->mem_root));

  if ((error=open(name,mode,test_if_locked)))
  {
    /*
      A read-write open that failed with EACCES or EROFS is retried
      read-only when the table allows it (HA_TRY_READ_ONLY).
    */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
	(table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (error)
  {
    set_my_errno(error);                            /* Safeguard */
    DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
  }
  else
  {
    assert(m_psi == NULL);
    assert(table_share != NULL);
#ifdef HAVE_PSI_TABLE_INTERFACE
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      PSI_table_share *share_psi= ha_table_share_psi(table_share);
      m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
    }
#endif

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL

    /* ref is already allocated for us if we're called from handler::clone() */
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
                                          ALIGN_SIZE(ref_length)*2)))
    {
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      dup_ref=ref+ALIGN_SIZE(ref_length);  /* second half of the buffer */
    /*
      NOTE(review): this also runs after ha_close() on the out-of-memory
      path above; confirm table_flags() is safe on a closed handler.
    */
    cached_table_flags= table_flags();
  }
  DBUG_RETURN(error);
}
2915 
2916 
/**
  Close handler.

  With HAVE_PSI_TABLE_INTERFACE, the instrumentation handle is closed and
  cleared before the engine's close() is called. The handler must hold no
  lock and have no index or random scan initialized.

  @return The value returned by the engine's close().
*/

int handler::ha_close(void)
{
  DBUG_ENTER("handler::ha_close");
#ifdef HAVE_PSI_TABLE_INTERFACE
  PSI_TABLE_CALL(close_table)(table_share, m_psi);
  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
  assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  assert(m_psi_locker == NULL);
#endif
  // TODO: set table= NULL to mark the handler as closed?
  assert(m_psi == NULL);
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  DBUG_RETURN(close());
}
2936 
2937 
2938 /**
2939   Initialize use of index.
2940 
2941   @param idx     Index to use
2942   @param sorted  Use sorted order
2943 
2944   @return Operation status
2945     @retval 0     Success
2946     @retval != 0  Error (error code returned)
2947 */
2948 
ha_index_init(uint idx,bool sorted)2949 int handler::ha_index_init(uint idx, bool sorted)
2950 {
2951   DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2952   int result;
2953   DBUG_ENTER("ha_index_init");
2954   assert(table_share->tmp_table != NO_TMP_TABLE ||
2955          m_lock_type != F_UNLCK);
2956   assert(inited == NONE);
2957   if (!(result= index_init(idx, sorted)))
2958     inited= INDEX;
2959   end_range= NULL;
2960   DBUG_RETURN(result);
2961 }
2962 
2963 
2964 /**
2965   End use of index.
2966 
2967   @return Operation status
2968     @retval 0     Success
2969     @retval != 0  Error (error code returned)
2970 */
2971 
ha_index_end()2972 int handler::ha_index_end()
2973 {
2974   DBUG_ENTER("ha_index_end");
2975   /* SQL HANDLER function can call this without having it locked. */
2976   assert(table->open_by_handler ||
2977          table_share->tmp_table != NO_TMP_TABLE ||
2978          m_lock_type != F_UNLCK);
2979   assert(inited == INDEX);
2980   inited= NONE;
2981   end_range= NULL;
2982   DBUG_RETURN(index_end());
2983 }
2984 
2985 
2986 /**
2987   Initialize table for random read or scan.
2988 
2989   @param scan  if true: Initialize for random scans through rnd_next()
2990                if false: Initialize for random reads through rnd_pos()
2991 
2992   @return Operation status
2993     @retval 0     Success
2994     @retval != 0  Error (error code returned)
2995 */
2996 
ha_rnd_init(bool scan)2997 int handler::ha_rnd_init(bool scan)
2998 {
2999   DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
3000   int result;
3001   DBUG_ENTER("ha_rnd_init");
3002   assert(table_share->tmp_table != NO_TMP_TABLE ||
3003          m_lock_type != F_UNLCK);
3004   assert(inited == NONE || (inited == RND && scan));
3005   inited= (result= rnd_init(scan)) ? NONE : RND;
3006   end_range= NULL;
3007   DBUG_RETURN(result);
3008 }
3009 
3010 
3011 /**
3012   End use of random access.
3013 
3014   @return Operation status
3015     @retval 0     Success
3016     @retval != 0  Error (error code returned)
3017 */
3018 
ha_rnd_end()3019 int handler::ha_rnd_end()
3020 {
3021   DBUG_ENTER("ha_rnd_end");
3022   /* SQL HANDLER function can call this without having it locked. */
3023   assert(table->open_by_handler ||
3024          table_share->tmp_table != NO_TMP_TABLE ||
3025          m_lock_type != F_UNLCK);
3026   assert(inited == RND);
3027   inited= NONE;
3028   end_range= NULL;
3029   DBUG_RETURN(rnd_end());
3030 }
3031 
3032 
3033 /**
3034   Read next row via random scan.
3035 
3036   @param buf  Buffer to read the row into
3037 
3038   @return Operation status
3039     @retval 0     Success
3040     @retval != 0  Error (error code returned)
3041 */
3042 
ha_rnd_next(uchar * buf)3043 int handler::ha_rnd_next(uchar *buf)
3044 {
3045   int result;
3046   DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
3047   DBUG_ENTER("handler::ha_rnd_next");
3048   assert(table_share->tmp_table != NO_TMP_TABLE ||
3049          m_lock_type != F_UNLCK);
3050   assert(inited == RND);
3051 
3052   // Set status for the need to update generated fields
3053   m_update_generated_read_fields= table->has_gcol();
3054 
3055   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3056     { result= rnd_next(buf); })
3057   if (!result && m_update_generated_read_fields)
3058   {
3059     result= update_generated_read_fields(buf, table);
3060     m_update_generated_read_fields= false;
3061   }
3062   DBUG_RETURN(result);
3063 }
3064 
3065 
3066 /**
3067   Read row via random scan from position.
3068 
3069   @param[out] buf  Buffer to read the row into
3070   @param      pos  Position from position() call
3071 
3072   @return Operation status
3073     @retval 0     Success
3074     @retval != 0  Error (error code returned)
3075 */
3076 
ha_rnd_pos(uchar * buf,uchar * pos)3077 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
3078 {
3079   int result;
3080   DBUG_ENTER("handler::ha_rnd_pos");
3081   assert(table_share->tmp_table != NO_TMP_TABLE ||
3082          m_lock_type != F_UNLCK);
3083   /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
3084   /* assert(inited == RND); */
3085 
3086   // Set status for the need to update generated fields
3087   m_update_generated_read_fields= table->has_gcol();
3088 
3089   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3090     { result= rnd_pos(buf, pos); })
3091   if (!result && m_update_generated_read_fields)
3092   {
3093     result= update_generated_read_fields(buf, table);
3094     m_update_generated_read_fields= false;
3095   }
3096   DBUG_RETURN(result);
3097 }
3098 
3099 
3100 /**
3101   Read [part of] row via [part of] index.
3102   @param[out] buf          buffer where store the data
3103   @param      key          Key to search for
3104   @param      keypart_map  Which part of key to use
3105   @param      find_flag    Direction/condition on key usage
3106 
3107   @returns Operation status
3108     @retval  0                   Success (found a record, and function has
3109                                  set table->status to 0)
3110     @retval  HA_ERR_END_OF_FILE  Row not found (function has set table->status
3111                                  to STATUS_NOT_FOUND). End of index passed.
3112     @retval  HA_ERR_KEY_NOT_FOUND Row not found (function has set table->status
3113                                  to STATUS_NOT_FOUND). Index cursor positioned.
3114     @retval  != 0                Error
3115 
3116   @note Positions an index cursor to the index specified in the handle.
3117   Fetches the row if available. If the key value is null,
3118   begin at the first key of the index.
3119   ha_index_read_map can be restarted without calling index_end on the previous
3120   index scan and without calling ha_index_init. In this case the
3121   ha_index_read_map is on the same index as the previous ha_index_scan.
3122   This is particularly used in conjunction with multi read ranges.
3123 */
3124 
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3125 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3126                                key_part_map keypart_map,
3127                                enum ha_rkey_function find_flag)
3128 {
3129   int result;
3130   DBUG_ENTER("handler::ha_index_read_map");
3131   assert(table_share->tmp_table != NO_TMP_TABLE ||
3132          m_lock_type != F_UNLCK);
3133   assert(inited == INDEX);
3134   assert(!pushed_idx_cond || buf == table->record[0]);
3135 
3136   // Set status for the need to update generated fields
3137   m_update_generated_read_fields= table->has_gcol();
3138 
3139   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3140     { result= index_read_map(buf, key, keypart_map, find_flag); })
3141   if (!result && m_update_generated_read_fields)
3142   {
3143     result= update_generated_read_fields(buf, table, active_index);
3144     m_update_generated_read_fields= false;
3145   }
3146   DBUG_RETURN(result);
3147 }
3148 
ha_index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)3149 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3150                                     key_part_map keypart_map)
3151 {
3152   int result;
3153   DBUG_ENTER("handler::ha_index_read_last_map");
3154   assert(table_share->tmp_table != NO_TMP_TABLE ||
3155          m_lock_type != F_UNLCK);
3156   assert(inited == INDEX);
3157   assert(!pushed_idx_cond || buf == table->record[0]);
3158 
3159   // Set status for the need to update generated fields
3160   m_update_generated_read_fields= table->has_gcol();
3161 
3162   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3163     { result= index_read_last_map(buf, key, keypart_map); })
3164   if (!result && m_update_generated_read_fields)
3165   {
3166     result= update_generated_read_fields(buf, table, active_index);
3167     m_update_generated_read_fields= false;
3168   }
3169   DBUG_RETURN(result);
3170 }
3171 
3172 /**
3173   Initializes an index and read it.
3174 
3175   @see handler::ha_index_read_map.
3176 */
3177 
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3178 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3179                                    key_part_map keypart_map,
3180                                    enum ha_rkey_function find_flag)
3181 {
3182   int result;
3183   assert(table_share->tmp_table != NO_TMP_TABLE ||
3184          m_lock_type != F_UNLCK);
3185   assert(end_range == NULL);
3186   assert(!pushed_idx_cond || buf == table->record[0]);
3187 
3188   // Set status for the need to update generated fields
3189   m_update_generated_read_fields= table->has_gcol();
3190 
3191   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result,
3192     { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3193   if (!result && m_update_generated_read_fields)
3194   {
3195     result= update_generated_read_fields(buf, table, index);
3196     m_update_generated_read_fields= false;
3197   }
3198   return result;
3199 }
3200 
3201 
3202 /**
3203   Reads the next row via index.
3204 
3205   @param[out] buf  Row data
3206 
3207   @return Operation status.
3208     @retval  0                   Success
3209     @retval  HA_ERR_END_OF_FILE  Row not found
3210     @retval  != 0                Error
3211 */
3212 
ha_index_next(uchar * buf)3213 int handler::ha_index_next(uchar * buf)
3214 {
3215   int result;
3216   DBUG_ENTER("handler::ha_index_next");
3217   assert(table_share->tmp_table != NO_TMP_TABLE ||
3218          m_lock_type != F_UNLCK);
3219   assert(inited == INDEX);
3220   assert(!pushed_idx_cond || buf == table->record[0]);
3221 
3222   // Set status for the need to update generated fields
3223   m_update_generated_read_fields= table->has_gcol();
3224 
3225   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3226     { result= index_next(buf); })
3227   if (!result && m_update_generated_read_fields)
3228   {
3229     result= update_generated_read_fields(buf, table, active_index);
3230     m_update_generated_read_fields= false;
3231   }
3232   DBUG_RETURN(result);
3233 }
3234 
3235 
3236 /**
3237   Reads the previous row via index.
3238 
3239   @param[out] buf  Row data
3240 
3241   @return Operation status.
3242     @retval  0                   Success
3243     @retval  HA_ERR_END_OF_FILE  Row not found
3244     @retval  != 0                Error
3245 */
3246 
ha_index_prev(uchar * buf)3247 int handler::ha_index_prev(uchar * buf)
3248 {
3249   int result;
3250   DBUG_ENTER("handler::ha_index_prev");
3251   assert(table_share->tmp_table != NO_TMP_TABLE ||
3252          m_lock_type != F_UNLCK);
3253   assert(inited == INDEX);
3254   assert(!pushed_idx_cond || buf == table->record[0]);
3255 
3256   // Set status for the need to update generated fields
3257   m_update_generated_read_fields= table->has_gcol();
3258 
3259   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3260     { result= index_prev(buf); })
3261   if (!result && m_update_generated_read_fields)
3262   {
3263     result= update_generated_read_fields(buf, table, active_index);
3264     m_update_generated_read_fields= false;
3265   }
3266   DBUG_RETURN(result);
3267 }
3268 
3269 
3270 /**
3271   Reads the first row via index.
3272 
3273   @param[out] buf  Row data
3274 
3275   @return Operation status.
3276     @retval  0                   Success
3277     @retval  HA_ERR_END_OF_FILE  Row not found
3278     @retval  != 0                Error
3279 */
3280 
ha_index_first(uchar * buf)3281 int handler::ha_index_first(uchar * buf)
3282 {
3283   int result;
3284   DBUG_ENTER("handler::ha_index_first");
3285   assert(table_share->tmp_table != NO_TMP_TABLE ||
3286          m_lock_type != F_UNLCK);
3287   assert(inited == INDEX);
3288   assert(!pushed_idx_cond || buf == table->record[0]);
3289 
3290   // Set status for the need to update generated fields
3291   m_update_generated_read_fields= table->has_gcol();
3292 
3293   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3294     { result= index_first(buf); })
3295   if (!result && m_update_generated_read_fields)
3296   {
3297     result= update_generated_read_fields(buf, table, active_index);
3298     m_update_generated_read_fields= false;
3299   }
3300   DBUG_RETURN(result);
3301 }
3302 
3303 
3304 /**
3305   Reads the last row via index.
3306 
3307   @param[out] buf  Row data
3308 
3309   @return Operation status.
3310     @retval  0                   Success
3311     @retval  HA_ERR_END_OF_FILE  Row not found
3312     @retval  != 0                Error
3313 */
3314 
ha_index_last(uchar * buf)3315 int handler::ha_index_last(uchar * buf)
3316 {
3317   int result;
3318   DBUG_ENTER("handler::ha_index_last");
3319   assert(table_share->tmp_table != NO_TMP_TABLE ||
3320          m_lock_type != F_UNLCK);
3321   assert(inited == INDEX);
3322   assert(!pushed_idx_cond || buf == table->record[0]);
3323 
3324   // Set status for the need to update generated fields
3325   m_update_generated_read_fields= table->has_gcol();
3326 
3327   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3328     { result= index_last(buf); })
3329   if (!result && m_update_generated_read_fields)
3330   {
3331     result= update_generated_read_fields(buf, table, active_index);
3332     m_update_generated_read_fields= false;
3333   }
3334   DBUG_RETURN(result);
3335 }
3336 
3337 
3338 /**
3339   Reads the next same row via index.
3340 
3341   @param[out] buf     Row data
3342   @param      key     Key to search for
3343   @param      keylen  Length of key
3344 
3345   @return Operation status.
3346     @retval  0                   Success
3347     @retval  HA_ERR_END_OF_FILE  Row not found
3348     @retval  != 0                Error
3349 */
3350 
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3351 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3352 {
3353   int result;
3354   DBUG_ENTER("handler::ha_index_next_same");
3355   assert(table_share->tmp_table != NO_TMP_TABLE ||
3356          m_lock_type != F_UNLCK);
3357   assert(inited == INDEX);
3358   assert(!pushed_idx_cond || buf == table->record[0]);
3359 
3360   // Set status for the need to update generated fields
3361   m_update_generated_read_fields= table->has_gcol();
3362 
3363   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3364     { result= index_next_same(buf, key, keylen); })
3365   if (!result && m_update_generated_read_fields)
3366   {
3367     result= update_generated_read_fields(buf, table, active_index);
3368     m_update_generated_read_fields= false;
3369   }
3370   DBUG_RETURN(result);
3371 }
3372 
3373 
3374 /**
3375   Read first row (only) from a table.
3376 
3377   This is never called for InnoDB tables, as these table types
3378   has the HA_STATS_RECORDS_IS_EXACT set.
3379 */
read_first_row(uchar * buf,uint primary_key)3380 int handler::read_first_row(uchar * buf, uint primary_key)
3381 {
3382   int error;
3383   DBUG_ENTER("handler::read_first_row");
3384 
3385   ha_statistic_increment(&SSV::ha_read_first_count);
3386 
3387   /*
3388     If there is very few deleted rows in the table, find the first row by
3389     scanning the table.
3390     TODO remove the test for HA_READ_ORDER
3391   */
3392   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3393       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3394   {
3395     if (!(error= ha_rnd_init(1)))
3396     {
3397       while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3398         /* skip deleted row */;
3399       const int end_error= ha_rnd_end();
3400       if (!error)
3401         error= end_error;
3402     }
3403   }
3404   else
3405   {
3406     /* Find the first row through the primary key */
3407     if (!(error= ha_index_init(primary_key, 0)))
3408     {
3409       error= ha_index_first(buf);
3410       const int end_error= ha_index_end();
3411       if (!error)
3412         error= end_error;
3413     }
3414   }
3415   DBUG_RETURN(error);
3416 }
3417 
3418 /**
3419   Generate the next auto-increment number based on increment and offset.
3420   computes the lowest number
3421   - strictly greater than "nr"
3422   - of the form: auto_increment_offset + N * auto_increment_increment
3423   If overflow happened then return MAX_ULONGLONG value as an
3424   indication of overflow.
3425   In most cases increment= offset= 1, in which case we get:
3426   @verbatim 1,2,3,4,5,... @endverbatim
3427     If increment=10 and offset=5 and previous number is 1, we get:
3428   @verbatim 1,5,15,25,35,... @endverbatim
3429 */
3430 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3431 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3432 {
3433   const ulonglong save_nr= nr;
3434 
3435   if (variables->auto_increment_increment == 1)
3436     nr= nr + 1; // optimization of the formula below
3437   else
3438   {
3439     nr= (((nr+ variables->auto_increment_increment -
3440            variables->auto_increment_offset)) /
3441          (ulonglong) variables->auto_increment_increment);
3442     nr= (nr* (ulonglong) variables->auto_increment_increment +
3443          variables->auto_increment_offset);
3444   }
3445 
3446   if (unlikely(nr <= save_nr))
3447     return ULLONG_MAX;
3448 
3449   return nr;
3450 }
3451 
3452 
adjust_next_insert_id_after_explicit_value(ulonglong nr)3453 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3454 {
3455   /*
3456     If we have set THD::next_insert_id previously and plan to insert an
3457     explicitely-specified value larger than this, we need to increase
3458     THD::next_insert_id to be greater than the explicit value.
3459   */
3460   if ((next_insert_id > 0) && (nr >= next_insert_id))
3461     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3462 }
3463 
3464 
3465 /** @brief
3466   Computes the largest number X:
3467   - smaller than or equal to "nr"
3468   - of the form: auto_increment_offset + N * auto_increment_increment
3469   where N>=0.
3470 
3471   SYNOPSIS
3472     prev_insert_id
3473       nr            Number to "round down"
3474       variables     variables struct containing auto_increment_increment and
3475                     auto_increment_offset
3476 
3477   RETURN
3478     The number X if it exists, "nr" otherwise.
3479 */
3480 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3481 prev_insert_id(ulonglong nr, struct system_variables *variables)
3482 {
3483   if (unlikely(nr < variables->auto_increment_offset))
3484   {
3485     /*
3486       There's nothing good we can do here. That is a pathological case, where
3487       the offset is larger than the column's max possible value, i.e. not even
3488       the first sequence value may be inserted. User will receive warning.
3489     */
3490     DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3491                        "auto_increment_offset: %lu",
3492                        (ulong) nr, variables->auto_increment_offset));
3493     return nr;
3494   }
3495   if (variables->auto_increment_increment == 1)
3496     return nr; // optimization of the formula below
3497   nr= (((nr - variables->auto_increment_offset)) /
3498        (ulonglong) variables->auto_increment_increment);
3499   return (nr * (ulonglong) variables->auto_increment_increment +
3500           variables->auto_increment_offset);
3501 }
3502 
3503 
3504 /**
3505   Update the auto_increment field if necessary.
3506 
3507   Updates columns with type NEXT_NUMBER if:
3508 
3509   - If column value is set to NULL (in which case
3510     auto_increment_field_not_null is 0)
3511   - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3512     set. In the future we will only set NEXT_NUMBER fields if one sets them
3513     to NULL (or they are not included in the insert list).
3514 
3515     In those cases, we check if the currently reserved interval still has
3516     values we have not used. If yes, we pick the smallest one and use it.
3517     Otherwise:
3518 
3519   - If a list of intervals has been provided to the statement via SET
3520     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3521     first unused interval from this list, consider it as reserved.
3522 
  - Otherwise we set the column for the first row to the value
    next_insert_id(get_auto_increment(column)) which is usually
    max-used-column-value+1.
3526     We call get_auto_increment() for the first row in a multi-row
3527     statement. get_auto_increment() will tell us the interval of values it
3528     reserved for us.
3529 
3530   - In both cases, for the following rows we use those reserved values without
3531     calling the handler again (we just progress in the interval, computing
3532     each new value from the previous one). Until we have exhausted them, then
3533     we either take the next provided interval or call get_auto_increment()
3534     again to reserve a new interval.
3535 
3536   - In both cases, the reserved intervals are remembered in
3537     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3538     binlogging; the last reserved interval is remembered in
3539     auto_inc_interval_for_cur_row. The number of reserved intervals is
3540     remembered in auto_inc_intervals_count. It differs from the number of
3541     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3542     latter list is cumulative over all statements forming one binlog event
3543     (when stored functions and triggers are used), and collapses two
3544     contiguous intervals in one (see its append() method).
3545 
3546     The idea is that generated auto_increment values are predictable and
3547     independent of the column values in the table.  This is needed to be
3548     able to replicate into a table that already has rows with a higher
3549     auto-increment value than the one that is inserted.
3550 
3551     After we have already generated an auto-increment number and the user
3552     inserts a column with a higher value than the last used one, we will
3553     start counting from the inserted value.
3554 
3555     This function's "outputs" are: the table's auto_increment field is filled
3556     with a value, thd->next_insert_id is filled with the value to use for the
3557     next row, if a value was autogenerated for the current row it is stored in
3558     thd->insert_id_for_cur_row, if get_auto_increment() was called
3559     thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3560     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3561     this list.
3562 
3563   @todo
3564     Replace all references to "next number" or NEXT_NUMBER to
3565     "auto_increment", everywhere (see below: there is
3566     table->auto_increment_field_not_null, and there also exists
3567     table->next_number_field, it's not consistent).
3568 
3569   @retval
3570     0	ok
3571   @retval
3572     HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
3573     returned ~(ulonglong) 0
3574   @retval
3575     HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3576     failure.
3577 */
3578 
/*
  Sizing of the interval requested from get_auto_increment() when no row
  estimation is available: start at AUTO_INC_DEFAULT_NB_ROWS, double on every
  further reservation within the statement, and never ask for more than
  AUTO_INC_DEFAULT_NB_MAX values.
*/
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)

/*
  Fill table->next_number_field with an auto-generated value when the row
  does not carry an explicit one, reserving a new interval of values when
  the current one is used up, and advance next_insert_id for the next row.
*/
int handler::update_auto_increment()
{
  /*
    nb_reserved_values is only assigned inside the "reserve more" branch
    below and only read when "append" is TRUE, which is set in that same
    branch.
  */
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    The row carries an explicit value: either non-zero, or zero that must be
    kept as is because the field was explicitly set and
    MODE_NO_AUTO_VALUE_ON_ZERO is active. Nothing is generated in that case.
  */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
      thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Also we should take into account the sign of the value.
      Since auto_increment value can't have negative value we should update
      next_insert_id only in case when we INSERTing explicit positive value.
      It means that for a table that has SIGNED INTEGER column when we execute
      the following statement
      INSERT INTO t1 VALUES( NULL), (-1), (NULL)
      we shouldn't call adjust_next_insert_id_after_explicit_value()
      and the result row will be (1, -1, 2) (for new opened connection
      to the server). On the other hand, for the statement
      INSERT INTO t1 VALUES( NULL), (333), (NULL)
      we should call adjust_next_insert_id_after_explicit_value()
      and result row will be (1, 333, 334).
    */
    if (((Field_num*)table->next_number_field)->unsigned_flag ||
        ((longlong)nr) > 0)
      adjust_next_insert_id_after_explicit_value(nr);

    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /* The next value to hand out no longer fits into the column */
  if (next_insert_id > table->next_number_field->get_max_int_value())
    DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);

  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      nr= forced->minimum();
      /*
        In a multi insert statement when the number of affected rows is known
        then reserve those many number of auto increment values. So that
        interval will be starting value to starting value + number of affected
        rows * increment of auto increment.
       */
      nb_reserved_values= (estimation_rows_to_insert > 0) ?
        estimation_rows_to_insert : forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->bulk_insert_row_cnt > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->bulk_insert_row_cnt;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      /*
        The base handler::get_auto_increment() ignores offset, increment and
        nb_desired_values; it only fills in nr and nb_reserved_values.
      */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  /* ULLONG_MAX is the overflow marker returned by compute_next_insert_id() */
  if (unlikely(nr == ULLONG_MAX))
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));

  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
  {
    /*
      first test if the query was aborted due to strict mode constraints
    */
    if (thd->killed == THD::KILL_BAD_DATA)
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

    /*
      field refused this value (overflow) and truncated it, use the result of
      the truncation (which is going to be inserted); however we try to
      decrease it to honour auto_increment_* variables.
      That will shift the left bound of the reserved interval, we don't
      bother shifting the right bound (anyway any other value from this
      interval will cause a duplicate key).
    */
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
    /* If even the rounded-down value is refused, use what the field kept */
    if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
      nr= table->next_number_field->val_int();
  }
  if (append)
  {
    /* Record the freshly reserved interval, starting at the final "nr" */
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
#ifdef WITH_WSREP
    if (((WSREP_EMULATE_BINLOG(thd)) || mysql_bin_log.is_open()) &&
	!thd->is_current_stmt_binlog_format_row())
#else
    if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
#endif /* WITH_WSREP */
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
                                                              auto_inc_interval_for_cur_row.values(),
                                                              variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;
  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3787 
3788 
3789 /** @brief
3790   MySQL signal that it changed the column bitmap
3791 
3792   USAGE
3793     This is for handlers that needs to setup their own column bitmaps.
3794     Normally the handler should set up their own column bitmaps in
3795     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3796     this.
3797 
3798     The handler is allowd to do changes to the bitmap after a index_init or
3799     rnd_init() call is made as after this, MySQL will not use the bitmap
3800     for any program logic checking.
3801 */
column_bitmaps_signal()3802 void handler::column_bitmaps_signal()
3803 {
3804   DBUG_ENTER("column_bitmaps_signal");
3805   DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx", (long) table->read_set,
3806                       (long)table->write_set));
3807   DBUG_VOID_RETURN;
3808 }
3809 
3810 
3811 /**
3812   Reserves an interval of auto_increment values from the handler.
3813 
3814   @param       offset              offset (modulus increment)
3815   @param       increment           increment between calls
3816   @param       nb_desired_values   how many values we want
3817   @param[out]  first_value         the first value reserved by the handler
3818   @param[out]  nb_reserved_values  how many values the handler reserved
3819 
3820   offset and increment means that we want values to be of the form
3821   offset + N * increment, where N>=0 is integer.
3822   If the function sets *first_value to ULLONG_MAX it means an error.
3823   If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3824   reserved to "positive infinite".
3825 */
3826 
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3827 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3828                                  ulonglong nb_desired_values,
3829                                  ulonglong *first_value,
3830                                  ulonglong *nb_reserved_values)
3831 {
3832   ulonglong nr;
3833   int error;
3834   DBUG_ENTER("handler::get_auto_increment");
3835 
3836   (void) extra(HA_EXTRA_KEYREAD);
3837   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
3838                                         table->read_set);
3839   column_bitmaps_signal();
3840 
3841   if (ha_index_init(table->s->next_number_index, 1))
3842   {
3843     /* This should never happen, assert in debug, and fail in release build */
3844     assert(0);
3845     *first_value= ULLONG_MAX;
3846     DBUG_VOID_RETURN;
3847   }
3848 
3849   if (table->s->next_number_keypart == 0)
3850   {						// Autoincrement at key-start
3851     error= ha_index_last(table->record[1]);
3852     /*
3853       MySQL implicitely assumes such method does locking (as MySQL decides to
3854       use nr+increment without checking again with the handler, in
3855       handler::update_auto_increment()), so reserves to infinite.
3856     */
3857     *nb_reserved_values= ULLONG_MAX;
3858   }
3859   else
3860   {
3861     uchar key[MAX_KEY_LENGTH];
3862     key_copy(key, table->record[0],
3863              table->key_info + table->s->next_number_index,
3864              table->s->next_number_key_offset);
3865     error= ha_index_read_map(table->record[1], key,
3866                              make_prev_keypart_map(table->s->next_number_keypart),
3867                              HA_READ_PREFIX_LAST);
3868     /*
3869       MySQL needs to call us for next row: assume we are inserting ("a",null)
3870       here, we return 3, and next this statement will want to insert
3871       ("b",null): there is no reason why ("b",3+1) would be the good row to
3872       insert: maybe it already exists, maybe 3+1 is too large...
3873     */
3874     *nb_reserved_values= 1;
3875   }
3876 
3877   if (error)
3878   {
3879     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3880     {
3881       /* No entry found, start with 1. */
3882       nr= 1;
3883     }
3884     else
3885     {
3886       assert(0);
3887       nr= ULLONG_MAX;
3888     }
3889   }
3890   else
3891     nr= ((ulonglong) table->next_number_field->
3892          val_int_offset(table->s->rec_buff_length)+1);
3893   ha_index_end();
3894   (void) extra(HA_EXTRA_NO_KEYREAD);
3895   *first_value= nr;
3896   DBUG_VOID_RETURN;
3897 }
3898 
3899 
ha_release_auto_increment()3900 void handler::ha_release_auto_increment()
3901 {
3902   assert(table_share->tmp_table != NO_TMP_TABLE ||
3903          m_lock_type != F_UNLCK ||
3904          (!next_insert_id && !insert_id_for_cur_row));
3905   DEBUG_SYNC(ha_thd(), "release_auto_increment");
3906   release_auto_increment();
3907   insert_id_for_cur_row= 0;
3908   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3909   auto_inc_intervals_count= 0;
3910   if (next_insert_id > 0)
3911   {
3912     next_insert_id= 0;
3913     /*
3914       this statement used forced auto_increment values if there were some,
3915       wipe them away for other statements.
3916     */
3917     table->in_use->auto_inc_intervals_forced.empty();
3918   }
3919 }
3920 
3921 
3922 /**
3923   Construct and emit duplicate key error message using information
3924   from table's record buffer.
3925 
3926   @param table    TABLE object which record buffer should be used as
3927                   source for column values.
3928   @param key      Key description.
3929   @param msg      Error message template to which key value should be
3930                   added.
3931   @param errflag  Flags for my_error() call.
3932 */
3933 
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3934 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3935 {
3936   /* Write the duplicated key in the error message */
3937   char key_buff[MAX_KEY_LENGTH];
3938   String str(key_buff,sizeof(key_buff),system_charset_info);
3939 
3940   if (key == NULL)
3941   {
3942     /* Key is unknown */
3943     str.copy("", 0, system_charset_info);
3944     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3945   }
3946   else
3947   {
3948     /* Table is opened and defined at this point */
3949     key_unpack(&str,table, key);
3950     size_t max_length= MYSQL_ERRMSG_SIZE - strlen(msg);
3951     if (str.length() >= max_length)
3952     {
3953       str.length(max_length-4);
3954       str.append(STRING_WITH_LEN("..."));
3955     }
3956     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3957   }
3958 }
3959 
3960 
3961 /**
3962   Construct and emit duplicate key error message using information
3963   from table's record buffer.
3964 
3965   @sa print_keydup_error(table, key, msg, errflag).
3966 */
3967 
print_keydup_error(TABLE * table,KEY * key,myf errflag)3968 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3969 {
3970   print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
3971 }
3972 
3973 
3974 /**
3975   This method is used to analyse the error to see whether the error
3976   is ignorable or not. Further comments in header file.
3977 */
3978 
is_ignorable_error(int error)3979 bool handler::is_ignorable_error(int error)
3980 {
3981   DBUG_ENTER("is_ignorable_error");
3982 
3983   // Catch errors that are ignorable
3984   switch (error)
3985   {
3986     // Error code 0 is not an error.
3987     case 0:
3988     // Dup key errors may be explicitly ignored.
3989     case HA_ERR_FOUND_DUPP_KEY:
3990     case HA_ERR_FOUND_DUPP_UNIQUE:
3991     // Foreign key constraint violations are ignorable.
3992     case HA_ERR_ROW_IS_REFERENCED:
3993     case HA_ERR_NO_REFERENCED_ROW:
3994       DBUG_RETURN(true);
3995   }
3996 
3997   // Default is that an error is not ignorable.
3998   DBUG_RETURN(false);
3999 }
4000 
4001 
4002 /**
4003   This method is used to analyse the error to see whether the error
4004   is fatal or not. Further comments in header file.
4005 */
4006 
is_fatal_error(int error)4007 bool handler::is_fatal_error(int error)
4008 {
4009   DBUG_ENTER("is_fatal_error");
4010 
4011   // No ignorable errors are fatal
4012   if (is_ignorable_error(error))
4013     DBUG_RETURN(false);
4014 
4015   // Catch errors that are not fatal
4016   switch (error)
4017   {
4018     /*
4019       Deadlock and lock timeout cause transaction/statement rollback so that
4020       THD::is_fatal_sub_stmt_error will be set. This means that they will not
4021       be possible to handle by stored program handlers inside stored functions
4022       and triggers even if non-fatal.
4023     */
4024     case HA_ERR_LOCK_WAIT_TIMEOUT:
4025     case HA_ERR_LOCK_DEADLOCK:
4026       DBUG_RETURN(false);
4027 
4028     case HA_ERR_NULL_IN_SPATIAL:
4029       DBUG_RETURN(false);
4030   }
4031 
4032   // Default is that an error is fatal
4033   DBUG_RETURN(true);
4034 }
4035 
4036 
4037 /**
4038   Print error that we got from handler function.
4039 
4040   @note
4041     In case of delete table it's only safe to use the following parts of
4042     the 'table' structure:
4043     - table->s->path
4044     - table->alias
4045 */
print_error(int error,myf errflag)4046 void handler::print_error(int error, myf errflag)
4047 {
4048   DBUG_ENTER("handler::print_error");
4049   DBUG_PRINT("enter",("error: %d",error));
4050 
4051   int textno=ER_GET_ERRNO;
4052   switch (error) {
4053   case EACCES:
4054     textno=ER_OPEN_AS_READONLY;
4055     break;
4056   case EAGAIN:
4057     textno=ER_FILE_USED;
4058     break;
4059   case ENOENT:
4060     {
4061       char errbuf[MYSYS_STRERROR_SIZE];
4062       textno=ER_FILE_NOT_FOUND;
4063       my_error(textno, errflag, table_share->table_name.str,
4064                error, my_strerror(errbuf, sizeof(errbuf), error));
4065     }
4066     break;
4067   case HA_ERR_KEY_NOT_FOUND:
4068   case HA_ERR_NO_ACTIVE_RECORD:
4069   case HA_ERR_RECORD_DELETED:
4070   case HA_ERR_END_OF_FILE:
4071     textno=ER_KEY_NOT_FOUND;
4072     break;
4073   case HA_ERR_WRONG_MRG_TABLE_DEF:
4074     textno=ER_WRONG_MRG_TABLE;
4075     break;
4076   case HA_ERR_FOUND_DUPP_KEY:
4077   {
4078     uint key_nr= table ? get_dup_key(error) : -1;
4079     if ((int) key_nr >= 0)
4080     {
4081       print_keydup_error(table,
4082                          key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
4083                          errflag);
4084       DBUG_VOID_RETURN;
4085     }
4086     textno=ER_DUP_KEY;
4087     break;
4088   }
4089   case HA_ERR_FOREIGN_DUPLICATE_KEY:
4090   {
4091     assert(table_share->tmp_table != NO_TMP_TABLE ||
4092            m_lock_type != F_UNLCK);
4093 
4094     char rec_buf[MAX_KEY_LENGTH];
4095     String rec(rec_buf, sizeof(rec_buf), system_charset_info);
4096     /* Table is opened and defined at this point */
4097 
4098     /*
4099       Just print the subset of fields that are part of the first index,
4100       printing the whole row from there is not easy.
4101     */
4102     key_unpack(&rec, table, &table->key_info[0]);
4103 
4104     char child_table_name[NAME_LEN + 1];
4105     char child_key_name[NAME_LEN + 1];
4106     if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4107                             child_key_name, sizeof(child_key_name)))
4108     {
4109       my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4110                table_share->table_name.str, rec.c_ptr_safe(),
4111                child_table_name, child_key_name);
4112     }
4113     else
4114     {
4115       my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4116                table_share->table_name.str, rec.c_ptr_safe());
4117     }
4118     DBUG_VOID_RETURN;
4119   }
4120   case HA_ERR_NULL_IN_SPATIAL:
4121     my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4122     DBUG_VOID_RETURN;
4123   case HA_ERR_FOUND_DUPP_UNIQUE:
4124     textno=ER_DUP_UNIQUE;
4125     break;
4126   case HA_ERR_RECORD_CHANGED:
4127     textno=ER_CHECKREAD;
4128     break;
4129   case HA_ERR_CRASHED:
4130     textno=ER_NOT_KEYFILE;
4131     break;
4132   case HA_ERR_WRONG_IN_RECORD:
4133     textno= ER_CRASHED_ON_USAGE;
4134     break;
4135   case HA_ERR_CRASHED_ON_USAGE:
4136     textno=ER_CRASHED_ON_USAGE;
4137     break;
4138   case HA_ERR_NOT_A_TABLE:
4139     textno= error;
4140     break;
4141   case HA_ERR_CRASHED_ON_REPAIR:
4142     textno=ER_CRASHED_ON_REPAIR;
4143     break;
4144   case HA_ERR_OUT_OF_MEM:
4145     textno=ER_OUT_OF_RESOURCES;
4146     break;
4147   case HA_ERR_SE_OUT_OF_MEMORY:
4148     my_error(ER_ENGINE_OUT_OF_MEMORY, errflag,
4149              table->file->table_type());
4150     DBUG_VOID_RETURN;
4151   case HA_ERR_WRONG_COMMAND:
4152     textno=ER_ILLEGAL_HA;
4153     break;
4154   case HA_ERR_OLD_FILE:
4155     textno=ER_OLD_KEYFILE;
4156     break;
4157   case HA_ERR_UNSUPPORTED:
4158     textno=ER_UNSUPPORTED_EXTENSION;
4159     break;
4160   case HA_ERR_RECORD_FILE_FULL:
4161   case HA_ERR_INDEX_FILE_FULL:
4162   {
4163     textno=ER_RECORD_FILE_FULL;
4164     /* Write the error message to error log */
4165     errflag|= ME_ERRORLOG;
4166     break;
4167   }
4168   case HA_ERR_LOCK_WAIT_TIMEOUT:
4169     textno=ER_LOCK_WAIT_TIMEOUT;
4170     break;
4171   case HA_ERR_LOCK_TABLE_FULL:
4172     textno=ER_LOCK_TABLE_FULL;
4173     break;
4174   case HA_ERR_LOCK_DEADLOCK:
4175     textno=ER_LOCK_DEADLOCK;
4176     break;
4177   case HA_ERR_READ_ONLY_TRANSACTION:
4178     textno=ER_READ_ONLY_TRANSACTION;
4179     break;
4180   case HA_ERR_CANNOT_ADD_FOREIGN:
4181     textno=ER_CANNOT_ADD_FOREIGN;
4182     break;
4183   case HA_ERR_ROW_IS_REFERENCED:
4184   {
4185     String str;
4186     get_error_message(error, &str);
4187     my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
4188     DBUG_VOID_RETURN;
4189   }
4190   case HA_ERR_NO_REFERENCED_ROW:
4191   {
4192     String str;
4193     get_error_message(error, &str);
4194     my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
4195     DBUG_VOID_RETURN;
4196   }
4197   case HA_ERR_TABLE_DEF_CHANGED:
4198     textno=ER_TABLE_DEF_CHANGED;
4199     break;
4200   case HA_ERR_NO_SUCH_TABLE:
4201     my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
4202              table_share->table_name.str);
4203     DBUG_VOID_RETURN;
4204   case HA_ERR_RBR_LOGGING_FAILED:
4205     textno= ER_BINLOG_ROW_LOGGING_FAILED;
4206     break;
4207   case HA_ERR_DROP_INDEX_FK:
4208   {
4209     const char *ptr= "???";
4210     uint key_nr= table ? get_dup_key(error) : -1;
4211     if ((int) key_nr >= 0 && key_nr != MAX_KEY)
4212       ptr= table->key_info[key_nr].name;
4213     my_error(ER_DROP_INDEX_FK, errflag, ptr);
4214     DBUG_VOID_RETURN;
4215   }
4216   case HA_ERR_TABLE_NEEDS_UPGRADE:
4217     textno=ER_TABLE_NEEDS_UPGRADE;
4218     break;
4219   case HA_ERR_NO_PARTITION_FOUND:
4220     textno=ER_WRONG_PARTITION_NAME;
4221     break;
4222   case HA_ERR_TABLE_READONLY:
4223     textno= ER_OPEN_AS_READONLY;
4224     break;
4225   case HA_ERR_AUTOINC_READ_FAILED:
4226     textno= ER_AUTOINC_READ_FAILED;
4227     break;
4228   case HA_ERR_AUTOINC_ERANGE:
4229     textno= ER_WARN_DATA_OUT_OF_RANGE;
4230     break;
4231   case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4232     textno= ER_TOO_MANY_CONCURRENT_TRXS;
4233     break;
4234   case HA_ERR_INDEX_COL_TOO_LONG:
4235     textno= ER_INDEX_COLUMN_TOO_LONG;
4236     break;
4237   case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4238     textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4239     break;
4240   case HA_ERR_INDEX_CORRUPT:
4241     textno= ER_INDEX_CORRUPT;
4242     break;
4243   case HA_ERR_UNDO_REC_TOO_BIG:
4244     textno= ER_UNDO_RECORD_TOO_BIG;
4245     break;
4246   case HA_ERR_TABLE_IN_FK_CHECK:
4247     textno= ER_TABLE_IN_FK_CHECK;
4248     break;
4249   case HA_WRONG_CREATE_OPTION:
4250     textno= ER_ILLEGAL_HA;
4251     break;
4252   case HA_MISSING_CREATE_OPTION:
4253   {
4254     const char* engine= table_type();
4255     my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
4256     DBUG_VOID_RETURN;
4257   }
4258   case HA_ERR_TOO_MANY_FIELDS:
4259     textno= ER_TOO_MANY_FIELDS;
4260     break;
4261   case HA_ERR_INNODB_READ_ONLY:
4262     textno= ER_INNODB_READ_ONLY;
4263     break;
4264   case HA_ERR_TEMP_FILE_WRITE_FAILURE:
4265     textno= ER_TEMP_FILE_WRITE_FAILURE;
4266     break;
4267   case HA_ERR_INNODB_FORCED_RECOVERY:
4268     textno= ER_INNODB_FORCED_RECOVERY;
4269     break;
4270   case HA_ERR_TABLE_CORRUPT:
4271     my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
4272              table_share->table_name.str);
4273     DBUG_VOID_RETURN;
4274   case HA_ERR_QUERY_INTERRUPTED:
4275     textno= ER_QUERY_INTERRUPTED;
4276     break;
4277   case HA_ERR_TABLESPACE_MISSING:
4278   {
4279     char errbuf[MYSYS_STRERROR_SIZE];
4280     my_snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
4281     table_share->table_name.str);
4282     my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
4283     DBUG_VOID_RETURN;
4284   }
4285   case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
4286     my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
4287              table_share->table_name.str);
4288     DBUG_VOID_RETURN;
4289   case HA_ERR_WRONG_FILE_NAME:
4290     my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
4291     DBUG_VOID_RETURN;
4292   case HA_ERR_NOT_ALLOWED_COMMAND:
4293     textno=ER_NOT_ALLOWED_COMMAND;
4294     break;
4295   default:
4296     {
4297       /* The error was "unknown" to this function.
4298 	 Ask handler if it has got a message for this error */
4299       String str;
4300       bool temporary= get_error_message(error, &str);
4301       if (!str.is_empty())
4302       {
4303 	const char* engine= table_type();
4304 	if (temporary)
4305 	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
4306 	else
4307 	  my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
4308       }
4309       else
4310 	my_error(ER_GET_ERRNO,errflag,error);
4311       DBUG_VOID_RETURN;
4312     }
4313   }
4314   if (textno != ER_FILE_NOT_FOUND)
4315     my_error(textno, errflag, table_share->table_name.str, error);
4316   DBUG_VOID_RETURN;
4317 }
4318 
4319 
4320 /**
4321   Return an error message specific to this handler.
4322 
4323   @param error  error code previously returned by handler
4324   @param buf    pointer to String where to add error message
4325 
4326   @return
4327     Returns true if this is a temporary error
4328 */
get_error_message(int error,String * buf)4329 bool handler::get_error_message(int error, String* buf)
4330 {
4331   return FALSE;
4332 }
4333 
4334 
4335 /**
4336   Check for incompatible collation changes.
4337 
4338   @retval
4339     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4340   @retval
4341     0                        No upgrade required.
4342 */
4343 
check_collation_compatibility()4344 int handler::check_collation_compatibility()
4345 {
4346   ulong mysql_version= table->s->mysql_version;
4347 
4348   if (mysql_version < 50124)
4349   {
4350     KEY *key= table->key_info;
4351     KEY *key_end= key + table->s->keys;
4352     for (; key < key_end; key++)
4353     {
4354       KEY_PART_INFO *key_part= key->key_part;
4355       KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4356       for (; key_part < key_part_end; key_part++)
4357       {
4358         if (!key_part->fieldnr)
4359           continue;
4360         Field *field= table->field[key_part->fieldnr - 1];
4361         uint cs_number= field->charset()->number;
4362         if ((mysql_version < 50048 &&
4363              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4364               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4365               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4366               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4367               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4368               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4369               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4370               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4371              (mysql_version < 50124 &&
4372              (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4373               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4374           return HA_ADMIN_NEEDS_UPGRADE;
4375       }
4376     }
4377   }
4378   return 0;
4379 }
4380 
4381 
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4382 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4383 {
4384   int error;
4385   KEY *keyinfo, *keyend;
4386   KEY_PART_INFO *keypart, *keypartend;
4387 
4388   if (!table->s->mysql_version)
4389   {
4390     /* check for blob-in-key error */
4391     keyinfo= table->key_info;
4392     keyend= table->key_info + table->s->keys;
4393     for (; keyinfo < keyend; keyinfo++)
4394     {
4395       keypart= keyinfo->key_part;
4396       keypartend= keypart + keyinfo->user_defined_key_parts;
4397       for (; keypart < keypartend; keypart++)
4398       {
4399         if (!keypart->fieldnr)
4400           continue;
4401         Field *field= table->field[keypart->fieldnr-1];
4402         if (field->type() == MYSQL_TYPE_BLOB)
4403         {
4404           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4405             check_opt->flags= T_MEDIUM;
4406           return HA_ADMIN_NEEDS_CHECK;
4407         }
4408       }
4409     }
4410   }
4411   if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
4412     return HA_ADMIN_NEEDS_ALTER;
4413 
4414   if ((error= check_collation_compatibility()))
4415     return error;
4416 
4417   return check_for_upgrade(check_opt);
4418 }
4419 
4420 
check_old_types()4421 int handler::check_old_types()
4422 {
4423   Field** field;
4424 
4425   for (field= table->field; (*field); field++)
4426   {
4427     if (table->s->mysql_version == 0) // prior to MySQL 5.0
4428     {
4429       /* check for bad DECIMAL field */
4430       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4431       {
4432         return HA_ADMIN_NEEDS_ALTER;
4433       }
4434       if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4435       {
4436         return HA_ADMIN_NEEDS_ALTER;
4437       }
4438     }
4439 
4440     /*
4441       Check for old DECIMAL field.
4442 
4443       Above check does not take into account for pre 5.0 decimal types which can
4444       be present in the data directory if user did in-place upgrade from
4445       mysql-4.1 to mysql-5.0.
4446     */
4447     if ((*field)->type() == MYSQL_TYPE_DECIMAL)
4448     {
4449       return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4450     }
4451 
4452     if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4453       return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4454 
4455     //Check for old temporal format if avoid_temporal_upgrade is disabled.
4456     mysql_mutex_lock(&LOCK_global_system_variables);
4457     bool check_temporal_upgrade= !avoid_temporal_upgrade;
4458     mysql_mutex_unlock(&LOCK_global_system_variables);
4459 
4460     if (check_temporal_upgrade)
4461     {
4462       if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4463           ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4464           ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4465         return HA_ADMIN_NEEDS_ALTER;
4466     }
4467   }
4468   return 0;
4469 }
4470 
4471 
update_frm_version(TABLE * table)4472 static bool update_frm_version(TABLE *table)
4473 {
4474   char path[FN_REFLEN];
4475   File file;
4476   int result= 1;
4477   DBUG_ENTER("update_frm_version");
4478 
4479   /*
4480     No need to update frm version in case table was created or checked
4481     by server with the same version. This also ensures that we do not
4482     update frm version for temporary tables as this code doesn't support
4483     temporary tables.
4484   */
4485   if (table->s->mysql_version == MYSQL_VERSION_ID)
4486     DBUG_RETURN(0);
4487 
4488   strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4489 
4490   if ((file= mysql_file_open(key_file_frm,
4491                              path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4492   {
4493     uchar version[4];
4494 
4495     int4store(version, MYSQL_VERSION_ID);
4496 
4497     if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
4498       goto err;
4499 
4500     table->s->mysql_version= MYSQL_VERSION_ID;
4501   }
4502 err:
4503   if (file >= 0)
4504     (void) mysql_file_close(file, MYF(MY_WME));
4505   DBUG_RETURN(result);
4506 }
4507 
4508 
4509 
4510 /**
4511   @return
4512     key if error because of duplicated keys
4513 */
get_dup_key(int error)4514 uint handler::get_dup_key(int error)
4515 {
4516   assert(table_share->tmp_table != NO_TMP_TABLE ||
4517          m_lock_type != F_UNLCK);
4518   DBUG_ENTER("handler::get_dup_key");
4519   table->file->errkey  = (uint) -1;
4520   if (error == HA_ERR_FOUND_DUPP_KEY ||
4521       error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4522       error == HA_ERR_DROP_INDEX_FK)
4523     table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4524   DBUG_RETURN(table->file->errkey);
4525 }
4526 
4527 
4528 /**
4529   Delete all files with extension from bas_ext().
4530 
4531   @param name		Base name of table
4532 
4533   @note
4534     We assume that the handler may return more extensions than
4535     was actually used for the file.
4536 
4537   @retval
4538     0   If we successfully deleted at least one file from base_ext and
4539     didn't get any other errors than ENOENT
4540   @retval
4541     !0  Error
4542 */
delete_table(const char * name)4543 int handler::delete_table(const char *name)
4544 {
4545   int saved_error= 0;
4546   int error= 0;
4547   int enoent_or_zero= ENOENT;                   // Error if no file was deleted
4548   char buff[FN_REFLEN];
4549   assert(m_lock_type == F_UNLCK);
4550 
4551   for (const char **ext=bas_ext(); *ext ; ext++)
4552   {
4553     fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
4554     if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
4555     {
4556       if (my_errno() != ENOENT)
4557       {
4558         /*
4559           If error on the first existing file, return the error.
4560           Otherwise delete as much as possible.
4561         */
4562         if (enoent_or_zero)
4563           return my_errno();
4564 	saved_error= my_errno();
4565       }
4566     }
4567     else
4568       enoent_or_zero= 0;                        // No error for ENOENT
4569     error= enoent_or_zero;
4570   }
4571   return saved_error ? saved_error : error;
4572 }
4573 
4574 
rename_table(const char * from,const char * to)4575 int handler::rename_table(const char * from, const char * to)
4576 {
4577   int error= 0;
4578   const char **ext, **start_ext;
4579   start_ext= bas_ext();
4580   for (ext= start_ext; *ext ; ext++)
4581   {
4582     if (rename_file_ext(from, to, *ext))
4583     {
4584       error= my_errno();
4585       if (error != ENOENT)
4586 	break;
4587       error= 0;
4588     }
4589   }
4590   if (error)
4591   {
4592     /* Try to revert the rename. Ignore errors. */
4593     for (; ext >= start_ext; ext--)
4594       rename_file_ext(to, from, *ext);
4595   }
4596   return error;
4597 }
4598 
4599 
drop_table(const char * name)4600 void handler::drop_table(const char *name)
4601 {
4602   close();
4603   delete_table(name);
4604 }
4605 
4606 
4607 /**
4608   Performs checks upon the table.
4609 
4610   @param thd                thread doing CHECK TABLE operation
4611   @param check_opt          options from the parser
4612 
4613   @retval
4614     HA_ADMIN_OK               Successful upgrade
4615   @retval
4616     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4617   @retval
4618     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4619   @retval
4620     HA_ADMIN_NOT_IMPLEMENTED
4621 */
ha_check(THD * thd,HA_CHECK_OPT * check_opt)4622 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4623 {
4624   int error;
4625   bool skip_version_update = false;
4626   bool is_upgrade = check_opt->sql_flags & TT_FOR_UPGRADE;
4627 
4628   assert(table_share->tmp_table != NO_TMP_TABLE ||
4629          m_lock_type != F_UNLCK);
4630 
4631   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4632       (check_opt->sql_flags & TT_FOR_UPGRADE))
4633     return 0;
4634 
4635   if (table->s->mysql_version < MYSQL_VERSION_ID)
4636   {
4637     if ((error= check_old_types()))
4638       return error;
4639 
4640     error= ha_check_for_upgrade(check_opt);
4641     switch (error)
4642     {
4643       case HA_ADMIN_NEEDS_UPG_PART:
4644         /* Skip version update as the table needs upgrade. */
4645         skip_version_update= true;
4646         /* Fall through */
4647       case HA_ADMIN_OK:
4648         if (is_upgrade)
4649           return error;
4650         /* Fall through */
4651       case HA_ADMIN_NEEDS_CHECK:
4652         break;
4653       default:
4654         return error;
4655     }
4656   }
4657 
4658   if ((error= check(thd, check_opt)))
4659     return error;
4660   /* Skip updating frm version if not main handler. */
4661   if (table->file != this || skip_version_update)
4662     return error;
4663   return update_frm_version(table);
4664 }
4665 
4666 void
mark_trx_noop_dml()4667 handler::mark_trx_noop_dml()
4668 {
4669   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4670   /*
4671     When a storage engine method is called, the transaction must
4672     have been started, unless it's a DDL call, for which the
4673     storage engine starts the transaction internally, and commits
4674     it internally, without registering in the ha_list.
4675     Unfortunately here we can't know for sure if the engine
4676     has registered the transaction or not, so we must check.
4677   */
4678   if (ha_info->is_started())
4679   {
4680     assert(has_transactions());
4681     /*
4682       table_share can be NULL in ha_delete_table(). See implementation
4683       of standalone function ha_delete_table() in sql_base.cc.
4684     */
4685     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4686       ha_info->set_trx_noop_read_write();
4687   }
4688 }
4689 
4690 /**
4691   A helper function to mark a transaction read-write,
4692   if it is started.
4693 */
4694 
4695 void
mark_trx_read_write()4696 handler::mark_trx_read_write()
4697 {
4698   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4699   /*
4700     When a storage engine method is called, the transaction must
4701     have been started, unless it's a DDL call, for which the
4702     storage engine starts the transaction internally, and commits
4703     it internally, without registering in the ha_list.
4704     Unfortunately here we can't know for sure if the engine
4705     has registered the transaction or not, so we must check.
4706   */
4707   if (ha_info->is_started())
4708   {
4709     assert(has_transactions());
4710     /*
4711       table_share can be NULL in ha_delete_table(). See implementation
4712       of standalone function ha_delete_table() in sql_base.cc.
4713     */
4714     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4715       ha_info->set_trx_read_write();
4716   }
4717 }
4718 
4719 
4720 /**
4721   Repair table: public interface.
4722 
4723   @sa handler::repair()
4724 */
4725 
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4726 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4727 {
4728   int result;
4729   mark_trx_read_write();
4730 
4731   result= repair(thd, check_opt);
4732   assert(result == HA_ADMIN_NOT_IMPLEMENTED ||
4733          ha_table_flags() & HA_CAN_REPAIR);
4734 
4735   int old_types_error= check_old_types();
4736 
4737   if (old_types_error != HA_ADMIN_NEEDS_DUMP_UPGRADE && result == HA_ADMIN_OK)
4738     result= update_frm_version(table);
4739   return result;
4740 }
4741 
4742 
4743 /**
4744   Start bulk insert.
4745 
4746   Allow the handler to optimize for multiple row insert.
4747 
4748   @param rows  Estimated rows to insert
4749 */
4750 
ha_start_bulk_insert(ha_rows rows)4751 void handler::ha_start_bulk_insert(ha_rows rows)
4752 {
4753   DBUG_ENTER("handler::ha_start_bulk_insert");
4754   assert(table_share->tmp_table != NO_TMP_TABLE ||
4755          m_lock_type == F_WRLCK);
4756   estimation_rows_to_insert= rows;
4757   start_bulk_insert(rows);
4758   DBUG_VOID_RETURN;
4759 }
4760 
4761 
4762 /**
4763   End bulk insert.
4764 
4765   @return Operation status
4766     @retval 0     Success
4767     @retval != 0  Failure (error code returned)
4768 */
4769 
ha_end_bulk_insert()4770 int handler::ha_end_bulk_insert()
4771 {
4772   DBUG_ENTER("handler::ha_end_bulk_insert");
4773   assert(table_share->tmp_table != NO_TMP_TABLE ||
4774          m_lock_type == F_WRLCK);
4775   estimation_rows_to_insert= 0;
4776   DBUG_RETURN(end_bulk_insert());
4777 }
4778 
4779 
4780 /**
4781   Bulk update row: public interface.
4782 
4783   @sa handler::bulk_update_row()
4784 */
4785 
4786 int
ha_bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)4787 handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4788                             uint *dup_key_found)
4789 {
4790   assert(table_share->tmp_table != NO_TMP_TABLE ||
4791          m_lock_type == F_WRLCK);
4792   mark_trx_read_write();
4793 
4794   return bulk_update_row(old_data, new_data, dup_key_found);
4795 }
4796 
4797 
4798 /**
4799   Delete all rows: public interface.
4800 
4801   @sa handler::delete_all_rows()
4802 */
4803 
4804 int
ha_delete_all_rows()4805 handler::ha_delete_all_rows()
4806 {
4807   assert(table_share->tmp_table != NO_TMP_TABLE ||
4808          m_lock_type == F_WRLCK);
4809   mark_trx_read_write();
4810 
4811   return delete_all_rows();
4812 }
4813 
4814 
4815 /**
4816   Truncate table: public interface.
4817 
4818   @sa handler::truncate()
4819 */
4820 
4821 int
ha_truncate()4822 handler::ha_truncate()
4823 {
4824   assert(table_share->tmp_table != NO_TMP_TABLE ||
4825          m_lock_type == F_WRLCK);
4826   mark_trx_read_write();
4827 
4828   return truncate();
4829 }
4830 
4831 
4832 /**
4833   Optimize table: public interface.
4834 
4835   @sa handler::optimize()
4836 */
4837 
4838 int
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4839 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4840 {
4841   assert(table_share->tmp_table != NO_TMP_TABLE ||
4842          m_lock_type == F_WRLCK);
4843   mark_trx_read_write();
4844 
4845   return optimize(thd, check_opt);
4846 }
4847 
4848 
4849 /**
4850   Analyze table: public interface.
4851 
4852   @sa handler::analyze()
4853 */
4854 
4855 int
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4856 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4857 {
4858   assert(table_share->tmp_table != NO_TMP_TABLE ||
4859          m_lock_type != F_UNLCK);
4860   mark_trx_read_write();
4861 
4862   return analyze(thd, check_opt);
4863 }
4864 
4865 
4866 /**
4867   Check and repair table: public interface.
4868 
4869   @sa handler::check_and_repair()
4870 */
4871 
4872 bool
ha_check_and_repair(THD * thd)4873 handler::ha_check_and_repair(THD *thd)
4874 {
4875   assert(table_share->tmp_table != NO_TMP_TABLE ||
4876          m_lock_type == F_UNLCK);
4877   mark_trx_read_write();
4878 
4879   return check_and_repair(thd);
4880 }
4881 
4882 
4883 /**
4884   Disable indexes: public interface.
4885 
4886   @sa handler::disable_indexes()
4887 */
4888 
4889 int
ha_disable_indexes(uint mode)4890 handler::ha_disable_indexes(uint mode)
4891 {
4892   assert(table_share->tmp_table != NO_TMP_TABLE ||
4893          m_lock_type != F_UNLCK);
4894   mark_trx_read_write();
4895 
4896   return disable_indexes(mode);
4897 }
4898 
4899 
4900 /**
4901   Enable indexes: public interface.
4902 
4903   @sa handler::enable_indexes()
4904 */
4905 
4906 int
ha_enable_indexes(uint mode)4907 handler::ha_enable_indexes(uint mode)
4908 {
4909   assert(table_share->tmp_table != NO_TMP_TABLE ||
4910          m_lock_type != F_UNLCK);
4911   mark_trx_read_write();
4912 
4913   return enable_indexes(mode);
4914 }
4915 
4916 
4917 /**
4918   Discard or import tablespace: public interface.
4919 
4920   @sa handler::discard_or_import_tablespace()
4921 */
4922 
4923 int
ha_discard_or_import_tablespace(my_bool discard)4924 handler::ha_discard_or_import_tablespace(my_bool discard)
4925 {
4926   assert(table_share->tmp_table != NO_TMP_TABLE ||
4927          m_lock_type == F_WRLCK);
4928   mark_trx_read_write();
4929 
4930   return discard_or_import_tablespace(discard);
4931 }
4932 
4933 
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4934 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4935                                              Alter_inplace_info *ha_alter_info)
4936 {
4937   assert(table_share->tmp_table != NO_TMP_TABLE ||
4938          m_lock_type != F_UNLCK);
4939   mark_trx_read_write();
4940 
4941   return prepare_inplace_alter_table(altered_table, ha_alter_info);
4942 }
4943 
4944 
ha_commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)4945 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4946                                             Alter_inplace_info *ha_alter_info,
4947                                             bool commit)
4948 {
4949    /*
4950      At this point we should have an exclusive metadata lock on the table.
4951      The exception is if we're about to roll back changes (commit= false).
4952      In this case, we might be rolling back after a failed lock upgrade,
4953      so we could be holding the same lock level as for inplace_alter_table().
4954    */
4955   assert(ha_thd()->mdl_context.owns_equal_or_stronger_lock(MDL_key::TABLE,
4956                                                            table->s->db.str,
4957                                                            table->s->table_name.str,
4958                                                            MDL_EXCLUSIVE) ||
4959          !commit);
4960 
4961    return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4962 }
4963 
4964 
4965 /*
4966    Default implementation to support in-place alter table
4967    and old online add/drop index API
4968 */
4969 
4970 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4971 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4972                                           Alter_inplace_info *ha_alter_info)
4973 {
4974   DBUG_ENTER("check_if_supported_alter");
4975 
4976   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4977 
4978   Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4979     Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4980     Alter_inplace_info::ALTER_COLUMN_NAME |
4981     Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4982     Alter_inplace_info::CHANGE_CREATE_OPTION |
4983     Alter_inplace_info::ALTER_RENAME |
4984     Alter_inplace_info::RENAME_INDEX |
4985     Alter_inplace_info::ALTER_INDEX_COMMENT |
4986     Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
4987 
4988   /* Is there at least one operation that requires copy algorithm? */
4989   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4990     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4991 
4992   /*
4993     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4994     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4995     change column charsets and so not supported in-place through
4996     old API.
4997 
4998     Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4999     not supported as in-place operations in old API either.
5000   */
5001   if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
5002                                   HA_CREATE_USED_DEFAULT_CHARSET |
5003                                   HA_CREATE_USED_PACK_KEYS |
5004                                   HA_CREATE_USED_MAX_ROWS) ||
5005       (table->s->row_type != create_info->row_type))
5006     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
5007 
5008   uint table_changes= (ha_alter_info->handler_flags &
5009                        Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
5010     IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
5011   if (table->file->check_if_incompatible_data(create_info, table_changes)
5012       == COMPATIBLE_DATA_YES)
5013     DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
5014 
5015   DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
5016 }
5017 
5018 
5019 /*
5020    Default implementation to support in-place alter table
5021    and old online add/drop index API
5022 */
5023 
notify_table_changed()5024 void handler::notify_table_changed()
5025 {
5026   ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
5027 }
5028 
5029 
report_unsupported_error(const char * not_supported,const char * try_instead)5030 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
5031                                                   const char *try_instead)
5032 {
5033   if (unsupported_reason == NULL)
5034     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
5035              not_supported, try_instead);
5036   else
5037     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
5038              not_supported, unsupported_reason, try_instead);
5039 }
5040 
5041 
5042 /**
5043   Rename table: public interface.
5044 
5045   @sa handler::rename_table()
5046 */
5047 
5048 int
ha_rename_table(const char * from,const char * to)5049 handler::ha_rename_table(const char *from, const char *to)
5050 {
5051   assert(m_lock_type == F_UNLCK);
5052   mark_trx_read_write();
5053 
5054   return rename_table(from, to);
5055 }
5056 
5057 
5058 /**
5059   Delete table: public interface.
5060 
5061   @sa handler::delete_table()
5062 */
5063 
5064 int
ha_delete_table(const char * name)5065 handler::ha_delete_table(const char *name)
5066 {
5067   assert(m_lock_type == F_UNLCK);
5068   mark_trx_read_write();
5069 
5070   return delete_table(name);
5071 }
5072 
5073 
5074 /**
5075   Drop table in the engine: public interface.
5076 
5077   @sa handler::drop_table()
5078 */
5079 
5080 void
ha_drop_table(const char * name)5081 handler::ha_drop_table(const char *name)
5082 {
5083   assert(m_lock_type == F_UNLCK);
5084   mark_trx_read_write();
5085 
5086   return drop_table(name);
5087 }
5088 
5089 
5090 /**
5091   Create a table in the engine: public interface.
5092 
5093   @sa handler::create()
5094 */
5095 
5096 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info)5097 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
5098 {
5099   assert(m_lock_type == F_UNLCK);
5100   mark_trx_read_write();
5101 
5102   return create(name, form, info);
5103 }
5104 
5105 
5106 /**
5107   Create handler files for CREATE TABLE: public interface.
5108 
5109   @sa handler::create_handler_files()
5110 */
5111 
5112 int
ha_create_handler_files(const char * name,const char * old_name,int action_flag,HA_CREATE_INFO * info)5113 handler::ha_create_handler_files(const char *name, const char *old_name,
5114                         int action_flag, HA_CREATE_INFO *info)
5115 {
5116   /*
5117     Normally this is done when unlocked, but in fast_alter_partition_table,
5118     it is done on an already locked handler when preparing to alter/rename
5119     partitions.
5120   */
5121   assert(m_lock_type == F_UNLCK ||
5122          (!old_name && strcmp(name, table_share->path.str)));
5123   mark_trx_read_write();
5124 
5125   return create_handler_files(name, old_name, action_flag, info);
5126 }
5127 
5128 
5129 /**
5130   Tell the storage engine that it is allowed to "disable transaction" in the
5131   handler. It is a hint that ACID is not required - it is used in NDB for
5132   ALTER TABLE, for example, when data are copied to temporary table.
5133   A storage engine may treat this hint any way it likes. NDB for example
5134   starts to commit every now and then automatically.
5135   This hint can be safely ignored.
5136 */
ha_enable_transaction(THD * thd,bool on)5137 int ha_enable_transaction(THD *thd, bool on)
5138 {
5139   int error=0;
5140   DBUG_ENTER("ha_enable_transaction");
5141   DBUG_PRINT("enter", ("on: %d", (int) on));
5142 
5143 #ifdef WITH_WSREP
5144   if (thd->wsrep_applier) DBUG_RETURN(0);
5145 #endif
5146   if ((thd->get_transaction()->m_flags.enabled= on))
5147   {
5148     /*
5149       Now all storage engines should have transaction handling enabled.
5150       But some may have it enabled all the time - "disabling" transactions
5151       is an optimization hint that storage engine is free to ignore.
5152       So, let's commit an open transaction (if any) now.
5153     */
5154     if (!(error= ha_commit_trans(thd, 0)))
5155       error= trans_commit_implicit(thd);
5156   }
5157   DBUG_RETURN(error);
5158 }
5159 
index_next_same(uchar * buf,const uchar * key,uint keylen)5160 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
5161 {
5162   int error;
5163   DBUG_ENTER("index_next_same");
5164   if (!(error=index_next(buf)))
5165   {
5166     my_ptrdiff_t ptrdiff= buf - table->record[0];
5167     uchar *save_record_0= NULL;
5168     KEY *key_info= NULL;
5169     KEY_PART_INFO *key_part= NULL;
5170     KEY_PART_INFO *key_part_end= NULL;
5171 
5172     /*
5173       key_cmp_if_same() compares table->record[0] against 'key'.
5174       In parts it uses table->record[0] directly, in parts it uses
5175       field objects with their local pointers into table->record[0].
5176       If 'buf' is distinct from table->record[0], we need to move
5177       all record references. This is table->record[0] itself and
5178       the field pointers of the fields used in this key.
5179     */
5180     if (ptrdiff)
5181     {
5182       save_record_0= table->record[0];
5183       table->record[0]= buf;
5184       key_info= table->key_info + active_index;
5185       key_part= key_info->key_part;
5186       key_part_end= key_part + key_info->user_defined_key_parts;
5187       for (; key_part < key_part_end; key_part++)
5188       {
5189         assert(key_part->field);
5190         key_part->field->move_field_offset(ptrdiff);
5191       }
5192     }
5193 
5194     if (key_cmp_if_same(table, key, active_index, keylen))
5195     {
5196       table->status=STATUS_NOT_FOUND;
5197       error=HA_ERR_END_OF_FILE;
5198     }
5199 
5200     /* Move back if necessary. */
5201     if (ptrdiff)
5202     {
5203       table->record[0]= save_record_0;
5204       for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
5205         key_part->field->move_field_offset(-ptrdiff);
5206     }
5207   }
5208   DBUG_RETURN(error);
5209 }
5210 
/****************************************************************************
** Some general functions that aren't in the handler class
****************************************************************************/
5214 
5215 /**
5216   Initiates table-file and calls appropriate database-creator.
5217 
5218   @retval
5219    0  ok
5220   @retval
5221    1  error
5222 */
ha_create_table(THD * thd,const char * path,const char * db,const char * table_name,HA_CREATE_INFO * create_info,bool update_create_info,bool is_temp_table)5223 int ha_create_table(THD *thd, const char *path,
5224                     const char *db, const char *table_name,
5225                     HA_CREATE_INFO *create_info,
5226                     bool update_create_info,
5227                     bool is_temp_table)
5228 {
5229   int error= 1;
5230   TABLE table;
5231   char name_buff[FN_REFLEN];
5232   const char *name;
5233   TABLE_SHARE share;
5234 #ifdef HAVE_PSI_TABLE_INTERFACE
5235   bool temp_table = is_temp_table ||
5236     (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
5237     (strstr(path, tmp_file_prefix) != NULL);
5238 #endif
5239   DBUG_ENTER("ha_create_table");
5240 
5241   init_tmp_table_share(thd, &share, db, 0, table_name, path);
5242   if (open_table_def(thd, &share, 0))
5243     goto err;
5244 
5245 #ifdef HAVE_PSI_TABLE_INTERFACE
5246   share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
5247 #endif
5248 
5249   if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
5250                             TRUE))
5251   {
5252 #ifdef HAVE_PSI_TABLE_INTERFACE
5253     PSI_TABLE_CALL(drop_table_share)
5254       (temp_table, db, strlen(db), table_name, strlen(table_name));
5255 #endif
5256     goto err;
5257   }
5258 
5259   if (update_create_info)
5260     update_create_info_from_table(create_info, &table);
5261 
5262   name= get_canonical_filename(table.file, share.path.str, name_buff);
5263 
5264   error= table.file->ha_create(name, &table, create_info);
5265   if (error)
5266   {
5267     table.file->print_error(error, MYF(0));
5268 #ifdef HAVE_PSI_TABLE_INTERFACE
5269     PSI_TABLE_CALL(drop_table_share)
5270       (temp_table, db, strlen(db), table_name, strlen(table_name));
5271 #endif
5272   }
5273   (void) closefrm(&table, 0);
5274 err:
5275   free_table_share(&share);
5276   DBUG_RETURN(error != 0);
5277 }
5278 
5279 /**
5280   Try to discover table from engine.
5281 
5282   @note
5283     If found, write the frm file to disk.
5284 
5285   @retval
5286   -1    Table did not exists
5287   @retval
5288    0    Table created ok
5289   @retval
5290    > 0  Error, table existed but could not be created
5291 */
ha_create_table_from_engine(THD * thd,const char * db,const char * name)5292 int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
5293 {
5294   int error;
5295   uchar *frmblob;
5296   size_t frmlen;
5297   char path[FN_REFLEN + 1];
5298   HA_CREATE_INFO create_info;
5299   TABLE table;
5300   TABLE_SHARE share;
5301   DBUG_ENTER("ha_create_table_from_engine");
5302   DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
5303 
5304   if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
5305   {
5306     /* Table could not be discovered and thus not created */
5307     DBUG_RETURN(error);
5308   }
5309 
5310   /*
5311     Table exists in handler and could be discovered
5312     frmblob and frmlen are set, write the frm to disk
5313   */
5314 
5315   build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
5316   // Save the frm file
5317   error= writefrm(path, frmblob, frmlen);
5318   my_free(frmblob);
5319   if (error)
5320     DBUG_RETURN(2);
5321 
5322   init_tmp_table_share(thd, &share, db, 0, name, path);
5323   if (open_table_def(thd, &share, 0))
5324   {
5325     DBUG_RETURN(3);
5326   }
5327 
5328 #ifdef HAVE_PSI_TABLE_INTERFACE
5329   /*
5330     Table discovery is not instrumented.
5331     Once discovered, the table will be opened normally,
5332     and instrumented normally.
5333   */
5334 #endif
5335 
5336   if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
5337   {
5338     free_table_share(&share);
5339     DBUG_RETURN(3);
5340   }
5341 
5342   update_create_info_from_table(&create_info, &table);
5343   create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
5344 
5345   get_canonical_filename(table.file, path, path);
5346   error=table.file->ha_create(path, &table, &create_info);
5347   (void) closefrm(&table, 1);
5348 
5349   DBUG_RETURN(error != 0);
5350 }
5351 
5352 
5353 /**
5354   Try to find a table in a storage engine.
5355 
5356   @param db   Normalized table schema name
5357   @param name Normalized table name.
5358   @param[out] exists Only valid if the function succeeded.
5359 
5360   @retval TRUE   An error is found
5361   @retval FALSE  Success, check *exists
5362 */
5363 
5364 bool
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5365 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
5366                          bool *exists)
5367 {
5368   uchar *frmblob= NULL;
5369   size_t frmlen;
5370   DBUG_ENTER("ha_check_if_table_exists");
5371 
5372   *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
5373   if (*exists)
5374     my_free(frmblob);
5375 
5376   DBUG_RETURN(FALSE);
5377 }
5378 
5379 
5380 /**
5381   @brief Check if a given table is a user table or a valid system table or
5382          a valid system table that a SE supports.
5383 
5384   @param   hton                  Handlerton of new engine.
5385   @param   db                    Database name.
5386   @param   table_name            Table name to be checked.
5387 
5388   @retval  st_sys_tbl_chk_params::enum_status
5389 */
5390 static st_sys_tbl_chk_params::enum_status
ha_get_system_table_check_status(handlerton * hton,const char * db,const char * table_name)5391 ha_get_system_table_check_status(handlerton *hton, const char *db,
5392                                    const char *table_name)
5393 {
5394   DBUG_ENTER("ha_get_system_table_check_status");
5395   st_sys_tbl_chk_params check_params;
5396   check_params.status= st_sys_tbl_chk_params::USER_TABLE;
5397   bool is_system_database= false;
5398   const char **names;
5399   st_handler_tablename *systab;
5400 
5401   // Check if we have a system database name in the command.
5402   assert(known_system_databases != NULL);
5403   names= known_system_databases;
5404   while (names && *names)
5405   {
5406     if (strcmp(*names, db) == 0)
5407     {
5408       /* Used to compare later, will be faster */
5409       check_params.db= *names;
5410       is_system_database= true;
5411       break;
5412     }
5413     names++;
5414   }
5415   if (!is_system_database)
5416     DBUG_RETURN(st_sys_tbl_chk_params::USER_TABLE);
5417 
5418   // Check if this is SQL layer system tables.
5419   systab= mysqld_system_tables;
5420   check_params.is_sql_layer_system_table= false;
5421   while (systab && systab->db)
5422   {
5423     if (systab->db == check_params.db &&
5424         strcmp(systab->tablename, table_name) == 0)
5425     {
5426       check_params.is_sql_layer_system_table= true;
5427       break;
5428     }
5429     systab++;
5430   }
5431 
5432   // Check if this is a system table and if some engine supports it.
5433   check_params.status= check_params.is_sql_layer_system_table ?
5434     st_sys_tbl_chk_params::SYSTEM_TABLE :
5435     st_sys_tbl_chk_params::USER_TABLE;
5436   check_params.db_type= hton->db_type;
5437   check_params.table_name= table_name;
5438   plugin_foreach(NULL, check_engine_system_table_handlerton,
5439                  MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
5440 
5441   DBUG_RETURN(check_params.status);
5442 }
5443 
5444 
5445 /**
5446   @brief Check if a given table is a system table supported by a SE.
5447 
5448   @todo There is another function called is_system_table_name() used by
5449         get_table_category(), which is used to set TABLE_SHARE table_category.
5450         It checks only a subset of table name like proc, event and time*.
5451         We cannot use below function in get_table_category(),
5452         as that affects locking mechanism. If we need to
5453         unify these functions, we need to fix locking issues generated.
5454 
5455   @param   hton                  Handlerton of new engine.
5456   @param   db                    Database name.
5457   @param   table_name            Table name to be checked.
5458 
5459   @return Operation status
5460     @retval  true                If the table name is a valid system table
5461                                  that is supported by a SE.
5462 
5463     @retval  false               Not a system table.
5464 */
ha_is_supported_system_table(handlerton * hton,const char * db,const char * table_name)5465 bool ha_is_supported_system_table(handlerton *hton, const char *db,
5466                                   const char *table_name)
5467 {
5468   DBUG_ENTER("ha_is_supported_system_table");
5469   st_sys_tbl_chk_params::enum_status status=
5470     ha_get_system_table_check_status(hton, db, table_name);
5471 
5472   // It's a valid SE supported system table.
5473   DBUG_RETURN(status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5474 }
5475 
5476 
5477 /**
5478   @brief Check if a given table is a system table that belongs
5479   to some SE or a user table.
5480 
5481   @details The primary purpose of introducing this function is to stop system
5482   tables to be created or being moved to undesired storage engines.
5483 
5484   @todo There is another function called is_system_table_name() used by
5485         get_table_category(), which is used to set TABLE_SHARE table_category.
5486         It checks only a subset of table name like proc, event and time*.
5487         We cannot use below function in get_table_category(),
5488         as that affects locking mechanism. If we need to
5489         unify these functions, we need to fix locking issues generated.
5490 
5491   @param   hton                  Handlerton of new engine.
5492   @param   db                    Database name.
5493   @param   table_name            Table name to be checked.
5494 
5495   @return Operation status
5496     @retval  true                If the table name is a valid system table
5497                                  or if its a valid user table.
5498 
5499     @retval  false               If the table name is a system table name
5500                                  and does not belong to engine specified
5501                                  in the command.
5502 */
ha_is_valid_system_or_user_table(handlerton * hton,const char * db,const char * table_name)5503 bool ha_is_valid_system_or_user_table(handlerton *hton, const char *db,
5504                                       const char *table_name)
5505 {
5506   DBUG_ENTER("ha_is_valid_system_or_user_table");
5507 
5508   st_sys_tbl_chk_params::enum_status status=
5509     ha_get_system_table_check_status(hton, db, table_name);
5510 
5511   // It's a user table or a valid SE supported system table.
5512   DBUG_RETURN(status == st_sys_tbl_chk_params::USER_TABLE ||
5513               status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5514 }
5515 
5516 
5517 /**
5518   @brief Called for each SE to check if given db, tablename is a system table.
5519 
5520   @details The primary purpose of introducing this function is to stop system
5521   tables to be created or being moved to undesired storage engines.
5522 
5523   @param   unused  unused THD*
5524   @param   plugin  Points to specific SE.
5525   @param   arg     Is of type struct st_sys_tbl_chk_params.
5526 
5527   @note
5528     args->status   Indicates OUT param,
5529                    see struct st_sys_tbl_chk_params definition for more info.
5530 
5531   @return Operation status
5532     @retval  true  There was a match found.
5533                    This will stop doing checks with other SE's.
5534 
5535     @retval  false There was no match found.
5536                    Other SE's will be checked to find a match.
5537 */
check_engine_system_table_handlerton(THD * unused,plugin_ref plugin,void * arg)5538 static my_bool check_engine_system_table_handlerton(THD *unused,
5539                                                     plugin_ref plugin,
5540                                                     void *arg)
5541 {
5542   st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
5543   handlerton *hton= plugin_data<handlerton*>(plugin);
5544 
5545   // Do we already know that the table is a system table?
5546   if (check_params->status == st_sys_tbl_chk_params::SYSTEM_TABLE)
5547   {
5548     /*
5549       If this is the same SE specified in the command, we can
5550       simply ask the SE if it supports it stop the search regardless.
5551     */
5552     if (hton->db_type == check_params->db_type)
5553     {
5554       if (hton->is_supported_system_table &&
5555           hton->is_supported_system_table(check_params->db,
5556                                        check_params->table_name,
5557                                        check_params->is_sql_layer_system_table))
5558         check_params->status=
5559           st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
5560       return TRUE;
5561     }
5562     /*
5563       If this is a different SE, there is no point in asking the SE
5564       since we already know it's a system table and we don't care
5565       if it is supported or not.
5566     */
5567     return FALSE;
5568   }
5569 
5570   /*
5571     We don't yet know if the table is a system table or not.
5572     We therefore must always ask the SE.
5573   */
5574   if (hton->is_supported_system_table &&
5575       hton->is_supported_system_table(check_params->db,
5576                                       check_params->table_name,
5577                                       check_params->is_sql_layer_system_table))
5578   {
5579     /*
5580       If this is the same SE specified in the command, we know it's a
5581       supported system table and can stop the search.
5582     */
5583     if (hton->db_type == check_params->db_type)
5584     {
5585       check_params->status= st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
5586       return TRUE;
5587     }
5588     else
5589       check_params->status= st_sys_tbl_chk_params::SYSTEM_TABLE;
5590   }
5591 
5592   return FALSE;
5593 }
5594 
5595 /*
5596   Prepare list of all known system database names
5597   current we just have 'mysql' as system database name.
5598 
5599   Later ndbcluster, innodb SE's can define some new database
5600   name which can store system tables specific to SE.
5601 */
ha_known_system_databases(void)5602 const char** ha_known_system_databases(void)
5603 {
5604   list<const char*> found_databases;
5605   const char **databases, **database;
5606 
5607   // Get mysqld system database name.
5608   found_databases.push_back((char*) mysqld_system_database);
5609 
5610   // Get system database names from every specific storage engine.
5611   plugin_foreach(NULL, system_databases_handlerton,
5612                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5613 
5614   databases= (const char **) my_once_alloc(sizeof(char *)*
5615                                      (found_databases.size()+1),
5616                                      MYF(MY_WME | MY_FAE));
5617   assert(databases != NULL);
5618 
5619   list<const char*>::iterator it;
5620   database= databases;
5621   for (it= found_databases.begin(); it != found_databases.end(); it++)
5622     *database++= *it;
5623   *database= 0; // Last element.
5624 
5625   return databases;
5626 }
5627 
5628 /**
5629   @brief Fetch system database name specific to SE.
5630 
5631   @details This function is invoked by plugin_foreach() from
5632            ha_known_system_databases(), for each storage engine.
5633 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5634 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5635                                            void *arg)
5636 {
5637   list<const char*> *found_databases= (list<const char*> *) arg;
5638   const char *db;
5639 
5640   handlerton *hton= plugin_data<handlerton*>(plugin);
5641   if (hton->system_database)
5642   {
5643     db= hton->system_database();
5644     if (db)
5645       found_databases->push_back(db);
5646   }
5647 
5648   return FALSE;
5649 }
5650 
init()5651 void st_ha_check_opt::init()
5652 {
5653   flags= sql_flags= 0;
5654 }
5655 
5656 
/*****************************************************************************
  Key cache handling.

  This code is only relevant for ISAM/MyISAM tables

  key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
  call to ha_init_key_cache() (probably out of memory)
*****************************************************************************/
5666 
5667 /**
5668   Init a key cache if it has not been initied before.
5669 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5670 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5671 {
5672   DBUG_ENTER("ha_init_key_cache");
5673 
5674   if (!key_cache->key_cache_inited)
5675   {
5676     mysql_mutex_lock(&LOCK_global_system_variables);
5677     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5678     ulonglong tmp_block_size= key_cache->param_block_size;
5679     ulonglong division_limit= key_cache->param_division_limit;
5680     ulonglong age_threshold=  key_cache->param_age_threshold;
5681     mysql_mutex_unlock(&LOCK_global_system_variables);
5682     DBUG_RETURN(!init_key_cache(key_cache,
5683 				tmp_block_size,
5684 				tmp_buff_size,
5685 				division_limit, age_threshold));
5686   }
5687   DBUG_RETURN(0);
5688 }
5689 
5690 
5691 /**
5692   Resize key cache.
5693 */
ha_resize_key_cache(KEY_CACHE * key_cache)5694 int ha_resize_key_cache(KEY_CACHE *key_cache)
5695 {
5696   DBUG_ENTER("ha_resize_key_cache");
5697 
5698   if (key_cache->key_cache_inited)
5699   {
5700     mysql_mutex_lock(&LOCK_global_system_variables);
5701     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5702     ulonglong tmp_block_size= key_cache->param_block_size;
5703     ulonglong division_limit= key_cache->param_division_limit;
5704     ulonglong age_threshold=  key_cache->param_age_threshold;
5705     mysql_mutex_unlock(&LOCK_global_system_variables);
5706     const int retval= resize_key_cache(key_cache,
5707                                        keycache_thread_var(),
5708                                        tmp_block_size,
5709                                        tmp_buff_size,
5710                                        division_limit, age_threshold);
5711     DBUG_RETURN(!retval);
5712   }
5713   DBUG_RETURN(0);
5714 }
5715 
5716 
5717 /**
5718   Move all tables from one key cache to another one.
5719 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5720 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5721 			KEY_CACHE *new_key_cache)
5722 {
5723   mi_change_key_cache(old_key_cache, new_key_cache);
5724   return 0;
5725 }
5726 
5727 
/**
  Try to discover one table from handler(s).

  The return values below are those of ha_discover(); the struct that
  follows carries its arguments to discover_handlerton().

  @retval
    -1   Table does not exist
  @retval
    0   OK. In this case *frmblob and *frmlen are set
  @retval
    >0   error.  frmblob and frmlen may not be set
*/
struct st_discover_args
{
  const char *db;      // Schema name passed to hton->discover()
  const char *name;    // Table name passed to hton->discover()
  uchar **frmblob;     // Out: table definition blob
  size_t *frmlen;      // Out: length of *frmblob
};
5745 
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5746 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5747                                    void *arg)
5748 {
5749   st_discover_args *vargs= (st_discover_args *)arg;
5750   handlerton *hton= plugin_data<handlerton*>(plugin);
5751   if (hton->state == SHOW_OPTION_YES && hton->discover &&
5752       (!(hton->discover(hton, thd, vargs->db, vargs->name,
5753                         vargs->frmblob,
5754                         vargs->frmlen))))
5755     return TRUE;
5756 
5757   return FALSE;
5758 }
5759 
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5760 int ha_discover(THD *thd, const char *db, const char *name,
5761 		uchar **frmblob, size_t *frmlen)
5762 {
5763   int error= -1; // Table does not exist in any handler
5764   DBUG_ENTER("ha_discover");
5765   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5766   st_discover_args args= {db, name, frmblob, frmlen};
5767 
5768   if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5769     DBUG_RETURN(error);
5770 
5771   if (plugin_foreach(thd, discover_handlerton,
5772                  MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5773     error= 0;
5774 
5775   if (!error)
5776   {
5777     assert(!thd->status_var_aggregated);
5778     thd->status_var.ha_discover_count++;
5779   }
5780   DBUG_RETURN(error);
5781 }
5782 
5783 
/**
  Call this function in order to give the handler the possibility
  to ask engine if there are any new tables that should be written to disk
  or any dropped tables that need to be removed from disk

  The struct below carries the arguments of ha_find_files() to
  find_files_handlerton().
*/
struct st_find_files_args
{
  const char *db;             // Forwarded unchanged to hton->find_files()
  const char *path;           // Forwarded unchanged to hton->find_files()
  const char *wild;           // May be NULL (ha_find_files() prints "NULL")
  bool dir;                   // Forwarded unchanged to hton->find_files()
  List<LEX_STRING> *files;    // Forwarded unchanged to hton->find_files()
};
5797 
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5798 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5799                                    void *arg)
5800 {
5801   st_find_files_args *vargs= (st_find_files_args *)arg;
5802   handlerton *hton= plugin_data<handlerton*>(plugin);
5803 
5804 
5805   if (hton->state == SHOW_OPTION_YES && hton->find_files)
5806       if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5807                           vargs->dir, vargs->files))
5808         return TRUE;
5809 
5810   return FALSE;
5811 }
5812 
5813 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5814 ha_find_files(THD *thd,const char *db,const char *path,
5815 	      const char *wild, bool dir, List<LEX_STRING> *files)
5816 {
5817   int error= 0;
5818   DBUG_ENTER("ha_find_files");
5819   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d",
5820 		       db, path, wild ? wild : "NULL", dir));
5821   st_find_files_args args= {db, path, wild, dir, files};
5822 
5823   plugin_foreach(thd, find_files_handlerton,
5824                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5825   /* The return value is not currently used */
5826   DBUG_RETURN(error);
5827 }
5828 
/**
  Ask handler if the table exists in engine.

  The return values below are those of ha_table_exists_in_engine(); the
  struct that follows carries its arguments to
  table_exists_in_engine_handlerton().

  @retval
    HA_ERR_NO_SUCH_TABLE     Table does not exist
  @retval
    HA_ERR_TABLE_EXIST       Table exists
  @retval
    \#                  Error code
*/
struct st_table_exists_in_engine_args
{
  const char *db;     // Schema name passed to the engine callback
  const char *name;   // Table name passed to the engine callback
  int err;            // Result; overwritten for every engine visited
};
5844 
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5845 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5846                                    void *arg)
5847 {
5848   st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5849   handlerton *hton= plugin_data<handlerton*>(plugin);
5850 
5851   int err= HA_ERR_NO_SUCH_TABLE;
5852 
5853   if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5854     err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5855 
5856   vargs->err = err;
5857   if (vargs->err == HA_ERR_TABLE_EXIST)
5858     return TRUE;
5859 
5860   return FALSE;
5861 }
5862 
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5863 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5864 {
5865   DBUG_ENTER("ha_table_exists_in_engine");
5866   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5867   st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5868   plugin_foreach(thd, table_exists_in_engine_handlerton,
5869                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5870   DBUG_PRINT("exit", ("error: %d", args.err));
5871   DBUG_RETURN(args.err);
5872 }
5873 
/**
  Prepare (sub-) sequences of joins in this statement
  which may be pushed to each storage engine for execution.

  The struct below carries the arguments of ha_make_pushed_joins() to
  make_pushed_join_handlerton().
*/
struct st_make_pushed_join_args
{
  const AQP::Join_plan* plan; // Query plan provided by optimizer
  int err;                    // Error code to return.
};
5883 
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5884 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5885                                    void *arg)
5886 {
5887   st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5888   handlerton *hton= plugin_data<handlerton*>(plugin);
5889 
5890   if (hton && hton->make_pushed_join)
5891   {
5892     const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5893     if (unlikely(error))
5894     {
5895       vargs->err = error;
5896       return TRUE;
5897     }
5898   }
5899   return FALSE;
5900 }
5901 
ha_make_pushed_joins(THD * thd,const AQP::Join_plan * plan)5902 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5903 {
5904   DBUG_ENTER("ha_make_pushed_joins");
5905   st_make_pushed_join_args args= {plan, 0};
5906   plugin_foreach(thd, make_pushed_join_handlerton,
5907                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5908   DBUG_PRINT("exit", ("error: %d", args.err));
5909   DBUG_RETURN(args.err);
5910 }
5911 
/*
  TODO: change this into a dynamic struct
  List<handlerton> does not work as
  1. binlog_end is called when MEM_ROOT is gone
  2. cannot work with thd MEM_ROOT as memory should be freed
*/
/* Number of elements of the hton[] array in hton_list_st. */
#define MAX_HTON_LIST_ST 63
/* Fixed-size list of handlertons, filled in by binlog_func_list(). */
struct hton_list_st
{
  handlerton *hton[MAX_HTON_LIST_ST];  // Collected handlertons
  uint sz;                             // Number of entries of hton[] in use
};
5924 
/* Request handed to the binlog_func() callback of each handlerton. */
struct binlog_func_st
{
  enum_binlog_func fn;   // Which binlog function is requested
  void *arg;             // Argument for 'fn', 0 if there is none
};
5930 
5931 /** @brief
5932   Listing handlertons first to avoid recursive calls and deadlock
5933 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5934 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5935 {
5936   hton_list_st *hton_list= (hton_list_st *)arg;
5937   handlerton *hton= plugin_data<handlerton*>(plugin);
5938   if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5939   {
5940     uint sz= hton_list->sz;
5941     if (sz == MAX_HTON_LIST_ST-1)
5942     {
5943       /* list full */
5944       return FALSE;
5945     }
5946     hton_list->hton[sz]= hton;
5947     hton_list->sz= sz+1;
5948   }
5949   return FALSE;
5950 }
5951 
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5952 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5953 {
5954   hton_list_st hton_list;
5955   uint i, sz;
5956 
5957   hton_list.sz= 0;
5958   plugin_foreach(thd, binlog_func_list,
5959                  MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5960 
5961   for (i= 0, sz= hton_list.sz; i < sz ; i++)
5962     hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5963   return FALSE;
5964 }
5965 
5966 
ha_reset_logs(THD * thd)5967 int ha_reset_logs(THD *thd)
5968 {
5969   binlog_func_st bfn= {BFN_RESET_LOGS, 0};
5970   binlog_func_foreach(thd, &bfn);
5971   return 0;
5972 }
5973 
ha_reset_slave(THD * thd)5974 void ha_reset_slave(THD* thd)
5975 {
5976   binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
5977   binlog_func_foreach(thd, &bfn);
5978 }
5979 
ha_binlog_wait(THD * thd)5980 void ha_binlog_wait(THD* thd)
5981 {
5982   binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
5983   binlog_func_foreach(thd, &bfn);
5984 }
5985 
ha_binlog_index_purge_file(THD * thd,const char * file)5986 int ha_binlog_index_purge_file(THD *thd, const char *file)
5987 {
5988   binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
5989   binlog_func_foreach(thd, &bfn);
5990   return 0;
5991 }
5992 
/*
  Arguments of ha_binlog_log_query(), bundled so that they can be passed
  to the binlog_log_query() callback of a handlerton.
*/
struct binlog_log_query_st
{
  enum_binlog_command binlog_command;  // Command being logged
  const char *query;                   // Query text
  size_t query_length;                 // Length of 'query'
  const char *db;                      // Schema name
  const char *table_name;              // Table name
};
6001 
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)6002 static my_bool binlog_log_query_handlerton2(THD *thd,
6003                                             handlerton *hton,
6004                                             void *args)
6005 {
6006   struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
6007   if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
6008     hton->binlog_log_query(hton, thd,
6009                            b->binlog_command,
6010                            b->query,
6011                            b->query_length,
6012                            b->db,
6013                            b->table_name);
6014   return FALSE;
6015 }
6016 
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)6017 static my_bool binlog_log_query_handlerton(THD *thd,
6018                                            plugin_ref plugin,
6019                                            void *args)
6020 {
6021   return binlog_log_query_handlerton2(thd,
6022                                       plugin_data<handlerton*>(plugin), args);
6023 }
6024 
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,size_t query_length,const char * db,const char * table_name)6025 void ha_binlog_log_query(THD *thd, handlerton *hton,
6026                          enum_binlog_command binlog_command,
6027                          const char *query, size_t query_length,
6028                          const char *db, const char *table_name)
6029 {
6030   struct binlog_log_query_st b;
6031   b.binlog_command= binlog_command;
6032   b.query= query;
6033   b.query_length= query_length;
6034   b.db= db;
6035   b.table_name= table_name;
6036   if (hton == 0)
6037     plugin_foreach(thd, binlog_log_query_handlerton,
6038                    MYSQL_STORAGE_ENGINE_PLUGIN, &b);
6039   else
6040     binlog_log_query_handlerton2(thd, hton, &b);
6041 }
6042 
ha_binlog_end(THD * thd)6043 int ha_binlog_end(THD* thd)
6044 {
6045   binlog_func_st bfn= {BFN_BINLOG_END, 0};
6046   binlog_func_foreach(thd, &bfn);
6047   return 0;
6048 }
6049 
6050 /**
6051   Calculate cost of 'index only' scan for given index and number of records
6052 
6053   @param keynr    Index number
6054   @param records  Estimated number of records to be retrieved
6055 
6056   @note
6057     It is assumed that we will read trough the whole key range and that all
6058     key blocks are half full (normally things are much better). It is also
6059     assumed that each time we read the next key from the index, the handler
6060     performs a random seek, thus the cost is proportional to the number of
6061     blocks read.
6062 
6063   @return
6064     Estimated cost of 'index only' scan
6065 */
6066 
index_only_read_time(uint keynr,double records)6067 double handler::index_only_read_time(uint keynr, double records)
6068 {
6069   double read_time;
6070   uint keys_per_block= (stats.block_size/2/
6071                         (table_share->key_info[keynr].key_length + ref_length) +
6072                         1);
6073   read_time=((double) (records + keys_per_block-1) /
6074              (double) keys_per_block);
6075   return read_time;
6076 }
6077 
6078 
table_in_memory_estimate() const6079 double handler::table_in_memory_estimate() const
6080 {
6081   assert(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
6082          (stats.table_in_mem_estimate >= 0.0 &&
6083           stats.table_in_mem_estimate <= 1.0));
6084 
6085   /*
6086     If the storage engine has supplied information about how much of the
6087     table that is currently in a memory buffer, then use this estimate.
6088   */
6089   if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
6090     return stats.table_in_mem_estimate;
6091 
6092   /*
6093     The storage engine has not provided any information about how much of
6094     this index is in memory, use an heuristic to produce an estimate.
6095   */
6096   return estimate_in_memory_buffer(stats.data_file_length);
6097 }
6098 
6099 
index_in_memory_estimate(uint keyno) const6100 double handler::index_in_memory_estimate(uint keyno) const
6101 {
6102   const KEY *key= &table->key_info[keyno];
6103 
6104   /*
6105     If the storage engine has supplied information about how much of the
6106     index that is currently in a memory buffer, then use this estimate.
6107   */
6108   const double est= key->in_memory_estimate();
6109   if (est != IN_MEMORY_ESTIMATE_UNKNOWN)
6110     return est;
6111 
6112   /*
6113     The storage engine has not provided any information about how much of
6114     this index is in memory, use an heuristic to produce an estimate.
6115   */
6116   ulonglong file_length;
6117 
6118   /*
6119     If the index is a clustered primary index, then use the data file
6120     size as estimate for how large the index is.
6121   */
6122   if (keyno == table->s->primary_key && primary_key_is_clustered())
6123     file_length= stats.data_file_length;
6124   else
6125     file_length= stats.index_file_length;
6126 
6127   return estimate_in_memory_buffer(file_length);
6128 }
6129 
6130 
estimate_in_memory_buffer(ulonglong table_index_size) const6131 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const
6132 {
6133   /*
6134     The storage engine has not provided any information about how much of
6135     the table/index is in memory. In this case we use a heuristic:
6136 
6137     - if the size of the table/index is less than 20 percent (pick any
6138       number) of the memory buffer, then the entire table/index is likely in
6139       memory.
6140     - if the size of the table/index is larger than the memory buffer, then
6141       assume nothing of the table/index is in memory.
6142     - if the size of the table/index is larger than 20 percent but less than
6143       the memory buffer size, then use a linear function of the table/index
6144       size that goes from 1.0 to 0.0.
6145   */
6146 
6147   /*
6148     If the storage engine has information about the size of its
6149     memory buffer, then use this. Otherwise, assume that at least 100 MB
6150     of data can be chached in memory.
6151   */
6152   longlong memory_buf_size= get_memory_buffer_size();
6153   if (memory_buf_size <= 0)
6154     memory_buf_size= 100 * 1024 * 1024;    // 100 MB
6155 
6156   /*
6157     Upper limit for the relative size of a table to be considered
6158     entirely available in a memory buffer. If the actual table size is
6159     less than this we assume it is complete cached in a memory buffer.
6160   */
6161   const double table_index_in_memory_limit= 0.2;
6162 
6163   /*
6164     Estimate for how much of the total memory buffer this table/index
6165     can occupy.
6166   */
6167   const double percent_of_mem= static_cast<double>(table_index_size) /
6168     memory_buf_size;
6169 
6170   double in_mem_est;
6171 
6172   if (percent_of_mem < table_index_in_memory_limit) // Less than 20 percent
6173     in_mem_est= 1.0;
6174   else if (percent_of_mem > 1.0)                // Larger than buffer
6175     in_mem_est= 0.0;
6176   else
6177   {
6178     /*
6179       The size of the table/index is larger than
6180       "table_index_in_memory_limit" * "memory_buf_size" but less than
6181       the total size of the memory buffer.
6182     */
6183     in_mem_est= 1.0 - (percent_of_mem - table_index_in_memory_limit) /
6184       (1.0 - table_index_in_memory_limit);
6185   }
6186   assert(in_mem_est >= 0.0 && in_mem_est <= 1.0);
6187 
6188   return in_mem_est;
6189 }
6190 
6191 
table_scan_cost()6192 Cost_estimate handler::table_scan_cost()
6193 {
6194   /*
6195     This function returns a Cost_estimate object. The function should be
6196     implemented in a way that allows the compiler to use "return value
6197     optimization" to avoid creating the temporary object for the return value
6198     and use of the copy constructor.
6199   */
6200 
6201   const double io_cost= scan_time() * table->cost_model()->page_read_cost(1.0);
6202   Cost_estimate cost;
6203   cost.add_io(io_cost);
6204   return cost;
6205 }
6206 
6207 
index_scan_cost(uint index,double ranges,double rows)6208 Cost_estimate handler::index_scan_cost(uint index, double ranges, double rows)
6209 {
6210   /*
6211     This function returns a Cost_estimate object. The function should be
6212     implemented in a way that allows the compiler to use "return value
6213     optimization" to avoid creating the temporary object for the return value
6214     and use of the copy constructor.
6215   */
6216 
6217   assert(ranges >= 0.0);
6218   assert(rows >= 0.0);
6219 
6220   const double io_cost= index_only_read_time(index, rows) *
6221     table->cost_model()->page_read_cost_index(index, 1.0);
6222   Cost_estimate cost;
6223   cost.add_io(io_cost);
6224   return cost;
6225 }
6226 
6227 
read_cost(uint index,double ranges,double rows)6228 Cost_estimate handler::read_cost(uint index, double ranges, double rows)
6229 {
6230   /*
6231     This function returns a Cost_estimate object. The function should be
6232     implemented in a way that allows the compiler to use "return value
6233     optimization" to avoid creating the temporary object for the return value
6234     and use of the copy constructor.
6235   */
6236 
6237   assert(ranges >= 0.0);
6238   assert(rows >= 0.0);
6239 
6240   const double io_cost= read_time(index, static_cast<uint>(ranges),
6241                                   static_cast<ha_rows>(rows)) *
6242                         table->cost_model()->page_read_cost(1.0);
6243   Cost_estimate cost;
6244   cost.add_io(io_cost);
6245   return cost;
6246 }
6247 
6248 
6249 /**
6250   Check if key has partially-covered columns
6251 
6252   We can't use DS-MRR to perform range scans when the ranges are over
6253   partially-covered keys, because we'll not have full key part values
6254   (we'll have their prefixes from the index) and will not be able to check
6255   if we've reached the end the range.
6256 
6257   @param keyno  Key to check
6258 
6259   @todo
6260     Allow use of DS-MRR in cases where the index has partially-covered
6261     components but they are not used for scanning.
6262 
6263   @retval TRUE   Yes
6264   @retval FALSE  No
6265 */
6266 
key_uses_partial_cols(TABLE * table,uint keyno)6267 bool key_uses_partial_cols(TABLE *table, uint keyno)
6268 {
6269   KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6270   KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6271   for (; kp != kp_end; kp++)
6272   {
6273     if (!kp->field->part_of_key.is_set(keyno))
6274       return TRUE;
6275   }
6276   return FALSE;
6277 }
6278 
6279 /****************************************************************************
6280  * Default MRR implementation (MRR to non-MRR converter)
6281  ***************************************************************************/
6282 
/**
  Get cost and other information about MRR scan over a known list of ranges

  Calculate estimated cost and other information about an MRR scan for given
  sequence of ranges. The sequence is traversed once; for every range a row
  estimate is produced and the estimates are summed up.

  @param keyno           Index number
  @param seq             Range sequence to be traversed
  @param seq_init_param  First parameter for seq->init()
  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
                         can't efficiently determine it. This default
                         implementation never reads it: the ranges are
                         counted while the sequence is traversed.
  @param bufsz[in,out]   IN:  Size of the buffer available for use
                         OUT: Size of the buffer that is expected to be actually
                              used, or 0 if buffer is not needed. This
                              implementation always sets it to 0.
  @param flags[in,out]   A combination of HA_MRR_* flags. On success
                         HA_MRR_USE_DEFAULT_IMPL and HA_MRR_SUPPORT_SORTED
                         are added.
  @param cost[out]       Estimated cost of MRR access. Must be zero on entry.

  @note
    This method (or an overriding one in a derived class) must check for
    thd->killed and return HA_POS_ERROR if it is not zero. This is required
    for a user to be able to interrupt the calculation by killing the
    connection/query.

  @retval
    HA_POS_ERROR  Error, the query was killed, or the engine is unable to
                  perform the requested scan. Values of OUT parameters are
                  undefined.
  @retval
    other         OK, *cost contains cost of the scan, *bufsz and *flags
                  contain scan parameters. The value is the estimated total
                  number of rows in all ranges.
*/

ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges_arg,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows= 0;
  uint n_ranges=0;
  THD *thd= current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);

  /*
    Note that the local counter n_ranges (still 0 here) is passed to
    seq->init(), not n_ranges_arg.
  */
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range))
  {
    /* Allow the user to interrupt a long estimation. */
    if (unlikely(thd->killed != 0))
      return HA_POS_ERROR;

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG)
    {
      /* Ranges flagged with GEOM_FLAG are passed on with a start key only. */
      min_endp= &range.start_key;
      max_endp= NULL;
    }
    else
    {
      /* An end point with zero length means the range is open on that side. */
      min_endp= range.start_key.length? &range.start_key : NULL;
      max_endp= range.end_key.length? &range.end_key : NULL;
    }
    /*
      Get the number of rows in the range. This is done by calling
      records_in_range() unless:

        1) The range is an equality range and the index is unique.
           There cannot be more than one matching row, so 1 is
           assumed. Note that it is possible that the correct number
           is actually 0, so the row estimate may be too high in this
           case. Also note: ranges of the form "x IS NULL" may have more
           than 1 matching row so records_in_range() is called for these.
        2) a) The range is an equality range but the index is either
              not unique or all of the keyparts are not used.
           b) The user has requested that index statistics should be used
              for equality ranges to avoid the incurred overhead of
              index dives in records_in_range().
           c) Index statistics is available.
           Ranges of the form "x IS NULL" will not use index statistics
           because the number of rows with this value are likely to be
           very different than the values in the index statistics.
    */
    int keyparts_used= 0;
    if ((range.range_flag & UNIQUE_RANGE) &&                        // 1)
        !(range.range_flag & NULL_RANGE))
      rows= 1; /* there can be at most one row */
    else if ((range.range_flag & EQ_RANGE) &&                       // 2a)
             (range.range_flag & USE_INDEX_STATISTICS) &&           // 2b)
             (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
             table->
               key_info[keyno].has_records_per_key(keyparts_used-1) && // 2c)
             !(range.range_flag & NULL_RANGE))
    {
      /*
        keyparts_used was assigned inside the condition above and is
        non-zero here; the statistics are indexed by the last used keypart.
      */
      rows= static_cast<ha_rows>(
        table->key_info[keyno].records_per_key(keyparts_used - 1));
    }
    else
    {
      DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
      assert(min_endp || max_endp);
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
                                                        max_endp)))
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  /* Only fill in flags and cost when every range could be estimated. */
  if (total_rows != HA_POS_ERROR)
  {
    const Cost_model_table *const cost_model= table->cost_model();

    /* The following calculation is the same as in multi_range_read_info(): */
    *flags|= HA_MRR_USE_DEFAULT_IMPL;
    *flags|= HA_MRR_SUPPORT_SORTED;

    assert(cost->is_zero());
    if (*flags & HA_MRR_INDEX_ONLY)
      *cost= index_scan_cost(keyno, static_cast<double>(n_ranges),
                             static_cast<double>(total_rows));
    else
      *cost= read_cost(keyno, static_cast<double>(n_ranges),
                       static_cast<double>(total_rows));
    /*
      Add the CPU cost of evaluating the rows plus a small constant (0.01).
      NOTE(review): the reason for the constant is not visible here.
    */
    cost->add_cpu(cost_model->row_evaluate_cost(
      static_cast<double>(total_rows)) + 0.01);
  }
  return total_rows;
}
6418 
6419 
6420 /**
6421   Get cost and other information about MRR scan over some sequence of ranges
6422 
6423   Calculate estimated cost and other information about an MRR scan for some
6424   sequence of ranges.
6425 
6426   The ranges themselves will be known only at execution phase. When this
6427   function is called we only know number of ranges and a (rough) E(#records)
6428   within those ranges.
6429 
6430   Currently this function is only called for "n-keypart singlepoint" ranges,
6431   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6432 
6433   The flags parameter is a combination of those flags: HA_MRR_SORTED,
6434   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6435 
6436   @param keyno           Index number
6437   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
6438                          range sequence.
6439   @param n_rows          Estimated total number of records contained within all
6440                          of the ranges
6441   @param bufsz[in,out]   IN:  Size of the buffer available for use
6442                          OUT: Size of the buffer that will be actually used, or
6443                               0 if buffer is not needed.
6444   @param flags[in,out]   A combination of HA_MRR_* flags
6445   @param cost[out]       Estimated cost of MRR access
6446 
6447   @retval
6448     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6449           parameters.
6450   @retval
6451     other Error or can't perform the requested scan
6452 */
6453 
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6454 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6455                                        uint *bufsz, uint *flags,
6456                                        Cost_estimate *cost)
6457 {
6458   *bufsz= 0; /* Default implementation doesn't need a buffer */
6459 
6460   *flags|= HA_MRR_USE_DEFAULT_IMPL;
6461   *flags|= HA_MRR_SUPPORT_SORTED;
6462 
6463   assert(cost->is_zero());
6464 
6465   /* Produce the same cost as non-MRR code does */
6466   if (*flags & HA_MRR_INDEX_ONLY)
6467     *cost= index_scan_cost(keyno, n_ranges, n_rows);
6468   else
6469     *cost= read_cost(keyno, n_ranges, n_rows);
6470   return 0;
6471 }
6472 
6473 
6474 /**
6475   Initialize the MRR scan
6476 
6477   Initialize the MRR scan. This function may do heavyweight scan
6478   initialization like row prefetching/sorting/etc (NOTE: but better not do
6479   it here as we may not need it, e.g. if we never satisfy WHERE clause on
6480   previous tables. For many implementations it would be natural to do such
6481   initializations in the first multi_read_range_next() call)
6482 
6483   mode is a combination of the following flags: HA_MRR_SORTED,
6484   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6485 
6486   @param seq             Range sequence to be traversed
6487   @param seq_init_param  First parameter for seq->init()
6488   @param n_ranges        Number of ranges in the sequence
6489   @param mode            Flags, see the description section for the details
6490   @param buf             INOUT: memory buffer to be used
6491 
6492   @note
6493     One must have called index_init() before calling this function. Several
6494     multi_range_read_init() calls may be made in course of one query.
6495 
6496     Until WL#2623 is done (see its text, section 3.2), the following will
6497     also hold:
6498     The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6499     then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6500     This property will only be used by NDB handler until WL#2623 is done.
6501 
6502     Buffer memory management is done according to the following scenario:
6503     The caller allocates the buffer and provides it to the callee by filling
6504     the members of HANDLER_BUFFER structure.
6505     The callee consumes all or some fraction of the provided buffer space, and
6506     sets the HANDLER_BUFFER members accordingly.
6507     The callee may use the buffer memory until the next multi_range_read_init()
6508     call is made, all records have been read, or until index_end() call is
6509     made, whichever comes first.
6510 
6511   @retval 0  OK
6512   @retval 1  Error
6513 */
6514 
6515 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6516 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6517                                uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6518 {
6519   DBUG_ENTER("handler::multi_range_read_init");
6520   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6521   mrr_funcs= *seq_funcs;
6522   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6523   mrr_have_range= FALSE;
6524   DBUG_RETURN(0);
6525 }
6526 
6527 
/**
  Get next record in MRR scan

  Default MRR implementation: read the next record. The current range is
  continued with read_range_next(); when it is exhausted, further ranges
  are fetched from the range sequence and opened with read_range_first()
  until one of them yields a record, an error occurs, or the sequence ends.

  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
                          Otherwise, the opaque value associated with the range
                          that contains the returned record. It is assigned
                          from mrr_cur_range.ptr on every call, whatever the
                          result.

  @retval 0      OK
  @retval other  Error code; HA_ERR_END_OF_FILE when no range yields a
                 further record
*/

int handler::multi_range_read_next(char **range_info)
{
  /* Stays HA_ERR_END_OF_FILE if the range sequence is already exhausted. */
  int result= HA_ERR_END_OF_FILE;
  int range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /*
    First call after multi_range_read_init(): there is no current range
    to continue, so jump directly to fetching the first one.
  */
  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /*
        The current range flags are exactly UNIQUE_RANGE | EQ_RANGE, so
        read_range_next() is skipped. If was_semi_consistent_read()
        reports true, the same range is read once more.
      */
      if (was_semi_consistent_read())
        goto scan_it_again;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      /* An end point with an empty keypart_map is passed as "no key". */
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                                 &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                                 &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  /*
    range_res is only evaluated when result is HA_ERR_END_OF_FILE, and in
    that case the inner while loop has always assigned it.
  */
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  *range_info= mrr_cur_range.ptr;

  /* Update virtual generated fields */
  if (!result && m_update_generated_read_fields)
  {
    result= update_generated_read_fields(table->record[0], table, active_index);
    m_update_generated_read_fields= false;
  }

  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}
6601 
6602 
6603 /****************************************************************************
6604  * DS-MRR implementation
6605  ***************************************************************************/
6606 
/**
  DS-MRR: Initialize and start MRR scan

  Initialize and start the MRR scan. Depending on the mode parameter, this
  may use default or DS-MRR implementation.

  The DS-MRR implementation will use a second handler object (h2) for
  doing scan on the index:
  - on the first call to this function the h2 handler will be created
    and h2 will be opened using the same index as the main handler
    is set to use. The index scan on the main index will be closed
    and it will be re-opened to read records from the table using either
    no key or the primary key. The h2 handler will be deleted when
    reset() is called (which should happen on the end of the statement).
  - when dsmrr_close() is called the index scan on h2 is closed.
  - on following calls to this function one of the following must be valid:
    a. if dsmrr_close has been called:
       the main handler (h) must be open on an index, h2 will be opened
       using this index, and the index on h will be closed and
       h will be re-opened to read records from the table using either
       no key or the primary key.
    b. dsmrr_close has not been called:
       h2 will already be open, the main handler h must be set up
       to read records from the table (handler->inited is RND) either
       using the primary index or using no index at all.

  The default implementation is used, and h2 is not touched, when the MRR
  hint/optimizer switch check fails for the active index or when mode
  contains HA_MRR_USE_DEFAULT_IMPL or HA_MRR_SORTED.

  @param h_arg           Table handler to be used
  @param seq_funcs       Interval sequence enumeration functions
  @param seq_init_param  Interval sequence enumeration parameter
  @param n_ranges        Number of ranges in the sequence.
  @param mode            HA_MRR_* modes to use
  @param buf             INOUT Buffer to use

  @retval 0     Ok, Scan started.
  @retval other Error. If the failure happens after h2 has been created,
                h2 is closed and deleted before returning.
*/

int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                           void *seq_init_param, uint n_ranges, uint mode,
                           HANDLER_BUFFER *buf)
{
  uint elem_size;
  int retval= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_init");
  THD *thd= h_arg->table->in_use;     // current THD

  /*
    index_merge may invoke a scan on an object for which dsmrr_info[_const]
    has not been called, so set the owner handler here as well.
  */
  h= h_arg;

  if (!hint_key_state(thd, h->table, h->active_index,
                      MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
      mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
  {
    /* Fall back to the default MRR implementation on the main handler. */
    use_default_impl= TRUE;
    retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                              n_ranges, mode, buf);
    DBUG_RETURN(retval);
  }

  /*
    This assert will hit if we have pushed an index condition to the
    primary key index and then "change our mind" and use a different
    index for retrieving data with MRR. One of the following criteria
    must be true:
      1. We have not pushed an index condition on this handler.
      2. We have pushed an index condition and this is on the currently used
         index.
      3. We have pushed an index condition but this is not for the primary key.
      4. We have pushed an index condition and this has been transferred to
         the clone (h2) of the handler object.

    NOTE(review): "table" here is a member of this object, presumably the
    same TABLE as h->table - confirm.
  */
  assert(!h->pushed_idx_cond ||
         h->pushed_idx_cond_keyno == h->active_index ||
         h->pushed_idx_cond_keyno != table->s->primary_key ||
         (h2 && h->pushed_idx_cond_keyno == h2->active_index));

  rowids_buf= buf->buffer;

  /* Range association is wanted unless HA_MRR_NO_ASSOCIATION is set. */
  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

  if (is_mrr_assoc)
  {
    assert(!thd->status_var_aggregated);
    table->in_use->status_var.ha_multi_range_read_init_count++;
  }

  /*
    Each buffer element is a rowid (ref_length bytes), followed by one
    pointer-sized slot when range association is in use. The usable end of
    the buffer is rounded down to a whole number of elements.
  */
  rowids_buf_end= buf->buffer_end;
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  rowids_buf_last= rowids_buf +
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
                      elem_size;
  rowids_buf_end= rowids_buf_last;

  /*
    The DS-MRR scan uses a second handler object (h2) for doing the
    index scan. Create this by cloning the primary handler
    object. The h2 handler object is deleted when DsMrr_impl::reset()
    is called.
  */
  if (!h2)
  {
    handler *new_h2;
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
      @todo Is this still the case? Leave it as it is for now but could
            likely be removed?
    */
    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
      DBUG_RETURN(1);

    /* h2 is still NULL here, so return directly instead of via "error". */
    if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
      DBUG_RETURN(1);
    h2= new_h2; /* Ok, now can put it into h2 */
    table->prepare_for_position();
  }

  /*
    Open the index scan on h2 using the key from the primary handler.
  */
  if (h2->active_index == MAX_KEY)
  {
    assert(h->active_index != MAX_KEY);
    const uint mrr_keyno= h->active_index;

    /* Lock h2 with the same lock type as the primary handler. */
    if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
      goto error;

    if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
      goto error;

    if ((retval= h2->ha_index_init(mrr_keyno, false)))
      goto error;

    // Transfer ICP from h to h2
    if (mrr_keyno == h->pushed_idx_cond_keyno)
    {
      /* A non-zero return from idx_cond_push() is treated as a failure. */
      if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
      {
        retval= 1;
        goto error;
      }
    }
    else
    {
      // Cancel any potentially previously pushed index conditions
      h2->cancel_pushed_idx_cond();
    }
  }
  else
  {
    /*
      h2 has already an open index. This happens when the DS-MRR scan
      is re-started without closing it first. In this case the primary
      handler must be used for reading records from the table, ie. it
      must not be opened for doing a new range scan. In this case
      the active_index must either not be set or be the primary key.
    */
    assert(h->inited == handler::RND);
    assert(h->active_index == MAX_KEY ||
           h->active_index == table->s->primary_key);
  }

  /*
    The index scan is now transferred to h2 and we can close the open
    index scan on the primary handler.
  */
  if (h->inited == handler::INDEX)
  {
    /*
      Calling h->ha_index_end() will invoke dsmrr_close() for this object,
      which will close the index scan on h2. We need to keep it open, so
      temporarily move h2 out of the DsMrr object.
    */
    handler *save_h2= h2;
    h2= NULL;
    retval= h->ha_index_end();
    h2= save_h2;
    /* h2 is restored before jumping, so the error path can clean it up. */
    if (retval)
      goto error;
  }

  /*
    Verify consistency between h and h2.
  */
  assert(h->inited != handler::INDEX);
  assert(h->active_index == MAX_KEY ||
         h->active_index == table->s->primary_key);
  assert(h2->inited == handler::INDEX);
  assert(h2->active_index != MAX_KEY);
  assert(h->m_lock_type == h2->m_lock_type);

  /* Start the range traversal on h2 using the default MRR initialization. */
  if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                  n_ranges, mode, buf)))
    goto error;

  /* Collect the first batch of rowids into the buffer. */
  if ((retval= dsmrr_fill_buffer()))
    goto error;

  /*
    If the above call has scanned through all intervals in *seq, then
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof)
    buf->end_of_used_area= rowids_buf_last;

  /*
     h->inited == INDEX may occur when 'range checked for each record' is
     used.

     Unless h is already set up for RND access: end an open index scan (if
     any) and start a RND scan. A non-zero result from either call is
     reported as an error.
  */
  if ((h->inited != handler::RND) &&
      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
       (h->ha_rnd_init(FALSE))))
  {
    retval= 1;
    goto error;
  }

  use_default_impl= FALSE;
  h->mrr_funcs= *seq_funcs;

  DBUG_RETURN(0);
error:
  /*
    Every jump to this label happens after h2 has been created. Tear it
    down completely: end the scan, release the lock, close and delete it.
  */
  h2->ha_index_or_rnd_end();
  h2->ha_external_lock(thd, F_UNLCK);
  h2->ha_close();
  delete h2;
  h2= NULL;
  assert(retval != 0);
  DBUG_RETURN(retval);
}
6841 
6842 
dsmrr_close()6843 void DsMrr_impl::dsmrr_close()
6844 {
6845   DBUG_ENTER("DsMrr_impl::dsmrr_close");
6846 
6847   // If there is an open index on h2, then close it
6848   if (h2 && h2->active_index != MAX_KEY)
6849   {
6850     h2->ha_index_or_rnd_end();
6851     h2->ha_external_lock(current_thd, F_UNLCK);
6852   }
6853   use_default_impl= true;
6854   DBUG_VOID_RETURN;
6855 }
6856 
6857 
reset()6858 void DsMrr_impl::reset()
6859 {
6860   DBUG_ENTER("DsMrr_impl::reset");
6861 
6862   if (h2)
6863   {
6864     // Close any ongoing DS-MRR scan
6865     dsmrr_close();
6866 
6867     // Close and delete the h2 handler
6868     h2->ha_close();
6869     delete h2;
6870     h2= NULL;
6871   }
6872   DBUG_VOID_RETURN;
6873 }
6874 
6875 
rowid_cmp(void * h,uchar * a,uchar * b)6876 static int rowid_cmp(void *h, uchar *a, uchar *b)
6877 {
6878   return ((handler*)h)->cmp_ref(a, b);
6879 }
6880 
6881 
6882 /**
6883   DS-MRR: Fill the buffer with rowids and sort it by rowid
6884 
6885   {This is an internal function of DiskSweep MRR implementation}
6886   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6887   buffer. When the buffer is full or scan is completed, sort the buffer by
6888   rowid and return.
6889 
6890   The function assumes that rowids buffer is empty when it is invoked.
6891 
6892   @param h  Table handler
6893 
6894   @retval 0      OK, the next portion of rowids is in the buffer,
6895                  properly ordered
6896   @retval other  Error
6897 */
6898 
dsmrr_fill_buffer()6899 int DsMrr_impl::dsmrr_fill_buffer()
6900 {
6901   char *range_info;
6902   int res= 0;
6903   DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
6904   assert(rowids_buf < rowids_buf_end);
6905 
6906   /*
6907     Set key_read to TRUE since we only read fields from the index.
6908     This ensures that any virtual columns are read from index and are not
6909     attempted to be evaluated from base columns.
6910     (Do not use TABLE::set_keyread() since the MRR implementation operates
6911     with two handler objects, and set_keyread() would manipulate the keyread
6912     property of the wrong handler. MRR sets the handlers' keyread properties
6913     when initializing the MRR operation, independent of this call).
6914   */
6915   assert(table->key_read == FALSE);
6916   table->key_read= TRUE;
6917 
6918   rowids_buf_cur= rowids_buf;
6919   while ((rowids_buf_cur < rowids_buf_end) &&
6920          !(res= h2->handler::multi_range_read_next(&range_info)))
6921   {
6922     KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
6923     if (h2->mrr_funcs.skip_index_tuple &&
6924         h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6925       continue;
6926 
6927     /* Put rowid, or {rowid, range_id} pair into the buffer */
6928     h2->position(table->record[0]);
6929     memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6930     rowids_buf_cur += h2->ref_length;
6931 
6932     if (is_mrr_assoc)
6933     {
6934       memcpy(rowids_buf_cur, &range_info, sizeof(void*));
6935       rowids_buf_cur += sizeof(void*);
6936     }
6937   }
6938 
6939   // Restore key_read since the next read operation will read complete rows
6940   table->key_read= FALSE;
6941 
6942   if (res && res != HA_ERR_END_OF_FILE)
6943     DBUG_RETURN(res);
6944   dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);
6945 
6946   /* Sort the buffer contents by rowid */
6947   uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6948   size_t n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
6949 
6950   my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
6951             (void*)h);
6952   rowids_buf_last= rowids_buf_cur;
6953   rowids_buf_cur=  rowids_buf;
6954   DBUG_RETURN(0);
6955 }
6956 
6957 
6958 /*
6959   DS-MRR implementation: multi_range_read_next() function
6960 */
6961 
dsmrr_next(char ** range_info)6962 int DsMrr_impl::dsmrr_next(char **range_info)
6963 {
6964   int res;
6965   uchar *cur_range_info= 0;
6966   uchar *rowid;
6967 
6968   if (use_default_impl)
6969     return h->handler::multi_range_read_next(range_info);
6970 
6971   do
6972   {
6973     if (rowids_buf_cur == rowids_buf_last)
6974     {
6975       if (dsmrr_eof)
6976       {
6977         res= HA_ERR_END_OF_FILE;
6978         goto end;
6979       }
6980 
6981       res= dsmrr_fill_buffer();
6982       if (res)
6983         goto end;
6984     }
6985 
6986     /* return eof if there are no rowids in the buffer after re-fill attempt */
6987     if (rowids_buf_cur == rowids_buf_last)
6988     {
6989       res= HA_ERR_END_OF_FILE;
6990       goto end;
6991     }
6992     rowid= rowids_buf_cur;
6993 
6994     if (is_mrr_assoc)
6995       memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));
6996 
6997     rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
6998     if (h2->mrr_funcs.skip_record &&
6999 	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
7000       continue;
7001     res= h->ha_rnd_pos(table->record[0], rowid);
7002     break;
7003   } while (true);
7004 
7005   if (is_mrr_assoc)
7006   {
7007     memcpy(range_info, rowid + h->ref_length, sizeof(void*));
7008   }
7009 end:
7010   return res;
7011 }
7012 
7013 
7014 /*
7015   DS-MRR implementation: multi_range_read_info() function
7016 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)7017 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
7018                                uint *bufsz, uint *flags, Cost_estimate *cost)
7019 {
7020   ha_rows res MY_ATTRIBUTE((unused));
7021   uint def_flags= *flags;
7022   uint def_bufsz= *bufsz;
7023 
7024   /* Get cost/flags/mem_usage of default MRR implementation */
7025   res=
7026     h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
7027                                       &def_flags, cost);
7028   assert(!res);
7029 
7030   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
7031       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
7032   {
7033     /* Default implementation is choosen */
7034     DBUG_PRINT("info", ("Default MRR implementation choosen"));
7035     *flags= def_flags;
7036     *bufsz= def_bufsz;
7037     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
7038   }
7039   else
7040   {
7041     /* *flags and *bufsz were set by choose_mrr_impl */
7042     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
7043   }
7044   return 0;
7045 }
7046 
7047 
7048 /*
7049   DS-MRR Implementation: multi_range_read_info_const() function
7050 */
7051 
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)7052 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
7053                                  void *seq_init_param, uint n_ranges,
7054                                  uint *bufsz, uint *flags, Cost_estimate *cost)
7055 {
7056   ha_rows rows;
7057   uint def_flags= *flags;
7058   uint def_bufsz= *bufsz;
7059   /* Get cost/flags/mem_usage of default MRR implementation */
7060   rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
7061                                                 n_ranges, &def_bufsz,
7062                                                 &def_flags, cost);
7063   if (rows == HA_POS_ERROR)
7064   {
7065     /* Default implementation can't perform MRR scan => we can't either */
7066     return rows;
7067   }
7068 
7069   /*
7070     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
7071     use the default MRR implementation (we need it for UPDATE/DELETE).
7072     Otherwise, make a choice based on cost and mrr* flags of
7073     @@optimizer_switch.
7074   */
7075   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
7076       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
7077   {
7078     DBUG_PRINT("info", ("Default MRR implementation choosen"));
7079     *flags= def_flags;
7080     *bufsz= def_bufsz;
7081     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
7082   }
7083   else
7084   {
7085     /* *flags and *bufsz were set by choose_mrr_impl */
7086     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
7087   }
7088   return rows;
7089 }
7090 
7091 
7092 /**
7093   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
7094 
7095   Make the choice between using Default MRR implementation and DS-MRR.
7096   This function contains common functionality factored out of dsmrr_info()
7097   and dsmrr_info_const(). The function assumes that the default MRR
7098   implementation's applicability requirements are satisfied.
7099 
7100   @param keyno       Index number
7101   @param rows        E(full rows to be retrieved)
7102   @param flags  IN   MRR flags provided by the MRR user
7103                 OUT  If DS-MRR is choosen, flags of DS-MRR implementation
7104                      else the value is not modified
7105   @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
7106                      else the value is not modified
7107   @param cost   IN   Cost of default MRR implementation
7108                 OUT  If DS-MRR is choosen, cost of DS-MRR scan
7109                      else the value is not modified
7110 
7111   @retval TRUE   Default MRR implementation should be used
7112   @retval FALSE  DS-MRR implementation should be used
7113 */
7114 
choose_mrr_impl(uint keyno,ha_rows rows,uint * flags,uint * bufsz,Cost_estimate * cost)7115 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
7116                                  uint *bufsz, Cost_estimate *cost)
7117 {
7118   bool res;
7119   THD *thd= current_thd;
7120 
7121   const bool mrr_on= hint_key_state(thd, table, keyno, MRR_HINT_ENUM,
7122                                     OPTIMIZER_SWITCH_MRR);
7123   const bool force_dsmrr_by_hints=
7124     hint_key_state(thd, table, keyno, MRR_HINT_ENUM, 0) ||
7125     hint_table_state(thd, table, BKA_HINT_ENUM, 0);
7126 
7127   if (!(mrr_on || force_dsmrr_by_hints) ||
7128       *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
7129       (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
7130        key_uses_partial_cols(table, keyno) ||
7131        table->s->tmp_table != NO_TMP_TABLE)
7132   {
7133     /* Use the default implementation, don't modify args: See comments  */
7134     return TRUE;
7135   }
7136 
7137   /*
7138     If @@optimizer_switch has "mrr_cost_based" on, we should avoid
7139     using DS-MRR for queries where it is likely that the records are
7140     stored in memory. Since there is currently no way to determine
7141     this, we use a heuristic:
7142     a) if the storage engine has a memory buffer, DS-MRR is only
7143        considered if the table size is bigger than the buffer.
7144     b) if the storage engine does not have a memory buffer, DS-MRR is
7145        only considered if the table size is bigger than 100MB.
7146     c) Since there is an initial setup cost of DS-MRR, so it is only
7147        considered if at least 50 records will be read.
7148   */
7149   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
7150       !force_dsmrr_by_hints)
7151   {
7152     /*
7153       If the storage engine has a database buffer we use this as the
7154       minimum size the table should have before considering DS-MRR.
7155     */
7156     longlong min_file_size= table->file->get_memory_buffer_size();
7157     if (min_file_size == -1)
7158     {
7159       // No estimate for database buffer
7160       min_file_size= 100 * 1024 * 1024;    // 100 MB
7161     }
7162 
7163     if (table->file->stats.data_file_length <
7164         static_cast<ulonglong>(min_file_size) ||
7165         rows <= 50)
7166       return true;                 // Use the default implementation
7167   }
7168 
7169   Cost_estimate dsmrr_cost;
7170   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
7171     return TRUE;
7172 
7173   /*
7174     If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
7175     of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
7176     allows one to force use of DS-MRR whenever it is applicable without
7177     affecting other cost-based choices. Note that if MRR or BKA hint is
7178     specified, DS-MRR will be used regardless of cost.
7179   */
7180   const bool force_dsmrr=
7181     (force_dsmrr_by_hints ||
7182      !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));
7183 
7184   if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
7185     dsmrr_cost= *cost;
7186 
7187   if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
7188   {
7189     *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
7190     *flags &= ~HA_MRR_SUPPORT_SORTED;    /* We can't provide ordered output */
7191     *cost= dsmrr_cost;
7192     res= FALSE;
7193   }
7194   else
7195   {
7196     /* Use the default MRR implementation */
7197     res= TRUE;
7198   }
7199   return res;
7200 }
7201 
7202 
7203 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
7204                                     Cost_estimate *cost);
7205 
7206 
7207 /**
7208   Get cost of DS-MRR scan
7209 
7210   @param keynr              Index to be used
7211   @param rows               E(Number of rows to be scanned)
7212   @param flags              Scan parameters (HA_MRR_* flags)
7213   @param buffer_size INOUT  Buffer size
7214   @param cost        OUT    The cost
7215 
7216   @retval FALSE  OK
7217   @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
7218                  for even 1 rowid)
7219 */
7220 
get_disk_sweep_mrr_cost(uint keynr,ha_rows rows,uint flags,uint * buffer_size,Cost_estimate * cost)7221 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
7222                                          uint *buffer_size,
7223                                          Cost_estimate *cost)
7224 {
7225   ha_rows rows_in_last_step;
7226   uint n_full_steps;
7227 
7228   const uint elem_size= h->ref_length +
7229                         sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
7230   const ha_rows max_buff_entries= *buffer_size / elem_size;
7231 
7232   if (!max_buff_entries)
7233     return TRUE; /* Buffer has not enough space for even 1 rowid */
7234 
7235   /* Number of iterations we'll make with full buffer */
7236   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
7237 
7238   /*
7239     Get numbers of rows we'll be processing in last iteration, with
7240     non-full buffer
7241   */
7242   rows_in_last_step= rows % max_buff_entries;
7243 
7244   assert(cost->is_zero());
7245 
7246   if (n_full_steps)
7247   {
7248     get_sort_and_sweep_cost(table, max_buff_entries, cost);
7249     cost->multiply(n_full_steps);
7250   }
7251   else
7252   {
7253     /*
7254       Adjust buffer size since only parts of the buffer will be used:
7255       1. Adjust record estimate for the last scan to reduce likelyhood
7256          of needing more than one scan by adding 20 percent to the
7257          record estimate and by ensuring this is at least 100 records.
7258       2. If the estimated needed buffer size is lower than suggested by
7259          the caller then set it to the estimated buffer size.
7260     */
7261     const ha_rows keys_in_buffer=
7262       max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
7263     *buffer_size= min<ulong>(*buffer_size,
7264                              static_cast<ulong>(keys_in_buffer) * elem_size);
7265   }
7266 
7267   Cost_estimate last_step_cost;
7268   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
7269   (*cost)+= last_step_cost;
7270 
7271   /*
7272     Cost of memory is not included in the total_cost() function and
7273     thus will not be considered when comparing costs. Still, we
7274     record it in the cost estimate object for future use.
7275   */
7276   cost->add_mem(*buffer_size);
7277 
7278   /* Total cost of all index accesses */
7279   (*cost)+= h->index_scan_cost(keynr, 1, static_cast<double>(rows));
7280 
7281   /*
7282     Add CPU cost for processing records (see
7283     @handler::multi_range_read_info_const()).
7284   */
7285   cost->add_cpu(table->cost_model()->row_evaluate_cost(
7286     static_cast<double>(rows)));
7287   return FALSE;
7288 }
7289 
7290 
7291 /*
7292   Get cost of one sort-and-sweep step
7293 
7294   SYNOPSIS
7295     get_sort_and_sweep_cost()
7296       table       Table being accessed
7297       nrows       Number of rows to be sorted and retrieved
7298       cost   OUT  The cost
7299 
7300   DESCRIPTION
7301     Get cost of these operations:
7302      - sort an array of #nrows ROWIDs using qsort
7303      - read #nrows records from table in a sweep.
7304 */
7305 
7306 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7307 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7308 {
7309   assert(cost->is_zero());
7310   if (nrows)
7311   {
7312     get_sweep_read_cost(table, nrows, FALSE, cost);
7313 
7314     /*
7315       @todo CostModel: For the old version of the cost model the
7316       following code should be used. For the new version of the cost
7317       model Cost_model::key_compare_cost() should be used.  When
7318       removing support for the old cost model this code should be
7319       removed. The reason for this is that we should get rid of the
7320       ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7321       the current value returned by key_compare_cost() this would
7322       overestimate the cost for sorting.
7323     */
7324 
7325     /*
7326       Constant for the cost of doing one key compare operation in the
7327       sort operation. We should have used the value returned by
7328       key_compare_cost() here but this would make the cost
7329       estimate of sorting very high for queries accessing many
7330       records. Until this constant is adjusted we introduce a constant
7331       that is more realistic. @todo: Replace this with
7332       key_compare_cost() when this has been given a realistic value.
7333     */
7334     const double ROWID_COMPARE_SORT_COST=
7335       table->cost_model()->key_compare_cost(1.0) / 10;
7336 
7337     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7338 
7339     // For the old version of the cost model this cost calculations should
7340     // be used....
7341     const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7342     // .... For the new cost model something like this should be used...
7343     // cpu_sort= nrows * log2(nrows) *
7344     //           table->cost_model()->rowid_compare_cost();
7345     cost->add_cpu(cpu_sort);
7346   }
7347 }
7348 
7349 
7350 /**
7351   Get cost of reading nrows table records in a "disk sweep"
7352 
7353   A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
7354   for an ordered sequence of rowids.
7355 
7356   We take into account that some of the records might be in a memory
7357   buffer while others need to be read from a secondary storage
7358   device. The model for this assumes hard disk IO. A disk read is
7359   performed as follows:
7360 
7361    1. The disk head is moved to the needed cylinder
7362    2. The controller waits for the plate to rotate
7363    3. The data is transferred
7364 
7365   Time to do #3 is insignificant compared to #2+#1.
7366 
7367   Time to move the disk head is proportional to head travel distance.
7368 
7369   Time to wait for the plate to rotate depends on whether the disk head
7370   was moved or not.
7371 
7372   If disk head wasn't moved, the wait time is proportional to distance
7373   between the previous block and the block we're reading.
7374 
7375   If the head was moved, we don't know how much we'll need to wait for the
7376   plate to rotate. We assume the wait time to be a variate with a mean of
7377   0.5 of full rotation time.
7378 
7379   Our cost units are "random disk seeks". The cost of random disk seek is
7380   actually not a constant, it depends one range of cylinders we're going
7381   to access. We make it constant by introducing a fuzzy concept of "typical
7382   datafile length" (it's fuzzy as it's hard to tell whether it should
7383   include index file, temp.tables etc). Then random seek cost is:
7384 
7385     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7386 
7387   We define half_rotation_cost as disk_seek_base_cost() (see
7388   Cost_model_server::disk_seek_base_cost()).
7389 
7390   @param      table        Table to be accessed
7391   @param      nrows        Number of rows to retrieve
7392   @param      interrupted  true <=> Assume that the disk sweep will be
7393                            interrupted by other disk IO. false - otherwise.
7394   @param[out] cost         the cost
7395 */
7396 
get_sweep_read_cost(TABLE * table,ha_rows nrows,bool interrupted,Cost_estimate * cost)7397 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
7398                          Cost_estimate *cost)
7399 {
7400   DBUG_ENTER("get_sweep_read_cost");
7401 
7402   assert(cost->is_zero());
7403   if(nrows > 0)
7404   {
7405     const Cost_model_table *const cost_model= table->cost_model();
7406 
7407     // The total number of blocks used by this table
7408     double n_blocks=
7409       ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
7410     if (n_blocks < 1.0)                         // When data_file_length is 0
7411       n_blocks= 1.0;
7412 
7413     /*
7414       The number of blocks that in average need to be read given that
7415       the records are uniformly distribution over the table.
7416     */
7417     double busy_blocks=
7418       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
7419     if (busy_blocks < 1.0)
7420       busy_blocks= 1.0;
7421 
7422     DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
7423                        busy_blocks));
7424     /*
7425       The random access cost for reading the data pages will be the upper
7426       limit for the sweep_cost.
7427     */
7428     cost->add_io(cost_model->page_read_cost(busy_blocks));
7429     if (!interrupted)
7430     {
7431       Cost_estimate sweep_cost;
7432       /*
7433         Assume reading pages from disk is done in one 'sweep'.
7434 
7435         The cost model and cost estimate for pages already in a memory
7436         buffer will be different from pages that needed to be read from
7437         disk. Calculate the number of blocks that likely already are
7438         in memory and the number of blocks that need to be read from
7439         disk.
7440       */
7441       const double busy_blocks_mem=
7442         busy_blocks * table->file->table_in_memory_estimate();
7443       const double busy_blocks_disk= busy_blocks - busy_blocks_mem;
7444       assert(busy_blocks_disk >= 0.0);
7445 
7446       // Cost of accessing blocks in main memory buffer
7447       sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));
7448 
7449       // Cost of reading blocks from disk in a 'sweep'
7450       const double seek_distance= (busy_blocks_disk > 1.0) ?
7451         n_blocks / busy_blocks_disk : n_blocks;
7452 
7453       const double disk_cost=
7454         busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
7455       sweep_cost.add_io(disk_cost);
7456 
7457       /*
7458         For some cases, ex: when only few blocks need to be read and the
7459         seek distance becomes very large, the sweep cost model can produce
7460         a cost estimate that is larger than the cost of random access.
7461         To handle this case, we use the sweep cost only when it is less
7462         than the random access cost.
7463       */
7464       if (sweep_cost < *cost)
7465         *cost= sweep_cost;
7466     }
7467   }
7468   DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
7469   DBUG_VOID_RETURN;
7470 }
7471 
7472 
7473 /****************************************************************************
7474  * DS-MRR implementation ends
7475  ***************************************************************************/
7476 
7477 /** @brief
7478   Read first row between two ranges.
7479   Store ranges for future calls to read_range_next.
7480 
7481   @param start_key		Start key. Is 0 if no min range
7482   @param end_key		End key.  Is 0 if no max range
7483   @param eq_range_arg	        Set to 1 if start_key == end_key
7484   @param sorted		Set to 1 if result should be sorted per key
7485 
7486   @note
7487     Record is read into table->record[0]
7488 
7489   @retval
7490     0			Found row
7491   @retval
7492     HA_ERR_END_OF_FILE	No rows in range
7493   @retval
7494     \#			Error code
7495 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)7496 int handler::read_range_first(const key_range *start_key,
7497 			      const key_range *end_key,
7498 			      bool eq_range_arg,
7499                               bool sorted /* ignored */)
7500 {
7501   int result;
7502   DBUG_ENTER("handler::read_range_first");
7503 
7504   eq_range= eq_range_arg;
7505   set_end_range(end_key, RANGE_SCAN_ASC);
7506 
7507   range_key_part= table->key_info[active_index].key_part;
7508 
7509   if (!start_key)			// Read first record
7510     result= ha_index_first(table->record[0]);
7511   else
7512     result= ha_index_read_map(table->record[0],
7513                               start_key->key,
7514                               start_key->keypart_map,
7515                               start_key->flag);
7516   if (result)
7517     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7518 		? HA_ERR_END_OF_FILE
7519 		: result);
7520 
7521   if (compare_key(end_range) <= 0)
7522   {
7523     DBUG_RETURN(0);
7524   }
7525   else
7526   {
7527     /*
7528       The last read row does not fall in the range. So request
7529       storage engine to release row lock if possible.
7530     */
7531     unlock_row();
7532     DBUG_RETURN(HA_ERR_END_OF_FILE);
7533   }
7534 }
7535 
7536 
7537 /** @brief
7538   Read next row between two endpoints.
7539 
7540   @note
7541     Record is read into table->record[0]
7542 
7543   @retval
7544     0			Found row
7545   @retval
7546     HA_ERR_END_OF_FILE	No rows in range
7547   @retval
7548     \#			Error code
7549 */
read_range_next()7550 int handler::read_range_next()
7551 {
7552   int result;
7553   DBUG_ENTER("handler::read_range_next");
7554 
7555   if (eq_range)
7556   {
7557     /* We trust that index_next_same always gives a row in range */
7558     DBUG_RETURN(ha_index_next_same(table->record[0],
7559                                    end_range->key,
7560                                    end_range->length));
7561   }
7562   result= ha_index_next(table->record[0]);
7563   if (result)
7564     DBUG_RETURN(result);
7565 
7566   if (compare_key(end_range) <= 0)
7567   {
7568     DBUG_RETURN(0);
7569   }
7570   else
7571   {
7572     /*
7573       The last read row does not fall in the range. So request
7574       storage engine to release row lock if possible.
7575     */
7576     unlock_row();
7577     DBUG_RETURN(HA_ERR_END_OF_FILE);
7578   }
7579 }
7580 
7581 /**
7582   Check if one of the columns in a key is a virtual generated column.
7583   @param part    the first part of the key to check
7584   @param length  the length of the key
7585   @retval true   if the key contains a virtual generated column
7586   @retval false  if the key does not contain a virtual generated column
7587 */
key_has_vcol(const KEY_PART_INFO * part,uint length)7588 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7589   for (uint len = 0; len < length; len += part->store_length, ++part)
7590     if (part->field->is_virtual_gcol()) return true;
7591   return false;
7592 }
7593 
set_end_range(const key_range * range,enum_range_scan_direction direction)7594 void handler::set_end_range(const key_range* range,
7595                             enum_range_scan_direction direction)
7596 {
7597   if (range)
7598   {
7599     save_end_range= *range;
7600     end_range= &save_end_range;
7601     range_key_part= table->key_info[active_index].key_part;
7602     key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7603                                   (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7604     m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7605   }
7606   else
7607     end_range= NULL;
7608 
7609   range_scan_direction= direction;
7610 }
7611 
7612 
7613 /**
7614   Compare if found key (in row) is over max-value.
7615 
7616   @param range		range to compare to row. May be 0 for no range
7617 
7618   @seealso
7619     key.cc::key_cmp()
7620 
7621   @return
7622     The return value is SIGN(key_in_row - range_key):
7623 
7624     - 0   : Key is equal to range or 'range' == 0 (no range)
7625     - -1  : Key is less than range
7626     - 1   : Key is larger than range
7627 */
compare_key(key_range * range)7628 int handler::compare_key(key_range *range)
7629 {
7630   int cmp;
7631   if (!range || in_range_check_pushed_down)
7632     return 0;					// No max range
7633   cmp= key_cmp(range_key_part, range->key, range->length);
7634   if (!cmp)
7635     cmp= key_compare_result_on_equal;
7636   return cmp;
7637 }
7638 
7639 
7640 /*
7641   Compare if a found key (in row) is within the range.
7642 
7643   This function is similar to compare_key() but checks the range scan
7644   direction to determine if this is a descending scan. This function
7645   is used by the index condition pushdown implementation to determine
7646   if the read record is within the range scan.
7647 
7648   @param range Range to compare to row. May be NULL for no range.
7649 
7650   @seealso
7651     handler::compare_key()
7652 
7653   @return Returns whether the key is within the range
7654 
7655     - 0   : Key is equal to range or 'range' == 0 (no range)
7656     - -1  : Key is within the current range
7657     - 1   : Key is outside the current range
7658 */
7659 
compare_key_icp(const key_range * range) const7660 int handler::compare_key_icp(const key_range *range) const
7661 {
7662   int cmp;
7663   if (!range)
7664     return 0;					// no max range
7665   cmp= key_cmp(range_key_part, range->key, range->length);
7666   if (!cmp)
7667     cmp= key_compare_result_on_equal;
7668   if (range_scan_direction == RANGE_SCAN_DESC)
7669     cmp= -cmp;
7670   return cmp;
7671 }
7672 
7673 /**
7674   Change the offsets of all the fields in a key range.
7675 
7676   @param range	  the key range
7677   @param key_part the first key part
7678   @param diff	  how much to change the offsets with
7679 */
7680 static inline void
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,my_ptrdiff_t diff)7681 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7682 		       my_ptrdiff_t diff)
7683 {
7684   for (size_t len= 0; len < range->length;
7685        len+= key_part->store_length, ++key_part)
7686     key_part->field->move_field_offset(diff);
7687 }
7688 
7689 /**
7690   Check if the key in the given buffer (which is not necessarily
7691   TABLE::record[0]) is within range. Called by the storage engine to
7692   avoid reading too many rows.
7693 
7694   @param buf  the buffer that holds the key
7695   @retval -1 if the key is within the range
7696   @retval  0 if the key is equal to the end_range key, and
7697              key_compare_result_on_equal is 0
7698   @retval  1 if the key is outside the range
7699 */
compare_key_in_buffer(const uchar * buf) const7700 int handler::compare_key_in_buffer(const uchar *buf) const
7701 {
7702   assert(end_range != NULL);
7703 
7704   /*
7705     End range on descending scans is only checked with ICP for now, and then we
7706     check it with compare_key_icp() instead of this function.
7707   */
7708   assert(range_scan_direction == RANGE_SCAN_ASC);
7709 
7710   // Make the fields in the key point into the buffer instead of record[0].
7711   const my_ptrdiff_t diff= buf - table->record[0];
7712   if (diff != 0)
7713     move_key_field_offsets(end_range, range_key_part, diff);
7714 
7715   // Compare the key in buf against end_range.
7716   int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
7717   if (cmp == 0)
7718     cmp= key_compare_result_on_equal;
7719 
7720   // Reset the field offsets.
7721   if (diff != 0)
7722     move_key_field_offsets(end_range, range_key_part, -diff);
7723 
7724   return cmp;
7725 }
7726 
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7727 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7728                                 key_part_map keypart_map,
7729                                 enum ha_rkey_function find_flag)
7730 {
7731   int error, error1;
7732   error= index_init(index, 0);
7733   if (!error)
7734   {
7735     error= index_read_map(buf, key, keypart_map, find_flag);
7736     error1= index_end();
7737   }
7738   return error ?  error : error1;
7739 }
7740 
7741 
calculate_key_len(TABLE * table,uint key,key_part_map keypart_map)7742 uint calculate_key_len(TABLE *table, uint key,
7743                        key_part_map keypart_map)
7744 {
7745   /* works only with key prefixes */
7746   assert(((keypart_map + 1) & keypart_map) == 0);
7747 
7748   KEY *key_info= table->key_info + key;
7749   KEY_PART_INFO *key_part= key_info->key_part;
7750   KEY_PART_INFO *end_key_part= key_part + actual_key_parts(key_info);
7751   uint length= 0;
7752 
7753   while (key_part < end_key_part && keypart_map)
7754   {
7755     length+= key_part->store_length;
7756     keypart_map >>= 1;
7757     key_part++;
7758   }
7759   return length;
7760 }
7761 
7762 
7763 /**
7764   Returns a list of all known extensions.
7765 
7766     No mutexes, worst case race is a minor surplus memory allocation
7767     We have to recreate the extension map if mysqld is restarted (for example
7768     within libmysqld)
7769 
7770   @retval
7771     pointer		pointer to TYPELIB structure
7772 */
exts_handlerton(THD * unused,plugin_ref plugin,void * arg)7773 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
7774                                void *arg)
7775 {
7776   List<char> *found_exts= (List<char> *) arg;
7777   handlerton *hton= plugin_data<handlerton*>(plugin);
7778   handler *file;
7779   if (hton->state == SHOW_OPTION_YES && hton->create &&
7780       (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
7781   {
7782     List_iterator_fast<char> it(*found_exts);
7783     const char **ext, *old_ext;
7784 
7785     for (ext= file->bas_ext(); *ext; ext++)
7786     {
7787       while ((old_ext= it++))
7788       {
7789         if (!strcmp(old_ext, *ext))
7790 	  break;
7791       }
7792       if (!old_ext)
7793         found_exts->push_back((char *) *ext);
7794 
7795       it.rewind();
7796     }
7797     delete file;
7798   }
7799   return FALSE;
7800 }
7801 
ha_known_exts()7802 TYPELIB* ha_known_exts()
7803 {
7804   TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
7805   known_extensions->name= "known_exts";
7806   known_extensions->type_lengths= NULL;
7807 
7808   List<char> found_exts;
7809   const char **ext, *old_ext;
7810 
7811   found_exts.push_back((char*) TRG_EXT);
7812   found_exts.push_back((char*) TRN_EXT);
7813 
7814   plugin_foreach(NULL, exts_handlerton,
7815                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
7816 
7817   size_t arr_length= sizeof(char *)* (found_exts.elements+1);
7818   ext= (const char **) sql_alloc(arr_length);
7819 
7820   assert(NULL != ext);
7821   known_extensions->count= found_exts.elements;
7822   known_extensions->type_names= ext;
7823 
7824   List_iterator_fast<char> it(found_exts);
7825   while ((old_ext= it++))
7826     *ext++= old_ext;
7827   *ext= NULL;
7828   return known_extensions;
7829 }
7830 
7831 
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)7832 static bool stat_print(THD *thd, const char *type, size_t type_len,
7833                        const char *file, size_t file_len,
7834                        const char *status, size_t status_len)
7835 {
7836   Protocol *protocol= thd->get_protocol();
7837   protocol->start_row();
7838   protocol->store(type, type_len, system_charset_info);
7839   protocol->store(file, file_len, system_charset_info);
7840   protocol->store(status, status_len, system_charset_info);
7841   if (protocol->end_row())
7842     return TRUE;
7843   return FALSE;
7844 }
7845 
7846 
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7847 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7848                                    void *arg)
7849 {
7850   enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7851   handlerton *hton= plugin_data<handlerton*>(plugin);
7852   if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7853       hton->show_status(hton, thd, stat_print, stat))
7854     return TRUE;
7855   return FALSE;
7856 }
7857 
ha_show_status(THD * thd,handlerton * db_type,enum ha_stat_type stat)7858 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
7859 {
7860   List<Item> field_list;
7861   bool result;
7862 
7863   field_list.push_back(new Item_empty_string("Type",10));
7864   field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
7865   field_list.push_back(new Item_empty_string("Status",10));
7866 
7867   if (thd->send_result_metadata(&field_list,
7868                                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
7869     return TRUE;
7870 
7871   if (db_type == NULL)
7872   {
7873     result= plugin_foreach(thd, showstat_handlerton,
7874                            MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
7875   }
7876   else
7877   {
7878     if (db_type->state != SHOW_OPTION_YES)
7879     {
7880       const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
7881       result= stat_print(thd, name->str, name->length,
7882                          "", 0, "DISABLED", 8) ? 1 : 0;
7883     }
7884     else
7885     {
7886       DBUG_EXECUTE_IF("simulate_show_status_failure",
7887                       DBUG_SET("+d,simulate_net_write_failure"););
7888       result= db_type->show_status &&
7889               db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
7890       DBUG_EXECUTE_IF("simulate_show_status_failure",
7891                       DBUG_SET("-d,simulate_net_write_failure"););
7892     }
7893   }
7894 
7895   if (!result)
7896     my_eof(thd);
7897   return result;
7898 }
7899 
7900 /*
7901   Function to check if the conditions for row-based binlogging is
7902   correct for the table.
7903 
7904   A row in the given table should be replicated if:
7905   - Row-based replication is enabled in the current thread
7906   - The binlog is enabled
7907   - It is not a temporary table
7908   - The binary log is open
7909   - The database the table resides in shall be binlogged (binlog_*_db rules)
7910   - table is not mysql.event
7911 */
7912 
check_table_binlog_row_based(THD * thd,TABLE * table)7913 static bool check_table_binlog_row_based(THD *thd, TABLE *table)
7914 {
7915   if (table->s->cached_row_logging_check == -1)
7916   {
7917     int const check(table->s->tmp_table == NO_TMP_TABLE &&
7918                     ! table->no_replicate &&
7919                     binlog_filter->db_ok(table->s->db.str));
7920     table->s->cached_row_logging_check= check;
7921   }
7922 
7923   assert(table->s->cached_row_logging_check == 0 ||
7924          table->s->cached_row_logging_check == 1);
7925 
7926   return (thd->is_current_stmt_binlog_format_row() &&
7927           table->s->cached_row_logging_check &&
7928           (thd->variables.option_bits & OPTION_BIN_LOG) &&
7929 #ifdef WITH_WSREP
7930 	  /* applier and replayer should not binlog */
7931           ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
7932            mysql_bin_log.is_open()));
7933 #else
7934           mysql_bin_log.is_open());
7935 #endif
7936 }
7937 
7938 
7939 /** @brief
7940    Write table maps for all (manually or automatically) locked tables
7941    to the binary log.
7942 
7943    SYNOPSIS
7944      write_locked_table_maps()
7945        thd     Pointer to THD structure
7946 
7947    DESCRIPTION
7948        This function will generate and write table maps for all tables
7949        that are locked by the thread 'thd'.
7950 
7951    RETURN VALUE
7952        0   All OK
7953        1   Failed to write all table maps
7954 
7955    SEE ALSO
7956        THD::lock
7957 */
7958 
write_locked_table_maps(THD * thd)7959 static int write_locked_table_maps(THD *thd)
7960 {
7961   DBUG_ENTER("write_locked_table_maps");
7962   DBUG_PRINT("enter", ("thd: 0x%lx  thd->lock: 0x%lx "
7963                        "thd->extra_lock: 0x%lx",
7964                        (long) thd, (long) thd->lock, (long) thd->extra_lock));
7965 
7966   DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
7967 
7968   if (thd->get_binlog_table_maps() == 0)
7969   {
7970     MYSQL_LOCK *locks[2];
7971     locks[0]= thd->extra_lock;
7972     locks[1]= thd->lock;
7973     for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
7974     {
7975       MYSQL_LOCK const *const lock= locks[i];
7976       if (lock == NULL)
7977         continue;
7978 
7979       bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
7980       TABLE **const end_ptr= lock->table + lock->table_count;
7981       for (TABLE **table_ptr= lock->table ;
7982            table_ptr != end_ptr ;
7983            ++table_ptr)
7984       {
7985         TABLE *const table= *table_ptr;
7986         DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
7987         if (table->current_lock == F_WRLCK &&
7988             check_table_binlog_row_based(thd, table))
7989         {
7990           /*
7991             We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7992             (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7993             compatible behavior with the STMT based replication even when
7994             the table is not transactional. In other words, if the operation
7995             fails while executing the insert phase nothing is written to the
7996             binlog.
7997 
7998             Note that at this point, we check the type of a set of tables to
7999             create the table map events. In the function binlog_log_row(),
8000             which calls the current function, we check the type of the table
8001             of the current row.
8002           */
8003           bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
8004                                 table->file->has_transactions();
8005           int const error= thd->binlog_write_table_map(table, has_trans,
8006                                                        need_binlog_rows_query);
8007           /* Binlog Rows_query log event once for one statement which updates
8008              two or more tables.*/
8009           if (need_binlog_rows_query)
8010             need_binlog_rows_query= FALSE;
8011           /*
8012             If an error occurs, it is the responsibility of the caller to
8013             roll back the transaction.
8014           */
8015           if (unlikely(error))
8016             DBUG_RETURN(1);
8017         }
8018       }
8019     }
8020   }
8021   DBUG_RETURN(0);
8022 }
8023 
8024 typedef bool Log_func(THD*, TABLE*, bool,
8025                       const uchar*, const uchar*);
8026 
8027 /**
8028 
8029   The purpose of an instance of this class is to :
8030 
8031   1) Given a TABLE instance, backup the given TABLE::read_set, TABLE::write_set
8032      and restore those members upon this instance disposal.
8033 
8034   2) Store a reference to a dynamically allocated buffer and dispose of it upon
8035      this instance disposal.
8036  */
8037 
8038 class Binlog_log_row_cleanup
8039 {
8040  public:
8041   /**
8042     This constructor aims to create temporary copies of readset and writeset.
8043     @param table                 A pointer to TABLE object
8044     @param temp_read_bitmap      Temporary BITMAP to store read_set.
8045     @param temp_write_bitmap     Temporary BITMAP to store write_set.
8046   */
Binlog_log_row_cleanup(TABLE & table,MY_BITMAP & temp_read_bitmap,MY_BITMAP & temp_write_bitmap)8047   Binlog_log_row_cleanup(TABLE &table, MY_BITMAP &temp_read_bitmap,
8048                          MY_BITMAP &temp_write_bitmap)
8049       : m_cleanup_table(table),
8050         m_cleanup_read_bitmap(temp_read_bitmap),
8051         m_cleanup_write_bitmap(temp_write_bitmap)
8052   {
8053     bitmap_copy(&this->m_cleanup_read_bitmap, this->m_cleanup_table.read_set);
8054     bitmap_copy(&this->m_cleanup_write_bitmap, this->m_cleanup_table.write_set);
8055   }
8056 
8057   /**
8058     This destructor aims to restore the original readset and writeset and
8059     delete the temporary copies.
8060   */
~Binlog_log_row_cleanup()8061   virtual ~Binlog_log_row_cleanup()
8062   {
8063     bitmap_copy(this->m_cleanup_table.read_set, &this->m_cleanup_read_bitmap);
8064     bitmap_copy(this->m_cleanup_table.write_set, &this->m_cleanup_write_bitmap);
8065     bitmap_free(&this->m_cleanup_read_bitmap);
8066     bitmap_free(&this->m_cleanup_write_bitmap);
8067   }
8068 
8069  private:
8070   TABLE &m_cleanup_table;  // Creating a TABLE to get access to its members.
8071   MY_BITMAP &m_cleanup_read_bitmap;   // Temporary bitmap to store read_set.
8072   MY_BITMAP &m_cleanup_write_bitmap;  // Temporary bitmap to store write_set.
8073 };
8074 
binlog_log_row(TABLE * table,const uchar * before_record,const uchar * after_record,Log_func * log_func)8075 int binlog_log_row(TABLE* table,
8076                           const uchar *before_record,
8077                           const uchar *after_record,
8078                           Log_func *log_func)
8079 {
8080   bool error= 0;
8081   THD *const thd= table->in_use;
8082 
8083 #ifdef WITH_WSREP
8084   /* only InnoDB tables will be replicated through binlog emulation */
8085   if (WSREP_EMULATE_BINLOG(thd) &&
8086       table->file->ht->db_type != DB_TYPE_INNODB &&
8087       !(table->file->ht->db_type == DB_TYPE_PARTITION_DB &&
8088         (((Partition_handler*)(table->file))->wsrep_is_innodb())))
8089   {
8090       return 0;
8091   }
8092 
8093   /* enforce wsrep_max_ws_rows */
8094   if (table->s->tmp_table == NO_TMP_TABLE)
8095   {
8096     thd->wsrep_affected_rows++;
8097     if (wsrep_max_ws_rows &&
8098         thd->wsrep_exec_mode != REPL_RECV &&
8099         thd->wsrep_affected_rows > wsrep_max_ws_rows)
8100     {
8101       trans_rollback_stmt(thd) || trans_rollback(thd);
8102       my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
8103       return ER_ERROR_DURING_COMMIT;
8104     }
8105   }
8106 #endif /* WITH_WSREP */
8107   if (check_table_binlog_row_based(thd, table))
8108   {
8109     if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF)
8110     {
8111       try
8112       {
8113         MY_BITMAP save_read_set;
8114         MY_BITMAP save_write_set;
8115         if (bitmap_init(&save_read_set, NULL, table->s->fields, false) ||
8116             bitmap_init(&save_write_set, NULL, table->s->fields, false))
8117         {
8118           my_error(ER_OUT_OF_RESOURCES, MYF(0));
8119           return HA_ERR_RBR_LOGGING_FAILED;
8120         }
8121 
8122         Binlog_log_row_cleanup cleanup_sentry(*table, save_read_set,
8123                                               save_write_set);
8124         if (thd->variables.binlog_row_image == 0)
8125         {
8126           for (uint key_number= 0; key_number < table->s->keys; ++key_number)
8127           {
8128             if (((table->key_info[key_number].flags & (HA_NOSAME)) ==
8129                  HA_NOSAME))
8130             {
8131               table->mark_columns_used_by_index_no_reset(key_number,
8132                                                          table->read_set);
8133               table->mark_columns_used_by_index_no_reset(key_number,
8134                                                          table->write_set);
8135             }
8136           }
8137         }
8138         const uchar *records[]= {after_record, before_record};
8139 
8140         for (int record= 0; record < 2; ++record)
8141         {
8142           if (records[record] != NULL)
8143           {
8144             assert(records[record] == table->record[0] ||
8145                    records[record] == table->record[1]);
8146             bool res= add_pke(table, thd, records[record]);
8147             if (res) return HA_ERR_RBR_LOGGING_FAILED;
8148           }
8149         }
8150       }
8151       catch (const std::bad_alloc &)
8152       {
8153         my_error(ER_OUT_OF_RESOURCES, MYF(0));
8154         return HA_ERR_RBR_LOGGING_FAILED;
8155       }
8156     }
8157     DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
8158               (table->s->fields + 7) / 8);
8159 
8160     /*
8161       If there are no table maps written to the binary log, this is
8162       the first row handled in this statement. In that case, we need
8163       to write table maps for all locked tables to the binary log.
8164     */
8165     if (likely(!(error= write_locked_table_maps(thd))))
8166     {
8167       /*
8168         We need to have a transactional behavior for SQLCOM_CREATE_TABLE
8169         (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
8170         compatible behavior with the STMT based replication even when
8171         the table is not transactional. In other words, if the operation
8172         fails while executing the insert phase nothing is written to the
8173         binlog.
8174       */
8175       bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
8176                            table->file->has_transactions();
8177       error=
8178         (*log_func)(thd, table, has_trans, before_record, after_record);
8179     }
8180   }
8181   return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
8182 }
8183 
ha_external_lock(THD * thd,int lock_type)8184 int handler::ha_external_lock(THD *thd, int lock_type)
8185 {
8186   int error;
8187   DBUG_ENTER("handler::ha_external_lock");
8188   /*
8189     Whether this is lock or unlock, this should be true, and is to verify that
8190     if get_auto_increment() was called (thus may have reserved intervals or
8191     taken a table lock), ha_release_auto_increment() was too.
8192   */
8193   assert(next_insert_id == 0);
8194   /* Consecutive calls for lock without unlocking in between is not allowed */
8195   assert(table_share->tmp_table != NO_TMP_TABLE ||
8196          ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
8197           lock_type == F_UNLCK));
8198   /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
8199   assert(inited == NONE || table->open_by_handler);
8200 
8201   if (MYSQL_HANDLER_RDLOCK_START_ENABLED() && lock_type == F_RDLCK)
8202   {
8203     MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
8204                                table_share->table_name.str);
8205   }
8206   else if (MYSQL_HANDLER_WRLOCK_START_ENABLED() && lock_type == F_WRLCK)
8207   {
8208     MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
8209                                table_share->table_name.str);
8210   }
8211   else if (MYSQL_HANDLER_UNLOCK_START_ENABLED() && lock_type == F_UNLCK)
8212   {
8213     MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
8214                                table_share->table_name.str);
8215   }
8216 
8217   ha_statistic_increment(&SSV::ha_external_lock_count);
8218 
8219   MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
8220     { error= external_lock(thd, lock_type); })
8221 
8222   /*
8223     We cache the table flags if the locking succeeded. Otherwise, we
8224     keep them as they were when they were fetched in ha_open().
8225   */
8226 
8227   if (error == 0)
8228   {
8229     /*
8230       The lock type is needed by MRR when creating a clone of this handler
8231       object.
8232     */
8233     m_lock_type= lock_type;
8234     cached_table_flags= table_flags();
8235   }
8236 
8237   if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() && lock_type == F_RDLCK)
8238   {
8239     MYSQL_HANDLER_RDLOCK_DONE(error);
8240   }
8241   else if (MYSQL_HANDLER_WRLOCK_DONE_ENABLED() && lock_type == F_WRLCK)
8242   {
8243     MYSQL_HANDLER_WRLOCK_DONE(error);
8244   }
8245   else if (MYSQL_HANDLER_UNLOCK_DONE_ENABLED() && lock_type == F_UNLCK)
8246   {
8247     MYSQL_HANDLER_UNLOCK_DONE(error);
8248   }
8249   DBUG_RETURN(error);
8250 }
8251 
8252 
8253 /** @brief
8254   Check handler usage and reset state of file to after 'open'
8255 
8256   @note can be called regardless of it is locked or not.
8257 */
ha_reset()8258 int handler::ha_reset()
8259 {
8260   DBUG_ENTER("handler::ha_reset");
8261   /* Check that we have called all proper deallocation functions */
8262   assert((uchar*) table->def_read_set.bitmap +
8263          table->s->column_bitmap_size ==
8264          (uchar*) table->def_write_set.bitmap);
8265   assert(bitmap_is_set_all(&table->s->all_set));
8266   assert(table->key_read == 0);
8267   /* ensure that ha_index_end / ha_rnd_end has been called */
8268   assert(inited == NONE);
8269   /* Free cache used by filesort */
8270   free_io_cache(table);
8271   /* reset the bitmaps to point to defaults */
8272   table->default_column_bitmaps();
8273   /* Reset information about pushed engine conditions */
8274   pushed_cond= NULL;
8275   /* Reset information about pushed index conditions */
8276   cancel_pushed_idx_cond();
8277 
8278   const int retval= reset();
8279   DBUG_RETURN(retval);
8280 }
8281 
8282 
ha_write_row(uchar * buf)8283 int handler::ha_write_row(uchar *buf)
8284 {
8285   int error;
8286   Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
8287   assert(table_share->tmp_table != NO_TMP_TABLE ||
8288          m_lock_type == F_WRLCK);
8289 
8290   DBUG_ENTER("handler::ha_write_row");
8291   DBUG_EXECUTE_IF("inject_error_ha_write_row",
8292                   DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8293   DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
8294                   DBUG_RETURN(HA_ERR_SE_OUT_OF_MEMORY); );
8295   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
8296   mark_trx_read_write();
8297 
8298   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8299                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8300                   set_my_errno(HA_ERR_CRASHED);
8301                   DBUG_RETURN(HA_ERR_CRASHED););
8302 
8303   MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
8304     { error= write_row(buf); })
8305 
8306   MYSQL_INSERT_ROW_DONE(error);
8307   if (unlikely(error))
8308     DBUG_RETURN(error);
8309 
8310   if (unlikely((error= binlog_log_row(table, 0, buf, log_func))))
8311     DBUG_RETURN(error); /* purecov: inspected */
8312 
8313   DEBUG_SYNC_C("ha_write_row_end");
8314   DBUG_RETURN(0);
8315 }
8316 
8317 
ha_update_row(const uchar * old_data,uchar * new_data)8318 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
8319 {
8320   int error;
8321   assert(table_share->tmp_table != NO_TMP_TABLE ||
8322          m_lock_type == F_WRLCK);
8323   Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
8324 
8325   /*
8326     Some storage engines require that the new record is in record[0]
8327     (and the old record is in record[1]).
8328    */
8329   assert(new_data == table->record[0]);
8330   assert(old_data == table->record[1]);
8331 
8332   DBUG_ENTER("hanlder::ha_update_row");
8333   DBUG_EXECUTE_IF("inject_error_ha_update_row",
8334                   DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8335 
8336   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
8337   mark_trx_read_write();
8338 
8339   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8340                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8341                   set_my_errno(HA_ERR_CRASHED);
8342                   return(HA_ERR_CRASHED););
8343 
8344   MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
8345     { error= update_row(old_data, new_data);})
8346 
8347   MYSQL_UPDATE_ROW_DONE(error);
8348   if (unlikely(error))
8349     DBUG_RETURN(error);
8350   if (unlikely((error= binlog_log_row(table, old_data, new_data, log_func))))
8351     DBUG_RETURN(error);
8352 #ifdef WITH_WSREP
8353   THD* thd = table->in_use;
8354   if (WSREP(thd) && table->s->primary_key == MAX_KEY)
8355     {
8356       thd->wsrep_PA_safe= false;
8357     }
8358 #endif /* WITH_WSREP */
8359   DBUG_RETURN(0);
8360 }
8361 
ha_delete_row(const uchar * buf)8362 int handler::ha_delete_row(const uchar *buf)
8363 {
8364   int error;
8365   assert(table_share->tmp_table != NO_TMP_TABLE ||
8366          m_lock_type == F_WRLCK);
8367   Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
8368   /*
8369     Normally table->record[0] is used, but sometimes table->record[1] is used.
8370   */
8371   assert(buf == table->record[0] ||
8372          buf == table->record[1]);
8373   DBUG_EXECUTE_IF("inject_error_ha_delete_row",
8374                   return HA_ERR_INTERNAL_ERROR; );
8375 
8376   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8377                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8378                   set_my_errno(HA_ERR_CRASHED);
8379                   return(HA_ERR_CRASHED););
8380 
8381   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
8382   mark_trx_read_write();
8383 
8384   MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
8385     { error= delete_row(buf);})
8386 
8387   MYSQL_DELETE_ROW_DONE(error);
8388   if (unlikely(error))
8389     return error;
8390   if (unlikely((error= binlog_log_row(table, buf, 0, log_func))))
8391     return error;
8392 #ifdef WITH_WSREP
8393   THD* thd = table->in_use;
8394   if (WSREP(thd) && table->s->primary_key == MAX_KEY)
8395     {
8396       thd->wsrep_PA_safe= false;
8397     }
8398 #endif /* WITH_WSREP */
8399   return 0;
8400 }
8401 
8402 
8403 
8404 /** @brief
8405   use_hidden_primary_key() is called in case of an update/delete when
8406   (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
8407   but we don't have a primary key
8408 */
use_hidden_primary_key()8409 void handler::use_hidden_primary_key()
8410 {
8411   /* fallback to use all columns in the table to identify row */
8412   table->use_all_columns();
8413 }
8414 
8415 
8416 /**
8417   Get an initialized ha_share.
8418 
8419   @return Initialized ha_share
8420     @retval NULL    ha_share is not yet initialized.
8421     @retval != NULL previous initialized ha_share.
8422 
8423   @note
8424   If not a temp table, then LOCK_ha_data must be held.
8425 */
8426 
get_ha_share_ptr()8427 Handler_share *handler::get_ha_share_ptr()
8428 {
8429   DBUG_ENTER("handler::get_ha_share_ptr");
8430   assert(ha_share && table_share);
8431 
8432 #ifndef NDEBUG
8433   if (table_share->tmp_table == NO_TMP_TABLE)
8434     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
8435 #endif
8436 
8437   DBUG_RETURN(*ha_share);
8438 }
8439 
8440 
8441 /**
8442   Set ha_share to be used by all instances of the same table/partition.
8443 
8444   @param ha_share    Handler_share to be shared.
8445 
8446   @note
8447   If not a temp table, then LOCK_ha_data must be held.
8448 */
8449 
set_ha_share_ptr(Handler_share * arg_ha_share)8450 void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
8451 {
8452   DBUG_ENTER("handler::set_ha_share_ptr");
8453   assert(ha_share);
8454 #ifndef NDEBUG
8455   if (table_share->tmp_table == NO_TMP_TABLE)
8456     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
8457 #endif
8458 
8459   *ha_share= arg_ha_share;
8460   DBUG_VOID_RETURN;
8461 }
8462 
8463 
8464 /**
8465   Take a lock for protecting shared handler data.
8466 */
8467 
lock_shared_ha_data()8468 void handler::lock_shared_ha_data()
8469 {
8470   assert(table_share);
8471   if (table_share->tmp_table == NO_TMP_TABLE)
8472     mysql_mutex_lock(&table_share->LOCK_ha_data);
8473 }
8474 
8475 
8476 /**
8477   Release lock for protecting ha_share.
8478 */
8479 
unlock_shared_ha_data()8480 void handler::unlock_shared_ha_data()
8481 {
8482   assert(table_share);
8483   if (table_share->tmp_table == NO_TMP_TABLE)
8484     mysql_mutex_unlock(&table_share->LOCK_ha_data);
8485 }
8486 #ifdef WITH_WSREP
8487 /**
8488   @details
8489   This function makes the storage engine to force the victim transaction
8490   to abort. Currently, only innodb has this functionality, but any SE
8491   implementing the wsrep API should provide this service to support
8492   multi-master operation.
8493 
8494   @param bf_thd       brute force THD asking for the abort
8495   @param victim_thd   victim THD to be aborted
8496 
8497   @return
8498     always 0
8499 */
8500 
ha_wsrep_abort_transaction(THD * bf_thd,THD * victim_thd,my_bool signal)8501 int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
8502 {
8503   DBUG_ENTER("ha_wsrep_abort_transaction");
8504   if (!WSREP(bf_thd) &&
8505       !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
8506         bf_thd->wsrep_exec_mode == TOTAL_ORDER)) {
8507     DBUG_RETURN(0);
8508   }
8509 
8510   handlerton *hton= installed_htons[DB_TYPE_INNODB];
8511   if (hton && hton->wsrep_abort_transaction)
8512   {
8513     hton->wsrep_abort_transaction(hton, bf_thd, victim_thd, signal);
8514   }
8515   else
8516   {
8517     WSREP_WARN("cannot abort InnoDB transaction");
8518   }
8519 
8520   DBUG_RETURN(0);
8521 }
8522 
ha_wsrep_fake_trx_id(THD * thd)8523 void ha_wsrep_fake_trx_id(THD *thd)
8524 {
8525   DBUG_ENTER("ha_wsrep_fake_trx_id");
8526   if (!WSREP(thd))
8527   {
8528     DBUG_VOID_RETURN;
8529   }
8530 
8531   (void)wsrep_ws_handle_for_trx(&thd->wsrep_ws_handle, thd->query_id);
8532 
8533   DBUG_VOID_RETURN;
8534 }
8535 #endif /* WITH_WSREP */
8536 
8537 
8538 /**
8539   This structure is a helper structure for passing the length and pointer of
8540   blob space allocated by storage engine.
8541 */
8542 struct blob_len_ptr{
8543   uint length;  // length of the blob
8544   uchar *ptr;   // pointer of the value
8545 };
8546 
8547 
8548 /**
8549   Get the blob length and pointer of allocated space from the record buffer.
8550 
8551   During evaluating the blob virtual generated columns, the blob space will
8552   be allocated by server. In order to keep the blob data after the table is
8553   closed, we need write the data into a specified space allocated by storage
8554   engine. Here, we have to extract the space pointer and length from the
8555   record buffer.
8556   After we get the value of virtual generated columns, copy the data into
8557   the specified space and store it in the record buffer (@see copy_blob_data()).
8558 
8559   @param table                    the pointer of table
8560   @param fields                   bitmap of field index of evaluated
8561                                   generated column
8562   @param[out] blob_len_ptr_array  an array to record the length and pointer
8563                                   of allocated space by storage engine.
8564   @note The caller should provide the blob_len_ptr_array with a size of
8565         MAX_FIELDS.
8566 */
8567 
extract_blob_space_and_length_from_record_buff(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8568 static void extract_blob_space_and_length_from_record_buff(const TABLE *table,
8569                                            const MY_BITMAP *const fields,
8570                                            blob_len_ptr *blob_len_ptr_array)
8571 {
8572   int num= 0;
8573   for (Field **vfield= table->vfield; *vfield; vfield++)
8574   {
8575     // Check if this field should be included
8576     if (bitmap_is_set(fields, (*vfield)->field_index) &&
8577         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8578     {
8579       blob_len_ptr_array[num].length= (*vfield)->data_length();
8580       // TODO: The following check is only for Innodb.
8581       assert(blob_len_ptr_array[num].length == 255 ||
8582              blob_len_ptr_array[num].length == 768 ||
8583              blob_len_ptr_array[num].length == 3073);
8584 
8585       uchar *ptr;
8586       (*vfield)->get_ptr(&ptr);
8587       blob_len_ptr_array[num].ptr= ptr;
8588 
8589       // Let server allocate the space for BLOB virtual generated columns
8590       (*vfield)->reset();
8591 
8592       num++;
8593       assert(num <= MAX_FIELDS);
8594     }
8595   }
8596 }
8597 
8598 
8599 /**
8600   Copy the value of BLOB virtual generated columns into the space allocated
8601   by storage engine.
8602 
8603   This is because the table is closed after evaluating the value. In order to
8604   keep the BLOB value after the table is closed, we have to copy the value into
8605   the place where storage engine prepares for.
8606 
8607   @param table              pointer of the table to be operated on
8608   @param fields             bitmap of field index of evaluated generated column
8609   @param blob_len_ptr_array array of length and pointer of allocated space by
8610                             storage engine.
8611 */
8612 
copy_blob_data(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8613 static void copy_blob_data(const TABLE *table,
8614                            const MY_BITMAP *const fields,
8615                            blob_len_ptr *blob_len_ptr_array)
8616 {
8617   uint  num= 0;
8618   for (Field **vfield= table->vfield; *vfield; vfield++)
8619   {
8620     // Check if this field should be included
8621     if (bitmap_is_set(fields, (*vfield)->field_index) &&
8622         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8623     {
8624       assert(blob_len_ptr_array[num].length > 0);
8625       assert(blob_len_ptr_array[num].ptr != NULL);
8626 
8627       /*
8628         Only copy as much of the blob as the storage engine has
8629         allocated space for. This is sufficient since the only use of the
8630         blob in the storage engine is for using a prefix of it in a
8631         secondary index.
8632       */
8633       uint length= (*vfield)->data_length();
8634       const uint alloc_len= blob_len_ptr_array[num].length;
8635       length= length > alloc_len ? alloc_len : length;
8636 
8637       uchar *ptr;
8638       (*vfield)->get_ptr(&ptr);
8639       memcpy(blob_len_ptr_array[num].ptr, ptr, length);
8640       (down_cast<Field_blob *>(*vfield))->store_in_allocated_space(
8641                             pointer_cast<char *>(blob_len_ptr_array[num].ptr),
8642                             length);
8643       num++;
8644       assert(num <= MAX_FIELDS);
8645     }
8646   }
8647 }
8648 
8649 
8650 /*
8651   Evaluate generated column's value. This is an internal helper reserved for
8652   handler::my_eval_gcolumn_expr().
8653 
8654   @param thd        pointer of THD
  @param table      The pointer of table where evaluated generated
8656                     columns are in
8657   @param fields     bitmap of field index of evaluated generated column
8658   @param[in,out] record record buff of base columns generated column depends.
8659                         After calling this function, it will be used to return
8660                         the value of generated column.
  @param in_purge   whether the function is called by purge thread
8662 
8663   @return true in case of error, false otherwise.
8664 */
8665 
my_eval_gcolumn_expr_helper(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record,bool in_purge)8666 static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
8667                                         const MY_BITMAP *const fields,
8668                                         uchar *record,
8669                                         bool in_purge)
8670 {
8671   DBUG_ENTER("my_eval_gcolumn_expr_helper");
8672   assert(table && table->vfield);
8673   assert(!thd->is_error());
8674 
8675   uchar *old_buf= table->record[0];
8676   repoint_field_to_record(table, old_buf, record);
8677 
8678   blob_len_ptr blob_len_ptr_array[MAX_FIELDS];
8679 
8680   /*
8681     If it's purge thread, we need get the space allocated by storage engine
8682     for blob.
8683   */
8684   if (in_purge)
8685     extract_blob_space_and_length_from_record_buff(table, fields,
8686                                                    blob_len_ptr_array);
8687 
8688   bool res= false;
8689   MY_BITMAP fields_to_evaluate;
8690   my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
8691   bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields, 0);
8692   bitmap_set_all(&fields_to_evaluate);
8693   bitmap_intersect(&fields_to_evaluate, fields);
8694   /*
8695     In addition to evaluating the value for the columns requested by
8696     the caller we also need to evaluate any virtual columns that these
8697     depend on.
8698     This loop goes through the columns that should be evaluated and
8699     adds all the base columns. If the base column is virtual, it has
8700     to be evaluated.
8701   */
8702   for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
8703   {
8704     Field *field= *vfield_ptr;
8705     // Validate that the field number is less than the bit map size
8706     assert(field->field_index < fields->n_bits);
8707 
8708     if (bitmap_is_set(fields, field->field_index))
8709       bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
8710   }
8711 
8712    /*
8713      Evaluate all requested columns and all base columns these depends
8714      on that are virtual.
8715 
8716      This function is called by the storage engine, which may request to
8717      evaluate more generated columns than read_set/write_set says.
8718      For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
8719      record from the clustered index and asks us to compute generated columns
8720      that match key fields in the used secondary index. So we trust that the
8721      engine has filled all base columns necessary to requested computations,
8722      and we ignore read_set/write_set.
8723   */
8724 
8725   my_bitmap_map *old_maps[2];
8726   dbug_tmp_use_all_columns(table, old_maps,
8727                            table->read_set, table->write_set);
8728 
8729   for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
8730   {
8731     Field *field= *vfield_ptr;
8732 
8733     // Check if we should evaluate this field
8734     if (bitmap_is_set(&fields_to_evaluate, field->field_index) &&
8735         field->is_virtual_gcol())
8736     {
8737       assert(field->gcol_info && field->gcol_info->expr_item->fixed);
8738 
8739       const type_conversion_status save_in_field_status=
8740         field->gcol_info->expr_item->save_in_field(field, 0);
8741       assert(!thd->is_error() || save_in_field_status != TYPE_OK);
8742 
8743       /*
8744         save_in_field() may return non-zero even if there was no
8745         error. This happens if a warning is raised, such as an
8746         out-of-range warning when converting the result to the target
8747         type of the virtual column. We should stop only if the
8748         non-zero return value was caused by an actual error.
8749       */
8750       if (save_in_field_status != TYPE_OK && thd->is_error())
8751       {
8752         res= true;
8753         break;
8754       }
8755     }
8756   }
8757 
8758   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);
8759 
8760   /*
8761     If it's a purge thread, we need copy the blob data into specified place
8762     allocated by storage engine so that the blob data still can be accessed
8763     after table is closed.
8764   */
8765   if (in_purge)
8766     copy_blob_data(table, fields, blob_len_ptr_array);
8767 
8768   repoint_field_to_record(table, record, old_buf);
8769   DBUG_RETURN(res);
8770 }
8771 
8772 
8773 /**
8774    Callback to allow InnoDB to prepare a template for generated
8775    column processing. This function will open the table without
8776    opening in the engine and call the provided function with
8777    the TABLE object made. The function will then close the TABLE.
8778 
8779    @param thd            Thread handle
8780    @param db_name        Name of database containing the table
8781    @param table_name     Name of table to open
8782    @param myc            InnoDB function to call for processing TABLE
8783    @param ib_table       Argument for InnoDB function
8784 
8785    @return true in case of error, false otherwise.
8786 */
8787 
my_prepare_gcolumn_template(THD * thd,const char * db_name,const char * table_name,my_gcolumn_template_callback_t myc,void * ib_table)8788 bool handler::my_prepare_gcolumn_template(THD *thd,
8789                                           const char *db_name,
8790                                           const char *table_name,
8791                                           my_gcolumn_template_callback_t myc,
8792                                           void* ib_table)
8793 {
8794   char path[FN_REFLEN + 1];
8795   bool was_truncated;
8796   build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8797                        db_name, table_name, "", 0, &was_truncated);
8798   assert(!was_truncated);
8799   lex_start(thd);
8800   bool rc= true;
8801 
8802   // Note! The last argument to open_table_uncached() must be false,
8803   // since the table already exists in the TDC. Allowing the table to
8804   // be opened in the SE in this case is dangerous as the two shares
8805   // could get conflicting SE private data.
8806   TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8807                                     false, false);
8808   if (table)
8809   {
8810     myc(table, ib_table);
8811     intern_close_table(table);
8812     rc= false;
8813   }
8814   lex_end(thd->lex);
8815   return rc;
8816 }
8817 
8818 
8819 /**
8820    Callback for generated columns processing. Will open the table, in the
8821    server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
8822    processing. This function is a variant of the other
8823    handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
8824    object already exists - e.g. from purge threads.
8825 
8826    Note! The call to open_table_uncached() must be made with the last
8827    argument (open_in_engine) set to false. Failing to do so will cause
8828    deadlocks and incorrect behavior.
8829 
8830    @param thd             Thread handle
8831    @param db_name         Database containing the table to open
8832    @param table_name      Name of table to open
8833    @param fields          Bitmap of field index of evaluated generated column
8834    @param record          Record buffer
8835 
8836    @return true in case of error, false otherwise.
8837 */
8838 
my_eval_gcolumn_expr_with_open(THD * thd,const char * db_name,const char * table_name,const MY_BITMAP * const fields,uchar * record)8839 bool handler::my_eval_gcolumn_expr_with_open(THD *thd,
8840                                              const char *db_name,
8841                                              const char *table_name,
8842                                              const MY_BITMAP *const fields,
8843                                              uchar *record)
8844 {
8845   bool retval= true;
8846   lex_start(thd);
8847 
8848   char path[FN_REFLEN + 1];
8849   bool was_truncated;
8850   build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8851                        db_name, table_name, "", 0, &was_truncated);
8852   assert(!was_truncated);
8853 
8854   TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8855                                     false, false);
8856   if (table)
8857   {
8858     retval= my_eval_gcolumn_expr_helper(thd, table, fields, record, true);
8859     intern_close_table(table);
8860   }
8861 
8862   lex_end(thd->lex);
8863   return retval;
8864 }
8865 
8866 
8867 /**
8868   Evaluate generated Column's value. If the engine has to write an index entry
8869   to its UNDO log (in a DELETE or UPDATE), and the index is on a virtual
8870   generated column, engine needs to calculate the column's value. This variant
8871   of handler::my_eval_gcolumn_expr() is used by client threads which have a
8872   TABLE.
8873 
8874   @param thd        Thread handle
8875   @param table      mysql table object
  @param fields     bitmap of field index of evaluated
                    generated column
8878   @param record     buff of base columns generated column depends.
8879                     After calling this function, it will be used to
8880                     return the value of generated column.
8881 
8882   @retval true in case of error
8883   @retval false on success.
8884 */
8885 
my_eval_gcolumn_expr(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record)8886 bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
8887 				   const MY_BITMAP *const fields,
8888                                    uchar *record)
8889 {
8890   DBUG_ENTER("my_eval_gcolumn_expr");
8891 
8892   const bool res=
8893      my_eval_gcolumn_expr_helper(thd, table, fields, record, false);
8894   DBUG_RETURN(res);
8895 }
8896 
8897 
8898 /**
8899   Auxiliary structure for passing information to notify_*_helper()
8900   functions.
8901 */
8902 
8903 struct HTON_NOTIFY_PARAMS
8904 {
HTON_NOTIFY_PARAMSHTON_NOTIFY_PARAMS8905   HTON_NOTIFY_PARAMS(const MDL_key *mdl_key,
8906                      ha_notification_type mdl_type)
8907     : key(mdl_key), notification_type(mdl_type),
8908       some_htons_were_notified(false),
8909       victimized(false)
8910   {}
8911 
8912   const MDL_key *key;
8913   const ha_notification_type notification_type;
8914   bool some_htons_were_notified;
8915   bool victimized;
8916 };
8917 
8918 
8919 static my_bool
notify_exclusive_mdl_helper(THD * thd,plugin_ref plugin,void * arg)8920 notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin, void *arg)
8921 {
8922   handlerton *hton= plugin_data<handlerton*>(plugin);
8923   if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl)
8924   {
8925     HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8926 
8927     if (hton->notify_exclusive_mdl(thd, params->key,
8928                                    params->notification_type,
8929                                    &params->victimized))
8930     {
8931       // Ignore failures from post event notification.
8932       if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8933         return TRUE;
8934     }
8935     else
8936       params->some_htons_were_notified= true;
8937   }
8938   return FALSE;
8939 }
8940 
8941 
8942 /**
8943   Notify/get permission from all interested storage engines before
8944   acquisition or after release of exclusive metadata lock on object
8945   represented by key.
8946 
8947   @param thd                Thread context.
8948   @param mdl_key            MDL key identifying object on which exclusive
8949                             lock is to be acquired/was released.
8950   @param notification_type  Indicates whether this is pre-acquire or
8951                             post-release notification.
8952   @param victimized        'true' if locking failed as we were selected
8953                             as a victim in order to avoid possible deadlocks.
8954 
8955   @note @see handlerton::notify_exclusive_mdl for details about
8956         calling convention and error reporting.
8957 
8958   @return False - if notification was successful/lock can be acquired,
8959           True - if it has failed/lock should not be acquired.
8960 */
8961 
ha_notify_exclusive_mdl(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type,bool * victimized)8962 bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
8963                              ha_notification_type notification_type,
8964                              bool *victimized)
8965 {
8966   HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8967   *victimized = false;
8968   if (plugin_foreach(thd, notify_exclusive_mdl_helper,
8969                      MYSQL_STORAGE_ENGINE_PLUGIN, &params))
8970   {
8971     *victimized = params.victimized;
8972     /*
8973       If some SE hasn't given its permission to acquire lock and some SEs
8974       has given their permissions, we need to notify the latter group about
8975       failed lock acquisition. We do this by calling post-release notification
8976       for all interested SEs unconditionally.
8977     */
8978     if (notification_type == HA_NOTIFY_PRE_EVENT &&
8979         params.some_htons_were_notified)
8980     {
8981       HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8982       (void) plugin_foreach(thd, notify_exclusive_mdl_helper,
8983                             MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8984     }
8985     return true;
8986   }
8987   return false;
8988 }
8989 
8990 
8991 static my_bool
notify_alter_table_helper(THD * thd,plugin_ref plugin,void * arg)8992 notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg)
8993 {
8994   handlerton *hton= plugin_data<handlerton*>(plugin);
8995   if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table)
8996   {
8997     HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8998 
8999     if (hton->notify_alter_table(thd, params->key, params->notification_type))
9000     {
9001       // Ignore failures from post event notification.
9002       if (params->notification_type == HA_NOTIFY_PRE_EVENT)
9003         return TRUE;
9004     }
9005     else
9006       params->some_htons_were_notified= true;
9007   }
9008   return FALSE;
9009 }
9010 
9011 
9012 /**
9013   Notify/get permission from all interested storage engines before
9014   or after executed ALTER TABLE on the table identified by key.
9015 
9016   @param thd                Thread context.
9017   @param mdl_key            MDL key identifying table.
9018   @param notification_type  Indicates whether this is pre-ALTER or
9019                             post-ALTER notification.
9020 
9021   @note @see handlerton::notify_alter_table for rationale,
9022         details about calling convention and error reporting.
9023 
9024   @return False - if notification was successful/ALTER TABLE can
9025                   proceed.
9026           True -  if it has failed/ALTER TABLE should fail.
9027 */
9028 
ha_notify_alter_table(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type)9029 bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
9030                            ha_notification_type notification_type)
9031 {
9032   HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
9033 
9034   if (plugin_foreach(thd, notify_alter_table_helper,
9035                      MYSQL_STORAGE_ENGINE_PLUGIN, &params))
9036   {
9037     /*
9038       If some SE hasn't given its permission to do ALTER TABLE and some SEs
9039       has given their permissions, we need to notify the latter group about
9040       failed attemopt. We do this by calling post-ALTER TABLE notification
9041       for all interested SEs unconditionally.
9042     */
9043     if (notification_type == HA_NOTIFY_PRE_EVENT &&
9044         params.some_htons_were_notified)
9045     {
9046       HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
9047       (void) plugin_foreach(thd, notify_alter_table_helper,
9048                             MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
9049     }
9050     return true;
9051   }
9052   return false;
9053 }
9054 
9055 /**
9056   Set the transaction isolation level for the next transaction and update
9057   session tracker information about the transaction isolation level.
9058 
9059   @param thd           THD session setting the tx_isolation.
9060   @param tx_isolation  The isolation level to be set.
  @param one_shot      True if the isolation level should be restored to
                       session default after finishing the transaction.

  @return false on success, true if one_shot is set and tx_isolation is not
          one of the known isolation levels.
9063 */
set_tx_isolation(THD * thd,enum_tx_isolation tx_isolation,bool one_shot)9064 bool set_tx_isolation(THD *thd,
9065                       enum_tx_isolation tx_isolation,
9066                       bool one_shot)
9067 {
9068   Transaction_state_tracker *tst= NULL;
9069 
9070   if (thd->variables.session_track_transaction_info > TX_TRACK_NONE)
9071     tst= (Transaction_state_tracker *)
9072            thd->session_tracker.get_tracker(TRANSACTION_INFO_TRACKER);
9073 
9074   thd->tx_isolation= tx_isolation;
9075 
9076   if (one_shot)
9077   {
9078     assert(!thd->in_active_multi_stmt_transaction());
9079     assert(!thd->in_sub_stmt);
9080     enum enum_tx_isol_level l;
9081     switch (thd->tx_isolation) {
9082     case ISO_READ_UNCOMMITTED:
9083       l=  TX_ISOL_UNCOMMITTED;
9084       break;
9085     case ISO_READ_COMMITTED:
9086       l=  TX_ISOL_COMMITTED;
9087       break;
9088     case ISO_REPEATABLE_READ:
9089       l= TX_ISOL_REPEATABLE;
9090       break;
9091     case ISO_SERIALIZABLE:
9092       l= TX_ISOL_SERIALIZABLE;
9093       break;
9094     default:
9095       assert(0);
9096       return true;
9097     }
9098     if (tst)
9099       tst->set_isol_level(thd, l);
9100   }
9101   else if (tst)
9102   {
9103     tst->set_isol_level(thd, TX_ISOL_INHERIT);
9104   }
9105   return false;
9106 }
9107 
9108 
9109 /**
9110   Checks if the file name is reserved word used by SE by invoking
9111   the handlerton method.
9112 
9113   @param  unused1       thread handler which is unused.
9114   @param  plugin        SE plugin.
9115   @param  name          Database name.
9116 
  @retval true          If the name is a reserved word.
  @retval false         If the name is not a reserved word.
9119 */
is_reserved_db_name_handlerton(THD * unused1,plugin_ref plugin,void * name)9120 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
9121                                               void *name)
9122 {
9123   handlerton *hton= plugin_data<handlerton*>(plugin);
9124   if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
9125     return (hton->is_reserved_db_name(hton, (const char *)name));
9126   return false;
9127 }
9128 
9129 
9130 /**
9131    Check if the file name is reserved word used by SE.
9132 
9133    @param  name    Database name.
9134 
9135    @retval true    If the name is a reserved word.
9136    @retval false   If the name is not a reserved word.
9137 */
ha_check_reserved_db_name(const char * name)9138 bool ha_check_reserved_db_name(const char* name)
9139 {
9140   return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
9141                          MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
9142 }
9143