1 /*****************************************************************************
2
3 Copyright (c) 2000, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 Copyright (c) 2012, Facebook Inc.
7 Copyright (c) 2013, 2022, MariaDB Corporation.
8
9 Portions of this file contain modifications contributed and copyrighted by
10 Google, Inc. Those modifications are gratefully acknowledged and are described
11 briefly in the InnoDB documentation. The contributions by Google are
12 incorporated with their permission, and subject to the conditions contained in
13 the file COPYING.Google.
14
15 Portions of this file contain modifications contributed and copyrighted
16 by Percona Inc.. Those modifications are
17 gratefully acknowledged and are described briefly in the InnoDB
18 documentation. The contributions by Percona Inc. are incorporated with
19 their permission, and subject to the conditions contained in the file
20 COPYING.Percona.
21
22 This program is free software; you can redistribute it and/or modify it under
23 the terms of the GNU General Public License as published by the Free Software
24 Foundation; version 2 of the License.
25
26 This program is distributed in the hope that it will be useful, but WITHOUT
27 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
28 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29
30 You should have received a copy of the GNU General Public License along with
31 this program; if not, write to the Free Software Foundation, Inc.,
32 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
33
34 *****************************************************************************/
35
36 /** @file ha_innodb.cc */
37
38 #include "univ.i"
39
40 /* Include necessary SQL headers */
41 #include "ha_prototypes.h"
42 #include <debug_sync.h>
43 #include <gstream.h>
44 #include <log.h>
45 #include <mysys_err.h>
46 #include <innodb_priv.h>
47 #include <strfunc.h>
48 #include <sql_acl.h>
49 #include <sql_class.h>
50 #include <sql_show.h>
51 #include <sql_table.h>
52 #include <table_cache.h>
53 #include <my_check_opt.h>
54 #include <my_bitmap.h>
55 #include <mysql/service_thd_alloc.h>
56 #include <mysql/service_thd_wait.h>
57
58 // MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
59 // MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
60
61 #include <my_service_manager.h>
62 #include <key.h>
63
64 /* Include necessary InnoDB headers */
65 #include "btr0btr.h"
66 #include "btr0cur.h"
67 #include "btr0bulk.h"
68 #include "btr0sea.h"
69 #include "buf0dblwr.h"
70 #include "buf0dump.h"
71 #include "buf0flu.h"
72 #include "buf0lru.h"
73 #include "dict0boot.h"
74 #include "dict0load.h"
75 #include "btr0defragment.h"
76 #include "dict0crea.h"
77 #include "dict0dict.h"
78 #include "dict0stats.h"
79 #include "dict0stats_bg.h"
80 #include "fil0fil.h"
81 #include "fsp0fsp.h"
82 #include "fts0fts.h"
83 #include "fts0plugin.h"
84 #include "fts0priv.h"
85 #include "fts0types.h"
86 #include "ibuf0ibuf.h"
87 #include "lock0lock.h"
88 #include "log0crypt.h"
89 #include "mtr0mtr.h"
90 #include "os0file.h"
91 #include "page0zip.h"
92 #include "pars0pars.h"
93 #include "rem0types.h"
94 #include "row0import.h"
95 #include "row0ins.h"
96 #include "row0merge.h"
97 #include "row0mysql.h"
98 #include "row0quiesce.h"
99 #include "row0sel.h"
100 #include "row0trunc.h"
101 #include "row0upd.h"
102 #include "fil0crypt.h"
103 #include "srv0mon.h"
104 #include "srv0srv.h"
105 #include "srv0start.h"
106 #include "rem0rec.h"
107 #ifdef UNIV_DEBUG
108 #include "trx0purge.h"
109 #endif /* UNIV_DEBUG */
110 #include "trx0roll.h"
111 #include "trx0rseg.h"
112 #include "trx0trx.h"
113 #include "fil0pagecompress.h"
114 #include "ut0mem.h"
115 #include "row0ext.h"
116
117 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
118
119 extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
120 unsigned long long thd_get_query_id(const MYSQL_THD thd);
121 void thd_clear_error(MYSQL_THD thd);
122
123 TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len,
124 const char *table, size_t table_len);
125 MYSQL_THD create_thd();
126 void destroy_thd(MYSQL_THD thd);
127 void reset_thd(MYSQL_THD thd);
128 TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
129 const char *tb, size_t tblen);
130 void close_thread_tables(THD* thd);
131
132 #ifdef MYSQL_DYNAMIC_PLUGIN
133 #define tc_size 400
134 #define tdc_size 400
135 #endif
136
137 #include "ha_innodb.h"
138 #include "i_s.h"
139 #include "sync0sync.h"
140
141 #include <string>
142 #include <sstream>
143
144 #include <mysql/plugin.h>
145 #include <mysql/service_wsrep.h>
146
147 #ifdef WITH_WSREP
148 #include "dict0priv.h"
149 #include <mysql/service_md5.h>
150 #include "wsrep_sst.h"
151
152 static inline wsrep_ws_handle_t*
wsrep_ws_handle(THD * thd,const trx_t * trx)153 wsrep_ws_handle(THD* thd, const trx_t* trx) {
154 return wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd),
155 (wsrep_trx_id_t)trx->id);
156 }
157
158 extern void wsrep_cleanup_transaction(THD *thd);
159 static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool);
160 static void wsrep_fake_trx_id(handlerton* hton, THD *thd);
161 static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
162 static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
163 #endif /* WITH_WSREP */
164
165 /** to force correct commit order in binlog */
166 static ulong commit_threads = 0;
167 static mysql_cond_t commit_cond;
168 static mysql_mutex_t commit_cond_m;
169 static mysql_mutex_t pending_checkpoint_mutex;
170
171 #define INSIDE_HA_INNOBASE_CC
172
173 #define EQ_CURRENT_THD(thd) ((thd) == current_thd)
174
175 struct handlerton* innodb_hton_ptr;
176
177 static const long AUTOINC_OLD_STYLE_LOCKING = 0;
178 static const long AUTOINC_NEW_STYLE_LOCKING = 1;
179 static const long AUTOINC_NO_LOCKING = 2;
180
181 static ulong innobase_open_files;
182 static long innobase_autoinc_lock_mode;
183 static ulong innobase_commit_concurrency;
184
185 static ulonglong innobase_buffer_pool_size;
186
187 /** Percentage of the buffer pool to reserve for 'old' blocks.
188 Connected to buf_LRU_old_ratio. */
189 static uint innobase_old_blocks_pct;
190
191 static char* innobase_data_file_path;
192 static char* innobase_temp_data_file_path;
193
194 /* The default values for the following char* start-up parameters
195 are determined in innodb_init_params(). */
196
197 static char* innobase_data_home_dir;
198 static char* innobase_enable_monitor_counter;
199 static char* innobase_disable_monitor_counter;
200 static char* innobase_reset_monitor_counter;
201 static char* innobase_reset_all_monitor_counter;
202
203 static ulong innodb_flush_method;
204
205 /** Deprecated; no effect other than issuing a deprecation warning. */
206 static char* innodb_file_format;
207 /** Deprecated; no effect other than issuing a deprecation warning. */
208 static char* innodb_large_prefix;
209
210 /* This variable can be set in the server configure file, specifying
211 stopword table to be used */
212 static char* innobase_server_stopword_table;
213
214 static my_bool innobase_use_checksums;
215 static my_bool innobase_locks_unsafe_for_binlog;
216 static my_bool innobase_rollback_on_timeout;
217 static my_bool innobase_create_status_file;
218 my_bool innobase_stats_on_metadata;
219 static my_bool innodb_optimize_fulltext_only;
220
221 static char* innodb_version_str = (char*) INNODB_VERSION_STR;
222
223 extern uint srv_fil_crypt_rotate_key_age;
224 extern uint srv_n_fil_crypt_iops;
225
226 extern my_bool srv_immediate_scrub_data_uncompressed;
227 extern my_bool srv_background_scrub_data_uncompressed;
228 extern my_bool srv_background_scrub_data_compressed;
229 extern uint srv_background_scrub_data_interval;
230 extern uint srv_background_scrub_data_check_interval;
231 #ifdef UNIV_DEBUG
232 my_bool innodb_evict_tables_on_commit_debug;
233 extern my_bool srv_scrub_force_testing;
234 #endif
235
236 /** File format constraint for ALTER TABLE */
237 ulong innodb_instant_alter_column_allowed;
238
/** Values of the innodb_default_row_format option.
rec_format_enum cannot be used here, because the COMPRESSED row format
is not allowed for innodb_default_row_format. */
enum default_row_format_enum {
	DEFAULT_ROW_FORMAT_REDUNDANT = 0,
	DEFAULT_ROW_FORMAT_COMPACT,
	DEFAULT_ROW_FORMAT_DYNAMIC
};
246
247 /** A dummy variable */
248 static uint innodb_max_purge_lag_wait;
249
250 /** Wait for trx_sys_t::rseg_history_len to be below a limit. */
innodb_max_purge_lag_wait_update(THD * thd,st_mysql_sys_var *,void *,const void * limit)251 static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *,
252 void *, const void *limit)
253 {
254 const uint l= *static_cast<const uint*>(limit);
255 if (trx_sys.history_size() <= l)
256 return;
257 mysql_mutex_unlock(&LOCK_global_system_variables);
258 while (trx_sys.history_size() > l)
259 {
260 if (thd_kill_level(thd))
261 break;
262 srv_wake_purge_thread_if_not_active();
263 os_thread_sleep(100000);
264 }
265 mysql_mutex_lock(&LOCK_global_system_variables);
266 }
267
/** Store an error number in errno.
@param[in]	err	value to assign to errno */
static void
set_my_errno(int err)
{
	errno = err;
}
273
274 /** Checks whether the file name belongs to a partition of a table.
275 @param[in] file_name file name
276 @return pointer to the end of the table name part of the file name, or NULL */
277 static
278 char*
is_partition(char * file_name)279 is_partition(
280 /*=========*/
281 char* file_name)
282 {
283 /* We look for pattern #P# to see if the table is partitioned
284 MariaDB table. */
285 return strstr(file_name, table_name_t::part_suffix);
286 }
287
288 /** Signal to shut down InnoDB (NULL if shutdown was signaled, or if
289 running in innodb_read_only mode, srv_read_only_mode) */
290 st_my_thread_var *srv_running;
/** Service thread that waits for the server shutdown and stops purge threads.
Purge workers have THDs that are needed to calculate virtual columns.
These THDs must be destroyed rather early in the server shutdown sequence.
This service thread creates a THD and idly waits for it to get a signal to
die. Then it notifies all purge workers to shut down.
*/
297 static pthread_t thd_destructor_thread;
298
299 pthread_handler_t
thd_destructor_proxy(void *)300 thd_destructor_proxy(void *)
301 {
302 mysql_mutex_t thd_destructor_mutex;
303 mysql_cond_t thd_destructor_cond;
304
305 my_thread_init();
306 mysql_mutex_init(PSI_NOT_INSTRUMENTED, &thd_destructor_mutex, 0);
307 mysql_cond_init(PSI_NOT_INSTRUMENTED, &thd_destructor_cond, 0);
308
309 st_my_thread_var *myvar= _my_thread_var();
310 myvar->current_mutex = &thd_destructor_mutex;
311 myvar->current_cond = &thd_destructor_cond;
312
313 THD *thd= create_thd();
314 thd_proc_info(thd, "InnoDB shutdown handler");
315
316
317 mysql_mutex_lock(&thd_destructor_mutex);
318 my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
319 myvar,
320 MY_MEMORY_ORDER_RELAXED);
321 /* wait until the server wakes the THD to abort and die */
322 while (!srv_running->abort)
323 mysql_cond_wait(&thd_destructor_cond, &thd_destructor_mutex);
324 mysql_mutex_unlock(&thd_destructor_mutex);
325 my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
326 NULL,
327 MY_MEMORY_ORDER_RELAXED);
328
329 while (srv_fast_shutdown == 0 &&
330 (trx_sys.any_active_transactions() ||
331 (uint)thread_count > srv_n_purge_threads + 1)) {
332 thd_proc_info(thd, "InnoDB slow shutdown wait");
333 os_thread_sleep(1000);
334 }
335
336 /* Some background threads might generate undo pages that will
337 need to be purged, so they have to be shut down before purge
338 threads if slow shutdown is requested. */
339 srv_shutdown_bg_undo_sources();
340 srv_purge_shutdown();
341
342 destroy_thd(thd);
343 mysql_cond_destroy(&thd_destructor_cond);
344 mysql_mutex_destroy(&thd_destructor_mutex);
345 my_thread_end();
346 return 0;
347 }
348
349 /** Return the InnoDB ROW_FORMAT enum value
350 @param[in] row_format row_format from "innodb_default_row_format"
351 @return InnoDB ROW_FORMAT value from rec_format_t enum. */
352 static
353 rec_format_t
get_row_format(ulong row_format)354 get_row_format(
355 ulong row_format)
356 {
357 switch(row_format) {
358 case DEFAULT_ROW_FORMAT_REDUNDANT:
359 return(REC_FORMAT_REDUNDANT);
360 case DEFAULT_ROW_FORMAT_COMPACT:
361 return(REC_FORMAT_COMPACT);
362 case DEFAULT_ROW_FORMAT_DYNAMIC:
363 return(REC_FORMAT_DYNAMIC);
364 default:
365 ut_ad(0);
366 return(REC_FORMAT_DYNAMIC);
367 }
368 }
369
370 static ulong innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC;
371
372 /** Possible values for system variable "innodb_stats_method". The values
373 are defined the same as its corresponding MyISAM system variable
374 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
375 static const char* innodb_stats_method_names[] = {
376 "nulls_equal",
377 "nulls_unequal",
378 "nulls_ignored",
379 NullS
380 };
381
382 /** Used to define an enumerate type of the system variable innodb_stats_method.
383 This is the same as "myisam_stats_method_typelib" */
384 static TYPELIB innodb_stats_method_typelib = {
385 array_elements(innodb_stats_method_names) - 1,
386 "innodb_stats_method_typelib",
387 innodb_stats_method_names,
388 NULL
389 };
390
391 /** Possible values of the parameter innodb_checksum_algorithm */
392 const char* innodb_checksum_algorithm_names[] = {
393 "crc32",
394 "strict_crc32",
395 "innodb",
396 "strict_innodb",
397 "none",
398 "strict_none",
399 NullS
400 };
401
402 /** Used to define an enumerate type of the system variable
403 innodb_checksum_algorithm. */
404 TYPELIB innodb_checksum_algorithm_typelib = {
405 array_elements(innodb_checksum_algorithm_names) - 1,
406 "innodb_checksum_algorithm_typelib",
407 innodb_checksum_algorithm_names,
408 NULL
409 };
410
411 /** Possible values for system variable "innodb_default_row_format". */
412 static const char* innodb_default_row_format_names[] = {
413 "redundant",
414 "compact",
415 "dynamic",
416 NullS
417 };
418
419 /** Used to define an enumerate type of the system variable
420 innodb_default_row_format. */
421 static TYPELIB innodb_default_row_format_typelib = {
422 array_elements(innodb_default_row_format_names) - 1,
423 "innodb_default_row_format_typelib",
424 innodb_default_row_format_names,
425 NULL
426 };
427
428 /** Possible values of the parameter innodb_lock_schedule_algorithm */
429 static const char* innodb_lock_schedule_algorithm_names[] = {
430 "fcfs",
431 "vats",
432 NullS
433 };
434
435 /** Used to define an enumerate type of the system variable
436 innodb_lock_schedule_algorithm. */
437 static TYPELIB innodb_lock_schedule_algorithm_typelib = {
438 array_elements(innodb_lock_schedule_algorithm_names) - 1,
439 "innodb_lock_schedule_algorithm_typelib",
440 innodb_lock_schedule_algorithm_names,
441 NULL
442 };
443
444 /** Names of allowed values of innodb_flush_method */
445 const char* innodb_flush_method_names[] = {
446 "fsync",
447 "O_DSYNC",
448 "littlesync",
449 "nosync",
450 "O_DIRECT",
451 "O_DIRECT_NO_FSYNC",
452 #ifdef _WIN32
453 "unbuffered",
454 "async_unbuffered" /* alias for "unbuffered" */,
455 "normal" /* alias for "fsync" */,
456 #endif
457 NullS
458 };
459
460 /** Enumeration of innodb_flush_method */
461 TYPELIB innodb_flush_method_typelib = {
462 array_elements(innodb_flush_method_names) - 1,
463 "innodb_flush_method_typelib",
464 innodb_flush_method_names,
465 NULL
466 };
467
468 /* The following counter is used to convey information to InnoDB
469 about server activity: in case of normal DML ops it is not
470 sensible to call srv_active_wake_master_thread after each
471 operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
472
473 #define INNOBASE_WAKE_INTERVAL 32
474 static ulong innobase_active_counter = 0;
475
476 /** Allowed values of innodb_change_buffering */
477 static const char* innodb_change_buffering_names[] = {
478 "none", /* IBUF_USE_NONE */
479 "inserts", /* IBUF_USE_INSERT */
480 "deletes", /* IBUF_USE_DELETE_MARK */
481 "changes", /* IBUF_USE_INSERT_DELETE_MARK */
482 "purges", /* IBUF_USE_DELETE */
483 "all", /* IBUF_USE_ALL */
484 NullS
485 };
486
487 /** Enumeration of innodb_change_buffering */
488 static TYPELIB innodb_change_buffering_typelib = {
489 array_elements(innodb_change_buffering_names) - 1,
490 "innodb_change_buffering_typelib",
491 innodb_change_buffering_names,
492 NULL
493 };
494
495 /** Allowed values of innodb_instant_alter_column_allowed */
496 const char* innodb_instant_alter_column_allowed_names[] = {
497 "never", /* compatible with MariaDB 5.5 to 10.2 */
498 "add_last",/* allow instant ADD COLUMN */
499 NullS
500 };
501
502 /** Enumeration of innodb_instant_alter_column_allowed */
503 static TYPELIB innodb_instant_alter_column_allowed_typelib = {
504 array_elements(innodb_instant_alter_column_allowed_names) - 1,
505 "innodb_instant_alter_column_allowed_typelib",
506 innodb_instant_alter_column_allowed_names,
507 NULL
508 };
509
510 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
511 of m_prebuilt->fts_doc_id
512 @param[in,out] fts_hdl FTS handler
513 @return the relevance ranking value */
514 static
515 float
516 innobase_fts_retrieve_ranking(
517 FT_INFO* fts_hdl);
518 /** Free the memory for the FTS handler
519 @param[in,out] fts_hdl FTS handler */
520 static
521 void
522 innobase_fts_close_ranking(
523 FT_INFO* fts_hdl);
524 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
525 of m_prebuilt->fts_doc_id
526 @param[in,out] fts_hdl FTS handler
527 @return the relevance ranking value */
528 static
529 float
530 innobase_fts_find_ranking(
531 FT_INFO* fts_hdl,
532 uchar*,
533 uint);
534
535 /* Call back function array defined by MySQL and used to
536 retrieve FTS results. */
537 const struct _ft_vft ft_vft_result = {NULL,
538 innobase_fts_find_ranking,
539 innobase_fts_close_ranking,
540 innobase_fts_retrieve_ranking,
541 NULL};
542
543 /** @return version of the extended FTS API */
544 static
545 uint
innobase_fts_get_version()546 innobase_fts_get_version()
547 {
548 /* Currently this doesn't make much sense as returning
549 HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
550 This supposed to ease future extensions. */
551 return(2);
552 }
553
554 /** @return Which part of the extended FTS API is supported */
555 static
556 ulonglong
innobase_fts_flags()557 innobase_fts_flags()
558 {
559 return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
560 }
561
562 /** Find and Retrieve the FTS doc_id for the current result row
563 @param[in,out] fts_hdl FTS handler
564 @return the document ID */
565 static
566 ulonglong
567 innobase_fts_retrieve_docid(
568 FT_INFO_EXT* fts_hdl);
569
570 /** Find and retrieve the size of the current result
571 @param[in,out] fts_hdl FTS handler
572 @return number of matching rows */
573 static
574 ulonglong
innobase_fts_count_matches(FT_INFO_EXT * fts_hdl)575 innobase_fts_count_matches(
576 FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
577 {
578 NEW_FT_INFO* handle = reinterpret_cast<NEW_FT_INFO*>(fts_hdl);
579
580 if (handle->ft_result->rankings_by_id != NULL) {
581 return(rbt_size(handle->ft_result->rankings_by_id));
582 } else {
583 return(0);
584 }
585 }
586
587 const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
588 innobase_fts_flags,
589 innobase_fts_retrieve_docid,
590 innobase_fts_count_matches};
591
592 #ifdef HAVE_PSI_INTERFACE
593 # define PSI_KEY(n) {&n##_key, #n, 0}
594 /* All RWLOCK used in Innodb are SX-locks */
595 # define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX}
596
597 /* Keys to register pthread mutexes/cond in the current file with
598 performance schema */
599 static mysql_pfs_key_t commit_cond_mutex_key;
600 static mysql_pfs_key_t commit_cond_key;
601 static mysql_pfs_key_t pending_checkpoint_mutex_key;
602 static mysql_pfs_key_t thd_destructor_thread_key;
603
604 static PSI_mutex_info all_pthread_mutexes[] = {
605 PSI_KEY(commit_cond_mutex),
606 PSI_KEY(pending_checkpoint_mutex),
607 };
608
609 static PSI_cond_info all_innodb_conds[] = {
610 PSI_KEY(commit_cond)
611 };
612
613 # ifdef UNIV_PFS_MUTEX
614 /* all_innodb_mutexes array contains mutexes that are
615 performance schema instrumented if "UNIV_PFS_MUTEX"
616 is defined */
617 static PSI_mutex_info all_innodb_mutexes[] = {
618 PSI_KEY(autoinc_mutex),
619 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
620 PSI_KEY(buffer_block_mutex),
621 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
622 PSI_KEY(buf_pool_mutex),
623 PSI_KEY(buf_pool_zip_mutex),
624 PSI_KEY(cache_last_read_mutex),
625 PSI_KEY(dict_foreign_err_mutex),
626 PSI_KEY(dict_sys_mutex),
627 PSI_KEY(recalc_pool_mutex),
628 PSI_KEY(fil_system_mutex),
629 PSI_KEY(flush_list_mutex),
630 PSI_KEY(fts_delete_mutex),
631 PSI_KEY(fts_doc_id_mutex),
632 PSI_KEY(log_flush_order_mutex),
633 PSI_KEY(hash_table_mutex),
634 PSI_KEY(ibuf_bitmap_mutex),
635 PSI_KEY(ibuf_mutex),
636 PSI_KEY(ibuf_pessimistic_insert_mutex),
637 PSI_KEY(index_online_log),
638 PSI_KEY(log_sys_mutex),
639 PSI_KEY(log_sys_write_mutex),
640 PSI_KEY(mutex_list_mutex),
641 PSI_KEY(page_zip_stat_per_index_mutex),
642 PSI_KEY(purge_sys_pq_mutex),
643 PSI_KEY(recv_sys_mutex),
644 PSI_KEY(recv_writer_mutex),
645 PSI_KEY(redo_rseg_mutex),
646 PSI_KEY(noredo_rseg_mutex),
647 # ifdef UNIV_DEBUG
648 PSI_KEY(rw_lock_debug_mutex),
649 # endif /* UNIV_DEBUG */
650 PSI_KEY(rw_lock_list_mutex),
651 PSI_KEY(rw_lock_mutex),
652 PSI_KEY(srv_innodb_monitor_mutex),
653 PSI_KEY(srv_misc_tmpfile_mutex),
654 PSI_KEY(srv_monitor_file_mutex),
655 PSI_KEY(buf_dblwr_mutex),
656 PSI_KEY(trx_pool_mutex),
657 PSI_KEY(trx_pool_manager_mutex),
658 PSI_KEY(srv_sys_mutex),
659 PSI_KEY(lock_mutex),
660 PSI_KEY(lock_wait_mutex),
661 PSI_KEY(trx_mutex),
662 PSI_KEY(srv_threads_mutex),
663 # ifndef PFS_SKIP_EVENT_MUTEX
664 PSI_KEY(event_mutex),
665 # endif /* PFS_SKIP_EVENT_MUTEX */
666 PSI_KEY(rtr_active_mutex),
667 PSI_KEY(rtr_match_mutex),
668 PSI_KEY(rtr_path_mutex),
669 PSI_KEY(trx_sys_mutex),
670 PSI_KEY(zip_pad_mutex)
671 };
672 # endif /* UNIV_PFS_MUTEX */
673
674 # ifdef UNIV_PFS_RWLOCK
675 /* all_innodb_rwlocks array contains rwlocks that are
676 performance schema instrumented if "UNIV_PFS_RWLOCK"
677 is defined */
678 static PSI_rwlock_info all_innodb_rwlocks[] = {
679 PSI_RWLOCK_KEY(btr_search_latch),
680 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
681 PSI_RWLOCK_KEY(buf_block_lock),
682 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
683 # ifdef UNIV_DEBUG
684 PSI_RWLOCK_KEY(buf_block_debug_latch),
685 # endif /* UNIV_DEBUG */
686 PSI_RWLOCK_KEY(dict_operation_lock),
687 PSI_RWLOCK_KEY(fil_space_latch),
688 PSI_RWLOCK_KEY(checkpoint_lock),
689 PSI_RWLOCK_KEY(fts_cache_rw_lock),
690 PSI_RWLOCK_KEY(fts_cache_init_rw_lock),
691 PSI_RWLOCK_KEY(trx_i_s_cache_lock),
692 PSI_RWLOCK_KEY(trx_purge_latch),
693 PSI_RWLOCK_KEY(index_tree_rw_lock),
694 PSI_RWLOCK_KEY(hash_table_locks)
695 };
696 # endif /* UNIV_PFS_RWLOCK */
697
698 # ifdef UNIV_PFS_THREAD
699 /* all_innodb_threads array contains threads that are
700 performance schema instrumented if "UNIV_PFS_THREAD"
701 is defined */
702 static PSI_thread_info all_innodb_threads[] = {
703 PSI_KEY(buf_dump_thread),
704 PSI_KEY(dict_stats_thread),
705 PSI_KEY(io_handler_thread),
706 PSI_KEY(io_ibuf_thread),
707 PSI_KEY(io_log_thread),
708 PSI_KEY(io_read_thread),
709 PSI_KEY(io_write_thread),
710 PSI_KEY(page_cleaner_thread),
711 PSI_KEY(recv_writer_thread),
712 PSI_KEY(srv_error_monitor_thread),
713 PSI_KEY(srv_lock_timeout_thread),
714 PSI_KEY(srv_master_thread),
715 PSI_KEY(srv_monitor_thread),
716 PSI_KEY(srv_purge_thread),
717 PSI_KEY(srv_worker_thread),
718 PSI_KEY(trx_rollback_clean_thread),
719 PSI_KEY(thd_destructor_thread),
720 };
721 # endif /* UNIV_PFS_THREAD */
722
723 # ifdef UNIV_PFS_IO
724 /* all_innodb_files array contains the type of files that are
725 performance schema instrumented if "UNIV_PFS_IO" is defined */
726 static PSI_file_info all_innodb_files[] = {
727 PSI_KEY(innodb_data_file),
728 PSI_KEY(innodb_log_file),
729 PSI_KEY(innodb_temp_file)
730 };
731 # endif /* UNIV_PFS_IO */
732 #endif /* HAVE_PSI_INTERFACE */
733
734 static void innodb_remember_check_sysvar_funcs();
735 mysql_var_check_func check_sysvar_enum;
736 mysql_var_check_func check_sysvar_int;
737
738 // should page compression be used by default for new tables
739 static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
740 "Is compression the default for new tables",
741 NULL, NULL, FALSE);
742
743 /** Update callback for SET [SESSION] innodb_default_encryption_key_id */
744 static void
innodb_default_encryption_key_id_update(THD * thd,st_mysql_sys_var * var,void * var_ptr,const void * save)745 innodb_default_encryption_key_id_update(THD* thd, st_mysql_sys_var* var,
746 void* var_ptr, const void *save)
747 {
748 uint key_id = *static_cast<const uint*>(save);
749 if (key_id != FIL_DEFAULT_ENCRYPTION_KEY
750 && !encryption_key_id_exists(key_id)) {
751 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
752 ER_WRONG_ARGUMENTS,
753 "innodb_default_encryption_key=%u"
754 " is not available", key_id);
755 }
756 *static_cast<uint*>(var_ptr) = key_id;
757 }
758
759 static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG,
760 "Default encryption key id used for table encryption.",
761 NULL, innodb_default_encryption_key_id_update,
762 FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0);
763
764 /**
765 Structure for CREATE TABLE options (table options).
766 It needs to be called ha_table_option_struct.
767
768 The option values can be specified in the CREATE TABLE at the end:
769 CREATE TABLE ( ... ) *here*
770 */
771
772 ha_create_table_option innodb_table_option_list[]=
773 {
774 /* With this option user can enable page compression feature for the
775 table */
776 HA_TOPTION_SYSVAR("PAGE_COMPRESSED", page_compressed, compression_default),
777 /* With this option user can set zip compression level for page
778 compression for this table*/
779 HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
780 /* With this option the user can enable encryption for the table */
781 HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
782 /* With this option the user defines the key identifier using for the encryption */
783 HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id),
784
785 HA_TOPTION_END
786 };
787
788 /*************************************************************//**
789 Check whether valid argument given to innodb_ft_*_stopword_table.
790 This function is registered as a callback with MySQL.
791 @return 0 for valid stopword table */
792 static
793 int
794 innodb_stopword_table_validate(
795 /*===========================*/
796 THD* thd, /*!< in: thread handle */
797 struct st_mysql_sys_var* var, /*!< in: pointer to system
798 variable */
799 void* save, /*!< out: immediate result
800 for update function */
801 struct st_mysql_value* value); /*!< in: incoming string */
802
803 static bool is_mysql_datadir_path(const char *path);
804
805 /** Validate passed-in "value" is a valid directory name.
806 This function is registered as a callback with MySQL.
807 @param[in,out] thd thread handle
808 @param[in] var pointer to system variable
809 @param[out] save immediate result for update
810 @param[in] value incoming string
811 @return 0 for valid name */
812 static
813 int
innodb_tmpdir_validate(THD * thd,struct st_mysql_sys_var *,void * save,struct st_mysql_value * value)814 innodb_tmpdir_validate(
815 THD* thd,
816 struct st_mysql_sys_var*,
817 void* save,
818 struct st_mysql_value* value)
819 {
820
821 char* alter_tmp_dir;
822 char* innodb_tmp_dir;
823 char buff[OS_FILE_MAX_PATH];
824 int len = sizeof(buff);
825 char tmp_abs_path[FN_REFLEN + 2];
826
827 ut_ad(save != NULL);
828 ut_ad(value != NULL);
829
830 if (check_global_access(thd, FILE_ACL)) {
831 push_warning_printf(
832 thd, Sql_condition::WARN_LEVEL_WARN,
833 ER_WRONG_ARGUMENTS,
834 "InnoDB: FILE Permissions required");
835 *static_cast<const char**>(save) = NULL;
836 return(1);
837 }
838
839 alter_tmp_dir = (char*) value->val_str(value, buff, &len);
840
841 if (!alter_tmp_dir) {
842 *static_cast<const char**>(save) = alter_tmp_dir;
843 return(0);
844 }
845
846 if (strlen(alter_tmp_dir) > FN_REFLEN) {
847 push_warning_printf(
848 thd, Sql_condition::WARN_LEVEL_WARN,
849 ER_WRONG_ARGUMENTS,
850 "Path length should not exceed %d bytes", FN_REFLEN);
851 *static_cast<const char**>(save) = NULL;
852 return(1);
853 }
854
855 os_normalize_path(alter_tmp_dir);
856 my_realpath(tmp_abs_path, alter_tmp_dir, 0);
857 size_t tmp_abs_len = strlen(tmp_abs_path);
858
859 if (my_access(tmp_abs_path, F_OK)) {
860
861 push_warning_printf(
862 thd, Sql_condition::WARN_LEVEL_WARN,
863 ER_WRONG_ARGUMENTS,
864 "InnoDB: Path doesn't exist.");
865 *static_cast<const char**>(save) = NULL;
866 return(1);
867 } else if (my_access(tmp_abs_path, R_OK | W_OK)) {
868 push_warning_printf(
869 thd, Sql_condition::WARN_LEVEL_WARN,
870 ER_WRONG_ARGUMENTS,
871 "InnoDB: Server doesn't have permission in "
872 "the given location.");
873 *static_cast<const char**>(save) = NULL;
874 return(1);
875 }
876
877 MY_STAT stat_info_dir;
878
879 if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) {
880 if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) {
881
882 push_warning_printf(
883 thd, Sql_condition::WARN_LEVEL_WARN,
884 ER_WRONG_ARGUMENTS,
885 "Given path is not a directory. ");
886 *static_cast<const char**>(save) = NULL;
887 return(1);
888 }
889 }
890
891 if (!is_mysql_datadir_path(tmp_abs_path)) {
892
893 push_warning_printf(
894 thd, Sql_condition::WARN_LEVEL_WARN,
895 ER_WRONG_ARGUMENTS,
896 "InnoDB: Path Location should not be same as "
897 "mysql data directory location.");
898 *static_cast<const char**>(save) = NULL;
899 return(1);
900 }
901
902 innodb_tmp_dir = static_cast<char*>(
903 thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1));
904 *static_cast<const char**>(save) = innodb_tmp_dir;
905 return(0);
906 }
907
908 /******************************************************************//**
909 Maps a MySQL trx isolation level code to the InnoDB isolation level code
910 @return InnoDB isolation level */
911 static inline
912 ulint
913 innobase_map_isolation_level(
914 /*=========================*/
915 enum_tx_isolation iso); /*!< in: MySQL isolation level code */
916
917 /** Gets field offset for a field in a table.
918 @param[in] table MySQL table object
919 @param[in] field MySQL field object (from table->field array)
920 @return offset */
921 static inline
922 uint
get_field_offset(const TABLE * table,const Field * field)923 get_field_offset(
924 const TABLE* table,
925 const Field* field)
926 {
927 return field->offset(table->record[0]);
928 }
929
930
931 /*************************************************************//**
932 Check for a valid value of innobase_compression_algorithm.
933 @return 0 for valid innodb_compression_algorithm. */
934 static
935 int
936 innodb_compression_algorithm_validate(
937 /*==================================*/
938 THD* thd, /*!< in: thread handle */
939 struct st_mysql_sys_var* var, /*!< in: pointer to system
940 variable */
941 void* save, /*!< out: immediate result
942 for update function */
943 struct st_mysql_value* value); /*!< in: incoming string */
944
945 static ibool innodb_have_lzo=IF_LZO(1, 0);
946 static ibool innodb_have_lz4=IF_LZ4(1, 0);
947 static ibool innodb_have_lzma=IF_LZMA(1, 0);
948 static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
949 static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
950 static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
951
952 static
953 int
954 innodb_encrypt_tables_validate(
955 /*==================================*/
956 THD* thd, /*!< in: thread handle */
957 struct st_mysql_sys_var* var, /*!< in: pointer to system
958 variable */
959 void* save, /*!< out: immediate result
960 for update function */
961 struct st_mysql_value* value); /*!< in: incoming string */
962
/* Name string of the InnoDB storage engine. */
static const char	innobase_hton_name[] = "InnoDB";
964
965 static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
966 "Enable InnoDB locking in LOCK TABLES",
967 /* check_func */ NULL, /* update_func */ NULL,
968 /* default */ TRUE);
969
970 static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
971 "Use strict mode when evaluating create options.",
972 NULL, NULL, TRUE);
973
974 static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
975 "Create FTS index with stopword.",
976 NULL, NULL,
977 /* default */ TRUE);
978
979 static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
980 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
981 NULL, NULL, 50, 0, 1024 * 1024 * 1024, 0);
982
983 static MYSQL_THDVAR_STR(ft_user_stopword_table,
984 PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
985 "User supplied stopword table name, effective in the session level.",
986 innodb_stopword_table_validate, NULL, NULL);
987
988 static MYSQL_THDVAR_STR(tmpdir,
989 PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
990 "Directory for temporary non-tablespace files.",
991 innodb_tmpdir_validate, NULL, NULL);
992
993 static SHOW_VAR innodb_status_variables[]= {
994 {"buffer_pool_dump_status",
995 (char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR},
996 {"buffer_pool_load_status",
997 (char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
998 {"buffer_pool_resize_status",
999 (char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR},
1000 {"buffer_pool_load_incomplete",
1001 &export_vars.innodb_buffer_pool_load_incomplete, SHOW_BOOL},
1002 {"buffer_pool_pages_data",
1003 (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
1004 {"buffer_pool_bytes_data",
1005 (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
1006 {"buffer_pool_pages_dirty",
1007 (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
1008 {"buffer_pool_bytes_dirty",
1009 (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
1010 {"buffer_pool_pages_flushed",
1011 (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
1012 {"buffer_pool_pages_free",
1013 (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
1014 #ifdef UNIV_DEBUG
1015 {"buffer_pool_pages_latched",
1016 (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
1017 #endif /* UNIV_DEBUG */
1018 {"buffer_pool_pages_misc",
1019 (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
1020 {"buffer_pool_pages_total",
1021 (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
1022 {"buffer_pool_read_ahead_rnd",
1023 (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
1024 {"buffer_pool_read_ahead",
1025 (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
1026 {"buffer_pool_read_ahead_evicted",
1027 (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
1028 {"buffer_pool_read_requests",
1029 (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
1030 {"buffer_pool_reads",
1031 (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
1032 {"buffer_pool_wait_free",
1033 (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
1034 {"buffer_pool_write_requests",
1035 (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
1036 {"data_fsyncs",
1037 (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
1038 {"data_pending_fsyncs",
1039 (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
1040 {"data_pending_reads",
1041 (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
1042 {"data_pending_writes",
1043 (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
1044 {"data_read",
1045 (char*) &export_vars.innodb_data_read, SHOW_LONG},
1046 {"data_reads",
1047 (char*) &export_vars.innodb_data_reads, SHOW_LONG},
1048 {"data_writes",
1049 (char*) &export_vars.innodb_data_writes, SHOW_LONG},
1050 {"data_written",
1051 (char*) &export_vars.innodb_data_written, SHOW_LONG},
1052 {"dblwr_pages_written",
1053 (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
1054 {"dblwr_writes",
1055 (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
1056 {"log_waits",
1057 (char*) &export_vars.innodb_log_waits, SHOW_LONG},
1058 {"log_write_requests",
1059 (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
1060 {"log_writes",
1061 (char*) &export_vars.innodb_log_writes, SHOW_LONG},
1062 {"os_log_fsyncs",
1063 (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
1064 {"os_log_pending_fsyncs",
1065 (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
1066 {"os_log_pending_writes",
1067 (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
1068 {"os_log_written",
1069 (char*) &export_vars.innodb_os_log_written, SHOW_LONGLONG},
1070 {"page_size",
1071 (char*) &export_vars.innodb_page_size, SHOW_LONG},
1072 {"pages_created",
1073 (char*) &export_vars.innodb_pages_created, SHOW_LONG},
1074 {"pages_read",
1075 (char*) &export_vars.innodb_pages_read, SHOW_LONG},
1076 {"pages0_read",
1077 (char*) &export_vars.innodb_page0_read, SHOW_LONG},
1078 {"pages_written",
1079 (char*) &export_vars.innodb_pages_written, SHOW_LONG},
1080 {"row_lock_current_waits",
1081 (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
1082 {"row_lock_time",
1083 (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
1084 {"row_lock_time_avg",
1085 (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
1086 {"row_lock_time_max",
1087 (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
1088 {"row_lock_waits",
1089 (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
1090 {"rows_deleted",
1091 (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
1092 {"rows_inserted",
1093 (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
1094 {"rows_read",
1095 (char*) &export_vars.innodb_rows_read, SHOW_LONG},
1096 {"rows_updated",
1097 (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
1098 {"system_rows_deleted",
1099 (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG},
1100 {"system_rows_inserted",
1101 (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG},
1102 {"system_rows_read",
1103 (char*) &export_vars.innodb_system_rows_read, SHOW_LONG},
1104 {"system_rows_updated",
1105 (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG},
1106 {"num_open_files",
1107 (char*) &export_vars.innodb_num_open_files, SHOW_LONG},
1108 {"truncated_status_writes",
1109 (char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
1110 {"available_undo_logs",
1111 (char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
1112 {"undo_truncations",
1113 (char*) &export_vars.innodb_undo_truncations, SHOW_LONG},
1114
1115 /* Status variables for page compression */
1116 {"page_compression_saved",
1117 (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
1118 {"num_index_pages_written",
1119 (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
1120 {"num_non_index_pages_written",
1121 (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG},
1122 {"num_pages_page_compressed",
1123 (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
1124 {"num_page_compressed_trim_op",
1125 (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
1126 {"num_pages_page_decompressed",
1127 (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
1128 {"num_pages_page_compression_error",
1129 (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG},
1130 {"num_pages_encrypted",
1131 (char*) &export_vars.innodb_pages_encrypted, SHOW_LONGLONG},
1132 {"num_pages_decrypted",
1133 (char*) &export_vars.innodb_pages_decrypted, SHOW_LONGLONG},
1134 {"have_lz4",
1135 (char*) &innodb_have_lz4, SHOW_BOOL},
1136 {"have_lzo",
1137 (char*) &innodb_have_lzo, SHOW_BOOL},
1138 {"have_lzma",
1139 (char*) &innodb_have_lzma, SHOW_BOOL},
1140 {"have_bzip2",
1141 (char*) &innodb_have_bzip2, SHOW_BOOL},
1142 {"have_snappy",
1143 (char*) &innodb_have_snappy, SHOW_BOOL},
1144 {"have_punch_hole",
1145 (char*) &innodb_have_punch_hole, SHOW_BOOL},
1146
1147 /* Defragmentation */
1148 {"defragment_compression_failures",
1149 (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
1150 {"defragment_failures",
1151 (char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
1152 {"defragment_count",
1153 (char*) &export_vars.innodb_defragment_count, SHOW_LONG},
1154
1155 {"instant_alter_column",
1156 (char*) &export_vars.innodb_instant_alter_column, SHOW_LONG},
1157
1158 /* Online alter table status variables */
1159 {"onlineddl_rowlog_rows",
1160 (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
1161 {"onlineddl_rowlog_pct_used",
1162 (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG},
1163 {"onlineddl_pct_progress",
1164 (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG},
1165
1166 /* Times secondary index lookup triggered cluster lookup and
1167 times prefix optimization avoided triggering cluster lookup */
1168 {"secondary_index_triggered_cluster_reads",
1169 (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG},
1170 {"secondary_index_triggered_cluster_reads_avoided",
1171 (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
1172
1173 /* Encryption */
1174 {"encryption_rotation_pages_read_from_cache",
1175 (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
1176 SHOW_LONG},
1177 {"encryption_rotation_pages_read_from_disk",
1178 (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
1179 SHOW_LONG},
1180 {"encryption_rotation_pages_modified",
1181 (char*) &export_vars.innodb_encryption_rotation_pages_modified,
1182 SHOW_LONG},
1183 {"encryption_rotation_pages_flushed",
1184 (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
1185 SHOW_LONG},
1186 {"encryption_rotation_estimated_iops",
1187 (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
1188 SHOW_LONG},
1189 {"encryption_key_rotation_list_length",
1190 (char*)&export_vars.innodb_key_rotation_list_length,
1191 SHOW_LONGLONG},
1192 {"encryption_n_merge_blocks_encrypted",
1193 (char*)&export_vars.innodb_n_merge_blocks_encrypted,
1194 SHOW_LONGLONG},
1195 {"encryption_n_merge_blocks_decrypted",
1196 (char*)&export_vars.innodb_n_merge_blocks_decrypted,
1197 SHOW_LONGLONG},
1198 {"encryption_n_rowlog_blocks_encrypted",
1199 (char*)&export_vars.innodb_n_rowlog_blocks_encrypted,
1200 SHOW_LONGLONG},
1201 {"encryption_n_rowlog_blocks_decrypted",
1202 (char*)&export_vars.innodb_n_rowlog_blocks_decrypted,
1203 SHOW_LONGLONG},
1204 {"encryption_n_temp_blocks_encrypted",
1205 (char*)&export_vars.innodb_n_temp_blocks_encrypted,
1206 SHOW_LONGLONG},
1207 {"encryption_n_temp_blocks_decrypted",
1208 (char*)&export_vars.innodb_n_temp_blocks_decrypted,
1209 SHOW_LONGLONG},
1210
	/* Scrubbing */
1212 {"scrub_background_page_reorganizations",
1213 (char*) &export_vars.innodb_scrub_page_reorganizations,
1214 SHOW_LONG},
1215 {"scrub_background_page_splits",
1216 (char*) &export_vars.innodb_scrub_page_splits,
1217 SHOW_LONG},
1218 {"scrub_background_page_split_failures_underflow",
1219 (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
1220 SHOW_LONG},
1221 {"scrub_background_page_split_failures_out_of_filespace",
1222 (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
1223 SHOW_LONG},
1224 {"scrub_background_page_split_failures_missing_index",
1225 (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
1226 SHOW_LONG},
1227 {"scrub_background_page_split_failures_unknown",
1228 (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
1229 SHOW_LONG},
1230 {"scrub_log",
1231 (char*) &export_vars.innodb_scrub_log,
1232 SHOW_LONGLONG},
1233 {"encryption_num_key_requests",
1234 (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
1235
1236 {NullS, NullS, SHOW_LONG}
1237 };
1238
1239 /*****************************************************************//**
1240 Frees a possible InnoDB trx object associated with the current THD.
1241 @return 0 or error number */
1242 static
1243 int
1244 innobase_close_connection(
1245 /*======================*/
1246 handlerton* hton, /*!< in/out: InnoDB handlerton */
1247 THD* thd); /*!< in: MySQL thread handle for
1248 which to close the connection */
1249
1250 /** Cancel any pending lock request associated with the current THD.
1251 @sa THD::awake() @sa ha_kill_query() */
1252 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels);
1253 static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
1254
1255 /*****************************************************************//**
1256 Commits a transaction in an InnoDB database or marks an SQL statement
1257 ended.
1258 @return 0 */
1259 static
1260 int
1261 innobase_commit(
1262 /*============*/
1263 handlerton* hton, /*!< in/out: InnoDB handlerton */
1264 THD* thd, /*!< in: MySQL thread handle of the
1265 user for whom the transaction should
1266 be committed */
1267 bool commit_trx); /*!< in: true - commit transaction
1268 false - the current SQL statement
1269 ended */
1270
/*****************************************************************//**
Rolls back a transaction or the current SQL statement.
@return 0 or error number */
1275 static
1276 int
1277 innobase_rollback(
1278 /*==============*/
1279 handlerton* hton, /*!< in/out: InnoDB handlerton */
1280 THD* thd, /*!< in: handle to the MySQL thread
1281 of the user whose transaction should
1282 be rolled back */
1283 bool rollback_trx); /*!< in: TRUE - rollback entire
1284 transaction FALSE - rollback the current
1285 statement only */
1286
1287 /*****************************************************************//**
1288 Rolls back a transaction to a savepoint.
1289 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1290 given name */
1291 static
1292 int
1293 innobase_rollback_to_savepoint(
1294 /*===========================*/
1295 handlerton* hton, /*!< in/out: InnoDB handlerton */
1296 THD* thd, /*!< in: handle to the MySQL thread of
1297 the user whose XA transaction should
1298 be rolled back to savepoint */
1299 void* savepoint); /*!< in: savepoint data */
1300
1301 /*****************************************************************//**
1302 Check whether innodb state allows to safely release MDL locks after
1303 rollback to savepoint.
@return true if it is safe, false if it is not safe. */
1305 static
1306 bool
1307 innobase_rollback_to_savepoint_can_release_mdl(
1308 /*===========================================*/
1309 handlerton* hton, /*!< in/out: InnoDB handlerton */
1310 THD* thd); /*!< in: handle to the MySQL thread of
1311 the user whose XA transaction should
1312 be rolled back to savepoint */
1313
1314 /*****************************************************************//**
1315 Sets a transaction savepoint.
1316 @return always 0, that is, always succeeds */
1317 static
1318 int
1319 innobase_savepoint(
1320 /*===============*/
1321 handlerton* hton, /*!< in/out: InnoDB handlerton */
1322 THD* thd, /*!< in: handle to the MySQL thread of
1323 the user's XA transaction for which
1324 we need to take a savepoint */
1325 void* savepoint); /*!< in: savepoint data */
1326
1327 /*****************************************************************//**
1328 Release transaction savepoint name.
1329 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1330 given name */
1331 static
1332 int
1333 innobase_release_savepoint(
1334 /*=======================*/
1335 handlerton* hton, /*!< in/out: handlerton for InnoDB */
1336 THD* thd, /*!< in: handle to the MySQL thread
1337 of the user whose transaction's
1338 savepoint should be released */
1339 void* savepoint); /*!< in: savepoint data */
1340
1341 static void innobase_checkpoint_request(handlerton *hton, void *cookie);
1342
1343 /** @brief Initialize the default value of innodb_commit_concurrency.
1344
1345 Once InnoDB is running, the innodb_commit_concurrency must not change
1346 from zero to nonzero. (Bug #42101)
1347
1348 The initial default value is 0, and without this extra initialization,
1349 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
1350 to 0, even if it was initially set to nonzero at the command line
1351 or configuration file. */
1352 static
1353 void
1354 innobase_commit_concurrency_init_default();
1355 /*=======================================*/
1356
1357 /** @brief Adjust some InnoDB startup parameters based on file contents
1358 or innodb_page_size. */
1359 static
1360 void
1361 innodb_params_adjust();
1362
1363 /*******************************************************************//**
1364 This function is used to prepare an X/Open XA distributed transaction.
1365 @return 0 or error number */
1366 static
1367 int
1368 innobase_xa_prepare(
1369 /*================*/
1370 handlerton* hton, /*!< in: InnoDB handlerton */
1371 THD* thd, /*!< in: handle to the MySQL thread of
1372 the user whose XA transaction should
1373 be prepared */
1374 bool all); /*!< in: true - prepare transaction
1375 false - the current SQL statement
1376 ended */
1377 /*******************************************************************//**
1378 This function is used to recover X/Open XA distributed transactions.
1379 @return number of prepared transactions stored in xid_list */
1380 static
1381 int
1382 innobase_xa_recover(
1383 /*================*/
1384 handlerton* hton, /*!< in: InnoDB handlerton */
1385 XID* xid_list, /*!< in/out: prepared transactions */
1386 uint len); /*!< in: number of slots in xid_list */
1387 /*******************************************************************//**
1388 This function is used to commit one X/Open XA distributed transaction
1389 which is in the prepared state
1390 @return 0 or error number */
1391 static
1392 int
1393 innobase_commit_by_xid(
1394 /*===================*/
1395 handlerton* hton, /*!< in: InnoDB handlerton */
1396 XID* xid); /*!< in: X/Open XA transaction
1397 identification */
1398 /** Remove all tables in the named database inside InnoDB.
1399 @param[in] hton handlerton from InnoDB
1400 @param[in] path Database path; Inside InnoDB the name of the last
1401 directory in the path is used as the database name.
1402 For example, in 'mysql/data/test' the database name is 'test'. */
1403 static
1404 void
1405 innobase_drop_database(
1406 handlerton* hton,
1407 char* path);
1408
1409 /** Shut down the InnoDB storage engine.
1410 @return 0 */
1411 static
1412 int
1413 innobase_end(handlerton*, ha_panic_function);
1414
1415 /*****************************************************************//**
1416 Creates an InnoDB transaction struct for the thd if it does not yet have one.
1417 Starts a new InnoDB transaction if a transaction is not yet started. And
1418 assigns a new snapshot for a consistent read if the transaction does not yet
1419 have one.
1420 @return 0 */
1421 static
1422 int
1423 innobase_start_trx_and_assign_read_view(
1424 /*====================================*/
1425 handlerton* hton, /* in: InnoDB handlerton */
1426 THD* thd); /* in: MySQL thread handle of the
1427 user for whom the transaction should
1428 be committed */
1429
1430 /** Flush InnoDB redo logs to the file system.
1431 @param[in] hton InnoDB handlerton
1432 @param[in] binlog_group_flush true if we got invoked by binlog
1433 group commit during flush stage, false in other cases.
1434 @return false */
1435 static
1436 bool
innobase_flush_logs(handlerton * hton,bool binlog_group_flush)1437 innobase_flush_logs(
1438 handlerton* hton,
1439 bool binlog_group_flush)
1440 {
1441 DBUG_ENTER("innobase_flush_logs");
1442 DBUG_ASSERT(hton == innodb_hton_ptr);
1443
1444 if (srv_read_only_mode) {
1445 DBUG_RETURN(false);
1446 }
1447
1448 /* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar.
1449 Else, we got invoked by binlog group commit during flush stage. */
1450
1451 if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) {
1452 /* innodb_flush_log_at_trx_commit=0
1453 (write and sync once per second).
1454 Do not flush the redo log during binlog group commit. */
1455 DBUG_RETURN(false);
1456 }
1457
1458 /* Flush the redo log buffer to the redo log file.
1459 Sync it to disc if we are in FLUSH LOGS, or if
1460 innodb_flush_log_at_trx_commit=1
1461 (write and sync at each commit). */
1462 log_buffer_flush_to_disk(!binlog_group_flush
1463 || srv_flush_log_at_trx_commit == 1);
1464
1465 DBUG_RETURN(false);
1466 }
1467
1468 /** Flush InnoDB redo logs to the file system.
1469 @param[in] hton InnoDB handlerton
1470 @param[in] binlog_group_flush true if we got invoked by binlog
1471 group commit during flush stage, false in other cases.
1472 @return false */
1473 static
1474 bool
innobase_flush_logs(handlerton * hton)1475 innobase_flush_logs(
1476 handlerton* hton)
1477 {
1478 return innobase_flush_logs(hton, true);
1479 }
1480
1481 /************************************************************************//**
1482 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
1483 InnoDB Monitor to the client.
1484 @return 0 on success */
1485 static
1486 int
1487 innodb_show_status(
1488 /*===============*/
1489 handlerton* hton, /*!< in: the innodb handlerton */
1490 THD* thd, /*!< in: the MySQL query thread of
1491 the caller */
1492 stat_print_fn* stat_print);
1493 /************************************************************************//**
1494 Return 0 on success and non-zero on failure. Note: the bool return type
1495 seems to be abused here, should be an int. */
1496 static
1497 bool
1498 innobase_show_status(
1499 /*=================*/
1500 handlerton* hton, /*!< in: the innodb handlerton */
1501 THD* thd, /*!< in: the MySQL query thread of
1502 the caller */
1503 stat_print_fn* stat_print,
1504 enum ha_stat_type stat_type);
1505
1506 /****************************************************************//**
1507 Parse and enable InnoDB monitor counters during server startup.
1508 User can enable monitor counters/groups by specifying
1509 "loose-innodb_monitor_enable = monitor_name1;monitor_name2..."
1510 in server configuration file or at the command line. */
1511 static
1512 void
1513 innodb_enable_monitor_at_startup(
1514 /*=============================*/
1515 char* str); /*!< in: monitor counter enable list */
1516
#ifdef MYSQL_STORE_FTS_DOC_ID
/** Write a document id into the FTS_DOC_ID field of a table.
@param[in,out]	tbl	table containing FULLTEXT index
@param[in]	doc_id	FTS_DOC_ID value */
static
void
innobase_fts_store_docid(
	TABLE*		tbl,
	ulonglong	doc_id)
{
	/* Open up the write set for the duration of the store; the
	previous column map is handed back to us so it can be restored. */
	my_bitmap_map*	saved_map
		= dbug_tmp_use_all_columns(tbl, tbl->write_set);

	/* The value is passed as a signed integer, flagged as unsigned. */
	const longlong	field_value = static_cast<longlong>(doc_id);

	tbl->fts_doc_id_field->store(field_value, true);

	dbug_tmp_restore_column_map(tbl->write_set, saved_map);
}
#endif
1535
1536 /*************************************************************//**
1537 Check for a valid value of innobase_commit_concurrency.
1538 @return 0 for valid innodb_commit_concurrency */
1539 static
1540 int
innobase_commit_concurrency_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)1541 innobase_commit_concurrency_validate(
1542 /*=================================*/
1543 THD*, st_mysql_sys_var*,
1544 void* save, /*!< out: immediate result
1545 for update function */
1546 struct st_mysql_value* value) /*!< in: incoming string */
1547 {
1548 long long intbuf;
1549 ulong commit_concurrency;
1550
1551 DBUG_ENTER("innobase_commit_concurrency_validate");
1552
1553 if (value->val_int(value, &intbuf)) {
1554 /* The value is NULL. That is invalid. */
1555 DBUG_RETURN(1);
1556 }
1557
1558 *reinterpret_cast<ulong*>(save) = commit_concurrency
1559 = static_cast<ulong>(intbuf);
1560
1561 /* Allow the value to be updated, as long as it remains zero
1562 or nonzero. */
1563 DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency));
1564 }
1565
1566 /*******************************************************************//**
1567 Function for constructing an InnoDB table handler instance. */
1568 static
1569 handler*
innobase_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)1570 innobase_create_handler(
1571 /*====================*/
1572 handlerton* hton, /*!< in: InnoDB handlerton */
1573 TABLE_SHARE* table,
1574 MEM_ROOT* mem_root)
1575 {
1576 return(new (mem_root) ha_innobase(hton, table));
1577 }
1578
1579 /* General functions */
1580
1581 /** Check that a page_size is correct for InnoDB.
1582 If correct, set the associated page_size_shift which is the power of 2
1583 for this page size.
1584 @param[in] page_size Page Size to evaluate
1585 @return an associated page_size_shift if valid, 0 if invalid. */
1586 inline
1587 ulong
innodb_page_size_validate(ulong page_size)1588 innodb_page_size_validate(
1589 ulong page_size)
1590 {
1591 ulong n;
1592
1593 DBUG_ENTER("innodb_page_size_validate");
1594
1595 for (n = UNIV_PAGE_SIZE_SHIFT_MIN;
1596 n <= UNIV_PAGE_SIZE_SHIFT_MAX;
1597 n++) {
1598 if (page_size == static_cast<ulong>(1 << n)) {
1599 DBUG_RETURN(n);
1600 }
1601 }
1602
1603 DBUG_RETURN(0);
1604 }
1605
1606 /******************************************************************//**
1607 Returns true if the thread is the replication thread on the slave
1608 server. Used in srv_conc_enter_innodb() to determine if the thread
1609 should be allowed to enter InnoDB - the replication thread is treated
1610 differently than other threads. Also used in
1611 srv_conc_force_exit_innodb().
1612 @return true if thd is the replication thread */
1613 ibool
thd_is_replication_slave_thread(THD * thd)1614 thd_is_replication_slave_thread(
1615 /*============================*/
1616 THD* thd) /*!< in: thread handle */
1617 {
1618 return thd && ((ibool) thd_slave_thread(thd));
1619 }
1620
1621 /******************************************************************//**
1622 Returns true if transaction should be flagged as read-only.
1623 @return true if the thd is marked as read-only */
1624 bool
thd_trx_is_read_only(THD * thd)1625 thd_trx_is_read_only(
1626 /*=================*/
1627 THD* thd) /*!< in: thread handle */
1628 {
1629 return(thd != 0 && thd_tx_is_read_only(thd));
1630 }
1631
1632 static MYSQL_THDVAR_BOOL(background_thread,
1633 PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOSYSVAR,
1634 "Internal (not user visible) flag to mark "
1635 "background purge threads", NULL, NULL, 0);
1636
1637 /** Create a MYSQL_THD for a background thread and mark it as such.
1638 @param name thread info for SHOW PROCESSLIST
1639 @return new MYSQL_THD */
1640 MYSQL_THD
innobase_create_background_thd(const char * name)1641 innobase_create_background_thd(const char* name)
1642 /*============================*/
1643 {
1644 MYSQL_THD thd= create_thd();
1645 thd_proc_info(thd, name);
1646 THDVAR(thd, background_thread) = true;
1647 return thd;
1648 }
1649
1650
1651 /** Destroy a background purge thread THD.
1652 @param[in] thd MYSQL_THD to destroy */
1653 void
innobase_destroy_background_thd(MYSQL_THD thd)1654 innobase_destroy_background_thd(
1655 /*============================*/
1656 MYSQL_THD thd)
1657 {
1658 /* need to close the connection explicitly, the server won't do it
1659 if innodb is in the PLUGIN_IS_DYING state */
1660 innobase_close_connection(innodb_hton_ptr, thd);
1661 thd_set_ha_data(thd, innodb_hton_ptr, NULL);
1662 destroy_thd(thd);
1663 }
1664
1665 /** Close opened tables, free memory, delete items for a MYSQL_THD.
1666 @param[in] thd MYSQL_THD to reset */
1667 void
innobase_reset_background_thd(MYSQL_THD thd)1668 innobase_reset_background_thd(MYSQL_THD thd)
1669 {
1670 if (!thd) {
1671 thd = current_thd;
1672 }
1673
1674 ut_ad(thd);
1675 ut_ad(THDVAR(thd, background_thread));
1676
1677 /* background purge thread */
1678 const char *proc_info= thd_proc_info(thd, "reset");
1679 reset_thd(thd);
1680 thd_proc_info(thd, proc_info);
1681 }
1682
1683
1684 /******************************************************************//**
1685 Check if the transaction is an auto-commit transaction. TRUE also
1686 implies that it is a SELECT (read-only) transaction.
1687 @return true if the transaction is an auto commit read-only transaction. */
1688 ibool
thd_trx_is_auto_commit(THD * thd)1689 thd_trx_is_auto_commit(
1690 /*===================*/
1691 THD* thd) /*!< in: thread handle, can be NULL */
1692 {
1693 return(thd != NULL
1694 && !thd_test_options(
1695 thd,
1696 OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
1697 && thd_is_select(thd));
1698 }
1699
1700 /** Enter InnoDB engine after checking the max number of user threads
1701 allowed, else the thread is put into sleep.
1702 @param[in,out] prebuilt row prebuilt handler */
innobase_srv_conc_enter_innodb(row_prebuilt_t * prebuilt)1703 static inline void innobase_srv_conc_enter_innodb(row_prebuilt_t *prebuilt)
1704 {
1705 trx_t* trx = prebuilt->trx;
1706
1707 #ifdef WITH_WSREP
1708 if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
1709 #endif /* WITH_WSREP */
1710
1711 if (srv_thread_concurrency) {
1712 if (trx->n_tickets_to_enter_innodb > 0) {
1713
1714 /* If trx has 'free tickets' to enter the engine left,
1715 then use one such ticket */
1716
1717 --trx->n_tickets_to_enter_innodb;
1718
1719 } else if (trx->mysql_thd != NULL
1720 && thd_is_replication_slave_thread(trx->mysql_thd)) {
1721 const ulonglong end = my_interval_timer()
1722 + ulonglong(srv_replication_delay) * 1000000;
1723 while ((srv_conc_get_active_threads()
1724 >= srv_thread_concurrency)
1725 && my_interval_timer() < end) {
1726 os_thread_sleep(2000 /* 2 ms */);
1727 }
1728 } else {
1729 srv_conc_enter_innodb(prebuilt);
1730 }
1731 }
1732 }
1733
1734 /** Note that the thread wants to leave InnoDB only if it doesn't have
1735 any spare tickets.
1736 @param[in,out] m_prebuilt row prebuilt handler */
innobase_srv_conc_exit_innodb(row_prebuilt_t * prebuilt)1737 static inline void innobase_srv_conc_exit_innodb(row_prebuilt_t *prebuilt)
1738 {
1739 ut_ad(!sync_check_iterate(sync_check()));
1740
1741 trx_t* trx = prebuilt->trx;
1742
1743 #ifdef WITH_WSREP
1744 if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
1745 #endif /* WITH_WSREP */
1746
1747 /* This is to avoid making an unnecessary function call. */
1748 if (trx->declared_to_be_inside_innodb
1749 && trx->n_tickets_to_enter_innodb == 0) {
1750
1751 srv_conc_force_exit_innodb(trx);
1752 }
1753 }
1754
1755 /******************************************************************//**
1756 Force a thread to leave InnoDB even if it has spare tickets. */
1757 static inline
1758 void
innobase_srv_conc_force_exit_innodb(trx_t * trx)1759 innobase_srv_conc_force_exit_innodb(
1760 /*================================*/
1761 trx_t* trx) /*!< in: transaction handle */
1762 {
1763 ut_ad(!sync_check_iterate(sync_check()));
1764
1765 /* This is to avoid making an unnecessary function call. */
1766 if (trx->declared_to_be_inside_innodb) {
1767 srv_conc_force_exit_innodb(trx);
1768 }
1769 }
1770
1771 /******************************************************************//**
1772 Returns the NUL terminated value of glob_hostname.
1773 @return pointer to glob_hostname. */
1774 const char*
server_get_hostname()1775 server_get_hostname()
1776 /*=================*/
1777 {
1778 return(glob_hostname);
1779 }
1780
1781 /******************************************************************//**
1782 Returns true if the transaction this thread is processing has edited
1783 non-transactional tables. Used by the deadlock detector when deciding
1784 which transaction to rollback in case of a deadlock - we try to avoid
1785 rolling back transactions that have edited non-transactional tables.
1786 @return true if non-transactional tables have been edited */
1787 ibool
thd_has_edited_nontrans_tables(THD * thd)1788 thd_has_edited_nontrans_tables(
1789 /*===========================*/
1790 THD* thd) /*!< in: thread handle */
1791 {
1792 return((ibool) thd_non_transactional_update(thd));
1793 }
1794
1795 /* Return high resolution timestamp for the start of the current query */
1796 UNIV_INTERN
1797 unsigned long long
thd_query_start_micro(const THD * thd)1798 thd_query_start_micro(
1799 const THD* thd) /*!< in: thread handle */
1800 {
1801 return thd_start_utime(thd);
1802 }
1803
1804 /******************************************************************//**
1805 Returns true if the thread is executing a SELECT statement.
1806 @return true if thd is executing SELECT */
1807 ibool
thd_is_select(const THD * thd)1808 thd_is_select(
1809 /*==========*/
1810 const THD* thd) /*!< in: thread handle */
1811 {
1812 return(thd_sql_command(thd) == SQLCOM_SELECT);
1813 }
1814
1815 /******************************************************************//**
1816 Returns the lock wait timeout for the current connection.
1817 @return the lock wait timeout, in seconds */
1818 ulong
thd_lock_wait_timeout(THD * thd)1819 thd_lock_wait_timeout(
1820 /*==================*/
1821 THD* thd) /*!< in: thread handle, or NULL to query
1822 the global innodb_lock_wait_timeout */
1823 {
1824 /* According to <mysql/plugin.h>, passing thd == NULL
1825 returns the global value of the session variable. */
1826 return(THDVAR(thd, lock_wait_timeout));
1827 }
1828
1829 /** Get the value of innodb_tmpdir.
1830 @param[in] thd thread handle, or NULL to query
1831 the global innodb_tmpdir.
1832 @retval NULL if innodb_tmpdir="" */
1833 const char*
thd_innodb_tmpdir(THD * thd)1834 thd_innodb_tmpdir(
1835 THD* thd)
1836 {
1837 ut_ad(!sync_check_iterate(sync_check()));
1838
1839 const char* tmp_dir = THDVAR(thd, tmpdir);
1840
1841 if (tmp_dir != NULL && *tmp_dir == '\0') {
1842 tmp_dir = NULL;
1843 }
1844
1845 return(tmp_dir);
1846 }
1847
1848 /** Obtain the InnoDB transaction of a MySQL thread.
1849 @param[in,out] thd thread handle
1850 @return reference to transaction pointer */
thd_to_trx(THD * thd)1851 static trx_t* thd_to_trx(THD* thd)
1852 {
1853 return reinterpret_cast<trx_t*>(thd_get_ha_data(thd, innodb_hton_ptr));
1854 }
1855
#ifdef WITH_WSREP
/********************************************************************//**
Returns the id of the InnoDB transaction attached to a MySQL thread.
The transaction pointer obtained from thd_to_trx() is dereferenced
without a NULL check.
@return transaction id */
__attribute__((warn_unused_result, nonnull))
ulonglong
thd_to_trx_id(
	THD*	thd)	/*!< in: MySQL thread */
{
	const trx_t*	trx = thd_to_trx(thd);

	return trx->id;
}
#endif /* WITH_WSREP */
1868
1869 /********************************************************************//**
1870 Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
1871 time calls srv_active_wake_master_thread. This function should be used
1872 when a single database operation may introduce a small need for
1873 server utility activity, like checkpointing. */
1874 inline
1875 void
innobase_active_small(void)1876 innobase_active_small(void)
1877 /*=======================*/
1878 {
1879 innobase_active_counter++;
1880
1881 if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
1882 srv_active_wake_master_thread();
1883 }
1884 }
1885
/********************************************************************//**
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.

Besides the plain mapping, some cases have side effects:
- DB_FOREIGN_EXCEED_MAX_CASCADE pushes a warning to thd (thd is asserted
  to be non-NULL by ut_ad());
- DB_CANT_CREATE_GEOMETRY_OBJECT, DB_TEMP_FILE_WRITE_FAIL,
  DB_TOO_BIG_RECORD and DB_TOO_BIG_INDEX_COL report an error through
  my_error() / my_printf_error();
- DB_DEADLOCK, DB_LOCK_WAIT_TIMEOUT and DB_LOCK_TABLE_FULL call
  thd_mark_transaction_to_rollback() when thd is not NULL.

Any error code without an explicit case maps to HA_ERR_GENERIC.
@return MySQL error code */
static int
convert_error_code_to_mysql(
/*========================*/
	dberr_t	error,	/*!< in: InnoDB error code */
	ulint	flags,  /*!< in: InnoDB table flags, or 0 */
	THD*	thd)	/*!< in: user thread handle or NULL */
{
	switch (error) {
	case DB_SUCCESS:
		return(0);

	case DB_INTERRUPTED:
		return(HA_ERR_ABORTED_BY_USER);

	case DB_FOREIGN_EXCEED_MAX_CASCADE:
		/* A thread handle is needed here because the details
		are delivered as a warning attached to it. */
		ut_ad(thd);
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    HA_ERR_ROW_IS_REFERENCED,
				    "InnoDB: Cannot delete/update "
				    "rows with cascading foreign key "
				    "constraints that exceed max "
				    "depth of %d. Please "
				    "drop extra constraints and try "
				    "again", DICT_FK_MAX_RECURSIVE_LOAD);
		return(HA_ERR_FK_DEPTH_EXCEEDED);

	case DB_CANT_CREATE_GEOMETRY_OBJECT:
		my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
		return(HA_ERR_NULL_IN_SPATIAL);

	/* The default label shares the DB_ERROR result; its position in
	the middle of the switch has no effect on which case is chosen. */
	case DB_ERROR:
	default:
		return(HA_ERR_GENERIC); /* unspecified error */

	case DB_DUPLICATE_KEY:
		/* Be cautious with returning this error, since
		mysql could re-enter the storage layer to get
		duplicated key info, the operation requires a
		valid table handle and/or transaction information,
		which might not always be available in the error
		handling stage. */
		return(HA_ERR_FOUND_DUPP_KEY);

	case DB_READ_ONLY:
		return(HA_ERR_TABLE_READONLY);

	case DB_FOREIGN_DUPLICATE_KEY:
		return(HA_ERR_FOREIGN_DUPLICATE_KEY);

	case DB_MISSING_HISTORY:
		return(HA_ERR_TABLE_DEF_CHANGED);

	case DB_RECORD_NOT_FOUND:
		return(HA_ERR_NO_ACTIVE_RECORD);

	case DB_DEADLOCK:
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */

		if (thd != NULL) {
			thd_mark_transaction_to_rollback(thd, 1);
		}

		return(HA_ERR_LOCK_DEADLOCK);

	case DB_LOCK_WAIT_TIMEOUT:
		/* Starting from 5.0.13, we let MySQL just roll back the
		latest SQL statement in a lock wait timeout. Previously, we
		rolled back the whole transaction. The second argument
		follows the row_rollback_on_timeout setting. */

		if (thd) {
			thd_mark_transaction_to_rollback(
				thd, (bool) row_rollback_on_timeout);
		}

		return(HA_ERR_LOCK_WAIT_TIMEOUT);

	case DB_NO_REFERENCED_ROW:
		return(HA_ERR_NO_REFERENCED_ROW);

	case DB_ROW_IS_REFERENCED:
		return(HA_ERR_ROW_IS_REFERENCED);

	/* All of these foreign key definition problems share one
	MySQL error code. */
	case DB_NO_FK_ON_S_BASE_COL:
	case DB_CANNOT_ADD_CONSTRAINT:
	case DB_CHILD_NO_INDEX:
	case DB_PARENT_NO_INDEX:
		return(HA_ERR_CANNOT_ADD_FOREIGN);

	case DB_CANNOT_DROP_CONSTRAINT:

		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
						misleading, a new MySQL error
						code should be introduced */

	case DB_CORRUPTION:
		return(HA_ERR_CRASHED);

	case DB_OUT_OF_FILE_SPACE:
		return(HA_ERR_RECORD_FILE_FULL);

	case DB_TEMP_FILE_WRITE_FAIL:
		my_error(ER_GET_ERRMSG, MYF(0),
			 DB_TEMP_FILE_WRITE_FAIL,
			 ut_strerr(DB_TEMP_FILE_WRITE_FAIL),
			 "InnoDB");
		return(HA_ERR_INTERNAL_ERROR);

	case DB_TABLE_IN_FK_CHECK:
		return(HA_ERR_TABLE_IN_FK_CHECK);

	case DB_TABLE_IS_BEING_USED:
		return(HA_ERR_WRONG_COMMAND);

	case DB_TABLE_NOT_FOUND:
		return(HA_ERR_NO_SUCH_TABLE);

	case DB_DECRYPTION_FAILED:
		return(HA_ERR_DECRYPTION_FAILED);

	case DB_TABLESPACE_NOT_FOUND:
		return(HA_ERR_TABLESPACE_MISSING);

	case DB_TOO_BIG_RECORD: {
		/* If prefix is true then a 768-byte prefix is stored
		locally for BLOB fields. Refer to dict_table_get_format().
		We limit max record size to 16k for 64k page size. */
		bool prefix = !DICT_TF_HAS_ATOMIC_BLOBS(flags);
		bool comp = !!(flags & DICT_TF_COMPACT);
		/* The reported limit is half of the free space of an
		empty page... */
		ulint free_space = page_get_free_space_of_empty(comp) / 2;

		/* ...capped to one byte less than the maximum record
		data size of the row format. */
		if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
					  REDUNDANT_REC_MAX_DATA_SIZE)) {
			free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
				REDUNDANT_REC_MAX_DATA_SIZE) - 1;
		}

		my_printf_error(ER_TOO_BIG_ROWSIZE,
			"Row size too large (> " ULINTPF "). Changing some columns "
			"to TEXT or BLOB %smay help. In current row "
			"format, BLOB prefix of %d bytes is stored inline.",
			MYF(0),
			free_space,
			prefix
			? "or using ROW_FORMAT=DYNAMIC or"
			  " ROW_FORMAT=COMPRESSED "
			: "",
			prefix
			? DICT_MAX_FIXED_COL_LEN
			: 0);
		return(HA_ERR_TO_BIG_ROW);
	}

	case DB_TOO_BIG_INDEX_COL:
		my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
			 (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
		return(HA_ERR_INDEX_COL_TOO_LONG);

	case DB_NO_SAVEPOINT:
		return(HA_ERR_NO_SAVEPOINT);

	case DB_LOCK_TABLE_FULL:
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */

		if (thd) {
			thd_mark_transaction_to_rollback(thd, 1);
		}

		return(HA_ERR_LOCK_TABLE_FULL);

	case DB_FTS_INVALID_DOCID:
		return(HA_FTS_INVALID_DOCID);
	case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
		return(HA_ERR_OUT_OF_MEM);
	case DB_TOO_MANY_CONCURRENT_TRXS:
		return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
	case DB_UNSUPPORTED:
		return(HA_ERR_UNSUPPORTED);
	case DB_INDEX_CORRUPT:
		return(HA_ERR_INDEX_CORRUPT);
	case DB_UNDO_RECORD_TOO_BIG:
		return(HA_ERR_UNDO_REC_TOO_BIG);
	case DB_OUT_OF_MEMORY:
		return(HA_ERR_OUT_OF_MEM);
	case DB_TABLESPACE_EXISTS:
		return(HA_ERR_TABLESPACE_EXISTS);
	case DB_TABLESPACE_DELETED:
		return(HA_ERR_TABLESPACE_MISSING);
	case DB_IDENTIFIER_TOO_LONG:
		return(HA_ERR_INTERNAL_ERROR);
	case DB_TABLE_CORRUPT:
		return(HA_ERR_TABLE_CORRUPT);
	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
		return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
	case DB_COMPUTE_VALUE_FAILED:
		return(HA_ERR_GENERIC); // impossible
	}
}
2092
2093 /*************************************************************//**
2094 Prints info of a THD object (== user session thread) to the given file. */
2095 void
innobase_mysql_print_thd(FILE * f,THD * thd,uint max_query_len)2096 innobase_mysql_print_thd(
2097 /*=====================*/
2098 FILE* f, /*!< in: output stream */
2099 THD* thd, /*!< in: MySQL THD object */
2100 uint max_query_len) /*!< in: max query length to print, or 0 to
2101 use the default max length */
2102 {
2103 char buffer[1024];
2104
2105 fputs(thd_get_error_context_description(thd, buffer, sizeof buffer,
2106 max_query_len), f);
2107 putc('\n', f);
2108 }
2109
2110 /******************************************************************//**
2111 Get the variable length bounds of the given character set. */
2112 void
innobase_get_cset_width(ulint cset,ulint * mbminlen,ulint * mbmaxlen)2113 innobase_get_cset_width(
2114 /*====================*/
2115 ulint cset, /*!< in: MySQL charset-collation code */
2116 ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
2117 ulint* mbmaxlen) /*!< out: maximum length of a char (in bytes) */
2118 {
2119 CHARSET_INFO* cs;
2120 ut_ad(cset <= MAX_CHAR_COLL_NUM);
2121 ut_ad(mbminlen);
2122 ut_ad(mbmaxlen);
2123
2124 cs = all_charsets[cset];
2125 if (cs) {
2126 *mbminlen = cs->mbminlen;
2127 *mbmaxlen = cs->mbmaxlen;
2128 ut_ad(*mbminlen < DATA_MBMAX);
2129 ut_ad(*mbmaxlen < DATA_MBMAX);
2130 } else {
2131 THD* thd = current_thd;
2132
2133 if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
2134
2135 /* Fix bug#46256: allow tables to be dropped if the
2136 collation is not found, but issue a warning. */
2137 if (cset != 0) {
2138
2139 sql_print_warning(
2140 "Unknown collation #" ULINTPF ".",
2141 cset);
2142 }
2143 } else {
2144
2145 ut_a(cset == 0);
2146 }
2147
2148 *mbminlen = *mbmaxlen = 0;
2149 }
2150 }
2151
2152 /******************************************************************//**
2153 Converts an identifier to a table name. */
2154 void
innobase_convert_from_table_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2155 innobase_convert_from_table_id(
2156 /*===========================*/
2157 CHARSET_INFO* cs, /*!< in: the 'from' character set */
2158 char* to, /*!< out: converted identifier */
2159 const char* from, /*!< in: identifier to convert */
2160 ulint len) /*!< in: length of 'to', in bytes */
2161 {
2162 uint errors;
2163
2164 strconvert(cs, from, FN_REFLEN, &my_charset_filename, to, (uint) len, &errors);
2165 }
2166
2167 /**********************************************************************
2168 Check if the length of the identifier exceeds the maximum allowed.
2169 return true when length of identifier is too long. */
2170 my_bool
innobase_check_identifier_length(const char * id)2171 innobase_check_identifier_length(
2172 /*=============================*/
2173 const char* id) /* in: FK identifier to check excluding the
2174 database portion. */
2175 {
2176 int well_formed_error = 0;
2177 CHARSET_INFO *cs = system_charset_info;
2178 DBUG_ENTER("innobase_check_identifier_length");
2179
2180 size_t len = my_well_formed_length(
2181 cs, id, id + strlen(id),
2182 NAME_CHAR_LEN, &well_formed_error);
2183
2184 if (well_formed_error || len == NAME_CHAR_LEN) {
2185 my_error(ER_TOO_LONG_IDENT, MYF(0), id);
2186 DBUG_RETURN(true);
2187 }
2188 DBUG_RETURN(false);
2189 }
2190
2191 /******************************************************************//**
2192 Converts an identifier to UTF-8. */
2193 void
innobase_convert_from_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2194 innobase_convert_from_id(
2195 /*=====================*/
2196 CHARSET_INFO* cs, /*!< in: the 'from' character set */
2197 char* to, /*!< out: converted identifier */
2198 const char* from, /*!< in: identifier to convert */
2199 ulint len) /*!< in: length of 'to', in bytes */
2200 {
2201 uint errors;
2202
2203 strconvert(cs, from, FN_REFLEN, system_charset_info, to, (uint) len, &errors);
2204 }
2205
2206 /******************************************************************//**
2207 Compares NUL-terminated UTF-8 strings case insensitively.
2208 @return 0 if a=b, <0 if a<b, >1 if a>b */
2209 int
innobase_strcasecmp(const char * a,const char * b)2210 innobase_strcasecmp(
2211 /*================*/
2212 const char* a, /*!< in: first string to compare */
2213 const char* b) /*!< in: second string to compare */
2214 {
2215 if (!a) {
2216 if (!b) {
2217 return(0);
2218 } else {
2219 return(-1);
2220 }
2221 } else if (!b) {
2222 return(1);
2223 }
2224
2225 return(my_strcasecmp(system_charset_info, a, b));
2226 }
2227
2228 /******************************************************************//**
2229 Compares NUL-terminated UTF-8 strings case insensitively. The
2230 second string contains wildcards.
2231 @return 0 if a match is found, 1 if not */
2232 static
2233 int
innobase_wildcasecmp(const char * a,const char * b)2234 innobase_wildcasecmp(
2235 /*=================*/
2236 const char* a, /*!< in: string to compare */
2237 const char* b) /*!< in: wildcard string to compare */
2238 {
2239 return(wild_case_compare(system_charset_info, a, b));
2240 }
2241
2242 /** Strip dir name from a full path name and return only the file name
2243 @param[in] path_name full path name
2244 @return file name or "null" if no file name */
2245 const char*
innobase_basename(const char * path_name)2246 innobase_basename(
2247 const char* path_name)
2248 {
2249 const char* name = base_name(path_name);
2250
2251 return((name) ? name : "null");
2252 }
2253
2254 /******************************************************************//**
2255 Makes all characters in a NUL-terminated UTF-8 string lower case. */
2256 void
innobase_casedn_str(char * a)2257 innobase_casedn_str(
2258 /*================*/
2259 char* a) /*!< in/out: string to put in lower case */
2260 {
2261 my_casedn_str(system_charset_info, a);
2262 }
2263
2264 /** Determines the current SQL statement.
2265 Thread unsafe, can only be called from the thread owning the THD.
2266 @param[in] thd MySQL thread handle
2267 @param[out] length Length of the SQL statement
2268 @return SQL statement string */
2269 const char*
innobase_get_stmt_unsafe(THD * thd,size_t * length)2270 innobase_get_stmt_unsafe(
2271 THD* thd,
2272 size_t* length)
2273 {
2274 if (const LEX_STRING *stmt = thd_query_string(thd)) {
2275 *length = stmt->length;
2276 return stmt->str;
2277 }
2278
2279 *length = 0;
2280 return NULL;
2281 }
2282
2283 /**********************************************************************//**
2284 Get the current setting of the tdc_size global parameter. We do
2285 a dirty read because for one there is no synchronization object and
2286 secondly there is little harm in doing so even if we get a torn read.
2287 @return value of tdc_size */
2288 ulint
innobase_get_table_cache_size(void)2289 innobase_get_table_cache_size(void)
2290 /*===============================*/
2291 {
2292 return(tdc_size);
2293 }
2294
2295 /**********************************************************************//**
2296 Get the current setting of the lower_case_table_names global parameter from
2297 mysqld.cc. We do a dirty read because for one there is no synchronization
2298 object and secondly there is little harm in doing so even if we get a torn
2299 read.
2300 @return value of lower_case_table_names */
2301 ulint
innobase_get_lower_case_table_names(void)2302 innobase_get_lower_case_table_names(void)
2303 /*=====================================*/
2304 {
2305 return(lower_case_table_names);
2306 }
2307
2308 /**
2309 Test a file path whether it is same as mysql data directory path.
2310
2311 @param path null terminated character string
2312
2313 @return
2314 @retval TRUE The path is different from mysql data directory.
2315 @retval FALSE The path is same as mysql data directory.
2316 */
is_mysql_datadir_path(const char * path)2317 static bool is_mysql_datadir_path(const char *path)
2318 {
2319 if (path == NULL)
2320 return false;
2321
2322 char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
2323 convert_dirname(path_dir, path, NullS);
2324 convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
2325 size_t mysql_data_home_len= dirname_length(mysql_data_dir);
2326 size_t path_len = dirname_length(path_dir);
2327
2328 if (path_len < mysql_data_home_len)
2329 return true;
2330
2331 if (!lower_case_file_system)
2332 return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
2333
2334 return(files_charset_info->coll->strnncoll(files_charset_info,
2335 (uchar *) path_dir, path_len,
2336 (uchar *) mysql_data_dir,
2337 mysql_data_home_len,
2338 TRUE));
2339 }
2340
mysql_tmpfile_path(const char * path,const char * prefix)2341 static int mysql_tmpfile_path(const char *path, const char *prefix)
2342 {
2343 DBUG_ASSERT(path != NULL);
2344 DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
2345
2346 char filename[FN_REFLEN];
2347 File fd = create_temp_file(filename, path, prefix, O_BINARY | O_SEQUENTIAL,
2348 MYF(MY_WME | MY_TEMPORARY));
2349 return fd;
2350 }
2351
2352 /** Creates a temporary file in the location specified by the parameter
2353 path. If the path is NULL, then it will be created in tmpdir.
2354 @param[in] path location for creating temporary file
2355 @return temporary file descriptor, or < 0 on error */
2356 os_file_t
innobase_mysql_tmpfile(const char * path)2357 innobase_mysql_tmpfile(
2358 const char* path)
2359 {
2360 #ifdef WITH_INNODB_DISALLOW_WRITES
2361 os_event_wait(srv_allow_writes_event);
2362 #endif /* WITH_INNODB_DISALLOW_WRITES */
2363 File fd;
2364
2365 DBUG_EXECUTE_IF(
2366 "innobase_tmpfile_creation_failure",
2367 return(OS_FILE_CLOSED);
2368 );
2369
2370 if (path == NULL) {
2371 fd = mysql_tmpfile("ib");
2372 } else {
2373 fd = mysql_tmpfile_path(path, "ib");
2374 }
2375
2376 if (fd < 0)
2377 return OS_FILE_CLOSED;
2378
2379 /* Copy the file descriptor, so that the additional resources
2380 allocated by create_temp_file() can be freed by invoking
2381 my_close().
2382
2383 Because the file descriptor returned by this function
2384 will be passed to fdopen(), it will be closed by invoking
2385 fclose(), which in turn will invoke close() instead of
2386 my_close(). */
2387
2388 #ifdef _WIN32
2389 /* Note that on Windows, the integer returned by mysql_tmpfile
2390 has no relation to C runtime file descriptor. Here, we need
2391 to call my_get_osfhandle to get the HANDLE and then convert it
2392 to C runtime filedescriptor. */
2393
2394 HANDLE hFile = my_get_osfhandle(fd);
2395 HANDLE hDup;
2396 BOOL bOK = DuplicateHandle(
2397 GetCurrentProcess(),
2398 hFile, GetCurrentProcess(),
2399 &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
2400 my_close(fd, MYF(MY_WME));
2401
2402 if (!bOK) {
2403 my_osmaperr(GetLastError());
2404 goto error;
2405 }
2406 return hDup;
2407 #else
2408 #ifdef F_DUPFD_CLOEXEC
2409 int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2410 #else
2411 int fd2 = dup(fd);
2412 #endif
2413 my_close(fd, MYF(MY_WME));
2414 if (fd2 < 0) {
2415 set_my_errno(errno);
2416 goto error;
2417 }
2418 return fd2;
2419 #endif
2420
2421 error:
2422 char errbuf[MYSYS_STRERROR_SIZE];
2423
2424 my_error(EE_OUT_OF_FILERESOURCES,
2425 MYF(0),
2426 "ib*", errno,
2427 my_strerror(errbuf, sizeof(errbuf), errno));
2428 return (OS_FILE_CLOSED);
2429 }
2430
2431 /*********************************************************************//**
2432 Wrapper around MySQL's copy_and_convert function.
2433 @return number of bytes copied to 'to' */
2434 static
2435 ulint
innobase_convert_string(void * to,ulint to_length,CHARSET_INFO * to_cs,const void * from,ulint from_length,CHARSET_INFO * from_cs,uint * errors)2436 innobase_convert_string(
2437 /*====================*/
2438 void* to, /*!< out: converted string */
2439 ulint to_length, /*!< in: number of bytes reserved
2440 for the converted string */
2441 CHARSET_INFO* to_cs, /*!< in: character set to convert to */
2442 const void* from, /*!< in: string to convert */
2443 ulint from_length, /*!< in: number of bytes to convert */
2444 CHARSET_INFO* from_cs, /*!< in: character set to convert
2445 from */
2446 uint* errors) /*!< out: number of errors encountered
2447 during the conversion */
2448 {
2449 return(copy_and_convert(
2450 (char*) to, (uint32) to_length, to_cs,
2451 (const char*) from, (uint32) from_length, from_cs,
2452 errors));
2453 }
2454
2455 /*******************************************************************//**
2456 Formats the raw data in "data" (in InnoDB on-disk format) that is of
2457 type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
2458 the result to "buf". The result is converted to "system_charset_info".
2459 Not more than "buf_size" bytes are written to "buf".
2460 The result is always NUL-terminated (provided buf_size > 0) and the
2461 number of bytes that were written to "buf" is returned (including the
2462 terminating NUL).
2463 @return number of bytes that were written */
2464 ulint
innobase_raw_format(const char * data,ulint data_len,ulint charset_coll,char * buf,ulint buf_size)2465 innobase_raw_format(
2466 /*================*/
2467 const char* data, /*!< in: raw data */
2468 ulint data_len, /*!< in: raw data length
2469 in bytes */
2470 ulint charset_coll, /*!< in: charset collation */
2471 char* buf, /*!< out: output buffer */
2472 ulint buf_size) /*!< in: output buffer size
2473 in bytes */
2474 {
2475 /* XXX we use a hard limit instead of allocating
2476 but_size bytes from the heap */
2477 CHARSET_INFO* data_cs;
2478 char buf_tmp[8192];
2479 ulint buf_tmp_used;
2480 uint num_errors;
2481
2482 data_cs = all_charsets[charset_coll];
2483
2484 buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
2485 system_charset_info,
2486 data, data_len, data_cs,
2487 &num_errors);
2488
2489 return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
2490 }
2491
/*
  The helper function nlz(x) calculates the number of leading zeros
  in the binary representation of the 64-bit number "x", either using a
  built-in compiler function or a substitute trick based on the use
  of the multiplication operation and a table indexed by the prefix
  of the multiplication result.

  The argument is expected to be nonzero: the compiler built-ins used
  below do not define a result for 0 (the portable fallback returns 64
  in that case).
*/
#ifdef __GNUC__
#define nlz(x) __builtin_clzll(x)
#elif defined(_MSC_VER) && !defined(_M_CEE_PURE) && \
  (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
#ifndef __INTRIN_H_
#pragma warning(push, 4)
#pragma warning(disable: 4255 4668)
#include <intrin.h>
#pragma warning(pop)
#endif
__forceinline unsigned int nlz (ulonglong x)
{
#if defined(_M_IX86) || defined(_M_X64)
  unsigned long n;
#ifdef _M_X64
  /* n is the index of the highest set bit; for n in [0, 63],
     n ^ 63 equals 63 - n. */
  _BitScanReverse64(&n, x);
  return (unsigned int) n ^ 63;
#else
  /* 32-bit build: scan the high word first and fall back to the
     low word, adjusting the mask so that the result counts from
     bit 63. */
  unsigned long y = (unsigned long) (x >> 32);
  unsigned int m = 31;
  if (y == 0)
  {
    y = (unsigned long) x;
    m = 63;
  }
  _BitScanReverse(&n, y);
  return (unsigned int) n ^ m;
#endif
#elif defined(_M_ARM64)
  /* The 64-bit variant is required here: _CountLeadingZeros() takes
     a 32-bit unsigned long and would examine only the low word of x. */
  return _CountLeadingZeros64(x);
#endif
}
#else
inline unsigned int nlz (ulonglong x)
{
  static unsigned char table [48] = {
    32,  6,  5,  0,  4, 12,  0, 20,
    15,  3, 11,  0,  0, 18, 25, 31,
     8, 14,  2,  0, 10,  0,  0,  0,
     0,  0,  0, 21,  0,  0, 19, 26,
     7,  0, 13,  0, 16,  1, 22, 27,
     9,  0, 17, 23, 28, 24, 29, 30
  };
  /* Work on the high 32 bits, or on the low 32 bits (plus 32) when
     the high word is zero. */
  unsigned int y= (unsigned int) (x >> 32);
  unsigned int n= 0;
  if (y == 0) {
    y= (unsigned int) x;
    n= 32;
  }
  y = y | (y >> 1); // Propagate leftmost 1-bit to the right.
  y = y | (y >> 2);
  y = y | (y >> 4);
  y = y | (y >> 8);
  y = y & ~(y >> 16);
  y = y * 0x3EF5D037;
  // The top 6 bits of the product select the table entry.
  return n + table[y >> 26];
}
#endif
2557
2558 /*********************************************************************//**
2559 Compute the next autoinc value.
2560
2561 For MySQL replication the autoincrement values can be partitioned among
2562 the nodes. The offset is the start or origin of the autoincrement value
2563 for a particular node. For n nodes the increment will be n and the offset
2564 will be in the interval [1, n]. The formula tries to allocate the next
2565 value for a particular node.
2566
2567 Note: This function is also called with increment set to the number of
2568 values we want to reserve for multi-value inserts e.g.,
2569
2570 INSERT INTO T VALUES(), (), ();
2571
2572 innobase_next_autoinc() will be called with increment set to 3 where
2573 autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
2574 the multi-value INSERT above.
2575 @return the next value */
2576 ulonglong
innobase_next_autoinc(ulonglong current,ulonglong need,ulonglong step,ulonglong offset,ulonglong max_value)2577 innobase_next_autoinc(
2578 /*==================*/
2579 ulonglong current, /*!< in: Current value */
2580 ulonglong need, /*!< in: count of values needed */
2581 ulonglong step, /*!< in: AUTOINC increment step */
2582 ulonglong offset, /*!< in: AUTOINC offset */
2583 ulonglong max_value) /*!< in: max value for type */
2584 {
2585 ulonglong next_value;
2586 ulonglong block;
2587
2588 /* Should never be 0. */
2589 ut_a(need > 0);
2590 ut_a(step > 0);
2591 ut_a(max_value > 0);
2592
2593 /*
2594 We need to calculate the "block" value equal to the product
2595 "step * need". However, when calculating this product, an integer
2596 overflow can occur, so we cannot simply use the usual multiplication
2597 operation. The snippet below calculates the product of two numbers
2598 and detects an unsigned integer overflow:
2599 */
2600 unsigned int m= nlz(need);
2601 unsigned int n= nlz(step);
2602 if (m + n <= 8 * sizeof(ulonglong) - 2) {
2603 // The bit width of the original values is too large,
2604 // therefore we are guaranteed to get an overflow.
2605 goto overflow;
2606 }
2607 block = need * (step >> 1);
2608 if ((longlong) block < 0) {
2609 goto overflow;
2610 }
2611 block += block;
2612 if (step & 1) {
2613 block += need;
2614 if (block < need) {
2615 goto overflow;
2616 }
2617 }
2618
2619 /* Check for overflow. Current can be > max_value if the value
2620 is in reality a negative value. Also, the visual studio compiler
2621 converts large double values (which hypothetically can then be
2622 passed here as the values of the "current" parameter) automatically
2623 into unsigned long long datatype maximum value: */
2624 if (current > max_value) {
2625 goto overflow;
2626 }
2627
2628 /* According to MySQL documentation, if the offset is greater than
2629 the step then the offset is ignored. */
2630 if (offset > step) {
2631 offset = 0;
2632 }
2633
2634 /*
2635 Let's round the current value to within a step-size block:
2636 */
2637 if (current > offset) {
2638 next_value = current - offset;
2639 } else {
2640 next_value = offset - current;
2641 }
2642 next_value -= next_value % step;
2643
2644 /*
2645 Add an offset to the next value and check that the addition
2646 does not cause an integer overflow:
2647 */
2648 next_value += offset;
2649 if (next_value < offset) {
2650 goto overflow;
2651 }
2652
2653 /*
2654 Add a block to the next value and check that the addition
2655 does not cause an integer overflow:
2656 */
2657 next_value += block;
2658 if (next_value < block) {
2659 goto overflow;
2660 }
2661
2662 return(next_value);
2663
2664 overflow:
2665 /*
2666 Allow auto_increment to go over max_value up to max ulonglong.
2667 This allows us to detect that all values are exhausted.
2668 If we don't do this, we will return max_value several times
2669 and get duplicate key errors instead of auto increment value
2670 out of range:
2671 */
2672 return(~(ulonglong) 0);
2673 }
2674
2675 /********************************************************************//**
2676 Reset the autoinc value in the table.
2677 @return DB_SUCCESS if all went well else error code */
2678 UNIV_INTERN
2679 dberr_t
innobase_reset_autoinc(ulonglong autoinc)2680 ha_innobase::innobase_reset_autoinc(
2681 /*================================*/
2682 ulonglong autoinc) /*!< in: value to store */
2683 {
2684 dberr_t error;
2685
2686 error = innobase_lock_autoinc();
2687
2688 if (error == DB_SUCCESS) {
2689
2690 dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
2691
2692 dict_table_autoinc_unlock(m_prebuilt->table);
2693 }
2694
2695 return(error);
2696 }
2697
2698 /*******************************************************************//**
2699 Reset the auto-increment counter to the given value, i.e. the next row
2700 inserted will get the given value. This is called e.g. after TRUNCATE
2701 is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
2702 returned by storage engines that don't support this operation.
2703 @return 0 or error code */
2704 UNIV_INTERN
2705 int
reset_auto_increment(ulonglong value)2706 ha_innobase::reset_auto_increment(
2707 /*==============================*/
2708 ulonglong value) /*!< in: new value for table autoinc */
2709 {
2710 DBUG_ENTER("ha_innobase::reset_auto_increment");
2711
2712 dberr_t error;
2713
2714 update_thd(ha_thd());
2715
2716 error = row_lock_table_autoinc_for_mysql(m_prebuilt);
2717
2718 if (error != DB_SUCCESS) {
2719 DBUG_RETURN(convert_error_code_to_mysql(
2720 error, m_prebuilt->table->flags, m_user_thd));
2721 }
2722
2723 /* The next value can never be 0. */
2724 if (value == 0) {
2725 value = 1;
2726 }
2727
2728 innobase_reset_autoinc(value);
2729
2730 DBUG_RETURN(0);
2731 }
2732
2733 /*********************************************************************//**
2734 Initializes some fields in an InnoDB transaction object. */
2735 static
2736 void
innobase_trx_init(THD * thd,trx_t * trx)2737 innobase_trx_init(
2738 /*==============*/
2739 THD* thd, /*!< in: user thread handle */
2740 trx_t* trx) /*!< in/out: InnoDB transaction handle */
2741 {
2742 DBUG_ENTER("innobase_trx_init");
2743 DBUG_ASSERT(thd == trx->mysql_thd);
2744
2745 /* Ensure that thd_lock_wait_timeout(), which may be called
2746 while holding lock_sys.mutex, by lock_rec_enqueue_waiting(),
2747 will not end up acquiring LOCK_global_system_variables in
2748 intern_sys_var_ptr(). */
2749 THDVAR(thd, lock_wait_timeout);
2750
2751 trx->check_foreigns = !thd_test_options(
2752 thd, OPTION_NO_FOREIGN_KEY_CHECKS);
2753
2754 trx->check_unique_secondary = !thd_test_options(
2755 thd, OPTION_RELAXED_UNIQUE_CHECKS);
2756 #ifdef WITH_WSREP
2757 trx->wsrep = wsrep_on(thd);
2758 #endif
2759
2760 DBUG_VOID_RETURN;
2761 }
2762
2763 /*********************************************************************//**
2764 Allocates an InnoDB transaction for a MySQL handler object for DML.
2765 @return InnoDB transaction handle */
2766 trx_t*
innobase_trx_allocate(THD * thd)2767 innobase_trx_allocate(
2768 /*==================*/
2769 THD* thd) /*!< in: user thread handle */
2770 {
2771 trx_t* trx;
2772
2773 DBUG_ENTER("innobase_trx_allocate");
2774 DBUG_ASSERT(thd != NULL);
2775 DBUG_ASSERT(EQ_CURRENT_THD(thd));
2776
2777 trx = trx_create();
2778
2779 trx->mysql_thd = thd;
2780
2781 innobase_trx_init(thd, trx);
2782
2783 DBUG_RETURN(trx);
2784 }
2785
2786 /*********************************************************************//**
2787 Gets the InnoDB transaction handle for a MySQL handler object, creates
2788 an InnoDB transaction struct if the corresponding MySQL thread struct still
2789 lacks one.
2790 @return InnoDB transaction handle */
2791 static inline
2792 trx_t*
check_trx_exists(THD * thd)2793 check_trx_exists(
2794 /*=============*/
2795 THD* thd) /*!< in: user thread handle */
2796 {
2797 if (trx_t* trx = thd_to_trx(thd)) {
2798 ut_a(trx->magic_n == TRX_MAGIC_N);
2799 innobase_trx_init(thd, trx);
2800 return trx;
2801 } else {
2802 trx = innobase_trx_allocate(thd);
2803 thd_set_ha_data(thd, innodb_hton_ptr, trx);
2804 return trx;
2805 }
2806 }
2807
2808 /**
2809 Gets current trx.
2810
2811 This function may be called during InnoDB initialisation, when
2812 innodb_hton_ptr->slot is not yet set to meaningful value.
2813 */
2814
current_trx()2815 trx_t *current_trx()
2816 {
2817 THD *thd=current_thd;
2818 if (likely(thd != 0) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) {
2819 return thd_to_trx(thd);
2820 } else {
2821 return(NULL);
2822 }
2823 }
2824
2825 /*********************************************************************//**
2826 Note that a transaction has been registered with MySQL.
2827 @return true if transaction is registered with MySQL 2PC coordinator */
2828 static inline
2829 bool
trx_is_registered_for_2pc(const trx_t * trx)2830 trx_is_registered_for_2pc(
2831 /*======================*/
2832 const trx_t* trx) /* in: transaction */
2833 {
2834 return(trx->is_registered == 1);
2835 }
2836
2837 /*********************************************************************//**
2838 Note that a transaction has been registered with MySQL 2PC coordinator. */
2839 static inline
2840 void
trx_register_for_2pc(trx_t * trx)2841 trx_register_for_2pc(
2842 /*==================*/
2843 trx_t* trx) /* in: transaction */
2844 {
2845 trx->is_registered = 1;
2846 ut_ad(!trx->active_commit_ordered);
2847 }
2848
2849 /*********************************************************************//**
2850 Note that a transaction has been deregistered. */
2851 static inline
2852 void
trx_deregister_from_2pc(trx_t * trx)2853 trx_deregister_from_2pc(
2854 /*====================*/
2855 trx_t* trx) /* in: transaction */
2856 {
2857 trx->is_registered= false;
2858 trx->active_commit_ordered= false;
2859 }
2860
2861 /*********************************************************************//**
2862 Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
2863 Those flags are stored in .frm file and end up in the MySQL table object,
2864 but are frequently used inside InnoDB so we keep their copies into the
2865 InnoDB table object. */
2866 static
2867 void
innobase_copy_frm_flags_from_create_info(dict_table_t * innodb_table,const HA_CREATE_INFO * create_info)2868 innobase_copy_frm_flags_from_create_info(
2869 /*=====================================*/
2870 dict_table_t* innodb_table, /*!< in/out: InnoDB table */
2871 const HA_CREATE_INFO* create_info) /*!< in: create info */
2872 {
2873 ibool ps_on;
2874 ibool ps_off;
2875
2876 if (innodb_table->is_temporary()
2877 || innodb_table->no_rollback()) {
2878 /* Temp tables do not use persistent stats. */
2879 ps_on = FALSE;
2880 ps_off = TRUE;
2881 } else {
2882 ps_on = create_info->table_options
2883 & HA_OPTION_STATS_PERSISTENT;
2884 ps_off = create_info->table_options
2885 & HA_OPTION_NO_STATS_PERSISTENT;
2886 }
2887
2888 dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2889
2890 dict_stats_auto_recalc_set(
2891 innodb_table,
2892 create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2893 create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2894
2895 innodb_table->stats_sample_pages = create_info->stats_sample_pages;
2896 }
2897
2898 /*********************************************************************//**
2899 Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
2900 Those flags are stored in .frm file and end up in the MySQL table object,
2901 but are frequently used inside InnoDB so we keep their copies into the
2902 InnoDB table object. */
2903 void
innobase_copy_frm_flags_from_table_share(dict_table_t * innodb_table,const TABLE_SHARE * table_share)2904 innobase_copy_frm_flags_from_table_share(
2905 /*=====================================*/
2906 dict_table_t* innodb_table, /*!< in/out: InnoDB table */
2907 const TABLE_SHARE* table_share) /*!< in: table share */
2908 {
2909 ibool ps_on;
2910 ibool ps_off;
2911
2912 if (innodb_table->is_temporary()) {
2913 /* Temp tables do not use persistent stats */
2914 ps_on = FALSE;
2915 ps_off = TRUE;
2916 } else {
2917 ps_on = table_share->db_create_options
2918 & HA_OPTION_STATS_PERSISTENT;
2919 ps_off = table_share->db_create_options
2920 & HA_OPTION_NO_STATS_PERSISTENT;
2921 }
2922
2923 dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2924
2925 dict_stats_auto_recalc_set(
2926 innodb_table,
2927 table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2928 table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2929
2930 innodb_table->stats_sample_pages = table_share->stats_sample_pages;
2931 }
2932
2933 /*********************************************************************//**
2934 Construct ha_innobase handler. */
2935
ha_innobase(handlerton * hton,TABLE_SHARE * table_arg)2936 ha_innobase::ha_innobase(
2937 /*=====================*/
2938 handlerton* hton,
2939 TABLE_SHARE* table_arg)
2940 :handler(hton, table_arg),
2941 m_prebuilt(),
2942 m_user_thd(),
2943 m_int_table_flags(HA_REC_NOT_IN_SEQ
2944 | HA_NULL_IN_KEY
2945 | HA_CAN_VIRTUAL_COLUMNS
2946 | HA_CAN_INDEX_BLOBS
2947 | HA_CAN_SQL_HANDLER
2948 | HA_REQUIRES_KEY_COLUMNS_FOR_DELETE
2949 | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
2950 | HA_PRIMARY_KEY_IN_READ_INDEX
2951 | HA_BINLOG_ROW_CAPABLE
2952 | HA_CAN_GEOMETRY
2953 | HA_PARTIAL_COLUMN_READ
2954 | HA_TABLE_SCAN_ON_INDEX
2955 | HA_CAN_FULLTEXT
2956 | HA_CAN_FULLTEXT_EXT
2957 /* JAN: TODO: MySQL 5.7
2958 | HA_CAN_FULLTEXT_HINTS
2959 */
2960 | HA_CAN_EXPORT
2961 | HA_CAN_RTREEKEYS
2962 | HA_CAN_TABLES_WITHOUT_ROLLBACK
2963 | HA_CONCURRENT_OPTIMIZE
2964 | (srv_force_primary_key ? HA_WANTS_PRIMARY_KEY : 0)
2965 ),
2966 m_start_of_scan(),
2967 m_mysql_has_locked()
2968 {}
2969
2970 /*********************************************************************//**
2971 Destruct ha_innobase handler. */
2972
~ha_innobase()2973 ha_innobase::~ha_innobase()
2974 /*======================*/
2975 {
2976 }
2977
2978 /*********************************************************************//**
2979 Updates the user_thd field in a handle and also allocates a new InnoDB
2980 transaction handle if needed, and updates the transaction fields in the
2981 m_prebuilt struct. */
2982 void
update_thd(THD * thd)2983 ha_innobase::update_thd(
2984 /*====================*/
2985 THD* thd) /*!< in: thd to use the handle */
2986 {
2987 DBUG_ENTER("ha_innobase::update_thd");
2988 DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
2989 m_user_thd, thd));
2990
2991 /* The table should have been opened in ha_innobase::open(). */
2992 DBUG_ASSERT(m_prebuilt->table->get_ref_count() > 0);
2993
2994 trx_t* trx = check_trx_exists(thd);
2995
2996 ut_ad(trx->dict_operation_lock_mode == 0);
2997 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
2998
2999 if (m_prebuilt->trx != trx) {
3000
3001 row_update_prebuilt_trx(m_prebuilt, trx);
3002 }
3003
3004 m_user_thd = thd;
3005
3006 DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
3007 DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd));
3008
3009 DBUG_VOID_RETURN;
3010 }
3011
3012 /*********************************************************************//**
3013 Updates the user_thd field in a handle and also allocates a new InnoDB
3014 transaction handle if needed, and updates the transaction fields in the
3015 m_prebuilt struct. */
3016
3017 void
update_thd()3018 ha_innobase::update_thd()
3019 /*=====================*/
3020 {
3021 THD* thd = ha_thd();
3022
3023 ut_ad(EQ_CURRENT_THD(thd));
3024 update_thd(thd);
3025 }
3026
3027 /*********************************************************************//**
3028 Registers an InnoDB transaction with the MySQL 2PC coordinator, so that
3029 the MySQL XA code knows to call the InnoDB prepare and commit, or rollback
3030 for the transaction. This MUST be called for every transaction for which
3031 the user may call commit or rollback. Calling this several times to register
3032 the same transaction is allowed, too. This function also registers the
3033 current SQL statement. */
3034 static inline
3035 void
innobase_register_trx(handlerton * hton,THD * thd,trx_t * trx)3036 innobase_register_trx(
3037 /*==================*/
3038 handlerton* hton, /* in: Innobase handlerton */
3039 THD* thd, /* in: MySQL thd (connection) object */
3040 trx_t* trx) /* in: transaction to register */
3041 {
3042 /* JAN: TODO: MySQL 5.7 PSI
3043 const ulonglong trx_id = static_cast<const ulonglong>(
3044 trx_get_id_for_print(trx));
3045
3046 trans_register_ha(thd, FALSE, hton, &trx_id);
3047 */
3048 trans_register_ha(thd, FALSE, hton);
3049
3050 if (!trx_is_registered_for_2pc(trx)
3051 && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
3052
3053 //trans_register_ha(thd, TRUE, hton, &trx_id);
3054 trans_register_ha(thd, TRUE, hton);
3055 }
3056
3057 trx_register_for_2pc(trx);
3058 }
3059
3060 /* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
3061 ------------------------------------------------------------
3062
3063 1) The use of the query cache for TBL is disabled when there is an
3064 uncommitted change to TBL.
3065
3066 2) When a change to TBL commits, InnoDB stores the current value of
3067 its global trx id counter, let us denote it by INV_TRX_ID, to the table object
3068 in the InnoDB data dictionary, and does only allow such transactions whose
3069 id <= INV_TRX_ID to use the query cache.
3070
3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.
3074
3075 How this is implemented inside InnoDB:
3076
3077 1) Since every modification always sets an IX type table lock on the InnoDB
3078 table, it is easy to check if there can be uncommitted modifications for a
3079 table: just check if there are locks in the lock list of the table.
3080
3081 2) When a transaction inside InnoDB commits, it reads the global trx id
3082 counter and stores the value INV_TRX_ID to the tables on which it had a lock.
3083
3084 3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
3085 InnoDB calls an invalidate method for the MySQL query cache for that table.
3086
3087 How this is implemented inside sql_cache.cc:
3088
3089 1) The query cache for an InnoDB table TBL is invalidated immediately at an
3090 INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
3091 invalidation to the transaction commit.
3092
2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd. We must also pass the full_name, which is used as the key to search
for the table object. The full_name is a string containing the normalized
path to the table in the canonical format.
3099
3100 3) Use of the query cache for InnoDB tables is now allowed also when
3101 AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
3102 put restrictions on the use of the query cache.
3103 */
3104
3105 /** Check if mysql can allow the transaction to read from/store to
3106 the query cache.
3107 @param[in] table table object
3108 @param[in] trx transaction object
3109 @return whether the storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check_low(const dict_table_t * table,trx_t * trx)3110 static bool innobase_query_caching_table_check_low(
3111 const dict_table_t* table,
3112 trx_t* trx)
3113 {
3114 /* The following conditions will decide the query cache
3115 retrieval or storing into:
3116
3117 (1) There should not be any locks on the table.
3118 (2) Someother trx shouldn't invalidate the cache before this
3119 transaction started.
3120 (3) Read view shouldn't exist. If exists then the view
3121 low_limit_id should be greater than or equal to the transaction that
3122 invalidates the cache for the particular table.
3123
3124 For read-only transaction: should satisfy (1) and (3)
3125 For read-write transaction: should satisfy (1), (2), (3) */
3126
3127 if (lock_table_get_n_locks(table)) {
3128 return false;
3129 }
3130
3131 if (trx->id && trx->id < table->query_cache_inv_trx_id) {
3132 return false;
3133 }
3134
3135 return !trx->read_view.is_open()
3136 || trx->read_view.low_limit_id()
3137 >= table->query_cache_inv_trx_id;
3138 }
3139
3140 /** Checks if MySQL at the moment is allowed for this table to retrieve a
3141 consistent read result, or store it to the query cache.
3142 @param[in,out] trx transaction
3143 @param[in] norm_name concatenation of database name,
3144 '/' char, table name
3145 @return whether storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check(trx_t * trx,const char * norm_name)3146 static bool innobase_query_caching_table_check(
3147 trx_t* trx,
3148 const char* norm_name)
3149 {
3150 dict_table_t* table = dict_table_open_on_name(
3151 norm_name, FALSE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
3152
3153 if (table == NULL) {
3154 return false;
3155 }
3156
3157 /* Start the transaction if it is not started yet */
3158 trx_start_if_not_started(trx, false);
3159
3160 bool allow = innobase_query_caching_table_check_low(table, trx);
3161
3162 dict_table_close(table, FALSE, FALSE);
3163
3164 if (allow) {
3165 /* If the isolation level is high, assign a read view for the
3166 transaction if it does not yet have one */
3167
3168 if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
3169 && !srv_read_only_mode
3170 && !trx->read_view.is_open()) {
3171
3172 /* Start the transaction if it is not started yet */
3173 trx_start_if_not_started(trx, false);
3174
3175 trx->read_view.open(trx);
3176 }
3177 }
3178
3179 return allow;
3180 }
3181
3182 /******************************************************************//**
3183 The MySQL query cache uses this to check from InnoDB if the query cache at
3184 the moment is allowed to operate on an InnoDB table. The SQL query must
3185 be a non-locking SELECT.
3186
3187 The query cache is allowed to operate on certain query only if this function
3188 returns TRUE for all tables in the query.
3189
3190 If thd is not in the autocommit state, this function also starts a new
3191 transaction for thd if there is no active trx yet, and assigns a consistent
3192 read view to it if there is no read view yet.
3193
3194 Why a deadlock of threads is not possible: the query cache calls this function
3195 at the start of a SELECT processing. Then the calling thread cannot be
3196 holding any InnoDB semaphores. The calling thread is holding the
3197 query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
3198 Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
3199 the InnoDB trx_sys.mutex.
3200 @return TRUE if permitted, FALSE if not; note that the value FALSE
3201 does not mean we should invalidate the query cache: invalidation is
3202 called explicitly */
3203 static
3204 my_bool
innobase_query_caching_of_table_permitted(THD * thd,const char * full_name,uint full_name_len,ulonglong *)3205 innobase_query_caching_of_table_permitted(
3206 /*======================================*/
3207 THD* thd, /*!< in: thd of the user who is trying to
3208 store a result to the query cache or
3209 retrieve it */
3210 const char* full_name, /*!< in: normalized path to the table */
3211 uint full_name_len, /*!< in: length of the normalized path
3212 to the table */
3213 ulonglong *)
3214 {
3215 char norm_name[1000];
3216 trx_t* trx = check_trx_exists(thd);
3217
3218 ut_a(full_name_len < 999);
3219
3220 if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
3221 /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
3222 plain SELECT if AUTOCOMMIT is not on. */
3223
3224 return(false);
3225 }
3226
3227 innobase_srv_conc_force_exit_innodb(trx);
3228
3229 if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
3230 && trx->n_mysql_tables_in_use == 0) {
3231 /* We are going to retrieve the query result from the query
3232 cache. This cannot be a store operation to the query cache
3233 because then MySQL would have locks on tables already.
3234
3235 TODO: if the user has used LOCK TABLES to lock the table,
3236 then we open a transaction in the call of row_.. below.
3237 That trx can stay open until UNLOCK TABLES. The same problem
3238 exists even if we do not use the query cache. MySQL should be
3239 modified so that it ALWAYS calls some cleanup function when
3240 the processing of a query ends!
3241
3242 We can imagine we instantaneously serialize this consistent
3243 read trx to the current trx id counter. If trx2 would have
3244 changed the tables of a query result stored in the cache, and
3245 trx2 would have already committed, making the result obsolete,
3246 then trx2 would have already invalidated the cache. Thus we
3247 can trust the result in the cache is ok for this query. */
3248
3249 return(true);
3250 }
3251
3252 /* Normalize the table name to InnoDB format */
3253 normalize_table_name(norm_name, full_name);
3254
3255 innobase_register_trx(innodb_hton_ptr, thd, trx);
3256
3257 return innobase_query_caching_table_check(trx, norm_name);
3258 }
3259
3260 /*****************************************************************//**
3261 Invalidates the MySQL query cache for the table. */
3262 void
innobase_invalidate_query_cache(trx_t * trx,const char * full_name)3263 innobase_invalidate_query_cache(
3264 /*============================*/
3265 trx_t* trx, /*!< in: transaction which
3266 modifies the table */
3267 const char* full_name) /*!< in: concatenation of
3268 database name, path separator,
3269 table name, null char NUL;
3270 NOTE that in Windows this is
3271 always in LOWER CASE! */
3272 {
3273 /* Note that the sync0mutex.h rank of the query cache mutex is just
3274 above the InnoDB trx_sys_t->lock. The caller of this function must
3275 not have latches of a lower rank. */
3276
3277 #ifdef HAVE_QUERY_CACHE
3278 char qcache_key_name[2 * (NAME_LEN + 1)];
3279 char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
3280 const char *key_ptr;
3281 size_t tabname_len;
3282
3283 // Extract the database name.
3284 key_ptr= strchr(full_name, '/');
3285 DBUG_ASSERT(key_ptr != NULL); // Database name should be present
3286 size_t dbname_len= size_t(key_ptr - full_name);
3287 memcpy(db_name, full_name, dbname_len);
3288 db_name[dbname_len]= '\0';
3289
3290 /* Construct the key("db-name\0table$name\0") for the query cache using
3291 the path name("db@002dname\0table@0024name\0") of the table in its
3292 canonical form. */
3293 dbname_len = filename_to_tablename(db_name, qcache_key_name,
3294 sizeof(qcache_key_name));
3295 tabname_len = filename_to_tablename(++key_ptr,
3296 (qcache_key_name + dbname_len + 1),
3297 sizeof(qcache_key_name) -
3298 dbname_len - 1);
3299
3300 /* Argument TRUE below means we are using transactions */
3301 mysql_query_cache_invalidate4(trx->mysql_thd,
3302 qcache_key_name,
3303 uint(dbname_len + tabname_len + 2),
3304 TRUE);
3305 #endif
3306 }
3307
3308 /** Quote a standard SQL identifier like index or column name.
3309 @param[in] file output stream
3310 @param[in] trx InnoDB transaction, or NULL
3311 @param[in] id identifier to quote */
3312 void
innobase_quote_identifier(FILE * file,trx_t * trx,const char * id)3313 innobase_quote_identifier(
3314 FILE* file,
3315 trx_t* trx,
3316 const char* id)
3317 {
3318 const int q = trx != NULL && trx->mysql_thd != NULL
3319 ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3320 : '`';
3321
3322 if (q == EOF) {
3323 fputs(id, file);
3324 } else {
3325 putc(q, file);
3326
3327 while (int c = *id++) {
3328 if (c == q) {
3329 putc(c, file);
3330 }
3331 putc(c, file);
3332 }
3333
3334 putc(q, file);
3335 }
3336 }
3337
3338 /** Quote a standard SQL identifier like tablespace, index or column name.
3339 @param[in] trx InnoDB transaction, or NULL
3340 @param[in] id identifier to quote
3341 @return quoted identifier */
3342 std::string
innobase_quote_identifier(trx_t * trx,const char * id)3343 innobase_quote_identifier(
3344 /*======================*/
3345 trx_t* trx,
3346 const char* id)
3347 {
3348 std::string quoted_identifier;
3349 const int q = trx != NULL && trx->mysql_thd != NULL
3350 ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3351 : '`';
3352
3353 if (q == EOF) {
3354 quoted_identifier.append(id);
3355 } else {
3356 quoted_identifier += char(q);
3357 quoted_identifier.append(id);
3358 quoted_identifier += char(q);
3359 }
3360
3361 return (quoted_identifier);
3362 }
3363
3364 /** Convert a table name to the MySQL system_charset_info (UTF-8)
3365 and quote it.
3366 @param[out] buf buffer for converted identifier
3367 @param[in] buflen length of buf, in bytes
3368 @param[in] id identifier to convert
3369 @param[in] idlen length of id, in bytes
3370 @param[in] thd MySQL connection thread, or NULL
3371 @return pointer to the end of buf */
3372 static
3373 char*
innobase_convert_identifier(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3374 innobase_convert_identifier(
3375 char* buf,
3376 ulint buflen,
3377 const char* id,
3378 ulint idlen,
3379 THD* thd)
3380 {
3381 const char* s = id;
3382
3383 char nz[MAX_TABLE_NAME_LEN + 1];
3384 char nz2[MAX_TABLE_NAME_LEN + 1];
3385
3386 /* Decode the table name. The MySQL function expects
3387 a NUL-terminated string. The input and output strings
3388 buffers must not be shared. */
3389 ut_a(idlen <= MAX_TABLE_NAME_LEN);
3390 memcpy(nz, id, idlen);
3391 nz[idlen] = 0;
3392
3393 s = nz2;
3394 idlen = explain_filename(thd, nz, nz2, sizeof nz2,
3395 EXPLAIN_PARTITIONS_AS_COMMENT);
3396 if (idlen > buflen) {
3397 idlen = buflen;
3398 }
3399 memcpy(buf, s, idlen);
3400 return(buf + idlen);
3401 }
3402
3403 /*****************************************************************//**
3404 Convert a table name to the MySQL system_charset_info (UTF-8).
3405 @return pointer to the end of buf */
3406 char*
innobase_convert_name(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3407 innobase_convert_name(
3408 /*==================*/
3409 char* buf, /*!< out: buffer for converted identifier */
3410 ulint buflen, /*!< in: length of buf, in bytes */
3411 const char* id, /*!< in: table name to convert */
3412 ulint idlen, /*!< in: length of id, in bytes */
3413 THD* thd) /*!< in: MySQL connection thread, or NULL */
3414 {
3415 char* s = buf;
3416 const char* bufend = buf + buflen;
3417
3418 const char* slash = (const char*) memchr(id, '/', idlen);
3419
3420 if (slash == NULL) {
3421 return(innobase_convert_identifier(
3422 buf, buflen, id, idlen, thd));
3423 }
3424
3425 /* Print the database name and table name separately. */
3426 s = innobase_convert_identifier(s, ulint(bufend - s),
3427 id, ulint(slash - id), thd);
3428 if (s < bufend) {
3429 *s++ = '.';
3430 s = innobase_convert_identifier(s, ulint(bufend - s),
3431 slash + 1, idlen
3432 - ulint(slash - id) - 1,
3433 thd);
3434 }
3435
3436 return(s);
3437 }
3438
3439 /*****************************************************************//**
3440 A wrapper function of innobase_convert_name(), convert a table name
3441 to the MySQL system_charset_info (UTF-8) and quote it if needed.
3442 @return pointer to the end of buf */
3443 void
innobase_format_name(char * buf,ulint buflen,const char * name)3444 innobase_format_name(
3445 /*==================*/
3446 char* buf, /*!< out: buffer for converted identifier */
3447 ulint buflen, /*!< in: length of buf, in bytes */
3448 const char* name) /*!< in: table name to format */
3449 {
3450 const char* bufend;
3451
3452 bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL);
3453
3454 ut_ad((ulint) (bufend - buf) < buflen);
3455
3456 buf[bufend - buf] = '\0';
3457 }
3458
3459 /**********************************************************************//**
3460 Determines if the currently running transaction has been interrupted.
3461 @return true if interrupted */
3462 bool
trx_is_interrupted(const trx_t * trx)3463 trx_is_interrupted(
3464 /*===============*/
3465 const trx_t* trx) /*!< in: transaction */
3466 {
3467 return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
3468 }
3469
3470 /**************************************************************//**
3471 Resets some fields of a m_prebuilt struct. The template is used in fast
3472 retrieval of just those column values MySQL needs in its processing. */
3473 void
reset_template(void)3474 ha_innobase::reset_template(void)
3475 /*=============================*/
3476 {
3477 ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
3478 ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n);
3479
3480 /* Force table to be freed in close_thread_table(). */
3481 DBUG_EXECUTE_IF("free_table_in_fts_query",
3482 if (m_prebuilt->in_fts_query) {
3483 table->mark_table_for_reopen();
3484 }
3485 );
3486
3487 m_prebuilt->keep_other_fields_on_keyread = false;
3488 m_prebuilt->read_just_key = 0;
3489 m_prebuilt->in_fts_query = 0;
3490
3491 /* Reset index condition pushdown state. */
3492 if (m_prebuilt->idx_cond) {
3493 m_prebuilt->idx_cond = NULL;
3494 m_prebuilt->idx_cond_n_cols = 0;
3495 /* Invalidate m_prebuilt->mysql_template
3496 in ha_innobase::write_row(). */
3497 m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3498 }
3499 }
3500
3501 /*****************************************************************//**
3502 Call this when you have opened a new table handle in HANDLER, before you
3503 call index_read_map() etc. Actually, we can let the cursor stay open even
3504 over a transaction commit! Then you should call this before every operation,
3505 fetch next etc. This function inits the necessary things even after a
3506 transaction commit. */
3507
3508 void
init_table_handle_for_HANDLER(void)3509 ha_innobase::init_table_handle_for_HANDLER(void)
3510 /*============================================*/
3511 {
3512 /* If current thd does not yet have a trx struct, create one.
3513 If the current handle does not yet have a m_prebuilt struct, create
3514 one. Update the trx pointers in the m_prebuilt struct. Normally
3515 this operation is done in external_lock. */
3516
3517 update_thd(ha_thd());
3518
3519 /* Initialize the m_prebuilt struct much like it would be inited in
3520 external_lock */
3521
3522 innobase_srv_conc_force_exit_innodb(m_prebuilt->trx);
3523
3524 /* If the transaction is not started yet, start it */
3525
3526 trx_start_if_not_started_xa(m_prebuilt->trx, false);
3527
3528 /* Assign a read view if the transaction does not have it yet */
3529
3530 m_prebuilt->trx->read_view.open(m_prebuilt->trx);
3531
3532 innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
3533
3534 /* We did the necessary inits in this function, no need to repeat them
3535 in row_search_for_mysql */
3536
3537 m_prebuilt->sql_stat_start = FALSE;
3538
3539 /* We let HANDLER always to do the reads as consistent reads, even
3540 if the trx isolation level would have been specified as SERIALIZABLE */
3541
3542 m_prebuilt->select_lock_type = LOCK_NONE;
3543 m_prebuilt->stored_select_lock_type = LOCK_NONE;
3544
3545 /* Always fetch all columns in the index record */
3546
3547 m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
3548
3549 /* We want always to fetch all columns in the whole row? Or do
3550 we???? */
3551
3552 m_prebuilt->used_in_HANDLER = TRUE;
3553
3554 reset_template();
3555 }
3556
3557 /*********************************************************************//**
3558 Free any resources that were allocated and return failure.
3559 @return always return 1 */
innodb_init_abort()3560 static int innodb_init_abort()
3561 {
3562 DBUG_ENTER("innodb_init_abort");
3563
3564 if (fil_system.temp_space) {
3565 fil_system.temp_space->close();
3566 }
3567
3568 srv_sys_space.shutdown();
3569 if (srv_tmp_space.get_sanity_check_status()) {
3570 srv_tmp_space.delete_files();
3571 }
3572 srv_tmp_space.shutdown();
3573
3574 #ifdef WITH_INNODB_DISALLOW_WRITES
3575 os_event_destroy(srv_allow_writes_event);
3576 #endif /* WITH_INNODB_DISALLOW_WRITES */
3577 DBUG_RETURN(1);
3578 }
3579
3580 /** Return the minimum buffer pool size based on page size */
min_buffer_pool_size()3581 static inline ulint min_buffer_pool_size()
3582 {
3583 ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size;
3584 /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */
3585 ulint alignment= 1U << 20;
3586 return UT_CALC_ALIGN(s, alignment);
3587 }
3588
/** Validate the requested buffer pool size. Also, reserve the necessary
memory needed for buffer pool resize.
This is a forward declaration; it is needed because the function is
referenced by the innodb_buffer_pool_size system variable definition below.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	save	immediate result for update function
@param[in]	value	incoming string
@return 0 on success, 1 on failure.
*/
static
int
innodb_buffer_pool_size_validate(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				save,
	struct st_mysql_value*		value);

/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
This is a forward declaration; it is needed because the function is
referenced by the innodb_buffer_pool_size system variable definition below.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	var_ptr	where the formal string goes
@param[in]	save	immediate result from check function */
static
void
innodb_buffer_pool_size_update(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				var_ptr,
	const void*			save);

/* If the default value of innodb_buffer_pool_size is increased to be more than
BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
can be removed and 8 used instead. The problem with the current setup is that
with 128MiB default buffer pool size and 8 instances by default we would emit
a warning when no options are specified. */

/* Definition of the innodb_buffer_pool_size system variable, stored in
innobase_buffer_pool_size. The numeric arguments are 128 MiB, 2 MiB,
LLONG_MAX and 1 MiB; presumably default, minimum, maximum and block size,
in that order (confirm against the MYSQL_SYSVAR_ULONGLONG macro).
innodb_init_params() overwrites the min_val member of this variable with
min_buffer_pool_size() at startup. */
static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  innodb_buffer_pool_size_validate,
  innodb_buffer_pool_size_update,
  128ULL << 20,
  2ULL << 20,
  LLONG_MAX, 1024*1024L);

/** Deprecation message about innodb_idle_flush_pct */
static const char*	deprecated_idle_flush_pct
	= "innodb_idle_flush_pct is DEPRECATED and has no effect.";

/** Deprecation message emitted when innodb_checksum_algorithm is set to
anything other than crc32 or strict_crc32 */
static const char*	deprecated_innodb_checksum_algorithm
	= "Setting innodb_checksum_algorithm to values other than"
	" crc32 or strict_crc32 is UNSAFE and DEPRECATED."
	" These deprecated values will be disallowed in MariaDB 10.6.";

/** Value of the deprecated innodb_idle_flush_pct parameter.
innodb_init_params() only compares it with 100 in order to decide whether
to emit deprecated_idle_flush_pct. */
static ulong	innodb_idle_flush_pct;
3643
3644 /** If applicable, emit a message that log checksums cannot be disabled.
3645 @param[in,out] thd client session, or NULL if at startup
3646 @param[in] check whether redo log block checksums are enabled
3647 @return whether redo log block checksums are enabled */
3648 static inline
3649 bool
innodb_log_checksums_func_update(THD * thd,bool check)3650 innodb_log_checksums_func_update(THD* thd, bool check)
3651 {
3652 static const char msg[] = "innodb_log_checksums is deprecated"
3653 " and has no effect outside recovery";
3654
3655 ut_ad(!thd == !srv_was_started);
3656
3657 if (!check) {
3658 if (thd) {
3659 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3660 HA_ERR_UNSUPPORTED, msg);
3661 check = true;
3662 } else {
3663 sql_print_warning(msg);
3664 }
3665 }
3666
3667 return(check);
3668 }
3669
innodb_checksum_algorithm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)3670 static void innodb_checksum_algorithm_update(THD *thd, st_mysql_sys_var*,
3671 void *, const void *save)
3672 {
3673 srv_checksum_algorithm= *static_cast<const ulong*>(save);
3674 switch (srv_checksum_algorithm) {
3675 case SRV_CHECKSUM_ALGORITHM_CRC32:
3676 case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
3677 break;
3678 default:
3679 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3680 HA_ERR_UNSUPPORTED,
3681 deprecated_innodb_checksum_algorithm);
3682 }
3683 }
3684
/****************************************************************//**
File name extensions of InnoDB single-table tablespace files: the IBD and
ISL entries of dot_ext[], terminated by NullS. innodb_init() registers this
array as the handlerton's tablefile_extensions. */
static const char*	ha_innobase_exts[] = {
	dot_ext[IBD],
	dot_ext[ISL],
	NullS
};
3692
3693 /** Determine if system-versioned data was modified by the transaction.
3694 @param[in,out] thd current session
3695 @param[out] trx_id transaction start ID
3696 @return transaction commit ID
3697 @retval 0 if no system-versioned data was affected by the transaction */
innodb_prepare_commit_versioned(THD * thd,ulonglong * trx_id)3698 static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
3699 {
3700 if (const trx_t* trx = thd_to_trx(thd)) {
3701 *trx_id = trx->id;
3702
3703 for (trx_mod_tables_t::const_iterator t
3704 = trx->mod_tables.begin();
3705 t != trx->mod_tables.end(); t++) {
3706 if (t->second.is_versioned()) {
3707 DBUG_ASSERT(t->first->versioned_by_id());
3708 DBUG_ASSERT(trx->rsegs.m_redo.rseg);
3709
3710 return trx_sys.get_new_trx_id();
3711 }
3712 }
3713
3714 return 0;
3715 }
3716
3717 *trx_id = 0;
3718 return 0;
3719 }
3720
3721 /** Initialize and normalize innodb_buffer_pool_size. */
innodb_buffer_pool_size_init()3722 static void innodb_buffer_pool_size_init()
3723 {
3724 if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
3725
3726 if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
3727 #if defined(_WIN32) && !defined(_WIN64)
3728 /* Do not allocate too large of a buffer pool on
3729 Windows 32-bit systems, which can have trouble
3730 allocating larger single contiguous memory blocks. */
3731 srv_buf_pool_size = ulint(
3732 ut_uint64_align_up(srv_buf_pool_size,
3733 srv_buf_pool_chunk_unit));
3734 srv_buf_pool_instances = std::min<ulong>(
3735 MAX_BUFFER_POOLS,
3736 ulong(srv_buf_pool_size
3737 / srv_buf_pool_chunk_unit));
3738 #else /* defined(_WIN32) && !defined(_WIN64) */
3739 /* Default to 8 instances when size > 1GB. */
3740 srv_buf_pool_instances = 8;
3741 #endif /* defined(_WIN32) && !defined(_WIN64) */
3742 }
3743 } else {
3744 /* If buffer pool is less than 1 GiB, assume fewer
3745 threads. Also use only one buffer pool instance. */
3746 if (srv_buf_pool_instances != srv_buf_pool_instances_default
3747 && srv_buf_pool_instances != 1) {
3748 /* We can't distinguish whether the user has explicitly
3749 started mysqld with --innodb-buffer-pool-instances=0,
3750 (srv_buf_pool_instances_default is 0) or has not
3751 specified that option at all. Thus we have the
3752 limitation that if the user started with =0, we
3753 will not emit a warning here, but we should actually
3754 do so. */
3755 ib::info()
3756 << "Adjusting innodb_buffer_pool_instances"
3757 " from " << srv_buf_pool_instances << " to 1"
3758 " since innodb_buffer_pool_size is less than "
3759 << BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
3760 << " MiB";
3761 }
3762
3763 srv_buf_pool_instances = 1;
3764 }
3765
3766 if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
3767 > srv_buf_pool_size) {
3768 /* Size unit of buffer pool is larger than srv_buf_pool_size.
3769 adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
3770 srv_buf_pool_chunk_unit
3771 = static_cast<ulong>(srv_buf_pool_size)
3772 / srv_buf_pool_instances;
3773 if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
3774 ++srv_buf_pool_chunk_unit;
3775 }
3776 }
3777
3778 srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
3779 innobase_buffer_pool_size = srv_buf_pool_size;
3780 }
3781
3782 /** Initialize, validate and normalize the InnoDB startup parameters.
3783 @return failure code
3784 @retval 0 on success
3785 @retval HA_ERR_OUT_OF_MEM when out of memory
3786 @retval HA_ERR_INITIALIZATION when some parameters are out of range */
innodb_init_params()3787 static int innodb_init_params()
3788 {
3789 DBUG_ENTER("innodb_init_params");
3790
3791 static char current_dir[3];
3792 char *default_path;
3793 ulong num_pll_degree;
3794
3795 if (innodb_large_prefix || innodb_file_format) {
3796 const char* p = innodb_file_format
3797 ? "file_format"
3798 : "large_prefix";
3799 sql_print_warning("The parameter innodb_%s is deprecated"
3800 " and has no effect."
3801 " It may be removed in future releases."
3802 " See https://mariadb.com/kb/en/library/"
3803 "xtradbinnodb-file-format/", p);
3804 }
3805
3806 /* Check that values don't overflow on 32-bit systems. */
3807 if (sizeof(ulint) == 4) {
3808 if (innobase_buffer_pool_size > UINT_MAX32) {
3809 sql_print_error(
3810 "innodb_buffer_pool_size can't be over 4GB"
3811 " on 32-bit systems");
3812 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
3813 }
3814 }
3815
3816 /* The buffer pool needs to be able to accommodate enough many
3817 pages, even for larger pages */
3818 MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size();
3819
3820 if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
3821 ib::error() << "innodb_page_size="
3822 << srv_page_size << " requires "
3823 << "innodb_buffer_pool_size >= "
3824 << (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20)
3825 << "MiB current " << (innobase_buffer_pool_size >> 20)
3826 << "MiB";
3827 DBUG_RETURN(HA_ERR_INITIALIZATION);
3828 }
3829
3830 if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
3831 ib::warn() << "The parameter innodb_lock_schedule_algorithm"
3832 " is deprecated, and the setting"
3833 " innodb_lock_schedule_algorithm=vats"
3834 " may cause corruption. The parameter may be removed"
3835 " in future releases.";
3836
3837 #ifdef WITH_WSREP
3838 /* Currently, Galera does not support VATS lock schedule algorithm. */
3839 if (global_system_variables.wsrep_on) {
3840 ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
3841 innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
3842 }
3843 #endif /* WITH_WSREP */
3844 }
3845
3846 #ifdef WITH_WSREP
3847 /* Print deprecation info if xtrabackup is used for SST method */
3848 if (global_system_variables.wsrep_on
3849 && wsrep_sst_method
3850 && (!strcmp(wsrep_sst_method, "xtrabackup")
3851 || !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
3852 ib::info() << "Galera SST method xtrabackup is deprecated and the "
3853 " support for it may be removed in future releases.";
3854 }
3855 #endif /* WITH_WSREP */
3856
3857 #ifndef HAVE_LZ4
3858 if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
3859 sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3860 "InnoDB: liblz4 is not installed. \n",
3861 innodb_compression_algorithm);
3862 DBUG_RETURN(HA_ERR_INITIALIZATION);
3863 }
3864 #endif
3865
3866 #ifndef HAVE_LZO
3867 if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
3868 sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3869 "InnoDB: liblzo is not installed. \n",
3870 innodb_compression_algorithm);
3871 DBUG_RETURN(HA_ERR_INITIALIZATION);
3872 }
3873 #endif
3874
3875 #ifndef HAVE_LZMA
3876 if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
3877 sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3878 "InnoDB: liblzma is not installed. \n",
3879 innodb_compression_algorithm);
3880 DBUG_RETURN(HA_ERR_INITIALIZATION);
3881 }
3882 #endif
3883
3884 #ifndef HAVE_BZIP2
3885 if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
3886 sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3887 "InnoDB: libbz2 is not installed. \n",
3888 innodb_compression_algorithm);
3889 DBUG_RETURN(HA_ERR_INITIALIZATION);
3890 }
3891 #endif
3892
3893 #ifndef HAVE_SNAPPY
3894 if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
3895 sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3896 "InnoDB: libsnappy is not installed. \n",
3897 innodb_compression_algorithm);
3898 DBUG_RETURN(HA_ERR_INITIALIZATION);
3899 }
3900 #endif
3901
3902 if ((srv_encrypt_tables || srv_encrypt_log
3903 || innodb_encrypt_temporary_tables)
3904 && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
3905 sql_print_error("InnoDB: cannot enable encryption, "
3906 "encryption plugin is not available");
3907 DBUG_RETURN(HA_ERR_INITIALIZATION);
3908 }
3909
3910 #ifdef _WIN32
3911 if (!is_filename_allowed(srv_buf_dump_filename,
3912 strlen(srv_buf_dump_filename), FALSE)) {
3913 sql_print_error("InnoDB: innodb_buffer_pool_filename"
3914 " cannot have colon (:) in the file name.");
3915 DBUG_RETURN(HA_ERR_INITIALIZATION);
3916 }
3917 #endif
3918
3919 /* First calculate the default path for innodb_data_home_dir etc.,
3920 in case the user has not given any value.
3921
3922 Note that when using the embedded server, the datadirectory is not
3923 necessarily the current directory of this program. */
3924
3925 if (mysqld_embedded) {
3926 default_path = mysql_real_data_home;
3927 } else {
3928 /* It's better to use current lib, to keep paths short */
3929 current_dir[0] = FN_CURLIB;
3930 current_dir[1] = FN_LIBCHAR;
3931 current_dir[2] = 0;
3932 default_path = current_dir;
3933 }
3934
3935 ut_a(default_path);
3936
3937 fil_path_to_mysql_datadir = default_path;
3938
3939 /* Set InnoDB initialization parameters according to the values
3940 read from MySQL .cnf file */
3941
3942 /* The default dir for data files is the datadir of MySQL */
3943
3944 srv_data_home = innobase_data_home_dir
3945 ? innobase_data_home_dir : default_path;
3946 #ifdef WITH_WSREP
3947 /* If we use the wsrep API, then we need to tell the server
3948 the path to the data files (for passing it to the SST scripts): */
3949 wsrep_set_data_home_dir(srv_data_home);
3950 #endif /* WITH_WSREP */
3951
3952
3953 /*--------------- Shared tablespaces -------------------------*/
3954
3955 /* Check that the value of system variable innodb_page_size was
3956 set correctly. Its value was put into srv_page_size. If valid,
3957 return the associated srv_page_size_shift. */
3958 srv_page_size_shift = innodb_page_size_validate(srv_page_size);
3959 if (!srv_page_size_shift) {
3960 sql_print_error("InnoDB: Invalid page size=%lu.\n",
3961 srv_page_size);
3962 DBUG_RETURN(HA_ERR_INITIALIZATION);
3963 }
3964
3965 /* This is the first time univ_page_size is used.
3966 It was initialized to 16k pages before srv_page_size was set */
3967 univ_page_size.copy_from(
3968 page_size_t(srv_page_size, srv_page_size, false));
3969
3970 srv_sys_space.set_space_id(TRX_SYS_SPACE);
3971 srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
3972 srv_sys_space.set_name("innodb_system");
3973 srv_sys_space.set_path(srv_data_home);
3974
3975 /* Supports raw devices */
3976 if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
3977 ib::error() << "Unable to parse innodb_data_file_path="
3978 << innobase_data_file_path;
3979 DBUG_RETURN(HA_ERR_INITIALIZATION);
3980 }
3981
3982 srv_tmp_space.set_name("innodb_temporary");
3983 srv_tmp_space.set_path(srv_data_home);
3984 srv_tmp_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
3985
3986 if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
3987 ib::error() << "Unable to parse innodb_temp_data_file_path="
3988 << innobase_temp_data_file_path;
3989 DBUG_RETURN(HA_ERR_INITIALIZATION);
3990 }
3991
3992 /* Perform all sanity check before we take action of deleting files*/
3993 if (srv_sys_space.intersection(&srv_tmp_space)) {
3994 sql_print_error("%s and %s file names seem to be the same.",
3995 srv_tmp_space.name(), srv_sys_space.name());
3996 DBUG_RETURN(HA_ERR_INITIALIZATION);
3997 }
3998
3999 srv_sys_space.normalize_size();
4000 srv_tmp_space.normalize_size();
4001
4002 /* ------------ UNDO tablespaces files ---------------------*/
4003 if (!srv_undo_dir) {
4004 srv_undo_dir = default_path;
4005 }
4006
4007 os_normalize_path(srv_undo_dir);
4008
4009 if (strchr(srv_undo_dir, ';')) {
4010 sql_print_error("syntax error in innodb_undo_directory");
4011 DBUG_RETURN(HA_ERR_INITIALIZATION);
4012 }
4013
4014 /* -------------- All log files ---------------------------*/
4015
4016 /* The default dir for log files is the datadir of MySQL */
4017
4018 if (!srv_log_group_home_dir) {
4019 srv_log_group_home_dir = default_path;
4020 }
4021
4022 os_normalize_path(srv_log_group_home_dir);
4023
4024 if (strchr(srv_log_group_home_dir, ';')) {
4025 sql_print_error("syntax error in innodb_log_group_home_dir");
4026 DBUG_RETURN(HA_ERR_INITIALIZATION);
4027 }
4028
4029 if (srv_n_log_files * srv_log_file_size >= log_group_max_size) {
4030 /* Log group size is limited by the size of page number.
4031 Remove this limitation when fil_io() is not used for
4032 recovery log io. */
4033 ib::error() << "Combined size of log files must be < "
4034 << log_group_max_size;
4035 DBUG_RETURN(HA_ERR_INITIALIZATION);
4036 }
4037
4038 DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);
4039
4040 /* Check that interdependent parameters have sane values. */
4041 if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
4042 sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
4043 " cannot be set higher than"
4044 " innodb_max_dirty_pages_pct.\n"
4045 "InnoDB: Setting"
4046 " innodb_max_dirty_pages_pct_lwm to %lf\n",
4047 srv_max_buf_pool_modified_pct);
4048
4049 srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
4050 }
4051
4052 if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {
4053
4054 if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
4055 /* Avoid overflow. */
4056 srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
4057 } else {
4058 /* The user has not set the value. We should
4059 set it based on innodb_io_capacity. */
4060 srv_max_io_capacity =
4061 ut_max(2 * srv_io_capacity, 2000UL);
4062 }
4063
4064 } else if (srv_max_io_capacity < srv_io_capacity) {
4065 sql_print_warning("InnoDB: innodb_io_capacity"
4066 " cannot be set higher than"
4067 " innodb_io_capacity_max."
4068 "Setting innodb_io_capacity=%lu",
4069 srv_max_io_capacity);
4070
4071 srv_io_capacity = srv_max_io_capacity;
4072 }
4073
4074 if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
4075 ib::info() << "innodb_page_size=" << srv_page_size;
4076
4077 srv_max_undo_log_size = std::max(
4078 srv_max_undo_log_size,
4079 ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
4080 << srv_page_size_shift);
4081 }
4082
4083 if (srv_log_write_ahead_size > srv_page_size) {
4084 srv_log_write_ahead_size = srv_page_size;
4085 } else {
4086 ulong srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE;
4087
4088 while (srv_log_write_ahead_size_tmp
4089 < srv_log_write_ahead_size) {
4090 srv_log_write_ahead_size_tmp
4091 = srv_log_write_ahead_size_tmp * 2;
4092 }
4093 if (srv_log_write_ahead_size_tmp
4094 != srv_log_write_ahead_size) {
4095 srv_log_write_ahead_size
4096 = srv_log_write_ahead_size_tmp / 2;
4097 }
4098 }
4099
4100 srv_buf_pool_size = ulint(innobase_buffer_pool_size);
4101
4102 if (!innobase_use_checksums) {
4103 ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
4104 " This option was removed in MariaDB 10.5.";
4105 srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
4106 } else {
4107 switch (srv_checksum_algorithm) {
4108 case SRV_CHECKSUM_ALGORITHM_CRC32:
4109 case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
4110 break;
4111 default:
4112 ib::warn() << deprecated_innodb_checksum_algorithm;
4113 }
4114 }
4115
4116 innodb_log_checksums = innodb_log_checksums_func_update(
4117 NULL, innodb_log_checksums);
4118
4119 row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
4120
4121 srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
4122 if (innobase_locks_unsafe_for_binlog) {
4123 ib::warn() << "Using innodb_locks_unsafe_for_binlog is"
4124 " DEPRECATED. This option may be removed in future"
4125 " releases. Please use READ COMMITTED transaction"
4126 " isolation level instead; " << SET_TRANSACTION_MSG;
4127 }
4128
4129 if (innobase_open_files < 10) {
4130 innobase_open_files = 300;
4131 if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) {
4132 innobase_open_files = tc_size;
4133 }
4134 }
4135
4136 if (innobase_open_files > open_files_limit) {
4137 ib::warn() << "innodb_open_files " << innobase_open_files
4138 << " should not be greater"
4139 << " than the open_files_limit " << open_files_limit;
4140 if (innobase_open_files > tc_size) {
4141 innobase_open_files = tc_size;
4142 }
4143 }
4144
4145 srv_max_n_open_files = innobase_open_files;
4146 srv_innodb_status = (ibool) innobase_create_status_file;
4147
4148 srv_print_verbose_log = mysqld_embedded ? 0 : 1;
4149
4150 /* Round up fts_sort_pll_degree to nearest power of 2 number */
4151 for (num_pll_degree = 1;
4152 num_pll_degree < fts_sort_pll_degree;
4153 num_pll_degree <<= 1) {
4154
4155 /* No op */
4156 }
4157
4158 fts_sort_pll_degree = num_pll_degree;
4159
4160 /* Store the default charset-collation number of this MySQL
4161 installation */
4162
4163 data_mysql_default_charset_coll = (ulint) default_charset_info->number;
4164
4165 innobase_commit_concurrency_init_default();
4166
4167 if (innodb_idle_flush_pct != 100) {
4168 ib::warn() << deprecated_idle_flush_pct;
4169 }
4170
4171 #ifndef _WIN32
4172 if (srv_use_atomic_writes && my_may_have_atomic_write) {
4173 /*
4174 Force O_DIRECT on Unixes (on Windows writes are always
4175 unbuffered)
4176 */
4177 switch (innodb_flush_method) {
4178 case SRV_O_DIRECT:
4179 case SRV_O_DIRECT_NO_FSYNC:
4180 break;
4181 default:
4182 innodb_flush_method = SRV_O_DIRECT;
4183 fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
4184 }
4185 }
4186 #endif
4187
4188 if (srv_read_only_mode) {
4189 ib::info() << "Started in read only mode";
4190 srv_use_doublewrite_buf = FALSE;
4191 }
4192
4193 #ifdef LINUX_NATIVE_AIO
4194 if (srv_use_native_aio) {
4195 ib::info() << "Using Linux native AIO";
4196 }
4197 #elif !defined _WIN32
4198 /* Currently native AIO is supported only on windows and linux
4199 and that also when the support is compiled in. In all other
4200 cases, we ignore the setting of innodb_use_native_aio. */
4201 srv_use_native_aio = FALSE;
4202 #endif
4203
4204 #ifndef _WIN32
4205 ut_ad(innodb_flush_method <= SRV_O_DIRECT_NO_FSYNC);
4206 #else
4207 switch (innodb_flush_method) {
4208 case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
4209 innodb_flush_method = SRV_ALL_O_DIRECT_FSYNC;
4210 break;
4211 case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
4212 innodb_flush_method = SRV_FSYNC;
4213 break;
4214 default:
4215 ut_ad(innodb_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
4216 }
4217 #endif
4218 srv_file_flush_method = srv_flush_t(innodb_flush_method);
4219
4220 innodb_buffer_pool_size_init();
4221
4222 if (srv_n_page_cleaners > srv_buf_pool_instances) {
4223 /* limit of page_cleaner parallelizability
4224 is number of buffer pool instances. */
4225 srv_n_page_cleaners = srv_buf_pool_instances;
4226 }
4227
4228 srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
4229 DBUG_RETURN(0);
4230 }
4231
/** Initialize the InnoDB storage engine plugin.
Fills in the handlerton callbacks, validates the startup parameters with
innodb_init_params(), registers performance schema keys, checks the system
tablespace files, starts InnoDB with srv_start() and finally initializes the
commit synchronization objects and monitor counters.
@param[in,out]	p	InnoDB handlerton
@return error code
@retval 0 on success */
static int innodb_init(void* p)
{
	DBUG_ENTER("innodb_init");
	handlerton* innobase_hton= static_cast<handlerton*>(p);
	innodb_hton_ptr = innobase_hton;

	/* Register the storage engine entry points. */
	innobase_hton->state = SHOW_OPTION_YES;
	innobase_hton->db_type = DB_TYPE_INNODB;
	innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
	innobase_hton->close_connection = innobase_close_connection;
	innobase_hton->kill_query = innobase_kill_query;
	innobase_hton->savepoint_set = innobase_savepoint;
	innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;

	innobase_hton->savepoint_rollback_can_release_mdl =
				innobase_rollback_to_savepoint_can_release_mdl;

	innobase_hton->savepoint_release = innobase_release_savepoint;
	innobase_hton->prepare_ordered= NULL;
	innobase_hton->commit_ordered= innobase_commit_ordered;
	innobase_hton->commit = innobase_commit;
	innobase_hton->rollback = innobase_rollback;
	innobase_hton->prepare = innobase_xa_prepare;
	innobase_hton->recover = innobase_xa_recover;
	innobase_hton->commit_by_xid = innobase_commit_by_xid;
	innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
	innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
	innobase_hton->create = innobase_create_handler;

	innobase_hton->drop_database = innobase_drop_database;
	innobase_hton->panic = innobase_end;

	innobase_hton->start_consistent_snapshot =
		innobase_start_trx_and_assign_read_view;

	innobase_hton->flush_logs = innobase_flush_logs;
	innobase_hton->show_status = innobase_show_status;
	innobase_hton->flags =
		HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS |
		HTON_NATIVE_SYS_VERSIONING |
		HTON_REQUIRES_CLOSE_AFTER_TRUNCATE;

#ifdef WITH_WSREP
	innobase_hton->abort_transaction=wsrep_abort_transaction;
	innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
	innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
	innobase_hton->fake_trx_id=wsrep_fake_trx_id;
#endif /* WITH_WSREP */

	innobase_hton->tablefile_extensions = ha_innobase_exts;
	innobase_hton->table_options = innodb_table_option_list;

	/* System Versioning */
	innobase_hton->prepare_commit_versioned
		= innodb_prepare_commit_versioned;

	innodb_remember_check_sysvar_funcs();

	compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);

#ifndef DBUG_OFF
	/* Debug-only self-test: converting the file name "-@" with
	filename_to_tablename() must yield srv_mysql50_table_name_prefix
	followed by the unchanged file name. */
	static const char	test_filename[] = "-@";
	char			test_tablename[sizeof test_filename
				+ sizeof(srv_mysql50_table_name_prefix) - 1];
	DBUG_ASSERT(sizeof test_tablename - 1
		    == filename_to_tablename(test_filename,
					     test_tablename,
					     sizeof test_tablename, true));
	DBUG_ASSERT(!strncmp(test_tablename,
			     srv_mysql50_table_name_prefix,
			     sizeof srv_mysql50_table_name_prefix - 1));
	DBUG_ASSERT(!strcmp(test_tablename
			    + sizeof srv_mysql50_table_name_prefix - 1,
			    test_filename));
#endif /* DBUG_OFF */

	os_file_set_umask(my_umask);

	/* Setup the memory alloc/free tracing mechanisms before calling
	any functions that could possibly allocate memory. */
	ut_new_boot();

	if (int error = innodb_init_params()) {
		DBUG_RETURN(error);
	}

	/* After this point, error handling has to use
	innodb_init_abort(). */

#ifdef HAVE_PSI_INTERFACE
	/* Register keys with MySQL performance schema */
	int	count;

	count = array_elements(all_pthread_mutexes);
	mysql_mutex_register("innodb", all_pthread_mutexes, count);

# ifdef UNIV_PFS_MUTEX
	count = array_elements(all_innodb_mutexes);
	mysql_mutex_register("innodb", all_innodb_mutexes, count);
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
	count = array_elements(all_innodb_rwlocks);
	mysql_rwlock_register("innodb", all_innodb_rwlocks, count);
# endif /* UNIV_PFS_RWLOCK */

# ifdef UNIV_PFS_THREAD
	count = array_elements(all_innodb_threads);
	mysql_thread_register("innodb", all_innodb_threads, count);
# endif /* UNIV_PFS_THREAD */

# ifdef UNIV_PFS_IO
	count = array_elements(all_innodb_files);
	mysql_file_register("innodb", all_innodb_files, count);
# endif /* UNIV_PFS_IO */

	count = array_elements(all_innodb_conds);
	mysql_cond_register("innodb", all_innodb_conds, count);
#endif /* HAVE_PSI_INTERFACE */

	bool	create_new_db = false;

	/* Check whether the data files exist. */
	dberr_t	err = srv_sys_space.check_file_spec(&create_new_db, 5U << 20);

	if (err != DB_SUCCESS) {
		DBUG_RETURN(innodb_init_abort());
	}

	err = srv_start(create_new_db);

	if (err != DB_SUCCESS) {
		innodb_shutdown();
		DBUG_RETURN(innodb_init_abort());
	} else if (!srv_read_only_mode) {
		/* Start the thd_destructor_proxy thread and poll until
		srv_running becomes non-NULL.
		NOTE(review): the return value of mysql_thread_create()
		is not checked; if thread creation failed, this loop would
		presumably never terminate - confirm. */
		mysql_thread_create(thd_destructor_thread_key,
				    &thd_destructor_thread,
				    NULL, thd_destructor_proxy, NULL);
		while (!my_atomic_loadptr_explicit(reinterpret_cast<void**>
						   (&srv_running),
						   MY_MEMORY_ORDER_RELAXED))
			os_thread_sleep(20);
	}

	srv_was_started = true;
	innodb_params_adjust();

	innobase_old_blocks_pct = static_cast<uint>(
		buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));

	ibuf_max_size_update(srv_change_buffer_max_size);

	/* These objects are destroyed again in innobase_end(). */
	mysql_mutex_init(commit_cond_mutex_key,
			 &commit_cond_m, MY_MUTEX_INIT_FAST);
	mysql_cond_init(commit_cond_key, &commit_cond, 0);
	mysql_mutex_init(pending_checkpoint_mutex_key,
			 &pending_checkpoint_mutex,
			 MY_MUTEX_INIT_FAST);
#ifdef MYSQL_DYNAMIC_PLUGIN
	/* NOTE(review): innobase_hton was initialized from p at the top
	of this function and is not reassigned in between, so this
	condition looks like it can never hold - confirm. */
	if (innobase_hton != p) {
		innobase_hton = reinterpret_cast<handlerton*>(p);
		*innobase_hton = *innodb_hton_ptr;
	}
#endif /* MYSQL_DYNAMIC_PLUGIN */

	/* Currently, monitor counter information are not persistent. */
	memset(monitor_set_tbl, 0, sizeof monitor_set_tbl);

	memset(innodb_counter_value, 0, sizeof innodb_counter_value);

	/* Do this as late as possible so that the server has fully
	started up, since we might get some initial stats if the user
	chose to turn on some counters from startup */
	if (innobase_enable_monitor_counter) {
		innodb_enable_monitor_at_startup(
			innobase_enable_monitor_counter);
	}

	/* Turn on monitor counters that are default on */
	srv_mon_default_on();

	/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
	unit_test_os_file_get_parent_dir();
#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */

#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
	test_make_filepath();
#endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */

#ifdef UNIV_ENABLE_DICT_STATS_TEST
	test_dict_stats_all();
#endif /*UNIV_ENABLE_DICT_STATS_TEST */

#ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
# ifdef HAVE_UT_CHRONO_T
	test_row_raw_format_int();
# endif /* HAVE_UT_CHRONO_T */
#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */

	DBUG_RETURN(0);
}
4438
4439 /** Shut down the InnoDB storage engine.
4440 @return 0 */
4441 static
4442 int
innobase_end(handlerton *,ha_panic_function)4443 innobase_end(handlerton*, ha_panic_function)
4444 {
4445 DBUG_ENTER("innobase_end");
4446
4447 if (srv_was_started) {
4448 THD *thd= current_thd;
4449 if (thd) { // may be UNINSTALL PLUGIN statement
4450 if (trx_t* trx = thd_to_trx(thd)) {
4451 trx->free();
4452 }
4453 }
4454
4455 st_my_thread_var* r = reinterpret_cast<st_my_thread_var*>(
4456 my_atomic_loadptr_explicit(
4457 reinterpret_cast<void**>(&srv_running),
4458 MY_MEMORY_ORDER_RELAXED));
4459 if (r) {
4460 ut_ad(!srv_read_only_mode);
4461 if (!abort_loop) {
4462 // may be UNINSTALL PLUGIN statement
4463 mysql_mutex_lock(r->current_mutex);
4464 r->abort = 1;
4465 mysql_cond_broadcast(r->current_cond);
4466 mysql_mutex_unlock(r->current_mutex);
4467 }
4468 pthread_join(thd_destructor_thread, NULL);
4469 }
4470
4471 innodb_shutdown();
4472
4473 mysql_mutex_destroy(&commit_cond_m);
4474 mysql_cond_destroy(&commit_cond);
4475 mysql_mutex_destroy(&pending_checkpoint_mutex);
4476 }
4477
4478 DBUG_RETURN(0);
4479 }
4480
4481 /*****************************************************************//**
4482 Commits a transaction in an InnoDB database. */
4483 void
innobase_commit_low(trx_t * trx)4484 innobase_commit_low(
4485 /*================*/
4486 trx_t* trx) /*!< in: transaction handle */
4487 {
4488 #ifdef WITH_WSREP
4489 const char* tmp = 0;
4490 const bool is_wsrep = trx->is_wsrep();
4491 THD* thd = trx->mysql_thd;
4492 if (is_wsrep) {
4493 tmp = thd_proc_info(thd, "innobase_commit_low()");
4494 }
4495 #endif /* WITH_WSREP */
4496 if (trx_is_started(trx)) {
4497 trx_commit_for_mysql(trx);
4498 } else {
4499 trx->will_lock = false;
4500 #ifdef WITH_WSREP
4501 trx->wsrep = false;
4502 #endif /* WITH_WSREP */
4503 }
4504
4505 #ifdef WITH_WSREP
4506 if (is_wsrep) {
4507 thd_proc_info(thd, tmp);
4508 }
4509 #endif /* WITH_WSREP */
4510 }
4511
4512 /*****************************************************************//**
4513 Creates an InnoDB transaction struct for the thd if it does not yet have one.
4514 Starts a new InnoDB transaction if a transaction is not yet started. And
4515 assigns a new snapshot for a consistent read if the transaction does not yet
4516 have one.
4517 @return 0 */
4518 static
4519 int
innobase_start_trx_and_assign_read_view(handlerton * hton,THD * thd)4520 innobase_start_trx_and_assign_read_view(
4521 /*====================================*/
4522 handlerton* hton, /*!< in: InnoDB handlerton */
4523 THD* thd) /*!< in: MySQL thread handle of the user for
4524 whom the transaction should be committed */
4525 {
4526 DBUG_ENTER("innobase_start_trx_and_assign_read_view");
4527 DBUG_ASSERT(hton == innodb_hton_ptr);
4528
4529 /* Create a new trx struct for thd, if it does not yet have one */
4530
4531 trx_t* trx = check_trx_exists(thd);
4532
4533 innobase_srv_conc_force_exit_innodb(trx);
4534
4535 /* The transaction should not be active yet, start it */
4536
4537 ut_ad(!trx_is_started(trx));
4538
4539 trx_start_if_not_started_xa(trx, false);
4540
4541 /* Assign a read view if the transaction does not have it yet.
4542 Do this only if transaction is using REPEATABLE READ isolation
4543 level. */
4544 trx->isolation_level = innobase_map_isolation_level(
4545 thd_get_trx_isolation(thd));
4546
4547 if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
4548 trx->read_view.open(trx);
4549 } else {
4550 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4551 HA_ERR_UNSUPPORTED,
4552 "InnoDB: WITH CONSISTENT SNAPSHOT"
4553 " was ignored because this phrase"
4554 " can only be used with"
4555 " REPEATABLE READ isolation level.");
4556 }
4557
4558 /* Set the MySQL flag to mark that there is an active transaction */
4559
4560 innobase_register_trx(hton, current_thd, trx);
4561
4562 DBUG_RETURN(0);
4563 }
4564
/** Commit a transaction via innobase_commit_low(), deferring the redo log
flush. For transactions that are not read-only (trx->read_only is unset and
trx->id is nonzero), the function first waits for a free slot if
innobase_commit_concurrency > 0, records the binary log position in the
transaction, and sets trx->flush_log_later for the duration of the commit. */
static
void
innobase_commit_ordered_2(
/*======================*/
	trx_t*	trx, 	/*!< in: Innodb transaction */
	THD*	thd)	/*!< in: MySQL thread handle */
{
	DBUG_ENTER("innobase_commit_ordered_2");

	bool	read_only = trx->read_only || trx->id == 0;

	if (!read_only) {

		/* Admission control: commit_threads counts the threads
		that passed this point and have not yet finished the
		commit below. It is only modified while holding
		commit_cond_m. */
		while (innobase_commit_concurrency > 0) {

			mysql_mutex_lock(&commit_cond_m);

			++commit_threads;

			if (commit_threads
			    <= innobase_commit_concurrency) {

				/* A slot was free; keep the increment. */
				mysql_mutex_unlock(&commit_cond_m);
				break;
			}

			/* No slot: undo the increment, wait to be
			signalled by a finishing committer, and retry. */
			--commit_threads;

			mysql_cond_wait(&commit_cond, &commit_cond_m);

			mysql_mutex_unlock(&commit_cond_m);
		}

		/* The following call reads the binary log position of
		the transaction being committed.

		Binary logging of other engines is not relevant to
		InnoDB as all InnoDB requires is that committing
		InnoDB transactions appear in the same order in the
		MySQL binary log as they appear in InnoDB logs, which
		is guaranteed by the server.

		If the binary log is not enabled, or the transaction
		is not written to the binary log, the file name will
		be a NULL pointer. */
		thd_binlog_pos(thd, &trx->mysql_log_file_name,
			       &trx->mysql_log_offset);

		/* Don't do write + flush right now. For group commit
		to work we want to do the flush later. */
		trx->flush_log_later = true;
	}

	innobase_commit_low(trx);

	if (!read_only) {
		trx->flush_log_later = false;

		if (innobase_commit_concurrency > 0) {

			/* Release our slot and wake up one waiter. */
			mysql_mutex_lock(&commit_cond_m);

			ut_ad(commit_threads > 0);
			--commit_threads;

			mysql_cond_signal(&commit_cond);

			mysql_mutex_unlock(&commit_cond_m);
		}
	}

	DBUG_VOID_RETURN;
}
4638
4639 /*****************************************************************//**
4640 Perform the first, fast part of InnoDB commit.
4641
4642 Doing it in this call ensures that we get the same commit order here
4643 as in binlog and any other participating transactional storage engines.
4644
4645 Note that we want to do as little as really needed here, as we run
4646 under a global mutex. The expensive fsync() is done later, in
4647 innobase_commit(), without a lock so group commit can take place.
4648
4649 Note also that this method can be called from a different thread than
4650 the one handling the rest of the transaction. */
4651 static
4652 void
innobase_commit_ordered(handlerton * hton,THD * thd,bool all)4653 innobase_commit_ordered(
4654 /*====================*/
4655 handlerton *hton, /*!< in: Innodb handlerton */
4656 THD* thd, /*!< in: MySQL thread handle of the user for whom
4657 the transaction should be committed */
4658 bool all) /*!< in: TRUE - commit transaction
4659 FALSE - the current SQL statement ended */
4660 {
4661 trx_t* trx;
4662 DBUG_ENTER("innobase_commit_ordered");
4663 DBUG_ASSERT(hton == innodb_hton_ptr);
4664
4665 trx = check_trx_exists(thd);
4666
4667 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4668 /* We cannot throw error here; instead we will catch this error
4669 again in innobase_commit() and report it from there. */
4670 DBUG_VOID_RETURN;
4671 }
4672
4673 /* commit_ordered is only called when committing the whole transaction
4674 (or an SQL statement when autocommit is on). */
4675 DBUG_ASSERT(all ||
4676 (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
4677
4678 innobase_commit_ordered_2(trx, thd);
4679 trx->active_commit_ordered = true;
4680
4681 DBUG_VOID_RETURN;
4682 }
4683
4684 /*****************************************************************//**
4685 Commits a transaction in an InnoDB database or marks an SQL statement
4686 ended.
4687 @return 0 or deadlock error if the transaction was aborted by another
4688 higher priority transaction. */
4689 static
4690 int
innobase_commit(handlerton * hton,THD * thd,bool commit_trx)4691 innobase_commit(
4692 /*============*/
4693 handlerton* hton, /*!< in: InnoDB handlerton */
4694 THD* thd, /*!< in: MySQL thread handle of the
4695 user for whom the transaction should
4696 be committed */
4697 bool commit_trx) /*!< in: true - commit transaction
4698 false - the current SQL statement
4699 ended */
4700 {
4701 DBUG_ENTER("innobase_commit");
4702 DBUG_PRINT("enter", ("commit_trx: %d", commit_trx));
4703 DBUG_ASSERT(hton == innodb_hton_ptr);
4704 DBUG_PRINT("trans", ("ending transaction"));
4705
4706 trx_t* trx = check_trx_exists(thd);
4707
4708 ut_ad(trx->dict_operation_lock_mode == 0);
4709 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4710
4711 /* Transaction is deregistered only in a commit or a rollback. If
4712 it is deregistered we know there cannot be resources to be freed
4713 and we could return immediately. For the time being, we play safe
4714 and do the cleanup though there should be nothing to clean up. */
4715
4716 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4717
4718 sql_print_error("Transaction not registered for MariaDB 2PC,"
4719 " but transaction is active");
4720 }
4721
4722 bool read_only = trx->read_only || trx->id == 0;
4723 DBUG_PRINT("info", ("readonly: %d", read_only));
4724
4725 if (commit_trx
4726 || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
4727
4728 DBUG_EXECUTE_IF("crash_innodb_before_commit",
4729 DBUG_SUICIDE(););
4730
4731 /* Run the fast part of commit if we did not already. */
4732 if (!trx->active_commit_ordered) {
4733 innobase_commit_ordered_2(trx, thd);
4734
4735 }
4736
4737 /* We were instructed to commit the whole transaction, or
4738 this is an SQL statement end and autocommit is on */
4739
4740 /* At this point commit order is fixed and transaction is
4741 visible to others. So we can wakeup other commits waiting for
4742 this one, to allow then to group commit with us. */
4743 thd_wakeup_subsequent_commits(thd, 0);
4744
4745 /* Now do a write + flush of logs. */
4746 trx_commit_complete_for_mysql(trx);
4747
4748 trx_deregister_from_2pc(trx);
4749 } else {
4750 /* We just mark the SQL statement ended and do not do a
4751 transaction commit */
4752
4753 /* If we had reserved the auto-inc lock for some
4754 table in this SQL statement we release it now */
4755
4756 if (!read_only) {
4757 lock_unlock_table_autoinc(trx);
4758 }
4759
4760 /* Store the current undo_no of the transaction so that we
4761 know where to roll back if we have to roll back the next
4762 SQL statement */
4763
4764 trx_mark_sql_stat_end(trx);
4765 }
4766
4767 /* Reset the number AUTO-INC rows required */
4768 trx->n_autoinc_rows = 0;
4769
4770 /* This is a statement level variable. */
4771 trx->fts_next_doc_id = 0;
4772
4773 innobase_srv_conc_force_exit_innodb(trx);
4774
4775 DBUG_RETURN(0);
4776 }
4777
4778 /*****************************************************************//**
4779 Rolls back a transaction or the latest SQL statement.
4780 @return 0 or error number */
4781 static
4782 int
innobase_rollback(handlerton * hton,THD * thd,bool rollback_trx)4783 innobase_rollback(
4784 /*==============*/
4785 handlerton* hton, /*!< in: InnoDB handlerton */
4786 THD* thd, /*!< in: handle to the MySQL thread
4787 of the user whose transaction should
4788 be rolled back */
4789 bool rollback_trx) /*!< in: TRUE - rollback entire
4790 transaction FALSE - rollback the current
4791 statement only */
4792 {
4793 DBUG_ENTER("innobase_rollback");
4794 DBUG_ASSERT(hton == innodb_hton_ptr);
4795 DBUG_PRINT("trans", ("aborting transaction"));
4796
4797 trx_t* trx = check_trx_exists(thd);
4798
4799 ut_ad(trx->dict_operation_lock_mode == 0);
4800 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4801
4802 innobase_srv_conc_force_exit_innodb(trx);
4803
4804 /* Reset the number AUTO-INC rows required */
4805
4806 trx->n_autoinc_rows = 0;
4807
4808 /* If we had reserved the auto-inc lock for some table (if
4809 we come here to roll back the latest SQL statement) we
4810 release it now before a possibly lengthy rollback */
4811 lock_unlock_table_autoinc(trx);
4812
4813 /* This is a statement level variable. */
4814
4815 trx->fts_next_doc_id = 0;
4816
4817 dberr_t error;
4818
4819 if (rollback_trx
4820 || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
4821
4822 error = trx_rollback_for_mysql(trx);
4823
4824 trx_deregister_from_2pc(trx);
4825 } else {
4826
4827 error = trx_rollback_last_sql_stat_for_mysql(trx);
4828 }
4829
4830 DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd));
4831 }
4832
4833 /*****************************************************************//**
4834 Rolls back a transaction
4835 @return 0 or error number */
4836 static
4837 int
innobase_rollback_trx(trx_t * trx)4838 innobase_rollback_trx(
4839 /*==================*/
4840 trx_t* trx) /*!< in: transaction */
4841 {
4842 DBUG_ENTER("innobase_rollback_trx");
4843 DBUG_PRINT("trans", ("aborting transaction"));
4844
4845 innobase_srv_conc_force_exit_innodb(trx);
4846
4847 /* If we had reserved the auto-inc lock for some table (if
4848 we come here to roll back the latest SQL statement) we
4849 release it now before a possibly lengthy rollback */
4850 lock_unlock_table_autoinc(trx);
4851
4852 if (!trx->has_logged()) {
4853 trx->will_lock = false;
4854 #ifdef WITH_WSREP
4855 trx->wsrep = false;
4856 #endif
4857 DBUG_RETURN(0);
4858 }
4859
4860 DBUG_RETURN(convert_error_code_to_mysql(trx_rollback_for_mysql(trx),
4861 0, trx->mysql_thd));
4862 }
4863
4864
4865 struct pending_checkpoint {
4866 struct pending_checkpoint *next;
4867 handlerton *hton;
4868 void *cookie;
4869 ib_uint64_t lsn;
4870 };
4871 static struct pending_checkpoint *pending_checkpoint_list;
4872 static struct pending_checkpoint *pending_checkpoint_list_end;
4873
4874 /*****************************************************************//**
4875 Handle a commit checkpoint request from server layer.
4876 We put the request in a queue, so that we can notify upper layer about
4877 checkpoint complete when we have flushed the redo log.
4878 If we have already flushed all relevant redo log, we notify immediately.*/
4879 static
4880 void
innobase_checkpoint_request(handlerton * hton,void * cookie)4881 innobase_checkpoint_request(
4882 handlerton *hton,
4883 void *cookie)
4884 {
4885 ib_uint64_t lsn;
4886 ib_uint64_t flush_lsn;
4887 struct pending_checkpoint * entry;
4888
4889 /* Do the allocation outside of lock to reduce contention. The normal
4890 case is that not everything is flushed, so we will need to enqueue. */
4891 entry = static_cast<struct pending_checkpoint *>
4892 (my_malloc(sizeof(*entry), MYF(MY_WME)));
4893 if (!entry) {
4894 sql_print_error("Failed to allocate %u bytes."
4895 " Commit checkpoint will be skipped.",
4896 static_cast<unsigned>(sizeof(*entry)));
4897 return;
4898 }
4899
4900 entry->next = NULL;
4901 entry->hton = hton;
4902 entry->cookie = cookie;
4903
4904 mysql_mutex_lock(&pending_checkpoint_mutex);
4905 lsn = log_get_lsn();
4906 flush_lsn = log_get_flush_lsn();
4907 if (lsn > flush_lsn) {
4908 /* Put the request in queue.
4909 When the log gets flushed past the lsn, we will remove the
4910 entry from the queue and notify the upper layer. */
4911 entry->lsn = lsn;
4912 if (pending_checkpoint_list_end) {
4913 pending_checkpoint_list_end->next = entry;
4914 /* There is no need to order the entries in the list
4915 by lsn. The upper layer can accept notifications in
4916 any order, and short delays in notifications do not
4917 significantly impact performance. */
4918 } else {
4919 pending_checkpoint_list = entry;
4920 }
4921 pending_checkpoint_list_end = entry;
4922 entry = NULL;
4923 }
4924 mysql_mutex_unlock(&pending_checkpoint_mutex);
4925
4926 if (entry) {
4927 /* We are already flushed. Notify the checkpoint immediately. */
4928 commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4929 my_free(entry);
4930 }
4931 }
4932
4933 /*****************************************************************//**
4934 Log code calls this whenever log has been written and/or flushed up
4935 to a new position. We use this to notify upper layer of a new commit
4936 checkpoint when necessary.*/
4937 UNIV_INTERN
4938 void
innobase_mysql_log_notify(ib_uint64_t flush_lsn)4939 innobase_mysql_log_notify(
4940 /*======================*/
4941 ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
4942 {
4943 struct pending_checkpoint * pending;
4944 struct pending_checkpoint * entry;
4945 struct pending_checkpoint * last_ready;
4946
4947 /* It is safe to do a quick check for NULL first without lock.
4948 Even if we should race, we will at most skip one checkpoint and
4949 take the next one, which is harmless. */
4950 if (!pending_checkpoint_list)
4951 return;
4952
4953 mysql_mutex_lock(&pending_checkpoint_mutex);
4954 pending = pending_checkpoint_list;
4955 if (!pending)
4956 {
4957 mysql_mutex_unlock(&pending_checkpoint_mutex);
4958 return;
4959 }
4960
4961 last_ready = NULL;
4962 for (entry = pending; entry != NULL; entry = entry -> next)
4963 {
4964 /* Notify checkpoints up until the first entry that has not
4965 been fully flushed to the redo log. Since we do not maintain
4966 the list ordered, in principle there could be more entries
4967 later than were also flushed. But there is no harm in
4968 delaying notifications for those a bit. And in practise, the
4969 list is unlikely to have more than one element anyway, as we
4970 flush the redo log at least once every second. */
4971 if (entry->lsn > flush_lsn)
4972 break;
4973 last_ready = entry;
4974 }
4975
4976 if (last_ready)
4977 {
4978 /* We found some pending checkpoints that are now flushed to
4979 disk. So remove them from the list. */
4980 pending_checkpoint_list = entry;
4981 if (!entry)
4982 pending_checkpoint_list_end = NULL;
4983 }
4984
4985 mysql_mutex_unlock(&pending_checkpoint_mutex);
4986
4987 if (!last_ready)
4988 return;
4989
4990 /* Now that we have released the lock, notify upper layer about all
4991 commit checkpoints that have now completed. */
4992 for (;;) {
4993 entry = pending;
4994 pending = pending->next;
4995
4996 commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4997
4998 my_free(entry);
4999 if (entry == last_ready)
5000 break;
5001 }
5002 }
5003
5004 /*****************************************************************//**
5005 Rolls back a transaction to a savepoint.
5006 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5007 given name */
5008 static
5009 int
innobase_rollback_to_savepoint(handlerton * hton,THD * thd,void * savepoint)5010 innobase_rollback_to_savepoint(
5011 /*===========================*/
5012 handlerton* hton, /*!< in: InnoDB handlerton */
5013 THD* thd, /*!< in: handle to the MySQL thread
5014 of the user whose transaction should
5015 be rolled back to savepoint */
5016 void* savepoint) /*!< in: savepoint data */
5017 {
5018
5019 DBUG_ENTER("innobase_rollback_to_savepoint");
5020 DBUG_ASSERT(hton == innodb_hton_ptr);
5021
5022 trx_t* trx = check_trx_exists(thd);
5023
5024 innobase_srv_conc_force_exit_innodb(trx);
5025
5026 /* TODO: use provided savepoint data area to store savepoint data */
5027
5028 char name[64];
5029
5030 longlong2str(longlong(savepoint), name, 36);
5031
5032 int64_t mysql_binlog_cache_pos;
5033
5034 dberr_t error = trx_rollback_to_savepoint_for_mysql(
5035 trx, name, &mysql_binlog_cache_pos);
5036
5037 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5038 fts_savepoint_rollback(trx, name);
5039 }
5040
5041 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5042 }
5043
5044 /*****************************************************************//**
5045 Check whether innodb state allows to safely release MDL locks after
5046 rollback to savepoint.
5047 When binlog is on, MDL locks acquired after savepoint unit are not
5048 released if there are any locks held in InnoDB.
5049 @return true if it is safe, false if its not safe. */
5050 static
5051 bool
innobase_rollback_to_savepoint_can_release_mdl(handlerton * hton,THD * thd)5052 innobase_rollback_to_savepoint_can_release_mdl(
5053 /*===========================================*/
5054 handlerton* hton, /*!< in: InnoDB handlerton */
5055 THD* thd) /*!< in: handle to the MySQL thread
5056 of the user whose transaction should
5057 be rolled back to savepoint */
5058 {
5059 DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
5060 DBUG_ASSERT(hton == innodb_hton_ptr);
5061
5062 trx_t* trx = check_trx_exists(thd);
5063
5064 /* If transaction has not acquired any locks then it is safe
5065 to release MDL after rollback to savepoint */
5066 if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
5067
5068 DBUG_RETURN(true);
5069 }
5070
5071 DBUG_RETURN(false);
5072 }
5073
5074 /*****************************************************************//**
5075 Release transaction savepoint name.
5076 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5077 given name */
5078 static
5079 int
innobase_release_savepoint(handlerton * hton,THD * thd,void * savepoint)5080 innobase_release_savepoint(
5081 /*=======================*/
5082 handlerton* hton, /*!< in: handlerton for InnoDB */
5083 THD* thd, /*!< in: handle to the MySQL thread
5084 of the user whose transaction's
5085 savepoint should be released */
5086 void* savepoint) /*!< in: savepoint data */
5087 {
5088 dberr_t error;
5089 trx_t* trx;
5090 char name[64];
5091
5092 DBUG_ENTER("innobase_release_savepoint");
5093 DBUG_ASSERT(hton == innodb_hton_ptr);
5094
5095 trx = check_trx_exists(thd);
5096
5097 /* TODO: use provided savepoint data area to store savepoint data */
5098
5099 longlong2str(longlong(savepoint), name, 36);
5100
5101 error = trx_release_savepoint_for_mysql(trx, name);
5102
5103 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5104 fts_savepoint_release(trx, name);
5105 }
5106
5107 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5108 }
5109
5110 /*****************************************************************//**
5111 Sets a transaction savepoint.
5112 @return always 0, that is, always succeeds */
5113 static
5114 int
innobase_savepoint(handlerton * hton,THD * thd,void * savepoint)5115 innobase_savepoint(
5116 /*===============*/
5117 handlerton* hton, /*!< in: handle to the InnoDB handlerton */
5118 THD* thd, /*!< in: handle to the MySQL thread */
5119 void* savepoint)/*!< in: savepoint data */
5120 {
5121 DBUG_ENTER("innobase_savepoint");
5122 DBUG_ASSERT(hton == innodb_hton_ptr);
5123
5124 /* In the autocommit mode there is no sense to set a savepoint
5125 (unless we are in sub-statement), so SQL layer ensures that
5126 this method is never called in such situation. */
5127
5128 trx_t* trx = check_trx_exists(thd);
5129
5130 innobase_srv_conc_force_exit_innodb(trx);
5131
5132 /* Cannot happen outside of transaction */
5133 DBUG_ASSERT(trx_is_registered_for_2pc(trx));
5134
5135 /* TODO: use provided savepoint data area to store savepoint data */
5136 char name[64];
5137
5138 longlong2str(longlong(savepoint), name, 36);
5139
5140 dberr_t error = trx_savepoint_for_mysql(trx, name, 0);
5141
5142 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5143 fts_savepoint_take(trx->fts_trx, name);
5144 }
5145
5146 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5147 }
5148
5149 /*****************************************************************//**
5150 Frees a possible InnoDB trx object associated with the current THD.
5151 @return 0 or error number */
5152 static
5153 int
innobase_close_connection(handlerton * hton,THD * thd)5154 innobase_close_connection(
5155 /*======================*/
5156 handlerton* hton, /*!< in: innobase handlerton */
5157 THD* thd) /*!< in: handle to the MySQL thread of the user
5158 whose resources should be free'd */
5159 {
5160
5161 DBUG_ENTER("innobase_close_connection");
5162 DBUG_ASSERT(hton == innodb_hton_ptr);
5163
5164 trx_t* trx = thd_to_trx(thd);
5165
5166 /* During server initialization MySQL layer will try to open
5167 some of the master-slave tables those residing in InnoDB.
5168 After MySQL layer is done with needed checks these tables
5169 are closed followed by invocation of close_connection on the
5170 associated thd.
5171
5172 close_connection rolls back the trx and then frees it.
5173 Once trx is freed thd should avoid maintaining reference to
5174 it else it can be classified as stale reference.
5175
5176 Re-invocation of innodb_close_connection on same thd should
5177 get trx as NULL. */
5178
5179 if (trx) {
5180
5181 thd_set_ha_data(thd, hton, NULL);
5182 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
5183
5184 sql_print_error("Transaction not registered for MariaDB 2PC, "
5185 "but transaction is active");
5186 }
5187
5188 /* Disconnect causes rollback in the following cases:
5189 - trx is not started, or
5190 - trx is in *not* in PREPARED state, or
5191 - trx has not updated any persistent data.
5192 TODO/FIXME: it does not make sense to initiate rollback
5193 in the 1st and 3rd case. */
5194 if (trx_is_started(trx)) {
5195 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
5196 if (trx->has_logged_persistent()) {
5197 trx_disconnect_prepared(trx);
5198 } else {
5199 trx_deregister_from_2pc(trx);
5200 goto rollback_and_free;
5201 }
5202 } else {
5203 sql_print_warning(
5204 "MariaDB is closing a connection that has an active "
5205 "InnoDB transaction. " TRX_ID_FMT " row modifications "
5206 "will roll back.",
5207 trx->undo_no);
5208 goto rollback_and_free;
5209 }
5210 } else {
5211 rollback_and_free:
5212 innobase_rollback_trx(trx);
5213 trx->free();
5214 }
5215 }
5216
5217 DBUG_RETURN(0);
5218 }
5219
5220 UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
5221
5222 /** Cancel any pending lock request associated with the current THD.
5223 @sa THD::awake() @sa ha_kill_query() */
innobase_kill_query(handlerton *,THD * thd,enum thd_kill_levels)5224 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
5225 {
5226 DBUG_ENTER("innobase_kill_query");
5227 #ifdef WITH_WSREP
5228 if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT)
5229 {
5230 /* if victim has been signaled by BF thread and/or aborting
5231 is already progressing, following query aborting is not necessary
5232 any more. */
5233 WSREP_DEBUG("Victim thread %ld bail out conflict_state %s query %s",
5234 thd_get_thread_id(thd),
5235 wsrep_thd_conflict_state_str(thd), wsrep_thd_query(thd));
5236 DBUG_VOID_RETURN;
5237 }
5238 #endif /* WITH_WSREP */
5239
5240 if (trx_t* trx= thd_to_trx(thd))
5241 {
5242 ut_ad(trx->mysql_thd == thd);
5243 lock_mutex_enter();
5244 if (lock_t *lock= trx->lock.wait_lock)
5245 {
5246 trx_mutex_enter(trx);
5247 lock_cancel_waiting_and_release(lock);
5248 trx_mutex_exit(trx);
5249 }
5250 lock_mutex_exit();
5251 }
5252
5253 DBUG_VOID_RETURN;
5254 }
5255
5256
5257 /*************************************************************************//**
5258 ** InnoDB database tables
5259 *****************************************************************************/
5260
5261 /** Get the record format from the data dictionary.
5262 @return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
5263 ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
5264
5265 enum row_type
get_row_type() const5266 ha_innobase::get_row_type() const
5267 {
5268 if (m_prebuilt && m_prebuilt->table) {
5269 const ulint flags = m_prebuilt->table->flags;
5270
5271 switch (dict_tf_get_rec_format(flags)) {
5272 case REC_FORMAT_REDUNDANT:
5273 return(ROW_TYPE_REDUNDANT);
5274 case REC_FORMAT_COMPACT:
5275 return(ROW_TYPE_COMPACT);
5276 case REC_FORMAT_COMPRESSED:
5277 return(ROW_TYPE_COMPRESSED);
5278 case REC_FORMAT_DYNAMIC:
5279 return(ROW_TYPE_DYNAMIC);
5280 }
5281 }
5282 ut_ad(0);
5283 return(ROW_TYPE_NOT_USED);
5284 }
5285
5286 /****************************************************************//**
5287 Get the table flags to use for the statement.
5288 @return table flags */
5289
5290 handler::Table_flags
table_flags() const5291 ha_innobase::table_flags() const
5292 /*============================*/
5293 {
5294 THD* thd = ha_thd();
5295 handler::Table_flags flags = m_int_table_flags;
5296
5297 /* Need to use tx_isolation here since table flags is (also)
5298 called before prebuilt is inited. */
5299
5300 if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
5301 return(flags);
5302 }
5303
5304 return(flags | HA_BINLOG_STMT_CAPABLE);
5305 }
5306
5307 /****************************************************************//**
5308 Returns the table type (storage engine name).
5309 @return table type */
5310
5311 const char*
table_type() const5312 ha_innobase::table_type() const
5313 /*===========================*/
5314 {
5315 return(innobase_hton_name);
5316 }
5317
5318 /****************************************************************//**
5319 Returns the index type.
5320 @return index type */
5321
5322 const char*
index_type(uint keynr)5323 ha_innobase::index_type(
5324 /*====================*/
5325 uint keynr) /*!< : index number */
5326 {
5327 dict_index_t* index = innobase_get_index(keynr);
5328
5329 if (!index) {
5330 return "Corrupted";
5331 }
5332
5333 if (index->type & DICT_FTS) {
5334 return("FULLTEXT");
5335 }
5336
5337 if (dict_index_is_spatial(index)) {
5338 return("SPATIAL");
5339 }
5340
5341 return("BTREE");
5342 }
5343
5344 /****************************************************************//**
5345 Returns the table file name extension.
5346 @return file extension string */
5347
5348 const char**
bas_ext() const5349 ha_innobase::bas_ext() const
5350 /*========================*/
5351 {
5352 return(ha_innobase_exts);
5353 }
5354
5355 /****************************************************************//**
5356 Returns the operations supported for indexes.
5357 @return flags of supported operations */
5358
5359 ulong
index_flags(uint key,uint,bool) const5360 ha_innobase::index_flags(
5361 /*=====================*/
5362 uint key,
5363 uint,
5364 bool) const
5365 {
5366 if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
5367 return(0);
5368 }
5369
5370 ulong extra_flag= 0;
5371
5372 if (table && key == table->s->primary_key) {
5373 extra_flag= HA_CLUSTERED_INDEX;
5374 }
5375
5376 ulong flags = HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
5377 | HA_READ_RANGE | HA_KEYREAD_ONLY
5378 | extra_flag
5379 | HA_DO_INDEX_COND_PUSHDOWN;
5380
5381 /* For spatial index, we don't support descending scan
5382 and ICP so far. */
5383 if (table_share->key_info[key].flags & HA_SPATIAL) {
5384 flags = HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE
5385 | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
5386 }
5387
5388 return(flags);
5389 }
5390
5391 /****************************************************************//**
5392 Returns the maximum number of keys.
5393 @return MAX_KEY */
5394
5395 uint
max_supported_keys() const5396 ha_innobase::max_supported_keys() const
5397 /*===================================*/
5398 {
5399 return(MAX_KEY);
5400 }
5401
5402 /****************************************************************//**
5403 Returns the maximum key length.
5404 @return maximum supported key length, in bytes */
5405
5406 uint
max_supported_key_length() const5407 ha_innobase::max_supported_key_length() const
5408 /*=========================================*/
5409 {
5410 /* An InnoDB page must store >= 2 keys; a secondary key record
5411 must also contain the primary key value. Therefore, if both
5412 the primary key and the secondary key are at this maximum length,
5413 it must be less than 1/4th of the free space on a page including
5414 record overhead.
5415
5416 MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072.
5417
5418 For page sizes = 16k, InnoDB historically reported 3500 bytes here,
5419 But the MySQL limit of 3072 was always used through the handler
5420 interface.
5421
5422 Note: Handle 16k and 32k pages the same here since the limits
5423 are higher than imposed by MySQL. */
5424
5425 switch (srv_page_size) {
5426 case 4096:
5427 /* Hack: allow mysql.innodb_index_stats to be created. */
5428 /* FIXME: rewrite this API, and in sql_table.cc consider
5429 that in index-organized tables (such as InnoDB), secondary
5430 index records will be padded with the PRIMARY KEY, instead
5431 of some short ROWID or record heap address. */
5432 return(1173);
5433 case 8192:
5434 return(1536);
5435 default:
5436 return(3500);
5437 }
5438 }
5439
5440 /****************************************************************//**
5441 Returns the key map of keys that are usable for scanning.
5442 @return key_map_full */
5443
5444 const key_map*
keys_to_use_for_scanning()5445 ha_innobase::keys_to_use_for_scanning()
5446 /*===================================*/
5447 {
5448 return(&key_map_full);
5449 }
5450
5451 /****************************************************************//**
5452 Ensures that if there's a concurrent inplace ADD INDEX, being-indexed virtual
5453 columns are computed. They are not marked as indexed in the old table, so the
5454 server won't add them to the vcol_set automatically */
5455 void
column_bitmaps_signal()5456 ha_innobase::column_bitmaps_signal()
5457 /*================================*/
5458 {
5459 if (!table->vfield || table->current_lock != F_WRLCK) {
5460 return;
5461 }
5462
5463 dict_index_t* clust_index = dict_table_get_first_index(m_prebuilt->table);
5464 uint num_v = 0;
5465 for (uint j = 0; j < table->s->virtual_fields; j++) {
5466 if (table->vfield[j]->stored_in_db()) {
5467 continue;
5468 }
5469
5470 dict_col_t* col = &m_prebuilt->table->v_cols[num_v].m_col;
5471 if (col->ord_part ||
5472 (dict_index_is_online_ddl(clust_index) &&
5473 row_log_col_is_indexed(clust_index, num_v))) {
5474 table->mark_virtual_col(table->vfield[j]);
5475 }
5476 num_v++;
5477 }
5478 }
5479
5480
5481 /****************************************************************//**
5482 Determines if table caching is supported.
5483 @return HA_CACHE_TBL_ASKTRANSACT */
5484
5485 uint8
table_cache_type()5486 ha_innobase::table_cache_type()
5487 /*===========================*/
5488 {
5489 return(HA_CACHE_TBL_ASKTRANSACT);
5490 }
5491
5492 /****************************************************************//**
5493 Determines if the primary key is clustered index.
5494 @return true */
5495
5496 bool
primary_key_is_clustered()5497 ha_innobase::primary_key_is_clustered()
5498 /*===================================*/
5499 {
5500 return(true);
5501 }
5502
5503 /** Normalizes a table name string.
5504 A normalized name consists of the database name catenated to '/'
5505 and table name. For example: test/mytable.
5506 On Windows, normalization puts both the database name and the
5507 table name always to lower case if "set_lower_case" is set to TRUE.
5508 @param[out] norm_name Normalized name, null-terminated.
5509 @param[in] name Name to normalize.
5510 @param[in] set_lower_case True if we also should fold to lower case. */
5511 void
normalize_table_name_c_low(char * norm_name,const char * name,ibool set_lower_case)5512 normalize_table_name_c_low(
5513 /*=======================*/
5514 char* norm_name, /* out: normalized name as a
5515 null-terminated string */
5516 const char* name, /* in: table name string */
5517 ibool set_lower_case) /* in: TRUE if we want to set
5518 name to lower case */
5519 {
5520 char* name_ptr;
5521 ulint name_len;
5522 char* db_ptr;
5523 ulint db_len;
5524 char* ptr;
5525 ulint norm_len;
5526
5527 /* Scan name from the end */
5528
5529 ptr = strend(name) - 1;
5530
5531 /* seek to the last path separator */
5532 while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5533 ptr--;
5534 }
5535
5536 name_ptr = ptr + 1;
5537 name_len = strlen(name_ptr);
5538
5539 /* skip any number of path separators */
5540 while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
5541 ptr--;
5542 }
5543
5544 DBUG_ASSERT(ptr >= name);
5545
5546 /* seek to the last but one path separator or one char before
5547 the beginning of name */
5548 db_len = 0;
5549 while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5550 ptr--;
5551 db_len++;
5552 }
5553
5554 db_ptr = ptr + 1;
5555
5556 norm_len = db_len + name_len + sizeof "/";
5557 ut_a(norm_len < FN_REFLEN - 1);
5558
5559 memcpy(norm_name, db_ptr, db_len);
5560
5561 norm_name[db_len] = '/';
5562
5563 /* Copy the name and null-byte. */
5564 memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
5565
5566 if (set_lower_case) {
5567 innobase_casedn_str(norm_name);
5568 }
5569 }
5570
create_table_info_t(THD * thd,const TABLE * form,HA_CREATE_INFO * create_info,char * table_name,char * remote_path,bool file_per_table,trx_t * trx)5571 create_table_info_t::create_table_info_t(
5572 THD* thd,
5573 const TABLE* form,
5574 HA_CREATE_INFO* create_info,
5575 char* table_name,
5576 char* remote_path,
5577 bool file_per_table,
5578 trx_t* trx)
5579 : m_thd(thd),
5580 m_trx(trx),
5581 m_form(form),
5582 m_default_row_format(innodb_default_row_format),
5583 m_create_info(create_info),
5584 m_table_name(table_name), m_table(NULL),
5585 m_drop_before_rollback(false),
5586 m_remote_path(remote_path),
5587 m_innodb_file_per_table(file_per_table)
5588 {
5589 }
5590
5591 /** Normalizes a table name string.
5592 A normalized name consists of the database name catenated to '/'
5593 and table name. For example: test/mytable.
5594 On Windows, normalization puts both the database name and the
5595 table name always to lower case if "set_lower_case" is set to TRUE.
5596 @param[out] norm_name Normalized name, null-terminated.
5597 @param[in] name Name to normalize.
5598 @param[in] set_lower_case True if we also should fold to lower case. */
5599 void
normalize_table_name_low(char * norm_name,const char * name,ibool set_lower_case)5600 create_table_info_t::normalize_table_name_low(
5601 char* norm_name,
5602 const char* name,
5603 ibool set_lower_case)
5604 {
5605 normalize_table_name_c_low(norm_name, name, set_lower_case);
5606 }
5607
5608 #if !defined(DBUG_OFF)
5609 /*********************************************************************
5610 Test normalize_table_name_low(). */
5611 static
5612 void
test_normalize_table_name_low()5613 test_normalize_table_name_low()
5614 /*===========================*/
5615 {
5616 char norm_name[FN_REFLEN];
5617 const char* test_data[][2] = {
5618 /* input, expected result */
5619 {"./mysqltest/t1", "mysqltest/t1"},
5620 {"./test/#sql-842b_2", "test/#sql-842b_2"},
5621 {"./test/#sql-85a3_10", "test/#sql-85a3_10"},
5622 {"./test/#sql2-842b-2", "test/#sql2-842b-2"},
5623 {"./test/bug29807", "test/bug29807"},
5624 {"./test/foo", "test/foo"},
5625 {"./test/innodb_bug52663", "test/innodb_bug52663"},
5626 {"./test/t", "test/t"},
5627 {"./test/t1", "test/t1"},
5628 {"./test/t10", "test/t10"},
5629 {"/a/b/db/table", "db/table"},
5630 {"/a/b/db///////table", "db/table"},
5631 {"/a/b////db///////table", "db/table"},
5632 {"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5633 {"db/table", "db/table"},
5634 {"ddd/t", "ddd/t"},
5635 {"d/ttt", "d/ttt"},
5636 {"d/t", "d/t"},
5637 {".\\mysqltest\\t1", "mysqltest/t1"},
5638 {".\\test\\#sql-842b_2", "test/#sql-842b_2"},
5639 {".\\test\\#sql-85a3_10", "test/#sql-85a3_10"},
5640 {".\\test\\#sql2-842b-2", "test/#sql2-842b-2"},
5641 {".\\test\\bug29807", "test/bug29807"},
5642 {".\\test\\foo", "test/foo"},
5643 {".\\test\\innodb_bug52663", "test/innodb_bug52663"},
5644 {".\\test\\t", "test/t"},
5645 {".\\test\\t1", "test/t1"},
5646 {".\\test\\t10", "test/t10"},
5647 {"C:\\a\\b\\db\\table", "db/table"},
5648 {"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"},
5649 {"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"},
5650 {"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5651 {"db\\table", "db/table"},
5652 {"ddd\\t", "ddd/t"},
5653 {"d\\ttt", "d/ttt"},
5654 {"d\\t", "d/t"},
5655 };
5656
5657 for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5658 printf("test_normalize_table_name_low():"
5659 " testing \"%s\", expected \"%s\"... ",
5660 test_data[i][0], test_data[i][1]);
5661
5662 create_table_info_t::normalize_table_name_low(
5663 norm_name, test_data[i][0], FALSE);
5664
5665 if (strcmp(norm_name, test_data[i][1]) == 0) {
5666 printf("ok\n");
5667 } else {
5668 printf("got \"%s\"\n", norm_name);
5669 ut_error;
5670 }
5671 }
5672 }
5673
5674 /*********************************************************************
5675 Test ut_format_name(). */
5676 static
5677 void
test_ut_format_name()5678 test_ut_format_name()
5679 /*=================*/
5680 {
5681 char buf[NAME_LEN * 3];
5682
5683 struct {
5684 const char* name;
5685 ulint buf_size;
5686 const char* expected;
5687 } test_data[] = {
5688 {"test/t1", sizeof(buf), "`test`.`t1`"},
5689 {"test/t1", 12, "`test`.`t1`"},
5690 {"test/t1", 11, "`test`.`t1"},
5691 {"test/t1", 10, "`test`.`t"},
5692 {"test/t1", 9, "`test`.`"},
5693 {"test/t1", 8, "`test`."},
5694 {"test/t1", 7, "`test`"},
5695 {"test/t1", 6, "`test"},
5696 {"test/t1", 5, "`tes"},
5697 {"test/t1", 4, "`te"},
5698 {"test/t1", 3, "`t"},
5699 {"test/t1", 2, "`"},
5700 {"test/t1", 1, ""},
5701 {"test/t1", 0, "BUF_NOT_CHANGED"},
5702 {"table", sizeof(buf), "`table`"},
5703 {"ta'le", sizeof(buf), "`ta'le`"},
5704 {"ta\"le", sizeof(buf), "`ta\"le`"},
5705 {"ta`le", sizeof(buf), "`ta``le`"},
5706 };
5707
5708 for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5709
5710 memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
5711
5712 char* ret;
5713
5714 ret = ut_format_name(test_data[i].name,
5715 buf,
5716 test_data[i].buf_size);
5717
5718 ut_a(ret == buf);
5719
5720 if (strcmp(buf, test_data[i].expected) == 0) {
5721 ib::info() << "ut_format_name(" << test_data[i].name
5722 << ", buf, " << test_data[i].buf_size << "),"
5723 " expected " << test_data[i].expected
5724 << ", OK";
5725 } else {
5726 ib::error() << "ut_format_name(" << test_data[i].name
5727 << ", buf, " << test_data[i].buf_size << "),"
5728 " expected " << test_data[i].expected
5729 << ", ERROR: got " << buf;
5730 ut_error;
5731 }
5732 }
5733 }
5734 #endif /* !DBUG_OFF */
5735
5736 /** Match index columns between MySQL and InnoDB.
5737 This function checks whether the index column information
5738 is consistent between KEY info from mysql and that from innodb index.
5739 @param[in] key_info Index info from mysql
5740 @param[in] index_info Index info from InnoDB
5741 @return true if all column types match. */
5742 static
5743 bool
innobase_match_index_columns(const KEY * key_info,const dict_index_t * index_info)5744 innobase_match_index_columns(
5745 const KEY* key_info,
5746 const dict_index_t* index_info)
5747 {
5748 const KEY_PART_INFO* key_part;
5749 const KEY_PART_INFO* key_end;
5750 const dict_field_t* innodb_idx_fld;
5751 const dict_field_t* innodb_idx_fld_end;
5752
5753 DBUG_ENTER("innobase_match_index_columns");
5754
5755 /* Check whether user defined index column count matches */
5756 if (key_info->user_defined_key_parts !=
5757 index_info->n_user_defined_cols) {
5758 DBUG_RETURN(FALSE);
5759 }
5760
5761 key_part = key_info->key_part;
5762 key_end = key_part + key_info->user_defined_key_parts;
5763 innodb_idx_fld = index_info->fields;
5764 innodb_idx_fld_end = index_info->fields + index_info->n_fields;
5765
5766 /* Check each index column's datatype. We do not check
5767 column name because there exists case that index
5768 column name got modified in mysql but such change does not
5769 propagate to InnoDB.
5770 One hidden assumption here is that the index column sequences
5771 are matched up between those in mysql and InnoDB. */
5772 for (; key_part != key_end; ++key_part) {
5773 ulint col_type;
5774 ibool is_unsigned;
5775 ulint mtype = innodb_idx_fld->col->mtype;
5776
5777 /* Need to translate to InnoDB column type before
5778 comparison. */
5779 col_type = get_innobase_type_from_mysql_type(
5780 &is_unsigned, key_part->field);
5781
5782 /* Ignore InnoDB specific system columns. */
5783 while (mtype == DATA_SYS) {
5784 innodb_idx_fld++;
5785
5786 if (innodb_idx_fld >= innodb_idx_fld_end) {
5787 DBUG_RETURN(FALSE);
5788 }
5789 }
5790
5791 /* MariaDB-5.5 compatibility */
5792 if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
5793 key_part->field->real_type() == MYSQL_TYPE_SET) &&
5794 mtype == DATA_FIXBINARY) {
5795 col_type= DATA_FIXBINARY;
5796 }
5797
5798 if (col_type != mtype) {
5799 /* If the col_type we get from mysql type is a geometry
5800 data type, we should check if mtype is a legacy type
5801 from 5.6, either upgraded to DATA_GEOMETRY or not.
5802 This is indeed not an accurate check, but should be
5803 safe, since DATA_BLOB would be upgraded once we create
5804 spatial index on it and we intend to use DATA_GEOMETRY
5805 for legacy GIS data types which are of var-length. */
5806 switch (col_type) {
5807 case DATA_GEOMETRY:
5808 if (mtype == DATA_BLOB) {
5809 break;
5810 }
5811 /* Fall through */
5812 default:
5813 /* Column type mismatches */
5814 DBUG_RETURN(false);
5815 }
5816 }
5817
5818 innodb_idx_fld++;
5819 }
5820
5821 DBUG_RETURN(TRUE);
5822 }
5823
5824 /** Build a template for a base column for a virtual column
5825 @param[in] table MySQL TABLE
5826 @param[in] clust_index InnoDB clustered index
5827 @param[in] field field in MySQL table
5828 @param[in] col InnoDB column
5829 @param[in,out] templ template to fill
5830 @param[in] col_no field index for virtual col
5831 */
5832 static
5833 void
innobase_vcol_build_templ(const TABLE * table,dict_index_t * clust_index,Field * field,const dict_col_t * col,mysql_row_templ_t * templ,ulint col_no)5834 innobase_vcol_build_templ(
5835 const TABLE* table,
5836 dict_index_t* clust_index,
5837 Field* field,
5838 const dict_col_t* col,
5839 mysql_row_templ_t* templ,
5840 ulint col_no)
5841 {
5842 templ->col_no = col_no;
5843 templ->is_virtual = col->is_virtual();
5844
5845 if (templ->is_virtual) {
5846 templ->clust_rec_field_no = ULINT_UNDEFINED;
5847 templ->rec_field_no = col->ind;
5848 } else {
5849 templ->clust_rec_field_no = dict_col_get_clust_pos(
5850 col, clust_index);
5851 ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
5852
5853 templ->rec_field_no = templ->clust_rec_field_no;
5854 }
5855
5856 if (field->real_maybe_null()) {
5857 templ->mysql_null_byte_offset =
5858 field->null_offset();
5859
5860 templ->mysql_null_bit_mask = (ulint) field->null_bit;
5861 } else {
5862 templ->mysql_null_bit_mask = 0;
5863 }
5864
5865 templ->mysql_col_offset = static_cast<ulint>(
5866 get_field_offset(table, field));
5867 templ->mysql_col_len = static_cast<ulint>(field->pack_length());
5868 templ->type = col->mtype;
5869 templ->mysql_type = static_cast<ulint>(field->type());
5870
5871 if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
5872 templ->mysql_length_bytes = static_cast<ulint>(
5873 ((Field_varstring*) field)->length_bytes);
5874 }
5875
5876 templ->charset = dtype_get_charset_coll(col->prtype);
5877 templ->mbminlen = dict_col_get_mbminlen(col);
5878 templ->mbmaxlen = dict_col_get_mbmaxlen(col);
5879 templ->is_unsigned = col->prtype & DATA_UNSIGNED;
5880 }
5881
5882 /** Build template for the virtual columns and their base columns. This
5883 is done when the table first opened.
5884 @param[in] table MySQL TABLE
5885 @param[in] ib_table InnoDB dict_table_t
5886 @param[in,out] s_templ InnoDB template structure
5887 @param[in] add_v new virtual columns added along with
5888 add index call
5889 @param[in] locked true if dict_sys mutex is held */
5890 void
innobase_build_v_templ(const TABLE * table,const dict_table_t * ib_table,dict_vcol_templ_t * s_templ,const dict_add_v_col_t * add_v,bool locked)5891 innobase_build_v_templ(
5892 const TABLE* table,
5893 const dict_table_t* ib_table,
5894 dict_vcol_templ_t* s_templ,
5895 const dict_add_v_col_t* add_v,
5896 bool locked)
5897 {
5898 ulint ncol = unsigned(ib_table->n_cols) - DATA_N_SYS_COLS;
5899 ulint n_v_col = ib_table->n_v_cols;
5900 bool marker[REC_MAX_N_FIELDS];
5901
5902 DBUG_ENTER("innobase_build_v_templ");
5903 ut_ad(ncol < REC_MAX_N_FIELDS);
5904
5905 if (add_v != NULL) {
5906 n_v_col += add_v->n_v_col;
5907 }
5908
5909 ut_ad(n_v_col > 0);
5910
5911 if (!locked) {
5912 mutex_enter(&dict_sys->mutex);
5913 }
5914
5915 if (s_templ->vtempl) {
5916 if (!locked) {
5917 mutex_exit(&dict_sys->mutex);
5918 }
5919 DBUG_VOID_RETURN;
5920 }
5921
5922 memset(marker, 0, sizeof(bool) * ncol);
5923
5924 s_templ->vtempl = static_cast<mysql_row_templ_t**>(
5925 ut_zalloc_nokey((ncol + n_v_col)
5926 * sizeof *s_templ->vtempl));
5927 s_templ->n_col = ncol;
5928 s_templ->n_v_col = n_v_col;
5929 s_templ->rec_len = table->s->reclength;
5930 s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len);
5931 memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len);
5932
5933 /* Mark those columns could be base columns */
5934 for (ulint i = 0; i < ib_table->n_v_cols; i++) {
5935 const dict_v_col_t* vcol = dict_table_get_nth_v_col(
5936 ib_table, i);
5937
5938 for (ulint j = 0; j < vcol->num_base; j++) {
5939 ulint col_no = vcol->base_col[j]->ind;
5940 marker[col_no] = true;
5941 }
5942 }
5943
5944 if (add_v) {
5945 for (ulint i = 0; i < add_v->n_v_col; i++) {
5946 const dict_v_col_t* vcol = &add_v->v_col[i];
5947
5948 for (ulint j = 0; j < vcol->num_base; j++) {
5949 ulint col_no = vcol->base_col[j]->ind;
5950 marker[col_no] = true;
5951 }
5952 }
5953 }
5954
5955 ulint j = 0;
5956 ulint z = 0;
5957
5958 dict_index_t* clust_index = dict_table_get_first_index(ib_table);
5959
5960 for (ulint i = 0; i < table->s->fields; i++) {
5961 Field* field = table->field[i];
5962
5963 /* Build template for virtual columns */
5964 if (!field->stored_in_db()) {
5965 #ifdef UNIV_DEBUG
5966 const char* name;
5967
5968 if (z >= ib_table->n_v_def) {
5969 name = add_v->v_col_name[z - ib_table->n_v_def];
5970 } else {
5971 name = dict_table_get_v_col_name(ib_table, z);
5972 }
5973
5974 ut_ad(!my_strcasecmp(system_charset_info, name,
5975 field->field_name.str));
5976 #endif
5977 const dict_v_col_t* vcol;
5978
5979 if (z >= ib_table->n_v_def) {
5980 vcol = &add_v->v_col[z - ib_table->n_v_def];
5981 } else {
5982 vcol = dict_table_get_nth_v_col(ib_table, z);
5983 }
5984
5985 s_templ->vtempl[z + s_templ->n_col]
5986 = static_cast<mysql_row_templ_t*>(
5987 ut_malloc_nokey(
5988 sizeof *s_templ->vtempl[j]));
5989
5990 innobase_vcol_build_templ(
5991 table, clust_index, field,
5992 &vcol->m_col,
5993 s_templ->vtempl[z + s_templ->n_col],
5994 z);
5995 z++;
5996 continue;
5997 }
5998
5999 ut_ad(j < ncol);
6000
6001 /* Build template for base columns */
6002 if (marker[j]) {
6003 dict_col_t* col = dict_table_get_nth_col(
6004 ib_table, j);
6005
6006 ut_ad(!my_strcasecmp(system_charset_info,
6007 dict_table_get_col_name(
6008 ib_table, j),
6009 field->field_name.str));
6010
6011 s_templ->vtempl[j] = static_cast<
6012 mysql_row_templ_t*>(
6013 ut_malloc_nokey(
6014 sizeof *s_templ->vtempl[j]));
6015
6016 innobase_vcol_build_templ(
6017 table, clust_index, field, col,
6018 s_templ->vtempl[j], j);
6019 }
6020
6021 j++;
6022 }
6023
6024 if (!locked) {
6025 mutex_exit(&dict_sys->mutex);
6026 }
6027
6028 s_templ->db_name = table->s->db.str;
6029 s_templ->tb_name = table->s->table_name.str;
6030 DBUG_VOID_RETURN;
6031 }
6032
6033 /** Check consistency between .frm indexes and InnoDB indexes.
6034 @param[in] table table object formed from .frm
6035 @param[in] ib_table InnoDB table definition
6036 @retval true if not errors were found */
6037 static bool
check_index_consistency(const TABLE * table,const dict_table_t * ib_table)6038 check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
6039 {
6040 ulint mysql_num_index = table->s->keys;
6041 ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
6042 bool ret = true;
6043
6044 /* If there exists inconsistency between MySQL and InnoDB dictionary
6045 (metadata) information, the number of index defined in MySQL
6046 could exceed that in InnoDB, return error */
6047 if (ib_num_index < mysql_num_index) {
6048 ret = false;
6049 goto func_exit;
6050 }
6051
6052 /* For each index in the mysql key_info array, fetch its
6053 corresponding InnoDB index pointer into index_mapping
6054 array. */
6055 for (ulint count = 0; count < mysql_num_index; count++) {
6056 const dict_index_t* index = dict_table_get_index_on_name(
6057 ib_table, table->key_info[count].name.str);
6058
6059 if (index == NULL) {
6060 sql_print_error("Cannot find index %s in InnoDB"
6061 " index dictionary.",
6062 table->key_info[count].name.str);
6063 ret = false;
6064 goto func_exit;
6065 }
6066
6067 /* Double check fetched index has the same
6068 column info as those in mysql key_info. */
6069 if (!innobase_match_index_columns(&table->key_info[count],
6070 index)) {
6071 sql_print_error("Found index %s whose column info"
6072 " does not match that of MariaDB.",
6073 table->key_info[count].name.str);
6074 ret = false;
6075 goto func_exit;
6076 }
6077 }
6078
6079 func_exit:
6080 return ret;
6081 }
6082
6083 /********************************************************************//**
6084 Get the upper limit of the MySQL integral and floating-point type.
6085 @return maximum allowed value for the field */
6086 UNIV_INTERN
6087 ulonglong
innobase_get_int_col_max_value(const Field * field)6088 innobase_get_int_col_max_value(
6089 /*===========================*/
6090 const Field* field) /*!< in: MySQL field */
6091 {
6092 ulonglong max_value = 0;
6093
6094 switch (field->key_type()) {
6095 /* TINY */
6096 case HA_KEYTYPE_BINARY:
6097 max_value = 0xFFULL;
6098 break;
6099 case HA_KEYTYPE_INT8:
6100 max_value = 0x7FULL;
6101 break;
6102 /* SHORT */
6103 case HA_KEYTYPE_USHORT_INT:
6104 max_value = 0xFFFFULL;
6105 break;
6106 case HA_KEYTYPE_SHORT_INT:
6107 max_value = 0x7FFFULL;
6108 break;
6109 /* MEDIUM */
6110 case HA_KEYTYPE_UINT24:
6111 max_value = 0xFFFFFFULL;
6112 break;
6113 case HA_KEYTYPE_INT24:
6114 max_value = 0x7FFFFFULL;
6115 break;
6116 /* LONG */
6117 case HA_KEYTYPE_ULONG_INT:
6118 max_value = 0xFFFFFFFFULL;
6119 break;
6120 case HA_KEYTYPE_LONG_INT:
6121 max_value = 0x7FFFFFFFULL;
6122 break;
6123 /* BIG */
6124 case HA_KEYTYPE_ULONGLONG:
6125 max_value = 0xFFFFFFFFFFFFFFFFULL;
6126 break;
6127 case HA_KEYTYPE_LONGLONG:
6128 max_value = 0x7FFFFFFFFFFFFFFFULL;
6129 break;
6130 case HA_KEYTYPE_FLOAT:
6131 /* We use the maximum as per IEEE754-2008 standard, 2^24 */
6132 max_value = 0x1000000ULL;
6133 break;
6134 case HA_KEYTYPE_DOUBLE:
6135 /* We use the maximum as per IEEE754-2008 standard, 2^53 */
6136 max_value = 0x20000000000000ULL;
6137 break;
6138 default:
6139 ut_error;
6140 }
6141
6142 return(max_value);
6143 }
6144
6145 /** Initialize the AUTO_INCREMENT column metadata.
6146
6147 Since a partial table definition for a persistent table can already be
6148 present in the InnoDB dict_sys cache before it is accessed from SQL,
6149 we have to initialize the AUTO_INCREMENT counter on the first
6150 ha_innobase::open().
6151
6152 @param[in,out] table persistent table
6153 @param[in] field the AUTO_INCREMENT column */
6154 static
6155 void
initialize_auto_increment(dict_table_t * table,const Field * field)6156 initialize_auto_increment(dict_table_t* table, const Field* field)
6157 {
6158 ut_ad(!table->is_temporary());
6159
6160 const unsigned col_no = innodb_col_no(field);
6161
6162 dict_table_autoinc_lock(table);
6163
6164 table->persistent_autoinc = 1
6165 + dict_table_get_nth_col_pos(table, col_no, NULL);
6166
6167 if (table->autoinc) {
6168 /* Already initialized. Our caller checked
6169 table->persistent_autoinc without
6170 dict_table_autoinc_lock(), and there might be multiple
6171 ha_innobase::open() executing concurrently. */
6172 } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
6173 /* If the recovery level is set so high that writes
6174 are disabled we force the AUTOINC counter to 0
6175 value effectively disabling writes to the table.
6176 Secondly, we avoid reading the table in case the read
6177 results in failure due to a corrupted table/index.
6178
6179 We will not return an error to the client, so that the
6180 tables can be dumped with minimal hassle. If an error
6181 were returned in this case, the first attempt to read
6182 the table would fail and subsequent SELECTs would succeed. */
6183 } else if (table->persistent_autoinc) {
6184 table->autoinc = innobase_next_autoinc(
6185 btr_read_autoinc_with_fallback(table, col_no),
6186 1 /* need */,
6187 1 /* auto_increment_increment */,
6188 0 /* auto_increment_offset */,
6189 innobase_get_int_col_max_value(field));
6190 }
6191
6192 dict_table_autoinc_unlock(table);
6193 }
6194
6195 /** Open an InnoDB table
6196 @param[in] name table name
6197 @return error code
6198 @retval 0 on success */
6199 int
open(const char * name,int,uint)6200 ha_innobase::open(const char* name, int, uint)
6201 {
6202 char norm_name[FN_REFLEN];
6203
6204 DBUG_ENTER("ha_innobase::open");
6205
6206 normalize_table_name(norm_name, name);
6207
6208 m_user_thd = NULL;
6209
6210 /* Will be allocated if it is needed in ::update_row() */
6211 m_upd_buf = NULL;
6212 m_upd_buf_size = 0;
6213
6214 char* is_part = is_partition(norm_name);
6215 THD* thd = ha_thd();
6216 dict_table_t* ib_table = open_dict_table(name, norm_name, is_part,
6217 DICT_ERR_IGNORE_FK_NOKEY);
6218
6219 DEBUG_SYNC(thd, "ib_open_after_dict_open");
6220
6221 if (NULL == ib_table) {
6222
6223 if (is_part) {
6224 sql_print_error("Failed to open table %s.\n",
6225 norm_name);
6226 }
6227 no_such_table:
6228 set_my_errno(ENOENT);
6229
6230 DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
6231 }
6232
6233 size_t n_fields = omits_virtual_cols(*table_share)
6234 ? table_share->stored_fields : table_share->fields;
6235 size_t n_cols = dict_table_get_n_user_cols(ib_table)
6236 + dict_table_get_n_v_cols(ib_table)
6237 - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
6238
6239 if (UNIV_UNLIKELY(n_cols != n_fields)) {
6240 ib::warn() << "Table " << norm_name << " contains "
6241 << n_cols << " user"
6242 " defined columns in InnoDB, but " << n_fields
6243 << " columns in MariaDB. Please check"
6244 " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and"
6245 " https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
6246 " for how to resolve the issue.";
6247
6248 /* Mark this table as corrupted, so the drop table
6249 or force recovery can still use it, but not others. */
6250 ib_table->file_unreadable = true;
6251 ib_table->corrupted = true;
6252 dict_table_close(ib_table, FALSE, FALSE);
6253 goto no_such_table;
6254 }
6255
6256 innobase_copy_frm_flags_from_table_share(ib_table, table->s);
6257
6258 MONITOR_INC(MONITOR_TABLE_OPEN);
6259
6260 if ((ib_table->flags2 & DICT_TF2_DISCARDED)) {
6261
6262 ib_senderrf(thd,
6263 IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
6264 table->s->table_name.str);
6265
6266 /* Allow an open because a proper DISCARD should have set
6267 all the flags and index root page numbers to FIL_NULL that
6268 should prevent any DML from running but it should allow DDL
6269 operations. */
6270 } else if (!ib_table->is_readable()) {
6271 const fil_space_t* space = ib_table->space;
6272 if (!space) {
6273 ib_senderrf(
6274 thd, IB_LOG_LEVEL_WARN,
6275 ER_TABLESPACE_MISSING, norm_name);
6276 }
6277
6278 if (!thd_tablespace_op(thd)) {
6279 set_my_errno(ENOENT);
6280 int ret_err = HA_ERR_TABLESPACE_MISSING;
6281
6282 if (space && space->crypt_data
6283 && space->crypt_data->is_encrypted()) {
6284 push_warning_printf(
6285 thd,
6286 Sql_condition::WARN_LEVEL_WARN,
6287 HA_ERR_DECRYPTION_FAILED,
6288 "Table %s in file %s is encrypted"
6289 " but encryption service or"
6290 " used key_id %u is not available. "
6291 " Can't continue reading table.",
6292 table_share->table_name.str,
6293 space->chain.start->name,
6294 space->crypt_data->key_id);
6295 ret_err = HA_ERR_DECRYPTION_FAILED;
6296 }
6297
6298 dict_table_close(ib_table, FALSE, FALSE);
6299 DBUG_RETURN(ret_err);
6300 }
6301 }
6302
6303 m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
6304
6305 m_prebuilt->default_rec = table->s->default_values;
6306 ut_ad(m_prebuilt->default_rec);
6307
6308 m_prebuilt->m_mysql_table = table;
6309
6310 /* Looks like MySQL-3.23 sometimes has primary key number != 0 */
6311 m_primary_key = table->s->primary_key;
6312
6313 key_used_on_scan = m_primary_key;
6314
6315 if (ib_table->n_v_cols) {
6316 mutex_enter(&dict_sys->mutex);
6317 if (ib_table->vc_templ == NULL) {
6318 ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
6319 innobase_build_v_templ(
6320 table, ib_table, ib_table->vc_templ, NULL,
6321 true);
6322 }
6323
6324 mutex_exit(&dict_sys->mutex);
6325 }
6326
6327 if (!check_index_consistency(table, ib_table)) {
6328 sql_print_error("InnoDB indexes are inconsistent with what "
6329 "defined in .frm for table %s",
6330 name);
6331 }
6332
6333 /* Allocate a buffer for a 'row reference'. A row reference is
6334 a string of bytes of length ref_length which uniquely specifies
6335 a row in our table. Note that MySQL may also compare two row
6336 references for equality by doing a simple memcmp on the strings
6337 of length ref_length! */
6338 if (!(m_prebuilt->clust_index_was_generated
6339 = dict_index_is_auto_gen_clust(ib_table->indexes.start))) {
6340 if (m_primary_key >= MAX_KEY) {
6341 ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;
6342
6343 /* This mismatch could cause further problems
6344 if not attended, bring this to the user's attention
6345 by printing a warning in addition to log a message
6346 in the errorlog */
6347
6348 ib_push_frm_error(thd, ib_table, table, 0, true);
6349
6350 /* If m_primary_key >= MAX_KEY, its (m_primary_key)
6351 value could be out of bound if continue to index
6352 into key_info[] array. Find InnoDB primary index,
6353 and assign its key_length to ref_length.
6354 In addition, since MySQL indexes are sorted starting
6355 with primary index, unique index etc., initialize
6356 ref_length to the first index key length in
6357 case we fail to find InnoDB cluster index.
6358
6359 Please note, this will not resolve the primary
6360 index mismatch problem, other side effects are
6361 possible if users continue to use the table.
6362 However, we allow this table to be opened so
6363 that user can adopt necessary measures for the
6364 mismatch while still being accessible to the table
6365 date. */
6366 if (!table->key_info) {
6367 ut_ad(!table->s->keys);
6368 ref_length = 0;
6369 } else {
6370 ref_length = table->key_info[0].key_length;
6371 }
6372
6373 /* Find corresponding cluster index
6374 key length in MySQL's key_info[] array */
6375 for (uint i = 0; i < table->s->keys; i++) {
6376 dict_index_t* index;
6377 index = innobase_get_index(i);
6378 if (dict_index_is_clust(index)) {
6379 ref_length =
6380 table->key_info[i].key_length;
6381 }
6382 }
6383 } else {
6384 /* MySQL allocates the buffer for ref.
6385 key_info->key_length includes space for all key
6386 columns + one byte for each column that may be
6387 NULL. ref_length must be as exact as possible to
6388 save space, because all row reference buffers are
6389 allocated based on ref_length. */
6390
6391 ref_length = table->key_info[m_primary_key].key_length;
6392 }
6393 } else {
6394 if (m_primary_key != MAX_KEY) {
6395
6396 ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;
6397
6398 /* This mismatch could cause further problems
6399 if not attended, bring this to the user attention
6400 by printing a warning in addition to log a message
6401 in the errorlog */
6402 ib_push_frm_error(thd, ib_table, table, 0, true);
6403 }
6404
6405 ref_length = DATA_ROW_ID_LEN;
6406
6407 /* If we automatically created the clustered index, then
6408 MySQL does not know about it, and MySQL must NOT be aware
6409 of the index used on scan, to make it avoid checking if we
6410 update the column of the index. That is why we assert below
6411 that key_used_on_scan is the undefined value MAX_KEY.
6412 The column is the row id in the automatical generation case,
6413 and it will never be updated anyway. */
6414
6415 if (key_used_on_scan != MAX_KEY) {
6416 sql_print_warning(
6417 "Table %s key_used_on_scan is %u even "
6418 "though there is no primary key inside "
6419 "InnoDB.", name, key_used_on_scan);
6420 }
6421 }
6422
6423 /* Index block size in InnoDB: used by MySQL in query optimization */
6424 stats.block_size = srv_page_size;
6425
6426 const my_bool for_vc_purge = THDVAR(thd, background_thread);
6427
6428 if (for_vc_purge || !m_prebuilt->table
6429 || m_prebuilt->table->is_temporary()
6430 || m_prebuilt->table->persistent_autoinc
6431 || !m_prebuilt->table->is_readable()) {
6432 } else if (const Field* ai = table->found_next_number_field) {
6433 initialize_auto_increment(m_prebuilt->table, ai);
6434 }
6435
6436 /* Set plugin parser for fulltext index */
6437 for (uint i = 0; i < table->s->keys; i++) {
6438 if (table->key_info[i].flags & HA_USES_PARSER) {
6439 dict_index_t* index = innobase_get_index(i);
6440 plugin_ref parser = table->key_info[i].parser;
6441
6442 ut_ad(index->type & DICT_FTS);
6443 index->parser =
6444 static_cast<st_mysql_ftparser *>(
6445 plugin_decl(parser)->info);
6446
6447 DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
6448 index->parser = &fts_default_parser;);
6449 }
6450 }
6451
6452 ut_ad(!m_prebuilt->table
6453 || table->versioned() == m_prebuilt->table->versioned());
6454
6455 if (!for_vc_purge) {
6456 info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST
6457 | HA_STATUS_OPEN);
6458 }
6459
6460 DBUG_RETURN(0);
6461 }
6462
6463 /** Convert MySQL column number to dict_table_t::cols[] offset.
6464 @param[in] field non-virtual column
6465 @return column number relative to dict_table_t::cols[] */
6466 unsigned
innodb_col_no(const Field * field)6467 innodb_col_no(const Field* field)
6468 {
6469 ut_ad(!innobase_is_s_fld(field));
6470 const TABLE* table = field->table;
6471 unsigned col_no = 0;
6472 ut_ad(field == table->field[field->field_index]);
6473 for (unsigned i = 0; i < field->field_index; i++) {
6474 if (table->field[i]->stored_in_db()) {
6475 col_no++;
6476 }
6477 }
6478 return(col_no);
6479 }
6480
6481 /** Opens dictionary table object using table name. For partition, we need to
6482 try alternative lower/upper case names to support moving data files across
6483 platforms.
6484 @param[in] table_name name of the table/partition
6485 @param[in] norm_name normalized name of the table/partition
6486 @param[in] is_partition if this is a partition of a table
6487 @param[in] ignore_err error to ignore for loading dictionary object
6488 @return dictionary table object or NULL if not found */
6489 dict_table_t*
open_dict_table(const char * table_name,const char * norm_name,bool is_partition,dict_err_ignore_t ignore_err)6490 ha_innobase::open_dict_table(
6491 const char*
6492 #ifdef _WIN32
6493 table_name
6494 #endif
6495 ,
6496 const char* norm_name,
6497 bool is_partition,
6498 dict_err_ignore_t ignore_err)
6499 {
6500 DBUG_ENTER("ha_innobase::open_dict_table");
6501 dict_table_t* ib_table = dict_table_open_on_name(norm_name, FALSE,
6502 TRUE, ignore_err);
6503
6504 if (NULL == ib_table && is_partition) {
6505 /* MySQL partition engine hard codes the file name
6506 separator as "#P#". The text case is fixed even if
6507 lower_case_table_names is set to 1 or 2. This is true
6508 for sub-partition names as well. InnoDB always
6509 normalises file names to lower case on Windows, this
6510 can potentially cause problems when copying/moving
6511 tables between platforms.
6512
6513 1) If boot against an installation from Windows
6514 platform, then its partition table name could
6515 be in lower case in system tables. So we will
6516 need to check lower case name when load table.
6517
6518 2) If we boot an installation from other case
6519 sensitive platform in Windows, we might need to
6520 check the existence of table name without lower
6521 case in the system table. */
6522 if (innobase_get_lower_case_table_names() == 1) {
6523 char par_case_name[FN_REFLEN];
6524
6525 #ifndef _WIN32
6526 /* Check for the table using lower
6527 case name, including the partition
6528 separator "P" */
6529 strcpy(par_case_name, norm_name);
6530 innobase_casedn_str(par_case_name);
6531 #else
6532 /* On Windows platfrom, check
6533 whether there exists table name in
6534 system table whose name is
6535 not being normalized to lower case */
6536 create_table_info_t::
6537 normalize_table_name_low(
6538 par_case_name,
6539 table_name, FALSE);
6540 #endif
6541 ib_table = dict_table_open_on_name(
6542 par_case_name, FALSE, TRUE,
6543 ignore_err);
6544 }
6545
6546 if (ib_table != NULL) {
6547 #ifndef _WIN32
6548 sql_print_warning("Partition table %s opened"
6549 " after converting to lower"
6550 " case. The table may have"
6551 " been moved from a case"
6552 " in-sensitive file system."
6553 " Please recreate table in"
6554 " the current file system\n",
6555 norm_name);
6556 #else
6557 sql_print_warning("Partition table %s opened"
6558 " after skipping the step to"
6559 " lower case the table name."
6560 " The table may have been"
6561 " moved from a case sensitive"
6562 " file system. Please"
6563 " recreate table in the"
6564 " current file system\n",
6565 norm_name);
6566 #endif
6567 }
6568 }
6569
6570 DBUG_RETURN(ib_table);
6571 }
6572
6573 handler*
clone(const char * name,MEM_ROOT * mem_root)6574 ha_innobase::clone(
6575 /*===============*/
6576 const char* name, /*!< in: table name */
6577 MEM_ROOT* mem_root) /*!< in: memory context */
6578 {
6579 DBUG_ENTER("ha_innobase::clone");
6580
6581 ha_innobase* new_handler = static_cast<ha_innobase*>(
6582 handler::clone(m_prebuilt->table->name.m_name, mem_root));
6583
6584 if (new_handler != NULL) {
6585 DBUG_ASSERT(new_handler->m_prebuilt != NULL);
6586
6587 new_handler->m_prebuilt->select_lock_type
6588 = m_prebuilt->select_lock_type;
6589 }
6590
6591 DBUG_RETURN(new_handler);
6592 }
6593
6594
6595 uint
max_supported_key_part_length() const6596 ha_innobase::max_supported_key_part_length() const
6597 /*==============================================*/
6598 {
6599 /* A table format specific index column length check will be performed
6600 at ha_innobase::add_index() and row_create_index_for_mysql() */
6601 return(REC_VERSION_56_MAX_INDEX_COL_LEN);
6602 }
6603
6604 /******************************************************************//**
6605 Closes a handle to an InnoDB table.
6606 @return 0 */
6607
6608 int
close()6609 ha_innobase::close()
6610 /*================*/
6611 {
6612 DBUG_ENTER("ha_innobase::close");
6613
6614 row_prebuilt_free(m_prebuilt, FALSE);
6615
6616 if (m_upd_buf != NULL) {
6617 ut_ad(m_upd_buf_size != 0);
6618 my_free(m_upd_buf);
6619 m_upd_buf = NULL;
6620 m_upd_buf_size = 0;
6621 }
6622
6623 MONITOR_INC(MONITOR_TABLE_CLOSE);
6624
6625 /* Tell InnoDB server that there might be work for
6626 utility threads: */
6627
6628 srv_active_wake_master_thread();
6629
6630 DBUG_RETURN(0);
6631 }
6632
6633 /* The following accessor functions should really be inside MySQL code! */
6634
#ifdef WITH_WSREP
/** Transform a column value in place with the strnxfrm() function of the
column's collation, for MySQL types that are handled as strings here
(BIT, STRING, VAR_STRING, VARCHAR and the BLOB types). For all other types
the buffer is left untouched and str_length is returned.

The charset is resolved from charset_number; if get_charset() cannot find
it, an error is logged and ut_a(0) is hit.

NOTE(review): the strnxfrm() argument order is presumably
(cs, dst, dstlen, nweights, src, srclen, flags), i.e. the calls below write
into str and read from the temporary copy tmp_str -- confirm against the
collation handler declaration.

@return str_length, except when wsrep_protocol_version >= 3 and the type is
one of the string-like types, in which case the value returned by the last
strnxfrm() call is returned */
UNIV_INTERN
ulint
wsrep_innobase_mysql_sort(
/*======================*/
					/* out: str contains sort string */
	int		mysql_type,	/* in: MySQL type */
	uint		charset_number,	/* in: number of the charset */
	unsigned char*	str,		/* in: data field */
	unsigned int	str_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned int	buf_length)	/* in: total str buffer length */

{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	/* Default result: the input length, unchanged. */
	ulint			ret_length = str_length;

	DBUG_ASSERT(str_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
	{
		/* Scratch copy of the input; zero-filled. */
		uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;

		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				sql_print_error("InnoDB needs charset %lu for doing "
						"a comparison, but MariaDB cannot "
						"find that charset.",
						(ulong) charset_number);
				ut_a(0);
			}
		}

		/* The input must fit into the scratch buffer. */
		ut_a(str_length <= tmp_length);
		memcpy(tmp_str, str, str_length);

		/* First pass; its return value replaces tmp_length and is
		what the protocol < 3 call below passes as its sixth
		argument. */
		tmp_length = charset->coll->strnxfrm(charset, str, str_length,
						     str_length, tmp_str,
						     tmp_length, 0);
		DBUG_ASSERT(tmp_length <= str_length);
		if (wsrep_protocol_version < 3) {
			/* Limited to str_length bytes of output; ret_length
			stays at str_length. */
			tmp_length = charset->coll->strnxfrm(
				charset, str, str_length,
				str_length, tmp_str, tmp_length, 0);
			DBUG_ASSERT(tmp_length <= str_length);
		} else {
			/* strnxfrm will expand the destination string,
			   protocols < 3 truncated the sorted string
			   protocols >= 3 gets full sorted string
			*/
			tmp_length = charset->coll->strnxfrm(
				charset, str, buf_length,
				str_length, tmp_str, str_length, 0);
			DBUG_ASSERT(tmp_length <= buf_length);
			ret_length = tmp_length;
		}

		break;
	}
	/* The types below are not transformed. */
	case MYSQL_TYPE_DECIMAL :
	case MYSQL_TYPE_TINY :
	case MYSQL_TYPE_SHORT :
	case MYSQL_TYPE_LONG :
	case MYSQL_TYPE_FLOAT :
	case MYSQL_TYPE_DOUBLE :
	case MYSQL_TYPE_NULL :
	case MYSQL_TYPE_TIMESTAMP :
	case MYSQL_TYPE_LONGLONG :
	case MYSQL_TYPE_INT24 :
	case MYSQL_TYPE_DATE :
	case MYSQL_TYPE_TIME :
	case MYSQL_TYPE_DATETIME :
	case MYSQL_TYPE_YEAR :
	case MYSQL_TYPE_NEWDATE :
	case MYSQL_TYPE_NEWDECIMAL :
	case MYSQL_TYPE_ENUM :
	case MYSQL_TYPE_SET :
	case MYSQL_TYPE_GEOMETRY :
		break;
	default:
		break;
	}

	return ret_length;
}
#endif /* WITH_WSREP */
6745
6746 /******************************************************************//**
6747 compare two character string according to their charset. */
6748 int
innobase_fts_text_cmp(const void * cs,const void * p1,const void * p2)6749 innobase_fts_text_cmp(
6750 /*==================*/
6751 const void* cs, /*!< in: Character set */
6752 const void* p1, /*!< in: key */
6753 const void* p2) /*!< in: node */
6754 {
6755 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6756 const fts_string_t* s1 = (const fts_string_t*) p1;
6757 const fts_string_t* s2 = (const fts_string_t*) p2;
6758
6759 return(ha_compare_text(
6760 charset, s1->f_str, static_cast<uint>(s1->f_len),
6761 s2->f_str, static_cast<uint>(s2->f_len), 0));
6762 }
6763
6764 /******************************************************************//**
6765 compare two character string case insensitively according to their charset. */
6766 int
innobase_fts_text_case_cmp(const void * cs,const void * p1,const void * p2)6767 innobase_fts_text_case_cmp(
6768 /*=======================*/
6769 const void* cs, /*!< in: Character set */
6770 const void* p1, /*!< in: key */
6771 const void* p2) /*!< in: node */
6772 {
6773 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6774 const fts_string_t* s1 = (const fts_string_t*) p1;
6775 const fts_string_t* s2 = (const fts_string_t*) p2;
6776 ulint newlen;
6777
6778 my_casedn_str(charset, (char*) s2->f_str);
6779
6780 newlen = strlen((const char*) s2->f_str);
6781
6782 return(ha_compare_text(
6783 charset, s1->f_str, static_cast<uint>(s1->f_len),
6784 s2->f_str, static_cast<uint>(newlen), 0));
6785 }
6786
6787 /******************************************************************//**
6788 Get the first character's code position for FTS index partition. */
6789 ulint
innobase_strnxfrm(const CHARSET_INFO * cs,const uchar * str,const ulint len)6790 innobase_strnxfrm(
6791 /*==============*/
6792 const CHARSET_INFO*
6793 cs, /*!< in: Character set */
6794 const uchar* str, /*!< in: string */
6795 const ulint len) /*!< in: string length */
6796 {
6797 uchar mystr[2];
6798 ulint value;
6799
6800 if (!str || len == 0) {
6801 return(0);
6802 }
6803
6804 my_strnxfrm(cs, (uchar*) mystr, 2, str, len);
6805
6806 value = mach_read_from_2(mystr);
6807
6808 if (value > 255) {
6809 value = value / 256;
6810 }
6811
6812 return(value);
6813 }
6814
6815 /******************************************************************//**
6816 compare two character string according to their charset. */
6817 int
innobase_fts_text_cmp_prefix(const void * cs,const void * p1,const void * p2)6818 innobase_fts_text_cmp_prefix(
6819 /*=========================*/
6820 const void* cs, /*!< in: Character set */
6821 const void* p1, /*!< in: prefix key */
6822 const void* p2) /*!< in: value to compare */
6823 {
6824 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6825 const fts_string_t* s1 = (const fts_string_t*) p1;
6826 const fts_string_t* s2 = (const fts_string_t*) p2;
6827 int result;
6828
6829 result = ha_compare_text(
6830 charset, s2->f_str, static_cast<uint>(s2->f_len),
6831 s1->f_str, static_cast<uint>(s1->f_len), 1);
6832
6833 /* We switched s1, s2 position in ha_compare_text. So we need
6834 to negate the result */
6835 return(-result);
6836 }
6837
6838 /******************************************************************//**
6839 Makes all characters in a string lower case. */
6840 size_t
innobase_fts_casedn_str(CHARSET_INFO * cs,char * src,size_t src_len,char * dst,size_t dst_len)6841 innobase_fts_casedn_str(
6842 /*====================*/
6843 CHARSET_INFO* cs, /*!< in: Character set */
6844 char* src, /*!< in: string to put in lower case */
6845 size_t src_len,/*!< in: input string length */
6846 char* dst, /*!< in: buffer for result string */
6847 size_t dst_len)/*!< in: buffer size */
6848 {
6849 if (cs->casedn_multiply == 1) {
6850 memcpy(dst, src, src_len);
6851 dst[src_len] = 0;
6852 my_casedn_str(cs, dst);
6853
6854 return(strlen(dst));
6855 } else {
6856 return(cs->cset->casedn(cs, src, src_len, dst, dst_len));
6857 }
6858 }
6859
/* Evaluates to true when the character class bits in c contain any of
_MY_U, _MY_L or _MY_NMR, or when the byte ch is an underscore.
NOTE(review): presumably upper-case, lower-case and numeral ctype flags --
confirm against the ctype flag definitions. */
#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_')

/* Always 0: no character is treated as a "misc" word character. */
#define misc_word_char(X)	0
6863
6864 /*************************************************************//**
6865 Get the next token from the given string and store it in *token.
6866 It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
6867 @return length of string processed */
6868 ulint
innobase_mysql_fts_get_token(CHARSET_INFO * cs,const byte * start,const byte * end,fts_string_t * token)6869 innobase_mysql_fts_get_token(
6870 /*=========================*/
6871 CHARSET_INFO* cs, /*!< in: Character set */
6872 const byte* start, /*!< in: start of text */
6873 const byte* end, /*!< in: one character past end of
6874 text */
6875 fts_string_t* token) /*!< out: token's text */
6876 {
6877 int mbl;
6878 const uchar* doc = start;
6879
6880 ut_a(cs);
6881
6882 token->f_n_char = token->f_len = 0;
6883 token->f_str = NULL;
6884
6885 for (;;) {
6886
6887 if (doc >= end) {
6888 return ulint(doc - start);
6889 }
6890
6891 int ctype;
6892
6893 mbl = cs->cset->ctype(
6894 cs, &ctype, doc, (const uchar*) end);
6895
6896 if (true_word_char(ctype, *doc)) {
6897 break;
6898 }
6899
6900 doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6901 }
6902
6903 ulint mwc = 0;
6904 ulint length = 0;
6905
6906 token->f_str = const_cast<byte*>(doc);
6907
6908 while (doc < end) {
6909
6910 int ctype;
6911
6912 mbl = cs->cset->ctype(
6913 cs, &ctype, (uchar*) doc, (uchar*) end);
6914 if (true_word_char(ctype, *doc)) {
6915 mwc = 0;
6916 } else if (!misc_word_char(*doc) || mwc) {
6917 break;
6918 } else {
6919 ++mwc;
6920 }
6921
6922 ++length;
6923
6924 doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6925 }
6926
6927 token->f_len = (uint) (doc - token->f_str) - mwc;
6928 token->f_n_char = length;
6929
6930 return ulint(doc - start);
6931 }
6932
6933 /** Converts a MySQL type to an InnoDB type. Note that this function returns
6934 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
6935 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
6936 @param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type'; at least
6937 ENUM and SET, and unsigned integer types are 'unsigned types'
6938 @param[in] f MySQL Field
6939 @return DATA_BINARY, DATA_VARCHAR, ... */
6940 ulint
get_innobase_type_from_mysql_type(ulint * unsigned_flag,const void * f)6941 get_innobase_type_from_mysql_type(
6942 ulint* unsigned_flag,
6943 const void* f)
6944 {
6945 const class Field* field = reinterpret_cast<const class Field*>(f);
6946
6947 /* The following asserts try to check that the MySQL type code fits in
6948 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
6949 the type */
6950
6951 DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
6952 DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
6953 DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
6954 DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
6955 DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
6956
6957 if (field->flags & UNSIGNED_FLAG) {
6958
6959 *unsigned_flag = DATA_UNSIGNED;
6960 } else {
6961 *unsigned_flag = 0;
6962 }
6963
6964 if (field->real_type() == MYSQL_TYPE_ENUM
6965 || field->real_type() == MYSQL_TYPE_SET) {
6966
6967 /* MySQL has field->type() a string type for these, but the
6968 data is actually internally stored as an unsigned integer
6969 code! */
6970
6971 *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
6972 flag set to zero, even though
6973 internally this is an unsigned
6974 integer type */
6975 return(DATA_INT);
6976 }
6977
6978 switch (field->type()) {
6979 /* NOTE that we only allow string types in DATA_MYSQL and
6980 DATA_VARMYSQL */
6981 case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
6982 case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
6983 if (field->binary()) {
6984 return(DATA_BINARY);
6985 } else if (field->charset() == &my_charset_latin1) {
6986 return(DATA_VARCHAR);
6987 } else {
6988 return(DATA_VARMYSQL);
6989 }
6990 case MYSQL_TYPE_BIT:
6991 case MYSQL_TYPE_STRING:
6992 if (field->binary()) {
6993 return(DATA_FIXBINARY);
6994 } else if (field->charset() == &my_charset_latin1) {
6995 return(DATA_CHAR);
6996 } else {
6997 return(DATA_MYSQL);
6998 }
6999 case MYSQL_TYPE_NEWDECIMAL:
7000 return(DATA_FIXBINARY);
7001 case MYSQL_TYPE_LONG:
7002 case MYSQL_TYPE_LONGLONG:
7003 case MYSQL_TYPE_TINY:
7004 case MYSQL_TYPE_SHORT:
7005 case MYSQL_TYPE_INT24:
7006 case MYSQL_TYPE_DATE:
7007 case MYSQL_TYPE_YEAR:
7008 case MYSQL_TYPE_NEWDATE:
7009 return(DATA_INT);
7010 case MYSQL_TYPE_TIME:
7011 case MYSQL_TYPE_DATETIME:
7012 case MYSQL_TYPE_TIMESTAMP:
7013 if (field->key_type() == HA_KEYTYPE_BINARY) {
7014 return(DATA_FIXBINARY);
7015 } else {
7016 return(DATA_INT);
7017 }
7018 case MYSQL_TYPE_FLOAT:
7019 return(DATA_FLOAT);
7020 case MYSQL_TYPE_DOUBLE:
7021 return(DATA_DOUBLE);
7022 case MYSQL_TYPE_DECIMAL:
7023 return(DATA_DECIMAL);
7024 case MYSQL_TYPE_GEOMETRY:
7025 return(DATA_GEOMETRY);
7026 case MYSQL_TYPE_TINY_BLOB:
7027 case MYSQL_TYPE_MEDIUM_BLOB:
7028 case MYSQL_TYPE_BLOB:
7029 case MYSQL_TYPE_LONG_BLOB:
7030 return(DATA_BLOB);
7031 case MYSQL_TYPE_NULL:
7032 /* MySQL currently accepts "NULL" datatype, but will
7033 reject such datatype in the next release. We will cope
7034 with it and not trigger assertion failure in 5.1 */
7035 break;
7036 default:
7037 ut_error;
7038 }
7039
7040 return(0);
7041 }
7042
7043 /*******************************************************************//**
7044 Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
7045 storage format.
7046 @return value */
7047 static inline
7048 uint
innobase_read_from_2_little_endian(const uchar * buf)7049 innobase_read_from_2_little_endian(
7050 /*===============================*/
7051 const uchar* buf) /*!< in: from where to read */
7052 {
7053 return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
7054 }
7055
#ifdef WITH_WSREP
/*******************************************************************//**
Stores a key value for a row to a buffer.

The buffer is zero-filled first. For every user-defined key part, a NULL
indicator byte is written if the part is nullable, followed by the value:
string-like values (VARCHAR, BLOB/TEXT/GEOMETRY, CHAR/VAR_STRING) are passed
through wsrep_innobase_mysql_sort() in a local scratch buffer before being
stored, all other types are copied from the record as they are. A NULL part
only reserves space. Whenever a value does not fit into the remaining space
it is truncated and a message is printed.

All copies are bounded: into the scratch buffer by its size, into buff by
the space that is left.

NOTE(review): in the BLOB branch the sorted value is copied AFTER buff has
been advanced, unlike in the other branches. That layout is kept as it was
because changing it would change the generated key bytes; only the copy
length is now limited to the room that is left. Confirm whether the layout
itself is intended.

@return key value length as stored in buff */
UNIV_INTERN
uint
wsrep_store_key_val_for_row(
/*=========================*/
	THD* 		thd,
	TABLE*		table,
	uint		keynr,	/*!< in: key number */
	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
				format) */
	uint		buff_len,/*!< in: buffer length */
	const uchar*	record,
	ibool*          key_is_null)/*!< out: full key was null */
{
	KEY*		key_info	= table->key_info + keynr;
	KEY_PART_INFO*	key_part	= key_info->key_part;
	KEY_PART_INFO*	end		=
		key_part + key_info->user_defined_key_parts;
	char*		buff_start	= buff;
	/* One past the last writable byte of the output buffer. */
	const char*	buff_end	= buff_start + buff_len;
	enum_field_types mysql_type;
	Field*		field;
	/* Bytes of buff that have not been consumed yet. */
	uint buff_space = buff_len;

	DBUG_ENTER("wsrep_store_key_val_for_row");

	memset(buff, 0, buff_len);
	*key_is_null = TRUE;

	for (; key_part != end; key_part++) {

		/* Scratch buffer for the sort transformation. */
		uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		ibool part_is_null = FALSE;

		if (key_part->null_bit) {
			if (buff_space > 0) {
				if (record[key_part->null_offset]
				    & key_part->null_bit) {
					*buff = 1;
					part_is_null = TRUE;
				} else {
					*buff = 0;
				}
				buff++;
				buff_space--;
			} else {
				fprintf (stderr, "WSREP: key truncated: %s\n",
					 wsrep_thd_query(thd));
			}
		}
		if (!part_is_null) *key_is_null = FALSE;

		field = key_part->field;
		mysql_type = field->type();

		if (mysql_type == MYSQL_TYPE_VARCHAR) {
			/* >= 5.0.3 true VARCHAR */
			ulint			lenlen;
			ulint			len;
			const byte*		data;
			ulint			key_len;
			ulint			true_len;
			const CHARSET_INFO*	cs;
			int			error=0;

			key_len = key_part->length;

			if (part_is_null) {
				true_len = key_len + 2;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;
				continue;
			}
			cs = field->charset();

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

			data = row_mysql_read_true_varchar(&len,
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);

			true_len = len;

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) my_well_formed_length(cs,
						(const char *) data,
						(const char *) data + len,
						(uint) (key_len /
							cs->mbmaxlen),
						&error);
			}

			/* In a column prefix index, we may need to truncate
			the stored value: */
			if (true_len > key_len) {
				true_len = key_len;
			}
			/* cannot exceed max column length either, we may
			need to truncate the stored value: */
			if (true_len > sizeof(sorted)) {
				true_len = sizeof(sorted);
			}

			memcpy(sorted, data, true_len);
			true_len = wsrep_innobase_mysql_sort(
				mysql_type, cs->number, sorted, true_len,
				REC_VERSION_56_MAX_INDEX_COL_LEN);
			if (wsrep_protocol_version > 1) {
				/* Note that we always reserve the maximum
				possible length of the true VARCHAR in the
				key value, though only len first bytes after
				the 2 length bytes contain actual data. The
				rest of the space was reset to zero in the
				memset() call above. */
				if (true_len > buff_space) {
					WSREP_DEBUG (
						"write set key truncated for: %s\n",
						wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, sorted, true_len);
				buff       += true_len;
				buff_space -= true_len;
			} else {
				buff += key_len;
			}
		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB
			/* MYSQL_TYPE_GEOMETRY data is treated
			as BLOB data in innodb. */
			|| mysql_type == MYSQL_TYPE_GEOMETRY) {

			const CHARSET_INFO*	cs;
			ulint			key_len;
			ulint			true_len;
			int			error=0;
			ulint			blob_len;
			const byte*		blob_data;

			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);

			key_len = key_part->length;

			if (part_is_null) {
				true_len = key_len + 2;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;

				continue;
			}

			cs = field->charset();

			blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				(ulint) field->pack_length());

			true_len = blob_len;

			ut_a(get_field_offset(table, field)
			     == key_part->offset);

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (blob_len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) my_well_formed_length(cs,
						(const char *) blob_data,
						(const char *) blob_data
							+ blob_len,
						(uint) (key_len /
							cs->mbmaxlen),
						&error);
			}

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
			stored in the key value: */

			if (true_len > key_len) {
				true_len = key_len;
			}
			/* Never copy more than the scratch buffer holds,
			same as in the VARCHAR branch. */
			if (true_len > sizeof(sorted)) {
				true_len = sizeof(sorted);
			}

			memcpy(sorted, blob_data, true_len);
			true_len = wsrep_innobase_mysql_sort(
				mysql_type, cs->number, sorted, true_len,
				REC_VERSION_56_MAX_INDEX_COL_LEN);


			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */
			if (wsrep_protocol_version > 1) {
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;
			} else {
				buff += key_len;
			}

			/* buff has already been advanced here, so limit
			the copy to what is left of the output buffer. */
			ulint	copy_len = true_len;

			if (buff >= buff_end) {
				copy_len = 0;
			} else if (copy_len > ulint(buff_end - buff)) {
				copy_len = ulint(buff_end - buff);
			}

			memcpy(buff, sorted, copy_len);
		} else {
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

			const CHARSET_INFO*	cs = NULL;
			ulint			true_len;
			ulint			key_len;
			const uchar*		src_start;
			int			error=0;
			enum_field_types	real_type;

			key_len = key_part->length;

			if (part_is_null) {
				true_len = key_len;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;

				continue;
			}

			src_start = record + key_part->offset;
			real_type = field->real_type();
			true_len = key_len;

			/* Character set for the field is defined only
			to fields whose type is string and real field
			type is not enum or set. For these fields check
			if character set is multi byte. */

			if (real_type != MYSQL_TYPE_ENUM
				&& real_type != MYSQL_TYPE_SET
				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
					|| mysql_type == MYSQL_TYPE_STRING)) {

				cs = field->charset();

				/* For multi byte character sets we need to
				calculate the true length of the key */

				if (key_len > 0 && cs->mbmaxlen > 1) {

					true_len = (ulint)
						my_well_formed_length(cs,
							(const char *)src_start,
							(const char *)src_start
								+ key_len,
							(uint) (key_len /
								cs->mbmaxlen),
							&error);
				}
				/* Never copy more than the scratch buffer
				holds. */
				if (true_len > sizeof(sorted)) {
					true_len = sizeof(sorted);
				}
				memcpy(sorted, src_start, true_len);
				true_len = wsrep_innobase_mysql_sort(
					mysql_type, cs->number, sorted, true_len,
					REC_VERSION_56_MAX_INDEX_COL_LEN);

				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, sorted, true_len);
			} else {
				/* Raw copy of the column value; it must not
				run past the end of the output buffer. */
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, src_start, true_len);
			}
			buff       += true_len;
			buff_space -= true_len;
		}
	}

	ut_a(buff <= buff_start + buff_len);

	DBUG_RETURN((uint)(buff - buff_start));
}
#endif /* WITH_WSREP */
7364 /**************************************************************//**
7365 Determines if a field is needed in a m_prebuilt struct 'template'.
7366 @return field to use, or NULL if the field is not needed */
7367 static
7368 const Field*
build_template_needs_field(ibool index_contains,ibool read_just_key,ibool fetch_all_in_key,ibool fetch_primary_key_cols,dict_index_t * index,const TABLE * table,ulint i,ulint num_v)7369 build_template_needs_field(
7370 /*=======================*/
7371 ibool index_contains, /*!< in:
7372 dict_index_contains_col_or_prefix(
7373 index, i) */
7374 ibool read_just_key, /*!< in: TRUE when MySQL calls
7375 ha_innobase::extra with the
7376 argument HA_EXTRA_KEYREAD; it is enough
7377 to read just columns defined in
7378 the index (i.e., no read of the
7379 clustered index record necessary) */
7380 ibool fetch_all_in_key,
7381 /*!< in: true=fetch all fields in
7382 the index */
7383 ibool fetch_primary_key_cols,
7384 /*!< in: true=fetch the
7385 primary key columns */
7386 dict_index_t* index, /*!< in: InnoDB index to use */
7387 const TABLE* table, /*!< in: MySQL table object */
7388 ulint i, /*!< in: field index in InnoDB table */
7389 ulint num_v) /*!< in: num virtual column so far */
7390 {
7391 const Field* field = table->field[i];
7392
7393 if (!field->stored_in_db()
7394 && ha_innobase::omits_virtual_cols(*table->s)) {
7395 return NULL;
7396 }
7397
7398 if (!index_contains) {
7399 if (read_just_key) {
7400 /* If this is a 'key read', we do not need
7401 columns that are not in the key */
7402
7403 return(NULL);
7404 }
7405 } else if (fetch_all_in_key) {
7406 /* This field is needed in the query */
7407
7408 return(field);
7409 }
7410
7411 if (bitmap_is_set(table->read_set, static_cast<uint>(i))
7412 || bitmap_is_set(table->write_set, static_cast<uint>(i))) {
7413 /* This field is needed in the query */
7414
7415 return(field);
7416 }
7417
7418 ut_ad(i >= num_v);
7419 if (fetch_primary_key_cols
7420 && dict_table_col_in_clustered_key(index->table, i - num_v)) {
7421 /* This field is needed in the query */
7422 return(field);
7423 }
7424
7425 /* This field is not needed in the query, skip it */
7426
7427 return(NULL);
7428 }
7429
7430 /**************************************************************//**
7431 Determines if a field is needed in a m_prebuilt struct 'template'.
7432 @return whether the field is needed for index condition pushdown */
7433 inline
7434 bool
build_template_needs_field_in_icp(const dict_index_t * index,const row_prebuilt_t * prebuilt,bool contains,ulint i,bool is_virtual)7435 build_template_needs_field_in_icp(
7436 /*==============================*/
7437 const dict_index_t* index, /*!< in: InnoDB index */
7438 const row_prebuilt_t* prebuilt,/*!< in: row fetch template */
7439 bool contains,/*!< in: whether the index contains
7440 column i */
7441 ulint i, /*!< in: column number */
7442 bool is_virtual)
7443 /*!< in: a virtual column or not */
7444 {
7445 ut_ad(contains == dict_index_contains_col_or_prefix(index, i, is_virtual));
7446
7447 return(index == prebuilt->index
7448 ? contains
7449 : dict_index_contains_col_or_prefix(prebuilt->index, i, is_virtual));
7450 }
7451
7452 /**************************************************************//**
7453 Adds a field to a m_prebuilt struct 'template'.
7454 @return the field template */
7455 static
7456 mysql_row_templ_t*
build_template_field(row_prebuilt_t * prebuilt,dict_index_t * clust_index,dict_index_t * index,TABLE * table,const Field * field,ulint i,ulint v_no)7457 build_template_field(
7458 /*=================*/
7459 row_prebuilt_t* prebuilt, /*!< in/out: template */
7460 dict_index_t* clust_index, /*!< in: InnoDB clustered index */
7461 dict_index_t* index, /*!< in: InnoDB index to use */
7462 TABLE* table, /*!< in: MySQL table object */
7463 const Field* field, /*!< in: field in MySQL table */
7464 ulint i, /*!< in: field index in InnoDB table */
7465 ulint v_no) /*!< in: field index for virtual col */
7466 {
7467 mysql_row_templ_t* templ;
7468 const dict_col_t* col;
7469
7470 ut_ad(clust_index->table == index->table);
7471
7472 templ = prebuilt->mysql_template + prebuilt->n_template++;
7473 MEM_UNDEFINED(templ, sizeof *templ);
7474 templ->rec_field_is_prefix = FALSE;
7475 templ->rec_prefix_field_no = ULINT_UNDEFINED;
7476 templ->is_virtual = !field->stored_in_db();
7477
7478 if (!templ->is_virtual) {
7479 templ->col_no = i;
7480 col = dict_table_get_nth_col(index->table, i);
7481 templ->clust_rec_field_no = dict_col_get_clust_pos(
7482 col, clust_index);
7483 /* If clustered index record field is not found, lets print out
7484 field names and all the rest to understand why field is not found. */
7485 if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
7486 const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
7487 dict_field_t* field=NULL;
7488 size_t size = 0;
7489
7490 for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7491 dict_field_t* ifield = &(clust_index->fields[j]);
7492 if (ifield && !memcmp(tb_col_name, ifield->name,
7493 strlen(tb_col_name))) {
7494 field = ifield;
7495 break;
7496 }
7497 }
7498
7499 ib::info() << "Looking for field " << i << " name "
7500 << (tb_col_name ? tb_col_name : "NULL")
7501 << " from table " << clust_index->table->name;
7502
7503
7504 for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7505 dict_field_t* ifield = &(clust_index->fields[j]);
7506 ib::info() << "InnoDB Table "
7507 << clust_index->table->name
7508 << "field " << j << " name "
7509 << (ifield ? ifield->name() : "NULL");
7510 }
7511
7512 for(ulint j=0; j < table->s->stored_fields; j++) {
7513 ib::info() << "MySQL table "
7514 << table->s->table_name.str
7515 << " field " << j << " name "
7516 << table->field[j]->field_name.str;
7517 }
7518
7519 ib::fatal() << "Clustered record field for column " << i
7520 << " not found table n_user_defined "
7521 << clust_index->n_user_defined_cols
7522 << " index n_user_defined "
7523 << clust_index->table->n_cols - DATA_N_SYS_COLS
7524 << " InnoDB table "
7525 << clust_index->table->name
7526 << " field name "
7527 << (field ? field->name() : "NULL")
7528 << " MySQL table "
7529 << table->s->table_name.str
7530 << " field name "
7531 << (tb_col_name ? tb_col_name : "NULL")
7532 << " n_fields "
7533 << table->s->stored_fields
7534 << " query "
7535 << innobase_get_stmt_unsafe(current_thd, &size);
7536 }
7537
7538 if (dict_index_is_clust(index)) {
7539 templ->rec_field_no = templ->clust_rec_field_no;
7540 } else {
7541 /* If we're in a secondary index, keep track
7542 * of the original index position even if this
7543 * is just a prefix index; we will use this
7544 * later to avoid a cluster index lookup in
7545 * some cases.*/
7546
7547 templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
7548 &templ->rec_prefix_field_no);
7549 }
7550 } else {
7551 DBUG_ASSERT(!ha_innobase::omits_virtual_cols(*table->s));
7552 col = &dict_table_get_nth_v_col(index->table, v_no)->m_col;
7553 templ->clust_rec_field_no = v_no;
7554
7555 if (dict_index_is_clust(index)) {
7556 templ->rec_field_no = templ->clust_rec_field_no;
7557 } else {
7558 templ->rec_field_no
7559 = dict_index_get_nth_col_or_prefix_pos(
7560 index, v_no, FALSE, true,
7561 &templ->rec_prefix_field_no);
7562 }
7563 templ->icp_rec_field_no = ULINT_UNDEFINED;
7564 }
7565
7566 if (field->real_maybe_null()) {
7567 templ->mysql_null_byte_offset =
7568 field->null_offset();
7569
7570 templ->mysql_null_bit_mask = (ulint) field->null_bit;
7571 } else {
7572 templ->mysql_null_bit_mask = 0;
7573 }
7574
7575
7576 templ->mysql_col_offset = (ulint) get_field_offset(table, field);
7577 templ->mysql_col_len = (ulint) field->pack_length();
7578 templ->type = col->mtype;
7579 templ->mysql_type = (ulint) field->type();
7580
7581 if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
7582 templ->mysql_length_bytes = (ulint)
7583 (((Field_varstring*) field)->length_bytes);
7584 } else {
7585 templ->mysql_length_bytes = 0;
7586 }
7587
7588 templ->charset = dtype_get_charset_coll(col->prtype);
7589 templ->mbminlen = dict_col_get_mbminlen(col);
7590 templ->mbmaxlen = dict_col_get_mbmaxlen(col);
7591 templ->is_unsigned = col->prtype & DATA_UNSIGNED;
7592
7593 if (!dict_index_is_clust(index)
7594 && templ->rec_field_no == ULINT_UNDEFINED) {
7595 prebuilt->need_to_access_clustered = TRUE;
7596
7597 if (templ->rec_prefix_field_no != ULINT_UNDEFINED) {
7598 dict_field_t* field = dict_index_get_nth_field(
7599 index,
7600 templ->rec_prefix_field_no);
7601 templ->rec_field_is_prefix = (field->prefix_len != 0);
7602 }
7603 }
7604
7605 /* For spatial index, we need to access cluster index. */
7606 if (dict_index_is_spatial(index)) {
7607 prebuilt->need_to_access_clustered = TRUE;
7608 }
7609
7610 if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
7611 + templ->mysql_col_len) {
7612 prebuilt->mysql_prefix_len = templ->mysql_col_offset
7613 + templ->mysql_col_len;
7614 }
7615
7616 if (DATA_LARGE_MTYPE(templ->type)) {
7617 prebuilt->templ_contains_blob = TRUE;
7618 }
7619
7620 return(templ);
7621 }
7622
7623 /**************************************************************//**
7624 Builds a 'template' to the m_prebuilt struct. The template is used in fast
7625 retrieval of just those column values MySQL needs in its processing. */
7626
7627 void
build_template(bool whole_row)7628 ha_innobase::build_template(
7629 /*========================*/
7630 bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW,
7631 false=ROW_MYSQL_REC_FIELDS */
7632 {
7633 dict_index_t* index;
7634 dict_index_t* clust_index;
7635 ibool fetch_all_in_key = FALSE;
7636 ibool fetch_primary_key_cols = FALSE;
7637
7638 if (m_prebuilt->select_lock_type == LOCK_X || m_prebuilt->table->no_rollback()) {
7639 /* We always retrieve the whole clustered index record if we
7640 use exclusive row level locks, for example, if the read is
7641 done in an UPDATE statement or if we are using a no rollback
7642 table */
7643
7644 whole_row = true;
7645 } else if (!whole_row) {
7646 if (m_prebuilt->hint_need_to_fetch_extra_cols
7647 == ROW_RETRIEVE_ALL_COLS) {
7648
7649 /* We know we must at least fetch all columns in the
7650 key, or all columns in the table */
7651
7652 if (m_prebuilt->read_just_key) {
7653 /* MySQL has instructed us that it is enough
7654 to fetch the columns in the key; looks like
7655 MySQL can set this flag also when there is
7656 only a prefix of the column in the key: in
7657 that case we retrieve the whole column from
7658 the clustered index */
7659
7660 fetch_all_in_key = TRUE;
7661 } else {
7662 whole_row = true;
7663 }
7664 } else if (m_prebuilt->hint_need_to_fetch_extra_cols
7665 == ROW_RETRIEVE_PRIMARY_KEY) {
7666 /* We must at least fetch all primary key cols. Note
7667 that if the clustered index was internally generated
7668 by InnoDB on the row id (no primary key was
7669 defined), then row_search_for_mysql() will always
7670 retrieve the row id to a special buffer in the
7671 m_prebuilt struct. */
7672
7673 fetch_primary_key_cols = TRUE;
7674 }
7675 }
7676
7677 clust_index = dict_table_get_first_index(m_prebuilt->table);
7678
7679 index = whole_row ? clust_index : m_prebuilt->index;
7680
7681 m_prebuilt->versioned_write = table->versioned_write(VERS_TRX_ID);
7682 m_prebuilt->need_to_access_clustered = (index == clust_index);
7683
7684 /* Either m_prebuilt->index should be a secondary index, or it
7685 should be the clustered index. */
7686 ut_ad(dict_index_is_clust(index) == (index == clust_index));
7687
7688 /* Below we check column by column if we need to access
7689 the clustered index. */
7690
7691 const bool skip_virtual = omits_virtual_cols(*table_share);
7692 const ulint n_fields = table_share->fields;
7693
7694 if (!m_prebuilt->mysql_template) {
7695 m_prebuilt->mysql_template = (mysql_row_templ_t*)
7696 ut_malloc_nokey(n_fields * sizeof(mysql_row_templ_t));
7697 }
7698
7699 m_prebuilt->template_type = whole_row
7700 ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
7701 m_prebuilt->null_bitmap_len = table->s->null_bytes;
7702
7703 /* Prepare to build m_prebuilt->mysql_template[]. */
7704 m_prebuilt->templ_contains_blob = FALSE;
7705 m_prebuilt->mysql_prefix_len = 0;
7706 m_prebuilt->n_template = 0;
7707 m_prebuilt->idx_cond_n_cols = 0;
7708
7709 /* Note that in InnoDB, i is the column number in the table.
7710 MySQL calls columns 'fields'. */
7711
7712 ulint num_v = 0;
7713
7714 if (active_index != MAX_KEY
7715 && active_index == pushed_idx_cond_keyno) {
7716 /* Push down an index condition or an end_range check. */
7717 for (ulint i = 0; i < n_fields; i++) {
7718 const Field* field = table->field[i];
7719 const bool is_v = !field->stored_in_db();
7720 if (is_v && skip_virtual) {
7721 num_v++;
7722 continue;
7723 }
7724 ibool index_contains
7725 = dict_index_contains_col_or_prefix(
7726 index, is_v ? num_v : i - num_v, is_v);
7727 if (is_v && index_contains) {
7728 m_prebuilt->n_template = 0;
7729 num_v = 0;
7730 goto no_icp;
7731 }
7732
7733 /* Test if an end_range or an index condition
7734 refers to the field. Note that "index" and
7735 "index_contains" may refer to the clustered index.
7736 Index condition pushdown is relative to
7737 m_prebuilt->index (the index that is being
7738 looked up first). */
7739
7740 /* When join_read_always_key() invokes this
7741 code via handler::ha_index_init() and
7742 ha_innobase::index_init(), end_range is not
7743 yet initialized. Because of that, we must
7744 always check for index_contains, instead of
7745 the subset
7746 field->part_of_key.is_set(active_index)
7747 which would be acceptable if end_range==NULL. */
7748 if (build_template_needs_field_in_icp(
7749 index, m_prebuilt, index_contains,
7750 is_v ? num_v : i - num_v, is_v)) {
7751 if (!whole_row) {
7752 field = build_template_needs_field(
7753 index_contains,
7754 m_prebuilt->read_just_key,
7755 fetch_all_in_key,
7756 fetch_primary_key_cols,
7757 index, table, i, num_v);
7758 if (!field) {
7759 if (is_v) {
7760 num_v++;
7761 }
7762 continue;
7763 }
7764 }
7765
7766 ut_ad(!is_v);
7767
7768 mysql_row_templ_t* templ= build_template_field(
7769 m_prebuilt, clust_index, index,
7770 table, field, i - num_v, 0);
7771
7772 ut_ad(!templ->is_virtual);
7773
7774 m_prebuilt->idx_cond_n_cols++;
7775 ut_ad(m_prebuilt->idx_cond_n_cols
7776 == m_prebuilt->n_template);
7777
7778 if (index == m_prebuilt->index) {
7779 templ->icp_rec_field_no
7780 = templ->rec_field_no;
7781 } else {
7782 templ->icp_rec_field_no
7783 = dict_index_get_nth_col_pos(
7784 m_prebuilt->index,
7785 i - num_v,
7786 &templ->rec_prefix_field_no);
7787 }
7788
7789 if (dict_index_is_clust(m_prebuilt->index)) {
7790 ut_ad(templ->icp_rec_field_no
7791 != ULINT_UNDEFINED);
7792 /* If the primary key includes
7793 a column prefix, use it in
7794 index condition pushdown,
7795 because the condition is
7796 evaluated before fetching any
7797 off-page (externally stored)
7798 columns. */
7799 if (templ->icp_rec_field_no
7800 < m_prebuilt->index->n_uniq) {
7801 /* This is a key column;
7802 all set. */
7803 continue;
7804 }
7805 } else if (templ->icp_rec_field_no
7806 != ULINT_UNDEFINED) {
7807 continue;
7808 }
7809
7810 /* This is a column prefix index.
7811 The column prefix can be used in
7812 an end_range comparison. */
7813
7814 templ->icp_rec_field_no
7815 = dict_index_get_nth_col_or_prefix_pos(
7816 m_prebuilt->index, i - num_v,
7817 true, false,
7818 &templ->rec_prefix_field_no);
7819 ut_ad(templ->icp_rec_field_no
7820 != ULINT_UNDEFINED);
7821
7822 /* Index condition pushdown can be used on
7823 all columns of a secondary index, and on
7824 the PRIMARY KEY columns. On the clustered
7825 index, it must never be used on other than
7826 PRIMARY KEY columns, because those columns
7827 may be stored off-page, and we will not
7828 fetch externally stored columns before
7829 checking the index condition. */
7830 /* TODO: test the above with an assertion
7831 like this. Note that index conditions are
7832 currently pushed down as part of the
7833 "optimizer phase" while end_range is done
7834 as part of the execution phase. Therefore,
7835 we were unable to use an accurate condition
7836 for end_range in the "if" condition above,
7837 and the following assertion would fail.
7838 ut_ad(!dict_index_is_clust(m_prebuilt->index)
7839 || templ->rec_field_no
7840 < m_prebuilt->index->n_uniq);
7841 */
7842 }
7843
7844 if (is_v) {
7845 num_v++;
7846 }
7847 }
7848
7849 ut_ad(m_prebuilt->idx_cond_n_cols > 0);
7850 ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template);
7851
7852 num_v = 0;
7853
7854 /* Include the fields that are not needed in index condition
7855 pushdown. */
7856 for (ulint i = 0; i < n_fields; i++) {
7857 const Field* field = table->field[i];
7858 const bool is_v = !field->stored_in_db();
7859 if (is_v && skip_virtual) {
7860 num_v++;
7861 continue;
7862 }
7863
7864 ibool index_contains
7865 = dict_index_contains_col_or_prefix(
7866 index, is_v ? num_v : i - num_v, is_v);
7867
7868 if (!build_template_needs_field_in_icp(
7869 index, m_prebuilt, index_contains,
7870 is_v ? num_v : i - num_v, is_v)) {
7871 /* Not needed in ICP */
7872 if (!whole_row) {
7873 field = build_template_needs_field(
7874 index_contains,
7875 m_prebuilt->read_just_key,
7876 fetch_all_in_key,
7877 fetch_primary_key_cols,
7878 index, table, i, num_v);
7879 if (!field) {
7880 if (is_v) {
7881 num_v++;
7882 }
7883 continue;
7884 }
7885 }
7886
7887 ut_d(mysql_row_templ_t* templ =)
7888 build_template_field(
7889 m_prebuilt, clust_index, index,
7890 table, field, i - num_v, num_v);
7891 ut_ad(templ->is_virtual == (ulint)is_v);
7892
7893 if (is_v) {
7894 num_v++;
7895 }
7896 }
7897 }
7898
7899 m_prebuilt->idx_cond = this;
7900 } else {
7901 no_icp:
7902 /* No index condition pushdown */
7903 m_prebuilt->idx_cond = NULL;
7904 ut_ad(num_v == 0);
7905
7906 for (ulint i = 0; i < n_fields; i++) {
7907 const Field* field = table->field[i];
7908 const bool is_v = !field->stored_in_db();
7909
7910 if (whole_row) {
7911 if (is_v && skip_virtual) {
7912 num_v++;
7913 continue;
7914 }
7915 /* Even this is whole_row, if the seach is
7916 on a virtual column, and read_just_key is
7917 set, and field is not in this index, we
7918 will not try to fill the value since they
7919 are not stored in such index nor in the
7920 cluster index. */
7921 if (is_v
7922 && m_prebuilt->read_just_key
7923 && !dict_index_contains_col_or_prefix(
7924 m_prebuilt->index, num_v, true))
7925 {
7926 /* Turn off ROW_MYSQL_WHOLE_ROW */
7927 m_prebuilt->template_type =
7928 ROW_MYSQL_REC_FIELDS;
7929 num_v++;
7930 continue;
7931 }
7932 } else {
7933 ibool contain;
7934
7935 if (!is_v) {
7936 contain = dict_index_contains_col_or_prefix(
7937 index, i - num_v,
7938 false);
7939 } else if (skip_virtual
7940 || dict_index_is_clust(index)) {
7941 num_v++;
7942 continue;
7943 } else {
7944 contain = dict_index_contains_col_or_prefix(
7945 index, num_v, true);
7946 }
7947
7948 field = build_template_needs_field(
7949 contain,
7950 m_prebuilt->read_just_key,
7951 fetch_all_in_key,
7952 fetch_primary_key_cols,
7953 index, table, i, num_v);
7954 if (!field) {
7955 if (is_v) {
7956 num_v++;
7957 }
7958 continue;
7959 }
7960 }
7961
7962 ut_d(mysql_row_templ_t* templ =)
7963 build_template_field(
7964 m_prebuilt, clust_index, index,
7965 table, field, i - num_v, num_v);
7966 ut_ad(templ->is_virtual == (ulint)is_v);
7967 if (is_v) {
7968 num_v++;
7969 }
7970 }
7971 }
7972
7973 if (index != clust_index && m_prebuilt->need_to_access_clustered) {
7974 /* Change rec_field_no's to correspond to the clustered index
7975 record */
7976 for (ulint i = 0; i < m_prebuilt->n_template; i++) {
7977 mysql_row_templ_t* templ
7978 = &m_prebuilt->mysql_template[i];
7979
7980 templ->rec_field_no = templ->clust_rec_field_no;
7981 }
7982 }
7983 }
7984
7985 /********************************************************************//**
7986 This special handling is really to overcome the limitations of MySQL's
7987 binlogging. We need to eliminate the non-determinism that will arise in
7988 INSERT ... SELECT type of statements, since MySQL binlog only stores the
7989 min value of the autoinc interval. Once that is fixed we can get rid of
7990 the special lock handling.
7991 @return DB_SUCCESS if all OK else error code */
7992
7993 dberr_t
innobase_lock_autoinc(void)7994 ha_innobase::innobase_lock_autoinc(void)
7995 /*====================================*/
7996 {
7997 DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
7998 dberr_t error = DB_SUCCESS;
7999
8000 ut_ad(!srv_read_only_mode);
8001
8002 switch (innobase_autoinc_lock_mode) {
8003 case AUTOINC_NO_LOCKING:
8004 /* Acquire only the AUTOINC mutex. */
8005 dict_table_autoinc_lock(m_prebuilt->table);
8006 break;
8007
8008 case AUTOINC_NEW_STYLE_LOCKING:
8009 /* For simple (single/multi) row INSERTs/REPLACEs and RBR
8010 events, we fallback to the old style only if another
8011 transaction has already acquired the AUTOINC lock on
8012 behalf of a LOAD FILE or INSERT ... SELECT etc. type of
8013 statement. */
8014 if (thd_sql_command(m_user_thd) == SQLCOM_INSERT
8015 || thd_sql_command(m_user_thd) == SQLCOM_REPLACE
8016 || thd_sql_command(m_user_thd) == SQLCOM_END // RBR event
8017 ) {
8018
8019 /* Acquire the AUTOINC mutex. */
8020 dict_table_autoinc_lock(m_prebuilt->table);
8021
8022 /* We need to check that another transaction isn't
8023 already holding the AUTOINC lock on the table. */
8024 if (m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
8025 /* Release the mutex to avoid deadlocks and
8026 fall back to old style locking. */
8027 dict_table_autoinc_unlock(m_prebuilt->table);
8028 } else {
8029 /* Do not fall back to old style locking. */
8030 break;
8031 }
8032 }
8033 /* Use old style locking. */
8034 /* fall through */
8035 case AUTOINC_OLD_STYLE_LOCKING:
8036 DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
8037 ut_ad(0););
8038 error = row_lock_table_autoinc_for_mysql(m_prebuilt);
8039
8040 if (error == DB_SUCCESS) {
8041
8042 /* Acquire the AUTOINC mutex. */
8043 dict_table_autoinc_lock(m_prebuilt->table);
8044 }
8045 break;
8046
8047 default:
8048 ut_error;
8049 }
8050
8051 DBUG_RETURN(error);
8052 }
8053
8054 /********************************************************************//**
8055 Store the autoinc value in the table. The autoinc value is only set if
8056 it's greater than the existing autoinc value in the table.
8057 @return DB_SUCCESS if all went well else error code */
8058
8059 dberr_t
innobase_set_max_autoinc(ulonglong auto_inc)8060 ha_innobase::innobase_set_max_autoinc(
8061 /*==================================*/
8062 ulonglong auto_inc) /*!< in: value to store */
8063 {
8064 dberr_t error;
8065
8066 error = innobase_lock_autoinc();
8067
8068 if (error == DB_SUCCESS) {
8069
8070 dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
8071
8072 dict_table_autoinc_unlock(m_prebuilt->table);
8073 }
8074
8075 return(error);
8076 }
8077
8078 /********************************************************************//**
8079 Stores a row in an InnoDB database, to the table specified in this
8080 handle.
8081 @return error code */
8082
8083 int
write_row(uchar * record)8084 ha_innobase::write_row(
8085 /*===================*/
8086 uchar* record) /*!< in: a row in MySQL format */
8087 {
8088 dberr_t error;
8089 #ifdef WITH_WSREP
8090 bool wsrep_auto_inc_inserted= false;
8091 #endif
8092 int error_result = 0;
8093 bool auto_inc_used = false;
8094
8095 DBUG_ENTER("ha_innobase::write_row");
8096
8097 trx_t* trx = thd_to_trx(m_user_thd);
8098
8099 /* Validation checks before we commence write_row operation. */
8100 if (high_level_read_only) {
8101 ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8102 DBUG_RETURN(HA_ERR_TABLE_READONLY);
8103 }
8104
8105 ut_a(m_prebuilt->trx == trx);
8106
8107 if (!trx_is_started(trx)) {
8108 trx->will_lock = true;
8109 }
8110
8111 #ifdef WITH_WSREP
8112 if (trx->is_wsrep() && wsrep_is_load_multi_commit(m_user_thd))
8113 {
8114 /* Note that this transaction is still active. */
8115 trx_register_for_2pc(m_prebuilt->trx);
8116 /* We will need an IX lock on the destination table. */
8117 m_prebuilt->sql_stat_start = TRUE;
8118 }
8119 #endif /* WITH_WSREP */
8120
8121 ins_mode_t vers_set_fields;
8122 /* Handling of Auto-Increment Columns. */
8123 if (table->next_number_field && record == table->record[0]) {
8124
8125 /* Reset the error code before calling
8126 innobase_get_auto_increment(). */
8127 m_prebuilt->autoinc_error = DB_SUCCESS;
8128
8129 #ifdef WITH_WSREP
8130 wsrep_auto_inc_inserted = trx->is_wsrep()
8131 && wsrep_drupal_282555_workaround
8132 && table->next_number_field->val_int() == 0;
8133 #endif
8134
8135 if ((error_result = update_auto_increment())) {
8136 /* We don't want to mask autoinc overflow errors. */
8137
8138 /* Handle the case where the AUTOINC sub-system
8139 failed during initialization. */
8140 if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
8141 error_result = ER_AUTOINC_READ_FAILED;
8142 /* Set the error message to report too. */
8143 my_error(ER_AUTOINC_READ_FAILED, MYF(0));
8144 goto func_exit;
8145 } else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
8146 error = m_prebuilt->autoinc_error;
8147 goto report_error;
8148 }
8149
8150 /* MySQL errors are passed straight back. */
8151 goto func_exit;
8152 }
8153
8154 auto_inc_used = true;
8155 }
8156
8157 /* Prepare INSERT graph that will be executed for actual INSERT
8158 (This is a one time operation) */
8159 if (m_prebuilt->mysql_template == NULL
8160 || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
8161
8162 /* Build the template used in converting quickly between
8163 the two database formats */
8164
8165 build_template(true);
8166 }
8167
8168 innobase_srv_conc_enter_innodb(m_prebuilt);
8169
8170 vers_set_fields = table->versioned_write(VERS_TRX_ID) ?
8171 ROW_INS_VERSIONED : ROW_INS_NORMAL;
8172
8173 /* Execute insert graph that will result in actual insert. */
8174 error = row_insert_for_mysql((byte*) record, m_prebuilt, vers_set_fields);
8175
8176 DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
8177
8178 /* Handling of errors related to auto-increment. */
8179 if (auto_inc_used) {
8180 ulonglong auto_inc;
8181
8182 /* Note the number of rows processed for this statement, used
8183 by get_auto_increment() to determine the number of AUTO-INC
8184 values to reserve. This is only useful for a mult-value INSERT
8185 and is a statement level counter. */
8186 if (trx->n_autoinc_rows > 0) {
8187 --trx->n_autoinc_rows;
8188 }
8189
8190 /* Get the value that MySQL attempted to store in the table.*/
8191 auto_inc = table->next_number_field->val_uint();
8192
8193 switch (error) {
8194 case DB_DUPLICATE_KEY:
8195
8196 /* A REPLACE command and LOAD DATA INFILE REPLACE
8197 handle a duplicate key error themselves, but we
8198 must update the autoinc counter if we are performing
8199 those statements. */
8200
8201 switch (thd_sql_command(m_user_thd)) {
8202 case SQLCOM_LOAD:
8203 if (!trx->duplicates) {
8204 break;
8205 }
8206
8207 case SQLCOM_REPLACE:
8208 case SQLCOM_INSERT_SELECT:
8209 case SQLCOM_REPLACE_SELECT:
8210 goto set_max_autoinc;
8211
8212 #ifdef WITH_WSREP
8213 /* workaround for LP bug #355000, retrying the insert */
8214 case SQLCOM_INSERT:
8215
8216 WSREP_DEBUG("DUPKEY error for autoinc\n"
8217 "THD %ld, value %llu, off %llu inc %llu",
8218 thd_get_thread_id(m_user_thd),
8219 auto_inc,
8220 m_prebuilt->autoinc_offset,
8221 m_prebuilt->autoinc_increment);
8222
8223 if (wsrep_auto_inc_inserted &&
8224 wsrep_thd_retry_counter(m_user_thd) == 0 &&
8225 !thd_test_options(m_user_thd,
8226 OPTION_NOT_AUTOCOMMIT |
8227 OPTION_BEGIN)) {
8228 WSREP_DEBUG(
8229 "retrying insert: %s",
8230 wsrep_thd_query(m_user_thd));
8231 error= DB_SUCCESS;
8232 wsrep_thd_set_conflict_state(
8233 m_user_thd, MUST_ABORT);
8234 innobase_srv_conc_exit_innodb(m_prebuilt);
8235 /* jump straight to func exit over
8236 * later wsrep hooks */
8237 goto func_exit;
8238 }
8239 break;
8240 #endif /* WITH_WSREP */
8241
8242 default:
8243 break;
8244 }
8245
8246 break;
8247
8248 case DB_SUCCESS:
8249 /* If the actual value inserted is greater than
8250 the upper limit of the interval, then we try and
8251 update the table upper limit. Note: last_value
8252 will be 0 if get_auto_increment() was not called. */
8253
8254 if (auto_inc >= m_prebuilt->autoinc_last_value) {
8255 set_max_autoinc:
8256 /* We need the upper limit of the col type to check for
8257 whether we update the table autoinc counter or not. */
8258 ulonglong col_max_value =
8259 table->next_number_field->get_max_int_value();
8260
8261 /* This should filter out the negative
8262 values set explicitly by the user. */
8263 if (auto_inc <= col_max_value) {
8264 ut_ad(m_prebuilt->autoinc_increment > 0);
8265
8266 ulonglong offset;
8267 ulonglong increment;
8268 dberr_t err;
8269
8270 offset = m_prebuilt->autoinc_offset;
8271 increment = m_prebuilt->autoinc_increment;
8272
8273 auto_inc = innobase_next_autoinc(
8274 auto_inc, 1, increment, offset,
8275 col_max_value);
8276
8277 err = innobase_set_max_autoinc(
8278 auto_inc);
8279
8280 if (err != DB_SUCCESS) {
8281 error = err;
8282 }
8283 }
8284 }
8285 break;
8286 default:
8287 break;
8288 }
8289 }
8290
8291 innobase_srv_conc_exit_innodb(m_prebuilt);
8292
8293 report_error:
8294 /* Cleanup and exit. */
8295 if (error == DB_TABLESPACE_DELETED) {
8296 ib_senderrf(
8297 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
8298 ER_TABLESPACE_DISCARDED,
8299 table->s->table_name.str);
8300 }
8301
8302 error_result = convert_error_code_to_mysql(
8303 error, m_prebuilt->table->flags, m_user_thd);
8304
8305 #ifdef WITH_WSREP
8306 if (!error_result && trx->is_wsrep()
8307 && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
8308 && !wsrep_consistency_check(m_user_thd)
8309 && !wsrep_thd_ignore_table(m_user_thd)) {
8310 if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
8311 NULL)) {
8312 DBUG_PRINT("wsrep", ("row key failed"));
8313 error_result = HA_ERR_INTERNAL_ERROR;
8314 goto func_exit;
8315 }
8316 }
8317 #endif /* WITH_WSREP */
8318
8319 if (error_result == HA_FTS_INVALID_DOCID) {
8320 my_error(HA_FTS_INVALID_DOCID, MYF(0));
8321 }
8322
8323 func_exit:
8324 innobase_active_small();
8325
8326 DBUG_RETURN(error_result);
8327 }
8328
8329 /** Fill the update vector's "old_vrow" field for those non-updated,
8330 but indexed columns. Such columns could stil present in the virtual
8331 index rec fields even if they are not updated (some other fields updated),
8332 so needs to be logged.
8333 @param[in] prebuilt InnoDB prebuilt struct
8334 @param[in,out] vfield field to filled
8335 @param[in] o_len actual column length
8336 @param[in,out] col column to be filled
8337 @param[in] old_mysql_row_col MySQL old field ptr
8338 @param[in] col_pack_len MySQL field col length
8339 @param[in,out] buf buffer for a converted integer value
8340 @return used buffer ptr from row_mysql_store_col_in_innobase_format() */
8341 static
8342 byte*
innodb_fill_old_vcol_val(row_prebuilt_t * prebuilt,dfield_t * vfield,ulint o_len,dict_col_t * col,const byte * old_mysql_row_col,ulint col_pack_len,byte * buf)8343 innodb_fill_old_vcol_val(
8344 row_prebuilt_t* prebuilt,
8345 dfield_t* vfield,
8346 ulint o_len,
8347 dict_col_t* col,
8348 const byte* old_mysql_row_col,
8349 ulint col_pack_len,
8350 byte* buf)
8351 {
8352 dict_col_copy_type(
8353 col, dfield_get_type(vfield));
8354 if (o_len != UNIV_SQL_NULL) {
8355
8356 buf = row_mysql_store_col_in_innobase_format(
8357 vfield,
8358 buf,
8359 TRUE,
8360 old_mysql_row_col,
8361 col_pack_len,
8362 dict_table_is_comp(prebuilt->table));
8363 } else {
8364 dfield_set_null(vfield);
8365 }
8366
8367 return(buf);
8368 }
8369
8370 /** Calculate an update vector corresponding to the changes
8371 between old_row and new_row.
8372 @param[out] uvect update vector
8373 @param[in] old_row current row in MySQL format
8374 @param[in] new_row intended updated row in MySQL format
8375 @param[in] table MySQL table handle
8376 @param[in,out] upd_buff buffer to use for converted values
8377 @param[in] buff_len length of upd_buff
8378 @param[in,out] prebuilt InnoDB execution context
8379 @param[out] auto_inc updated AUTO_INCREMENT value, or 0 if none
8380 @return DB_SUCCESS or error code */
8381 static
8382 dberr_t
calc_row_difference(upd_t * uvect,const uchar * old_row,const uchar * new_row,TABLE * table,uchar * upd_buff,ulint buff_len,row_prebuilt_t * prebuilt,ib_uint64_t & auto_inc)8383 calc_row_difference(
8384 upd_t* uvect,
8385 const uchar* old_row,
8386 const uchar* new_row,
8387 TABLE* table,
8388 uchar* upd_buff,
8389 ulint buff_len,
8390 row_prebuilt_t* prebuilt,
8391 ib_uint64_t& auto_inc)
8392 {
8393 uchar* original_upd_buff = upd_buff;
8394 Field* field;
8395 enum_field_types field_mysql_type;
8396 ulint o_len;
8397 ulint n_len;
8398 ulint col_pack_len;
8399 const byte* new_mysql_row_col;
8400 const byte* old_mysql_row_col;
8401 const byte* o_ptr;
8402 const byte* n_ptr;
8403 byte* buf;
8404 upd_field_t* ufield;
8405 ulint col_type;
8406 ulint n_changed = 0;
8407 dfield_t dfield;
8408 dict_index_t* clust_index;
8409 ibool changes_fts_column = FALSE;
8410 ibool changes_fts_doc_col = FALSE;
8411 trx_t* const trx = prebuilt->trx;
8412 doc_id_t doc_id = FTS_NULL_DOC_ID;
8413 ulint num_v = 0;
8414 const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s);
8415
8416 ut_ad(!srv_read_only_mode);
8417
8418 clust_index = dict_table_get_first_index(prebuilt->table);
8419 auto_inc = 0;
8420
8421 /* We use upd_buff to convert changed fields */
8422 buf = (byte*) upd_buff;
8423
8424 for (uint i = 0; i < table->s->fields; i++) {
8425 field = table->field[i];
8426 const bool is_virtual = !field->stored_in_db();
8427 if (is_virtual && skip_virtual) {
8428 num_v++;
8429 continue;
8430 }
8431 dict_col_t* col = is_virtual
8432 ? &prebuilt->table->v_cols[num_v].m_col
8433 : &prebuilt->table->cols[i - num_v];
8434
8435 o_ptr = (const byte*) old_row + get_field_offset(table, field);
8436 n_ptr = (const byte*) new_row + get_field_offset(table, field);
8437
8438 /* Use new_mysql_row_col and col_pack_len save the values */
8439
8440 new_mysql_row_col = n_ptr;
8441 old_mysql_row_col = o_ptr;
8442 col_pack_len = field->pack_length();
8443
8444 o_len = col_pack_len;
8445 n_len = col_pack_len;
8446
8447 /* We use o_ptr and n_ptr to dig up the actual data for
8448 comparison. */
8449
8450 field_mysql_type = field->type();
8451
8452 col_type = col->mtype;
8453
8454 switch (col_type) {
8455
8456 case DATA_BLOB:
8457 case DATA_GEOMETRY:
8458 o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
8459 n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
8460
8461 break;
8462
8463 case DATA_VARCHAR:
8464 case DATA_BINARY:
8465 case DATA_VARMYSQL:
8466 if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
8467 /* This is a >= 5.0.3 type true VARCHAR where
8468 the real payload data length is stored in
8469 1 or 2 bytes */
8470
8471 o_ptr = row_mysql_read_true_varchar(
8472 &o_len, o_ptr,
8473 (ulint)
8474 (((Field_varstring*) field)->length_bytes));
8475
8476 n_ptr = row_mysql_read_true_varchar(
8477 &n_len, n_ptr,
8478 (ulint)
8479 (((Field_varstring*) field)->length_bytes));
8480 }
8481
8482 break;
8483 default:
8484 ;
8485 }
8486
8487 if (field_mysql_type == MYSQL_TYPE_LONGLONG
8488 && prebuilt->table->fts
8489 && innobase_strcasecmp(
8490 field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
8491 doc_id = mach_read_uint64_little_endian(n_ptr);
8492 if (doc_id == 0) {
8493 return(DB_FTS_INVALID_DOCID);
8494 }
8495 }
8496
8497 if (field->real_maybe_null()) {
8498 if (field->is_null_in_record(old_row)) {
8499 o_len = UNIV_SQL_NULL;
8500 }
8501
8502 if (field->is_null_in_record(new_row)) {
8503 n_len = UNIV_SQL_NULL;
8504 }
8505 }
8506
8507 #ifdef UNIV_DEBUG
8508 bool online_ord_part = false;
8509 #endif
8510
8511 if (is_virtual) {
8512 /* If the virtual column is not indexed,
8513 we shall ignore it for update */
8514 if (!col->ord_part) {
8515 /* Check whether there is a table-rebuilding
8516 online ALTER TABLE in progress, and this
8517 virtual column could be newly indexed, thus
8518 it will be materialized. Then we will have
8519 to log its update.
8520 Note, we do not support online dropping virtual
8521 column while adding new index, nor with
8522 online alter column order while adding index,
8523 so the virtual column sequence must not change
8524 if it is online operation */
8525 if (dict_index_is_online_ddl(clust_index)
8526 && row_log_col_is_indexed(clust_index,
8527 num_v)) {
8528 #ifdef UNIV_DEBUG
8529 online_ord_part = true;
8530 #endif
8531 } else {
8532 num_v++;
8533 continue;
8534 }
8535 }
8536
8537 if (!uvect->old_vrow) {
8538 uvect->old_vrow = dtuple_create_with_vcol(
8539 uvect->heap, 0, prebuilt->table->n_v_cols);
8540 }
8541
8542 ulint max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(
8543 prebuilt->table);
8544
8545 /* for virtual columns, we only materialize
8546 its index, and index field length would not
8547 exceed max_field_len. So continue if the
8548 first max_field_len bytes are matched up */
8549 if (o_len != UNIV_SQL_NULL
8550 && n_len != UNIV_SQL_NULL
8551 && o_len >= max_field_len
8552 && n_len >= max_field_len
8553 && memcmp(o_ptr, n_ptr, max_field_len) == 0) {
8554 dfield_t* vfield = dtuple_get_nth_v_field(
8555 uvect->old_vrow, num_v);
8556 buf = innodb_fill_old_vcol_val(
8557 prebuilt, vfield, o_len,
8558 col, old_mysql_row_col,
8559 col_pack_len, buf);
8560 num_v++;
8561 continue;
8562 }
8563 }
8564
8565 if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
8566 && 0 != memcmp(o_ptr, n_ptr, o_len))) {
8567 /* The field has changed */
8568
8569 ufield = uvect->fields + n_changed;
8570 MEM_UNDEFINED(ufield, sizeof *ufield);
8571
8572 /* Let us use a dummy dfield to make the conversion
8573 from the MySQL column format to the InnoDB format */
8574
8575
8576 /* If the length of new geometry object is 0, means
8577 this object is invalid geometry object, we need
8578 to block it. */
8579 if (DATA_GEOMETRY_MTYPE(col_type)
8580 && o_len != 0 && n_len == 0) {
8581 return(DB_CANT_CREATE_GEOMETRY_OBJECT);
8582 }
8583
8584 if (n_len != UNIV_SQL_NULL) {
8585 dict_col_copy_type(
8586 col, dfield_get_type(&dfield));
8587
8588 buf = row_mysql_store_col_in_innobase_format(
8589 &dfield,
8590 (byte*) buf,
8591 TRUE,
8592 new_mysql_row_col,
8593 col_pack_len,
8594 dict_table_is_comp(prebuilt->table));
8595 dfield_copy(&ufield->new_val, &dfield);
8596 } else {
8597 dict_col_copy_type(
8598 col, dfield_get_type(&ufield->new_val));
8599 dfield_set_null(&ufield->new_val);
8600 }
8601
8602 ufield->exp = NULL;
8603 ufield->orig_len = 0;
8604 if (is_virtual) {
8605 dfield_t* vfield = dtuple_get_nth_v_field(
8606 uvect->old_vrow, num_v);
8607 upd_fld_set_virtual_col(ufield);
8608 ufield->field_no = num_v;
8609
8610 ut_ad(col->ord_part || online_ord_part);
8611 ufield->old_v_val = static_cast<dfield_t*>(
8612 mem_heap_alloc(
8613 uvect->heap,
8614 sizeof *ufield->old_v_val));
8615
8616 if (!field->is_null_in_record(old_row)) {
8617 if (n_len == UNIV_SQL_NULL) {
8618 dict_col_copy_type(
8619 col, dfield_get_type(
8620 &dfield));
8621 }
8622
8623 buf = row_mysql_store_col_in_innobase_format(
8624 &dfield,
8625 (byte*) buf,
8626 TRUE,
8627 old_mysql_row_col,
8628 col_pack_len,
8629 dict_table_is_comp(
8630 prebuilt->table));
8631 dfield_copy(ufield->old_v_val,
8632 &dfield);
8633 dfield_copy(vfield, &dfield);
8634 } else {
8635 dict_col_copy_type(
8636 col, dfield_get_type(
8637 ufield->old_v_val));
8638 dfield_set_null(ufield->old_v_val);
8639 dfield_set_null(vfield);
8640 }
8641 num_v++;
8642 ut_ad(field != table->found_next_number_field);
8643 } else {
8644 ufield->field_no = dict_col_get_clust_pos(
8645 &prebuilt->table->cols[i - num_v],
8646 clust_index);
8647 ufield->old_v_val = NULL;
8648 if (field != table->found_next_number_field
8649 || dfield_is_null(&ufield->new_val)) {
8650 } else {
8651 auto_inc = field->val_uint();
8652 }
8653 }
8654 n_changed++;
8655
8656 /* If an FTS indexed column was changed by this
8657 UPDATE then we need to inform the FTS sub-system.
8658
8659 NOTE: Currently we re-index all FTS indexed columns
8660 even if only a subset of the FTS indexed columns
8661 have been updated. That is the reason we are
8662 checking only once here. Later we will need to
8663 note which columns have been updated and do
8664 selective processing. */
8665 if (prebuilt->table->fts != NULL && !is_virtual) {
8666 ulint offset;
8667 dict_table_t* innodb_table;
8668
8669 innodb_table = prebuilt->table;
8670
8671 if (!changes_fts_column) {
8672 offset = row_upd_changes_fts_column(
8673 innodb_table, ufield);
8674
8675 if (offset != ULINT_UNDEFINED) {
8676 changes_fts_column = TRUE;
8677 }
8678 }
8679
8680 if (!changes_fts_doc_col) {
8681 changes_fts_doc_col =
8682 row_upd_changes_doc_id(
8683 innodb_table, ufield);
8684 }
8685 }
8686 } else if (is_virtual) {
8687 dfield_t* vfield = dtuple_get_nth_v_field(
8688 uvect->old_vrow, num_v);
8689 buf = innodb_fill_old_vcol_val(
8690 prebuilt, vfield, o_len,
8691 col, old_mysql_row_col,
8692 col_pack_len, buf);
8693 ut_ad(col->ord_part || online_ord_part);
8694 num_v++;
8695 }
8696 }
8697
8698 /* If the update changes a column with an FTS index on it, we
8699 then add an update column node with a new document id to the
8700 other changes. We piggy back our changes on the normal UPDATE
8701 to reduce processing and IO overhead. */
8702 if (!prebuilt->table->fts) {
8703 trx->fts_next_doc_id = 0;
8704 } else if (changes_fts_column || changes_fts_doc_col) {
8705 dict_table_t* innodb_table = prebuilt->table;
8706
8707 ufield = uvect->fields + n_changed;
8708
8709 if (!DICT_TF2_FLAG_IS_SET(
8710 innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) {
8711
8712 /* If Doc ID is managed by user, and if any
8713 FTS indexed column has been updated, its corresponding
8714 Doc ID must also be updated. Otherwise, return
8715 error */
8716 if (changes_fts_column && !changes_fts_doc_col) {
8717 ib::warn() << "A new Doc ID must be supplied"
8718 " while updating FTS indexed columns.";
8719 return(DB_FTS_INVALID_DOCID);
8720 }
8721
8722 /* Doc ID must monotonically increase */
8723 ut_ad(innodb_table->fts->cache);
8724 if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
8725
8726 ib::warn() << "FTS Doc ID must be larger than "
8727 << innodb_table->fts->cache->next_doc_id
8728 - 1 << " for table "
8729 << innodb_table->name;
8730
8731 return(DB_FTS_INVALID_DOCID);
8732 }
8733
8734
8735 trx->fts_next_doc_id = doc_id;
8736 } else {
8737 /* If the Doc ID is a hidden column, it can't be
8738 changed by user */
8739 ut_ad(!changes_fts_doc_col);
8740
8741 /* Doc ID column is hidden, a new Doc ID will be
8742 generated by following fts_update_doc_id() call */
8743 trx->fts_next_doc_id = 0;
8744 }
8745
8746 fts_update_doc_id(
8747 innodb_table, ufield, &trx->fts_next_doc_id);
8748
8749 ++n_changed;
8750 } else {
8751 /* We have a Doc ID column, but none of FTS indexed
8752 columns are touched, nor the Doc ID column, so set
8753 fts_next_doc_id to UINT64_UNDEFINED, which means do not
8754 update the Doc ID column */
8755 trx->fts_next_doc_id = UINT64_UNDEFINED;
8756 }
8757
8758 uvect->n_fields = n_changed;
8759 uvect->info_bits = 0;
8760
8761 ut_a(buf <= (byte*) original_upd_buff + buff_len);
8762
8763 ut_ad(uvect->validate());
8764 return(DB_SUCCESS);
8765 }
8766
8767 #ifdef WITH_WSREP
8768 static
8769 int
wsrep_calc_row_hash(byte * digest,const uchar * row,TABLE * table,row_prebuilt_t * prebuilt)8770 wsrep_calc_row_hash(
8771 /*================*/
8772 byte* digest, /*!< in/out: md5 sum */
8773 const uchar* row, /*!< in: row in MySQL format */
8774 TABLE* table, /*!< in: table in MySQL data
8775 dictionary */
8776 row_prebuilt_t* prebuilt) /*!< in: InnoDB prebuilt struct */
8777 {
8778 ulint len;
8779 const byte* ptr;
8780
8781 void *ctx = alloca(my_md5_context_size());
8782 my_md5_init(ctx);
8783
8784 for (uint i = 0; i < table->s->fields; i++) {
8785 byte null_byte=0;
8786 byte true_byte=1;
8787 ulint col_type;
8788 ulint is_unsigned;
8789
8790 const Field* field = table->field[i];
8791 if (!field->stored_in_db()) {
8792 continue;
8793 }
8794
8795 ptr = (const byte*) row + get_field_offset(table, field);
8796 len = field->pack_length();
8797 col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
8798
8799 switch (col_type) {
8800
8801 case DATA_BLOB:
8802 ptr = row_mysql_read_blob_ref(&len, ptr, len);
8803
8804 break;
8805
8806 case DATA_VARCHAR:
8807 case DATA_BINARY:
8808 case DATA_VARMYSQL:
8809 if (field->type() == MYSQL_TYPE_VARCHAR) {
8810 /* This is a >= 5.0.3 type true VARCHAR where
8811 the real payload data length is stored in
8812 1 or 2 bytes */
8813
8814 ptr = row_mysql_read_true_varchar(
8815 &len, ptr,
8816 (ulint)
8817 (((Field_varstring*)field)->length_bytes));
8818
8819 }
8820
8821 break;
8822 default:
8823 ;
8824 }
8825 /*
8826 if (field->null_ptr &&
8827 field_in_record_is_null(table, field, (char*) row)) {
8828 */
8829
8830 if (field->is_null_in_record(row)) {
8831 my_md5_input(ctx, &null_byte, 1);
8832 } else {
8833 my_md5_input(ctx, &true_byte, 1);
8834 my_md5_input(ctx, ptr, len);
8835 }
8836 }
8837
8838 my_md5_result(ctx, digest);
8839
8840 return(0);
8841 }
8842 #endif /* WITH_WSREP */
8843
8844 /**
8845 Updates a row given as a parameter to a new value. Note that we are given
8846 whole rows, not just the fields which are updated: this incurs some
8847 overhead for CPU when we check which fields are actually updated.
8848 TODO: currently InnoDB does not prevent the 'Halloween problem':
8849 in a searched update a single row can get updated several times
8850 if its index columns are updated!
8851 @param[in] old_row Old row contents in MySQL format
8852 @param[out] new_row Updated row contents in MySQL format
8853 @return error number or 0 */
8854
8855 int
update_row(const uchar * old_row,const uchar * new_row)8856 ha_innobase::update_row(
8857 const uchar* old_row,
8858 const uchar* new_row)
8859 {
8860 int err;
8861
8862 dberr_t error;
8863 trx_t* trx = thd_to_trx(m_user_thd);
8864
8865 DBUG_ENTER("ha_innobase::update_row");
8866
8867 ut_a(m_prebuilt->trx == trx);
8868
8869 if (high_level_read_only) {
8870 ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8871 DBUG_RETURN(HA_ERR_TABLE_READONLY);
8872 } else if (!trx_is_started(trx)) {
8873 trx->will_lock = true;
8874 }
8875
8876 if (m_upd_buf == NULL) {
8877 ut_ad(m_upd_buf_size == 0);
8878
8879 /* Create a buffer for packing the fields of a record. Why
8880 table->reclength did not work here? Obviously, because char
8881 fields when packed actually became 1 byte longer, when we also
8882 stored the string length as the first byte. */
8883
8884 m_upd_buf_size = table->s->reclength + table->s->max_key_length
8885 + MAX_REF_PARTS * 3;
8886
8887 m_upd_buf = reinterpret_cast<uchar*>(
8888 my_malloc(//PSI_INSTRUMENT_ME,
8889 m_upd_buf_size,
8890 MYF(MY_WME)));
8891
8892 if (m_upd_buf == NULL) {
8893 m_upd_buf_size = 0;
8894 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
8895 }
8896 }
8897
8898 upd_t* uvect = row_get_prebuilt_update_vector(m_prebuilt);
8899 ib_uint64_t autoinc;
8900
8901 /* Build an update vector from the modified fields in the rows
8902 (uses m_upd_buf of the handle) */
8903
8904 error = calc_row_difference(
8905 uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size,
8906 m_prebuilt, autoinc);
8907
8908 if (error != DB_SUCCESS) {
8909 goto func_exit;
8910 }
8911
8912 if (!uvect->n_fields) {
8913 /* This is the same as success, but instructs
8914 MySQL that the row is not really updated and it
8915 should not increase the count of updated rows.
8916 This is fix for http://bugs.mysql.com/29157 */
8917 if (m_prebuilt->versioned_write
8918 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
8919 /* Multiple UPDATE of same rows in single transaction create
8920 historical rows only once. */
8921 && trx->id != table->vers_start_id()) {
8922 error = row_insert_for_mysql((byte*) old_row,
8923 m_prebuilt,
8924 ROW_INS_HISTORICAL);
8925 if (error != DB_SUCCESS) {
8926 goto func_exit;
8927 }
8928 innobase_srv_conc_exit_innodb(m_prebuilt);
8929 innobase_active_small();
8930 }
8931 DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
8932 } else {
8933 const bool vers_set_fields = m_prebuilt->versioned_write
8934 && m_prebuilt->upd_node->update->affects_versioned();
8935 const bool vers_ins_row = vers_set_fields
8936 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE;
8937
8938 /* This is not a delete */
8939 m_prebuilt->upd_node->is_delete =
8940 (vers_set_fields && !vers_ins_row) ||
8941 (thd_sql_command(m_user_thd) == SQLCOM_DELETE &&
8942 table->versioned(VERS_TIMESTAMP))
8943 ? VERSIONED_DELETE
8944 : NO_DELETE;
8945
8946 innobase_srv_conc_enter_innodb(m_prebuilt);
8947
8948 error = row_update_for_mysql(m_prebuilt);
8949
8950 if (error == DB_SUCCESS && vers_ins_row
8951 /* Multiple UPDATE of same rows in single transaction create
8952 historical rows only once. */
8953 && trx->id != table->vers_start_id()) {
8954 error = row_insert_for_mysql((byte*) old_row,
8955 m_prebuilt,
8956 ROW_INS_HISTORICAL);
8957 }
8958 }
8959
8960 if (error == DB_SUCCESS && autoinc) {
8961 /* A value for an AUTO_INCREMENT column
8962 was specified in the UPDATE statement. */
8963
8964 /* We need the upper limit of the col type to check for
8965 whether we update the table autoinc counter or not. */
8966 ulonglong col_max_value =
8967 table->found_next_number_field->get_max_int_value();
8968
8969 /* This should filter out the negative
8970 values set explicitly by the user. */
8971 if (autoinc <= col_max_value) {
8972 ulonglong offset;
8973 ulonglong increment;
8974
8975 offset = m_prebuilt->autoinc_offset;
8976 increment = m_prebuilt->autoinc_increment;
8977
8978 autoinc = innobase_next_autoinc(
8979 autoinc, 1, increment, offset,
8980 col_max_value);
8981
8982 error = innobase_set_max_autoinc(autoinc);
8983
8984 if (m_prebuilt->table->persistent_autoinc) {
8985 /* Update the PAGE_ROOT_AUTO_INC. Yes, we do
8986 this even if dict_table_t::autoinc already was
8987 greater than autoinc, because we cannot know
8988 if any INSERT actually used (and wrote to
8989 PAGE_ROOT_AUTO_INC) a value bigger than our
8990 autoinc. */
8991 btr_write_autoinc(dict_table_get_first_index(
8992 m_prebuilt->table),
8993 autoinc);
8994 }
8995 }
8996 }
8997
8998 innobase_srv_conc_exit_innodb(m_prebuilt);
8999
9000 func_exit:
9001 if (error == DB_FTS_INVALID_DOCID) {
9002 err = HA_FTS_INVALID_DOCID;
9003 my_error(HA_FTS_INVALID_DOCID, MYF(0));
9004 } else {
9005 err = convert_error_code_to_mysql(
9006 error, m_prebuilt->table->flags, m_user_thd);
9007 }
9008
9009 /* Tell InnoDB server that there might be work for
9010 utility threads: */
9011
9012 innobase_active_small();
9013
9014 #ifdef WITH_WSREP
9015 if (error == DB_SUCCESS && trx->is_wsrep() &&
9016 wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE &&
9017 !wsrep_thd_ignore_table(m_user_thd)) {
9018 DBUG_PRINT("wsrep", ("update row key"));
9019
9020 if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, old_row,
9021 new_row)) {
9022 WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
9023 DBUG_PRINT("wsrep", ("row key failed"));
9024 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9025 }
9026 }
9027 #endif /* WITH_WSREP */
9028
9029 DBUG_RETURN(err);
9030 }
9031
9032 /**********************************************************************//**
9033 Deletes a row given as the parameter.
9034 @return error number or 0 */
9035
9036 int
delete_row(const uchar * record)9037 ha_innobase::delete_row(
9038 /*====================*/
9039 const uchar* record) /*!< in: a row in MySQL format */
9040 {
9041 dberr_t error;
9042 trx_t* trx = thd_to_trx(m_user_thd);
9043
9044 DBUG_ENTER("ha_innobase::delete_row");
9045
9046 ut_a(m_prebuilt->trx == trx);
9047
9048 if (high_level_read_only) {
9049 ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
9050 DBUG_RETURN(HA_ERR_TABLE_READONLY);
9051 } else if (!trx_is_started(trx)) {
9052 trx->will_lock = true;
9053 }
9054
9055 if (!m_prebuilt->upd_node) {
9056 row_get_prebuilt_update_vector(m_prebuilt);
9057 }
9058
9059 /* This is a delete */
9060 m_prebuilt->upd_node->is_delete = table->versioned_write(VERS_TRX_ID)
9061 && table->vers_end_field()->is_max()
9062 && trx->id != table->vers_start_id()
9063 ? VERSIONED_DELETE
9064 : PLAIN_DELETE;
9065
9066 innobase_srv_conc_enter_innodb(m_prebuilt);
9067
9068 error = row_update_for_mysql(m_prebuilt);
9069
9070 innobase_srv_conc_exit_innodb(m_prebuilt);
9071
9072 /* Tell the InnoDB server that there might be work for
9073 utility threads: */
9074
9075 innobase_active_small();
9076
9077 #ifdef WITH_WSREP
9078 if (error == DB_SUCCESS && trx->is_wsrep()
9079 && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
9080 && !wsrep_thd_ignore_table(m_user_thd)) {
9081 if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
9082 NULL)) {
9083 DBUG_PRINT("wsrep", ("delete fail"));
9084 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9085 }
9086 }
9087 #endif /* WITH_WSREP */
9088 DBUG_RETURN(convert_error_code_to_mysql(
9089 error, m_prebuilt->table->flags, m_user_thd));
9090 }
9091
9092 /** Delete all rows from the table.
9093 @return error number or 0 */
9094
9095 int
delete_all_rows()9096 ha_innobase::delete_all_rows()
9097 {
9098 DBUG_ENTER("ha_innobase::delete_all_rows");
9099 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
9100 }
9101
9102 /**********************************************************************//**
9103 Removes a new lock set on a row, if it was not read optimistically. This can
9104 be called after a row has been read in the processing of an UPDATE or a DELETE
9105 query, if the option innodb_locks_unsafe_for_binlog is set. */
9106
9107 void
unlock_row(void)9108 ha_innobase::unlock_row(void)
9109 /*=========================*/
9110 {
9111 DBUG_ENTER("ha_innobase::unlock_row");
9112
9113 if (m_prebuilt->select_lock_type == LOCK_NONE) {
9114 DBUG_VOID_RETURN;
9115 }
9116
9117 ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE, true));
9118
9119 switch (m_prebuilt->row_read_type) {
9120 case ROW_READ_WITH_LOCKS:
9121 if (!srv_locks_unsafe_for_binlog
9122 && m_prebuilt->trx->isolation_level
9123 > TRX_ISO_READ_COMMITTED) {
9124 break;
9125 }
9126 /* fall through */
9127 case ROW_READ_TRY_SEMI_CONSISTENT:
9128 row_unlock_for_mysql(m_prebuilt, FALSE);
9129 break;
9130 case ROW_READ_DID_SEMI_CONSISTENT:
9131 m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9132 break;
9133 }
9134
9135 DBUG_VOID_RETURN;
9136 }
9137
9138 /* See handler.h and row0mysql.h for docs on this function. */
9139
9140 bool
was_semi_consistent_read(void)9141 ha_innobase::was_semi_consistent_read(void)
9142 /*=======================================*/
9143 {
9144 return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
9145 }
9146
9147 /* See handler.h and row0mysql.h for docs on this function. */
9148
9149 void
try_semi_consistent_read(bool yes)9150 ha_innobase::try_semi_consistent_read(bool yes)
9151 /*===========================================*/
9152 {
9153 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9154
9155 /* Row read type is set to semi consistent read if this was
9156 requested by the MySQL and either innodb_locks_unsafe_for_binlog
9157 option is used or this session is using READ COMMITTED isolation
9158 level. */
9159
9160 if (yes
9161 && (srv_locks_unsafe_for_binlog
9162 || m_prebuilt->trx->isolation_level
9163 <= TRX_ISO_READ_COMMITTED)) {
9164
9165 m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9166
9167 } else {
9168 m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
9169 }
9170 }
9171
9172 /******************************************************************//**
9173 Initializes a handle to use an index.
9174 @return 0 or error number */
9175
9176 int
index_init(uint keynr,bool)9177 ha_innobase::index_init(
9178 /*====================*/
9179 uint keynr, /*!< in: key (index) number */
9180 bool)
9181 {
9182 DBUG_ENTER("index_init");
9183
9184 DBUG_RETURN(change_active_index(keynr));
9185 }
9186
9187 /******************************************************************//**
9188 Currently does nothing.
9189 @return 0 */
9190
9191 int
index_end(void)9192 ha_innobase::index_end(void)
9193 /*========================*/
9194 {
9195 DBUG_ENTER("index_end");
9196
9197 active_index = MAX_KEY;
9198
9199 in_range_check_pushed_down = FALSE;
9200
9201 m_ds_mrr.dsmrr_close();
9202
9203 DBUG_RETURN(0);
9204 }
9205
9206 /*********************************************************************//**
9207 Converts a search mode flag understood by MySQL to a flag understood
9208 by InnoDB. */
9209 page_cur_mode_t
convert_search_mode_to_innobase(ha_rkey_function find_flag)9210 convert_search_mode_to_innobase(
9211 /*============================*/
9212 ha_rkey_function find_flag)
9213 {
9214 switch (find_flag) {
9215 case HA_READ_KEY_EXACT:
9216 /* this does not require the index to be UNIQUE */
9217 case HA_READ_KEY_OR_NEXT:
9218 return(PAGE_CUR_GE);
9219 case HA_READ_AFTER_KEY:
9220 return(PAGE_CUR_G);
9221 case HA_READ_BEFORE_KEY:
9222 return(PAGE_CUR_L);
9223 case HA_READ_KEY_OR_PREV:
9224 case HA_READ_PREFIX_LAST:
9225 case HA_READ_PREFIX_LAST_OR_PREV:
9226 return(PAGE_CUR_LE);
9227 case HA_READ_MBR_CONTAIN:
9228 return(PAGE_CUR_CONTAIN);
9229 case HA_READ_MBR_INTERSECT:
9230 return(PAGE_CUR_INTERSECT);
9231 case HA_READ_MBR_WITHIN:
9232 return(PAGE_CUR_WITHIN);
9233 case HA_READ_MBR_DISJOINT:
9234 return(PAGE_CUR_DISJOINT);
9235 case HA_READ_MBR_EQUAL:
9236 return(PAGE_CUR_MBR_EQUAL);
9237 case HA_READ_PREFIX:
9238 return(PAGE_CUR_UNSUPP);
9239 /* do not use "default:" in order to produce a gcc warning:
9240 enumeration value '...' not handled in switch
9241 (if -Wswitch or -Wall is used) */
9242 }
9243
9244 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
9245
9246 return(PAGE_CUR_UNSUPP);
9247 }
9248
9249 /*
9250 BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
9251 ---------------------------------------------------
9252 The following does not cover all the details, but explains how we determine
9253 the start of a new SQL statement, and what is associated with it.
9254
9255 For each table in the database the MySQL interpreter may have several
9256 table handle instances in use, also in a single SQL query. For each table
9257 handle instance there is an InnoDB 'm_prebuilt' struct which contains most
9258 of the InnoDB data associated with this table handle instance.
9259
9260 A) if the user has not explicitly set any MySQL table level locks:
9261
9262 1) MySQL calls ::external_lock to set an 'intention' table level lock on
9263 the table of the handle instance. There we set
9264 m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
9265 true if we are taking this table handle instance to use in a new SQL
9266 statement issued by the user. We also increment trx->n_mysql_tables_in_use.
9267
9268 2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
9269 instructions to m_prebuilt->template of the table handle instance in
9270 ::index_read. The template is used to save CPU time in large joins.
9271
9272 3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we
9273 allocate a new consistent read view for the trx if it does not yet have one,
9274 or in the case of a locking read, set an InnoDB 'intention' table level
9275 lock on the table.
9276
9277 4) We do the SELECT. MySQL may repeatedly call ::index_read for the
9278 same table handle instance, if it is a join.
9279
9280 5) When the SELECT ends, MySQL removes its intention table level locks
9281 in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
9282 (a) we execute a COMMIT there if the autocommit is on,
9283 (b) we also release possible 'SQL statement level resources' InnoDB may
9284 have for this SQL statement. The MySQL interpreter does NOT execute
9285 autocommit for pure read transactions, though it should. That is why the
9286 table handler in that case has to execute the COMMIT in ::external_lock.
9287
9288 B) If the user has explicitly set MySQL table level locks, then MySQL
9289 does NOT call ::external_lock at the start of the statement. To determine
9290 when we are at the start of a new SQL statement we at the start of
9291 ::index_read also compare the query id to the latest query id where the
9292 table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */
9296
9297
9298 /**********************************************************************//**
9299 Positions an index cursor to the index specified in the handle. Fetches the
9300 row if any.
9301 @return 0, HA_ERR_KEY_NOT_FOUND, or error number */
9302
9303 int
index_read(uchar * buf,const uchar * key_ptr,uint key_len,enum ha_rkey_function find_flag)9304 ha_innobase::index_read(
9305 /*====================*/
9306 uchar* buf, /*!< in/out: buffer for the returned
9307 row */
9308 const uchar* key_ptr, /*!< in: key value; if this is NULL
9309 we position the cursor at the
9310 start or end of index; this can
9311 also contain an InnoDB row id, in
9312 which case key_len is the InnoDB
9313 row id length; the key value can
9314 also be a prefix of a full key value,
9315 and the last column can be a prefix
9316 of a full column */
9317 uint key_len,/*!< in: key value length */
9318 enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
9319 {
9320 DBUG_ENTER("index_read");
9321 DEBUG_SYNC_C("ha_innobase_index_read_begin");
9322
9323 ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9324 ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
9325
9326 dict_index_t* index = m_prebuilt->index;
9327
9328 if (index == NULL || index->is_corrupted()) {
9329 m_prebuilt->index_usable = FALSE;
9330 DBUG_RETURN(HA_ERR_CRASHED);
9331 }
9332
9333 if (!m_prebuilt->index_usable) {
9334 DBUG_RETURN(index->is_corrupted()
9335 ? HA_ERR_INDEX_CORRUPT
9336 : HA_ERR_TABLE_DEF_CHANGED);
9337 }
9338
9339 if (index->type & DICT_FTS) {
9340 DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
9341 }
9342
9343 /* For R-Tree index, we will always place the page lock to
9344 pages being searched */
9345 if (index->is_spatial() && !m_prebuilt->trx->will_lock) {
9346 if (trx_is_started(m_prebuilt->trx)) {
9347 DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION);
9348 } else {
9349 m_prebuilt->trx->will_lock = true;
9350 }
9351 }
9352
9353 /* Note that if the index for which the search template is built is not
9354 necessarily m_prebuilt->index, but can also be the clustered index */
9355
9356 if (m_prebuilt->sql_stat_start) {
9357 build_template(false);
9358 }
9359
9360 if (key_ptr != NULL) {
9361 /* Convert the search key value to InnoDB format into
9362 m_prebuilt->search_tuple */
9363
9364 row_sel_convert_mysql_key_to_innobase(
9365 m_prebuilt->search_tuple,
9366 m_prebuilt->srch_key_val1,
9367 m_prebuilt->srch_key_val_len,
9368 index,
9369 (byte*) key_ptr,
9370 (ulint) key_len);
9371
9372 DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
9373 } else {
9374 /* We position the cursor to the last or the first entry
9375 in the index */
9376
9377 dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
9378 }
9379
9380 page_cur_mode_t mode = convert_search_mode_to_innobase(find_flag);
9381
9382 ulint match_mode = 0;
9383
9384 if (find_flag == HA_READ_KEY_EXACT) {
9385
9386 match_mode = ROW_SEL_EXACT;
9387
9388 } else if (find_flag == HA_READ_PREFIX_LAST) {
9389
9390 match_mode = ROW_SEL_EXACT_PREFIX;
9391 }
9392
9393 m_last_match_mode = (uint) match_mode;
9394
9395 dberr_t ret;
9396
9397 if (mode != PAGE_CUR_UNSUPP) {
9398
9399 innobase_srv_conc_enter_innodb(m_prebuilt);
9400
9401 ret = row_search_mvcc(
9402 buf, mode, m_prebuilt, match_mode, 0);
9403
9404 innobase_srv_conc_exit_innodb(m_prebuilt);
9405 } else {
9406
9407 ret = DB_UNSUPPORTED;
9408 }
9409
9410 DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;);
9411
9412 int error;
9413
9414 switch (ret) {
9415 case DB_SUCCESS:
9416 error = 0;
9417 table->status = 0;
9418 if (m_prebuilt->table->is_system_db) {
9419 srv_stats.n_system_rows_read.add(
9420 thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9421 } else {
9422 srv_stats.n_rows_read.add(
9423 thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9424 }
9425 break;
9426
9427 case DB_RECORD_NOT_FOUND:
9428 error = HA_ERR_KEY_NOT_FOUND;
9429 table->status = STATUS_NOT_FOUND;
9430 break;
9431
9432 case DB_END_OF_INDEX:
9433 error = HA_ERR_KEY_NOT_FOUND;
9434 table->status = STATUS_NOT_FOUND;
9435 break;
9436
9437 case DB_TABLESPACE_DELETED:
9438 ib_senderrf(
9439 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9440 ER_TABLESPACE_DISCARDED,
9441 table->s->table_name.str);
9442
9443 table->status = STATUS_NOT_FOUND;
9444 error = HA_ERR_TABLESPACE_MISSING;
9445 break;
9446
9447 case DB_TABLESPACE_NOT_FOUND:
9448
9449 ib_senderrf(
9450 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9451 ER_TABLESPACE_MISSING,
9452 table->s->table_name.str);
9453
9454 table->status = STATUS_NOT_FOUND;
9455 error = HA_ERR_TABLESPACE_MISSING;
9456 break;
9457
9458 default:
9459 error = convert_error_code_to_mysql(
9460 ret, m_prebuilt->table->flags, m_user_thd);
9461
9462 table->status = STATUS_NOT_FOUND;
9463 break;
9464 }
9465
9466 DBUG_RETURN(error);
9467 }
9468
9469 /*******************************************************************//**
9470 The following functions works like index_read, but it find the last
9471 row with the current key value or prefix.
9472 @return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
9473
9474 int
index_read_last(uchar * buf,const uchar * key_ptr,uint key_len)9475 ha_innobase::index_read_last(
9476 /*=========================*/
9477 uchar* buf, /*!< out: fetched row */
9478 const uchar* key_ptr,/*!< in: key value, or a prefix of a full
9479 key value */
9480 uint key_len)/*!< in: length of the key val or prefix
9481 in bytes */
9482 {
9483 return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
9484 }
9485
9486 /********************************************************************//**
9487 Get the index for a handle. Does not change active index.
9488 @return NULL or index instance. */
9489
9490 dict_index_t*
innobase_get_index(uint keynr)9491 ha_innobase::innobase_get_index(
9492 /*============================*/
9493 uint keynr) /*!< in: use this index; MAX_KEY means always
9494 clustered index, even if it was internally
9495 generated by InnoDB */
9496 {
9497 KEY* key = NULL;
9498 dict_table_t* ib_table = m_prebuilt->table;
9499 dict_index_t* index;
9500
9501 DBUG_ENTER("innobase_get_index");
9502
9503 if (keynr != MAX_KEY && table->s->keys > 0) {
9504 key = &table->key_info[keynr];
9505 index = dict_table_get_index_on_name(ib_table, key->name.str);
9506 } else {
9507 index = dict_table_get_first_index(ib_table);
9508 }
9509
9510 if (index == NULL) {
9511 sql_print_error(
9512 "InnoDB could not find key no %u with name %s"
9513 " from dict cache for table %s",
9514 keynr, key ? key->name.str : "NULL",
9515 ib_table->name.m_name);
9516 }
9517
9518 DBUG_RETURN(index);
9519 }
9520
9521 /********************************************************************//**
9522 Changes the active index of a handle.
9523 @return 0 or error code */
9524
9525 int
change_active_index(uint keynr)9526 ha_innobase::change_active_index(
9527 /*=============================*/
9528 uint keynr) /*!< in: use this index; MAX_KEY means always clustered
9529 index, even if it was internally generated by
9530 InnoDB */
9531 {
9532 DBUG_ENTER("change_active_index");
9533
9534 ut_ad(m_user_thd == ha_thd());
9535 ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9536
9537 active_index = keynr;
9538
9539 m_prebuilt->index = innobase_get_index(keynr);
9540
9541 if (m_prebuilt->index == NULL) {
9542 sql_print_warning("InnoDB: change_active_index(%u) failed",
9543 keynr);
9544 m_prebuilt->index_usable = FALSE;
9545 DBUG_RETURN(1);
9546 }
9547
9548 m_prebuilt->index_usable = row_merge_is_index_usable(
9549 m_prebuilt->trx, m_prebuilt->index);
9550
9551 if (!m_prebuilt->index_usable) {
9552 if (m_prebuilt->index->is_corrupted()) {
9553 char table_name[MAX_FULL_NAME_LEN + 1];
9554
9555 innobase_format_name(
9556 table_name, sizeof table_name,
9557 m_prebuilt->index->table->name.m_name);
9558
9559 if (m_prebuilt->index->is_primary()) {
9560 ut_ad(m_prebuilt->index->table->corrupted);
9561 push_warning_printf(
9562 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9563 ER_TABLE_CORRUPT,
9564 "InnoDB: Table %s is corrupted.",
9565 table_name);
9566 DBUG_RETURN(ER_TABLE_CORRUPT);
9567 } else {
9568 push_warning_printf(
9569 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9570 HA_ERR_INDEX_CORRUPT,
9571 "InnoDB: Index %s for table %s is"
9572 " marked as corrupted",
9573 m_prebuilt->index->name(),
9574 table_name);
9575 DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
9576 }
9577 } else {
9578 push_warning_printf(
9579 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9580 HA_ERR_TABLE_DEF_CHANGED,
9581 "InnoDB: insufficient history for index %u",
9582 keynr);
9583 }
9584
9585 /* The caller seems to ignore this. Thus, we must check
9586 this again in row_search_for_mysql(). */
9587 DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY,
9588 0, NULL));
9589 }
9590
9591 ut_a(m_prebuilt->search_tuple != 0);
9592
9593 /* Initialization of search_tuple is not needed for FT index
9594 since FT search returns rank only. In addition engine should
9595 be able to retrieve FTS_DOC_ID column value if necessary. */
9596 if (m_prebuilt->index->type & DICT_FTS) {
9597 for (uint i = 0; i < table->s->fields; i++) {
9598 if (m_prebuilt->read_just_key
9599 && bitmap_is_set(table->read_set, i)
9600 && !strcmp(table->s->field[i]->field_name.str,
9601 FTS_DOC_ID_COL_NAME)) {
9602 m_prebuilt->fts_doc_id_in_read_set = true;
9603 break;
9604 }
9605 }
9606 } else {
9607 dtuple_set_n_fields(m_prebuilt->search_tuple,
9608 m_prebuilt->index->n_fields);
9609
9610 dict_index_copy_types(
9611 m_prebuilt->search_tuple, m_prebuilt->index,
9612 m_prebuilt->index->n_fields);
9613
9614 /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is
9615 always added to read_set. */
9616 m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query
9617 && m_prebuilt->read_just_key
9618 && dict_index_contains_col_or_prefix(
9619 m_prebuilt->index,
9620 m_prebuilt->table->fts->doc_col,
9621 false);
9622 }
9623
9624 /* MySQL changes the active index for a handle also during some
9625 queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
9626 and then calculates the sum. Previously we played safe and used
9627 the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
9628 copying. Starting from MySQL-4.1 we use a more efficient flag here. */
9629
9630 build_template(false);
9631
9632 DBUG_RETURN(0);
9633 }
9634
9635 /***********************************************************************//**
9636 Reads the next or previous row from a cursor, which must have previously been
9637 positioned using index_read.
9638 @return 0, HA_ERR_END_OF_FILE, or error number */
9639
9640 int
general_fetch(uchar * buf,uint direction,uint match_mode)9641 ha_innobase::general_fetch(
9642 /*=======================*/
9643 uchar* buf, /*!< in/out: buffer for next row in MySQL
9644 format */
9645 uint direction, /*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
9646 uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
9647 ROW_SEL_EXACT_PREFIX */
9648 {
9649 DBUG_ENTER("general_fetch");
9650
9651 const trx_t* trx = m_prebuilt->trx;
9652
9653 ut_ad(trx == thd_to_trx(m_user_thd));
9654
9655 if (m_prebuilt->table->is_readable()) {
9656 } else if (m_prebuilt->table->corrupted) {
9657 DBUG_RETURN(HA_ERR_CRASHED);
9658 } else {
9659 DBUG_RETURN(m_prebuilt->table->space
9660 ? HA_ERR_DECRYPTION_FAILED
9661 : HA_ERR_NO_SUCH_TABLE);
9662 }
9663
9664 innobase_srv_conc_enter_innodb(m_prebuilt);
9665
9666 dberr_t ret = row_search_mvcc(
9667 buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, direction);
9668
9669 innobase_srv_conc_exit_innodb(m_prebuilt);
9670
9671 int error;
9672
9673 switch (ret) {
9674 case DB_SUCCESS:
9675 error = 0;
9676 table->status = 0;
9677 if (m_prebuilt->table->is_system_db) {
9678 srv_stats.n_system_rows_read.add(
9679 thd_get_thread_id(trx->mysql_thd), 1);
9680 } else {
9681 srv_stats.n_rows_read.add(
9682 thd_get_thread_id(trx->mysql_thd), 1);
9683 }
9684 break;
9685 case DB_RECORD_NOT_FOUND:
9686 error = HA_ERR_END_OF_FILE;
9687 table->status = STATUS_NOT_FOUND;
9688 break;
9689 case DB_END_OF_INDEX:
9690 error = HA_ERR_END_OF_FILE;
9691 table->status = STATUS_NOT_FOUND;
9692 break;
9693 case DB_TABLESPACE_DELETED:
9694 ib_senderrf(
9695 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9696 ER_TABLESPACE_DISCARDED,
9697 table->s->table_name.str);
9698
9699 table->status = STATUS_NOT_FOUND;
9700 error = HA_ERR_TABLESPACE_MISSING;
9701 break;
9702 case DB_TABLESPACE_NOT_FOUND:
9703
9704 ib_senderrf(
9705 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9706 ER_TABLESPACE_MISSING,
9707 table->s->table_name.str);
9708
9709 table->status = STATUS_NOT_FOUND;
9710 error = HA_ERR_TABLESPACE_MISSING;
9711 break;
9712 default:
9713 error = convert_error_code_to_mysql(
9714 ret, m_prebuilt->table->flags, m_user_thd);
9715
9716 table->status = STATUS_NOT_FOUND;
9717 break;
9718 }
9719
9720 DBUG_RETURN(error);
9721 }
9722
9723 /***********************************************************************//**
9724 Reads the next row from a cursor, which must have previously been
9725 positioned using index_read.
9726 @return 0, HA_ERR_END_OF_FILE, or error number */
9727
9728 int
index_next(uchar * buf)9729 ha_innobase::index_next(
9730 /*====================*/
9731 uchar* buf) /*!< in/out: buffer for next row in MySQL
9732 format */
9733 {
9734 return(general_fetch(buf, ROW_SEL_NEXT, 0));
9735 }
9736
9737 /*******************************************************************//**
9738 Reads the next row matching to the key value given as the parameter.
9739 @return 0, HA_ERR_END_OF_FILE, or error number */
9740
9741 int
index_next_same(uchar * buf,const uchar *,uint)9742 ha_innobase::index_next_same(
9743 /*=========================*/
9744 uchar* buf, /*!< in/out: buffer for the row */
9745 const uchar*, uint)
9746 {
9747 return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
9748 }
9749
9750 /***********************************************************************//**
9751 Reads the previous row from a cursor, which must have previously been
9752 positioned using index_read.
9753 @return 0, HA_ERR_END_OF_FILE, or error number */
9754
9755 int
index_prev(uchar * buf)9756 ha_innobase::index_prev(
9757 /*====================*/
9758 uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
9759 {
9760 return(general_fetch(buf, ROW_SEL_PREV, 0));
9761 }
9762
9763 /********************************************************************//**
9764 Positions a cursor on the first record in an index and reads the
9765 corresponding row to buf.
9766 @return 0, HA_ERR_END_OF_FILE, or error code */
9767
9768 int
index_first(uchar * buf)9769 ha_innobase::index_first(
9770 /*=====================*/
9771 uchar* buf) /*!< in/out: buffer for the row */
9772 {
9773 DBUG_ENTER("index_first");
9774
9775 int error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
9776
9777 /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9778
9779 if (error == HA_ERR_KEY_NOT_FOUND) {
9780 error = HA_ERR_END_OF_FILE;
9781 }
9782
9783 DBUG_RETURN(error);
9784 }
9785
9786 /********************************************************************//**
9787 Positions a cursor on the last record in an index and reads the
9788 corresponding row to buf.
9789 @return 0, HA_ERR_END_OF_FILE, or error code */
9790
9791 int
index_last(uchar * buf)9792 ha_innobase::index_last(
9793 /*====================*/
9794 uchar* buf) /*!< in/out: buffer for the row */
9795 {
9796 DBUG_ENTER("index_last");
9797
9798 int error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
9799
9800 /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9801
9802 if (error == HA_ERR_KEY_NOT_FOUND) {
9803 error = HA_ERR_END_OF_FILE;
9804 }
9805
9806 DBUG_RETURN(error);
9807 }
9808
9809 /****************************************************************//**
9810 Initialize a table scan.
9811 @return 0 or error number */
9812
9813 int
rnd_init(bool scan)9814 ha_innobase::rnd_init(
9815 /*==================*/
9816 bool scan) /*!< in: true if table/index scan FALSE otherwise */
9817 {
9818 int err;
9819
9820 /* Store the active index value so that we can restore the original
9821 value after a scan */
9822
9823 if (m_prebuilt->clust_index_was_generated) {
9824 err = change_active_index(MAX_KEY);
9825 } else {
9826 err = change_active_index(m_primary_key);
9827 }
9828
9829 /* Don't use semi-consistent read in random row reads (by position).
9830 This means we must disable semi_consistent_read if scan is false */
9831
9832 if (!scan) {
9833 try_semi_consistent_read(0);
9834 }
9835
9836 m_start_of_scan = true;
9837
9838 return(err);
9839 }
9840
9841 /*****************************************************************//**
9842 Ends a table scan.
9843 @return 0 or error number */
9844
9845 int
rnd_end(void)9846 ha_innobase::rnd_end(void)
9847 /*======================*/
9848 {
9849 return(index_end());
9850 }
9851
9852 /*****************************************************************//**
9853 Reads the next row in a table scan (also used to read the FIRST row
9854 in a table scan).
9855 @return 0, HA_ERR_END_OF_FILE, or error number */
9856
9857 int
rnd_next(uchar * buf)9858 ha_innobase::rnd_next(
9859 /*==================*/
9860 uchar* buf) /*!< in/out: returns the row in this buffer,
9861 in MySQL format */
9862 {
9863 int error;
9864
9865 DBUG_ENTER("rnd_next");
9866
9867 if (m_start_of_scan) {
9868 error = index_first(buf);
9869
9870 if (error == HA_ERR_KEY_NOT_FOUND) {
9871 error = HA_ERR_END_OF_FILE;
9872 }
9873
9874 m_start_of_scan = false;
9875 } else {
9876 error = general_fetch(buf, ROW_SEL_NEXT, 0);
9877 }
9878
9879 DBUG_RETURN(error);
9880 }
9881
9882 /**********************************************************************//**
9883 Fetches a row from the table based on a row reference.
9884 @return 0, HA_ERR_KEY_NOT_FOUND, or error code */
9885
9886 int
rnd_pos(uchar * buf,uchar * pos)9887 ha_innobase::rnd_pos(
9888 /*=================*/
9889 uchar* buf, /*!< in/out: buffer for the row */
9890 uchar* pos) /*!< in: primary key value of the row in the
9891 MySQL format, or the row id if the clustered
9892 index was internally generated by InnoDB; the
9893 length of data in pos has to be ref_length */
9894 {
9895 DBUG_ENTER("rnd_pos");
9896 DBUG_DUMP("key", pos, ref_length);
9897
9898 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9899
9900 /* Note that we assume the length of the row reference is fixed
9901 for the table, and it is == ref_length */
9902
9903 int error = index_read(buf, pos, (uint)ref_length, HA_READ_KEY_EXACT);
9904
9905 if (error != 0) {
9906 DBUG_PRINT("error", ("Got error: %d", error));
9907 }
9908
9909 DBUG_RETURN(error);
9910 }
9911
9912 /**********************************************************************//**
9913 Initialize FT index scan
9914 @return 0 or error number */
9915
9916 int
ft_init()9917 ha_innobase::ft_init()
9918 /*==================*/
9919 {
9920 DBUG_ENTER("ft_init");
9921
9922 trx_t* trx = check_trx_exists(ha_thd());
9923
9924 /* FTS queries are not treated as autocommit non-locking selects.
9925 This is because the FTS implementation can acquire locks behind
9926 the scenes. This has not been verified but it is safer to treat
9927 them as regular read only transactions for now. */
9928
9929 if (!trx_is_started(trx)) {
9930 trx->will_lock = true;
9931 }
9932
9933 DBUG_RETURN(rnd_init(false));
9934 }
9935
9936 /**********************************************************************//**
9937 Initialize FT index scan
9938 @return FT_INFO structure if successful or NULL */
9939
9940 FT_INFO*
ft_init_ext(uint flags,uint keynr,String * key)9941 ha_innobase::ft_init_ext(
9942 /*=====================*/
9943 uint flags, /* in: */
9944 uint keynr, /* in: */
9945 String* key) /* in: */
9946 {
9947 NEW_FT_INFO* fts_hdl = NULL;
9948 dict_index_t* index;
9949 fts_result_t* result;
9950 char buf_tmp[8192];
9951 ulint buf_tmp_used;
9952 uint num_errors;
9953 ulint query_len = key->length();
9954 const CHARSET_INFO* char_set = key->charset();
9955 const char* query = key->ptr();
9956
9957 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
9958 {
9959 ib::info out;
9960 out << "keynr=" << keynr << ", '";
9961 out.write(key->ptr(), key->length());
9962 }
9963
9964 if (flags & FT_BOOL) {
9965 ib::info() << "BOOL search";
9966 } else {
9967 ib::info() << "NL search";
9968 }
9969 }
9970
9971 /* FIXME: utf32 and utf16 are not compatible with some
9972 string function used. So to convert them to uft8 before
9973 we proceed. */
9974 if (strcmp(char_set->csname, "utf32") == 0
9975 || strcmp(char_set->csname, "utf16") == 0) {
9976
9977 buf_tmp_used = innobase_convert_string(
9978 buf_tmp, sizeof(buf_tmp) - 1,
9979 &my_charset_utf8_general_ci,
9980 query, query_len, (CHARSET_INFO*) char_set,
9981 &num_errors);
9982
9983 buf_tmp[buf_tmp_used] = 0;
9984 query = buf_tmp;
9985 query_len = buf_tmp_used;
9986 }
9987
9988 trx_t* trx = m_prebuilt->trx;
9989
9990 /* FTS queries are not treated as autocommit non-locking selects.
9991 This is because the FTS implementation can acquire locks behind
9992 the scenes. This has not been verified but it is safer to treat
9993 them as regular read only transactions for now. */
9994
9995 if (!trx_is_started(trx)) {
9996 trx->will_lock = true;
9997 }
9998
9999 dict_table_t* ft_table = m_prebuilt->table;
10000
10001 /* Table does not have an FTS index */
10002 if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
10003 my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10004 return(NULL);
10005 }
10006
10007 /* If tablespace is discarded, we should return here */
10008 if (!ft_table->space) {
10009 my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str,
10010 table->s->table_name.str);
10011 return(NULL);
10012 }
10013
10014 if (keynr == NO_SUCH_KEY) {
10015 /* FIXME: Investigate the NO_SUCH_KEY usage */
10016 index = reinterpret_cast<dict_index_t*>
10017 (ib_vector_getp(ft_table->fts->indexes, 0));
10018 } else {
10019 index = innobase_get_index(keynr);
10020 }
10021
10022 if (index == NULL || index->type != DICT_FTS) {
10023 my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10024 return(NULL);
10025 }
10026
10027 if (!(ft_table->fts->added_synced)) {
10028 fts_init_index(ft_table, FALSE);
10029
10030 ft_table->fts->added_synced = true;
10031 }
10032
10033 const byte* q = reinterpret_cast<const byte*>(
10034 const_cast<char*>(query));
10035
10036 // FIXME: support ft_init_ext_with_hints(), pass LIMIT
10037 dberr_t error = fts_query(trx, index, flags, q, query_len, &result);
10038
10039 if (error != DB_SUCCESS) {
10040 my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
10041 return(NULL);
10042 }
10043
10044 /* Allocate FTS handler, and instantiate it before return */
10045 fts_hdl = reinterpret_cast<NEW_FT_INFO*>(
10046 my_malloc(/*PSI_INSTRUMENT_ME,*/ sizeof(NEW_FT_INFO), MYF(0)));
10047
10048 fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
10049 fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
10050 fts_hdl->ft_prebuilt = m_prebuilt;
10051 fts_hdl->ft_result = result;
10052
10053 /* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */
10054 m_prebuilt->in_fts_query = true;
10055
10056 return(reinterpret_cast<FT_INFO*>(fts_hdl));
10057 }
10058
10059 /*****************************************************************//**
10060 Set up search tuple for a query through FTS_DOC_ID_INDEX on
10061 supplied Doc ID. This is used by MySQL to retrieve the documents
10062 once the search result (Doc IDs) is available */
10063 static
10064 void
innobase_fts_create_doc_id_key(dtuple_t * tuple,const dict_index_t * index,doc_id_t * doc_id)10065 innobase_fts_create_doc_id_key(
10066 /*===========================*/
10067 dtuple_t* tuple, /* in/out: m_prebuilt->search_tuple */
10068 const dict_index_t*
10069 index, /* in: index (FTS_DOC_ID_INDEX) */
10070 doc_id_t* doc_id) /* in/out: doc id to search, value
10071 could be changed to storage format
10072 used for search. */
10073 {
10074 doc_id_t temp_doc_id;
10075 dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
10076
10077 ut_a(dict_index_get_n_unique(index) == 1);
10078
10079 dtuple_set_n_fields(tuple, index->n_fields);
10080 dict_index_copy_types(tuple, index, index->n_fields);
10081
10082 #ifdef UNIV_DEBUG
10083 /* The unique Doc ID field should be an eight-bytes integer */
10084 dict_field_t* field = dict_index_get_nth_field(index, 0);
10085 ut_a(field->col->mtype == DATA_INT);
10086 ut_ad(sizeof(*doc_id) == field->fixed_len);
10087 ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME));
10088 #endif /* UNIV_DEBUG */
10089
10090 /* Convert to storage byte order */
10091 mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
10092 *doc_id = temp_doc_id;
10093 dfield_set_data(dfield, doc_id, sizeof(*doc_id));
10094
10095 dtuple_set_n_fields_cmp(tuple, 1);
10096
10097 for (ulint i = 1; i < index->n_fields; i++) {
10098 dfield = dtuple_get_nth_field(tuple, i);
10099 dfield_set_null(dfield);
10100 }
10101 }
10102
10103 /**********************************************************************//**
10104 Fetch next result from the FT result set
10105 @return error code */
10106
10107 int
ft_read(uchar * buf)10108 ha_innobase::ft_read(
10109 /*=================*/
10110 uchar* buf) /*!< in/out: buf contain result row */
10111 {
10112 row_prebuilt_t* ft_prebuilt;
10113
10114 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_prebuilt;
10115
10116 ut_a(ft_prebuilt == m_prebuilt);
10117
10118 fts_result_t* result;
10119
10120 result = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_result;
10121
10122 if (result->current == NULL) {
10123 /* This is the case where the FTS query did not
10124 contain and matching documents. */
10125 if (result->rankings_by_id != NULL) {
10126 /* Now that we have the complete result, we
10127 need to sort the document ids on their rank
10128 calculation. */
10129
10130 fts_query_sort_result_on_rank(result);
10131
10132 result->current = const_cast<ib_rbt_node_t*>(
10133 rbt_first(result->rankings_by_rank));
10134 } else {
10135 ut_a(result->current == NULL);
10136 }
10137 } else {
10138 result->current = const_cast<ib_rbt_node_t*>(
10139 rbt_next(result->rankings_by_rank, result->current));
10140 }
10141
10142 next_record:
10143
10144 if (result->current != NULL) {
10145 doc_id_t search_doc_id;
10146 dtuple_t* tuple = m_prebuilt->search_tuple;
10147
10148 /* If we only need information from result we can return
10149 without fetching the table row */
10150 if (ft_prebuilt->read_just_key) {
10151 #ifdef MYSQL_STORE_FTS_DOC_ID
10152 if (m_prebuilt->fts_doc_id_in_read_set) {
10153 fts_ranking_t* ranking;
10154 ranking = rbt_value(fts_ranking_t,
10155 result->current);
10156 innobase_fts_store_docid(
10157 table, ranking->doc_id);
10158 }
10159 #endif
10160 table->status= 0;
10161 return(0);
10162 }
10163
10164 dict_index_t* index;
10165
10166 index = m_prebuilt->table->fts_doc_id_index;
10167
10168 /* Must find the index */
10169 ut_a(index != NULL);
10170
10171 /* Switch to the FTS doc id index */
10172 m_prebuilt->index = index;
10173
10174 fts_ranking_t* ranking = rbt_value(
10175 fts_ranking_t, result->current);
10176
10177 search_doc_id = ranking->doc_id;
10178
10179 /* We pass a pointer of search_doc_id because it will be
10180 converted to storage byte order used in the search
10181 tuple. */
10182 innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
10183
10184 innobase_srv_conc_enter_innodb(m_prebuilt);
10185
10186 dberr_t ret = row_search_for_mysql(
10187 (byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0);
10188
10189 innobase_srv_conc_exit_innodb(m_prebuilt);
10190
10191 int error;
10192
10193 switch (ret) {
10194 case DB_SUCCESS:
10195 error = 0;
10196 table->status = 0;
10197 break;
10198 case DB_RECORD_NOT_FOUND:
10199 result->current = const_cast<ib_rbt_node_t*>(
10200 rbt_next(result->rankings_by_rank,
10201 result->current));
10202
10203 if (!result->current) {
10204 /* exhaust the result set, should return
10205 HA_ERR_END_OF_FILE just like
10206 ha_innobase::general_fetch() and/or
10207 ha_innobase::index_first() etc. */
10208 error = HA_ERR_END_OF_FILE;
10209 table->status = STATUS_NOT_FOUND;
10210 } else {
10211 goto next_record;
10212 }
10213 break;
10214 case DB_END_OF_INDEX:
10215 error = HA_ERR_END_OF_FILE;
10216 table->status = STATUS_NOT_FOUND;
10217 break;
10218 case DB_TABLESPACE_DELETED:
10219
10220 ib_senderrf(
10221 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10222 ER_TABLESPACE_DISCARDED,
10223 table->s->table_name.str);
10224
10225 table->status = STATUS_NOT_FOUND;
10226 error = HA_ERR_TABLESPACE_MISSING;
10227 break;
10228 case DB_TABLESPACE_NOT_FOUND:
10229
10230 ib_senderrf(
10231 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10232 ER_TABLESPACE_MISSING,
10233 table->s->table_name.str);
10234
10235 table->status = STATUS_NOT_FOUND;
10236 error = HA_ERR_TABLESPACE_MISSING;
10237 break;
10238 default:
10239 error = convert_error_code_to_mysql(
10240 ret, 0, m_user_thd);
10241
10242 table->status = STATUS_NOT_FOUND;
10243 break;
10244 }
10245
10246 return(error);
10247 }
10248
10249 return(HA_ERR_END_OF_FILE);
10250 }
10251
10252 #ifdef WITH_WSREP
10253 inline
10254 const char*
wsrep_key_type_to_str(wsrep_key_type type)10255 wsrep_key_type_to_str(wsrep_key_type type)
10256 {
10257 switch (type) {
10258 case WSREP_KEY_SHARED:
10259 return "shared";
10260 case WSREP_KEY_SEMI:
10261 return "semi";
10262 case WSREP_KEY_EXCLUSIVE:
10263 return "exclusive";
10264 };
10265 return "unknown";
10266 }
10267
10268 ulint
wsrep_append_foreign_key(trx_t * trx,dict_foreign_t * foreign,const rec_t * rec,dict_index_t * index,ibool referenced,wsrep_key_type key_type)10269 wsrep_append_foreign_key(
10270 /*===========================*/
10271 trx_t* trx, /*!< in: trx */
10272 dict_foreign_t* foreign, /*!< in: foreign key constraint */
10273 const rec_t* rec, /*!<in: clustered index record */
10274 dict_index_t* index, /*!<in: clustered index */
10275 ibool referenced, /*!<in: is check for referenced table */
10276 wsrep_key_type key_type) /*!< in: access type of this key
10277 (shared, exclusive, semi...) */
10278 {
10279 THD* thd = trx->mysql_thd;
10280
10281 if (!trx->is_wsrep() || wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
10282 return DB_SUCCESS;
10283 }
10284
10285 if (!foreign ||
10286 (!foreign->referenced_table && !foreign->foreign_table)) {
10287 WSREP_INFO("FK: %s missing in: %s",
10288 (!foreign ? "constraint" :
10289 (!foreign->referenced_table ?
10290 "referenced table" : "foreign table")),
10291 wsrep_thd_query(thd));
10292 return DB_ERROR;
10293 }
10294
10295 ulint rcode = DB_SUCCESS;
10296 char cache_key[513] = {'\0'};
10297 int cache_key_len=0;
10298 bool const copy = true;
10299
10300 if ( !((referenced) ?
10301 foreign->referenced_table : foreign->foreign_table)) {
10302 WSREP_DEBUG("pulling %s table into cache",
10303 (referenced) ? "referenced" : "foreign");
10304 mutex_enter(&(dict_sys->mutex));
10305
10306 if (referenced) {
10307 foreign->referenced_table =
10308 dict_table_get_low(
10309 foreign->referenced_table_name_lookup);
10310 if (foreign->referenced_table) {
10311 foreign->referenced_index =
10312 dict_foreign_find_index(
10313 foreign->referenced_table, NULL,
10314 foreign->referenced_col_names,
10315 foreign->n_fields,
10316 foreign->foreign_index,
10317 TRUE, FALSE);
10318 }
10319 } else {
10320 foreign->foreign_table =
10321 dict_table_get_low(
10322 foreign->foreign_table_name_lookup);
10323
10324 if (foreign->foreign_table) {
10325 foreign->foreign_index =
10326 dict_foreign_find_index(
10327 foreign->foreign_table, NULL,
10328 foreign->foreign_col_names,
10329 foreign->n_fields,
10330 foreign->referenced_index,
10331 TRUE, FALSE);
10332 }
10333 }
10334 mutex_exit(&(dict_sys->mutex));
10335 }
10336
10337 if ( !((referenced) ?
10338 foreign->referenced_table : foreign->foreign_table)) {
10339 WSREP_WARN("FK: %s missing in query: %s",
10340 (!foreign->referenced_table) ?
10341 "referenced table" : "foreign table",
10342 (wsrep_thd_query(thd)) ?
10343 wsrep_thd_query(thd) : "void");
10344 return DB_ERROR;
10345 }
10346
10347 byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10348 ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
10349
10350 dict_index_t *idx_target = (referenced) ?
10351 foreign->referenced_index : index;
10352 dict_index_t *idx = (referenced) ?
10353 UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
10354 UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
10355 int i = 0;
10356
10357 while (idx != NULL && idx != idx_target) {
10358 if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
10359 i++;
10360 }
10361 idx = UT_LIST_GET_NEXT(indexes, idx);
10362 }
10363
10364 ut_a(idx);
10365 key[0] = byte(i);
10366
10367 rcode = wsrep_rec_get_foreign_key(
10368 &key[1], &len, rec, index, idx,
10369 wsrep_protocol_version > 1);
10370
10371 if (rcode != DB_SUCCESS) {
10372 WSREP_ERROR(
10373 "FK key set failed: " ULINTPF
10374 " (" ULINTPF " %s), index: %s %s, %s",
10375 rcode, referenced, wsrep_key_type_to_str(key_type),
10376 index ? index->name() : "void index",
10377 (index && index->table) ? index->table->name.m_name :
10378 "void table",
10379 wsrep_thd_query(thd));
10380 return DB_ERROR;
10381 }
10382
10383 strncpy(cache_key,
10384 (wsrep_protocol_version > 1) ?
10385 ((referenced) ?
10386 foreign->referenced_table->name.m_name :
10387 foreign->foreign_table->name.m_name) :
10388 foreign->foreign_table->name.m_name, sizeof(cache_key) - 1);
10389 cache_key_len = strlen(cache_key);
10390
10391 #ifdef WSREP_DEBUG_PRINT
10392 ulint j;
10393 fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
10394 cache_key, (shared) ? "shared" : "exclusive", len+1);
10395 for (j=0; j<len+1; j++) {
10396 fprintf(stderr, " %hhX, ", key[j]);
10397 }
10398 fprintf(stderr, "\n");
10399 #endif
10400 char *p = strchr(cache_key, '/');
10401
10402 if (p) {
10403 *p = '\0';
10404 } else {
10405 WSREP_WARN("unexpected foreign key table %s %s",
10406 foreign->referenced_table->name.m_name,
10407 foreign->foreign_table->name.m_name);
10408 }
10409
10410 wsrep_buf_t wkey_part[3];
10411 wsrep_key_t wkey = {wkey_part, 3};
10412
10413 if (!wsrep_prepare_key(
10414 (const uchar*)cache_key,
10415 cache_key_len + 1,
10416 (const uchar*)key, len+1,
10417 wkey_part,
10418 (size_t*)&wkey.key_parts_num)) {
10419 WSREP_WARN("key prepare failed for cascaded FK: %s",
10420 (wsrep_thd_query(thd)) ?
10421 wsrep_thd_query(thd) : "void");
10422 return DB_ERROR;
10423 }
10424
10425 wsrep_t *wsrep= get_wsrep();
10426
10427 rcode = (int)wsrep->append_key(
10428 wsrep,
10429 wsrep_ws_handle(thd, trx),
10430 &wkey,
10431 1,
10432 key_type,
10433 copy);
10434
10435 if (rcode) {
10436 DBUG_PRINT("wsrep", ("row key failed: " ULINTPF, rcode));
10437 WSREP_ERROR("Appending cascaded fk row key failed: %s, "
10438 ULINTPF,
10439 (wsrep_thd_query(thd)) ?
10440 wsrep_thd_query(thd) : "void", rcode);
10441 return DB_ERROR;
10442 }
10443
10444 return DB_SUCCESS;
10445 }
10446
10447 static int
wsrep_append_key(THD * thd,trx_t * trx,TABLE_SHARE * table_share,const char * key,uint16_t key_len,wsrep_key_type key_type)10448 wsrep_append_key(
10449 /*=============*/
10450 THD *thd,
10451 trx_t *trx,
10452 TABLE_SHARE *table_share,
10453 const char* key,
10454 uint16_t key_len,
10455 wsrep_key_type key_type /*!< in: access type of this key
10456 (shared, exclusive, semi...) */
10457 )
10458 {
10459 DBUG_ENTER("wsrep_append_key");
10460 bool const copy = true;
10461 #ifdef WSREP_DEBUG_PRINT
10462 fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s\n Query: %s ",
10463 wsrep_key_type_to_str(key_type),
10464 wsrep_thd_thread_id(thd), trx->id, key_len,
10465 table_share->table_name.str, wsrep_thd_query(thd));
10466 for (int i=0; i<key_len; i++) {
10467 fprintf(stderr, "%hhX, ", key[i]);
10468 }
10469 fprintf(stderr, "\n");
10470 #endif
10471 wsrep_buf_t wkey_part[3];
10472 wsrep_key_t wkey = {wkey_part, 3};
10473
10474 if (!wsrep_prepare_key(
10475 (const uchar*)table_share->table_cache_key.str,
10476 table_share->table_cache_key.length,
10477 (const uchar*)key, key_len,
10478 wkey_part,
10479 (size_t*)&wkey.key_parts_num)) {
10480 WSREP_WARN("key prepare failed for: %s",
10481 (wsrep_thd_query(thd)) ?
10482 wsrep_thd_query(thd) : "void");
10483 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10484 }
10485
10486 wsrep_t *wsrep= get_wsrep();
10487
10488 int rcode = (int)wsrep->append_key(
10489 wsrep,
10490 wsrep_ws_handle(thd, trx),
10491 &wkey,
10492 1,
10493 key_type,
10494 copy);
10495 if (rcode) {
10496 DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
10497 WSREP_WARN("Appending row key failed: %s, %d",
10498 (wsrep_thd_query(thd)) ?
10499 wsrep_thd_query(thd) : "void", rcode);
10500 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10501 }
10502
10503 DBUG_RETURN(0);
10504 }
10505
10506 static bool
referenced_by_foreign_key2(dict_table_t * table,dict_index_t * index)10507 referenced_by_foreign_key2(
10508 /*=======================*/
10509 dict_table_t* table,
10510 dict_index_t* index)
10511 {
10512 ut_ad(table != NULL);
10513 ut_ad(index != NULL);
10514
10515 const dict_foreign_set* fks = &table->referenced_set;
10516
10517 for (dict_foreign_set::const_iterator it = fks->begin();
10518 it != fks->end();
10519 ++it) {
10520 dict_foreign_t* foreign = *it;
10521
10522 if (foreign->referenced_index != index) {
10523 continue;
10524 }
10525 ut_ad(table == foreign->referenced_table);
10526 return true;
10527 }
10528 return false;
10529 }
10530
10531 int
wsrep_append_keys(THD * thd,wsrep_key_type key_type,const uchar * record0,const uchar * record1)10532 ha_innobase::wsrep_append_keys(
10533 /*===========================*/
10534 THD *thd,
10535 wsrep_key_type key_type, /*!< in: access type of this key
10536 (shared, exclusive, semi...) */
10537 const uchar* record0, /* in: row in MySQL format */
10538 const uchar* record1) /* in: row in MySQL format */
10539 {
10540 int rcode;
10541 DBUG_ENTER("wsrep_append_keys");
10542
10543 bool key_appended = false;
10544 trx_t *trx = thd_to_trx(thd);
10545
10546 if (table_share && table_share->tmp_table != NO_TMP_TABLE) {
10547 WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
10548 thd_get_thread_id(thd),
10549 table_share->tmp_table,
10550 (wsrep_thd_query(thd)) ?
10551 wsrep_thd_query(thd) : "void");
10552 DBUG_RETURN(0);
10553 }
10554
10555 if (wsrep_protocol_version == 0) {
10556 uint len;
10557 char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10558 char *key = &keyval[0];
10559 ibool is_null;
10560
10561 len = wsrep_store_key_val_for_row(
10562 thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
10563 record0, &is_null);
10564
10565 if (!is_null) {
10566 rcode = wsrep_append_key(
10567 thd, trx, table_share, keyval,
10568 len, key_type);
10569
10570 if (rcode) DBUG_RETURN(rcode);
10571 } else {
10572 WSREP_DEBUG("NULL key skipped (proto 0): %s",
10573 wsrep_thd_query(thd));
10574 }
10575 } else {
10576 ut_a(table->s->keys <= 256);
10577 uint i;
10578 bool hasPK= false;
10579
10580 for (i=0; i<table->s->keys; ++i) {
10581 KEY* key_info = table->key_info + i;
10582 if (key_info->flags & HA_NOSAME) {
10583 hasPK = true;
10584 }
10585 }
10586
10587 for (i=0; i<table->s->keys; ++i) {
10588 uint len;
10589 char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10590 char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10591 char* key0 = &keyval0[1];
10592 char* key1 = &keyval1[1];
10593 KEY* key_info = table->key_info + i;
10594 ibool is_null;
10595
10596 dict_index_t* idx = innobase_get_index(i);
10597 dict_table_t* tab = (idx) ? idx->table : NULL;
10598
10599 keyval0[0] = (char)i;
10600 keyval1[0] = (char)i;
10601
10602 if (!tab) {
10603 WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
10604 table->s->table_name.str,
10605 key_info->name.str);
10606 }
10607 /* !hasPK == table with no PK, must append all non-unique keys */
10608 if (!hasPK || key_info->flags & HA_NOSAME ||
10609 ((tab &&
10610 referenced_by_foreign_key2(tab, idx)) ||
10611 (!tab && referenced_by_foreign_key()))) {
10612
10613 len = wsrep_store_key_val_for_row(
10614 thd, table, i, key0,
10615 WSREP_MAX_SUPPORTED_KEY_LENGTH,
10616 record0, &is_null);
10617 if (!is_null) {
10618 rcode = wsrep_append_key(
10619 thd, trx, table_share,
10620 keyval0, len+1, key_type);
10621 if (rcode) DBUG_RETURN(rcode);
10622
10623 if (key_info->flags & HA_NOSAME ||
10624 key_type == WSREP_KEY_SHARED)
10625 key_appended = true;
10626 } else {
10627 WSREP_DEBUG("NULL key skipped: %s",
10628 wsrep_thd_query(thd));
10629 }
10630
10631 if (record1) {
10632 len = wsrep_store_key_val_for_row(
10633 thd, table, i, key1,
10634 WSREP_MAX_SUPPORTED_KEY_LENGTH,
10635 record1, &is_null);
10636
10637 if (!is_null
10638 && memcmp(key0, key1, len)) {
10639 rcode = wsrep_append_key(
10640 thd, trx, table_share,
10641 keyval1, len+1,
10642 key_type);
10643 if (rcode) DBUG_RETURN(rcode);
10644 }
10645 }
10646 }
10647 }
10648 }
10649
10650 /* if no PK, calculate hash of full row, to be the key value */
10651 if (!key_appended && wsrep_certify_nonPK) {
10652 uchar digest[16];
10653
10654 wsrep_calc_row_hash(digest, record0, table, m_prebuilt);
10655
10656 if (int rcode = wsrep_append_key(thd, trx, table_share,
10657 reinterpret_cast<char*>
10658 (digest), 16, key_type)) {
10659 DBUG_RETURN(rcode);
10660 }
10661
10662 if (record1) {
10663 wsrep_calc_row_hash(
10664 digest, record1, table, m_prebuilt);
10665 if (int rcode = wsrep_append_key(
10666 thd, trx, table_share,
10667 reinterpret_cast<char*>(digest), 16,
10668 key_type)) {
10669 DBUG_RETURN(rcode);
10670 }
10671 }
10672 DBUG_RETURN(0);
10673 }
10674
10675 DBUG_RETURN(0);
10676 }
10677 #endif /* WITH_WSREP */
10678
10679 /*********************************************************************//**
10680 Stores a reference to the current row to 'ref' field of the handle. Note
10681 that in the case where we have generated the clustered index for the
10682 table, the function parameter is illogical: we MUST ASSUME that 'record'
10683 is the current 'position' of the handle, because if row ref is actually
10684 the row id internally generated in InnoDB, then 'record' does not contain
10685 it. We just guess that the row id must be for the record where the handle
10686 was positioned the last time. */
10687
10688 void
position(const uchar * record)10689 ha_innobase::position(
10690 /*==================*/
10691 const uchar* record) /*!< in: row in MySQL format */
10692 {
10693 uint len;
10694
10695 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
10696
10697 if (m_prebuilt->clust_index_was_generated) {
10698 /* No primary key was defined for the table and we
10699 generated the clustered index from row id: the
10700 row reference will be the row id, not any key value
10701 that MySQL knows of */
10702
10703 len = DATA_ROW_ID_LEN;
10704
10705 memcpy(ref, m_prebuilt->row_id, len);
10706 } else {
10707
10708 /* Copy primary key as the row reference */
10709 KEY* key_info = table->key_info + m_primary_key;
10710 key_copy(ref, (uchar*)record, key_info, key_info->key_length);
10711 len = key_info->key_length;
10712 }
10713
10714 ut_ad(len == ref_length);
10715 }
10716
10717 /*****************************************************************//**
10718 Check whether there exist a column named as "FTS_DOC_ID", which is
10719 reserved for InnoDB FTS Doc ID
10720 @return true if there exist a "FTS_DOC_ID" column */
10721 static
10722 bool
create_table_check_doc_id_col(trx_t * trx,const TABLE * form,ulint * doc_id_col)10723 create_table_check_doc_id_col(
10724 /*==========================*/
10725 trx_t* trx, /*!< in: InnoDB transaction handle */
10726 const TABLE* form, /*!< in: information on table
10727 columns and indexes */
10728 ulint* doc_id_col) /*!< out: Doc ID column number if
10729 there exist a FTS_DOC_ID column,
10730 ULINT_UNDEFINED if column is of the
10731 wrong type/name/size */
10732 {
10733 for (ulint i = 0; i < form->s->fields; i++) {
10734 const Field* field;
10735 ulint col_type;
10736 ulint col_len;
10737 ulint unsigned_type;
10738
10739 field = form->field[i];
10740 if (!field->stored_in_db()) {
10741 continue;
10742 }
10743
10744 col_type = get_innobase_type_from_mysql_type(
10745 &unsigned_type, field);
10746
10747 col_len = field->pack_length();
10748
10749 if (innobase_strcasecmp(field->field_name.str,
10750 FTS_DOC_ID_COL_NAME) == 0) {
10751
10752 /* Note the name is case sensitive due to
10753 our internal query parser */
10754 if (col_type == DATA_INT
10755 && !field->real_maybe_null()
10756 && col_len == sizeof(doc_id_t)
10757 && (strcmp(field->field_name.str,
10758 FTS_DOC_ID_COL_NAME) == 0)) {
10759 *doc_id_col = i;
10760 } else {
10761 push_warning_printf(
10762 trx->mysql_thd,
10763 Sql_condition::WARN_LEVEL_WARN,
10764 ER_ILLEGAL_HA_CREATE_OPTION,
10765 "InnoDB: FTS_DOC_ID column must be"
10766 " of BIGINT NOT NULL type, and named"
10767 " in all capitalized characters");
10768 my_error(ER_WRONG_COLUMN_NAME, MYF(0),
10769 field->field_name.str);
10770 *doc_id_col = ULINT_UNDEFINED;
10771 }
10772
10773 return(true);
10774 }
10775 }
10776
10777 return(false);
10778 }
10779
10780
10781 /** Finds all base columns needed to compute a given generated column.
10782 This is returned as a bitmap, in field->table->tmp_set.
10783 Works for both dict_v_col_t and dict_s_col_t columns.
10784 @param[in] table InnoDB table
10785 @param[in] field MySQL field
10786 @param[in,out] col virtual or stored column */
10787 template <typename T>
10788 void
prepare_vcol_for_base_setup(const dict_table_t * table,const Field * field,T * col)10789 prepare_vcol_for_base_setup(
10790 /*========================*/
10791 const dict_table_t* table,
10792 const Field* field,
10793 T* col)
10794 {
10795 ut_ad(col->num_base == 0);
10796 ut_ad(col->base_col == NULL);
10797
10798 MY_BITMAP *old_read_set = field->table->read_set;
10799 MY_BITMAP *old_vcol_set = field->table->vcol_set;
10800
10801 field->table->read_set = field->table->vcol_set = &field->table->tmp_set;
10802
10803 bitmap_clear_all(&field->table->tmp_set);
10804 field->vcol_info->expr->walk(
10805 &Item::register_field_in_read_map, 1, field->table);
10806 col->num_base= bitmap_bits_set(&field->table->tmp_set);
10807 if (col->num_base != 0) {
10808 col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
10809 table->heap, col->num_base * sizeof(
10810 * col->base_col)));
10811 }
10812 field->table->read_set= old_read_set;
10813 field->table->vcol_set= old_vcol_set;
10814 }
10815
10816
10817 /** Set up base columns for virtual column
10818 @param[in] table InnoDB table
10819 @param[in] field MySQL field
10820 @param[in,out] v_col virtual column */
10821 void
innodb_base_col_setup(dict_table_t * table,const Field * field,dict_v_col_t * v_col)10822 innodb_base_col_setup(
10823 dict_table_t* table,
10824 const Field* field,
10825 dict_v_col_t* v_col)
10826 {
10827 ulint n = 0;
10828
10829 prepare_vcol_for_base_setup(table, field, v_col);
10830
10831 for (uint i= 0; i < field->table->s->fields; ++i) {
10832 const Field* base_field = field->table->field[i];
10833 if (base_field->stored_in_db()
10834 && bitmap_is_set(&field->table->tmp_set, i)) {
10835 ulint z;
10836
10837 for (z = 0; z < table->n_cols; z++) {
10838 const char* name = dict_table_get_col_name(table, z);
10839 if (!innobase_strcasecmp(name,
10840 base_field->field_name.str)) {
10841 break;
10842 }
10843 }
10844
10845 ut_ad(z != table->n_cols);
10846
10847 v_col->base_col[n] = dict_table_get_nth_col(table, z);
10848 ut_ad(v_col->base_col[n]->ind == z);
10849 n++;
10850 }
10851 }
10852 v_col->num_base= n;
10853 }
10854
10855 /** Set up base columns for stored column
10856 @param[in] table InnoDB table
10857 @param[in] field MySQL field
10858 @param[in,out] s_col stored column */
10859 void
innodb_base_col_setup_for_stored(const dict_table_t * table,const Field * field,dict_s_col_t * s_col)10860 innodb_base_col_setup_for_stored(
10861 const dict_table_t* table,
10862 const Field* field,
10863 dict_s_col_t* s_col)
10864 {
10865 ulint n = 0;
10866
10867 prepare_vcol_for_base_setup(table, field, s_col);
10868
10869 for (uint i= 0; i < field->table->s->fields; ++i) {
10870 const Field* base_field = field->table->field[i];
10871
10872 if (base_field->stored_in_db()
10873 && bitmap_is_set(&field->table->tmp_set, i)) {
10874 ulint z;
10875 for (z = 0; z < table->n_cols; z++) {
10876 const char* name = dict_table_get_col_name(
10877 table, z);
10878 if (!innobase_strcasecmp(
10879 name, base_field->field_name.str)) {
10880 break;
10881 }
10882 }
10883
10884 ut_ad(z != table->n_cols);
10885
10886 s_col->base_col[n] = dict_table_get_nth_col(table, z);
10887 n++;
10888
10889 if (n == s_col->num_base) {
10890 break;
10891 }
10892 }
10893 }
10894 s_col->num_base= n;
10895 }
10896
/** Create a table definition to an InnoDB database.
Builds an in-memory dict_table_t from the MySQL table definition in m_form
(user columns, virtual columns, optional hidden FTS Doc ID column, system
columns). A temporary table is assigned an id and added to the cache
directly; any other table is created through row_create_table_for_mysql().
On success the new table is stored in m_table.
@return ER_* level error
@retval 0 on success */
inline MY_ATTRIBUTE((warn_unused_result))
int
create_table_info_t::create_table_def()
{
	dict_table_t*	table;
	ulint		col_type;
	ulint		col_len;
	ulint		nulls_allowed;
	ulint		unsigned_type;
	ulint		binary_type;
	ulint		long_true_varchar;
	ulint		charset_no;
	ulint		doc_id_col = 0;
	ibool		has_doc_id_col = FALSE;
	mem_heap_t*	heap;
	ha_table_option_struct *options= m_form->s->option_struct;
	dberr_t		err = DB_SUCCESS;

	DBUG_ENTER("create_table_def");
	DBUG_PRINT("enter", ("table_name: %s", m_table_name));

	DBUG_ASSERT(m_trx->mysql_thd == m_thd);
	DBUG_ASSERT(!m_drop_before_rollback);

	/* MySQL does the name length check. But we do additional check
	on the name length here */
	const size_t	table_name_len = strlen(m_table_name);
	if (table_name_len > MAX_FULL_NAME_LEN) {
		push_warning_printf(
			m_thd, Sql_condition::WARN_LEVEL_WARN,
			ER_TABLE_NAME,
			"InnoDB: Table Name or Database Name is too long");

		DBUG_RETURN(ER_TABLE_NAME);
	}

	/* A name that ends in '/' has no table part.
	NOTE(review): this indexes table_name_len - 1, so it presumably
	relies on m_table_name never being empty - confirm with callers. */
	if (m_table_name[table_name_len - 1] == '/') {
		push_warning_printf(
			m_thd, Sql_condition::WARN_LEVEL_WARN,
			ER_TABLE_NAME,
			"InnoDB: Table name is empty");

		DBUG_RETURN(ER_WRONG_TABLE_NAME);
	}

	/* Find out the number of virtual columns. When virtual columns
	are omitted, n_cols covers the stored fields only and num_v
	stays 0. */
	ulint num_v = 0;
	const bool omit_virtual = ha_innobase::omits_virtual_cols(*m_form->s);
	const ulint n_cols = omit_virtual
		? m_form->s->stored_fields : m_form->s->fields;

	if (!omit_virtual) {
		for (ulint i = 0; i < n_cols; i++) {
			num_v += !m_form->field[i]->stored_in_db();
		}
	}

	/* Check whether there already exists a FTS_DOC_ID column */
	if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){

		/* Raise error if the Doc ID column is of wrong type or name */
		if (doc_id_col == ULINT_UNDEFINED) {
			DBUG_RETURN(HA_ERR_GENERIC);
		} else {
			has_doc_id_col = TRUE;
		}
	}

	/* Adjust the number of columns for the FTS hidden field: the
	boolean expression adds 1 when an FTS table has no user-defined
	Doc ID column. */
	const ulint actual_n_cols = n_cols
		+ (m_flags2 & DICT_TF2_FTS && !has_doc_id_col);

	table = dict_mem_table_create(m_table_name, NULL,
				      actual_n_cols, num_v, m_flags, m_flags2);

	/* Set the hidden doc_id column. Without a user-defined Doc ID
	column the position used is n_cols - num_v. */
	if (m_flags2 & DICT_TF2_FTS) {
		table->fts->doc_col = has_doc_id_col
				      ? doc_id_col : n_cols - num_v;
	}

	if (DICT_TF_HAS_DATA_DIR(m_flags)) {
		ut_a(strlen(m_remote_path));

		table->data_dir_path = mem_heap_strdup(
			table->heap, m_remote_path);

	} else {
		table->data_dir_path = NULL;
	}

	/* Scratch heap handed to the column-adding functions below; it is
	freed on every exit path after this point. */
	heap = mem_heap_create(1000);

	ut_d(bool have_vers_start = false);
	ut_d(bool have_vers_end = false);

	/* i walks m_form->field[]; j counts the fields that count towards
	n_cols. A virtual field does not advance j when virtual columns
	are omitted. */
	for (ulint i = 0, j = 0; j < n_cols; i++) {
		Field*	field = m_form->field[i];
		ulint vers_row = 0;

		/* Derive the system-versioning flag of the column. */
		if (m_form->versioned()) {
			if (i == m_form->s->row_start_field) {
				vers_row = DATA_VERS_START;
				ut_d(have_vers_start = true);
			} else if (i == m_form->s->row_end_field) {
				vers_row = DATA_VERS_END;
				ut_d(have_vers_end = true);
			} else if (!(field->flags
				     & VERS_UPDATE_UNVERSIONED_FLAG)) {
				vers_row = DATA_VERSIONED;
			}
		}

		col_type = get_innobase_type_from_mysql_type(
			&unsigned_type, field);

		if (!col_type) {
			push_warning_printf(
				m_thd, Sql_condition::WARN_LEVEL_WARN,
				ER_CANT_CREATE_TABLE,
				"Error creating table '%s' with"
				" column '%s'. Please check its"
				" column type and try to re-create"
				" the table with an appropriate"
				" column type.",
				table->name.m_name, field->field_name.str);
			/* Jumps into the cleanup code inside the
			reserved-name check further down. */
			goto err_col;
		}

		nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
		binary_type = field->binary() ? DATA_BINARY_TYPE : 0;

		charset_no = 0;

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint) field->charset()->number;

			DBUG_EXECUTE_IF("simulate_max_char_col",
					charset_no = MAX_CHAR_COLL_NUM + 1;
					);

			if (charset_no > MAX_CHAR_COLL_NUM) {
				/* in data0type.h we assume that the
				number fits in one byte in prtype */
				push_warning_printf(
					m_thd, Sql_condition::WARN_LEVEL_WARN,
					ER_CANT_CREATE_TABLE,
					"In InnoDB, charset-collation codes"
					" must be below 256."
					" Unsupported code " ULINTPF ".",
					charset_no);
				mem_heap_free(heap);
				dict_mem_table_free(table);

				DBUG_RETURN(ER_CANT_CREATE_TABLE);
			}
		}

		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */

		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*) field)->length_bytes;

			if (((Field_varstring*) field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
		}

		/* First check whether the column to be added has a
		system reserved name. */
		if (dict_col_name_is_reserved(field->field_name.str)){
			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
				 field->field_name.str);
			/* Shared cleanup for column errors; also the target
			of the goto in the column type check above. */
err_col:
			dict_mem_table_free(table);
			mem_heap_free(heap);
			ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED));
			DBUG_RETURN(HA_ERR_GENERIC);
		}

		ulint is_virtual = !field->stored_in_db() ? DATA_VIRTUAL : 0;

		/* A stored column is always added; a virtual column is
		added only if virtual columns are not omitted. */
		if (!is_virtual) {
			dict_mem_table_add_col(table, heap,
				field->field_name.str, col_type,
				dtype_form_prtype(
					(ulint) field->type()
					| nulls_allowed | unsigned_type
					| binary_type | long_true_varchar
					| vers_row,
					charset_no),
				col_len);
		} else if (!omit_virtual) {
			dict_mem_table_add_v_col(table, heap,
				field->field_name.str, col_type,
				dtype_form_prtype(
					(ulint) field->type()
					| nulls_allowed | unsigned_type
					| binary_type | long_true_varchar
					| vers_row
					| is_virtual,
					charset_no),
				col_len, i, 0);
		}

		if (innobase_is_s_fld(field)) {
			ut_ad(!is_virtual);
			/* Added stored column in m_s_cols list. */
			dict_mem_table_add_s_col(
				table, 0);
		}

		if (is_virtual && omit_virtual) {
			/* Omitted virtual column: do not count it. */
			continue;
		}

		j++;
	}

	ut_ad(have_vers_start == have_vers_end);
	ut_ad(table->versioned() == have_vers_start);
	ut_ad(!table->versioned() || table->vers_start != table->vers_end);

	/* Set up the base columns of every virtual column; j is the
	ordinal of the virtual column among the virtual columns. */
	if (num_v) {
		for (ulint i = 0, j = 0; i < n_cols; i++) {
			dict_v_col_t*	v_col;

			const Field* field = m_form->field[i];

			if (field->stored_in_db()) {
				continue;
			}

			v_col = dict_table_get_nth_v_col(table, j);

			j++;

			innodb_base_col_setup(table, field, v_col);
		}
	}

	/* Fill base columns for the stored column present in the list. */
	if (table->s_cols && table->s_cols->size()) {
		for (ulint i = 0; i < n_cols; i++) {
			Field*	field = m_form->field[i];

			if (!innobase_is_s_fld(field)) {
				continue;
			}

			dict_s_col_list::iterator	it;
			for (it = table->s_cols->begin();
			     it != table->s_cols->end(); ++it) {
				/* NOTE(review): s_col is a by-value copy of
				the list element, so what
				innodb_base_col_setup_for_stored() writes to
				it is not stored back into table->s_cols by
				this loop - confirm that this is intended. */
				dict_s_col_t	s_col = *it;

				if (s_col.s_pos == i) {
					innodb_base_col_setup_for_stored(
						table, field, &s_col);
					break;
				}
			}
		}
	}

	/* Add the FTS doc_id hidden column. */
	if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
		fts_add_doc_id_column(table, heap);
	}

	dict_table_add_system_columns(table, heap);

	if (table->is_temporary()) {
		/* Warn when the ENCRYPTED table option disagrees with
		innodb_encrypt_temporary_tables; the option is ignored. */
		if ((options->encryption == 1
		     && !innodb_encrypt_temporary_tables)
		    || (options->encryption == 2
			&& innodb_encrypt_temporary_tables)) {
			push_warning_printf(m_thd,
					    Sql_condition::WARN_LEVEL_WARN,
					    ER_ILLEGAL_HA_CREATE_OPTION,
					    "Ignoring encryption parameter during "
					    "temporary table creation.");
		}

		/* Get a new table ID. FIXME: Make this a private
		sequence, not shared with persistent tables! */
		dict_table_assign_new_id(table, m_trx);
		ut_ad(dict_tf_get_rec_format(table->flags)
		      != REC_FORMAT_COMPRESSED);
		table->space_id = SRV_TMP_SPACE_ID;
		table->space = fil_system.temp_space;
		table->add_to_cache();
	} else {
		if (err == DB_SUCCESS) {
			err = row_create_table_for_mysql(
				table, m_trx,
				fil_encryption_t(options->encryption),
				uint32_t(options->encryption_key_id));
			/* Remember that the table was created. */
			m_drop_before_rollback = (err == DB_SUCCESS);
		}

		DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption",
				DBUG_SUICIDE(););
	}

	mem_heap_free(heap);

	DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
			err = DB_TABLESPACE_EXISTS;);

	/* DB_SUCCESS returns here. The two "already exists" errors raise
	a specific error with the table name first. Every failure ends in
	the generic error conversion below. */
	switch (err) {
	case DB_SUCCESS:
		ut_ad(table);
		m_table = table;
		DBUG_RETURN(0);
	default:
		break;
	case DB_DUPLICATE_KEY:
	case DB_TABLESPACE_EXISTS:
		char display_name[FN_REFLEN];
		/* One byte is reserved for the terminating NUL that is
		written through buf_end. */
		char* buf_end = innobase_convert_identifier(
			display_name, sizeof(display_name) - 1,
			m_table_name, strlen(m_table_name),
			m_thd);

		*buf_end = '\0';

		my_error(err == DB_DUPLICATE_KEY
			 ? ER_TABLE_EXISTS_ERROR
			 : ER_TABLESPACE_EXISTS, MYF(0), display_name);
	}

	DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
}
11240
11241 /*****************************************************************//**
11242 Creates an index in an InnoDB database. */
11243 inline
11244 int
create_index(trx_t * trx,const TABLE * form,dict_table_t * table,uint key_num)11245 create_index(
11246 /*=========*/
11247 trx_t* trx, /*!< in: InnoDB transaction handle */
11248 const TABLE* form, /*!< in: information on table
11249 columns and indexes */
11250 dict_table_t* table, /*!< in,out: table */
11251 uint key_num) /*!< in: index number */
11252 {
11253 dict_index_t* index;
11254 int error;
11255 const KEY* key;
11256 ulint* field_lengths;
11257
11258 DBUG_ENTER("create_index");
11259
11260 key = form->key_info + key_num;
11261
11262 /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
11263 ut_a(innobase_strcasecmp(key->name.str, innobase_index_reserve_name) != 0);
11264
11265 if (key->flags & (HA_SPATIAL | HA_FULLTEXT)) {
11266 /* Only one of these can be specified at a time. */
11267 ut_ad(~key->flags & (HA_SPATIAL | HA_FULLTEXT));
11268 ut_ad(!(key->flags & HA_NOSAME));
11269 index = dict_mem_index_create(table, key->name.str,
11270 (key->flags & HA_SPATIAL)
11271 ? DICT_SPATIAL : DICT_FTS,
11272 key->user_defined_key_parts);
11273
11274 for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11275 const Field* field = key->key_part[i].field;
11276
11277 /* We do not support special (Fulltext or Spatial)
11278 index on virtual columns */
11279 if (!field->stored_in_db()) {
11280 ut_ad(0);
11281 DBUG_RETURN(HA_ERR_UNSUPPORTED);
11282 }
11283
11284 dict_mem_index_add_field(index, field->field_name.str,
11285 0);
11286 }
11287
11288 DBUG_RETURN(convert_error_code_to_mysql(
11289 row_create_index_for_mysql(
11290 index, trx, NULL),
11291 table->flags, NULL));
11292 }
11293
11294 ulint ind_type = 0;
11295
11296 if (key_num == form->s->primary_key) {
11297 ind_type |= DICT_CLUSTERED;
11298 }
11299
11300 if (key->flags & HA_NOSAME) {
11301 ind_type |= DICT_UNIQUE;
11302 }
11303
11304 field_lengths = (ulint*) my_malloc(//PSI_INSTRUMENT_ME,
11305 key->user_defined_key_parts * sizeof *
11306 field_lengths, MYF(MY_FAE));
11307
11308 /* We pass 0 as the space id, and determine at a lower level the space
11309 id where to store the table */
11310
11311 index = dict_mem_index_create(table, key->name.str,
11312 ind_type, key->user_defined_key_parts);
11313
11314 for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11315 KEY_PART_INFO* key_part = key->key_part + i;
11316 ulint prefix_len;
11317 ulint col_type;
11318 ulint is_unsigned;
11319
11320
11321 /* (The flag HA_PART_KEY_SEG denotes in MySQL a
11322 column prefix field in an index: we only store a
11323 specified number of first bytes of the column to
11324 the index field.) The flag does not seem to be
11325 properly set by MySQL. Let us fall back on testing
11326 the length of the key part versus the column.
11327 We first reach to the table's column; if the index is on a
11328 prefix, key_part->field is not the table's column (it's a
11329 "fake" field forged in open_table_from_share() with length
11330 equal to the length of the prefix); so we have to go to
11331 form->fied. */
11332 Field* field= form->field[key_part->field->field_index];
11333 if (field == NULL)
11334 ut_error;
11335
11336 const char* field_name = key_part->field->field_name.str;
11337
11338 col_type = get_innobase_type_from_mysql_type(
11339 &is_unsigned, key_part->field);
11340
11341 if (DATA_LARGE_MTYPE(col_type)
11342 || (key_part->length < field->pack_length()
11343 && field->type() != MYSQL_TYPE_VARCHAR)
11344 || (field->type() == MYSQL_TYPE_VARCHAR
11345 && key_part->length < field->pack_length()
11346 - ((Field_varstring*) field)->length_bytes)) {
11347
11348 switch (col_type) {
11349 default:
11350 prefix_len = key_part->length;
11351 break;
11352 case DATA_INT:
11353 case DATA_FLOAT:
11354 case DATA_DOUBLE:
11355 case DATA_DECIMAL:
11356 sql_print_error(
11357 "MariaDB is trying to create a column"
11358 " prefix index field, on an"
11359 " inappropriate data type. Table"
11360 " name %s, column name %s.",
11361 form->s->table_name.str,
11362 key_part->field->field_name.str);
11363
11364 prefix_len = 0;
11365 }
11366 } else {
11367 prefix_len = 0;
11368 }
11369
11370 field_lengths[i] = key_part->length;
11371
11372 if (!key_part->field->stored_in_db()) {
11373 index->type |= DICT_VIRTUAL;
11374 }
11375
11376 dict_mem_index_add_field(index, field_name, prefix_len);
11377 }
11378
11379 ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
11380
11381 /* Even though we've defined max_supported_key_part_length, we
11382 still do our own checking using field_lengths to be absolutely
11383 sure we don't create too long indexes. */
11384 ulint flags = table->flags;
11385
11386 error = convert_error_code_to_mysql(
11387 row_create_index_for_mysql(index, trx, field_lengths),
11388 flags, NULL);
11389
11390 my_free(field_lengths);
11391
11392 DBUG_RETURN(error);
11393 }
11394
11395 /** Return a display name for the row format
11396 @param[in] row_format Row Format
11397 @return row format name */
11398 static
11399 const char*
get_row_format_name(enum row_type row_format)11400 get_row_format_name(
11401 enum row_type row_format)
11402 {
11403 switch (row_format) {
11404 case ROW_TYPE_COMPACT:
11405 return("COMPACT");
11406 case ROW_TYPE_COMPRESSED:
11407 return("COMPRESSED");
11408 case ROW_TYPE_DYNAMIC:
11409 return("DYNAMIC");
11410 case ROW_TYPE_REDUNDANT:
11411 return("REDUNDANT");
11412 case ROW_TYPE_DEFAULT:
11413 return("DEFAULT");
11414 case ROW_TYPE_FIXED:
11415 return("FIXED");
11416 case ROW_TYPE_PAGE:
11417 case ROW_TYPE_NOT_USED:
11418 break;
11419 }
11420 return("NOT USED");
11421 }
11422
11423 /** Validate DATA DIRECTORY option.
11424 @return true if valid, false if not. */
11425 bool
create_option_data_directory_is_valid()11426 create_table_info_t::create_option_data_directory_is_valid()
11427 {
11428 bool is_valid = true;
11429
11430 ut_ad(m_create_info->data_file_name
11431 && m_create_info->data_file_name[0] != '\0');
11432
11433 /* Use DATA DIRECTORY only with file-per-table. */
11434 if (!m_allow_file_per_table) {
11435 push_warning(
11436 m_thd, Sql_condition::WARN_LEVEL_WARN,
11437 ER_ILLEGAL_HA_CREATE_OPTION,
11438 "InnoDB: DATA DIRECTORY requires"
11439 " innodb_file_per_table.");
11440 is_valid = false;
11441 }
11442
11443 /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
11444 if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) {
11445 push_warning(
11446 m_thd, Sql_condition::WARN_LEVEL_WARN,
11447 ER_ILLEGAL_HA_CREATE_OPTION,
11448 "InnoDB: DATA DIRECTORY cannot be used"
11449 " for TEMPORARY tables.");
11450 is_valid = false;
11451 }
11452
11453 /* We check for a DATA DIRECTORY mixed with TABLESPACE in
11454 create_option_tablespace_is_valid(), no need to here. */
11455
11456 return(is_valid);
11457 }
11458
11459 /** Validate the create options. Check that the options KEY_BLOCK_SIZE,
11460 ROW_FORMAT, DATA DIRECTORY, TEMPORARY are compatible with
11461 each other and other settings. These CREATE OPTIONS are not validated
11462 here unless innodb_strict_mode is on. With strict mode, this function
11463 will report each problem it finds using a custom message with error
11464 code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
11465 @return NULL if valid, string name of bad option if not. */
11466 const char*
create_options_are_invalid()11467 create_table_info_t::create_options_are_invalid()
11468 {
11469 bool has_key_block_size = (m_create_info->key_block_size != 0);
11470
11471 const char* ret = NULL;
11472 enum row_type row_format = m_create_info->row_type;
11473 const bool is_temp
11474 = m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
11475
11476 ut_ad(m_thd != NULL);
11477
11478 /* If innodb_strict_mode is not set don't do any more validation. */
11479 if (!THDVAR(m_thd, strict_mode)) {
11480 return(NULL);
11481 }
11482
11483 /* Check if a non-zero KEY_BLOCK_SIZE was specified. */
11484 if (has_key_block_size) {
11485 if (is_temp) {
11486 my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11487 MYF(0));
11488 return("KEY_BLOCK_SIZE");
11489 }
11490
11491 switch (m_create_info->key_block_size) {
11492 ulint kbs_max;
11493 case 1:
11494 case 2:
11495 case 4:
11496 case 8:
11497 case 16:
11498 /* The maximum KEY_BLOCK_SIZE (KBS) is
11499 UNIV_PAGE_SIZE_MAX. But if srv_page_size is
11500 smaller than UNIV_PAGE_SIZE_MAX, the maximum
11501 KBS is also smaller. */
11502 kbs_max = ut_min(
11503 1U << (UNIV_PAGE_SSIZE_MAX - 1),
11504 1U << (PAGE_ZIP_SSIZE_MAX - 1));
11505 if (m_create_info->key_block_size > kbs_max) {
11506 push_warning_printf(
11507 m_thd, Sql_condition::WARN_LEVEL_WARN,
11508 ER_ILLEGAL_HA_CREATE_OPTION,
11509 "InnoDB: KEY_BLOCK_SIZE=%ld"
11510 " cannot be larger than %ld.",
11511 m_create_info->key_block_size,
11512 kbs_max);
11513 ret = "KEY_BLOCK_SIZE";
11514 }
11515
11516 /* Valid KEY_BLOCK_SIZE, check its dependencies. */
11517 if (!m_allow_file_per_table) {
11518 push_warning(
11519 m_thd, Sql_condition::WARN_LEVEL_WARN,
11520 ER_ILLEGAL_HA_CREATE_OPTION,
11521 "InnoDB: KEY_BLOCK_SIZE requires"
11522 " innodb_file_per_table.");
11523 ret = "KEY_BLOCK_SIZE";
11524 }
11525 break;
11526 default:
11527 push_warning_printf(
11528 m_thd, Sql_condition::WARN_LEVEL_WARN,
11529 ER_ILLEGAL_HA_CREATE_OPTION,
11530 "InnoDB: invalid KEY_BLOCK_SIZE = %u."
11531 " Valid values are [1, 2, 4, 8, 16]",
11532 (uint) m_create_info->key_block_size);
11533 ret = "KEY_BLOCK_SIZE";
11534 break;
11535 }
11536 }
11537
11538 /* Check for a valid InnoDB ROW_FORMAT specifier and
11539 other incompatibilities. */
11540 switch (row_format) {
11541 case ROW_TYPE_COMPRESSED:
11542 if (is_temp) {
11543 my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11544 MYF(0));
11545 return("ROW_FORMAT");
11546 }
11547 if (!m_allow_file_per_table) {
11548 push_warning_printf(
11549 m_thd, Sql_condition::WARN_LEVEL_WARN,
11550 ER_ILLEGAL_HA_CREATE_OPTION,
11551 "InnoDB: ROW_FORMAT=%s requires"
11552 " innodb_file_per_table.",
11553 get_row_format_name(row_format));
11554 ret = "ROW_FORMAT";
11555 }
11556 break;
11557 case ROW_TYPE_DYNAMIC:
11558 case ROW_TYPE_COMPACT:
11559 case ROW_TYPE_REDUNDANT:
11560 if (has_key_block_size) {
11561 push_warning_printf(
11562 m_thd, Sql_condition::WARN_LEVEL_WARN,
11563 ER_ILLEGAL_HA_CREATE_OPTION,
11564 "InnoDB: cannot specify ROW_FORMAT = %s"
11565 " with KEY_BLOCK_SIZE.",
11566 get_row_format_name(row_format));
11567 ret = "KEY_BLOCK_SIZE";
11568 }
11569 break;
11570 case ROW_TYPE_DEFAULT:
11571 break;
11572 case ROW_TYPE_FIXED:
11573 case ROW_TYPE_PAGE:
11574 case ROW_TYPE_NOT_USED:
11575 push_warning(
11576 m_thd, Sql_condition::WARN_LEVEL_WARN,
11577 ER_ILLEGAL_HA_CREATE_OPTION,
11578 "InnoDB: invalid ROW_FORMAT specifier.");
11579 ret = "ROW_TYPE";
11580 break;
11581 }
11582
11583 if (!m_create_info->data_file_name
11584 || !m_create_info->data_file_name[0]) {
11585 } else if (!my_use_symdir) {
11586 my_error(WARN_OPTION_IGNORED, MYF(ME_JUST_WARNING),
11587 "DATA DIRECTORY");
11588 } else if (!create_option_data_directory_is_valid()) {
11589 ret = "DATA DIRECTORY";
11590 }
11591
11592 /* Do not allow INDEX_DIRECTORY */
11593 if (m_create_info->index_file_name) {
11594 push_warning_printf(
11595 m_thd, Sql_condition::WARN_LEVEL_WARN,
11596 ER_ILLEGAL_HA_CREATE_OPTION,
11597 "InnoDB: INDEX DIRECTORY is not supported");
11598 ret = "INDEX DIRECTORY";
11599 }
11600
11601 /* Don't support compressed table when page size > 16k. */
11602 if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
11603 && srv_page_size > UNIV_PAGE_SIZE_DEF) {
11604 push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
11605 ER_ILLEGAL_HA_CREATE_OPTION,
11606 "InnoDB: Cannot create a COMPRESSED table"
11607 " when innodb_page_size > 16k.");
11608
11609 if (has_key_block_size) {
11610 ret = "KEY_BLOCK_SIZE";
11611 } else {
11612 ret = "ROW_TYPE";
11613 }
11614 }
11615
11616 return(ret);
11617 }
11618
11619 /*****************************************************************//**
11620 Check engine specific table options not handled by SQL-parser.
11621 @return NULL if valid, string if not */
11622 const char*
check_table_options()11623 create_table_info_t::check_table_options()
11624 {
11625 enum row_type row_format = m_create_info->row_type;
11626 const ha_table_option_struct *options= m_form->s->option_struct;
11627
11628 switch (options->encryption) {
11629 case FIL_ENCRYPTION_OFF:
11630 if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
11631 push_warning(
11632 m_thd, Sql_condition::WARN_LEVEL_WARN,
11633 HA_WRONG_CREATE_OPTION,
11634 "InnoDB: ENCRYPTED=NO implies"
11635 " ENCRYPTION_KEY_ID=1");
11636 compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
11637 }
11638 if (srv_encrypt_tables != 2) {
11639 break;
11640 }
11641 push_warning(
11642 m_thd, Sql_condition::WARN_LEVEL_WARN,
11643 HA_WRONG_CREATE_OPTION,
11644 "InnoDB: ENCRYPTED=NO cannot be used with"
11645 " innodb_encrypt_tables=FORCE");
11646 return "ENCRYPTED";
11647 case FIL_ENCRYPTION_DEFAULT:
11648 if (!srv_encrypt_tables) {
11649 break;
11650 }
11651 /* fall through */
11652 case FIL_ENCRYPTION_ON:
11653 const uint32_t key_id = uint32_t(options->encryption_key_id);
11654 if (!encryption_key_id_exists(key_id)) {
11655 push_warning_printf(
11656 m_thd, Sql_condition::WARN_LEVEL_WARN,
11657 HA_WRONG_CREATE_OPTION,
11658 "InnoDB: ENCRYPTION_KEY_ID %u not available",
11659 key_id);
11660 return "ENCRYPTION_KEY_ID";
11661 }
11662
11663 /* Currently we do not support encryption for spatial indexes.
11664 Do not allow ENCRYPTED=YES if any SPATIAL INDEX exists. */
11665 if (options->encryption != FIL_ENCRYPTION_ON) {
11666 break;
11667 }
11668 for (ulint i = 0; i < m_form->s->keys; i++) {
11669 if (m_form->key_info[i].flags & HA_SPATIAL) {
11670 push_warning(m_thd,
11671 Sql_condition::WARN_LEVEL_WARN,
11672 HA_ERR_UNSUPPORTED,
11673 "InnoDB: ENCRYPTED=YES is not"
11674 " supported for SPATIAL INDEX");
11675 return "ENCRYPTED";
11676 }
11677 }
11678 }
11679
11680 if (!m_allow_file_per_table
11681 && options->encryption != FIL_ENCRYPTION_DEFAULT) {
11682 push_warning(
11683 m_thd, Sql_condition::WARN_LEVEL_WARN,
11684 HA_WRONG_CREATE_OPTION,
11685 "InnoDB: ENCRYPTED requires innodb_file_per_table");
11686 return "ENCRYPTED";
11687 }
11688
11689 /* Check page compression requirements */
11690 if (options->page_compressed) {
11691
11692 if (row_format == ROW_TYPE_COMPRESSED) {
11693 push_warning(
11694 m_thd, Sql_condition::WARN_LEVEL_WARN,
11695 HA_WRONG_CREATE_OPTION,
11696 "InnoDB: PAGE_COMPRESSED table can't have"
11697 " ROW_TYPE=COMPRESSED");
11698 return "PAGE_COMPRESSED";
11699 }
11700
11701 switch (row_format) {
11702 default:
11703 break;
11704 case ROW_TYPE_DEFAULT:
11705 if (m_default_row_format
11706 != DEFAULT_ROW_FORMAT_REDUNDANT) {
11707 break;
11708 }
11709 /* fall through */
11710 case ROW_TYPE_REDUNDANT:
11711 push_warning(
11712 m_thd, Sql_condition::WARN_LEVEL_WARN,
11713 HA_WRONG_CREATE_OPTION,
11714 "InnoDB: PAGE_COMPRESSED table can't have"
11715 " ROW_TYPE=REDUNDANT");
11716 return "PAGE_COMPRESSED";
11717 }
11718
11719 if (!m_allow_file_per_table) {
11720 push_warning(
11721 m_thd, Sql_condition::WARN_LEVEL_WARN,
11722 HA_WRONG_CREATE_OPTION,
11723 "InnoDB: PAGE_COMPRESSED requires"
11724 " innodb_file_per_table.");
11725 return "PAGE_COMPRESSED";
11726 }
11727
11728 if (m_create_info->key_block_size) {
11729 push_warning(
11730 m_thd, Sql_condition::WARN_LEVEL_WARN,
11731 HA_WRONG_CREATE_OPTION,
11732 "InnoDB: PAGE_COMPRESSED table can't have"
11733 " key_block_size");
11734 return "PAGE_COMPRESSED";
11735 }
11736 }
11737
11738 /* Check page compression level requirements, some of them are
11739 already checked above */
11740 if (options->page_compression_level != 0) {
11741 if (options->page_compressed == false) {
11742 push_warning(
11743 m_thd, Sql_condition::WARN_LEVEL_WARN,
11744 HA_WRONG_CREATE_OPTION,
11745 "InnoDB: PAGE_COMPRESSION_LEVEL requires"
11746 " PAGE_COMPRESSED");
11747 return "PAGE_COMPRESSION_LEVEL";
11748 }
11749
11750 if (options->page_compression_level < 1 || options->page_compression_level > 9) {
11751 push_warning_printf(
11752 m_thd, Sql_condition::WARN_LEVEL_WARN,
11753 HA_WRONG_CREATE_OPTION,
11754 "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
11755 " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
11756 options->page_compression_level);
11757 return "PAGE_COMPRESSION_LEVEL";
11758 }
11759 }
11760
11761 return NULL;
11762 }
11763
11764 /*****************************************************************//**
11765 Update create_info. Used in SHOW CREATE TABLE et al. */
11766
11767 void
update_create_info(HA_CREATE_INFO * create_info)11768 ha_innobase::update_create_info(
11769 /*============================*/
11770 HA_CREATE_INFO* create_info) /*!< in/out: create info */
11771 {
11772 if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
11773 info(HA_STATUS_AUTO);
11774 create_info->auto_increment_value = stats.auto_increment_value;
11775 }
11776
11777 if (m_prebuilt->table->is_temporary()) {
11778 return;
11779 }
11780
11781 /* Update the DATA DIRECTORY name from SYS_DATAFILES. */
11782 dict_get_and_save_data_dir_path(m_prebuilt->table, false);
11783
11784 if (m_prebuilt->table->data_dir_path) {
11785 create_info->data_file_name = m_prebuilt->table->data_dir_path;
11786 }
11787 }
11788
11789 /*****************************************************************//**
11790 Initialize the table FTS stopword list
11791 @return TRUE if success */
11792 ibool
innobase_fts_load_stopword(dict_table_t * table,trx_t * trx,THD * thd)11793 innobase_fts_load_stopword(
11794 /*=======================*/
11795 dict_table_t* table, /*!< in: Table has the FTS */
11796 trx_t* trx, /*!< in: transaction */
11797 THD* thd) /*!< in: current thread */
11798 {
11799 const char *stopword_table= THDVAR(thd, ft_user_stopword_table);
11800 if (!stopword_table)
11801 {
11802 mysql_mutex_lock(&LOCK_global_system_variables);
11803 if (innobase_server_stopword_table)
11804 stopword_table= thd_strdup(thd, innobase_server_stopword_table);
11805 mysql_mutex_unlock(&LOCK_global_system_variables);
11806 }
11807
11808 return fts_load_stopword(table, trx, stopword_table,
11809 THDVAR(thd, ft_enable_stopword), false);
11810 }
11811
11812 /** Parse the table name into normal name and remote path if needed.
11813 @param[in] name Table name (db/table or full path).
11814 @return 0 if successful, otherwise, error number */
11815 int
parse_table_name(const char * name)11816 create_table_info_t::parse_table_name(
11817 const char*
11818 #ifdef _WIN32
11819 name
11820 #endif
11821 )
11822 {
11823 DBUG_ENTER("parse_table_name");
11824
11825 #ifdef _WIN32
11826 /* Names passed in from server are in two formats:
11827 1. <database_name>/<table_name>: for normal table creation
11828 2. full path: for temp table creation, or DATA DIRECTORY.
11829
11830 When srv_file_per_table is on and mysqld_embedded is off,
11831 check for full path pattern, i.e.
11832 X:\dir\..., X is a driver letter, or
11833 \\dir1\dir2\..., UNC path
11834 returns error if it is in full path format, but not creating a temp.
11835 table. Currently InnoDB does not support symbolic link on Windows. */
11836
11837 if (m_innodb_file_per_table
11838 && !mysqld_embedded
11839 && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
11840
11841 if ((name[1] == ':')
11842 || (name[0] == '\\' && name[1] == '\\')) {
11843 sql_print_error("Cannot create table %s\n", name);
11844 DBUG_RETURN(HA_ERR_GENERIC);
11845 }
11846 }
11847 #endif
11848
11849 m_remote_path[0] = '\0';
11850
11851 /* Make sure DATA DIRECTORY is compatible with other options
11852 and set the remote path. In the case of either;
11853 CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
11854 CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ;
11855 we ignore the DATA DIRECTORY. */
11856 if (m_create_info->data_file_name
11857 && m_create_info->data_file_name[0]
11858 && my_use_symdir) {
11859 if (!create_option_data_directory_is_valid()) {
11860 push_warning_printf(
11861 m_thd, Sql_condition::WARN_LEVEL_WARN,
11862 WARN_OPTION_IGNORED,
11863 ER_DEFAULT(WARN_OPTION_IGNORED),
11864 "DATA DIRECTORY");
11865
11866 m_flags &= ~DICT_TF_MASK_DATA_DIR;
11867 } else {
11868 strncpy(m_remote_path,
11869 m_create_info->data_file_name,
11870 FN_REFLEN - 1);
11871 }
11872 }
11873
11874 if (m_create_info->index_file_name) {
11875 my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
11876 "INDEX DIRECTORY");
11877 }
11878
11879 DBUG_RETURN(0);
11880 }
11881
11882 /** Determine InnoDB table flags.
11883 If strict_mode=OFF, this will adjust the flags to what should be assumed.
11884 @retval true on success
11885 @retval false on error */
innobase_table_flags()11886 bool create_table_info_t::innobase_table_flags()
11887 {
11888 DBUG_ENTER("innobase_table_flags");
11889
11890 const char* fts_doc_id_index_bad = NULL;
11891 ulint zip_ssize = 0;
11892 enum row_type row_type;
11893 rec_format_t innodb_row_format =
11894 get_row_format(m_default_row_format);
11895 const bool is_temp
11896 = m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
11897 bool zip_allowed
11898 = !is_temp;
11899
11900 const ulint zip_ssize_max =
11901 ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
11902 static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
11903
11904 ha_table_option_struct *options= m_form->s->option_struct;
11905
11906 m_flags = 0;
11907 m_flags2 = 0;
11908
11909 /* Check if there are any FTS indexes defined on this table. */
11910 for (uint i = 0; i < m_form->s->keys; i++) {
11911 const KEY* key = &m_form->key_info[i];
11912
11913 if (key->flags & HA_FULLTEXT) {
11914 m_flags2 |= DICT_TF2_FTS;
11915
11916 /* We don't support FTS indexes in temporary
11917 tables. */
11918 if (is_temp) {
11919 my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
11920 DBUG_RETURN(false);
11921 }
11922
11923 if (fts_doc_id_index_bad) {
11924 goto index_bad;
11925 }
11926 }
11927
11928 if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
11929 continue;
11930 }
11931
11932 /* Do a pre-check on FTS DOC ID index */
11933 if (!(key->flags & HA_NOSAME)
11934 || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
11935 || strcmp(key->key_part[0].field->field_name.str,
11936 FTS_DOC_ID_COL_NAME)) {
11937 fts_doc_id_index_bad = key->name.str;
11938 }
11939
11940 if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
11941 index_bad:
11942 my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
11943 fts_doc_id_index_bad);
11944 DBUG_RETURN(false);
11945 }
11946 }
11947
11948 if (m_create_info->key_block_size > 0) {
11949 /* The requested compressed page size (key_block_size)
11950 is given in kilobytes. If it is a valid number, store
11951 that value as the number of log2 shifts from 512 in
11952 zip_ssize. Zero means it is not compressed. */
11953 ulint zssize; /* Zip Shift Size */
11954 ulint kbsize; /* Key Block Size */
11955 for (zssize = kbsize = 1;
11956 zssize <= zip_ssize_max;
11957 zssize++, kbsize <<= 1) {
11958 if (kbsize == m_create_info->key_block_size) {
11959 zip_ssize = zssize;
11960 break;
11961 }
11962 }
11963
11964 /* Make sure compressed row format is allowed. */
11965 if (is_temp) {
11966 push_warning(
11967 m_thd, Sql_condition::WARN_LEVEL_WARN,
11968 ER_ILLEGAL_HA_CREATE_OPTION,
11969 "InnoDB: KEY_BLOCK_SIZE is ignored"
11970 " for TEMPORARY TABLE.");
11971 zip_allowed = false;
11972 } else if (!m_allow_file_per_table) {
11973 push_warning(
11974 m_thd, Sql_condition::WARN_LEVEL_WARN,
11975 ER_ILLEGAL_HA_CREATE_OPTION,
11976 "InnoDB: KEY_BLOCK_SIZE requires"
11977 " innodb_file_per_table.");
11978 zip_allowed = false;
11979 }
11980
11981 if (!zip_allowed
11982 || zssize > zip_ssize_max) {
11983 push_warning_printf(
11984 m_thd, Sql_condition::WARN_LEVEL_WARN,
11985 ER_ILLEGAL_HA_CREATE_OPTION,
11986 "InnoDB: ignoring KEY_BLOCK_SIZE=%u.",
11987 (uint) m_create_info->key_block_size);
11988 }
11989 }
11990
11991 row_type = m_create_info->row_type;
11992
11993 if (zip_ssize && zip_allowed) {
11994 /* if ROW_FORMAT is set to default,
11995 automatically change it to COMPRESSED. */
11996 if (row_type == ROW_TYPE_DEFAULT) {
11997 row_type = ROW_TYPE_COMPRESSED;
11998 } else if (row_type != ROW_TYPE_COMPRESSED) {
11999 /* ROW_FORMAT other than COMPRESSED
12000 ignores KEY_BLOCK_SIZE. It does not
12001 make sense to reject conflicting
12002 KEY_BLOCK_SIZE and ROW_FORMAT, because
12003 such combinations can be obtained
12004 with ALTER TABLE anyway. */
12005 push_warning_printf(
12006 m_thd, Sql_condition::WARN_LEVEL_WARN,
12007 ER_ILLEGAL_HA_CREATE_OPTION,
12008 "InnoDB: ignoring KEY_BLOCK_SIZE=%u"
12009 " unless ROW_FORMAT=COMPRESSED.",
12010 (uint) m_create_info->key_block_size);
12011 zip_allowed = false;
12012 }
12013 } else {
12014 /* zip_ssize == 0 means no KEY_BLOCK_SIZE. */
12015 if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
12016 /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
12017 implies half the maximum KEY_BLOCK_SIZE(*1k) or
12018 srv_page_size, whichever is less. */
12019 zip_ssize = zip_ssize_max - 1;
12020 }
12021 }
12022
12023 /* Validate the row format. Correct it if necessary */
12024
12025 switch (row_type) {
12026 case ROW_TYPE_REDUNDANT:
12027 innodb_row_format = REC_FORMAT_REDUNDANT;
12028 break;
12029 case ROW_TYPE_COMPACT:
12030 innodb_row_format = REC_FORMAT_COMPACT;
12031 break;
12032 case ROW_TYPE_COMPRESSED:
12033 if (is_temp) {
12034 push_warning_printf(
12035 m_thd, Sql_condition::WARN_LEVEL_WARN,
12036 ER_ILLEGAL_HA_CREATE_OPTION,
12037 "InnoDB: ROW_FORMAT=%s is ignored for"
12038 " TEMPORARY TABLE.",
12039 get_row_format_name(row_type));
12040 } else if (!m_allow_file_per_table) {
12041 push_warning_printf(
12042 m_thd, Sql_condition::WARN_LEVEL_WARN,
12043 ER_ILLEGAL_HA_CREATE_OPTION,
12044 "InnoDB: ROW_FORMAT=COMPRESSED requires"
12045 " innodb_file_per_table.");
12046 } else {
12047 innodb_row_format = REC_FORMAT_COMPRESSED;
12048 break;
12049 }
12050 zip_allowed = false;
12051 /* Set ROW_FORMAT = COMPACT */
12052 /* fall through */
12053 case ROW_TYPE_NOT_USED:
12054 case ROW_TYPE_FIXED:
12055 case ROW_TYPE_PAGE:
12056 push_warning(
12057 m_thd, Sql_condition::WARN_LEVEL_WARN,
12058 ER_ILLEGAL_HA_CREATE_OPTION,
12059 "InnoDB: assuming ROW_FORMAT=DYNAMIC.");
12060 /* fall through */
12061 case ROW_TYPE_DYNAMIC:
12062 innodb_row_format = REC_FORMAT_DYNAMIC;
12063 break;
12064 case ROW_TYPE_DEFAULT:
12065 ;
12066 }
12067
12068 /* Don't support compressed table when page size > 16k. */
12069 if (zip_allowed && zip_ssize && srv_page_size > UNIV_PAGE_SIZE_DEF) {
12070 push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
12071 ER_ILLEGAL_HA_CREATE_OPTION,
12072 "InnoDB: Cannot create a COMPRESSED table"
12073 " when innodb_page_size > 16k."
12074 " Assuming ROW_FORMAT=DYNAMIC.");
12075 zip_allowed = false;
12076 }
12077
12078 ut_ad(!is_temp || !zip_allowed);
12079 ut_ad(!is_temp || innodb_row_format != REC_FORMAT_COMPRESSED);
12080
12081 /* Set the table flags */
12082 if (!zip_allowed) {
12083 zip_ssize = 0;
12084 }
12085
12086 if (is_temp) {
12087 m_flags2 |= DICT_TF2_TEMPORARY;
12088 } else if (m_use_file_per_table) {
12089 m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE;
12090 }
12091
12092 ulint level = ulint(options->page_compression_level);
12093 if (!level) {
12094 level = page_zip_level;
12095 if (!level && options->page_compressed) {
12096 push_warning_printf(
12097 m_thd, Sql_condition::WARN_LEVEL_WARN,
12098 ER_ILLEGAL_HA_CREATE_OPTION,
12099 "InnoDB: PAGE_COMPRESSED requires"
12100 " PAGE_COMPRESSION_LEVEL or"
12101 " innodb_compression_level > 0");
12102 DBUG_RETURN(false);
12103 }
12104 }
12105
12106 /* Set the table flags */
12107 dict_tf_set(&m_flags, innodb_row_format, zip_ssize,
12108 m_use_data_dir, options->page_compressed, level);
12109
12110 if (m_form->s->table_type == TABLE_TYPE_SEQUENCE) {
12111 m_flags |= DICT_TF_MASK_NO_ROLLBACK;
12112 }
12113
12114 /* Set the flags2 when create table or alter tables */
12115 m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
12116 DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
12117 m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
12118
12119 DBUG_RETURN(true);
12120 }
12121
12122 /** Parse MERGE_THRESHOLD value from the string.
12123 @param[in] thd connection
12124 @param[in] str string which might include 'MERGE_THRESHOLD='
12125 @return value parsed. 0 means not found or invalid value. */
12126 static
12127 ulint
innobase_parse_merge_threshold(THD * thd,const char * str)12128 innobase_parse_merge_threshold(
12129 THD* thd,
12130 const char* str)
12131 {
12132 static const char* label = "MERGE_THRESHOLD=";
12133 static const size_t label_len = strlen(label);
12134 const char* pos = str;
12135
12136 pos = strstr(str, label);
12137
12138 if (pos == NULL) {
12139 return(0);
12140 }
12141
12142 pos += label_len;
12143
12144 lint ret = atoi(pos);
12145
12146 if (ret > 0 && ret <= 50) {
12147 return(static_cast<ulint>(ret));
12148 }
12149
12150 push_warning_printf(
12151 thd, Sql_condition::WARN_LEVEL_WARN,
12152 ER_ILLEGAL_HA_CREATE_OPTION,
12153 "InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE"
12154 " statement. The value is ignored.");
12155
12156 return(0);
12157 }
12158
12159 /** Parse hint for table and its indexes, and update the information
12160 in dictionary.
12161 @param[in] thd connection
12162 @param[in,out] table target table
12163 @param[in] table_share table definition */
12164 void
innobase_parse_hint_from_comment(THD * thd,dict_table_t * table,const TABLE_SHARE * table_share)12165 innobase_parse_hint_from_comment(
12166 THD* thd,
12167 dict_table_t* table,
12168 const TABLE_SHARE* table_share)
12169 {
12170 ulint merge_threshold_table;
12171 ulint merge_threshold_index[MAX_KEY];
12172 bool is_found[MAX_KEY];
12173
12174 if (table_share->comment.str != NULL) {
12175 merge_threshold_table
12176 = innobase_parse_merge_threshold(
12177 thd, table_share->comment.str);
12178 } else {
12179 merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12180 }
12181
12182 if (merge_threshold_table == 0) {
12183 merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12184 }
12185
12186 for (uint i = 0; i < table_share->keys; i++) {
12187 KEY* key_info = &table_share->key_info[i];
12188
12189 ut_ad(i < sizeof(merge_threshold_index)
12190 / sizeof(merge_threshold_index[0]));
12191
12192 if (key_info->flags & HA_USES_COMMENT
12193 && key_info->comment.str != NULL) {
12194 merge_threshold_index[i]
12195 = innobase_parse_merge_threshold(
12196 thd, key_info->comment.str);
12197 } else {
12198 merge_threshold_index[i] = merge_threshold_table;
12199 }
12200
12201 if (merge_threshold_index[i] == 0) {
12202 merge_threshold_index[i] = merge_threshold_table;
12203 }
12204 }
12205
12206 /* update SYS_INDEX table */
12207 if (!table->is_temporary()) {
12208 for (uint i = 0; i < table_share->keys; i++) {
12209 is_found[i] = false;
12210 }
12211
12212 for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12213 index != NULL;
12214 index = UT_LIST_GET_NEXT(indexes, index)) {
12215
12216 if (dict_index_is_auto_gen_clust(index)) {
12217
12218 /* GEN_CLUST_INDEX should use
12219 merge_threshold_table */
12220 dict_index_set_merge_threshold(
12221 index, merge_threshold_table);
12222 continue;
12223 }
12224
12225 for (uint i = 0; i < table_share->keys; i++) {
12226 if (is_found[i]) {
12227 continue;
12228 }
12229
12230 KEY* key_info = &table_share->key_info[i];
12231
12232 if (innobase_strcasecmp(
12233 index->name, key_info->name.str) == 0) {
12234
12235 dict_index_set_merge_threshold(
12236 index,
12237 merge_threshold_index[i]);
12238 is_found[i] = true;
12239 break;
12240 }
12241 }
12242 }
12243 }
12244
12245 for (uint i = 0; i < table_share->keys; i++) {
12246 is_found[i] = false;
12247 }
12248
12249 /* update in memory */
12250 for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12251 index != NULL;
12252 index = UT_LIST_GET_NEXT(indexes, index)) {
12253
12254 if (dict_index_is_auto_gen_clust(index)) {
12255
12256 /* GEN_CLUST_INDEX should use merge_threshold_table */
12257
12258 /* x-lock index is needed to exclude concurrent
12259 pessimistic tree operations */
12260 rw_lock_x_lock(dict_index_get_lock(index));
12261 index->merge_threshold = merge_threshold_table;
12262 rw_lock_x_unlock(dict_index_get_lock(index));
12263
12264 continue;
12265 }
12266
12267 for (uint i = 0; i < table_share->keys; i++) {
12268 if (is_found[i]) {
12269 continue;
12270 }
12271
12272 KEY* key_info = &table_share->key_info[i];
12273
12274 if (innobase_strcasecmp(
12275 index->name, key_info->name.str) == 0) {
12276
12277 /* x-lock index is needed to exclude concurrent
12278 pessimistic tree operations */
12279 rw_lock_x_lock(dict_index_get_lock(index));
12280 index->merge_threshold
12281 = merge_threshold_index[i];
12282 rw_lock_x_unlock(dict_index_get_lock(index));
12283 is_found[i] = true;
12284
12285 break;
12286 }
12287 }
12288 }
12289 }
12290
12291 /** Set m_use_* flags. */
12292 void
set_tablespace_type(bool table_being_altered_is_file_per_table)12293 create_table_info_t::set_tablespace_type(
12294 bool table_being_altered_is_file_per_table)
12295 {
12296 /** Allow file_per_table for this table either because:
12297 1) the setting innodb_file_per_table=on,
12298 2) the table being altered is currently file_per_table */
12299 m_allow_file_per_table =
12300 m_innodb_file_per_table
12301 || table_being_altered_is_file_per_table;
12302
12303 /* Ignore the current innodb-file-per-table setting if we are
12304 creating a temporary table. */
12305 m_use_file_per_table =
12306 m_allow_file_per_table
12307 && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE);
12308
12309 /* DATA DIRECTORY must have m_use_file_per_table but cannot be
12310 used with TEMPORARY tables. */
12311 m_use_data_dir =
12312 m_use_file_per_table
12313 && m_create_info->data_file_name
12314 && m_create_info->data_file_name[0]
12315 && my_use_symdir;
12316 }
12317
12318 /** Initialize the create_table_info_t object.
12319 @return error number */
12320 int
initialize()12321 create_table_info_t::initialize()
12322 {
12323 DBUG_ENTER("create_table_info_t::initialize");
12324
12325 ut_ad(m_thd != NULL);
12326 ut_ad(m_create_info != NULL);
12327
12328 if (m_form->s->fields > REC_MAX_N_USER_FIELDS) {
12329 DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
12330 }
12331
12332 /* Check for name conflicts (with reserved name) for
12333 any user indices to be created. */
12334 if (innobase_index_name_is_reserved(m_thd, m_form->key_info,
12335 m_form->s->keys)) {
12336 DBUG_RETURN(HA_ERR_WRONG_INDEX);
12337 }
12338
12339 /* Get the transaction associated with the current thd, or create one
12340 if not yet created */
12341
12342 check_trx_exists(m_thd);
12343
12344 DBUG_RETURN(0);
12345 }
12346
12347
12348 /** Check if a virtual column is part of a fulltext or spatial index. */
12349 bool
gcols_in_fulltext_or_spatial()12350 create_table_info_t::gcols_in_fulltext_or_spatial()
12351 {
12352 for (ulint i = 0; i < m_form->s->keys; i++) {
12353 const KEY* key = m_form->key_info + i;
12354 if (!(key->flags & (HA_SPATIAL | HA_FULLTEXT))) {
12355 continue;
12356 }
12357 for (ulint j = 0; j < key->user_defined_key_parts; j++) {
12358 /* We do not support special (Fulltext or
12359 Spatial) index on virtual columns */
12360 if (!key->key_part[j].field->stored_in_db()) {
12361 my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0));
12362 return true;
12363 }
12364 }
12365 }
12366 return false;
12367 }
12368
12369
12370 /** Prepare to create a new table to an InnoDB database.
12371 @param[in] name Table name
12372 @return error number */
prepare_create_table(const char * name,bool strict)12373 int create_table_info_t::prepare_create_table(const char* name, bool strict)
12374 {
12375 DBUG_ENTER("prepare_create_table");
12376
12377 ut_ad(m_thd != NULL);
12378 ut_ad(m_create_info != NULL);
12379
12380 set_tablespace_type(false);
12381
12382 normalize_table_name(m_table_name, name);
12383
12384 /* Validate table options not handled by the SQL-parser */
12385 if (check_table_options()) {
12386 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12387 }
12388
12389 /* Validate the create options if innodb_strict_mode is set.
12390 Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION
12391 because InnoDB might actually support the option, but not under
12392 the current conditions. The messages revealing the specific
12393 problems are reported inside this function. */
12394 if (strict && create_options_are_invalid()) {
12395 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12396 }
12397
12398 /* Create the table flags and flags2 */
12399 if (!innobase_table_flags()) {
12400 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12401 }
12402
12403 if (high_level_read_only) {
12404 DBUG_RETURN(HA_ERR_TABLE_READONLY);
12405 }
12406
12407 if (gcols_in_fulltext_or_spatial()) {
12408 DBUG_RETURN(HA_ERR_UNSUPPORTED);
12409 }
12410
12411 for (uint i = 0; i < m_form->s->keys; i++) {
12412 const size_t max_field_len
12413 = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags);
12414 const KEY& key = m_form->key_info[i];
12415
12416 if (key.algorithm == HA_KEY_ALG_FULLTEXT) {
12417 continue;
12418 }
12419
12420 if (too_big_key_part_length(max_field_len, key)) {
12421 DBUG_RETURN(convert_error_code_to_mysql(
12422 DB_TOO_BIG_INDEX_COL, m_flags, NULL));
12423 }
12424 }
12425
12426 DBUG_RETURN(parse_table_name(name));
12427 }
12428
12429 /** Create the internal innodb table.
12430 @param create_fk whether to add FOREIGN KEY constraints */
create_table(bool create_fk)12431 int create_table_info_t::create_table(bool create_fk)
12432 {
12433 int error;
12434 int primary_key_no;
12435 uint i;
12436
12437 DBUG_ENTER("create_table");
12438
12439 /* Look for a primary key */
12440 primary_key_no = (m_form->s->primary_key != MAX_KEY ?
12441 (int) m_form->s->primary_key : -1);
12442
12443 /* Our function innobase_get_mysql_key_number_for_index assumes
12444 the primary key is always number 0, if it exists */
12445 ut_a(primary_key_no == -1 || primary_key_no == 0);
12446
12447 error = create_table_def();
12448
12449 if (error) {
12450 DBUG_RETURN(error);
12451 }
12452
12453 DBUG_ASSERT(m_drop_before_rollback
12454 == !(m_flags2 & DICT_TF2_TEMPORARY));
12455
12456 /* Create the keys */
12457
12458 if (m_form->s->keys == 0 || primary_key_no == -1) {
12459 /* Create an index which is used as the clustered index;
12460 order the rows by their row id which is internally generated
12461 by InnoDB */
12462 ulint flags = m_table->flags;
12463 dict_index_t* index = dict_mem_index_create(
12464 m_table, innobase_index_reserve_name,
12465 DICT_CLUSTERED, 0);
12466 error = convert_error_code_to_mysql(
12467 row_create_index_for_mysql(index, m_trx, NULL),
12468 flags, m_thd);
12469 if (error) {
12470 DBUG_RETURN(error);
12471 }
12472 }
12473
12474 if (primary_key_no != -1) {
12475 /* In InnoDB the clustered index must always be created
12476 first */
12477 if ((error = create_index(m_trx, m_form, m_table,
12478 (uint) primary_key_no))) {
12479 DBUG_RETURN(error);
12480 }
12481 }
12482
12483 /* Create the ancillary tables that are common to all FTS indexes on
12484 this table. */
12485 if (m_flags2 & DICT_TF2_FTS) {
12486 fts_doc_id_index_enum ret;
12487
12488 /* Check whether there already exists FTS_DOC_ID_INDEX */
12489 ret = innobase_fts_check_doc_id_index_in_def(
12490 m_form->s->keys, m_form->key_info);
12491
12492 switch (ret) {
12493 case FTS_INCORRECT_DOC_ID_INDEX:
12494 push_warning_printf(m_thd,
12495 Sql_condition::WARN_LEVEL_WARN,
12496 ER_WRONG_NAME_FOR_INDEX,
12497 " InnoDB: Index name %s is reserved"
12498 " for the unique index on"
12499 " FTS_DOC_ID column for FTS"
12500 " Document ID indexing"
12501 " on table %s. Please check"
12502 " the index definition to"
12503 " make sure it is of correct"
12504 " type\n",
12505 FTS_DOC_ID_INDEX_NAME,
12506 m_table->name.m_name);
12507
12508 if (m_table->fts) {
12509 fts_free(m_table);
12510 }
12511
12512 my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
12513 FTS_DOC_ID_INDEX_NAME);
12514 DBUG_RETURN(-1);
12515 case FTS_EXIST_DOC_ID_INDEX:
12516 case FTS_NOT_EXIST_DOC_ID_INDEX:
12517 break;
12518 }
12519
12520 dberr_t err = fts_create_common_tables(
12521 m_trx, m_table,
12522 (ret == FTS_EXIST_DOC_ID_INDEX));
12523
12524 error = convert_error_code_to_mysql(err, 0, NULL);
12525
12526 if (error) {
12527 DBUG_RETURN(error);
12528 }
12529 }
12530
12531 for (i = 0; i < m_form->s->keys; i++) {
12532 if (i != uint(primary_key_no)
12533 && (error = create_index(m_trx, m_form, m_table, i))) {
12534 DBUG_RETURN(error);
12535 }
12536 }
12537
12538 /* Cache all the FTS indexes on this table in the FTS specific
12539 structure. They are used for FTS indexed column update handling. */
12540 if (m_flags2 & DICT_TF2_FTS) {
12541 fts_t* fts = m_table->fts;
12542
12543 ut_a(fts != NULL);
12544
12545 dict_table_get_all_fts_indexes(m_table, fts->indexes);
12546 }
12547
12548 size_t stmt_len;
12549 if (const char* stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len)) {
12550 dberr_t err = create_fk
12551 ? dict_create_foreign_constraints(
12552 m_trx, stmt, stmt_len, m_table_name,
12553 m_flags2 & DICT_TF2_TEMPORARY)
12554 : DB_SUCCESS;
12555 if (err == DB_SUCCESS) {
12556 /* Check that also referencing constraints are ok */
12557 dict_names_t fk_tables;
12558 err = dict_load_foreigns(m_table_name, NULL,
12559 false, true,
12560 DICT_ERR_IGNORE_NONE,
12561 fk_tables);
12562 while (err == DB_SUCCESS && !fk_tables.empty()) {
12563 dict_load_table(fk_tables.front(),
12564 DICT_ERR_IGNORE_NONE);
12565 fk_tables.pop_front();
12566 }
12567 }
12568
12569 switch (err) {
12570 case DB_PARENT_NO_INDEX:
12571 push_warning_printf(
12572 m_thd, Sql_condition::WARN_LEVEL_WARN,
12573 HA_ERR_CANNOT_ADD_FOREIGN,
12574 "Create table '%s' with foreign key constraint"
12575 " failed. There is no index in the referenced"
12576 " table where the referenced columns appear"
12577 " as the first columns.\n", m_table_name);
12578 break;
12579
12580 case DB_CHILD_NO_INDEX:
12581 push_warning_printf(
12582 m_thd, Sql_condition::WARN_LEVEL_WARN,
12583 HA_ERR_CANNOT_ADD_FOREIGN,
12584 "Create table '%s' with foreign key constraint"
12585 " failed. There is no index in the referencing"
12586 " table where referencing columns appear"
12587 " as the first columns.\n", m_table_name);
12588 break;
12589 case DB_NO_FK_ON_S_BASE_COL:
12590 push_warning_printf(
12591 m_thd, Sql_condition::WARN_LEVEL_WARN,
12592 HA_ERR_CANNOT_ADD_FOREIGN,
12593 "Create table '%s' with foreign key constraint"
12594 " failed. Cannot add foreign key constraint"
12595 " placed on the base column of stored"
12596 " column. \n",
12597 m_table_name);
12598 default:
12599 break;
12600 }
12601
12602 if (err != DB_SUCCESS) {
12603 DBUG_RETURN(convert_error_code_to_mysql(
12604 err, m_flags, NULL));
12605 }
12606 }
12607
12608 /* In TRUNCATE TABLE, we will merely warn about the maximum
12609 row size being too large. */
12610 if (!row_size_is_acceptable(*m_table, create_fk)) {
12611 DBUG_RETURN(convert_error_code_to_mysql(
12612 DB_TOO_BIG_RECORD, m_flags, NULL));
12613 }
12614
12615 DBUG_RETURN(0);
12616 }
12617
row_size_is_acceptable(const dict_table_t & table,bool strict) const12618 bool create_table_info_t::row_size_is_acceptable(
12619 const dict_table_t &table, bool strict) const
12620 {
12621 for (dict_index_t *index= dict_table_get_first_index(&table); index;
12622 index= dict_table_get_next_index(index))
12623 if (!row_size_is_acceptable(*index, strict))
12624 return false;
12625 return true;
12626 }
12627
12628 /* FIXME: row size check has some flaws and should be improved */
record_size_info() const12629 dict_index_t::record_size_info_t dict_index_t::record_size_info() const
12630 {
12631 ut_ad(!(type & DICT_FTS));
12632
12633 /* maximum allowed size of a node pointer record */
12634 ulint page_ptr_max;
12635 const bool comp= dict_table_is_comp(table);
12636 /* table->space == NULL after DISCARD TABLESPACE */
12637 const page_size_t page_size(dict_tf_get_page_size(table->flags));
12638 record_size_info_t result;
12639
12640 if (page_size.is_compressed() &&
12641 page_size.physical() < univ_page_size.physical())
12642 {
12643 /* On a ROW_FORMAT=COMPRESSED page, two records must fit in the
12644 uncompressed page modification log. On compressed pages
12645 with size.physical() == univ_page_size.physical(),
12646 this limit will never be reached. */
12647 ut_ad(comp);
12648 /* The maximum allowed record size is the size of
12649 an empty page, minus a byte for recoding the heap
12650 number in the page modification log. The maximum
12651 allowed node pointer size is half that. */
12652 result.max_leaf_size= page_zip_empty_size(n_fields, page_size.physical());
12653 if (result.max_leaf_size)
12654 {
12655 result.max_leaf_size--;
12656 }
12657 page_ptr_max= result.max_leaf_size / 2;
12658 /* On a compressed page, there is a two-byte entry in
12659 the dense page directory for every record. But there
12660 is no record header. */
12661 result.shortest_size= 2;
12662 }
12663 else
12664 {
12665 /* The maximum allowed record size is half a B-tree
12666 page(16k for 64k page size). No additional sparse
12667 page directory entry will be generated for the first
12668 few user records. */
12669 result.max_leaf_size= (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
12670 ? page_get_free_space_of_empty(comp) / 2
12671 : REDUNDANT_REC_MAX_DATA_SIZE;
12672
12673 page_ptr_max= result.max_leaf_size;
12674 /* Each record has a header. */
12675 result.shortest_size= comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;
12676 }
12677
12678 if (comp)
12679 {
12680 /* Include the "null" flags in the
12681 maximum possible record size. */
12682 result.shortest_size+= UT_BITS_IN_BYTES(n_nullable);
12683 }
12684 else
12685 {
12686 /* For each column, include a 2-byte offset and a
12687 "null" flag. The 1-byte format is only used in short
12688 records that do not contain externally stored columns.
12689 Such records could never exceed the page limit, even
12690 when using the 2-byte format. */
12691 result.shortest_size+= 2 * n_fields;
12692 }
12693
12694 const ulint max_local_len= table->get_overflow_field_local_len();
12695
12696 /* Compute the maximum possible record size. */
12697 for (unsigned i= 0; i < n_fields; i++)
12698 {
12699 const dict_field_t &f= fields[i];
12700 const dict_col_t &col= *f.col;
12701
12702 /* In dtuple_convert_big_rec(), variable-length columns
12703 that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE
12704 may be chosen for external storage.
12705
12706 Fixed-length columns, and all columns of secondary
12707 index records are always stored inline. */
12708
12709 /* Determine the maximum length of the index field.
12710 The field_ext_max_size should be computed as the worst
12711 case in rec_get_converted_size_comp() for
12712 REC_STATUS_ORDINARY records. */
12713
12714 size_t field_max_size= dict_col_get_fixed_size(&col, comp);
12715 if (field_max_size && f.fixed_len != 0)
12716 {
12717 /* dict_index_add_col() should guarantee this */
12718 ut_ad(!f.prefix_len || f.fixed_len == f.prefix_len);
12719 /* Fixed lengths are not encoded
12720 in ROW_FORMAT=COMPACT. */
12721 goto add_field_size;
12722 }
12723
12724 field_max_size= dict_col_get_max_size(&col);
12725
12726 if (f.prefix_len)
12727 {
12728 if (f.prefix_len < field_max_size)
12729 {
12730 field_max_size= f.prefix_len;
12731 }
12732
12733 /* those conditions were copied from dtuple_convert_big_rec()*/
12734 }
12735 else if (field_max_size > max_local_len &&
12736 field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
12737 DATA_BIG_COL(&col) && dict_index_is_clust(this))
12738 {
12739
12740 /* In the worst case, we have a locally stored
12741 column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes.
12742 The length can be stored in one byte. If the
12743 column were stored externally, the lengths in
12744 the clustered index page would be
12745 BTR_EXTERN_FIELD_REF_SIZE and 2. */
12746 field_max_size= max_local_len;
12747 }
12748
12749 if (comp)
12750 {
12751 /* Add the extra size for ROW_FORMAT=COMPACT.
12752 For ROW_FORMAT=REDUNDANT, these bytes were
12753 added to result.shortest_size before this loop. */
12754 result.shortest_size+= field_max_size < 256 ? 1 : 2;
12755 }
12756 add_field_size:
12757 result.shortest_size+= field_max_size;
12758
12759 /* Check the size limit on leaf pages. */
12760 if (result.shortest_size >= result.max_leaf_size)
12761 {
12762 result.set_too_big(i);
12763 }
12764
12765 /* Check the size limit on non-leaf pages. Records
12766 stored in non-leaf B-tree pages consist of the unique
12767 columns of the record (the key columns of the B-tree)
12768 and a node pointer field. When we have processed the
12769 unique columns, result.shortest_size equals the size of the
12770 node pointer record minus the node pointer column. */
12771 if (i + 1 == dict_index_get_n_unique_in_tree(this) &&
12772 result.shortest_size + REC_NODE_PTR_SIZE >= page_ptr_max)
12773 {
12774 result.set_too_big(i);
12775 }
12776 }
12777
12778 return result;
12779 }
12780
12781 /** Issue a warning that the row is too big. */
ib_warn_row_too_big(THD * thd,const dict_table_t * table)12782 static void ib_warn_row_too_big(THD *thd, const dict_table_t *table)
12783 {
12784 /* FIXME: this row size check should be improved */
12785 /* If prefix is true then a 768-byte prefix is stored
12786 locally for BLOB fields. Refer to dict_table_get_format() */
12787 const bool prefix= !dict_table_has_atomic_blobs(table);
12788
12789 const ulint free_space=
12790 page_get_free_space_of_empty(table->flags & DICT_TF_COMPACT) / 2;
12791
12792 push_warning_printf(
12793 thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
12794 "Row size too large (> " ULINTPF "). Changing some columns to TEXT"
12795 " or BLOB %smay help. In current row format, BLOB prefix of"
12796 " %d bytes is stored inline.",
12797 free_space,
12798 prefix ? "or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED " : "",
12799 prefix ? DICT_MAX_FIXED_COL_LEN : 0);
12800 }
12801
row_size_is_acceptable(const dict_index_t & index,bool strict) const12802 bool create_table_info_t::row_size_is_acceptable(
12803 const dict_index_t &index, bool strict) const
12804 {
12805 if ((index.type & DICT_FTS) || index.table->is_system_db)
12806 {
12807 /* Ignore system tables check because innodb_table_stats
12808 maximum row size can not fit on 4k page. */
12809 return true;
12810 }
12811
12812 const bool innodb_strict_mode= THDVAR(m_thd, strict_mode);
12813 dict_index_t::record_size_info_t info= index.record_size_info();
12814
12815 if (info.row_is_too_big())
12816 {
12817 ut_ad(info.get_overrun_size() != 0);
12818 ut_ad(info.max_leaf_size != 0);
12819
12820 const size_t idx= info.get_first_overrun_field_index();
12821 const dict_field_t *field= dict_index_get_nth_field(&index, idx);
12822
12823 if (innodb_strict_mode || global_system_variables.log_warnings > 2)
12824 {
12825 ib::error_or_warn(strict && innodb_strict_mode)
12826 << "Cannot add field " << field->name << " in table "
12827 << index.table->name << " because after adding it, the row size is "
12828 << info.get_overrun_size()
12829 << " which is greater than maximum allowed size ("
12830 << info.max_leaf_size << " bytes) for a record on index leaf page.";
12831 }
12832
12833 if (strict && innodb_strict_mode)
12834 return false;
12835
12836 ib_warn_row_too_big(m_thd, index.table);
12837 }
12838
12839 return true;
12840 }
12841
12842 /** Update a new table in an InnoDB database.
12843 @return error number */
12844 int
create_table_update_dict()12845 create_table_info_t::create_table_update_dict()
12846 {
12847 dict_table_t* innobase_table;
12848
12849 DBUG_ENTER("create_table_update_dict");
12850
12851 innobase_table = dict_table_open_on_name(
12852 m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
12853
12854 DBUG_ASSERT(innobase_table != 0);
12855 if (innobase_table->fts != NULL) {
12856 if (innobase_table->fts_doc_id_index == NULL) {
12857 innobase_table->fts_doc_id_index
12858 = dict_table_get_index_on_name(
12859 innobase_table, FTS_DOC_ID_INDEX_NAME);
12860 DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL);
12861 } else {
12862 DBUG_ASSERT(innobase_table->fts_doc_id_index
12863 == dict_table_get_index_on_name(
12864 innobase_table,
12865 FTS_DOC_ID_INDEX_NAME));
12866 }
12867 }
12868
12869 DBUG_ASSERT((innobase_table->fts == NULL)
12870 == (innobase_table->fts_doc_id_index == NULL));
12871
12872 innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info);
12873
12874 dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
12875
12876 /* Load server stopword into FTS cache */
12877 if (m_flags2 & DICT_TF2_FTS) {
12878 if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
12879 dict_table_close(innobase_table, FALSE, FALSE);
12880 srv_active_wake_master_thread();
12881 DBUG_RETURN(-1);
12882 }
12883
12884 mutex_enter(&dict_sys->mutex);
12885 fts_optimize_add_table(innobase_table);
12886 mutex_exit(&dict_sys->mutex);
12887 }
12888
12889 if (const Field* ai = m_form->found_next_number_field) {
12890 ut_ad(ai->stored_in_db());
12891
12892 ib_uint64_t autoinc = m_create_info->auto_increment_value;
12893
12894 if (autoinc == 0) {
12895 autoinc = 1;
12896 }
12897
12898 dict_table_autoinc_lock(innobase_table);
12899 dict_table_autoinc_initialize(innobase_table, autoinc);
12900
12901 if (innobase_table->is_temporary()) {
12902 /* AUTO_INCREMENT is not persistent for
12903 TEMPORARY TABLE. Temporary tables are never
12904 evicted. Keep the counter in memory only. */
12905 } else {
12906 const unsigned col_no = innodb_col_no(ai);
12907
12908 innobase_table->persistent_autoinc = 1
12909 + dict_table_get_nth_col_pos(
12910 innobase_table, col_no, NULL);
12911
12912 /* Persist the "last used" value, which
12913 typically is AUTO_INCREMENT - 1.
12914 In btr_create(), the value 0 was already written. */
12915 if (--autoinc) {
12916 btr_write_autoinc(
12917 dict_table_get_first_index(
12918 innobase_table),
12919 autoinc);
12920 }
12921 }
12922
12923 dict_table_autoinc_unlock(innobase_table);
12924 }
12925
12926 innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);
12927
12928 dict_table_close(innobase_table, FALSE, FALSE);
12929 DBUG_RETURN(0);
12930 }
12931
12932 /** Allocate a new trx. */
12933 void
allocate_trx()12934 create_table_info_t::allocate_trx()
12935 {
12936 m_trx = innobase_trx_allocate(m_thd);
12937
12938 m_trx->will_lock = true;
12939 m_trx->ddl = true;
12940 }
12941
12942 /** Create a new table to an InnoDB database.
12943 @param[in] name Table name, format: "db/table_name".
12944 @param[in] form Table format; columns and index information.
12945 @param[in] create_info Create info (including create statement string).
12946 @param[in] file_per_table whether to create .ibd file
12947 @param[in,out] trx dictionary transaction, or NULL to create new
12948 @return 0 if success else error number. */
12949 inline int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info,bool file_per_table,trx_t * trx)12950 ha_innobase::create(
12951 const char* name,
12952 TABLE* form,
12953 HA_CREATE_INFO* create_info,
12954 bool file_per_table,
12955 trx_t* trx)
12956 {
12957 int error;
12958 char norm_name[FN_REFLEN]; /* {database}/{tablename} */
12959 char remote_path[FN_REFLEN]; /* Absolute path of table */
12960
12961 DBUG_ENTER("ha_innobase::create");
12962
12963 DBUG_ASSERT(form->s == table_share);
12964 DBUG_ASSERT(table_share->table_type == TABLE_TYPE_SEQUENCE
12965 || table_share->table_type == TABLE_TYPE_NORMAL);
12966
12967 create_table_info_t info(ha_thd(),
12968 form,
12969 create_info,
12970 norm_name,
12971 remote_path,
12972 file_per_table, trx);
12973
12974 if ((error = info.initialize())
12975 || (error = info.prepare_create_table(name, !trx))) {
12976 if (trx) {
12977 trx_rollback_for_mysql(trx);
12978 row_mysql_unlock_data_dictionary(trx);
12979 }
12980 DBUG_RETURN(error);
12981 }
12982
12983 const bool own_trx = !trx;
12984
12985 if (own_trx) {
12986 info.allocate_trx();
12987 trx = info.trx();
12988 /* Latch the InnoDB data dictionary exclusively so that no deadlocks
12989 or lock waits can happen in it during a table create operation.
12990 Drop table etc. do this latching in row0mysql.cc. */
12991 row_mysql_lock_data_dictionary(trx);
12992 DBUG_ASSERT(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
12993 }
12994
12995 if ((error = info.create_table(own_trx))) {
12996 /* Drop the being-created table before rollback,
12997 so that rollback can possibly rename back a table
12998 that could have been renamed before the failed creation. */
12999 if (info.drop_before_rollback()) {
13000 trx->error_state = DB_SUCCESS;
13001 row_drop_table_for_mysql(info.table_name(),
13002 trx, SQLCOM_TRUNCATE, true,
13003 false);
13004 }
13005 trx_rollback_for_mysql(trx);
13006 row_mysql_unlock_data_dictionary(trx);
13007 goto func_exit;
13008 }
13009
13010 innobase_commit_low(trx);
13011 row_mysql_unlock_data_dictionary(trx);
13012
13013 /* Flush the log to reduce probability that the .frm files and
13014 the InnoDB data dictionary get out-of-sync if the user runs
13015 with innodb_flush_log_at_trx_commit = 0 */
13016 log_buffer_flush_to_disk();
13017
13018 ut_ad(!srv_read_only_mode);
13019
13020 error = info.create_table_update_dict();
13021
13022 func_exit:
13023 if (own_trx) {
13024 trx->free();
13025 }
13026
13027 /* Tell the InnoDB server that there might be work for
13028 utility threads: */
13029
13030 srv_active_wake_master_thread();
13031
13032 DBUG_RETURN(error);
13033 }
13034
13035 /** Create a new table to an InnoDB database.
13036 @param[in] name Table name, format: "db/table_name".
13037 @param[in] form Table format; columns and index information.
13038 @param[in] create_info Create info (including create statement string).
13039 @return 0 if success else error number. */
13040 int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)13041 ha_innobase::create(
13042 const char* name,
13043 TABLE* form,
13044 HA_CREATE_INFO* create_info)
13045 {
13046 return create(name, form, create_info, srv_file_per_table);
13047 }
13048
13049 /*****************************************************************//**
13050 Discards or imports an InnoDB tablespace.
13051 @return 0 == success, -1 == error */
13052
13053 int
discard_or_import_tablespace(my_bool discard)13054 ha_innobase::discard_or_import_tablespace(
13055 /*======================================*/
13056 my_bool discard) /*!< in: TRUE if discard, else import */
13057 {
13058
13059 DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
13060
13061 ut_a(m_prebuilt->trx != NULL);
13062 ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
13063 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13064
13065 if (high_level_read_only) {
13066 DBUG_RETURN(HA_ERR_TABLE_READONLY);
13067 }
13068
13069 if (m_prebuilt->table->is_temporary()) {
13070 ib_senderrf(
13071 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13072 ER_CANNOT_DISCARD_TEMPORARY_TABLE);
13073
13074 DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13075 }
13076
13077 if (m_prebuilt->table->space == fil_system.sys_space) {
13078 ib_senderrf(
13079 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13080 ER_TABLE_IN_SYSTEM_TABLESPACE,
13081 m_prebuilt->table->name.m_name);
13082
13083 DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13084 }
13085
13086 trx_start_if_not_started(m_prebuilt->trx, true);
13087
13088 /* Obtain an exclusive lock on the table. */
13089 dberr_t err = row_mysql_lock_table(
13090 m_prebuilt->trx, m_prebuilt->table, LOCK_X,
13091 discard ? "setting table lock for DISCARD TABLESPACE"
13092 : "setting table lock for IMPORT TABLESPACE");
13093
13094 if (err != DB_SUCCESS) {
13095 /* unable to lock the table: do nothing */
13096 } else if (discard) {
13097
13098 /* Discarding an already discarded tablespace should be an
13099 idempotent operation. Also, if the .ibd file is missing the
13100 user may want to set the DISCARD flag in order to IMPORT
13101 a new tablespace. */
13102
13103 if (!m_prebuilt->table->is_readable()) {
13104 ib_senderrf(
13105 m_prebuilt->trx->mysql_thd,
13106 IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
13107 m_prebuilt->table->name.m_name);
13108 }
13109
13110 err = row_discard_tablespace_for_mysql(
13111 m_prebuilt->table->name.m_name, m_prebuilt->trx);
13112
13113 } else if (m_prebuilt->table->is_readable()) {
13114 /* Commit the transaction in order to
13115 release the table lock. */
13116 trx_commit_for_mysql(m_prebuilt->trx);
13117
13118 ib::error() << "Unable to import tablespace "
13119 << m_prebuilt->table->name << " because it already"
13120 " exists. Please DISCARD the tablespace"
13121 " before IMPORT.";
13122 ib_senderrf(
13123 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13124 ER_TABLESPACE_EXISTS, m_prebuilt->table->name.m_name);
13125
13126 DBUG_RETURN(HA_ERR_TABLE_EXIST);
13127 } else {
13128 err = row_import_for_mysql(m_prebuilt->table, m_prebuilt);
13129
13130 if (err == DB_SUCCESS) {
13131
13132 info(HA_STATUS_TIME
13133 | HA_STATUS_CONST
13134 | HA_STATUS_VARIABLE
13135 | HA_STATUS_AUTO);
13136
13137 fil_crypt_set_encrypt_tables(srv_encrypt_tables);
13138 }
13139 }
13140
13141 /* Commit the transaction in order to release the table lock. */
13142 trx_commit_for_mysql(m_prebuilt->trx);
13143
13144 if (discard || err != DB_SUCCESS) {
13145 DBUG_RETURN(convert_error_code_to_mysql(
13146 err, m_prebuilt->table->flags, NULL));
13147 }
13148
13149 /* Evict and reload the table definition in order to invoke
13150 btr_cur_instant_init(). */
13151 table_id_t id = m_prebuilt->table->id;
13152 ut_ad(id);
13153 mutex_enter(&dict_sys->mutex);
13154 dict_table_close(m_prebuilt->table, TRUE, FALSE);
13155 dict_table_remove_from_cache(m_prebuilt->table);
13156 m_prebuilt->table = dict_table_open_on_id(id, TRUE,
13157 DICT_TABLE_OP_NORMAL);
13158 mutex_exit(&dict_sys->mutex);
13159 if (!m_prebuilt->table) {
13160 err = DB_TABLE_NOT_FOUND;
13161 } else {
13162 if (const Field* ai = table->found_next_number_field) {
13163 initialize_auto_increment(m_prebuilt->table, ai);
13164 }
13165 dict_stats_init(m_prebuilt->table);
13166 }
13167
13168 if (dict_stats_is_persistent_enabled(m_prebuilt->table)) {
13169 dberr_t ret;
13170
13171 /* Adjust the persistent statistics. */
13172 ret = dict_stats_update(m_prebuilt->table,
13173 DICT_STATS_RECALC_PERSISTENT);
13174
13175 if (ret != DB_SUCCESS) {
13176 push_warning_printf(
13177 ha_thd(),
13178 Sql_condition::WARN_LEVEL_WARN,
13179 ER_ALTER_INFO,
13180 "Error updating stats for table '%s'"
13181 " after table rebuild: %s",
13182 m_prebuilt->table->name.m_name,
13183 ut_strerr(ret));
13184 }
13185 }
13186
13187 DBUG_RETURN(0);
13188 }
13189
13190 /**
13191 Drops a table from an InnoDB database. Before calling this function,
13192 MySQL calls innobase_commit to commit the transaction of the current user.
13193 Then the current user cannot have locks set on the table. Drop table
13194 operation inside InnoDB will remove all locks any user has on the table
13195 inside InnoDB.
13196 @param[in] name table name
13197 @param[in] sqlcom SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ...
13198 @return error number */
delete_table(const char * name,enum_sql_command sqlcom)13199 inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
13200 {
13201 dberr_t err;
13202 THD* thd = ha_thd();
13203 char norm_name[FN_REFLEN];
13204
13205 DBUG_ENTER("ha_innobase::delete_table");
13206
13207 DBUG_EXECUTE_IF(
13208 "test_normalize_table_name_low",
13209 test_normalize_table_name_low();
13210 );
13211 DBUG_EXECUTE_IF(
13212 "test_ut_format_name",
13213 test_ut_format_name();
13214 );
13215
13216 /* Strangely, MySQL passes the table name without the '.frm'
13217 extension, in contrast to ::create */
13218 normalize_table_name(norm_name, name);
13219
13220 if (high_level_read_only) {
13221 DBUG_RETURN(HA_ERR_TABLE_READONLY);
13222 }
13223
13224 trx_t* parent_trx = check_trx_exists(thd);
13225
13226 /* Remove the to-be-dropped table from the list of modified tables
13227 by parent_trx. Otherwise we may end up with an orphaned pointer to
13228 the table object from parent_trx::mod_tables. This could happen in:
13229 SET AUTOCOMMIT=0;
13230 CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION
13231 ALL SELECT 1 AS a; */
13232 trx_mod_tables_t::const_iterator iter;
13233
13234 for (iter = parent_trx->mod_tables.begin();
13235 iter != parent_trx->mod_tables.end();
13236 ++iter) {
13237
13238 dict_table_t* table_to_drop = iter->first;
13239
13240 if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
13241 parent_trx->mod_tables.erase(table_to_drop);
13242 break;
13243 }
13244 }
13245
13246 trx_t* trx = innobase_trx_allocate(thd);
13247
13248 ulint name_len = strlen(name);
13249
13250 ut_a(name_len < 1000);
13251
13252 trx->will_lock = true;
13253
13254 /* Drop the table in InnoDB */
13255
13256 err = row_drop_table_for_mysql(norm_name, trx, sqlcom);
13257
13258 if (err == DB_TABLE_NOT_FOUND
13259 && innobase_get_lower_case_table_names() == 1) {
13260 char* is_part = is_partition(norm_name);
13261
13262 if (is_part) {
13263 char par_case_name[FN_REFLEN];
13264
13265 #ifndef __WIN__
13266 /* Check for the table using lower
13267 case name, including the partition
13268 separator "P" */
13269 strcpy(par_case_name, norm_name);
13270 innobase_casedn_str(par_case_name);
13271 #else
13272 /* On Windows platfrom, check
13273 whether there exists table name in
13274 system table whose name is
13275 not being normalized to lower case */
13276 normalize_table_name_c_low(
13277 par_case_name, name, FALSE);
13278 #endif
13279 err = row_drop_table_for_mysql(
13280 par_case_name, trx, sqlcom);
13281 }
13282 }
13283
13284 if (err == DB_TABLE_NOT_FOUND) {
13285 /* Test to drop all tables which matches db/tablename + '#'.
13286 Only partitions can have '#' as non-first character in
13287 the table name!
13288
13289 Temporary table names always start with '#', partitions are
13290 the only 'tables' that can have '#' after the first character
13291 and table name must have length > 0. User tables cannot have
13292 '#' since it would be translated to @0023. Therefor this should
13293 only match partitions. */
13294 uint len = (uint) strlen(norm_name);
13295 ulint num_partitions;
13296 ut_a(len < FN_REFLEN);
13297 norm_name[len] = '#';
13298 norm_name[len + 1] = 0;
13299 err = row_drop_database_for_mysql(norm_name, trx,
13300 &num_partitions);
13301 norm_name[len] = 0;
13302 table_name_t tbl_name(norm_name);
13303 if (num_partitions == 0 && !tbl_name.is_temporary()) {
13304 ib::error() << "Table " << tbl_name <<
13305 " does not exist in the InnoDB"
13306 " internal data dictionary though MariaDB is"
13307 " trying to drop it. Have you copied the .frm"
13308 " file of the table to the MariaDB database"
13309 " directory from another database? "
13310 << TROUBLESHOOTING_MSG;
13311 }
13312 if (num_partitions == 0) {
13313 err = DB_TABLE_NOT_FOUND;
13314 }
13315 }
13316
13317 if (err == DB_TABLE_NOT_FOUND
13318 && innobase_get_lower_case_table_names() == 1) {
13319 char* is_part = is_partition(norm_name);
13320
13321 if (is_part != NULL) {
13322 char par_case_name[FN_REFLEN];
13323
13324 #ifndef _WIN32
13325 /* Check for the table using lower
13326 case name, including the partition
13327 separator "P" */
13328 strcpy(par_case_name, norm_name);
13329 innobase_casedn_str(par_case_name);
13330 #else
13331 /* On Windows platfrom, check
13332 whether there exists table name in
13333 system table whose name is
13334 not being normalized to lower case */
13335 create_table_info_t::normalize_table_name_low(
13336 par_case_name, name, FALSE);
13337 #endif /* _WIN32 */
13338 err = row_drop_table_for_mysql(
13339 par_case_name, trx, sqlcom, true);
13340 }
13341 }
13342
13343 ut_ad(!srv_read_only_mode);
13344 /* Flush the log to reduce probability that the .frm files and
13345 the InnoDB data dictionary get out-of-sync if the user runs
13346 with innodb_flush_log_at_trx_commit = 0 */
13347
13348 log_buffer_flush_to_disk();
13349
13350 innobase_commit_low(trx);
13351
13352 trx->free();
13353
13354 DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
13355 }
13356
13357 /** Drop an InnoDB table.
13358 @param[in] name table name
13359 @return error number */
delete_table(const char * name)13360 int ha_innobase::delete_table(const char* name)
13361 {
13362 enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd()));
13363 /* SQLCOM_TRUNCATE should be passed via ha_innobase::truncate() only.
13364
13365 On client disconnect, when dropping temporary tables, the
13366 previous sqlcom would not be overwritten. In such a case, we
13367 will have thd_kill_level() != NOT_KILLED, !m_prebuilt can
13368 hold, and sqlcom could be anything, including TRUNCATE.
13369
13370 The sqlcom only matters for persistent tables; no persistent
13371 metadata or FOREIGN KEY metadata is kept for temporary
13372 tables. Therefore, we relax the assertion. If there is a bug
13373 that slips through this assertion due to !m_prebuilt, the
13374 worst impact should be that on DROP TABLE of a persistent
13375 table, FOREIGN KEY constraints will be ignored and their
13376 metadata will not be removed. */
13377 DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE
13378 || (thd_kill_level(ha_thd()) != THD_IS_NOT_KILLED
13379 && (!m_prebuilt
13380 || m_prebuilt->table->is_temporary())));
13381 return delete_table(name, sqlcom);
13382 }
13383
13384 /** Remove all tables in the named database inside InnoDB.
13385 @param[in] hton handlerton from InnoDB
13386 @param[in] path Database path; Inside InnoDB the name of the last
13387 directory in the path is used as the database name.
13388 For example, in 'mysql/data/test' the database name is 'test'. */
13389
13390 static
13391 void
innobase_drop_database(handlerton * hton,char * path)13392 innobase_drop_database(
13393 handlerton* hton,
13394 char* path)
13395 {
13396 char* namebuf;
13397
13398 /* Get the transaction associated with the current thd, or create one
13399 if not yet created */
13400
13401 DBUG_ASSERT(hton == innodb_hton_ptr);
13402
13403 if (high_level_read_only) {
13404 return;
13405 }
13406
13407 THD* thd = current_thd;
13408
13409 ulint len = 0;
13410 char* ptr = strend(path) - 2;
13411
13412 while (ptr >= path && *ptr != '\\' && *ptr != '/') {
13413 ptr--;
13414 len++;
13415 }
13416
13417 ptr++;
13418 namebuf = (char*) my_malloc(/*PSI_INSTRUMENT_ME,*/ (uint) len + 2, MYF(0));
13419
13420 memcpy(namebuf, ptr, len);
13421 namebuf[len] = '/';
13422 namebuf[len + 1] = '\0';
13423
13424 #ifdef _WIN32
13425 innobase_casedn_str(namebuf);
13426 #endif /* _WIN32 */
13427
13428 trx_t* trx = innobase_trx_allocate(thd);
13429 trx->will_lock = true;
13430
13431 ulint dummy;
13432
13433 row_drop_database_for_mysql(namebuf, trx, &dummy);
13434
13435 my_free(namebuf);
13436
13437 /* Flush the log to reduce probability that the .frm files and
13438 the InnoDB data dictionary get out-of-sync if the user runs
13439 with innodb_flush_log_at_trx_commit = 0 */
13440
13441 log_buffer_flush_to_disk();
13442
13443 innobase_commit_low(trx);
13444
13445 trx->free();
13446 }
13447
/** Rename an InnoDB table.
Both names are normalized first. When commit is set, this function latches
the data dictionary itself and releases it before returning; otherwise it
never touches the latch (ha_innobase::truncate() latches the dictionary
before calling with commit=false). Before renaming, the function waits,
bounded by the session lock wait timeout, for any FTS index sync that is
in progress on the table.
@param[in,out]	trx	InnoDB data dictionary transaction
@param[in]	from	old table name
@param[in]	to	new table name
@param[in]	commit	whether to commit trx (and to enforce FOREIGN KEY)
@return DB_SUCCESS or error code
@retval DB_LOCK_WAIT_TIMEOUT	if the wait for the FTS sync timed out */
inline dberr_t innobase_rename_table(trx_t *trx, const char *from,
                                     const char *to, bool commit)
{
	dberr_t	error;
	char	norm_to[FN_REFLEN];
	char	norm_from[FN_REFLEN];

	DBUG_ENTER("innobase_rename_table");
	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX
		    || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);

	ut_ad(!srv_read_only_mode);

	normalize_table_name(norm_to, to);
	normalize_table_name(norm_from, from);

	DEBUG_SYNC_C("innodb_rename_table_ready");

	trx_start_if_not_started(trx, true);
	ut_ad(trx->will_lock);

	if (commit) {
		/* Serialize data dictionary operations with dictionary mutex:
		no deadlocks can occur then in these operations. */
		row_mysql_lock_data_dictionary(trx);
	}

	/* This reference is released right after the FTS wait when
	!commit, and at func_exit when commit. */
	dict_table_t*   table = dict_table_open_on_name(
		norm_from, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);

	/* Since DICT_BG_YIELD has sleep for 250 milliseconds,
	Convert lock_wait_timeout unit from second to 250 milliseconds */
	long int lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd) * 4;
	if (table != NULL) {
		if (commit) {
			dict_stats_wait_bg_to_stop_using_table(table, trx);
		}
		for (dict_index_t* index = dict_table_get_first_index(table);
		     index != NULL;
		     index = dict_table_get_next_index(index)) {

			if (index->type & DICT_FTS) {
				/* Found */
				/* Yield until the sync is over, the
				transaction is interrupted, or the budget
				runs out. The post-decrement leaves the
				counter negative only in the last case. */
				while (index->index_fts_syncing
					&& !trx_is_interrupted(trx)
					&& (lock_wait_timeout--) > 0) {
					DICT_BG_YIELD(trx);
				}
			}
		}
		if (!commit) {
			dict_table_close(table, TRUE, FALSE);
		}
	}

	/* FTS sync is in progress. We shall timeout this operation */
	/* (A negative session timeout would also end up here.) */
	if (lock_wait_timeout < 0) {
		error = DB_LOCK_WAIT_TIMEOUT;
		goto func_exit;
	}

	error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit,
					   commit);

	if (error != DB_SUCCESS) {
		if (error == DB_TABLE_NOT_FOUND
		    && innobase_get_lower_case_table_names() == 1) {
			char*	is_part = is_partition(norm_from);

			if (is_part) {
				char	par_case_name[FN_REFLEN];
#ifndef _WIN32
				/* Check for the table using lower
				case name, including the partition
				separator "P" */
				strcpy(par_case_name, norm_from);
				innobase_casedn_str(par_case_name);
#else
				/* On Windows platform, check
				whether there exists table name in
				system table whose name is
				not being normalized to lower case */
				create_table_info_t::normalize_table_name_low(
					par_case_name, from, FALSE);
#endif /* _WIN32 */
				trx_start_if_not_started(trx, true);
				/* NOTE(review): the retry passes the
				literals true, false where the first attempt
				passed commit, commit - confirm that this
				difference is intended. */
				error = row_rename_table_for_mysql(
					par_case_name, norm_to, trx,
					true, false);
			}
		}

		/* Reached with DB_SUCCESS only if the retry above
		succeeded after the first attempt had failed. */
		if (error == DB_SUCCESS) {
#ifndef _WIN32
			sql_print_warning("Rename partition table %s"
					  " succeeds after converting to lower"
					  " case. The table may have"
					  " been moved from a case"
					  " in-sensitive file system.\n",
					  norm_from);
#else
			sql_print_warning("Rename partition table %s"
					  " succeeds after skipping the step to"
					  " lower case the table name."
					  " The table may have been"
					  " moved from a case sensitive"
					  " file system.\n",
					  norm_from);
#endif /* _WIN32 */
		}
	}

func_exit:
	if (commit) {
		if (table) {
			/* Clear BG_STAT_SHOULD_QUIT again and drop the
			reference taken by dict_table_open_on_name(). */
			table->stats_bg_flag &= ~BG_STAT_SHOULD_QUIT;
			dict_table_close(table, TRUE, FALSE);
		}
		row_mysql_unlock_data_dictionary(trx);
	}

	/* Flush the log to reduce probability that the .frm
	files and the InnoDB data dictionary get out-of-sync
	if the user runs with innodb_flush_log_at_trx_commit = 0 */

	log_buffer_flush_to_disk();

	DBUG_RETURN(error);
}
13583
/** TRUNCATE TABLE
Implemented as rename + create + drop: the existing table is renamed to
a temporary name, an empty table is created under the original name in
the same dictionary transaction, the handle is reopened on the new table,
and finally the renamed original is dropped.
@return error code
@retval 0 on success */
int ha_innobase::truncate()
{
	DBUG_ENTER("ha_innobase::truncate");

	if (high_level_read_only) {
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	}

	update_thd();

	HA_CREATE_INFO	info;
	/* Owns the name strings duplicated below; freed at the end. */
	mem_heap_t*	heap = mem_heap_create(1000);
	dict_table_t*	ib_table = m_prebuilt->table;
	/* Remembered here so that they can be carried over to the
	reopened handle after the table has been re-created. */
	const time_t	update_time = ib_table->update_time;
	const ulint	stored_lock = m_prebuilt->stored_select_lock_type;
	info.init();
	update_create_info_from_table(&info, table);

	if (ib_table->is_temporary()) {
		info.options|= HA_LEX_CREATE_TMP_TABLE;
	} else {
		dict_get_and_save_data_dir_path(ib_table, false);
	}

	char* data_file_name = ib_table->data_dir_path;

	if (data_file_name) {
		/* Pass a private copy of the path to the create info. */
		info.data_file_name = data_file_name
			= mem_heap_strdup(heap, data_file_name);
	}

	const char* temp_name = dict_mem_create_temporary_tablename(
		heap, ib_table->name.m_name, ib_table->id);
	/* Copy of the original name, for create() and open() below. */
	const char* name = mem_heap_strdup(heap, ib_table->name.m_name);
	trx_t*	trx = innobase_trx_allocate(m_user_thd);
	trx->will_lock = true;
	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
	/* The dictionary is latched here because innobase_rename_table()
	is called with commit=false and so does not latch it itself. */
	row_mysql_lock_data_dictionary(trx);
	dict_stats_wait_bg_to_stop_using_table(ib_table, trx);

	int err = convert_error_code_to_mysql(
		innobase_rename_table(trx, ib_table->name.m_name, temp_name,
				      false),
		ib_table->flags, m_user_thd);
	if (err) {
		trx_rollback_for_mysql(trx);
		row_mysql_unlock_data_dictionary(trx);
	} else {
		/* Re-create the table in the row format that it had. */
		switch (dict_tf_get_rec_format(ib_table->flags)) {
		case REC_FORMAT_REDUNDANT:
			info.row_type = ROW_TYPE_REDUNDANT;
			break;
		case REC_FORMAT_COMPACT:
			info.row_type = ROW_TYPE_COMPACT;
			break;
		case REC_FORMAT_COMPRESSED:
			info.row_type = ROW_TYPE_COMPRESSED;
			break;
		case REC_FORMAT_DYNAMIC:
			info.row_type = ROW_TYPE_DYNAMIC;
			break;
		}

		/* create() commits or rolls back trx and unlocks the
		data dictionary on every path when it is given a
		transaction. */
		err = create(name, table, &info,
			     ib_table->is_temporary()
			     || dict_table_is_file_per_table(ib_table), trx);
	}

	trx->free();

	if (!err) {
		/* Reopen the newly created table, and drop the
		original table that was renamed to temp_name. */

		row_prebuilt_t* prebuilt = m_prebuilt;
		uchar* upd_buf = m_upd_buf;
		ulint upd_buf_size = m_upd_buf_size;
		/* Mimic ha_innobase::close(). */
		m_prebuilt = NULL;
		m_upd_buf = NULL;
		m_upd_buf_size = 0;
		err = open(name, 0, 0);
		if (!err) {
			m_prebuilt->stored_select_lock_type = stored_lock;
			m_prebuilt->table->update_time = update_time;
			/* The old handle state is no longer needed. */
			row_prebuilt_free(prebuilt, FALSE);
			delete_table(temp_name, SQLCOM_TRUNCATE);
			my_free(upd_buf);
		} else {
			/* Revert to the old table before truncation. */
			m_prebuilt = prebuilt;
			m_upd_buf = upd_buf;
			m_upd_buf_size = upd_buf_size;
		}
	}

	mem_heap_free(heap);
	DBUG_RETURN(err);
}
13686
13687 /*********************************************************************//**
13688 Renames an InnoDB table.
13689 @return 0 or error code */
13690
13691 int
rename_table(const char * from,const char * to)13692 ha_innobase::rename_table(
13693 /*======================*/
13694 const char* from, /*!< in: old name of the table */
13695 const char* to) /*!< in: new name of the table */
13696 {
13697 THD* thd = ha_thd();
13698
13699 DBUG_ENTER("ha_innobase::rename_table");
13700
13701 if (high_level_read_only) {
13702 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
13703 DBUG_RETURN(HA_ERR_TABLE_READONLY);
13704 }
13705
13706 trx_t* trx = innobase_trx_allocate(thd);
13707 trx->will_lock = true;
13708 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
13709
13710 dberr_t error = innobase_rename_table(trx, from, to, true);
13711
13712 DEBUG_SYNC(thd, "after_innobase_rename_table");
13713
13714 innobase_commit_low(trx);
13715
13716 trx->free();
13717
13718 if (error == DB_SUCCESS) {
13719 char norm_from[MAX_FULL_NAME_LEN];
13720 char norm_to[MAX_FULL_NAME_LEN];
13721 char errstr[512];
13722 dberr_t ret;
13723
13724 normalize_table_name(norm_from, from);
13725 normalize_table_name(norm_to, to);
13726
13727 ret = dict_stats_rename_table(norm_from, norm_to,
13728 errstr, sizeof(errstr));
13729
13730 if (ret != DB_SUCCESS) {
13731 ib::error() << errstr;
13732
13733 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
13734 ER_LOCK_WAIT_TIMEOUT, errstr);
13735 }
13736 }
13737
13738 /* Add a special case to handle the Duplicated Key error
13739 and return DB_ERROR instead.
13740 This is to avoid a possible SIGSEGV error from mysql error
13741 handling code. Currently, mysql handles the Duplicated Key
13742 error by re-entering the storage layer and getting dup key
13743 info by calling get_dup_key(). This operation requires a valid
13744 table handle ('row_prebuilt_t' structure) which could no
13745 longer be available in the error handling stage. The suggested
13746 solution is to report a 'table exists' error message (since
13747 the dup key error here is due to an existing table whose name
13748 is the one we are trying to rename to) and return the generic
13749 error code. */
13750 if (error == DB_DUPLICATE_KEY) {
13751 my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
13752
13753 error = DB_ERROR;
13754 } else if (error == DB_LOCK_WAIT_TIMEOUT) {
13755 my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0), to);
13756
13757 error = DB_LOCK_WAIT;
13758 }
13759
13760 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
13761 }
13762
13763 /*********************************************************************//**
13764 Estimates the number of index records in a range.
13765 @return estimated number of rows */
13766
13767 ha_rows
records_in_range(uint keynr,key_range * min_key,key_range * max_key)13768 ha_innobase::records_in_range(
13769 /*==========================*/
13770 uint keynr, /*!< in: index number */
13771 key_range *min_key, /*!< in: start key value of the
13772 range, may also be 0 */
13773 key_range *max_key) /*!< in: range end key val, may
13774 also be 0 */
13775 {
13776 KEY* key;
13777 dict_index_t* index;
13778 dtuple_t* range_start;
13779 dtuple_t* range_end;
13780 ha_rows n_rows;
13781 page_cur_mode_t mode1;
13782 page_cur_mode_t mode2;
13783 mem_heap_t* heap;
13784
13785 DBUG_ENTER("records_in_range");
13786
13787 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13788
13789 m_prebuilt->trx->op_info = "estimating records in index range";
13790
13791 active_index = keynr;
13792
13793 key = table->key_info + active_index;
13794
13795 index = innobase_get_index(keynr);
13796
13797 /* There exists possibility of not being able to find requested
13798 index due to inconsistency between MySQL and InoDB dictionary info.
13799 Necessary message should have been printed in innobase_get_index() */
13800 if (!m_prebuilt->table->space) {
13801 n_rows = HA_POS_ERROR;
13802 goto func_exit;
13803 }
13804 if (!index) {
13805 n_rows = HA_POS_ERROR;
13806 goto func_exit;
13807 }
13808 if (index->is_corrupted()) {
13809 n_rows = HA_ERR_INDEX_CORRUPT;
13810 goto func_exit;
13811 }
13812 if (!row_merge_is_index_usable(m_prebuilt->trx, index)) {
13813 n_rows = HA_ERR_TABLE_DEF_CHANGED;
13814 goto func_exit;
13815 }
13816
13817 heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t)
13818 + sizeof(dtuple_t)));
13819
13820 range_start = dtuple_create(heap, key->ext_key_parts);
13821 dict_index_copy_types(range_start, index, key->ext_key_parts);
13822
13823 range_end = dtuple_create(heap, key->ext_key_parts);
13824 dict_index_copy_types(range_end, index, key->ext_key_parts);
13825
13826 row_sel_convert_mysql_key_to_innobase(
13827 range_start,
13828 m_prebuilt->srch_key_val1,
13829 m_prebuilt->srch_key_val_len,
13830 index,
13831 (byte*) (min_key ? min_key->key : (const uchar*) 0),
13832 (ulint) (min_key ? min_key->length : 0));
13833
13834 DBUG_ASSERT(min_key
13835 ? range_start->n_fields > 0
13836 : range_start->n_fields == 0);
13837
13838 row_sel_convert_mysql_key_to_innobase(
13839 range_end,
13840 m_prebuilt->srch_key_val2,
13841 m_prebuilt->srch_key_val_len,
13842 index,
13843 (byte*) (max_key ? max_key->key : (const uchar*) 0),
13844 (ulint) (max_key ? max_key->length : 0));
13845
13846 DBUG_ASSERT(max_key
13847 ? range_end->n_fields > 0
13848 : range_end->n_fields == 0);
13849
13850 mode1 = convert_search_mode_to_innobase(
13851 min_key ? min_key->flag : HA_READ_KEY_EXACT);
13852
13853 mode2 = convert_search_mode_to_innobase(
13854 max_key ? max_key->flag : HA_READ_KEY_EXACT);
13855
13856 if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
13857
13858 if (dict_index_is_spatial(index)) {
13859 /*Only min_key used in spatial index. */
13860 n_rows = rtr_estimate_n_rows_in_range(
13861 index, range_start, mode1);
13862 } else {
13863 n_rows = btr_estimate_n_rows_in_range(
13864 index, range_start, mode1, range_end, mode2);
13865 }
13866 } else {
13867
13868 n_rows = HA_POS_ERROR;
13869 }
13870
13871 mem_heap_free(heap);
13872
13873 DBUG_EXECUTE_IF(
13874 "print_btr_estimate_n_rows_in_range_return_value",
13875 push_warning_printf(
13876 ha_thd(), Sql_condition::WARN_LEVEL_WARN,
13877 ER_NO_DEFAULT,
13878 "btr_estimate_n_rows_in_range(): %lld",
13879 (longlong) n_rows);
13880 );
13881
13882 func_exit:
13883
13884 m_prebuilt->trx->op_info = (char*)"";
13885
13886 /* The MySQL optimizer seems to believe an estimate of 0 rows is
13887 always accurate and may return the result 'Empty set' based on that.
13888 The accuracy is not guaranteed, and even if it were, for a locking
13889 read we should anyway perform the search to set the next-key lock.
13890 Add 1 to the value to make sure MySQL does not make the assumption! */
13891
13892 if (n_rows == 0) {
13893 n_rows = 1;
13894 }
13895
13896 DBUG_RETURN((ha_rows) n_rows);
13897 }
13898
13899 /*********************************************************************//**
13900 Gives an UPPER BOUND to the number of rows in a table. This is used in
13901 filesort.cc.
13902 @return upper bound of rows */
13903
13904 ha_rows
estimate_rows_upper_bound()13905 ha_innobase::estimate_rows_upper_bound()
13906 /*====================================*/
13907 {
13908 const dict_index_t* index;
13909 ulonglong estimate;
13910 ulonglong local_data_file_length;
13911
13912 DBUG_ENTER("estimate_rows_upper_bound");
13913
13914 /* We do not know if MySQL can call this function before calling
13915 external_lock(). To be safe, update the thd of the current table
13916 handle. */
13917
13918 update_thd(ha_thd());
13919
13920 m_prebuilt->trx->op_info = "calculating upper bound for table rows";
13921
13922 index = dict_table_get_first_index(m_prebuilt->table);
13923
13924 ulint stat_n_leaf_pages = index->stat_n_leaf_pages;
13925
13926 ut_a(stat_n_leaf_pages > 0);
13927
13928 local_data_file_length = ulonglong(stat_n_leaf_pages)
13929 << srv_page_size_shift;
13930
13931 /* Calculate a minimum length for a clustered index record and from
13932 that an upper bound for the number of rows. Since we only calculate
13933 new statistics in row0mysql.cc when a table has grown by a threshold
13934 factor, we must add a safety factor 2 in front of the formula below. */
13935
13936 estimate = 2 * local_data_file_length
13937 / dict_index_calc_min_rec_len(index);
13938
13939 m_prebuilt->trx->op_info = "";
13940
13941 /* Set num_rows less than MERGEBUFF to simulate the case where we do
13942 not have enough space to merge the externally sorted file blocks. */
13943 DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF",
13944 estimate = 2;
13945 DBUG_SET("-d,set_num_rows_lt_MERGEBUFF");
13946 );
13947
13948 DBUG_RETURN((ha_rows) estimate);
13949 }
13950
13951 /*********************************************************************//**
13952 How many seeks it will take to read through the table. This is to be
13953 comparable to the number returned by records_in_range so that we can
13954 decide if we should scan the table or use keys.
13955 @return estimated time measured in disk seeks */
13956
13957 double
scan_time()13958 ha_innobase::scan_time()
13959 /*====================*/
13960 {
13961 /* Since MySQL seems to favor table scans too much over index
13962 searches, we pretend that a sequential read takes the same time
13963 as a random disk read, that is, we do not divide the following
13964 by 10, which would be physically realistic. */
13965
13966 /* The locking below is disabled for performance reasons. Without
13967 it we could end up returning uninitialized value to the caller,
13968 which in the worst case could make some query plan go bogus or
13969 issue a Valgrind warning. */
13970 if (m_prebuilt == NULL) {
13971 /* In case of derived table, Optimizer will try to fetch stat
13972 for table even before table is create or open. In such
13973 cases return default value of 1.
13974 TODO: This will be further improved to return some approximate
13975 estimate but that would also needs pre-population of stats
13976 structure. As of now approach is in sync with MyISAM. */
13977 return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
13978 }
13979
13980 ulint stat_clustered_index_size;
13981
13982 ut_a(m_prebuilt->table->stat_initialized);
13983
13984 stat_clustered_index_size =
13985 m_prebuilt->table->stat_clustered_index_size;
13986
13987 return((double) stat_clustered_index_size);
13988 }
13989
13990 /******************************************************************//**
13991 Calculate the time it takes to read a set of ranges through an index
13992 This enables us to optimise reads for clustered indexes.
13993 @return estimated time measured in disk seeks */
13994
13995 double
read_time(uint index,uint ranges,ha_rows rows)13996 ha_innobase::read_time(
13997 /*===================*/
13998 uint index, /*!< in: key number */
13999 uint ranges, /*!< in: how many ranges */
14000 ha_rows rows) /*!< in: estimated number of rows in the ranges */
14001 {
14002 ha_rows total_rows;
14003
14004 if (index != table->s->primary_key) {
14005 /* Not clustered */
14006 return(handler::read_time(index, ranges, rows));
14007 }
14008
14009 /* Assume that the read time is proportional to the scan time for all
14010 rows + at most one seek per range. */
14011
14012 double time_for_scan = scan_time();
14013
14014 if ((total_rows = estimate_rows_upper_bound()) < rows) {
14015
14016 return(time_for_scan);
14017 }
14018
14019 return(ranges + (double) rows / (double) total_rows * time_for_scan);
14020 }
14021
14022 /** Update the system variable with the given value of the InnoDB
14023 buffer pool size.
14024 @param[in] buf_pool_size given value of buffer pool size.*/
14025 void
innodb_set_buf_pool_size(ulonglong buf_pool_size)14026 innodb_set_buf_pool_size(ulonglong buf_pool_size)
14027 {
14028 innobase_buffer_pool_size = buf_pool_size;
14029 }
14030
14031 /*********************************************************************//**
14032 Calculates the key number used inside MySQL for an Innobase index.
14033 @return the key number used inside MySQL */
14034 static
14035 unsigned
innobase_get_mysql_key_number_for_index(const TABLE * table,dict_table_t * ib_table,const dict_index_t * index)14036 innobase_get_mysql_key_number_for_index(
14037 /*====================================*/
14038 const TABLE* table, /*!< in: table in MySQL data
14039 dictionary */
14040 dict_table_t* ib_table,/*!< in: table in InnoDB data
14041 dictionary */
14042 const dict_index_t* index) /*!< in: index */
14043 {
14044 const dict_index_t* ind;
14045 unsigned int i;
14046
14047 /* If index does not belong to the table object of share structure
14048 (ib_table comes from the share structure) search the index->table
14049 object instead */
14050 if (index->table != ib_table) {
14051 i = 0;
14052 ind = dict_table_get_first_index(index->table);
14053
14054 while (index != ind) {
14055 ind = dict_table_get_next_index(ind);
14056 i++;
14057 }
14058
14059 if (dict_index_is_auto_gen_clust(index)) {
14060 ut_a(i > 0);
14061 i--;
14062 }
14063
14064 return(i);
14065 }
14066
14067 /* Directly find matching index with information from mysql TABLE
14068 structure and InnoDB dict_index_t list */
14069 for (i = 0; i < table->s->keys; i++) {
14070 ind = dict_table_get_index_on_name(
14071 ib_table, table->key_info[i].name.str);
14072
14073 if (index == ind) {
14074 return(i);
14075 }
14076 }
14077
14078 /* Loop through each index of the table and lock them */
14079 for (ind = dict_table_get_first_index(ib_table);
14080 ind != NULL;
14081 ind = dict_table_get_next_index(ind)) {
14082 if (index == ind) {
14083 /* Temp index is internal to InnoDB, that is
14084 not present in the MySQL index list, so no
14085 need to print such mismatch warning. */
14086 if (index->is_committed()) {
14087 sql_print_warning(
14088 "Found index %s in InnoDB index list"
14089 " but not its MariaDB index number."
14090 " It could be an InnoDB internal"
14091 " index.",
14092 index->name());
14093 }
14094 return(~0U);
14095 }
14096 }
14097
14098 ut_error;
14099
14100 return(~0U);
14101 }
14102
14103 /*********************************************************************//**
14104 Calculate Record Per Key value. Need to exclude the NULL value if
14105 innodb_stats_method is set to "nulls_ignored"
14106 @return estimated record per key value */
14107 rec_per_key_t
innodb_rec_per_key(dict_index_t * index,ulint i,ha_rows records)14108 innodb_rec_per_key(
14109 /*===============*/
14110 dict_index_t* index, /*!< in: dict_index_t structure */
14111 ulint i, /*!< in: the column we are
14112 calculating rec per key */
14113 ha_rows records) /*!< in: estimated total records */
14114 {
14115 rec_per_key_t rec_per_key;
14116 ib_uint64_t n_diff;
14117
14118 ut_a(index->table->stat_initialized);
14119
14120 ut_ad(i < dict_index_get_n_unique(index));
14121 ut_ad(!dict_index_is_spatial(index));
14122
14123 if (records == 0) {
14124 /* "Records per key" is meaningless for empty tables.
14125 Return 1.0 because that is most convenient to the Optimizer. */
14126 return(1.0);
14127 }
14128
14129 n_diff = index->stat_n_diff_key_vals[i];
14130
14131 if (n_diff == 0) {
14132
14133 rec_per_key = static_cast<rec_per_key_t>(records);
14134 } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
14135 ib_uint64_t n_null;
14136 ib_uint64_t n_non_null;
14137
14138 n_non_null = index->stat_n_non_null_key_vals[i];
14139
14140 /* In theory, index->stat_n_non_null_key_vals[i]
14141 should always be less than the number of records.
14142 Since this is statistics value, the value could
14143 have slight discrepancy. But we will make sure
14144 the number of null values is not a negative number. */
14145 if (records < n_non_null) {
14146 n_null = 0;
14147 } else {
14148 n_null = records - n_non_null;
14149 }
14150
14151 /* If the number of NULL values is the same as or
14152 larger than that of the distinct values, we could
14153 consider that the table consists mostly of NULL value.
14154 Set rec_per_key to 1. */
14155 if (n_diff <= n_null) {
14156 rec_per_key = 1.0;
14157 } else {
14158 /* Need to exclude rows with NULL values from
14159 rec_per_key calculation */
14160 rec_per_key
14161 = static_cast<rec_per_key_t>(records - n_null)
14162 / (n_diff - n_null);
14163 }
14164 } else {
14165 DEBUG_SYNC_C("after_checking_for_0");
14166 rec_per_key = static_cast<rec_per_key_t>(records) / n_diff;
14167 }
14168
14169 if (rec_per_key < 1.0) {
14170 /* Values below 1.0 are meaningless and must be due to the
14171 stats being imprecise. */
14172 rec_per_key = 1.0;
14173 }
14174
14175 return(rec_per_key);
14176 }
14177
14178 /** Calculate how many KiB of new data we will be able to insert to the
14179 tablespace without running out of space. Start with a space object that has
14180 been acquired by the caller who holds it for the calculation,
14181 @param[in] space tablespace object from fil_space_acquire()
14182 @return available space in KiB */
14183 static uintmax_t
fsp_get_available_space_in_free_extents(const fil_space_t & space)14184 fsp_get_available_space_in_free_extents(const fil_space_t& space)
14185 {
14186 ulint size_in_header = space.size_in_header;
14187 if (size_in_header < FSP_EXTENT_SIZE) {
14188 return 0; /* TODO: count free frag pages and
14189 return a value based on that */
14190 }
14191
14192 /* Below we play safe when counting free extents above the free limit:
14193 some of them will contain extent descriptor pages, and therefore
14194 will not be free extents */
14195 ut_ad(size_in_header >= space.free_limit);
14196 ulint n_free_up =
14197 (size_in_header - space.free_limit) / FSP_EXTENT_SIZE;
14198
14199 const ulint size = page_size_t(space.flags).physical();
14200 if (n_free_up > 0) {
14201 n_free_up--;
14202 n_free_up -= n_free_up / (size / FSP_EXTENT_SIZE);
14203 }
14204
14205 /* We reserve 1 extent + 0.5 % of the space size to undo logs
14206 and 1 extent + 0.5 % to cleaning operations; NOTE: this source
14207 code is duplicated in the function above! */
14208
14209 ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
14210 ulint n_free = space.free_len + n_free_up;
14211
14212 if (reserve > n_free) {
14213 return(0);
14214 }
14215
14216 return(static_cast<uintmax_t>(n_free - reserve)
14217 * FSP_EXTENT_SIZE * (size / 1024));
14218 }
14219
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in is
selected by the bits of flag:
- HA_STATUS_TIME: optionally refreshes the statistics (when is_analyze
  or innobase_stats_on_metadata is set) and sets stats.update_time;
- HA_STATUS_VARIABLE: row count, data/index lengths, free space;
- HA_STATUS_CONST: index count sanity check, stats.create_time and
  the per-key-part rec_per_key values;
- HA_STATUS_ERRKEY: errkey of the last error of the transaction;
- HA_STATUS_AUTO: stats.auto_increment_value.
@return HA_ERR_* error code or 0; HA_ERR_GENERIC is returned when the
statistics update fails */

int
ha_innobase::info_low(
/*==================*/
	uint	flag,		/*!< in: what information is requested */
	bool	is_analyze)	/*!< in: true if called for ANALYZE, which
				forces a recalculation of the statistics */
{
	dict_table_t*	ib_table;
	ib_uint64_t	n_rows;
	char		path[FN_REFLEN];
	os_file_stat_t	stat_info;

	DBUG_ENTER("info");

	DEBUG_SYNC_C("ha_innobase_info_low");

	/* dict_sys->mutex is acquired further below, so the caller must
	not already hold it. */
	ut_ad(!mutex_own(&dict_sys->mutex));

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted.
	NOTE(review): no code in this function implements this suppression
	at this point; the only srv_force_recovery check is near the end. */

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "returning various info to MariaDB";

	ib_table = m_prebuilt->table;
	DBUG_ASSERT(ib_table->get_ref_count() > 0);

	/* For an unreadable table, mark the statistics as initialized so
	that the ut_a(ib_table->stat_initialized) checks below hold. */
	if (!ib_table->is_readable()) {
		ib_table->stat_initialized = true;
	}

	if (flag & HA_STATUS_TIME) {
		if (is_analyze || innobase_stats_on_metadata) {

			dict_stats_upd_option_t	opt;
			dberr_t			ret;

			m_prebuilt->trx->op_info = "updating table statistics";

			if (dict_stats_is_persistent_enabled(ib_table)) {

				if (is_analyze) {
					/* Under the data dictionary lock,
					remove the table from the stats
					recalc pool and wait for the
					background stats code to stop using
					the table. */
					row_mysql_lock_data_dictionary(
						m_prebuilt->trx);
					dict_stats_recalc_pool_del(ib_table);
					dict_stats_wait_bg_to_stop_using_table(
						ib_table, m_prebuilt->trx);
					row_mysql_unlock_data_dictionary(
						m_prebuilt->trx);
					opt = DICT_STATS_RECALC_PERSISTENT;
				} else {
					/* This is e.g. 'SHOW INDEXES', fetch
					the persistent stats from disk. */
					opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
				}
			} else {
				opt = DICT_STATS_RECALC_TRANSIENT;
			}

			ret = dict_stats_update(ib_table, opt);

			if (opt == DICT_STATS_RECALC_PERSISTENT) {
				/* Clear the BG_STAT_SHOULD_QUIT bit in
				stats_bg_flag, regardless of whether the
				update succeeded. */
				mutex_enter(&dict_sys->mutex);
				ib_table->stats_bg_flag
					&= byte(~BG_STAT_SHOULD_QUIT);
				mutex_exit(&dict_sys->mutex);
			}

			if (ret != DB_SUCCESS) {
				m_prebuilt->trx->op_info = "";
				DBUG_RETURN(HA_ERR_GENERIC);
			}

			m_prebuilt->trx->op_info =
				"returning various info to MariaDB";
		}


		stats.update_time = (ulong) ib_table->update_time;
	}

	/* Debug injection: skip everything that would acquire
	dict_sys->mutex. */
	DBUG_EXECUTE_IF("dict_sys_mutex_avoid", goto func_exit;);

	dict_stats_init(ib_table);

	if (flag & HA_STATUS_VARIABLE) {

		ulint	stat_clustered_index_size;
		ulint	stat_sum_of_other_index_sizes;

		/* Copy the three statistics values while holding
		dict_sys->mutex. */
		mutex_enter(&dict_sys->mutex);

		ut_a(ib_table->stat_initialized);

		n_rows = ib_table->stat_n_rows;

		stat_clustered_index_size
			= ib_table->stat_clustered_index_size;

		stat_sum_of_other_index_sizes
			= ib_table->stat_sum_of_other_index_sizes;

		mutex_exit(&dict_sys->mutex);

		/*
		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) {
			n_rows++;
		}

		/* Fix bug#40386: Not flushing query cache after truncate.
		n_rows can not be 0 unless the table is empty, set to 1
		instead. The original problem of bug#29507 is actually
		fixed in the server code. */
		if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 1;

			/* We need to reset the m_prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			m_prebuilt->autoinc_last_value = 0;
		}

		stats.records = (ha_rows) n_rows;
		stats.deleted = 0;
		/* The length fields are only filled in when the table has
		a tablespace object. The index sizes are multiplied by the
		physical page size to obtain byte lengths. */
		if (fil_space_t* space = ib_table->space) {
			const ulint size = page_size_t(space->flags)
				.physical();
			stats.data_file_length
				= ulonglong(stat_clustered_index_size)
				* size;
			stats.index_file_length
				= ulonglong(stat_sum_of_other_index_sizes)
				* size;
			/* The helper returns KiB; convert to bytes. */
			stats.delete_length = 1024
				* fsp_get_available_space_in_free_extents(
					*space);
		}
		stats.check_time = 0;
		stats.mrr_length_per_rec= (uint)ref_length +  8; // 8 = max(sizeof(void *));

		/* Avoid a division by zero for an empty table. */
		if (stats.records == 0) {
			stats.mean_rec_length = 0;
		} else {
			stats.mean_rec_length = (ulong)
				(stats.data_file_length / stats.records);
		}
	}

	if (flag & HA_STATUS_CONST) {
		ulong i;
		/* Verify the number of index in InnoDB and MySQL
		matches up. If m_prebuilt->clust_index_was_generated
		holds, InnoDB defines GEN_CLUST_INDEX internally */
		ulint	num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
			- m_prebuilt->clust_index_was_generated;
		if (table->s->keys < num_innodb_index) {
			/* If there are too many indexes defined
			inside InnoDB, ignore those that are being
			created, because MySQL will only consider
			the fully built indexes here. */

			for (const dict_index_t* index
				     = UT_LIST_GET_FIRST(ib_table->indexes);
			     index != NULL;
			     index = UT_LIST_GET_NEXT(indexes, index)) {

				/* First, online index creation is
				completed inside InnoDB, and then
				MySQL attempts to upgrade the
				meta-data lock so that it can rebuild
				the .frm file. If we get here in that
				time frame, dict_index_is_online_ddl()
				would not hold and the index would
				still not be included in TABLE_SHARE. */
				if (!index->is_committed()) {
					num_innodb_index--;
				}
			}

			/* If InnoDB still has more indexes, do not count
			an existing FTS doc id index either. */
			if (table->s->keys < num_innodb_index
			    && innobase_fts_check_doc_id_index(
				    ib_table, NULL, NULL)
			    == FTS_EXIST_DOC_ID_INDEX) {
				num_innodb_index--;
			}
		}

		/* Any remaining difference is recorded on the table and
		reported; processing continues nevertheless. */
		if (table->s->keys != num_innodb_index) {
			ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
			ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
		}


		/* Build the file name from the data home directory, the
		normalized table path and reg_ext; its ctime is used as
		the creation time below. */
		snprintf(path, sizeof(path), "%s/%s%s",
			 mysql_data_home, table->s->normalized_path.str,
			 reg_ext);

		unpack_filename(path,path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(
			    path, &stat_info, false,
			    srv_read_only_mode) == DB_SUCCESS) {
			stats.create_time = (ulong) stat_info.ctime;
		}

		/* Scope guard: holds dict_sys->mutex from here until the
		end of the enclosing block, including the early exits
		from the loops below. */
		struct Locking {
			Locking() { mutex_enter(&dict_sys->mutex); }
			~Locking() { mutex_exit(&dict_sys->mutex); }
		} locking;

		ut_a(ib_table->stat_initialized);

		for (i = 0; i < table->s->keys; i++) {
			ulong	j;

			dict_index_t* index = innobase_get_index(i);

			if (index == NULL) {
				/* The key has no InnoDB counterpart: report
				the mismatch and stop processing keys. */
				ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
				ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
				break;
			}

			KEY*	key = &table->key_info[i];

			for (j = 0; j < key->ext_key_parts; j++) {

				if ((key->flags & HA_FULLTEXT)
				    || (key->flags & HA_SPATIAL)) {

					/* The record per key does not apply to
					FTS or Spatial indexes. */
				/*
					key->rec_per_key[j] = 1;
					key->set_records_per_key(j, 1.0);
				*/
					continue;
				}

				/* Statistics exist only for the first
				index->n_uniq columns; stop at the first key
				part beyond that. */
				if (j + 1 > index->n_uniq) {
					sql_print_error(
						"Index %s of %s has %u columns"
					        " unique inside InnoDB, but "
						"MySQL is asking statistics for"
					        " %lu columns. Have you mixed "
						"up .frm files from different "
						" installations? %s",
						index->name(),
						ib_table->name.m_name,
						index->n_uniq, j + 1,
						TROUBLESHOOTING_MSG);
					break;
				}

				/* innodb_rec_per_key() will use
				index->stat_n_diff_key_vals[] and the value we
				pass index->table->stat_n_rows. Both are
				calculated by ANALYZE and by the background
				stats gathering thread (which kicks in when too
				much of the table has been changed). In
				addition table->stat_n_rows is adjusted with
				each DML (e.g. ++ on row insert). Those
				adjustments are not MVCC'ed and not even
				reversed on rollback. So,
				index->stat_n_diff_key_vals[] and
				index->table->stat_n_rows could have been
				calculated at different time. This is
				acceptable. */

				ulong	rec_per_key_int = static_cast<ulong>(
					innodb_rec_per_key(index, j,
							   stats.records));

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key_int = rec_per_key_int / 2;

				/* Halving may produce 0; report at least 1. */
				if (rec_per_key_int == 0) {
					rec_per_key_int = 1;
				}

				key->rec_per_key[j] = rec_per_key_int;
			}
		}
	}

	/* When srv_force_recovery is at or above
	SRV_FORCE_NO_IBUF_MERGE, skip both the error key and the
	auto-increment lookups below. */
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		goto func_exit;

	} else if (flag & HA_STATUS_ERRKEY) {
		const dict_index_t*	err_index;

		ut_a(m_prebuilt->trx);
		ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);

		err_index = trx_get_error_info(m_prebuilt->trx);

		/* Prefer the index recorded in the transaction; otherwise
		fall back to the stored key number, mapping
		ULINT_UNDEFINED to ~0U. */
		if (err_index) {
			errkey = innobase_get_mysql_key_number_for_index(
					table, ib_table, err_index);
		} else {
			errkey = (unsigned int) (
				(m_prebuilt->trx->error_key_num
				 == ULINT_UNDEFINED)
					? ~0U
					: m_prebuilt->trx->error_key_num);
		}
	}

	if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
		stats.auto_increment_value = innobase_peek_autoinc();
	}

func_exit:
	m_prebuilt->trx->op_info = (char*)"";

	DBUG_RETURN(0);
}
14568
14569 /*********************************************************************//**
14570 Returns statistics information of the table to the MySQL interpreter,
14571 in various fields of the handle object.
14572 @return HA_ERR_* error code or 0 */
14573
14574 int
info(uint flag)14575 ha_innobase::info(
14576 /*==============*/
14577 uint flag) /*!< in: what information is requested */
14578 {
14579 return(info_low(flag, false /* not ANALYZE */));
14580 }
14581
14582 /*
14583 Updates index cardinalities of the table, based on random dives into
14584 each index tree. This does NOT calculate exact statistics on the table.
14585 @return HA_ADMIN_* error code or HA_ADMIN_OK */
14586
14587 int
analyze(THD *,HA_CHECK_OPT *)14588 ha_innobase::analyze(THD*, HA_CHECK_OPT*)
14589 {
14590 /* Simply call info_low() with all the flags
14591 and request recalculation of the statistics */
14592 int ret = info_low(
14593 HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
14594 true /* this is ANALYZE */);
14595
14596 if (ret != 0) {
14597 return(HA_ADMIN_FAILED);
14598 }
14599
14600 return(HA_ADMIN_OK);
14601 }
14602
14603 /*****************************************************************//**
14604 Defragment table.
14605 @return error number */
defragment_table(const char * name)14606 inline int ha_innobase::defragment_table(const char *name)
14607 {
14608 char norm_name[FN_REFLEN];
14609 dict_table_t* table = NULL;
14610 dict_index_t* index = NULL;
14611 int ret = 0;
14612 dberr_t err = DB_SUCCESS;
14613
14614 normalize_table_name(norm_name, name);
14615
14616 table = dict_table_open_on_name(norm_name, FALSE,
14617 FALSE, DICT_ERR_IGNORE_FK_NOKEY);
14618
14619 for (index = dict_table_get_first_index(table); index;
14620 index = dict_table_get_next_index(index)) {
14621
14622 if (index->is_corrupted()) {
14623 continue;
14624 }
14625
14626 if (dict_index_is_spatial(index)) {
14627 /* Do not try to defragment spatial indexes,
14628 because doing it properly would require
14629 appropriate logic around the SSN (split
14630 sequence number). */
14631 continue;
14632 }
14633
14634 if (index->page == FIL_NULL) {
14635 /* Do not defragment auxiliary tables related
14636 to FULLTEXT INDEX. */
14637 ut_ad(index->type & DICT_FTS);
14638 continue;
14639 }
14640
14641 if (btr_defragment_find_index(index)) {
14642 // We borrow this error code. When the same index is
14643 // already in the defragmentation queue, issue another
14644 // defragmentation only introduces overhead. We return
14645 // an error here to let the user know this is not
14646 // necessary. Note that this will fail a query that's
14647 // trying to defragment a full table if one of the
14648 // indicies in that table is already in defragmentation.
14649 // We choose this behavior so user is aware of this
14650 // rather than silently defragment other indicies of
14651 // that table.
14652 ret = ER_SP_ALREADY_EXISTS;
14653 break;
14654 }
14655
14656 os_event_t event = btr_defragment_add_index(index, &err);
14657
14658 if (err != DB_SUCCESS) {
14659 push_warning_printf(
14660 current_thd,
14661 Sql_condition::WARN_LEVEL_WARN,
14662 ER_NO_SUCH_TABLE,
14663 "Table %s is encrypted but encryption service or"
14664 " used key_id is not available. "
14665 " Can't continue checking table.",
14666 index->table->name.m_name);
14667
14668 ret = convert_error_code_to_mysql(err, 0, current_thd);
14669 break;
14670 }
14671
14672 if (event) {
14673 while(os_event_wait_time(event, 1000000)) {
14674 if (thd_killed(current_thd)) {
14675 btr_defragment_remove_index(index);
14676 ret = ER_QUERY_INTERRUPTED;
14677 break;
14678 }
14679 }
14680 os_event_destroy(event);
14681 }
14682
14683 if (ret) {
14684 break;
14685 }
14686 }
14687
14688 dict_table_close(table, FALSE, FALSE);
14689 return ret;
14690 }
14691
14692 /**********************************************************************//**
14693 This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
14694 the table in MySQL. */
14695
14696 int
optimize(THD * thd,HA_CHECK_OPT *)14697 ha_innobase::optimize(
14698 /*==================*/
14699 THD* thd, /*!< in: connection thread handle */
14700 HA_CHECK_OPT*)
14701 {
14702
14703 /* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
14704 we have to hijack some existing command in order to be able to test
14705 the new admin commands added in InnoDB's FTS support. For now, we
14706 use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in
14707 InnoDB (so it recreates the table anew), and map it to OPTIMIZE.
14708
14709 This works OK otherwise, but MySQL locks the entire table during
14710 calls to OPTIMIZE, which is undesirable. */
14711 bool try_alter = true;
14712
14713 if (!m_prebuilt->table->is_temporary() && srv_defragment) {
14714 int err = defragment_table(m_prebuilt->table->name.m_name);
14715
14716 if (err == 0) {
14717 try_alter = false;
14718 } else {
14719 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
14720 uint(err),
14721 "InnoDB: Cannot defragment table %s: returned error code %d\n",
14722 m_prebuilt->table->name.m_name, err);
14723
14724 if(err == ER_SP_ALREADY_EXISTS) {
14725 try_alter = false;
14726 }
14727 }
14728 }
14729
14730 if (innodb_optimize_fulltext_only) {
14731 if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
14732 && m_prebuilt->table->space) {
14733 fts_sync_table(m_prebuilt->table);
14734 fts_optimize_table(m_prebuilt->table);
14735 }
14736 try_alter = false;
14737 }
14738
14739 return try_alter ? HA_ADMIN_TRY_ALTER : HA_ADMIN_OK;
14740 }
14741
14742 /*******************************************************************//**
14743 Tries to check that an InnoDB table is not corrupted. If corruption is
14744 noticed, prints to stderr information about it. In case of corruption
14745 may also assert a failure and crash the server.
14746 @return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
14747
14748 int
check(THD * thd,HA_CHECK_OPT * check_opt)14749 ha_innobase::check(
14750 /*===============*/
14751 THD* thd, /*!< in: user thread handle */
14752 HA_CHECK_OPT* check_opt) /*!< in: check options */
14753 {
14754 dict_index_t* index;
14755 ulint n_rows;
14756 ulint n_rows_in_table = ULINT_UNDEFINED;
14757 bool is_ok = true;
14758 ulint old_isolation_level;
14759 dberr_t ret;
14760
14761 DBUG_ENTER("ha_innobase::check");
14762 DBUG_ASSERT(thd == ha_thd());
14763 ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
14764 ut_a(m_prebuilt->trx == thd_to_trx(thd));
14765
14766 if (m_prebuilt->mysql_template == NULL) {
14767 /* Build the template; we will use a dummy template
14768 in index scans done in checking */
14769
14770 build_template(true);
14771 }
14772
14773 if (!m_prebuilt->table->space) {
14774
14775 ib_senderrf(
14776 thd,
14777 IB_LOG_LEVEL_ERROR,
14778 ER_TABLESPACE_DISCARDED,
14779 table->s->table_name.str);
14780
14781 DBUG_RETURN(HA_ADMIN_CORRUPT);
14782
14783 } else if (!m_prebuilt->table->is_readable() &&
14784 !m_prebuilt->table->space) {
14785
14786 ib_senderrf(
14787 thd, IB_LOG_LEVEL_ERROR,
14788 ER_TABLESPACE_MISSING,
14789 table->s->table_name.str);
14790
14791 DBUG_RETURN(HA_ADMIN_CORRUPT);
14792 }
14793
14794 m_prebuilt->trx->op_info = "checking table";
14795
14796 if (m_prebuilt->table->corrupted) {
14797 /* If some previous operation has marked the table as
14798 corrupted in memory, and has not propagated such to
14799 clustered index, we will do so here */
14800 index = dict_table_get_first_index(m_prebuilt->table);
14801
14802 if (!index->is_corrupted()) {
14803 dict_set_corrupted(
14804 index, m_prebuilt->trx, "CHECK TABLE");
14805 }
14806
14807 push_warning_printf(m_user_thd,
14808 Sql_condition::WARN_LEVEL_WARN,
14809 HA_ERR_INDEX_CORRUPT,
14810 "InnoDB: Index %s is marked as"
14811 " corrupted",
14812 index->name());
14813
14814 /* Now that the table is already marked as corrupted,
14815 there is no need to check any index of this table */
14816 m_prebuilt->trx->op_info = "";
14817
14818 DBUG_RETURN(HA_ADMIN_CORRUPT);
14819 }
14820
14821 old_isolation_level = m_prebuilt->trx->isolation_level;
14822
14823 /* We must run the index record counts at an isolation level
14824 >= READ COMMITTED, because a dirty read can see a wrong number
14825 of records in some index; to play safe, we normally use
14826 REPEATABLE READ here */
14827 m_prebuilt->trx->isolation_level = high_level_read_only
14828 ? TRX_ISO_READ_UNCOMMITTED
14829 : TRX_ISO_REPEATABLE_READ;
14830
14831 ut_ad(!m_prebuilt->table->corrupted);
14832
14833 for (index = dict_table_get_first_index(m_prebuilt->table);
14834 index != NULL;
14835 index = dict_table_get_next_index(index)) {
14836 /* If this is an index being created or dropped, skip */
14837 if (!index->is_committed()) {
14838 continue;
14839 }
14840
14841 if (!(check_opt->flags & T_QUICK)
14842 && !index->is_corrupted()) {
14843 /* Enlarge the fatal lock wait timeout during
14844 CHECK TABLE. */
14845 my_atomic_addlong(
14846 &srv_fatal_semaphore_wait_threshold,
14847 SRV_SEMAPHORE_WAIT_EXTENSION);
14848
14849 dberr_t err = btr_validate_index(
14850 index, m_prebuilt->trx, false);
14851
14852 /* Restore the fatal lock wait timeout after
14853 CHECK TABLE. */
14854 my_atomic_addlong(
14855 &srv_fatal_semaphore_wait_threshold,
14856 -SRV_SEMAPHORE_WAIT_EXTENSION);
14857
14858 if (err != DB_SUCCESS) {
14859 is_ok = false;
14860
14861 if (err == DB_DECRYPTION_FAILED) {
14862 push_warning_printf(
14863 thd,
14864 Sql_condition::WARN_LEVEL_WARN,
14865 ER_NO_SUCH_TABLE,
14866 "Table %s is encrypted but encryption service or"
14867 " used key_id is not available. "
14868 " Can't continue checking table.",
14869 index->table->name.m_name);
14870 } else {
14871 push_warning_printf(
14872 thd,
14873 Sql_condition::WARN_LEVEL_WARN,
14874 ER_NOT_KEYFILE,
14875 "InnoDB: The B-tree of"
14876 " index %s is corrupted.",
14877 index->name());
14878 }
14879
14880 continue;
14881 }
14882 }
14883
14884 /* Instead of invoking change_active_index(), set up
14885 a dummy template for non-locking reads, disabling
14886 access to the clustered index. */
14887 m_prebuilt->index = index;
14888
14889 m_prebuilt->index_usable = row_merge_is_index_usable(
14890 m_prebuilt->trx, m_prebuilt->index);
14891
14892 DBUG_EXECUTE_IF(
14893 "dict_set_index_corrupted",
14894 if (!index->is_primary()) {
14895 m_prebuilt->index_usable = FALSE;
14896 // row_mysql_lock_data_dictionary(m_prebuilt->trx);
14897 dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");
14898 // row_mysql_unlock_data_dictionary(m_prebuilt->trx);
14899 });
14900
14901 if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
14902 if (index->is_corrupted()) {
14903 push_warning_printf(
14904 m_user_thd,
14905 Sql_condition::WARN_LEVEL_WARN,
14906 HA_ERR_INDEX_CORRUPT,
14907 "InnoDB: Index %s is marked as"
14908 " corrupted",
14909 index->name());
14910 is_ok = false;
14911 } else {
14912 push_warning_printf(
14913 m_user_thd,
14914 Sql_condition::WARN_LEVEL_WARN,
14915 HA_ERR_TABLE_DEF_CHANGED,
14916 "InnoDB: Insufficient history for"
14917 " index %s",
14918 index->name());
14919 }
14920 continue;
14921 }
14922
14923 m_prebuilt->sql_stat_start = TRUE;
14924 m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
14925 m_prebuilt->n_template = 0;
14926 m_prebuilt->need_to_access_clustered = FALSE;
14927
14928 dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
14929
14930 m_prebuilt->select_lock_type = LOCK_NONE;
14931
14932 /* Scan this index. */
14933 if (dict_index_is_spatial(index)) {
14934 ret = row_count_rtree_recs(m_prebuilt, &n_rows);
14935 } else {
14936 ret = row_scan_index_for_mysql(
14937 m_prebuilt, index, &n_rows);
14938 }
14939
14940 DBUG_EXECUTE_IF(
14941 "dict_set_index_corrupted",
14942 if (!index->is_primary()) {
14943 ret = DB_CORRUPTION;
14944 });
14945
14946 if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) {
14947 /* Do not report error since this could happen
14948 during shutdown */
14949 break;
14950 }
14951 if (ret != DB_SUCCESS) {
14952 /* Assume some kind of corruption. */
14953 push_warning_printf(
14954 thd, Sql_condition::WARN_LEVEL_WARN,
14955 ER_NOT_KEYFILE,
14956 "InnoDB: The B-tree of"
14957 " index %s is corrupted.",
14958 index->name());
14959 is_ok = false;
14960 dict_set_corrupted(
14961 index, m_prebuilt->trx, "CHECK TABLE-check index");
14962 }
14963
14964
14965 if (index == dict_table_get_first_index(m_prebuilt->table)) {
14966 n_rows_in_table = n_rows;
14967 } else if (!(index->type & DICT_FTS)
14968 && (n_rows != n_rows_in_table)) {
14969 push_warning_printf(
14970 thd, Sql_condition::WARN_LEVEL_WARN,
14971 ER_NOT_KEYFILE,
14972 "InnoDB: Index '%-.200s' contains " ULINTPF
14973 " entries, should be " ULINTPF ".",
14974 index->name(), n_rows, n_rows_in_table);
14975 is_ok = false;
14976 dict_set_corrupted(
14977 index, m_prebuilt->trx,
14978 "CHECK TABLE; Wrong count");
14979 }
14980 }
14981
14982 /* Restore the original isolation level */
14983 m_prebuilt->trx->isolation_level = old_isolation_level;
14984 #ifdef BTR_CUR_HASH_ADAPT
14985 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
14986 /* We validate the whole adaptive hash index for all tables
14987 at every CHECK TABLE only when QUICK flag is not present. */
14988
14989 if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
14990 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
14991 ER_NOT_KEYFILE,
14992 "InnoDB: The adaptive hash index is corrupted.");
14993 is_ok = false;
14994 }
14995 # endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
14996 #endif /* BTR_CUR_HASH_ADAPT */
14997 m_prebuilt->trx->op_info = "";
14998
14999 DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
15000 }
15001
15002 /*******************************************************************//**
15003 Gets the foreign key create info for a table stored in InnoDB.
15004 @return own: character string in the form which can be inserted to the
15005 CREATE TABLE statement, MUST be freed with
15006 ha_innobase::free_foreign_key_create_info */
15007
15008 char*
get_foreign_key_create_info(void)15009 ha_innobase::get_foreign_key_create_info(void)
15010 /*==========================================*/
15011 {
15012 ut_a(m_prebuilt != NULL);
15013
15014 /* We do not know if MySQL can call this function before calling
15015 external_lock(). To be safe, update the thd of the current table
15016 handle. */
15017
15018 update_thd(ha_thd());
15019
15020 m_prebuilt->trx->op_info = "getting info on foreign keys";
15021
15022 /* Output the data to a temporary string */
15023 std::string str = dict_print_info_on_foreign_keys(
15024 TRUE, m_prebuilt->trx,
15025 m_prebuilt->table);
15026
15027 m_prebuilt->trx->op_info = "";
15028
15029 /* Allocate buffer for the string */
15030 char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
15031
15032 /* JAN: TODO: MySQL 5.7
15033 fk_str = reinterpret_cast<char*>(
15034 my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0)));
15035 */
15036
15037
15038
15039 if (fk_str) {
15040 memcpy(fk_str, str.c_str(), str.length());
15041 fk_str[str.length()]='\0';
15042 }
15043
15044 return(fk_str);
15045 }
15046
15047
15048 /***********************************************************************//**
15049 Maps a InnoDB foreign key constraint to a equivalent MySQL foreign key info.
15050 @return pointer to foreign key info */
15051 static
15052 FOREIGN_KEY_INFO*
get_foreign_key_info(THD * thd,dict_foreign_t * foreign)15053 get_foreign_key_info(
15054 /*=================*/
15055 THD* thd, /*!< in: user thread handle */
15056 dict_foreign_t* foreign)/*!< in: foreign key constraint */
15057 {
15058 FOREIGN_KEY_INFO f_key_info;
15059 FOREIGN_KEY_INFO* pf_key_info;
15060 uint i = 0;
15061 size_t len;
15062 char tmp_buff[NAME_LEN+1];
15063 char name_buff[NAME_LEN+1];
15064 const char* ptr;
15065 LEX_CSTRING* referenced_key_name;
15066 LEX_CSTRING* name = NULL;
15067
15068 if (dict_table_t::is_temporary_name(foreign->foreign_table_name)) {
15069 return NULL;
15070 }
15071
15072 ptr = dict_remove_db_name(foreign->id);
15073 f_key_info.foreign_id = thd_make_lex_string(
15074 thd, 0, ptr, strlen(ptr), 1);
15075
15076 /* Name format: database name, '/', table name, '\0' */
15077
15078 /* Referenced (parent) database name */
15079 len = dict_get_db_name_len(foreign->referenced_table_name);
15080 ut_a(len < sizeof(tmp_buff));
15081 ut_memcpy(tmp_buff, foreign->referenced_table_name, len);
15082 tmp_buff[len] = 0;
15083
15084 len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15085 f_key_info.referenced_db = thd_make_lex_string(
15086 thd, 0, name_buff, len, 1);
15087
15088 /* Referenced (parent) table name */
15089 ptr = dict_remove_db_name(foreign->referenced_table_name);
15090 len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15091 f_key_info.referenced_table = thd_make_lex_string(
15092 thd, 0, name_buff, len, 1);
15093
15094 /* Dependent (child) database name */
15095 len = dict_get_db_name_len(foreign->foreign_table_name);
15096 ut_a(len < sizeof(tmp_buff));
15097 ut_memcpy(tmp_buff, foreign->foreign_table_name, len);
15098 tmp_buff[len] = 0;
15099
15100 len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15101 f_key_info.foreign_db = thd_make_lex_string(
15102 thd, 0, name_buff, len, 1);
15103
15104 /* Dependent (child) table name */
15105 ptr = dict_remove_db_name(foreign->foreign_table_name);
15106 len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15107 f_key_info.foreign_table = thd_make_lex_string(
15108 thd, 0, name_buff, len, 1);
15109
15110 do {
15111 ptr = foreign->foreign_col_names[i];
15112 name = thd_make_lex_string(thd, name, ptr,
15113 strlen(ptr), 1);
15114 f_key_info.foreign_fields.push_back(name);
15115 ptr = foreign->referenced_col_names[i];
15116 name = thd_make_lex_string(thd, name, ptr,
15117 strlen(ptr), 1);
15118 f_key_info.referenced_fields.push_back(name);
15119 } while (++i < foreign->n_fields);
15120
15121 if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
15122 f_key_info.delete_method = FK_OPTION_CASCADE;
15123 } else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
15124 f_key_info.delete_method = FK_OPTION_SET_NULL;
15125 } else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
15126 f_key_info.delete_method = FK_OPTION_NO_ACTION;
15127 } else {
15128 f_key_info.delete_method = FK_OPTION_RESTRICT;
15129 }
15130
15131
15132 if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
15133 f_key_info.update_method = FK_OPTION_CASCADE;
15134 } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
15135 f_key_info.update_method = FK_OPTION_SET_NULL;
15136 } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
15137 f_key_info.update_method = FK_OPTION_NO_ACTION;
15138 } else {
15139 f_key_info.update_method = FK_OPTION_RESTRICT;
15140 }
15141
15142 /* Load referenced table to update FK referenced key name. */
15143 if (foreign->referenced_table == NULL) {
15144
15145 dict_table_t* ref_table;
15146
15147 ut_ad(mutex_own(&dict_sys->mutex));
15148 ref_table = dict_table_open_on_name(
15149 foreign->referenced_table_name_lookup,
15150 TRUE, FALSE, DICT_ERR_IGNORE_NONE);
15151
15152 if (ref_table == NULL) {
15153
15154 if (!thd_test_options(
15155 thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
15156 ib::info()
15157 << "Foreign Key referenced table "
15158 << foreign->referenced_table_name
15159 << " not found for foreign table "
15160 << foreign->foreign_table_name;
15161 }
15162 } else {
15163
15164 dict_table_close(ref_table, TRUE, FALSE);
15165 }
15166 }
15167
15168 if (foreign->referenced_index
15169 && foreign->referenced_index->name != NULL) {
15170 referenced_key_name = thd_make_lex_string(
15171 thd,
15172 f_key_info.referenced_key_name,
15173 foreign->referenced_index->name,
15174 strlen(foreign->referenced_index->name),
15175 1);
15176 } else {
15177 referenced_key_name = NULL;
15178 }
15179
15180 f_key_info.referenced_key_name = referenced_key_name;
15181
15182 pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info,
15183 sizeof(FOREIGN_KEY_INFO));
15184
15185 return(pf_key_info);
15186 }
15187
15188 /*******************************************************************//**
15189 Gets the list of foreign keys in this table.
15190 @return always 0, that is, always succeeds */
15191
15192 int
get_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15193 ha_innobase::get_foreign_key_list(
15194 /*==============================*/
15195 THD* thd, /*!< in: user thread handle */
15196 List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
15197 {
15198 update_thd(ha_thd());
15199
15200 m_prebuilt->trx->op_info = "getting list of foreign keys";
15201
15202 mutex_enter(&dict_sys->mutex);
15203
15204 for (dict_foreign_set::iterator it
15205 = m_prebuilt->table->foreign_set.begin();
15206 it != m_prebuilt->table->foreign_set.end();
15207 ++it) {
15208
15209 FOREIGN_KEY_INFO* pf_key_info;
15210 dict_foreign_t* foreign = *it;
15211
15212 pf_key_info = get_foreign_key_info(thd, foreign);
15213
15214 if (pf_key_info != NULL) {
15215 f_key_list->push_back(pf_key_info);
15216 }
15217 }
15218
15219 mutex_exit(&dict_sys->mutex);
15220
15221 m_prebuilt->trx->op_info = "";
15222
15223 return(0);
15224 }
15225
15226 /*******************************************************************//**
15227 Gets the set of foreign keys where this table is the referenced table.
15228 @return always 0, that is, always succeeds */
15229
15230 int
get_parent_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15231 ha_innobase::get_parent_foreign_key_list(
15232 /*=====================================*/
15233 THD* thd, /*!< in: user thread handle */
15234 List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
15235 {
15236 update_thd(ha_thd());
15237
15238 m_prebuilt->trx->op_info = "getting list of referencing foreign keys";
15239
15240 mutex_enter(&dict_sys->mutex);
15241
15242 for (dict_foreign_set::iterator it
15243 = m_prebuilt->table->referenced_set.begin();
15244 it != m_prebuilt->table->referenced_set.end();
15245 ++it) {
15246
15247 FOREIGN_KEY_INFO* pf_key_info;
15248 dict_foreign_t* foreign = *it;
15249
15250 pf_key_info = get_foreign_key_info(thd, foreign);
15251
15252 if (pf_key_info != NULL) {
15253 f_key_list->push_back(pf_key_info);
15254 }
15255 }
15256
15257 mutex_exit(&dict_sys->mutex);
15258
15259 m_prebuilt->trx->op_info = "";
15260
15261 return(0);
15262 }
15263
/** Table list item structure is used to store only the table
and name. It is used by get_cascade_foreign_key_table_list to store
the intermediate result for fetching the table set. The member order
matters: items are created with brace initializers {table, name}. */
struct table_list_item {
	/** InnoDB table object; when NULL,
	get_cascade_foreign_key_table_list opens the table by name */
	const dict_table_t*		table;
	/** Table name */
	const char*			name;
};
15273
15274 /** Structure to compare two st_tablename objects using their
15275 db and tablename. It is used in the ordering of cascade_fk_set.
15276 It returns true if the first argument precedes the second argument
15277 and false otherwise. */
15278 struct tablename_compare {
15279
operator ()tablename_compare15280 bool operator()(const st_handler_tablename lhs,
15281 const st_handler_tablename rhs) const
15282 {
15283 int cmp = strcmp(lhs.db, rhs.db);
15284 if (cmp == 0) {
15285 cmp = strcmp(lhs.tablename, rhs.tablename);
15286 }
15287
15288 return(cmp < 0);
15289 }
15290 };
15291
15292 /** Get the table name and database name for the given table.
15293 @param[in,out] thd user thread handle
15294 @param[out] f_key_info pointer to table_name_info object
15295 @param[in] foreign foreign key constraint. */
15296 static
15297 void
get_table_name_info(THD * thd,st_handler_tablename * f_key_info,const dict_foreign_t * foreign)15298 get_table_name_info(
15299 THD* thd,
15300 st_handler_tablename* f_key_info,
15301 const dict_foreign_t* foreign)
15302 {
15303 #define FILENAME_CHARSET_MBMAXLEN 5
15304 char tmp_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
15305 char name_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
15306 const char* ptr;
15307
15308 size_t len = dict_get_db_name_len(
15309 foreign->referenced_table_name_lookup);
15310 ut_memcpy(tmp_buff, foreign->referenced_table_name_lookup, len);
15311 tmp_buff[len] = 0;
15312
15313 ut_ad(len < sizeof(tmp_buff));
15314
15315 len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15316 f_key_info->db = thd_strmake(thd, name_buff, len);
15317
15318 ptr = dict_remove_db_name(foreign->referenced_table_name_lookup);
15319 len = filename_to_tablename(ptr, name_buff, sizeof(name_buff));
15320 f_key_info->tablename = thd_strmake(thd, name_buff, len);
15321 }
15322
15323 /** Get the list of tables ordered by the dependency on the other tables using
15324 the 'CASCADE' foreign key constraint.
15325 @param[in,out] thd user thread handle
15326 @param[out] fk_table_list set of tables name info for the
15327 dependent table
15328 @retval 0 for success. */
15329 int
get_cascade_foreign_key_table_list(THD * thd,List<st_handler_tablename> * fk_table_list)15330 ha_innobase::get_cascade_foreign_key_table_list(
15331 THD* thd,
15332 List<st_handler_tablename>* fk_table_list)
15333 {
15334 m_prebuilt->trx->op_info = "getting cascading foreign keys";
15335
15336 std::list<table_list_item, ut_allocator<table_list_item> > table_list;
15337
15338 typedef std::set<st_handler_tablename, tablename_compare,
15339 ut_allocator<st_handler_tablename> > cascade_fk_set;
15340
15341 cascade_fk_set fk_set;
15342
15343 mutex_enter(&dict_sys->mutex);
15344
15345 /* Initialize the table_list with prebuilt->table name. */
15346 struct table_list_item item = {m_prebuilt->table,
15347 m_prebuilt->table->name.m_name};
15348
15349 table_list.push_back(item);
15350
15351 /* Get the parent table, grand parent table info from the
15352 table list by depth-first traversal. */
15353 do {
15354 const dict_table_t* parent_table;
15355 dict_table_t* parent = NULL;
15356 std::pair<cascade_fk_set::iterator,bool> ret;
15357
15358 item = table_list.back();
15359 table_list.pop_back();
15360 parent_table = item.table;
15361
15362 if (parent_table == NULL) {
15363
15364 ut_ad(item.name != NULL);
15365
15366 parent_table = parent = dict_table_open_on_name(
15367 item.name, TRUE, FALSE,
15368 DICT_ERR_IGNORE_NONE);
15369
15370 if (parent_table == NULL) {
15371 /* foreign_key_checks is or was probably
15372 disabled; ignore the constraint */
15373 continue;
15374 }
15375 }
15376
15377 for (dict_foreign_set::const_iterator it =
15378 parent_table->foreign_set.begin();
15379 it != parent_table->foreign_set.end(); ++it) {
15380
15381 const dict_foreign_t* foreign = *it;
15382 st_handler_tablename f1;
15383
15384 /* Skip the table if there is no
15385 cascading operation. */
15386 if (0 == (foreign->type
15387 & ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
15388 | DICT_FOREIGN_ON_UPDATE_NO_ACTION))) {
15389 continue;
15390 }
15391
15392 if (foreign->referenced_table_name_lookup != NULL) {
15393 get_table_name_info(thd, &f1, foreign);
15394 ret = fk_set.insert(f1);
15395
15396 /* Ignore the table if it is already
15397 in the set. */
15398 if (!ret.second) {
15399 continue;
15400 }
15401
15402 struct table_list_item item1 = {
15403 foreign->referenced_table,
15404 foreign->referenced_table_name_lookup};
15405
15406 table_list.push_back(item1);
15407
15408 st_handler_tablename* fk_table =
15409 (st_handler_tablename*) thd_memdup(
15410 thd, &f1, sizeof(*fk_table));
15411
15412 fk_table_list->push_back(fk_table);
15413 }
15414 }
15415
15416 if (parent != NULL) {
15417 dict_table_close(parent, true, false);
15418 }
15419
15420 } while(!table_list.empty());
15421
15422 mutex_exit(&dict_sys->mutex);
15423
15424 m_prebuilt->trx->op_info = "";
15425
15426 return(0);
15427 }
15428
15429 /*****************************************************************//**
15430 Checks if ALTER TABLE may change the storage engine of the table.
15431 Changing storage engines is not allowed for tables for which there
15432 are foreign key constraints (parent or child tables).
15433 @return TRUE if can switch engines */
15434
15435 bool
can_switch_engines(void)15436 ha_innobase::can_switch_engines(void)
15437 /*=================================*/
15438 {
15439 DBUG_ENTER("ha_innobase::can_switch_engines");
15440
15441 update_thd();
15442
15443 m_prebuilt->trx->op_info =
15444 "determining if there are foreign key constraints";
15445
15446 row_mysql_freeze_data_dictionary(m_prebuilt->trx);
15447
15448 bool can_switch = m_prebuilt->table->referenced_set.empty()
15449 && m_prebuilt->table->foreign_set.empty();
15450
15451 row_mysql_unfreeze_data_dictionary(m_prebuilt->trx);
15452 m_prebuilt->trx->op_info = "";
15453
15454 DBUG_RETURN(can_switch);
15455 }
15456
15457 /*******************************************************************//**
15458 Checks if a table is referenced by a foreign key. The MySQL manual states that
15459 a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
15460 delete is then allowed internally to resolve a duplicate key conflict in
15461 REPLACE, not an update.
15462 @return > 0 if referenced by a FOREIGN KEY */
15463
15464 uint
referenced_by_foreign_key(void)15465 ha_innobase::referenced_by_foreign_key(void)
15466 /*========================================*/
15467 {
15468 if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
15469
15470 return(1);
15471 }
15472
15473 return(0);
15474 }
15475
15476 /*******************************************************************//**
15477 Frees the foreign key create info for a table stored in InnoDB, if it is
15478 non-NULL. */
15479
15480 void
free_foreign_key_create_info(char * str)15481 ha_innobase::free_foreign_key_create_info(
15482 /*======================================*/
15483 char* str) /*!< in, own: create info string to free */
15484 {
15485 if (str != NULL) {
15486 my_free(str);
15487 }
15488 }
15489
15490 /*******************************************************************//**
15491 Tells something additional to the handler about how to do things.
15492 @return 0 or error number */
15493
15494 int
extra(enum ha_extra_function operation)15495 ha_innobase::extra(
15496 /*===============*/
15497 enum ha_extra_function operation)
15498 /*!< in: HA_EXTRA_FLUSH or some other flag */
15499 {
15500 check_trx_exists(ha_thd());
15501
15502 /* Warning: since it is not sure that MySQL calls external_lock
15503 before calling this function, the trx field in m_prebuilt can be
15504 obsolete! */
15505
15506 switch (operation) {
15507 case HA_EXTRA_FLUSH:
15508 if (m_prebuilt->blob_heap) {
15509 row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15510 }
15511 break;
15512 case HA_EXTRA_RESET_STATE:
15513 reset_template();
15514 thd_to_trx(ha_thd())->duplicates = 0;
15515 break;
15516 case HA_EXTRA_NO_KEYREAD:
15517 m_prebuilt->read_just_key = 0;
15518 break;
15519 case HA_EXTRA_KEYREAD:
15520 m_prebuilt->read_just_key = 1;
15521 break;
15522 case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
15523 m_prebuilt->keep_other_fields_on_keyread = 1;
15524 break;
15525
15526 /* IMPORTANT: m_prebuilt->trx can be obsolete in
15527 this method, because it is not sure that MySQL
15528 calls external_lock before this method with the
15529 parameters below. We must not invoke update_thd()
15530 either, because the calling threads may change.
15531 CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
15532 case HA_EXTRA_INSERT_WITH_UPDATE:
15533 thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
15534 break;
15535 case HA_EXTRA_NO_IGNORE_DUP_KEY:
15536 thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
15537 break;
15538 case HA_EXTRA_WRITE_CAN_REPLACE:
15539 thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
15540 break;
15541 case HA_EXTRA_WRITE_CANNOT_REPLACE:
15542 thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
15543 break;
15544 case HA_EXTRA_BEGIN_ALTER_COPY:
15545 m_prebuilt->table->skip_alter_undo = 1;
15546 if (m_prebuilt->table->is_temporary()
15547 || !m_prebuilt->table->versioned_by_id()) {
15548 break;
15549 }
15550 trx_start_if_not_started(m_prebuilt->trx, true);
15551 m_prebuilt->trx->mod_tables.insert(
15552 trx_mod_tables_t::value_type(
15553 const_cast<dict_table_t*>(m_prebuilt->table),
15554 0))
15555 .first->second.set_versioned(0);
15556 break;
15557 case HA_EXTRA_END_ALTER_COPY:
15558 m_prebuilt->table->skip_alter_undo = 0;
15559 break;
15560 case HA_EXTRA_FAKE_START_STMT:
15561 trx_register_for_2pc(m_prebuilt->trx);
15562 m_prebuilt->sql_stat_start = true;
15563 break;
15564 default:/* Do nothing */
15565 ;
15566 }
15567
15568 return(0);
15569 }
15570
15571 /**
15572 MySQL calls this method at the end of each statement. This method
15573 exists for readability only. ha_innobase::reset() doesn't give any
15574 clue about the method. */
15575
15576 int
end_stmt()15577 ha_innobase::end_stmt()
15578 {
15579 if (m_prebuilt->blob_heap) {
15580 row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15581 }
15582
15583 reset_template();
15584
15585 m_ds_mrr.dsmrr_close();
15586
15587 /* TODO: This should really be reset in reset_template() but for now
15588 it's safer to do it explicitly here. */
15589
15590 /* This is a statement level counter. */
15591 m_prebuilt->autoinc_last_value = 0;
15592
15593 return(0);
15594 }
15595
15596 /**
15597 MySQL calls this method at the end of each statement */
15598
15599 int
reset()15600 ha_innobase::reset()
15601 {
15602 return(end_stmt());
15603 }
15604
15605 /******************************************************************//**
15606 MySQL calls this function at the start of each SQL statement inside LOCK
15607 TABLES. Inside LOCK TABLES the ::external_lock method does not work to
15608 mark SQL statement borders. Note also a special case: if a temporary table
15609 is created inside LOCK TABLES, MySQL has not called external_lock() at all
15610 on that table.
15611 MySQL-5.0 also calls this before each statement in an execution of a stored
15612 procedure. To make the execution more deterministic for binlogging, MySQL-5.0
15613 locks all tables involved in a stored procedure with full explicit table
15614 locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
15615 procedure.
15616 @return 0 or error code */
15617
15618 int
start_stmt(THD * thd,thr_lock_type lock_type)15619 ha_innobase::start_stmt(
15620 /*====================*/
15621 THD* thd, /*!< in: handle to the user thread */
15622 thr_lock_type lock_type)
15623 {
15624 trx_t* trx = m_prebuilt->trx;
15625
15626 DBUG_ENTER("ha_innobase::start_stmt");
15627
15628 update_thd(thd);
15629
15630 ut_ad(m_prebuilt->table != NULL);
15631
15632 trx = m_prebuilt->trx;
15633
15634 innobase_srv_conc_force_exit_innodb(trx);
15635
15636 /* Reset the AUTOINC statement level counter for multi-row INSERTs. */
15637 trx->n_autoinc_rows = 0;
15638
15639 m_prebuilt->sql_stat_start = TRUE;
15640 m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15641 reset_template();
15642
15643 if (m_prebuilt->table->is_temporary()
15644 && m_mysql_has_locked
15645 && m_prebuilt->select_lock_type == LOCK_NONE) {
15646 dberr_t error;
15647
15648 switch (thd_sql_command(thd)) {
15649 case SQLCOM_INSERT:
15650 case SQLCOM_UPDATE:
15651 case SQLCOM_DELETE:
15652 case SQLCOM_REPLACE:
15653 init_table_handle_for_HANDLER();
15654 m_prebuilt->select_lock_type = LOCK_X;
15655 m_prebuilt->stored_select_lock_type = LOCK_X;
15656 error = row_lock_table(m_prebuilt);
15657
15658 if (error != DB_SUCCESS) {
15659 int st = convert_error_code_to_mysql(
15660 error, 0, thd);
15661 DBUG_RETURN(st);
15662 }
15663 break;
15664 }
15665 }
15666
15667 if (!m_mysql_has_locked) {
15668 /* This handle is for a temporary table created inside
15669 this same LOCK TABLES; since MySQL does NOT call external_lock
15670 in this case, we must use x-row locks inside InnoDB to be
15671 prepared for an update of a row */
15672
15673 m_prebuilt->select_lock_type = LOCK_X;
15674
15675 } else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
15676 && thd_sql_command(thd) == SQLCOM_SELECT
15677 && lock_type == TL_READ) {
15678
15679 /* For other than temporary tables, we obtain
15680 no lock for consistent read (plain SELECT). */
15681
15682 m_prebuilt->select_lock_type = LOCK_NONE;
15683 } else {
15684 /* Not a consistent read: restore the
15685 select_lock_type value. The value of
15686 stored_select_lock_type was decided in:
15687 1) ::store_lock(),
15688 2) ::external_lock(),
15689 3) ::init_table_handle_for_HANDLER(). */
15690
15691 ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
15692
15693 m_prebuilt->select_lock_type =
15694 m_prebuilt->stored_select_lock_type;
15695 }
15696
15697 *trx->detailed_error = 0;
15698
15699 innobase_register_trx(ht, thd, trx);
15700
15701 if (!trx_is_started(trx)) {
15702 trx->will_lock = true;
15703 }
15704
15705 DBUG_RETURN(0);
15706 }
15707
15708 /******************************************************************//**
15709 Maps a MySQL trx isolation level code to the InnoDB isolation level code
15710 @return InnoDB isolation level */
15711 static inline
15712 ulint
innobase_map_isolation_level(enum_tx_isolation iso)15713 innobase_map_isolation_level(
15714 /*=========================*/
15715 enum_tx_isolation iso) /*!< in: MySQL isolation level code */
15716 {
15717 if (UNIV_UNLIKELY(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN)
15718 || UNIV_UNLIKELY(srv_read_only_mode)) {
15719 return TRX_ISO_READ_UNCOMMITTED;
15720 }
15721 switch (iso) {
15722 case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
15723 case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED);
15724 case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE);
15725 case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED);
15726 }
15727
15728 ut_error;
15729
15730 return(0);
15731 }
15732
15733 /******************************************************************//**
15734 As MySQL will execute an external lock for every new table it uses when it
15735 starts to process an SQL statement (an exception is when MySQL calls
15736 start_stmt for the handle) we can use this function to store the pointer to
15737 the THD in the handle. We will also use this function to communicate
15738 to InnoDB that a new SQL statement has started and that we must store a
15739 savepoint to our transaction handle, so that we are able to roll back
15740 the SQL statement in case of an error.
15741 @return 0 */
15742
15743 int
external_lock(THD * thd,int lock_type)15744 ha_innobase::external_lock(
15745 /*=======================*/
15746 THD* thd, /*!< in: handle to the user thread */
15747 int lock_type) /*!< in: lock type */
15748 {
15749 DBUG_ENTER("ha_innobase::external_lock");
15750 DBUG_PRINT("enter",("lock_type: %d", lock_type));
15751
15752 update_thd(thd);
15753 trx_t* trx = m_prebuilt->trx;
15754 ut_ad(m_prebuilt->table);
15755
15756 /* Statement based binlogging does not work in isolation level
15757 READ UNCOMMITTED and READ COMMITTED since the necessary
15758 locks cannot be taken. In this case, we print an
15759 informative error message and return with an error.
15760 Note: decide_logging_format would give the same error message,
15761 except it cannot give the extra details. */
15762
15763 if (lock_type == F_WRLCK
15764 && !(table_flags() & HA_BINLOG_STMT_CAPABLE)
15765 && thd_binlog_format(thd) == BINLOG_FORMAT_STMT
15766 && thd_binlog_filter_ok(thd)
15767 && thd_sqlcom_can_generate_row_events(thd)) {
15768 bool skip = false;
15769 #ifdef WITH_WSREP
15770 skip = trx->is_wsrep()
15771 && wsrep_thd_exec_mode(thd) != LOCAL_STATE;
15772 #endif /* WITH_WSREP */
15773 /* used by test case */
15774 DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
15775
15776 if (!skip) {
15777 my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
15778 " InnoDB is limited to row-logging when"
15779 " transaction isolation level is"
15780 " READ COMMITTED or READ UNCOMMITTED.");
15781
15782 DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
15783 }
15784 }
15785
15786 /* Check for UPDATEs in read-only mode. */
15787 if (srv_read_only_mode) {
15788 switch (thd_sql_command(thd)) {
15789 case SQLCOM_CREATE_TABLE:
15790 if (lock_type != F_WRLCK) {
15791 break;
15792 }
15793 /* fall through */
15794 case SQLCOM_UPDATE:
15795 case SQLCOM_INSERT:
15796 case SQLCOM_REPLACE:
15797 case SQLCOM_DROP_TABLE:
15798 case SQLCOM_ALTER_TABLE:
15799 case SQLCOM_OPTIMIZE:
15800 case SQLCOM_CREATE_INDEX:
15801 case SQLCOM_DROP_INDEX:
15802 case SQLCOM_CREATE_SEQUENCE:
15803 case SQLCOM_DROP_SEQUENCE:
15804 case SQLCOM_DELETE:
15805 ib_senderrf(thd, IB_LOG_LEVEL_WARN,
15806 ER_READ_ONLY_MODE);
15807 DBUG_RETURN(HA_ERR_TABLE_READONLY);
15808 }
15809 }
15810
15811 m_prebuilt->sql_stat_start = TRUE;
15812 m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15813
15814 reset_template();
15815
15816 switch (m_prebuilt->table->quiesce) {
15817 case QUIESCE_START:
15818 /* Check for FLUSH TABLE t WITH READ LOCK; */
15819 if (!srv_read_only_mode
15820 && thd_sql_command(thd) == SQLCOM_FLUSH
15821 && lock_type == F_RDLCK) {
15822
15823 if (!m_prebuilt->table->space) {
15824 ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
15825 ER_TABLESPACE_DISCARDED,
15826 table->s->table_name.str);
15827
15828 DBUG_RETURN(HA_ERR_TABLESPACE_MISSING);
15829 }
15830
15831 row_quiesce_table_start(m_prebuilt->table, trx);
15832
15833 /* Use the transaction instance to track UNLOCK
15834 TABLES. It can be done via START TRANSACTION; too
15835 implicitly. */
15836
15837 ++trx->flush_tables;
15838 }
15839 break;
15840
15841 case QUIESCE_COMPLETE:
15842 /* Check for UNLOCK TABLES; implicit or explicit
15843 or trx interruption. */
15844 if (trx->flush_tables > 0
15845 && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
15846
15847 row_quiesce_table_complete(m_prebuilt->table, trx);
15848
15849 ut_a(trx->flush_tables > 0);
15850 --trx->flush_tables;
15851 }
15852
15853 break;
15854
15855 case QUIESCE_NONE:
15856 break;
15857 }
15858
15859 if (lock_type == F_WRLCK) {
15860
15861 /* If this is a SELECT, then it is in UPDATE TABLE ...
15862 or SELECT ... FOR UPDATE */
15863 m_prebuilt->select_lock_type = LOCK_X;
15864 m_prebuilt->stored_select_lock_type = LOCK_X;
15865 }
15866
15867 if (lock_type != F_UNLCK) {
15868 /* MySQL is setting a new table lock */
15869
15870 *trx->detailed_error = 0;
15871
15872 innobase_register_trx(ht, thd, trx);
15873
15874 if (trx->isolation_level == TRX_ISO_SERIALIZABLE
15875 && m_prebuilt->select_lock_type == LOCK_NONE
15876 && thd_test_options(
15877 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15878
15879 /* To get serializable execution, we let InnoDB
15880 conceptually add 'LOCK IN SHARE MODE' to all SELECTs
15881 which otherwise would have been consistent reads. An
15882 exception is consistent reads in the AUTOCOMMIT=1 mode:
15883 we know that they are read-only transactions, and they
15884 can be serialized also if performed as consistent
15885 reads. */
15886
15887 m_prebuilt->select_lock_type = LOCK_S;
15888 m_prebuilt->stored_select_lock_type = LOCK_S;
15889 }
15890
15891 /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
15892 TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
15893 an InnoDB table lock if it is released immediately at the end
15894 of LOCK TABLES, and InnoDB's table locks in that case cause
15895 VERY easily deadlocks.
15896
15897 We do not set InnoDB table locks if user has not explicitly
15898 requested a table lock. Note that thd_in_lock_tables(thd)
15899 can hold in some cases, e.g., at the start of a stored
15900 procedure call (SQLCOM_CALL). */
15901
15902 if (m_prebuilt->select_lock_type != LOCK_NONE) {
15903
15904 if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
15905 && THDVAR(thd, table_locks)
15906 && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
15907 && thd_in_lock_tables(thd)) {
15908
15909 dberr_t error = row_lock_table(m_prebuilt);
15910
15911 if (error != DB_SUCCESS) {
15912
15913 DBUG_RETURN(
15914 convert_error_code_to_mysql(
15915 error, 0, thd));
15916 }
15917 }
15918
15919 trx->mysql_n_tables_locked++;
15920 }
15921
15922 trx->n_mysql_tables_in_use++;
15923 m_mysql_has_locked = true;
15924
15925 if (!trx_is_started(trx)
15926 && (m_prebuilt->select_lock_type != LOCK_NONE
15927 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15928
15929 trx->will_lock = true;
15930 }
15931
15932 DBUG_RETURN(0);
15933 } else {
15934 DEBUG_SYNC_C("ha_innobase_end_statement");
15935 }
15936
15937 /* MySQL is releasing a table lock */
15938
15939 trx->n_mysql_tables_in_use--;
15940 m_mysql_has_locked = false;
15941
15942 innobase_srv_conc_force_exit_innodb(trx);
15943
15944 /* If the MySQL lock count drops to zero we know that the current SQL
15945 statement has ended */
15946
15947 if (trx->n_mysql_tables_in_use == 0) {
15948
15949 trx->mysql_n_tables_locked = 0;
15950 m_prebuilt->used_in_HANDLER = FALSE;
15951
15952 if (!thd_test_options(
15953 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15954
15955 if (trx_is_started(trx)) {
15956
15957 innobase_commit(ht, thd, TRUE);
15958 }
15959
15960 } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
15961 trx->read_view.close();
15962 }
15963 }
15964
15965 if (!trx_is_started(trx)
15966 && lock_type != F_UNLCK
15967 && (m_prebuilt->select_lock_type != LOCK_NONE
15968 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15969
15970 trx->will_lock = true;
15971 }
15972
15973 DBUG_RETURN(0);
15974 }
15975
15976 /************************************************************************//**
15977 Here we export InnoDB status variables to MySQL. */
15978 static
15979 void
innodb_export_status()15980 innodb_export_status()
15981 /*==================*/
15982 {
15983 if (srv_was_started) {
15984 srv_export_innodb_status();
15985 }
15986 }
15987
15988 /************************************************************************//**
15989 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
15990 InnoDB Monitor to the client.
15991 @return 0 on success */
15992 static
15993 int
innodb_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)15994 innodb_show_status(
15995 /*===============*/
15996 handlerton* hton, /*!< in: the innodb handlerton */
15997 THD* thd, /*!< in: the MySQL query thread of the caller */
15998 stat_print_fn* stat_print)
15999 {
16000 static const char truncated_msg[] = "... truncated...\n";
16001 const long MAX_STATUS_SIZE = 1048576;
16002 ulint trx_list_start = ULINT_UNDEFINED;
16003 ulint trx_list_end = ULINT_UNDEFINED;
16004 bool ret_val;
16005
16006 DBUG_ENTER("innodb_show_status");
16007 DBUG_ASSERT(hton == innodb_hton_ptr);
16008
16009 /* We don't create the temp files or associated
16010 mutexes in read-only-mode */
16011
16012 if (srv_read_only_mode) {
16013 DBUG_RETURN(0);
16014 }
16015
16016 srv_wake_purge_thread_if_not_active();
16017
16018 trx_t* trx = check_trx_exists(thd);
16019
16020 innobase_srv_conc_force_exit_innodb(trx);
16021
16022 /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
16023 bytes of text. */
16024
16025 char* str;
16026 size_t flen;
16027
16028 mutex_enter(&srv_monitor_file_mutex);
16029 rewind(srv_monitor_file);
16030
16031 srv_printf_innodb_monitor(srv_monitor_file, FALSE,
16032 &trx_list_start, &trx_list_end);
16033
16034 os_file_set_eof(srv_monitor_file);
16035
16036 flen = size_t(ftell(srv_monitor_file));
16037 if (ssize_t(flen) < 0) {
16038 flen = 0;
16039 }
16040
16041 size_t usable_len;
16042
16043 if (flen > MAX_STATUS_SIZE) {
16044 usable_len = MAX_STATUS_SIZE;
16045 srv_truncated_status_writes++;
16046 } else {
16047 usable_len = flen;
16048 }
16049
16050 /* allocate buffer for the string, and
16051 read the contents of the temporary file */
16052
16053 if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
16054 usable_len + 1, MYF(0)))) {
16055 mutex_exit(&srv_monitor_file_mutex);
16056 DBUG_RETURN(1);
16057 }
16058
16059 rewind(srv_monitor_file);
16060
16061 if (flen < MAX_STATUS_SIZE) {
16062 /* Display the entire output. */
16063 flen = fread(str, 1, flen, srv_monitor_file);
16064 } else if (trx_list_end < flen
16065 && trx_list_start < trx_list_end
16066 && trx_list_start + flen - trx_list_end
16067 < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
16068
16069 /* Omit the beginning of the list of active transactions. */
16070 size_t len = fread(str, 1, trx_list_start, srv_monitor_file);
16071
16072 memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
16073 len += sizeof truncated_msg - 1;
16074 usable_len = (MAX_STATUS_SIZE - 1) - len;
16075 fseek(srv_monitor_file, long(flen - usable_len), SEEK_SET);
16076 len += fread(str + len, 1, usable_len, srv_monitor_file);
16077 flen = len;
16078 } else {
16079 /* Omit the end of the output. */
16080 flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
16081 }
16082
16083 mutex_exit(&srv_monitor_file_mutex);
16084
16085 ret_val= stat_print(
16086 thd, innobase_hton_name,
16087 static_cast<uint>(strlen(innobase_hton_name)),
16088 STRING_WITH_LEN(""), str, static_cast<uint>(flen));
16089
16090 my_free(str);
16091
16092 DBUG_RETURN(ret_val);
16093 }
16094
16095 /** Callback for collecting mutex statistics */
16096 struct ShowStatus {
16097
16098 /** For tracking the mutex metrics */
16099 struct Value {
16100
16101 /** Constructor
16102 @param[in] name Name of the mutex
16103 @param[in] spins Number of spins
16104 @param[in] os_waits OS waits so far
16105 @param[in] calls Number of calls to enter() */
ValueShowStatus::Value16106 Value(const char* name,
16107 ulint spins,
16108 uint64_t waits,
16109 uint64_t calls)
16110 :
16111 m_name(name),
16112 m_spins(spins),
16113 m_waits(waits),
16114 m_calls(calls)
16115 {
16116 /* No op */
16117 }
16118
16119 /** Mutex name */
16120 std::string m_name;
16121
16122 /** Spins so far */
16123 ulint m_spins;
16124
16125 /** Waits so far */
16126 uint64_t m_waits;
16127
16128 /** Number of calls so far */
16129 uint64_t m_calls;
16130 };
16131
16132 /** Order by m_waits, in descending order. */
16133 struct OrderByWaits: public std::binary_function<Value, Value, bool>
16134 {
16135 /** @return true if rhs < lhs */
operator ()ShowStatus::OrderByWaits16136 bool operator()(
16137 const Value& lhs,
16138 const Value& rhs) const
16139 UNIV_NOTHROW
16140 {
16141 return(rhs.m_waits < lhs.m_waits);
16142 }
16143 };
16144
16145 typedef std::vector<Value, ut_allocator<Value> > Values;
16146
16147 /** Collect the individual latch counts */
16148 struct GetCount {
16149 typedef latch_meta_t::CounterType::Count Count;
16150
16151 /** Constructor
16152 @param[in] name Latch name
16153 @param[in,out] values Put the values here */
GetCountShowStatus::GetCount16154 GetCount(
16155 const char* name,
16156 Values* values)
16157 UNIV_NOTHROW
16158 :
16159 m_name(name),
16160 m_values(values)
16161 {
16162 /* No op */
16163 }
16164
16165 /** Collect the latch metrics. Ignore entries where the
16166 spins and waits are zero.
16167 @param[in] count The latch metrics */
operator ()ShowStatus::GetCount16168 void operator()(Count* count) const UNIV_NOTHROW
16169 {
16170 if (count->m_spins > 0 || count->m_waits > 0) {
16171
16172 m_values->push_back(Value(
16173 m_name,
16174 count->m_spins,
16175 count->m_waits,
16176 count->m_calls));
16177 }
16178 }
16179
16180 /** The latch name */
16181 const char* m_name;
16182
16183 /** For collecting the active mutex stats. */
16184 Values* m_values;
16185 };
16186
16187 /** Constructor */
ShowStatusShowStatus16188 ShowStatus() { }
16189
16190 /** Callback for collecting the stats
16191 @param[in] latch_meta Latch meta data
16192 @return always returns true */
operator ()ShowStatus16193 bool operator()(latch_meta_t& latch_meta)
16194 UNIV_NOTHROW
16195 {
16196 latch_meta.get_counter()->iterate(
16197 GetCount(latch_meta.get_name(), &m_values));
16198
16199 return(true);
16200 }
16201
16202 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16203 The table structure is like so: Engine | Mutex Name | Status
16204 We store the metrics in the "Status" column as:
16205
16206 spins=N,waits=N,calls=N"
16207
16208 The user has to parse the dataunfortunately
16209 @param[in,out] thd the MySQL query thread of the caller
16210 @param[in,out] stat_print function for printing statistics
16211 @return true on success. */
16212 bool to_string(
16213 THD* thd,
16214 stat_print_fn* stat_print)
16215 UNIV_NOTHROW;
16216
16217 /** For collecting the active mutex stats. */
16218 Values m_values;
16219 };
16220
16221 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16222 The table structure is like so: Engine | Mutex Name | Status
16223 We store the metrics in the "Status" column as:
16224
16225 spins=N,waits=N,calls=N"
16226
16227 The user has to parse the dataunfortunately
16228 @param[in,out] thd the MySQL query thread of the caller
16229 @param[in,out] stat_print function for printing statistics
16230 @return true on success. */
16231 bool
to_string(THD * thd,stat_print_fn * stat_print)16232 ShowStatus::to_string(
16233 THD* thd,
16234 stat_print_fn* stat_print)
16235 UNIV_NOTHROW
16236 {
16237 uint hton_name_len = (uint) strlen(innobase_hton_name);
16238
16239 std::sort(m_values.begin(), m_values.end(), OrderByWaits());
16240
16241 Values::iterator end = m_values.end();
16242
16243 for (Values::iterator it = m_values.begin(); it != end; ++it) {
16244
16245 int name_len;
16246 char name_buf[IO_SIZE];
16247
16248 name_len = snprintf(
16249 name_buf, sizeof(name_buf), "%s", it->m_name.c_str());
16250
16251 int status_len;
16252 char status_buf[IO_SIZE];
16253
16254 status_len = snprintf(
16255 status_buf, sizeof(status_buf),
16256 "spins=%lu,waits=%lu,calls=%llu",
16257 static_cast<ulong>(it->m_spins),
16258 static_cast<long>(it->m_waits),
16259 (ulonglong) it->m_calls);
16260
16261 if (stat_print(thd, innobase_hton_name,
16262 hton_name_len,
16263 name_buf, static_cast<uint>(name_len),
16264 status_buf, static_cast<uint>(status_len))) {
16265
16266 return(false);
16267 }
16268 }
16269
16270 return(true);
16271 }
16272
16273 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16274 @param[in,out] hton the innodb handlerton
16275 @param[in,out] thd the MySQL query thread of the caller
16276 @param[in,out] stat_print function for printing statistics
16277 @return 0 on success. */
16278 static
16279 int
innodb_show_mutex_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16280 innodb_show_mutex_status(
16281 handlerton*
16282 #ifdef DBUG_ASSERT_EXISTS
16283 hton
16284 #endif
16285 ,
16286 THD* thd,
16287 stat_print_fn* stat_print)
16288 {
16289 DBUG_ENTER("innodb_show_mutex_status");
16290
16291 ShowStatus collector;
16292
16293 DBUG_ASSERT(hton == innodb_hton_ptr);
16294
16295 mutex_monitor.iterate(collector);
16296
16297 if (!collector.to_string(thd, stat_print)) {
16298 DBUG_RETURN(1);
16299 }
16300
16301 DBUG_RETURN(0);
16302 }
16303
16304 /** Implements the SHOW MUTEX STATUS command.
16305 @param[in,out] hton the innodb handlerton
16306 @param[in,out] thd the MySQL query thread of the caller
16307 @param[in,out] stat_print function for printing statistics
16308 @return 0 on success. */
16309 static
16310 int
innodb_show_rwlock_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16311 innodb_show_rwlock_status(
16312 handlerton*
16313 #ifdef DBUG_ASSERT_EXISTS
16314 hton
16315 #endif
16316 ,
16317 THD* thd,
16318 stat_print_fn* stat_print)
16319 {
16320 DBUG_ENTER("innodb_show_rwlock_status");
16321
16322 rw_lock_t* block_rwlock = NULL;
16323 ulint block_rwlock_oswait_count = 0;
16324 uint hton_name_len = (uint) strlen(innobase_hton_name);
16325
16326 DBUG_ASSERT(hton == innodb_hton_ptr);
16327
16328 mutex_enter(&rw_lock_list_mutex);
16329
16330 for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
16331 rw_lock != NULL;
16332 rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
16333
16334 if (rw_lock->count_os_wait == 0) {
16335 continue;
16336 }
16337
16338 int buf1len;
16339 char buf1[IO_SIZE];
16340
16341 if (rw_lock->is_block_lock) {
16342
16343 block_rwlock = rw_lock;
16344 block_rwlock_oswait_count += rw_lock->count_os_wait;
16345
16346 continue;
16347 }
16348
16349 buf1len = snprintf(
16350 buf1, sizeof buf1, "rwlock: %s:%u",
16351 innobase_basename(rw_lock->cfile_name),
16352 rw_lock->cline);
16353
16354 int buf2len;
16355 char buf2[IO_SIZE];
16356
16357 buf2len = snprintf(
16358 buf2, sizeof buf2, "waits=%u",
16359 rw_lock->count_os_wait);
16360
16361 if (stat_print(thd, innobase_hton_name,
16362 hton_name_len,
16363 buf1, static_cast<uint>(buf1len),
16364 buf2, static_cast<uint>(buf2len))) {
16365
16366 mutex_exit(&rw_lock_list_mutex);
16367
16368 DBUG_RETURN(1);
16369 }
16370 }
16371
16372 if (block_rwlock != NULL) {
16373
16374 int buf1len;
16375 char buf1[IO_SIZE];
16376
16377 buf1len = snprintf(
16378 buf1, sizeof buf1, "sum rwlock: %s:%u",
16379 innobase_basename(block_rwlock->cfile_name),
16380 block_rwlock->cline);
16381
16382 int buf2len;
16383 char buf2[IO_SIZE];
16384
16385 buf2len = snprintf(
16386 buf2, sizeof buf2, "waits=" ULINTPF,
16387 block_rwlock_oswait_count);
16388
16389 if (stat_print(thd, innobase_hton_name,
16390 hton_name_len,
16391 buf1, static_cast<uint>(buf1len),
16392 buf2, static_cast<uint>(buf2len))) {
16393
16394 mutex_exit(&rw_lock_list_mutex);
16395
16396 DBUG_RETURN(1);
16397 }
16398 }
16399
16400 mutex_exit(&rw_lock_list_mutex);
16401
16402 DBUG_RETURN(0);
16403 }
16404
16405 /** Implements the SHOW MUTEX STATUS command.
16406 @param[in,out] hton the innodb handlerton
16407 @param[in,out] thd the MySQL query thread of the caller
16408 @param[in,out] stat_print function for printing statistics
16409 @return 0 on success. */
16410 static
16411 int
innodb_show_latch_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16412 innodb_show_latch_status(
16413 handlerton* hton,
16414 THD* thd,
16415 stat_print_fn* stat_print)
16416 {
16417 int ret = innodb_show_mutex_status(hton, thd, stat_print);
16418
16419 if (ret != 0) {
16420 return(ret);
16421 }
16422
16423 return(innodb_show_rwlock_status(hton, thd, stat_print));
16424 }
16425
16426 /************************************************************************//**
16427 Return 0 on success and non-zero on failure. Note: the bool return type
16428 seems to be abused here, should be an int. */
16429 static
16430 bool
innobase_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)16431 innobase_show_status(
16432 /*=================*/
16433 handlerton* hton, /*!< in: the innodb handlerton */
16434 THD* thd, /*!< in: the MySQL query thread
16435 of the caller */
16436 stat_print_fn* stat_print,
16437 enum ha_stat_type stat_type)
16438 {
16439 DBUG_ASSERT(hton == innodb_hton_ptr);
16440
16441 switch (stat_type) {
16442 case HA_ENGINE_STATUS:
16443 /* Non-zero return value means there was an error. */
16444 return(innodb_show_status(hton, thd, stat_print) != 0);
16445
16446 case HA_ENGINE_MUTEX:
16447 return(innodb_show_latch_status(hton, thd, stat_print) != 0);
16448
16449 case HA_ENGINE_LOGS:
16450 /* Not handled */
16451 break;
16452 }
16453
16454 /* Success */
16455 return(false);
16456 }
16457 /*********************************************************************//**
16458 Returns number of THR_LOCK locks used for one instance of InnoDB table.
16459 InnoDB no longer relies on THR_LOCK locks so 0 value is returned.
16460 Instead of THR_LOCK locks InnoDB relies on combination of metadata locks
16461 (e.g. for LOCK TABLES and DDL) and its own locking subsystem.
16462 Note that even though this method returns 0, SQL-layer still calls
16463 ::store_lock(), ::start_stmt() and ::external_lock() methods for InnoDB
16464 tables. */
16465
16466 uint
lock_count(void) const16467 ha_innobase::lock_count(void) const
16468 /*===============================*/
16469 {
16470 return 0;
16471 }
16472
16473 /*****************************************************************//**
16474 Supposed to convert a MySQL table lock stored in the 'lock' field of the
16475 handle to a proper type before storing pointer to the lock into an array
16476 of pointers.
16477 In practice, since InnoDB no longer relies on THR_LOCK locks and its
16478 lock_count() method returns 0 it just informs storage engine about type
16479 of THR_LOCK which SQL-layer would have acquired for this specific statement
16480 on this specific table.
16481 MySQL also calls this if it wants to reset some table locks to a not-locked
16482 state during the processing of an SQL query. An example is that during a
16483 SELECT the read lock is released early on the 'const' tables where we only
16484 fetch one row. MySQL does not call this when it releases all locks at the
16485 end of an SQL statement.
16486 @return pointer to the current element in the 'to' array. */
16487
16488 THR_LOCK_DATA**
store_lock(THD * thd,THR_LOCK_DATA ** to,thr_lock_type lock_type)16489 ha_innobase::store_lock(
16490 /*====================*/
16491 THD* thd, /*!< in: user thread handle */
16492 THR_LOCK_DATA** to, /*!< in: pointer to the current
16493 element in an array of pointers
16494 to lock structs;
16495 only used as return value */
16496 thr_lock_type lock_type) /*!< in: lock type to store in
16497 'lock'; this may also be
16498 TL_IGNORE */
16499 {
16500 /* Note that trx in this function is NOT necessarily m_prebuilt->trx
16501 because we call update_thd() later, in ::external_lock()! Failure to
16502 understand this caused a serious memory corruption bug in 5.1.11. */
16503
16504 trx_t* trx = check_trx_exists(thd);
16505
16506 /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
16507 Be careful to ignore TL_IGNORE if we are going to do something with
16508 only 'real' locks! */
16509
16510 /* If no MySQL table is in use, we need to set the isolation level
16511 of the transaction. */
16512
16513 if (lock_type != TL_IGNORE
16514 && trx->n_mysql_tables_in_use == 0) {
16515 trx->isolation_level = innobase_map_isolation_level(
16516 (enum_tx_isolation) thd_tx_isolation(thd));
16517
16518 if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
16519
16520 /* At low transaction isolation levels we let
16521 each consistent read set its own snapshot */
16522 trx->read_view.close();
16523 }
16524 }
16525
16526 DBUG_ASSERT(EQ_CURRENT_THD(thd));
16527 const bool in_lock_tables = thd_in_lock_tables(thd);
16528 const int sql_command = thd_sql_command(thd);
16529
16530 if (srv_read_only_mode
16531 && (sql_command == SQLCOM_UPDATE
16532 || sql_command == SQLCOM_INSERT
16533 || sql_command == SQLCOM_REPLACE
16534 || sql_command == SQLCOM_DROP_TABLE
16535 || sql_command == SQLCOM_ALTER_TABLE
16536 || sql_command == SQLCOM_OPTIMIZE
16537 || (sql_command == SQLCOM_CREATE_TABLE
16538 && (lock_type >= TL_WRITE_CONCURRENT_INSERT
16539 && lock_type <= TL_WRITE))
16540 || sql_command == SQLCOM_CREATE_INDEX
16541 || sql_command == SQLCOM_DROP_INDEX
16542 || sql_command == SQLCOM_CREATE_SEQUENCE
16543 || sql_command == SQLCOM_DROP_SEQUENCE
16544 || sql_command == SQLCOM_DELETE)) {
16545
16546 ib_senderrf(trx->mysql_thd,
16547 IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
16548
16549 } else if (sql_command == SQLCOM_FLUSH
16550 && lock_type == TL_READ_NO_INSERT) {
16551
16552 /* Check for FLUSH TABLES ... WITH READ LOCK */
16553
16554 /* Note: This call can fail, but there is no way to return
16555 the error to the caller. We simply ignore it for now here
16556 and push the error code to the caller where the error is
16557 detected in the function. */
16558
16559 dberr_t err = row_quiesce_set_state(
16560 m_prebuilt->table, QUIESCE_START, trx);
16561
16562 ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
16563
16564 if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
16565 m_prebuilt->select_lock_type = LOCK_S;
16566 m_prebuilt->stored_select_lock_type = LOCK_S;
16567 } else {
16568 m_prebuilt->select_lock_type = LOCK_NONE;
16569 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16570 }
16571
16572 /* Check for DROP TABLE */
16573 } else if (sql_command == SQLCOM_DROP_TABLE ||
16574 sql_command == SQLCOM_DROP_SEQUENCE) {
16575
16576 /* MySQL calls this function in DROP TABLE though this table
16577 handle may belong to another thd that is running a query. Let
16578 us in that case skip any changes to the m_prebuilt struct. */
16579
16580 /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
16581 } else if ((lock_type == TL_READ && in_lock_tables)
16582 || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
16583 || lock_type == TL_READ_WITH_SHARED_LOCKS
16584 || lock_type == TL_READ_NO_INSERT
16585 || (lock_type != TL_IGNORE
16586 && sql_command != SQLCOM_SELECT)) {
16587
16588 /* The OR cases above are in this order:
16589 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
16590 are processing a stored procedure or function, or
16591 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
16592 3) this is a SELECT ... IN SHARE MODE, or
16593 4) we are doing a complex SQL statement like
16594 INSERT INTO ... SELECT ... and the logical logging (MySQL
16595 binlog) requires the use of a locking read, or
16596 MySQL is doing LOCK TABLES ... READ.
16597 5) we let InnoDB do locking reads for all SQL statements that
16598 are not simple SELECTs; note that select_lock_type in this
16599 case may get strengthened in ::external_lock() to LOCK_X.
16600 Note that we MUST use a locking read in all data modifying
16601 SQL statements, because otherwise the execution would not be
16602 serializable, and also the results from the update could be
16603 unexpected if an obsolete consistent read view would be
16604 used. */
16605
16606 /* Use consistent read for checksum table */
16607
16608 if (sql_command == SQLCOM_CHECKSUM
16609 || sql_command == SQLCOM_CREATE_SEQUENCE
16610 || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
16611 || ((srv_locks_unsafe_for_binlog
16612 || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
16613 && trx->isolation_level != TRX_ISO_SERIALIZABLE
16614 && (lock_type == TL_READ
16615 || lock_type == TL_READ_NO_INSERT)
16616 && (sql_command == SQLCOM_INSERT_SELECT
16617 || sql_command == SQLCOM_REPLACE_SELECT
16618 || sql_command == SQLCOM_UPDATE
16619 || sql_command == SQLCOM_CREATE_SEQUENCE
16620 || sql_command == SQLCOM_CREATE_TABLE))) {
16621
16622 /* If we either have innobase_locks_unsafe_for_binlog
16623 option set or this session is using READ COMMITTED
16624 isolation level and isolation level of the transaction
16625 is not set to serializable and MySQL is doing
16626 INSERT INTO...SELECT or REPLACE INTO...SELECT
16627 or UPDATE ... = (SELECT ...) or CREATE ...
16628 SELECT... without FOR UPDATE or IN SHARE
16629 MODE in select, then we use consistent read
16630 for select. */
16631
16632 m_prebuilt->select_lock_type = LOCK_NONE;
16633 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16634 } else {
16635 m_prebuilt->select_lock_type = LOCK_S;
16636 m_prebuilt->stored_select_lock_type = LOCK_S;
16637 }
16638
16639 } else if (lock_type != TL_IGNORE) {
16640
16641 /* We set possible LOCK_X value in external_lock, not yet
16642 here even if this would be SELECT ... FOR UPDATE */
16643
16644 m_prebuilt->select_lock_type = LOCK_NONE;
16645 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16646 }
16647
16648 if (!trx_is_started(trx)
16649 && (m_prebuilt->select_lock_type != LOCK_NONE
16650 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
16651
16652 trx->will_lock = true;
16653 }
16654
16655 return(to);
16656 }
16657
16658 /*********************************************************************//**
16659 Read the next autoinc value. Acquire the relevant locks before reading
16660 the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
16661 on return and all relevant locks acquired.
16662 @return DB_SUCCESS or error code */
16663
16664 dberr_t
innobase_get_autoinc(ulonglong * value)16665 ha_innobase::innobase_get_autoinc(
16666 /*==============================*/
16667 ulonglong* value) /*!< out: autoinc value */
16668 {
16669 *value = 0;
16670
16671 m_prebuilt->autoinc_error = innobase_lock_autoinc();
16672
16673 if (m_prebuilt->autoinc_error == DB_SUCCESS) {
16674
16675 /* Determine the first value of the interval */
16676 *value = dict_table_autoinc_read(m_prebuilt->table);
16677
16678 /* It should have been initialized during open. */
16679 if (*value == 0) {
16680 m_prebuilt->autoinc_error = DB_UNSUPPORTED;
16681 dict_table_autoinc_unlock(m_prebuilt->table);
16682 }
16683 }
16684
16685 return(m_prebuilt->autoinc_error);
16686 }
16687
16688 /*******************************************************************//**
16689 This function reads the global auto-inc counter. It doesn't use the
16690 AUTOINC lock even if the lock mode is set to TRADITIONAL.
16691 @return the autoinc value */
16692
16693 ulonglong
innobase_peek_autoinc(void)16694 ha_innobase::innobase_peek_autoinc(void)
16695 /*====================================*/
16696 {
16697 ulonglong auto_inc;
16698 dict_table_t* innodb_table;
16699
16700 ut_a(m_prebuilt != NULL);
16701 ut_a(m_prebuilt->table != NULL);
16702
16703 innodb_table = m_prebuilt->table;
16704
16705 dict_table_autoinc_lock(innodb_table);
16706
16707 auto_inc = dict_table_autoinc_read(innodb_table);
16708
16709 if (auto_inc == 0) {
16710 ib::info() << "AUTOINC next value generation is disabled for"
16711 " '" << innodb_table->name << "'";
16712 }
16713
16714 dict_table_autoinc_unlock(innodb_table);
16715
16716 return(auto_inc);
16717 }
16718
16719 /*********************************************************************//**
16720 Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
16721
16722 void
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)16723 ha_innobase::get_auto_increment(
16724 /*============================*/
16725 ulonglong offset, /*!< in: table autoinc offset */
16726 ulonglong increment, /*!< in: table autoinc
16727 increment */
16728 ulonglong nb_desired_values, /*!< in: number of values
16729 reqd */
16730 ulonglong* first_value, /*!< out: the autoinc value */
16731 ulonglong* nb_reserved_values) /*!< out: count of reserved
16732 values */
16733 {
16734 trx_t* trx;
16735 dberr_t error;
16736 ulonglong autoinc = 0;
16737
16738 /* Prepare m_prebuilt->trx in the table handle */
16739 update_thd(ha_thd());
16740
16741 error = innobase_get_autoinc(&autoinc);
16742
16743 if (error != DB_SUCCESS) {
16744 *first_value = (~(ulonglong) 0);
16745 return;
16746 }
16747
16748 /* This is a hack, since nb_desired_values seems to be accurate only
16749 for the first call to get_auto_increment() for multi-row INSERT and
16750 meaningless for other statements e.g, LOAD etc. Subsequent calls to
16751 this method for the same statement results in different values which
16752 don't make sense. Therefore we store the value the first time we are
16753 called and count down from that as rows are written (see write_row()).
16754 */
16755
16756 trx = m_prebuilt->trx;
16757
16758 /* Note: We can't rely on *first_value since some MySQL engines,
16759 in particular the partition engine, don't initialize it to 0 when
16760 invoking this method. So we are not sure if it's guaranteed to
16761 be 0 or not. */
16762
16763 /* We need the upper limit of the col type to check for
16764 whether we update the table autoinc counter or not. */
16765 ulonglong col_max_value =
16766 table->next_number_field->get_max_int_value();
16767
16768 /** The following logic is needed to avoid duplicate key error
16769 for autoincrement column.
16770
16771 (1) InnoDB gives the current autoincrement value with respect
16772 to increment and offset value.
16773
16774 (2) Basically it does compute_next_insert_id() logic inside InnoDB
16775 to avoid the current auto increment value changed by handler layer.
16776
16777 (3) It is restricted only for insert operations. */
16778
16779 if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
16780 && autoinc < col_max_value) {
16781
16782 ulonglong prev_auto_inc = autoinc;
16783
16784 autoinc = ((autoinc - 1) + increment - offset)/ increment;
16785
16786 autoinc = autoinc * increment + offset;
16787
16788 /* If autoinc exceeds the col_max_value then reset
16789 to old autoinc value. Because in case of non-strict
16790 sql mode, boundary value is not considered as error. */
16791
16792 if (autoinc >= col_max_value) {
16793 autoinc = prev_auto_inc;
16794 }
16795
16796 ut_ad(autoinc > 0);
16797 }
16798
16799 /* Called for the first time ? */
16800 if (trx->n_autoinc_rows == 0) {
16801
16802 trx->n_autoinc_rows = (ulint) nb_desired_values;
16803
16804 /* It's possible for nb_desired_values to be 0:
16805 e.g., INSERT INTO T1(C) SELECT C FROM T2; */
16806 if (nb_desired_values == 0) {
16807
16808 trx->n_autoinc_rows = 1;
16809 }
16810
16811 set_if_bigger(*first_value, autoinc);
16812 /* Not in the middle of a mult-row INSERT. */
16813 } else if (m_prebuilt->autoinc_last_value == 0) {
16814 set_if_bigger(*first_value, autoinc);
16815 }
16816
16817 if (*first_value > col_max_value) {
16818 /* Out of range number. Let handler::update_auto_increment()
16819 take care of this */
16820 m_prebuilt->autoinc_last_value = 0;
16821 dict_table_autoinc_unlock(m_prebuilt->table);
16822 *nb_reserved_values= 0;
16823 return;
16824 }
16825
16826 *nb_reserved_values = trx->n_autoinc_rows;
16827
16828 /* With old style AUTOINC locking we only update the table's
16829 AUTOINC counter after attempting to insert the row. */
16830 if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
16831 ulonglong current;
16832 ulonglong next_value;
16833
16834 current = *first_value;
16835
16836 /* Compute the last value in the interval */
16837 next_value = innobase_next_autoinc(
16838 current, *nb_reserved_values, increment, offset,
16839 col_max_value);
16840
16841 m_prebuilt->autoinc_last_value = next_value;
16842
16843 if (m_prebuilt->autoinc_last_value < *first_value) {
16844 *first_value = (~(ulonglong) 0);
16845 } else {
16846 /* Update the table autoinc variable */
16847 dict_table_autoinc_update_if_greater(
16848 m_prebuilt->table,
16849 m_prebuilt->autoinc_last_value);
16850 }
16851 } else {
16852 /* This will force write_row() into attempting an update
16853 of the table's AUTOINC counter. */
16854 m_prebuilt->autoinc_last_value = 0;
16855 }
16856
16857 /* The increment to be used to increase the AUTOINC value, we use
16858 this in write_row() and update_row() to increase the autoinc counter
16859 for columns that are filled by the user. We need the offset and
16860 the increment. */
16861 m_prebuilt->autoinc_offset = offset;
16862 m_prebuilt->autoinc_increment = increment;
16863
16864 dict_table_autoinc_unlock(m_prebuilt->table);
16865 }
16866
16867 /*******************************************************************//**
16868 See comment in handler.cc */
16869
16870 bool
get_error_message(int error,String * buf)16871 ha_innobase::get_error_message(
16872 /*===========================*/
16873 int error,
16874 String* buf)
16875 {
16876 trx_t* trx = check_trx_exists(ha_thd());
16877
16878 if (error == HA_ERR_DECRYPTION_FAILED) {
16879 const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.";
16880 buf->copy(msg, (uint)strlen(msg), system_charset_info);
16881 } else {
16882 buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
16883 system_charset_info);
16884 }
16885
16886 return(FALSE);
16887 }
16888
16889 /** Retrieves the names of the table and the key for which there was a
16890 duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
16891
16892 If any of the names is not available, then this method will return
16893 false and will not change any of child_table_name or child_key_name.
16894
16895 @param[out] child_table_name Table name
16896 @param[in] child_table_name_len Table name buffer size
16897 @param[out] child_key_name Key name
16898 @param[in] child_key_name_len Key name buffer size
16899
16900 @retval true table and key names were available and were written into the
16901 corresponding out parameters.
16902 @retval false table and key names were not available, the out parameters
16903 were not touched. */
16904 bool
get_foreign_dup_key(char * child_table_name,uint child_table_name_len,char * child_key_name,uint child_key_name_len)16905 ha_innobase::get_foreign_dup_key(
16906 /*=============================*/
16907 char* child_table_name,
16908 uint child_table_name_len,
16909 char* child_key_name,
16910 uint child_key_name_len)
16911 {
16912 const dict_index_t* err_index;
16913
16914 ut_a(m_prebuilt->trx != NULL);
16915 ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
16916
16917 err_index = trx_get_error_info(m_prebuilt->trx);
16918
16919 if (err_index == NULL) {
16920 return(false);
16921 }
16922 /* else */
16923
16924 /* copy table name (and convert from filename-safe encoding to
16925 system_charset_info) */
16926 char* p = strchr(err_index->table->name.m_name, '/');
16927
16928 /* strip ".../" prefix if any */
16929 if (p != NULL) {
16930 p++;
16931 } else {
16932 p = err_index->table->name.m_name;
16933 }
16934
16935 size_t len;
16936
16937 len = filename_to_tablename(p, child_table_name, child_table_name_len);
16938
16939 child_table_name[len] = '\0';
16940
16941 /* copy index name */
16942 snprintf(child_key_name, child_key_name_len, "%s",
16943 err_index->name());
16944
16945 return(true);
16946 }
16947
16948 /*******************************************************************//**
16949 Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
16950 If there is no explicitly declared non-null unique key or a primary key, then
16951 InnoDB internally uses the row id as the primary key.
16952 @return < 0 if ref1 < ref2, 0 if equal, else > 0 */
16953
16954 int
cmp_ref(const uchar * ref1,const uchar * ref2)16955 ha_innobase::cmp_ref(
16956 /*=================*/
16957 const uchar* ref1, /*!< in: an (internal) primary key value in the
16958 MySQL key value format */
16959 const uchar* ref2) /*!< in: an (internal) primary key value in the
16960 MySQL key value format */
16961 {
16962 enum_field_types mysql_type;
16963 Field* field;
16964 KEY_PART_INFO* key_part;
16965 KEY_PART_INFO* key_part_end;
16966 uint len1;
16967 uint len2;
16968 int result;
16969
16970 if (m_prebuilt->clust_index_was_generated) {
16971 /* The 'ref' is an InnoDB row id */
16972
16973 return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
16974 }
16975
16976 /* Do a type-aware comparison of primary key fields. PK fields
16977 are always NOT NULL, so no checks for NULL are performed. */
16978
16979 key_part = table->key_info[table->s->primary_key].key_part;
16980
16981 key_part_end = key_part
16982 + table->key_info[table->s->primary_key].user_defined_key_parts;
16983
16984 for (; key_part != key_part_end; ++key_part) {
16985 field = key_part->field;
16986 mysql_type = field->type();
16987
16988 if (mysql_type == MYSQL_TYPE_TINY_BLOB
16989 || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
16990 || mysql_type == MYSQL_TYPE_BLOB
16991 || mysql_type == MYSQL_TYPE_LONG_BLOB) {
16992
16993 /* In the MySQL key value format, a column prefix of
16994 a BLOB is preceded by a 2-byte length field */
16995
16996 len1 = innobase_read_from_2_little_endian(ref1);
16997 len2 = innobase_read_from_2_little_endian(ref2);
16998
16999 result = ((Field_blob*) field)->cmp(
17000 ref1 + 2, len1, ref2 + 2, len2);
17001 } else {
17002 result = field->key_cmp(ref1, ref2);
17003 }
17004
17005 if (result) {
17006
17007 return(result);
17008 }
17009
17010 ref1 += key_part->store_length;
17011 ref2 += key_part->store_length;
17012 }
17013
17014 return(0);
17015 }
17016
17017 /*******************************************************************//**
17018 Ask InnoDB if a query to a table can be cached.
17019 @return TRUE if query caching of the table is permitted */
17020
17021 my_bool
register_query_cache_table(THD * thd,const char * table_key,uint key_length,qc_engine_callback * call_back,ulonglong * engine_data)17022 ha_innobase::register_query_cache_table(
17023 /*====================================*/
17024 THD* thd, /*!< in: user thread handle */
17025 const char* table_key, /*!< in: normalized path to the
17026 table */
17027 uint key_length, /*!< in: length of the normalized
17028 path to the table */
17029 qc_engine_callback*
17030 call_back, /*!< out: pointer to function for
17031 checking if query caching
17032 is permitted */
17033 ulonglong *engine_data) /*!< in/out: data to call_back */
17034 {
17035 *engine_data = 0;
17036 *call_back = innobase_query_caching_of_table_permitted;
17037
17038 return(innobase_query_caching_of_table_permitted(
17039 thd, table_key,
17040 static_cast<uint>(key_length),
17041 engine_data));
17042 }
17043
17044 /******************************************************************//**
17045 This function is used to find the storage length in bytes of the first n
17046 characters for prefix indexes using a multibyte character set. The function
17047 finds charset information and returns length of prefix_len characters in the
17048 index field in bytes.
17049 @return number of bytes occupied by the first n characters */
17050 ulint
innobase_get_at_most_n_mbchars(ulint charset_id,ulint prefix_len,ulint data_len,const char * str)17051 innobase_get_at_most_n_mbchars(
17052 /*===========================*/
17053 ulint charset_id, /*!< in: character set id */
17054 ulint prefix_len, /*!< in: prefix length in bytes of the index
17055 (this has to be divided by mbmaxlen to get the
17056 number of CHARACTERS n in the prefix) */
17057 ulint data_len, /*!< in: length of the string in bytes */
17058 const char* str) /*!< in: character string */
17059 {
17060 ulint char_length; /*!< character length in bytes */
17061 ulint n_chars; /*!< number of characters in prefix */
17062 CHARSET_INFO* charset; /*!< charset used in the field */
17063
17064 charset = get_charset((uint) charset_id, MYF(MY_WME));
17065
17066 ut_ad(charset);
17067 ut_ad(charset->mbmaxlen);
17068
17069 /* Calculate how many characters at most the prefix index contains */
17070
17071 n_chars = prefix_len / charset->mbmaxlen;
17072
17073 /* If the charset is multi-byte, then we must find the length of the
17074 first at most n chars in the string. If the string contains less
17075 characters than n, then we return the length to the end of the last
17076 character. */
17077
17078 if (charset->mbmaxlen > 1) {
17079 /* my_charpos() returns the byte length of the first n_chars
17080 characters, or a value bigger than the length of str, if
17081 there were not enough full characters in str.
17082
17083 Why does the code below work:
17084 Suppose that we are looking for n UTF-8 characters.
17085
17086 1) If the string is long enough, then the prefix contains at
17087 least n complete UTF-8 characters + maybe some extra
17088 characters + an incomplete UTF-8 character. No problem in
17089 this case. The function returns the pointer to the
17090 end of the nth character.
17091
17092 2) If the string is not long enough, then the string contains
17093 the complete value of a column, that is, only complete UTF-8
17094 characters, and we can store in the column prefix index the
17095 whole string. */
17096
17097 char_length= my_charpos(charset, str, str + data_len, n_chars);
17098 if (char_length > data_len) {
17099 char_length = data_len;
17100 }
17101 } else if (data_len < prefix_len) {
17102
17103 char_length = data_len;
17104
17105 } else {
17106
17107 char_length = prefix_len;
17108 }
17109
17110 return(char_length);
17111 }
17112
17113 /*******************************************************************//**
17114 This function is used to prepare an X/Open XA distributed transaction.
17115 @return 0 or error number */
17116 static
17117 int
innobase_xa_prepare(handlerton * hton,THD * thd,bool prepare_trx)17118 innobase_xa_prepare(
17119 /*================*/
17120 handlerton* hton, /*!< in: InnoDB handlerton */
17121 THD* thd, /*!< in: handle to the MySQL thread of
17122 the user whose XA transaction should
17123 be prepared */
17124 bool prepare_trx) /*!< in: true - prepare transaction
17125 false - the current SQL statement
17126 ended */
17127 {
17128 trx_t* trx = check_trx_exists(thd);
17129
17130 DBUG_ASSERT(hton == innodb_hton_ptr);
17131
17132 thd_get_xid(thd, (MYSQL_XID*) trx->xid);
17133
17134 innobase_srv_conc_force_exit_innodb(trx);
17135
17136 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
17137
17138 sql_print_error("Transaction not registered for MariaDB 2PC,"
17139 " but transaction is active");
17140 }
17141
17142 if (prepare_trx
17143 || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17144
17145 /* We were instructed to prepare the whole transaction, or
17146 this is an SQL statement end and autocommit is on */
17147
17148 ut_ad(trx_is_registered_for_2pc(trx));
17149
17150 trx_prepare_for_mysql(trx);
17151 } else {
17152 /* We just mark the SQL statement ended and do not do a
17153 transaction prepare */
17154
17155 /* If we had reserved the auto-inc lock for some
17156 table in this SQL statement we release it now */
17157
17158 lock_unlock_table_autoinc(trx);
17159
17160 /* Store the current undo_no of the transaction so that we
17161 know where to roll back if we have to roll back the next
17162 SQL statement */
17163
17164 trx_mark_sql_stat_end(trx);
17165 }
17166
17167 if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
17168 && (prepare_trx
17169 || !thd_test_options(
17170 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17171
17172 /* For mysqlbackup to work the order of transactions in binlog
17173 and InnoDB must be the same. Consider the situation
17174
17175 thread1> prepare; write to binlog; ...
17176 <context switch>
17177 thread2> prepare; write to binlog; commit
17178 thread1> ... commit
17179
17180 The server guarantees that writes to the binary log
17181 and commits are in the same order, so we do not have
17182 to handle this case. */
17183 }
17184
17185 return(0);
17186 }
17187
17188 /*******************************************************************//**
17189 This function is used to recover X/Open XA distributed transactions.
17190 @return number of prepared transactions stored in xid_list */
17191 static
17192 int
innobase_xa_recover(handlerton * hton,XID * xid_list,uint len)17193 innobase_xa_recover(
17194 /*================*/
17195 handlerton* hton, /*!< in: InnoDB handlerton */
17196 XID* xid_list,/*!< in/out: prepared transactions */
17197 uint len) /*!< in: number of slots in xid_list */
17198 {
17199 DBUG_ASSERT(hton == innodb_hton_ptr);
17200
17201 if (len == 0 || xid_list == NULL) {
17202
17203 return(0);
17204 }
17205
17206 return(trx_recover_for_mysql(xid_list, len));
17207 }
17208
17209 /*******************************************************************//**
17210 This function is used to commit one X/Open XA distributed transaction
17211 which is in the prepared state
17212 @return 0 or error number */
17213 static
17214 int
innobase_commit_by_xid(handlerton * hton,XID * xid)17215 innobase_commit_by_xid(
17216 /*===================*/
17217 handlerton* hton,
17218 XID* xid) /*!< in: X/Open XA transaction identification */
17219 {
17220 DBUG_ASSERT(hton == innodb_hton_ptr);
17221
17222 if (high_level_read_only) {
17223 return(XAER_RMFAIL);
17224 }
17225
17226 if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17227 /* use cases are: disconnected xa, slave xa, recovery */
17228 innobase_commit_low(trx);
17229 ut_ad(trx->mysql_thd == NULL);
17230 trx_deregister_from_2pc(trx);
17231 ut_ad(!trx->will_lock); /* trx cache requirement */
17232 trx->free();
17233
17234 return(XA_OK);
17235 } else {
17236 return(XAER_NOTA);
17237 }
17238 }
17239
17240 /** This function is used to rollback one X/Open XA distributed transaction
17241 which is in the prepared state
17242
17243 @param[in] hton InnoDB handlerton
17244 @param[in] xid X/Open XA transaction identification
17245
17246 @return 0 or error number */
innobase_rollback_by_xid(handlerton * hton,XID * xid)17247 int innobase_rollback_by_xid(handlerton* hton, XID* xid)
17248 {
17249 DBUG_ASSERT(hton == innodb_hton_ptr);
17250
17251 if (high_level_read_only) {
17252 return(XAER_RMFAIL);
17253 }
17254
17255 if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17256 #ifdef WITH_WSREP
17257 /* If a wsrep transaction is being rolled back during
17258 the recovery, we must clear the xid in order to avoid
17259 writing serialisation history for rolled back transaction. */
17260 if (wsrep_is_wsrep_xid(trx->xid)) {
17261 trx->xid->null();
17262 }
17263 #endif /* WITH_WSREP */
17264 int ret = innobase_rollback_trx(trx);
17265 trx_deregister_from_2pc(trx);
17266 ut_ad(!trx->will_lock);
17267 trx->free();
17268
17269 return(ret);
17270 } else {
17271 return(XAER_NOTA);
17272 }
17273 }
17274
17275 bool
check_if_incompatible_data(HA_CREATE_INFO * info,uint table_changes)17276 ha_innobase::check_if_incompatible_data(
17277 /*====================================*/
17278 HA_CREATE_INFO* info,
17279 uint table_changes)
17280 {
17281 ha_table_option_struct *param_old, *param_new;
17282
17283 /* Cache engine specific options */
17284 param_new = info->option_struct;
17285 param_old = table->s->option_struct;
17286
17287 innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info);
17288
17289 if (table_changes != IS_EQUAL_YES) {
17290
17291 return(COMPATIBLE_DATA_NO);
17292 }
17293
17294 /* Check that auto_increment value was not changed */
17295 if ((info->used_fields & HA_CREATE_USED_AUTO)
17296 && info->auto_increment_value != 0) {
17297
17298 return(COMPATIBLE_DATA_NO);
17299 }
17300
17301 /* Check that row format didn't change */
17302 if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
17303 && info->row_type != get_row_type()) {
17304
17305 return(COMPATIBLE_DATA_NO);
17306 }
17307
17308 /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
17309 if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
17310 return(COMPATIBLE_DATA_NO);
17311 }
17312
17313 /* Changes on engine specific table options requests a rebuild of the table. */
17314 if (param_new->page_compressed != param_old->page_compressed ||
17315 param_new->page_compression_level != param_old->page_compression_level)
17316 {
17317 return(COMPATIBLE_DATA_NO);
17318 }
17319
17320 return(COMPATIBLE_DATA_YES);
17321 }
17322
17323 /****************************************************************//**
17324 Update the system variable innodb_io_capacity_max using the "saved"
17325 value. This function is registered as a callback with MySQL. */
17326 static
17327 void
innodb_io_capacity_max_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17328 innodb_io_capacity_max_update(
17329 /*===========================*/
17330 THD* thd, /*!< in: thread handle */
17331 st_mysql_sys_var*, void*,
17332 const void* save) /*!< in: immediate result
17333 from check function */
17334 {
17335 ulong in_val = *static_cast<const ulong*>(save);
17336
17337 if (in_val < srv_io_capacity) {
17338 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17339 ER_WRONG_ARGUMENTS,
17340 "Setting innodb_io_capacity_max %lu"
17341 " lower than innodb_io_capacity %lu.",
17342 in_val, srv_io_capacity);
17343
17344 srv_io_capacity = in_val;
17345
17346 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17347 ER_WRONG_ARGUMENTS,
17348 "Setting innodb_io_capacity to %lu",
17349 srv_io_capacity);
17350 }
17351
17352 srv_max_io_capacity = in_val;
17353 }
17354
17355 /****************************************************************//**
17356 Update the system variable innodb_io_capacity using the "saved"
17357 value. This function is registered as a callback with MySQL. */
17358 static
17359 void
innodb_io_capacity_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17360 innodb_io_capacity_update(
17361 /*======================*/
17362 THD* thd, /*!< in: thread handle */
17363 st_mysql_sys_var*, void*,
17364 const void* save) /*!< in: immediate result
17365 from check function */
17366 {
17367 ulong in_val = *static_cast<const ulong*>(save);
17368
17369 if (in_val > srv_max_io_capacity) {
17370 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17371 ER_WRONG_ARGUMENTS,
17372 "Setting innodb_io_capacity to %lu"
17373 " higher than innodb_io_capacity_max %lu",
17374 in_val, srv_max_io_capacity);
17375
17376 srv_max_io_capacity = (in_val & ~(~0UL >> 1))
17377 ? in_val : in_val * 2;
17378
17379 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17380 ER_WRONG_ARGUMENTS,
17381 "Setting innodb_max_io_capacity to %lu",
17382 srv_max_io_capacity);
17383 }
17384
17385 srv_io_capacity = in_val;
17386 }
17387
17388 /****************************************************************//**
17389 Update the system variable innodb_max_dirty_pages_pct using the "saved"
17390 value. This function is registered as a callback with MySQL. */
17391 static
17392 void
innodb_max_dirty_pages_pct_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17393 innodb_max_dirty_pages_pct_update(
17394 /*==============================*/
17395 THD* thd, /*!< in: thread handle */
17396 st_mysql_sys_var*, void*,
17397 const void* save) /*!< in: immediate result
17398 from check function */
17399 {
17400 double in_val = *static_cast<const double*>(save);
17401 if (in_val < srv_max_dirty_pages_pct_lwm) {
17402 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17403 ER_WRONG_ARGUMENTS,
17404 "innodb_max_dirty_pages_pct cannot be"
17405 " set lower than"
17406 " innodb_max_dirty_pages_pct_lwm.");
17407 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17408 ER_WRONG_ARGUMENTS,
17409 "Lowering"
17410 " innodb_max_dirty_page_pct_lwm to %lf",
17411 in_val);
17412
17413 srv_max_dirty_pages_pct_lwm = in_val;
17414 }
17415
17416 srv_max_buf_pool_modified_pct = in_val;
17417 }
17418
17419 /****************************************************************//**
17420 Update the system variable innodb_max_dirty_pages_pct_lwm using the
17421 "saved" value. This function is registered as a callback with MySQL. */
17422 static
17423 void
innodb_max_dirty_pages_pct_lwm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17424 innodb_max_dirty_pages_pct_lwm_update(
17425 /*==================================*/
17426 THD* thd, /*!< in: thread handle */
17427 st_mysql_sys_var*, void*,
17428 const void* save) /*!< in: immediate result
17429 from check function */
17430 {
17431 double in_val = *static_cast<const double*>(save);
17432 if (in_val > srv_max_buf_pool_modified_pct) {
17433 in_val = srv_max_buf_pool_modified_pct;
17434 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17435 ER_WRONG_ARGUMENTS,
17436 "innodb_max_dirty_pages_pct_lwm"
17437 " cannot be set higher than"
17438 " innodb_max_dirty_pages_pct.");
17439 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17440 ER_WRONG_ARGUMENTS,
17441 "Setting innodb_max_dirty_page_pct_lwm"
17442 " to %lf",
17443 in_val);
17444 }
17445
17446 srv_max_dirty_pages_pct_lwm = in_val;
17447 }
17448
17449 /*************************************************************//**
17450 Don't allow to set innodb_fast_shutdown=0 if purge threads are
17451 already down.
17452 @return 0 if innodb_fast_shutdown can be set */
17453 static
17454 int
fast_shutdown_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)17455 fast_shutdown_validate(
17456 /*=============================*/
17457 THD* thd, /*!< in: thread handle */
17458 struct st_mysql_sys_var* var, /*!< in: pointer to system
17459 variable */
17460 void* save, /*!< out: immediate result
17461 for update function */
17462 struct st_mysql_value* value) /*!< in: incoming string */
17463 {
17464 if (check_sysvar_int(thd, var, save, value)) {
17465 return(1);
17466 }
17467
17468 uint new_val = *reinterpret_cast<uint*>(save);
17469
17470 if (srv_fast_shutdown && !new_val
17471 && !my_atomic_loadptr_explicit(reinterpret_cast<void**>
17472 (&srv_running),
17473 MY_MEMORY_ORDER_RELAXED)) {
17474 return(1);
17475 }
17476
17477 return(0);
17478 }
17479
17480 /*************************************************************//**
17481 Check whether valid argument given to innobase_*_stopword_table.
17482 This function is registered as a callback with MySQL.
17483 @return 0 for valid stopword table */
17484 static
17485 int
innodb_stopword_table_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17486 innodb_stopword_table_validate(
17487 /*===========================*/
17488 THD* thd, /*!< in: thread handle */
17489 st_mysql_sys_var*,
17490 void* save, /*!< out: immediate result
17491 for update function */
17492 struct st_mysql_value* value) /*!< in: incoming string */
17493 {
17494 const char* stopword_table_name;
17495 char buff[STRING_BUFFER_USUAL_SIZE];
17496 int len = sizeof(buff);
17497 trx_t* trx;
17498
17499 ut_a(save != NULL);
17500 ut_a(value != NULL);
17501
17502 stopword_table_name = value->val_str(value, buff, &len);
17503
17504 trx = check_trx_exists(thd);
17505
17506 row_mysql_lock_data_dictionary(trx);
17507
17508 /* Validate the stopword table's (if supplied) existence and
17509 of the right format */
17510 int ret = stopword_table_name && !fts_valid_stopword_table(
17511 stopword_table_name);
17512
17513 row_mysql_unlock_data_dictionary(trx);
17514
17515 if (!ret) {
17516 if (stopword_table_name == buff) {
17517 ut_ad(static_cast<size_t>(len) < sizeof buff);
17518 stopword_table_name = thd_strmake(thd,
17519 stopword_table_name,
17520 len);
17521 }
17522
17523 *static_cast<const char**>(save) = stopword_table_name;
17524 }
17525
17526 return(ret);
17527 }
17528
17529 /** Update the system variable innodb_buffer_pool_size using the "saved"
17530 value. This function is registered as a callback with MySQL.
17531 @param[in] save immediate result from check function */
17532 static
17533 void
innodb_buffer_pool_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17534 innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
17535 {
17536 longlong in_val = *static_cast<const longlong*>(save);
17537
17538 snprintf(export_vars.innodb_buffer_pool_resize_status,
17539 sizeof(export_vars.innodb_buffer_pool_resize_status),
17540 "Requested to resize buffer pool.");
17541
17542 os_event_set(srv_buf_resize_event);
17543
17544 ib::info() << export_vars.innodb_buffer_pool_resize_status
17545 << " (new size: " << in_val << " bytes)";
17546 }
17547
17548 /** The latest assigned innodb_ft_aux_table name */
17549 static char* innodb_ft_aux_table;
17550
17551 /** Update innodb_ft_aux_table_id on SET GLOBAL innodb_ft_aux_table.
17552 @param[in,out] thd connection
17553 @param[out] save new value of innodb_ft_aux_table
17554 @param[in] value user-specified value */
innodb_ft_aux_table_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17555 static int innodb_ft_aux_table_validate(THD *thd, st_mysql_sys_var*,
17556 void* save, st_mysql_value* value)
17557 {
17558 char buf[STRING_BUFFER_USUAL_SIZE];
17559 int len = sizeof buf;
17560
17561 if (const char* table_name = value->val_str(value, buf, &len)) {
17562 if (dict_table_t* table = dict_table_open_on_name(
17563 table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE)) {
17564 const table_id_t id = dict_table_has_fts_index(table)
17565 ? table->id : 0;
17566 dict_table_close(table, FALSE, FALSE);
17567 if (id) {
17568 innodb_ft_aux_table_id = id;
17569 if (table_name == buf) {
17570 ut_ad(static_cast<size_t>(len)
17571 < sizeof buf);
17572 table_name = thd_strmake(thd,
17573 table_name,
17574 len);
17575 }
17576
17577
17578 *static_cast<const char**>(save) = table_name;
17579 return 0;
17580 }
17581 }
17582
17583 return 1;
17584 } else {
17585 *static_cast<char**>(save) = NULL;
17586 innodb_ft_aux_table_id = 0;
17587 return 0;
17588 }
17589 }
17590
#ifdef BTR_CUR_HASH_ADAPT
/** Update the system variable innodb_adaptive_hash_index using the
"saved" value. This function is registered as a callback with MySQL.
LOCK_global_system_variables is released around the call to
btr_search_enable() or btr_search_disable() and re-acquired afterwards.
@param[in]	save	immediate result from check function */
static
void
innodb_adaptive_hash_index_update(THD*, st_mysql_sys_var*, void*,
				  const void* save)
{
	mysql_mutex_unlock(&LOCK_global_system_variables);

	const bool	enable = *(my_bool*) save;

	if (!enable) {
		btr_search_disable();
	} else {
		btr_search_enable();
	}

	mysql_mutex_lock(&LOCK_global_system_variables);
}
#endif /* BTR_CUR_HASH_ADAPT */
17609
17610 /****************************************************************//**
17611 Update the system variable innodb_cmp_per_index using the "saved"
17612 value. This function is registered as a callback with MySQL. */
17613 static
17614 void
innodb_cmp_per_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17615 innodb_cmp_per_index_update(THD*, st_mysql_sys_var*, void*, const void* save)
17616 {
17617 /* Reset the stats whenever we enable the table
17618 INFORMATION_SCHEMA.innodb_cmp_per_index. */
17619 if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
17620 mysql_mutex_unlock(&LOCK_global_system_variables);
17621 page_zip_reset_stat_per_index();
17622 mysql_mutex_lock(&LOCK_global_system_variables);
17623 }
17624
17625 srv_cmp_per_index_enabled = !!(*(my_bool*) save);
17626 }
17627
17628 /****************************************************************//**
17629 Update the system variable innodb_old_blocks_pct using the "saved"
17630 value. This function is registered as a callback with MySQL. */
17631 static
17632 void
innodb_old_blocks_pct_update(THD *,st_mysql_sys_var *,void *,const void * save)17633 innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
17634 {
17635 mysql_mutex_unlock(&LOCK_global_system_variables);
17636 uint ratio = buf_LRU_old_ratio_update(*static_cast<const uint*>(save),
17637 true);
17638 mysql_mutex_lock(&LOCK_global_system_variables);
17639 innobase_old_blocks_pct = ratio;
17640 }
17641
17642 /****************************************************************//**
17643 Update the system variable innodb_old_blocks_pct using the "saved"
17644 value. This function is registered as a callback with MySQL. */
17645 static
17646 void
innodb_change_buffer_max_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17647 innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
17648 const void* save)
17649 {
17650 srv_change_buffer_max_size = *static_cast<const uint*>(save);
17651 mysql_mutex_unlock(&LOCK_global_system_variables);
17652 ibuf_max_size_update(srv_change_buffer_max_size);
17653 mysql_mutex_lock(&LOCK_global_system_variables);
17654 }
17655
17656 #ifdef UNIV_DEBUG
/* NOTE(review): not referenced in this part of the file; presumably the
storage of a debug-only system variable -- confirm at its definition. */
static ulong srv_fil_make_page_dirty_debug = 0;
/* Page number stored by innodb_save_page_no() and later read by
innodb_make_page_dirty() to choose the page to dirty. */
static ulong srv_saved_page_number_debug = 0;
17659
17660 /****************************************************************//**
17661 Save an InnoDB page number. */
17662 static
17663 void
innodb_save_page_no(THD *,st_mysql_sys_var *,void *,const void * save)17664 innodb_save_page_no(THD*, st_mysql_sys_var*, void*, const void* save)
17665 {
17666 srv_saved_page_number_debug = *static_cast<const ulong*>(save);
17667
17668 ib::info() << "Saving InnoDB page number: "
17669 << srv_saved_page_number_debug;
17670 }
17671
17672 /****************************************************************//**
17673 Make the first page of given user tablespace dirty. */
17674 static
17675 void
innodb_make_page_dirty(THD *,st_mysql_sys_var *,void *,const void * save)17676 innodb_make_page_dirty(THD*, st_mysql_sys_var*, void*, const void* save)
17677 {
17678 mtr_t mtr;
17679 ulong space_id = *static_cast<const ulong*>(save);
17680 mysql_mutex_unlock(&LOCK_global_system_variables);
17681 fil_space_t* space = fil_space_acquire_silent(space_id);
17682
17683 if (space == NULL) {
17684 func_exit_no_space:
17685 mysql_mutex_lock(&LOCK_global_system_variables);
17686 return;
17687 }
17688
17689 if (srv_saved_page_number_debug >= space->size) {
17690 func_exit:
17691 space->release();
17692 goto func_exit_no_space;
17693 }
17694
17695 mtr.start();
17696 mtr.set_named_space(space);
17697
17698 buf_block_t* block = buf_page_get(
17699 page_id_t(space_id, srv_saved_page_number_debug),
17700 page_size_t(space->flags), RW_X_LATCH, &mtr);
17701
17702 if (block != NULL) {
17703 byte* page = block->frame;
17704
17705 ib::info() << "Dirtying page: " << page_id_t(
17706 page_get_space_id(page), page_get_page_no(page));
17707
17708 mlog_write_ulint(page + FIL_PAGE_TYPE,
17709 fil_page_get_type(page),
17710 MLOG_2BYTES, &mtr);
17711 }
17712 mtr.commit();
17713 goto func_exit;
17714 }
17715 #endif // UNIV_DEBUG
17716 /*************************************************************//**
17717 Just emit a warning that the usage of the variable is deprecated.
17718 @return 0 */
17719 static
17720 void
innodb_stats_sample_pages_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17721 innodb_stats_sample_pages_update(
17722 /*=============================*/
17723 THD* thd, /*!< in: thread handle */
17724 st_mysql_sys_var*, void*,
17725 const void* save) /*!< in: immediate result
17726 from check function */
17727 {
17728
17729 const char* STATS_SAMPLE_PAGES_DEPRECATED_MSG =
17730 "Using innodb_stats_sample_pages is deprecated and"
17731 " the variable may be removed in future releases."
17732 " Please use innodb_stats_transient_sample_pages instead.";
17733
17734 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
17735 HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
17736
17737 ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG;
17738
17739 srv_stats_transient_sample_pages =
17740 *static_cast<const unsigned long long*>(save);
17741 }
17742
17743 /****************************************************************//**
17744 Update the monitor counter according to the "set_option", turn
17745 on/off or reset specified monitor counter. */
17746 static
17747 void
innodb_monitor_set_option(const monitor_info_t * monitor_info,mon_option_t set_option)17748 innodb_monitor_set_option(
17749 /*======================*/
17750 const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor
17751 to set */
17752 mon_option_t set_option) /*!< in: Turn on/off reset the
17753 counter */
17754 {
17755 monitor_id_t monitor_id = monitor_info->monitor_id;
17756
17757 /* If module type is MONITOR_GROUP_MODULE, it cannot be
17758 turned on/off individually. It should never use this
17759 function to set options */
17760 ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE));
17761
17762 switch (set_option) {
17763 case MONITOR_TURN_ON:
17764 MONITOR_ON(monitor_id);
17765 MONITOR_INIT(monitor_id);
17766 MONITOR_SET_START(monitor_id);
17767
17768 /* If the monitor to be turned on uses
17769 exisitng monitor counter (status variable),
17770 make special processing to remember existing
17771 counter value. */
17772 if (monitor_info->monitor_type & MONITOR_EXISTING) {
17773 srv_mon_process_existing_counter(
17774 monitor_id, MONITOR_TURN_ON);
17775 }
17776
17777 if (MONITOR_IS_ON(MONITOR_LATCHES)) {
17778
17779 mutex_monitor.enable();
17780 }
17781 break;
17782
17783 case MONITOR_TURN_OFF:
17784 if (monitor_info->monitor_type & MONITOR_EXISTING) {
17785 srv_mon_process_existing_counter(
17786 monitor_id, MONITOR_TURN_OFF);
17787 }
17788
17789 MONITOR_OFF(monitor_id);
17790 MONITOR_SET_OFF(monitor_id);
17791
17792 if (!MONITOR_IS_ON(MONITOR_LATCHES)) {
17793
17794 mutex_monitor.disable();
17795 }
17796 break;
17797
17798 case MONITOR_RESET_VALUE:
17799 srv_mon_reset(monitor_id);
17800
17801 if (monitor_id == (MONITOR_LATCHES)) {
17802
17803 mutex_monitor.reset();
17804 }
17805 break;
17806
17807 case MONITOR_RESET_ALL_VALUE:
17808 srv_mon_reset_all(monitor_id);
17809 mutex_monitor.reset();
17810 break;
17811
17812 default:
17813 ut_error;
17814 }
17815 }
17816
17817 /****************************************************************//**
17818 Find matching InnoDB monitor counters and update their status
17819 according to the "set_option", turn on/off or reset specified
17820 monitor counter. */
17821 static
17822 void
innodb_monitor_update_wildcard(const char * name,mon_option_t set_option)17823 innodb_monitor_update_wildcard(
17824 /*===========================*/
17825 const char* name, /*!< in: monitor name to match */
17826 mon_option_t set_option) /*!< in: the set option, whether
17827 to turn on/off or reset the counter */
17828 {
17829 ut_a(name);
17830
17831 for (ulint use = 0; use < NUM_MONITOR; use++) {
17832 ulint type;
17833 monitor_id_t monitor_id = static_cast<monitor_id_t>(use);
17834 monitor_info_t* monitor_info;
17835
17836 if (!innobase_wildcasecmp(
17837 srv_mon_get_name(monitor_id), name)) {
17838 monitor_info = srv_mon_get_info(monitor_id);
17839
17840 type = monitor_info->monitor_type;
17841
17842 /* If the monitor counter is of MONITOR_MODULE
17843 type, skip it. Except for those also marked with
17844 MONITOR_GROUP_MODULE flag, which can be turned
17845 on only as a module. */
17846 if (!(type & MONITOR_MODULE)
17847 && !(type & MONITOR_GROUP_MODULE)) {
17848 innodb_monitor_set_option(monitor_info,
17849 set_option);
17850 }
17851
17852 /* Need to special handle counters marked with
17853 MONITOR_GROUP_MODULE, turn on the whole module if
17854 any one of it comes here. Currently, only
17855 "module_buf_page" is marked with MONITOR_GROUP_MODULE */
17856 if (type & MONITOR_GROUP_MODULE) {
17857 if ((monitor_id >= MONITOR_MODULE_BUF_PAGE)
17858 && (monitor_id < MONITOR_MODULE_OS)) {
17859 if (set_option == MONITOR_TURN_ON
17860 && MONITOR_IS_ON(
17861 MONITOR_MODULE_BUF_PAGE)) {
17862 continue;
17863 }
17864
17865 srv_mon_set_module_control(
17866 MONITOR_MODULE_BUF_PAGE,
17867 set_option);
17868 } else {
17869 /* If new monitor is added with
17870 MONITOR_GROUP_MODULE, it needs
17871 to be added here. */
17872 ut_ad(0);
17873 }
17874 }
17875 }
17876 }
17877 }
17878
17879 /*************************************************************//**
17880 Given a configuration variable name, find corresponding monitor counter
17881 and return its monitor ID if found.
17882 @return monitor ID if found, MONITOR_NO_MATCH if there is no match */
17883 static
17884 ulint
innodb_monitor_id_by_name_get(const char * name)17885 innodb_monitor_id_by_name_get(
17886 /*==========================*/
17887 const char* name) /*!< in: monitor counter namer */
17888 {
17889 ut_a(name);
17890
17891 /* Search for wild character '%' in the name, if
17892 found, we treat it as a wildcard match. We do not search for
17893 single character wildcard '_' since our monitor names already contain
17894 such character. To avoid confusion, we request user must include
17895 at least one '%' character to activate the wildcard search. */
17896 if (strchr(name, '%')) {
17897 return(MONITOR_WILDCARD_MATCH);
17898 }
17899
17900 /* Not wildcard match, check for an exact match */
17901 for (ulint i = 0; i < NUM_MONITOR; i++) {
17902 if (!innobase_strcasecmp(
17903 name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) {
17904 return(i);
17905 }
17906 }
17907
17908 return(MONITOR_NO_MATCH);
17909 }
17910 /*************************************************************//**
17911 Validate that the passed in monitor name matches at least one
17912 monitor counter name with wildcard compare.
17913 @return TRUE if at least one monitor name matches */
17914 static
17915 ibool
innodb_monitor_validate_wildcard_name(const char * name)17916 innodb_monitor_validate_wildcard_name(
17917 /*==================================*/
17918 const char* name) /*!< in: monitor counter namer */
17919 {
17920 for (ulint i = 0; i < NUM_MONITOR; i++) {
17921 if (!innobase_wildcasecmp(
17922 srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) {
17923 return(TRUE);
17924 }
17925 }
17926
17927 return(FALSE);
17928 }
17929 /*************************************************************//**
17930 Validate the passed in monitor name, find and save the
17931 corresponding monitor name in the function parameter "save".
17932 @return 0 if monitor name is valid */
17933 static
17934 int
innodb_monitor_valid_byname(void * save,const char * name)17935 innodb_monitor_valid_byname(
17936 /*========================*/
17937 void* save, /*!< out: immediate result
17938 for update function */
17939 const char* name) /*!< in: incoming monitor name */
17940 {
17941 ulint use;
17942 monitor_info_t* monitor_info;
17943
17944 if (!name) {
17945 return(1);
17946 }
17947
17948 use = innodb_monitor_id_by_name_get(name);
17949
17950 /* No monitor name matches, nor it is wildcard match */
17951 if (use == MONITOR_NO_MATCH) {
17952 return(1);
17953 }
17954
17955 if (use < NUM_MONITOR) {
17956 monitor_info = srv_mon_get_info((monitor_id_t) use);
17957
17958 /* If the monitor counter is marked with
17959 MONITOR_GROUP_MODULE flag, then this counter
17960 cannot be turned on/off individually, instead
17961 it shall be turned on/off as a group using
17962 its module name */
17963 if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE)
17964 && (!(monitor_info->monitor_type & MONITOR_MODULE))) {
17965 sql_print_warning(
17966 "Monitor counter '%s' cannot"
17967 " be turned on/off individually."
17968 " Please use its module name"
17969 " to turn on/off the counters"
17970 " in the module as a group.\n",
17971 name);
17972
17973 return(1);
17974 }
17975
17976 } else {
17977 ut_a(use == MONITOR_WILDCARD_MATCH);
17978
17979 /* For wildcard match, if there is not a single monitor
17980 counter name that matches, treat it as an invalid
17981 value for the system configuration variables */
17982 if (!innodb_monitor_validate_wildcard_name(name)) {
17983 return(1);
17984 }
17985 }
17986
17987 /* Save the configure name for innodb_monitor_update() */
17988 *static_cast<const char**>(save) = name;
17989
17990 return(0);
17991 }
17992 /*************************************************************//**
17993 Validate passed-in "value" is a valid monitor counter name.
17994 This function is registered as a callback with MySQL.
17995 @return 0 for valid name */
17996 static
17997 int
innodb_monitor_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17998 innodb_monitor_validate(
17999 /*====================*/
18000 THD*, st_mysql_sys_var*,
18001 void* save, /*!< out: immediate result
18002 for update function */
18003 struct st_mysql_value* value) /*!< in: incoming string */
18004 {
18005 const char* name;
18006 char* monitor_name;
18007 char buff[STRING_BUFFER_USUAL_SIZE];
18008 int len = sizeof(buff);
18009 int ret;
18010
18011 ut_a(save != NULL);
18012 ut_a(value != NULL);
18013
18014 name = value->val_str(value, buff, &len);
18015
18016 /* monitor_name could point to memory from MySQL
18017 or buff[]. Always dup the name to memory allocated
18018 by InnoDB, so we can access it in another callback
18019 function innodb_monitor_update() and free it appropriately */
18020 if (name) {
18021 monitor_name = my_strdup(//PSI_INSTRUMENT_ME,
18022 name, MYF(0));
18023 } else {
18024 return(1);
18025 }
18026
18027 ret = innodb_monitor_valid_byname(save, monitor_name);
18028
18029 if (ret) {
18030 /* Validation failed */
18031 my_free(monitor_name);
18032 } else {
18033 /* monitor_name will be freed in separate callback function
18034 innodb_monitor_update(). Assert "save" point to
18035 the "monitor_name" variable */
18036 ut_ad(*static_cast<char**>(save) == monitor_name);
18037 }
18038
18039 return(ret);
18040 }
18041
18042 /****************************************************************//**
18043 Update the system variable innodb_enable(disable/reset/reset_all)_monitor
18044 according to the "set_option" and turn on/off or reset specified monitor
18045 counter. */
18046 static
18047 void
innodb_monitor_update(THD * thd,void * var_ptr,const void * save,mon_option_t set_option,ibool free_mem)18048 innodb_monitor_update(
18049 /*==================*/
18050 THD* thd, /*!< in: thread handle */
18051 void* var_ptr, /*!< out: where the
18052 formal string goes */
18053 const void* save, /*!< in: immediate result
18054 from check function */
18055 mon_option_t set_option, /*!< in: the set option,
18056 whether to turn on/off or
18057 reset the counter */
18058 ibool free_mem) /*!< in: whether we will
18059 need to free the memory */
18060 {
18061 monitor_info_t* monitor_info;
18062 ulint monitor_id;
18063 ulint err_monitor = 0;
18064 const char* name;
18065
18066 ut_a(save != NULL);
18067
18068 name = *static_cast<const char*const*>(save);
18069
18070 if (!name) {
18071 monitor_id = MONITOR_DEFAULT_START;
18072 } else {
18073 monitor_id = innodb_monitor_id_by_name_get(name);
18074
18075 /* Double check we have a valid monitor ID */
18076 if (monitor_id == MONITOR_NO_MATCH) {
18077 return;
18078 }
18079 }
18080
18081 if (monitor_id == MONITOR_DEFAULT_START) {
18082 /* If user set the variable to "default", we will
18083 print a message and make this set operation a "noop".
18084 The check is being made here is because "set default"
18085 does not go through validation function */
18086 if (thd) {
18087 push_warning_printf(
18088 thd, Sql_condition::WARN_LEVEL_WARN,
18089 ER_NO_DEFAULT,
18090 "Default value is not defined for"
18091 " this set option. Please specify"
18092 " correct counter or module name.");
18093 } else {
18094 sql_print_error(
18095 "Default value is not defined for"
18096 " this set option. Please specify"
18097 " correct counter or module name.\n");
18098 }
18099
18100 if (var_ptr) {
18101 *(const char**) var_ptr = NULL;
18102 }
18103 } else if (monitor_id == MONITOR_WILDCARD_MATCH) {
18104 innodb_monitor_update_wildcard(name, set_option);
18105 } else {
18106 monitor_info = srv_mon_get_info(
18107 static_cast<monitor_id_t>(monitor_id));
18108
18109 ut_a(monitor_info);
18110
18111 /* If monitor is already truned on, someone could already
18112 collect monitor data, exit and ask user to turn off the
18113 monitor before turn it on again. */
18114 if (set_option == MONITOR_TURN_ON
18115 && MONITOR_IS_ON(monitor_id)) {
18116 err_monitor = monitor_id;
18117 goto exit;
18118 }
18119
18120 if (var_ptr) {
18121 *(const char**) var_ptr = monitor_info->monitor_name;
18122 }
18123
18124 /* Depending on the monitor name is for a module or
18125 a counter, process counters in the whole module or
18126 individual counter. */
18127 if (monitor_info->monitor_type & MONITOR_MODULE) {
18128 srv_mon_set_module_control(
18129 static_cast<monitor_id_t>(monitor_id),
18130 set_option);
18131 } else {
18132 innodb_monitor_set_option(monitor_info, set_option);
18133 }
18134 }
18135 exit:
18136 /* Only if we are trying to turn on a monitor that already
18137 been turned on, we will set err_monitor. Print related
18138 information */
18139 if (err_monitor) {
18140 sql_print_warning("InnoDB: Monitor %s is already enabled.",
18141 srv_mon_get_name((monitor_id_t) err_monitor));
18142 }
18143
18144 if (free_mem && name) {
18145 my_free((void*) name);
18146 }
18147
18148 return;
18149 }
18150
18151 /** Validate SET GLOBAL innodb_buffer_pool_filename.
18152 On Windows, file names with colon (:) are not allowed.
18153 @param thd connection
18154 @param save &srv_buf_dump_filename
18155 @param value new value to be validated
18156 @return 0 for valid name */
innodb_srv_buf_dump_filename_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)18157 static int innodb_srv_buf_dump_filename_validate(THD *thd, st_mysql_sys_var*,
18158 void *save,
18159 st_mysql_value *value)
18160 {
18161 char buff[OS_FILE_MAX_PATH];
18162 int len= sizeof buff;
18163
18164 if (const char *buf_name= value->val_str(value, buff, &len))
18165 {
18166 #ifdef _WIN32
18167 if (!is_filename_allowed(buf_name, len, FALSE))
18168 {
18169 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18170 ER_WRONG_ARGUMENTS,
18171 "InnoDB: innodb_buffer_pool_filename "
18172 "cannot have colon (:) in the file name.");
18173 return 1;
18174 }
18175 #endif /* _WIN32 */
18176 if (buf_name == buff)
18177 {
18178 ut_ad(static_cast<size_t>(len) < sizeof buff);
18179 buf_name= thd_strmake(thd, buf_name, len);
18180 }
18181
18182 *static_cast<const char**>(save)= buf_name;
18183 return 0;
18184 }
18185
18186 return 1;
18187 }
18188
18189 #ifdef UNIV_DEBUG
/* NOTE(review): not referenced in this part of the file; presumably the
storage of the debug variable handled by
innodb_buffer_pool_evict_update() -- confirm at the variable definition. */
static char* srv_buffer_pool_evict;
18191
18192 /****************************************************************//**
18193 Evict all uncompressed pages of compressed tables from the buffer pool.
18194 Keep the compressed pages in the buffer pool.
18195 @return whether all uncompressed pages were evicted */
innodb_buffer_pool_evict_uncompressed()18196 static bool innodb_buffer_pool_evict_uncompressed()
18197 {
18198 bool all_evicted = true;
18199
18200 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
18201 buf_pool_t* buf_pool = &buf_pool_ptr[i];
18202
18203 buf_pool_mutex_enter(buf_pool);
18204
18205 for (buf_block_t* block = UT_LIST_GET_LAST(
18206 buf_pool->unzip_LRU);
18207 block != NULL; ) {
18208 buf_block_t* prev_block = UT_LIST_GET_PREV(
18209 unzip_LRU, block);
18210 ut_ad(buf_block_get_state(block)
18211 == BUF_BLOCK_FILE_PAGE);
18212 ut_ad(block->in_unzip_LRU_list);
18213 ut_ad(block->page.in_LRU_list);
18214
18215 if (!buf_LRU_free_page(&block->page, false)) {
18216 all_evicted = false;
18217 block = prev_block;
18218 } else {
18219 /* Because buf_LRU_free_page() may release
18220 and reacquire buf_pool_t::mutex, prev_block
18221 may be invalid. */
18222 block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
18223 }
18224 }
18225
18226 buf_pool_mutex_exit(buf_pool);
18227 }
18228
18229 return(all_evicted);
18230 }
18231
18232 /****************************************************************//**
18233 Called on SET GLOBAL innodb_buffer_pool_evict=...
18234 Handles some values specially, to evict pages from the buffer pool.
18235 SET GLOBAL innodb_buffer_pool_evict='uncompressed'
18236 evicts all uncompressed page frames of compressed tablespaces. */
18237 static
18238 void
innodb_buffer_pool_evict_update(THD *,st_mysql_sys_var *,void *,const void * save)18239 innodb_buffer_pool_evict_update(THD*, st_mysql_sys_var*, void*,
18240 const void* save)
18241 {
18242 if (const char* op = *static_cast<const char*const*>(save)) {
18243 if (!strcmp(op, "uncompressed")) {
18244 mysql_mutex_unlock(&LOCK_global_system_variables);
18245 for (uint tries = 0; tries < 10000; tries++) {
18246 if (innodb_buffer_pool_evict_uncompressed()) {
18247 mysql_mutex_lock(
18248 &LOCK_global_system_variables);
18249 return;
18250 }
18251
18252 os_thread_sleep(10000);
18253 }
18254
18255 /* We failed to evict all uncompressed pages. */
18256 ut_ad(0);
18257 }
18258 }
18259 }
18260 #endif /* UNIV_DEBUG */
18261
18262 /****************************************************************//**
18263 Update the system variable innodb_monitor_enable and enable
18264 specified monitor counter.
18265 This function is registered as a callback with MySQL. */
18266 static
18267 void
innodb_enable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18268 innodb_enable_monitor_update(
18269 /*=========================*/
18270 THD* thd, /*!< in: thread handle */
18271 st_mysql_sys_var*,
18272 void* var_ptr,/*!< out: where the
18273 formal string goes */
18274 const void* save) /*!< in: immediate result
18275 from check function */
18276 {
18277 innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE);
18278 }
18279
18280 /****************************************************************//**
18281 Update the system variable innodb_monitor_disable and turn
18282 off specified monitor counter. */
18283 static
18284 void
innodb_disable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18285 innodb_disable_monitor_update(
18286 /*==========================*/
18287 THD* thd, /*!< in: thread handle */
18288 st_mysql_sys_var*,
18289 void* var_ptr,/*!< out: where the
18290 formal string goes */
18291 const void* save) /*!< in: immediate result
18292 from check function */
18293 {
18294 innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE);
18295 }
18296
18297 /****************************************************************//**
18298 Update the system variable innodb_monitor_reset and reset
18299 specified monitor counter(s).
18300 This function is registered as a callback with MySQL. */
18301 static
18302 void
innodb_reset_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18303 innodb_reset_monitor_update(
18304 /*========================*/
18305 THD* thd, /*!< in: thread handle */
18306 st_mysql_sys_var*,
18307 void* var_ptr,/*!< out: where the
18308 formal string goes */
18309 const void* save) /*!< in: immediate result
18310 from check function */
18311 {
18312 innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE);
18313 }
18314
18315 /****************************************************************//**
18316 Update the system variable innodb_monitor_reset_all and reset
18317 all value related monitor counter.
18318 This function is registered as a callback with MySQL. */
18319 static
18320 void
innodb_reset_all_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18321 innodb_reset_all_monitor_update(
18322 /*============================*/
18323 THD* thd, /*!< in: thread handle */
18324 st_mysql_sys_var*,
18325 void* var_ptr,/*!< out: where the
18326 formal string goes */
18327 const void* save) /*!< in: immediate result
18328 from check function */
18329 {
18330 innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE,
18331 TRUE);
18332 }
18333
18334 static
18335 void
innodb_defragment_frequency_update(THD *,st_mysql_sys_var *,void *,const void * save)18336 innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*,
18337 const void* save)
18338 {
18339 srv_defragment_frequency = (*static_cast<const uint*>(save));
18340 srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
18341 }
18342
/** Portable reentrant tokenizer: forwards to strtok_s() on Windows and
to strtok_r() elsewhere.
@param[in,out]	str	string to tokenize on the first call, NULL on
			subsequent calls; the string is modified
@param[in]	delim	set of delimiter characters
@param[in,out]	saveptr	tokenizer state kept between calls
@return the next token, or NULL when there are no more tokens */
static inline char *my_strtok_r(char *str, const char *delim, char **saveptr)
{
#ifdef _WIN32
  char *token= strtok_s(str, delim, saveptr);
#else
  char *token= strtok_r(str, delim, saveptr);
#endif
  return token;
}
18351
18352 /****************************************************************//**
18353 Parse and enable InnoDB monitor counters during server startup.
18354 User can list the monitor counters/groups to be enable by specifying
18355 "loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
18356 in server configuration file or at the command line. The string
18357 separate could be ";", "," or empty space. */
18358 static
18359 void
innodb_enable_monitor_at_startup(char * str)18360 innodb_enable_monitor_at_startup(
18361 /*=============================*/
18362 char* str) /*!< in/out: monitor counter enable list */
18363 {
18364 static const char* sep = " ;,";
18365 char* last;
18366
18367 ut_a(str);
18368
18369 /* Walk through the string, and separate each monitor counter
18370 and/or counter group name, and calling innodb_monitor_update()
18371 if successfully updated. Please note that the "str" would be
18372 changed by strtok_r() as it walks through it. */
18373 for (char* option = my_strtok_r(str, sep, &last);
18374 option;
18375 option = my_strtok_r(NULL, sep, &last)) {
18376 char* option_name;
18377 if (!innodb_monitor_valid_byname(&option_name, option)) {
18378 innodb_monitor_update(NULL, NULL, &option,
18379 MONITOR_TURN_ON, FALSE);
18380 } else {
18381 sql_print_warning("Invalid monitor counter"
18382 " name: '%s'", option);
18383 }
18384 }
18385 }
18386
18387 /****************************************************************//**
18388 Callback function for accessing the InnoDB variables from MySQL:
18389 SHOW VARIABLES. */
show_innodb_vars(THD *,SHOW_VAR * var,char *)18390 static int show_innodb_vars(THD*, SHOW_VAR* var, char*)
18391 {
18392 innodb_export_status();
18393 var->type = SHOW_ARRAY;
18394 var->value = (char*) &innodb_status_variables;
18395 //var->scope = SHOW_SCOPE_GLOBAL;
18396
18397 return(0);
18398 }
18399
18400 /****************************************************************//**
18401 This function checks each index name for a table against reserved
18402 system default primary index name 'GEN_CLUST_INDEX'. If a name
18403 matches, this function pushes an warning message to the client,
18404 and returns true.
18405 @return true if the index name matches the reserved name */
18406 bool
innobase_index_name_is_reserved(THD * thd,const KEY * key_info,ulint num_of_keys)18407 innobase_index_name_is_reserved(
18408 /*============================*/
18409 THD* thd, /*!< in/out: MySQL connection */
18410 const KEY* key_info, /*!< in: Indexes to be created */
18411 ulint num_of_keys) /*!< in: Number of indexes to
18412 be created. */
18413 {
18414 const KEY* key;
18415 uint key_num; /* index number */
18416
18417 for (key_num = 0; key_num < num_of_keys; key_num++) {
18418 key = &key_info[key_num];
18419
18420 if (innobase_strcasecmp(key->name.str,
18421 innobase_index_reserve_name) == 0) {
18422 /* Push warning to mysql */
18423 push_warning_printf(thd,
18424 Sql_condition::WARN_LEVEL_WARN,
18425 ER_WRONG_NAME_FOR_INDEX,
18426 "Cannot Create Index with name"
18427 " '%s'. The name is reserved"
18428 " for the system default primary"
18429 " index.",
18430 innobase_index_reserve_name);
18431
18432 my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
18433 innobase_index_reserve_name);
18434
18435 return(true);
18436 }
18437 }
18438
18439 return(false);
18440 }
18441
18442 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
18443 of m_prebuilt->fts_doc_id
18444 @param[in,out] fts_hdl FTS handler
18445 @return the relevance ranking value */
18446 static
18447 float
innobase_fts_retrieve_ranking(FT_INFO * fts_hdl)18448 innobase_fts_retrieve_ranking(
18449 FT_INFO* fts_hdl)
18450 {
18451 fts_result_t* result;
18452 row_prebuilt_t* ft_prebuilt;
18453
18454 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18455
18456 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18457
18458 fts_ranking_t* ranking = rbt_value(fts_ranking_t, result->current);
18459 ft_prebuilt->fts_doc_id= ranking->doc_id;
18460
18461 return(ranking->rank);
18462 }
18463
18464 /** Free the memory for the FTS handler
18465 @param[in,out] fts_hdl FTS handler */
18466 static
18467 void
innobase_fts_close_ranking(FT_INFO * fts_hdl)18468 innobase_fts_close_ranking(
18469 FT_INFO* fts_hdl)
18470 {
18471 fts_result_t* result;
18472
18473 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18474
18475 fts_query_free_result(result);
18476
18477 my_free((uchar*) fts_hdl);
18478 }
18479
18480 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
18481 of m_prebuilt->fts_doc_id
18482 @param[in,out] fts_hdl FTS handler
18483 @return the relevance ranking value */
18484 static
18485 float
innobase_fts_find_ranking(FT_INFO * fts_hdl,uchar *,uint)18486 innobase_fts_find_ranking(FT_INFO* fts_hdl, uchar*, uint)
18487 {
18488 fts_result_t* result;
18489 row_prebuilt_t* ft_prebuilt;
18490
18491 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18492 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18493
18494 /* Retrieve the ranking value for doc_id with value of
18495 m_prebuilt->fts_doc_id */
18496 return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
18497 }
18498
18499 #ifdef UNIV_DEBUG
/* NOTE(review): the three flags below are not referenced in this part
of the file; presumably they are the storage of debug-only system
variables -- confirm at the variable definitions. */
static my_bool innodb_background_drop_list_empty = TRUE;
static my_bool innodb_log_checkpoint_now = TRUE;
static my_bool innodb_buf_flush_list_now = TRUE;
/* Last value assigned by innodb_merge_threshold_set_all_debug_update();
starts out as DICT_INDEX_MERGE_THRESHOLD_DEFAULT. */
static uint innodb_merge_threshold_set_all_debug
= DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
18505
18506 /** Wait for the background drop list to become empty. */
18507 static
18508 void
wait_background_drop_list_empty(THD *,st_mysql_sys_var *,void *,const void *)18509 wait_background_drop_list_empty(THD*, st_mysql_sys_var*, void*, const void*)
18510 {
18511 row_wait_for_background_drop_list_empty();
18512 }
18513
18514 /****************************************************************//**
18515 Force innodb to checkpoint. */
18516 static
18517 void
checkpoint_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18518 checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18519 {
18520 if (*(my_bool*) save) {
18521 mysql_mutex_unlock(&LOCK_global_system_variables);
18522
18523 while (log_sys.last_checkpoint_lsn
18524 + SIZE_OF_MLOG_CHECKPOINT
18525 + (log_sys.append_on_checkpoint != NULL
18526 ? log_sys.append_on_checkpoint->size() : 0)
18527 < log_sys.lsn) {
18528 log_make_checkpoint();
18529 fil_flush_file_spaces(FIL_TYPE_LOG);
18530 }
18531
18532 dberr_t err = fil_write_flushed_lsn(log_sys.lsn);
18533
18534 if (err != DB_SUCCESS) {
18535 ib::warn() << "Checkpoint set failed " << err;
18536 }
18537
18538 mysql_mutex_lock(&LOCK_global_system_variables);
18539 }
18540 }
18541
18542 /****************************************************************//**
18543 Force a dirty pages flush now. */
18544 static
18545 void
buf_flush_list_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18546 buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18547 {
18548 if (*(my_bool*) save) {
18549 mysql_mutex_unlock(&LOCK_global_system_variables);
18550 buf_flush_sync_all_buf_pools();
18551 mysql_mutex_lock(&LOCK_global_system_variables);
18552 }
18553 }
18554
18555 /** Override current MERGE_THRESHOLD setting for all indexes at dictionary
18556 now.
18557 @param[in] save immediate result from check function */
18558 static
18559 void
innodb_merge_threshold_set_all_debug_update(THD *,st_mysql_sys_var *,void *,const void * save)18560 innodb_merge_threshold_set_all_debug_update(THD*, st_mysql_sys_var*, void*,
18561 const void* save)
18562 {
18563 innodb_merge_threshold_set_all_debug
18564 = (*static_cast<const uint*>(save));
18565 dict_set_merge_threshold_all_debug(
18566 innodb_merge_threshold_set_all_debug);
18567 }
18568 #endif /* UNIV_DEBUG */
18569
18570 /** Find and Retrieve the FTS doc_id for the current result row
18571 @param[in,out] fts_hdl FTS handler
18572 @return the document ID */
18573 static
18574 ulonglong
innobase_fts_retrieve_docid(FT_INFO_EXT * fts_hdl)18575 innobase_fts_retrieve_docid(
18576 FT_INFO_EXT* fts_hdl)
18577 {
18578 fts_result_t* result;
18579 row_prebuilt_t* ft_prebuilt;
18580
18581 ft_prebuilt = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_prebuilt;
18582 result = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_result;
18583
18584 if (ft_prebuilt->read_just_key) {
18585
18586 fts_ranking_t* ranking =
18587 rbt_value(fts_ranking_t, result->current);
18588
18589 return(ranking->doc_id);
18590 }
18591
18592 return(ft_prebuilt->fts_doc_id);
18593 }
18594
/* These variables are never read or changed by InnoDB itself. They are
dummies that the MySQL infrastructure needs so that the user can trigger
buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort()
with:
  SET GLOBAL innodb_buffer_pool_dump_now=ON;
  SET GLOBAL innodb_buffer_pool_load_now=ON;
  SET GLOBAL innodb_buffer_pool_load_abort=ON;
Their values are read by MySQL and displayed to the user when the variables
are queried, e.g.:
  SELECT @@innodb_buffer_pool_dump_now;
  SELECT @@innodb_buffer_pool_load_now;
  SELECT @@innodb_buffer_pool_load_abort; */
static my_bool	innodb_buffer_pool_dump_now = FALSE;
static my_bool	innodb_buffer_pool_load_now = FALSE;
static my_bool	innodb_buffer_pool_load_abort = FALSE;
18610
18611 /****************************************************************//**
18612 Trigger a dump of the buffer pool if innodb_buffer_pool_dump_now is set
18613 to ON. This function is registered as a callback with MySQL. */
18614 static
18615 void
buffer_pool_dump_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18616 buffer_pool_dump_now(
18617 /*=================*/
18618 THD* thd /*!< in: thread handle */
18619 MY_ATTRIBUTE((unused)),
18620 struct st_mysql_sys_var* var /*!< in: pointer to system
18621 variable */
18622 MY_ATTRIBUTE((unused)),
18623 void* var_ptr /*!< out: where the formal
18624 string goes */
18625 MY_ATTRIBUTE((unused)),
18626 const void* save) /*!< in: immediate result from
18627 check function */
18628 {
18629 if (*(my_bool*) save && !srv_read_only_mode) {
18630 mysql_mutex_unlock(&LOCK_global_system_variables);
18631 buf_dump_start();
18632 mysql_mutex_lock(&LOCK_global_system_variables);
18633 }
18634 }
18635
18636 /****************************************************************//**
18637 Trigger a load of the buffer pool if innodb_buffer_pool_load_now is set
18638 to ON. This function is registered as a callback with MySQL. */
18639 static
18640 void
buffer_pool_load_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18641 buffer_pool_load_now(
18642 /*=================*/
18643 THD* thd /*!< in: thread handle */
18644 MY_ATTRIBUTE((unused)),
18645 struct st_mysql_sys_var* var /*!< in: pointer to system
18646 variable */
18647 MY_ATTRIBUTE((unused)),
18648 void* var_ptr /*!< out: where the formal
18649 string goes */
18650 MY_ATTRIBUTE((unused)),
18651 const void* save) /*!< in: immediate result from
18652 check function */
18653 {
18654 if (*(my_bool*) save && !srv_read_only_mode) {
18655 mysql_mutex_unlock(&LOCK_global_system_variables);
18656 buf_load_start();
18657 mysql_mutex_lock(&LOCK_global_system_variables);
18658 }
18659 }
18660
18661 /****************************************************************//**
18662 Abort a load of the buffer pool if innodb_buffer_pool_load_abort
18663 is set to ON. This function is registered as a callback with MySQL. */
18664 static
18665 void
buffer_pool_load_abort(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18666 buffer_pool_load_abort(
18667 /*===================*/
18668 THD* thd /*!< in: thread handle */
18669 MY_ATTRIBUTE((unused)),
18670 struct st_mysql_sys_var* var /*!< in: pointer to system
18671 variable */
18672 MY_ATTRIBUTE((unused)),
18673 void* var_ptr /*!< out: where the formal
18674 string goes */
18675 MY_ATTRIBUTE((unused)),
18676 const void* save) /*!< in: immediate result from
18677 check function */
18678 {
18679 if (*(my_bool*) save && !srv_read_only_mode) {
18680 mysql_mutex_unlock(&LOCK_global_system_variables);
18681 buf_load_abort();
18682 mysql_mutex_lock(&LOCK_global_system_variables);
18683 }
18684 }
18685
18686 /****************************************************************//**
18687 Update the system variable innodb_log_write_ahead_size using the "saved"
18688 value. This function is registered as a callback with MySQL. */
18689 static
18690 void
innodb_log_write_ahead_size_update(THD * thd,st_mysql_sys_var *,void *,const void * save)18691 innodb_log_write_ahead_size_update(
18692 /*===============================*/
18693 THD* thd, /*!< in: thread handle */
18694 st_mysql_sys_var*, void*,
18695 const void* save) /*!< in: immediate result
18696 from check function */
18697 {
18698 ulong val = OS_FILE_LOG_BLOCK_SIZE;
18699 ulong in_val = *static_cast<const ulong*>(save);
18700
18701 while (val < in_val) {
18702 val = val * 2;
18703 }
18704
18705 if (val > srv_page_size) {
18706 val = srv_page_size;
18707 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18708 ER_WRONG_ARGUMENTS,
18709 "innodb_log_write_ahead_size cannot"
18710 " be set higher than innodb_page_size.");
18711 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18712 ER_WRONG_ARGUMENTS,
18713 "Setting innodb_log_write_ahead_size"
18714 " to %lu",
18715 srv_page_size);
18716 } else if (val != in_val) {
18717 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18718 ER_WRONG_ARGUMENTS,
18719 "innodb_log_write_ahead_size should be"
18720 " set 2^n value and larger than 512.");
18721 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18722 ER_WRONG_ARGUMENTS,
18723 "Setting innodb_log_write_ahead_size"
18724 " to %lu",
18725 val);
18726 }
18727
18728 srv_log_write_ahead_size = val;
18729 }
18730
18731 /** Update innodb_status_output or innodb_status_output_locks,
18732 which control InnoDB "status monitor" output to the error log.
18733 @param[out] var current value
18734 @param[in] save to-be-assigned value */
18735 static
18736 void
innodb_status_output_update(THD *,st_mysql_sys_var *,void * var,const void * save)18737 innodb_status_output_update(THD*,st_mysql_sys_var*,void*var,const void*save)
18738 {
18739 *static_cast<my_bool*>(var)= *static_cast<const my_bool*>(save);
18740 if (srv_monitor_event)
18741 {
18742 mysql_mutex_unlock(&LOCK_global_system_variables);
18743 /* Wakeup server monitor thread. */
18744 os_event_set(srv_monitor_event);
18745 mysql_mutex_lock(&LOCK_global_system_variables);
18746 }
18747 }
18748
18749 /** Update the system variable innodb_encryption_threads.
18750 @param[in] save to-be-assigned value */
18751 static
18752 void
innodb_encryption_threads_update(THD *,st_mysql_sys_var *,void *,const void * save)18753 innodb_encryption_threads_update(THD*,st_mysql_sys_var*,void*,const void*save)
18754 {
18755 mysql_mutex_unlock(&LOCK_global_system_variables);
18756 fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
18757 mysql_mutex_lock(&LOCK_global_system_variables);
18758 }
18759
18760 /** Update the system variable innodb_encryption_rotate_key_age.
18761 @param[in] save to-be-assigned value */
18762 static
18763 void
innodb_encryption_rotate_key_age_update(THD *,st_mysql_sys_var *,void *,const void * save)18764 innodb_encryption_rotate_key_age_update(THD*, st_mysql_sys_var*, void*,
18765 const void* save)
18766 {
18767 mysql_mutex_unlock(&LOCK_global_system_variables);
18768 fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
18769 mysql_mutex_lock(&LOCK_global_system_variables);
18770 }
18771
18772 /** Update the system variable innodb_encryption_rotation_iops.
18773 @param[in] save to-be-assigned value */
18774 static
18775 void
innodb_encryption_rotation_iops_update(THD *,st_mysql_sys_var *,void *,const void * save)18776 innodb_encryption_rotation_iops_update(THD*, st_mysql_sys_var*, void*,
18777 const void* save)
18778 {
18779 mysql_mutex_unlock(&LOCK_global_system_variables);
18780 fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
18781 mysql_mutex_lock(&LOCK_global_system_variables);
18782 }
18783
18784 /** Update the system variable innodb_encrypt_tables.
18785 @param[in] save to-be-assigned value */
18786 static
18787 void
innodb_encrypt_tables_update(THD *,st_mysql_sys_var *,void *,const void * save)18788 innodb_encrypt_tables_update(THD*, st_mysql_sys_var*, void*, const void* save)
18789 {
18790 mysql_mutex_unlock(&LOCK_global_system_variables);
18791 fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
18792 mysql_mutex_lock(&LOCK_global_system_variables);
18793 }
18794
18795 /** Update the innodb_log_checksums parameter.
18796 @param[in,out] thd client connection
18797 @param[out] var_ptr current value
18798 @param[in] save immediate result from check function */
18799 static
18800 void
innodb_log_checksums_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18801 innodb_log_checksums_update(THD* thd, st_mysql_sys_var*, void* var_ptr,
18802 const void* save)
18803 {
18804 *static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
18805 thd, *static_cast<const my_bool*>(save));
18806 }
18807
18808 #ifdef UNIV_DEBUG
18809 static
18810 void
innobase_debug_sync_callback(srv_slot_t * slot,const void * value)18811 innobase_debug_sync_callback(srv_slot_t *slot, const void *value)
18812 {
18813 const char *value_str = *static_cast<const char* const*>(value);
18814 size_t len = strlen(value_str) + 1;
18815
18816
18817 // One allocation for list node object and value.
18818 void *buf = ut_malloc_nokey(sizeof(srv_slot_t::debug_sync_t) + len-1);
18819 srv_slot_t::debug_sync_t *sync = new(buf) srv_slot_t::debug_sync_t();
18820 strcpy(sync->str, value_str);
18821
18822 rw_lock_x_lock(&slot->debug_sync_lock);
18823 UT_LIST_ADD_LAST(slot->debug_sync, sync);
18824 rw_lock_x_unlock(&slot->debug_sync_lock);
18825 }
18826 static
18827 void
innobase_debug_sync_set(THD * thd,st_mysql_sys_var *,void *,const void * value)18828 innobase_debug_sync_set(THD *thd, st_mysql_sys_var*, void *, const void *value)
18829 {
18830 srv_for_each_thread(SRV_WORKER, innobase_debug_sync_callback, value);
18831 srv_for_each_thread(SRV_PURGE, innobase_debug_sync_callback, value);
18832 }
18833 #endif
18834
/* Status variable array with a single "Innodb" entry of type SHOW_FUNC
that refers to show_innodb_vars, followed by the terminating
{NullS, NullS, SHOW_LONG} element. */
static SHOW_VAR innodb_status_variables_export[]= {
	{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
	{NullS, NullS, SHOW_LONG}
};

/* Storage engine descriptor, initialized with
MYSQL_HANDLERTON_INTERFACE_VERSION. */
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
18842
18843 #ifdef WITH_WSREP
18844 static
18845 void
wsrep_abort_slave_trx(THD * bf_thd,THD * victim_thd)18846 wsrep_abort_slave_trx(
18847 THD* bf_thd,
18848 THD* victim_thd)
18849 {
18850 wsrep_seqno_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);
18851 wsrep_seqno_t victim_seqno= wsrep_thd_trx_seqno(victim_thd);
18852
18853 WSREP_ERROR("wsrep_abort_slave_trx: BF Aborter %s thread: %ld "
18854 "seqno: %lld query_state: %s conflict_state: %s "
18855 "exec mode %s query: %s",
18856 wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
18857 thd_get_thread_id(bf_thd),
18858 bf_seqno,
18859 wsrep_thd_query_state_str(bf_thd),
18860 wsrep_thd_conflict_state_str(bf_thd),
18861 wsrep_thd_exec_mode_str(bf_thd),
18862 wsrep_thd_query(bf_thd));
18863
18864 WSREP_ERROR("wsrep_abort_slave_trx: Victim %s thread: %ld "
18865 "seqno: %lld query_state: %s conflict_state: %s "
18866 "exec mode %s query: %s",
18867 wsrep_thd_is_BF(victim_thd, false) ? "BF" : "normal",
18868 thd_get_thread_id(victim_thd),
18869 wsrep_thd_trx_seqno(victim_thd),
18870 wsrep_thd_query_state_str(victim_thd),
18871 wsrep_thd_conflict_state_str(victim_thd),
18872 wsrep_thd_exec_mode_str(victim_thd),
18873 wsrep_thd_query(victim_thd));
18874
18875 WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
18876 "caused by:\n\t"
18877 "1) unsupported configuration options combination, please check documentation.\n\t"
18878 "2) a bug in the code.\n\t"
18879 "3) a database corruption.\n Node consistency compromized, "
18880 "need to abort. Restart the node to resync with cluster.",
18881 (long long)bf_seqno, (long long)victim_seqno);
18882 abort();
18883 }
18884
18885 /** This function is used to kill one transaction in BF. */
18886 static
18887 void
wsrep_kill_victim(MYSQL_THD const bf_thd,const trx_t * const bf_trx,MYSQL_THD thd,trx_t * victim_trx,my_bool signal)18888 wsrep_kill_victim(
18889 MYSQL_THD const bf_thd,
18890 const trx_t* const bf_trx,
18891 MYSQL_THD thd,
18892 trx_t* victim_trx,
18893 my_bool signal)
18894 {
18895 ut_ad(bf_thd);
18896 ut_ad(thd);
18897 ut_ad(victim_trx);
18898 ut_ad(lock_mutex_own());
18899 ut_ad(trx_mutex_own(victim_trx));
18900
18901 DBUG_ENTER("wsrep_kill_victim");
18902
18903 const int64_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);
18904
18905 if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
18906 WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
18907 victim_trx->id);
18908 wsrep_thd_UNLOCK(thd);
18909 DBUG_VOID_RETURN;
18910 }
18911
18912 if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
18913 WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT
18914 ", state: %s exec %s",
18915 victim_trx->id,
18916 wsrep_thd_conflict_state_str(thd),
18917 wsrep_thd_exec_mode_str(thd));
18918 }
18919
18920 switch (wsrep_thd_get_conflict_state(thd)) {
18921 case NO_CONFLICT:
18922 /* This will cause any call to innobase_kill_query()
18923 for this thd to bail out. */
18924 wsrep_thd_set_conflict_state(thd, MUST_ABORT);
18925 break;
18926 case MUST_ABORT:
18927 WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
18928 victim_trx->id);
18929 wsrep_thd_awake(thd, signal);
18930 wsrep_thd_UNLOCK(thd);
18931 DBUG_VOID_RETURN;
18932 break;
18933 case ABORTED:
18934 case ABORTING: // fall through
18935 default:
18936 WSREP_DEBUG("victim " TRX_ID_FMT " in state %s",
18937 victim_trx->id,
18938 wsrep_thd_conflict_state_str(thd));
18939 wsrep_thd_UNLOCK(thd);
18940 DBUG_VOID_RETURN;
18941 break;
18942 }
18943
18944 switch (wsrep_thd_query_state(thd)) {
18945 case QUERY_COMMITTING:
18946 {
18947 enum wsrep_status rcode;
18948
18949 WSREP_DEBUG("kill query for: %ld",
18950 thd_get_thread_id(thd));
18951 WSREP_DEBUG("kill trx QUERY_COMMITTING for " TRX_ID_FMT,
18952 victim_trx->id);
18953
18954 if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
18955 wsrep_abort_slave_trx(bf_thd, thd);
18956 } else {
18957 wsrep_t *wsrep= get_wsrep();
18958 rcode = wsrep->abort_pre_commit(
18959 wsrep, bf_seqno,
18960 (wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
18961 );
18962
18963 switch (rcode) {
18964 case WSREP_WARNING:
18965 WSREP_DEBUG("cancel commit warning: "
18966 TRX_ID_FMT,
18967 victim_trx->id);
18968 wsrep_thd_awake(thd, signal);
18969 wsrep_thd_UNLOCK(thd);
18970 DBUG_VOID_RETURN;
18971 break;
18972 case WSREP_OK:
18973 break;
18974 default:
18975 WSREP_ERROR(
18976 "cancel commit bad exit: %d "
18977 TRX_ID_FMT,
18978 rcode, victim_trx->id);
18979 /* unable to interrupt, must abort */
18980 /* note: kill_mysql() will block, if we cannot.
18981 * kill the lock holder first.
18982 */
18983 abort();
18984 break;
18985 }
18986 }
18987 wsrep_thd_awake(thd, signal);
18988 wsrep_thd_UNLOCK(thd);
18989 break;
18990 }
18991 case QUERY_EXEC:
18992 {
18993 /* it is possible that victim trx is itself waiting for some
18994 * other lock. We need to cancel this waiting
18995 */
18996 WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT,
18997 victim_trx->id);
18998
18999 if (victim_trx->lock.wait_lock) {
19000 WSREP_DEBUG("victim has wait flag: %ld",
19001 thd_get_thread_id(thd));
19002 lock_t* wait_lock = victim_trx->lock.wait_lock;
19003
19004 if (wait_lock) {
19005 WSREP_DEBUG("canceling wait lock");
19006 victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
19007 lock_cancel_waiting_and_release(wait_lock);
19008 }
19009
19010 wsrep_thd_awake(thd, signal);
19011 wsrep_thd_UNLOCK(thd);
19012 } else {
19013 /* abort currently executing query */
19014 WSREP_DEBUG("kill query for: %ld",
19015 thd_get_thread_id(thd));
19016
19017 /* for BF thd, we need to prevent him from committing */
19018 if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
19019 wsrep_abort_slave_trx(bf_thd, thd);
19020 }
19021
19022 /* Note that innobase_kill_query will take lock_mutex
19023 and trx_mutex */
19024 wsrep_thd_awake(thd, signal);
19025 wsrep_thd_UNLOCK(thd);
19026 }
19027 break;
19028 }
19029 case QUERY_IDLE:
19030 {
19031 WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
19032
19033 if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
19034 WSREP_DEBUG("kill BF IDLE, seqno: %lld",
19035 wsrep_thd_trx_seqno(thd));
19036 wsrep_abort_slave_trx(bf_thd, thd);
19037 }
19038
19039 /* This will lock thd from proceeding after net_read() and
19040 will cause any call to innobase_kill_query() for this
19041 thd to bail out. */
19042 wsrep_thd_set_conflict_state(thd, ABORTING);
19043 wsrep_lock_rollback();
19044
19045 if (wsrep_aborting_thd_contains(thd)) {
19046 WSREP_WARN("duplicate thd aborter %lu",
19047 thd_get_thread_id(thd));
19048 } else {
19049 wsrep_aborting_thd_enqueue(thd);
19050 WSREP_DEBUG("enqueuing trx abort for (%lu)",
19051 thd_get_thread_id(thd));
19052 }
19053
19054 WSREP_DEBUG("signaling aborter");
19055 wsrep_unlock_rollback();
19056 wsrep_thd_UNLOCK(thd);
19057 break;
19058 }
19059 default:
19060 WSREP_WARN("bad wsrep query state: %d",
19061 wsrep_thd_query_state(thd));
19062 ut_error;
19063 }
19064 DBUG_VOID_RETURN;
19065 }
19066
19067 /*******************************************************************
19068 This function is used to kill one transaction in BF. */
19069 void
wsrep_innobase_kill_one_trx(MYSQL_THD const bf_thd,const trx_t * const bf_trx,trx_t * victim_trx,my_bool signal)19070 wsrep_innobase_kill_one_trx(
19071 MYSQL_THD const bf_thd,
19072 const trx_t * const bf_trx,
19073 trx_t *victim_trx,
19074 my_bool signal)
19075 {
19076 ut_ad(bf_thd);
19077 ut_ad(victim_trx);
19078 ut_ad(lock_mutex_own());
19079 ut_ad(trx_mutex_own(victim_trx));
19080
19081 DBUG_ENTER("wsrep_innobase_kill_one_trx");
19082 THD *thd= (THD *) victim_trx->mysql_thd;
19083
19084 /* Here we need to lock THD::LOCK_thd_data to protect from
19085 concurrent usage or disconnect or delete. */
19086 DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
19087 wsrep_thd_LOCK(thd);
19088 DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
19089
19090 WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
19091
19092 WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
19093 "trx_id: " TRX_ID_FMT " thread: %ld "
19094 "seqno: %lld query_state: %s conflict_state: %s "
19095 "exec mode %s query: %s",
19096 wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
19097 bf_trx ? bf_trx->id : TRX_ID_MAX,
19098 thd_get_thread_id(bf_thd),
19099 wsrep_thd_trx_seqno(bf_thd),
19100 wsrep_thd_query_state_str(bf_thd),
19101 wsrep_thd_conflict_state_str(bf_thd),
19102 wsrep_thd_exec_mode_str(bf_thd),
19103 wsrep_thd_query(bf_thd));
19104
19105 WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
19106 "trx_id: " TRX_ID_FMT " thread: %ld "
19107 "seqno: %lld query_state: %s conflict_state: %s "
19108 "exec mode %s query: %s",
19109 wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
19110 victim_trx->id,
19111 thd_get_thread_id(thd),
19112 wsrep_thd_trx_seqno(thd),
19113 wsrep_thd_query_state_str(thd),
19114 wsrep_thd_conflict_state_str(thd),
19115 wsrep_thd_exec_mode_str(thd),
19116 wsrep_thd_query(thd));
19117
19118 wsrep_kill_victim(bf_thd, bf_trx, thd, victim_trx, signal);
19119 DBUG_VOID_RETURN;
19120 }
19121
19122 static
19123 void
wsrep_abort_transaction(handlerton *,THD * bf_thd,THD * victim_thd,my_bool signal)19124 wsrep_abort_transaction(
19125 handlerton*,
19126 THD *bf_thd,
19127 THD *victim_thd,
19128 my_bool signal)
19129 {
19130 DBUG_ENTER("wsrep_abort_transaction");
19131 /* Note that victim thd is protected with
19132 THD::LOCK_thd_data here. */
19133 trx_t* victim_trx= thd_to_trx(victim_thd);
19134 trx_t* bf_trx= thd_to_trx(bf_thd);
19135
19136 WSREP_DEBUG("wsrep_abort_transaction: BF:"
19137 " thread %ld query_state %s conflict_state %s"
19138 " exec %s query %s trx " TRX_ID_FMT,
19139 thd_get_thread_id(bf_thd),
19140 wsrep_thd_query_state_str(bf_thd),
19141 wsrep_thd_conflict_state_str(bf_thd),
19142 wsrep_thd_exec_mode_str(bf_thd),
19143 wsrep_thd_query(bf_thd),
19144 bf_trx ? bf_trx->id : 0);
19145
19146 WSREP_DEBUG("wsrep_abort_transaction: victim:"
19147 " thread %ld query_state %s conflict_state %s"
19148 " exec %s query %s trx " TRX_ID_FMT,
19149 thd_get_thread_id(victim_thd),
19150 wsrep_thd_query_state_str(victim_thd),
19151 wsrep_thd_conflict_state_str(victim_thd),
19152 wsrep_thd_exec_mode_str(victim_thd),
19153 wsrep_thd_query(victim_thd),
19154 victim_trx ? victim_trx->id : 0);
19155
19156 if (victim_trx) {
19157 lock_mutex_enter();
19158 trx_mutex_enter(victim_trx);
19159 wsrep_kill_victim(bf_thd, bf_trx, victim_thd, victim_trx, signal);
19160 lock_mutex_exit();
19161 trx_mutex_exit(victim_trx);
19162 wsrep_srv_conc_cancel_wait(victim_trx);
19163 } else {
19164 wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
19165 wsrep_thd_awake(victim_thd, signal);
19166 wsrep_thd_UNLOCK(victim_thd);
19167 }
19168
19169 DBUG_VOID_RETURN;
19170 }
19171
19172 static
19173 int
innobase_wsrep_set_checkpoint(handlerton * hton,const XID * xid)19174 innobase_wsrep_set_checkpoint(
19175 /*==========================*/
19176 handlerton* hton,
19177 const XID* xid)
19178 {
19179 DBUG_ASSERT(hton == innodb_hton_ptr);
19180
19181 if (wsrep_is_wsrep_xid(xid)) {
19182
19183 trx_rseg_update_wsrep_checkpoint(xid);
19184 innobase_flush_logs(hton, false);
19185 return 0;
19186 } else {
19187 return 1;
19188 }
19189 }
19190
19191 static
19192 int
innobase_wsrep_get_checkpoint(handlerton * hton,XID * xid)19193 innobase_wsrep_get_checkpoint(
19194 /*==========================*/
19195 handlerton* hton,
19196 XID* xid)
19197 {
19198 DBUG_ASSERT(hton == innodb_hton_ptr);
19199 trx_rseg_read_wsrep_checkpoint(*xid);
19200 return 0;
19201 }
19202
wsrep_fake_trx_id(handlerton *,THD * thd)19203 static void wsrep_fake_trx_id(handlerton *, THD *thd)
19204 {
19205 trx_id_t trx_id = trx_sys.get_new_trx_id();
19206 WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s",
19207 trx_id, wsrep_thd_query(thd));
19208 wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), trx_id);
19209 }
19210
19211 #endif /* WITH_WSREP */
19212
innodb_idle_flush_pct_update(THD * thd,st_mysql_sys_var * var,void *,const void * save)19213 static void innodb_idle_flush_pct_update(THD *thd, st_mysql_sys_var *var,
19214 void*, const void *save)
19215 {
19216 innodb_idle_flush_pct = *static_cast<const ulong*>(save);
19217 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
19218 HA_ERR_WRONG_COMMAND, deprecated_idle_flush_pct);
19219 }
19220
19221 /* plugin options */
19222
/* In the definitions below, the two arguments that follow the help text
are the check (validate) function and the update function, in that order;
NULL means that none is installed. */

/* Page checksum algorithm, stored in srv_checksum_algorithm; changes go
through innodb_checksum_algorithm_update(). */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
  PLUGIN_VAR_RQCMDARG,
  "The algorithm InnoDB uses for page checksumming. Possible values are"
  " CRC32 (hardware accelerated if the CPU supports it)"
    " write crc32, allow any of the other checksums to match when reading;"
  " STRICT_CRC32"
    " write crc32, do not allow other algorithms to match when reading;"
  " INNODB"
    " write a software calculated checksum, allow any other checksums"
    " to match when reading;"
  " STRICT_INNODB"
    " write a software calculated checksum, do not allow other algorithms"
    " to match when reading;"
  " NONE"
    " write a constant magic number, do not do any checksum verification"
    " when reading (same as innodb_checksums=OFF);"
  " STRICT_NONE"
    " write a constant magic number, do not allow values other than that"
    " magic number when reading;"
  " Files updated when this option is set to crc32 or strict_crc32 will"
  " not be readable by MariaDB versions older than 10.0.4",
  NULL, innodb_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_CRC32,
  &innodb_checksum_algorithm_typelib);

/* Deprecated; changes go through innodb_log_checksums_update(). */
static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. Whether to require checksums for InnoDB redo log blocks.",
  NULL, innodb_log_checksums_update, TRUE);

/* Deprecated, read-only. */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting"
  " this to OFF."
  " Enable InnoDB checksums validation (enabled by default)."
  " Disable with --skip-innodb-checksums.",
  NULL, NULL, TRUE);

/* Read-only string stored in innobase_data_home_dir. */
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

/* Read-only switch for the doublewrite buffer. */
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default)."
  " Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

/* Read-only switch for atomic writes. */
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable atomic writes, instead of using the doublewrite buffer, for files "
  "on devices that supports atomic writes. "
  "This option only works on Linux with either FusionIO cards using "
  "the directFS filesystem or with Shannon cards using any file system.",
  NULL, NULL, TRUE);

/* Stored in srv_stats_include_delete_marked. */
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
  srv_stats_include_delete_marked,
  PLUGIN_VAR_OPCMDARG,
  "Include delete marked records when calculating persistent statistics",
  NULL, NULL, FALSE);

/* Enum backed by innodb_instant_alter_column_allowed_typelib; the default
index 1 is annotated as add_last. */
static MYSQL_SYSVAR_ENUM(instant_alter_column_allowed,
			 innodb_instant_alter_column_allowed,
  PLUGIN_VAR_RQCMDARG,
  "File format constraint for ALTER TABLE", NULL, NULL, 1/*add_last*/,
  &innodb_instant_alter_column_allowed_typelib);

/* Stored in srv_io_capacity; changes go through
innodb_io_capacity_update(). */
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Number of IOPs the server can do. Tunes the background IO rate",
  NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);

/* Stored in srv_max_io_capacity; changes go through
innodb_io_capacity_max_update(). */
static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Limit to which innodb_io_capacity can be inflated.",
  NULL, innodb_io_capacity_max_update,
  SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
  SRV_MAX_IO_CAPACITY_LIMIT, 0);

/* Deprecated; innodb_idle_flush_pct_update() stores the value and pushes
a deprecation warning. */
static MYSQL_SYSVAR_ULONG(idle_flush_pct, innodb_idle_flush_pct,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. This setting has no effect.",
  NULL, innodb_idle_flush_pct_update, 100, 0, 100, 0);
19307
19308 #ifdef UNIV_DEBUG
/* Debug-only variables. Each one installs an update function and no check
function, so the action is carried out by the update callback. */

/* Update callback: wait_background_drop_list_empty(). */
static MYSQL_SYSVAR_BOOL(background_drop_list_empty,
  innodb_background_drop_list_empty,
  PLUGIN_VAR_OPCMDARG,
  "Wait for the background drop list to become empty",
  NULL, wait_background_drop_list_empty, FALSE);

/* Update callback: checkpoint_now_set(). */
static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
  PLUGIN_VAR_OPCMDARG,
  "Force checkpoint now",
  NULL, checkpoint_now_set, FALSE);

/* Update callback: buf_flush_list_now_set(). */
static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
  PLUGIN_VAR_OPCMDARG,
  "Force dirty page flush now",
  NULL, buf_flush_list_now_set, FALSE);

/* Update callback: innodb_merge_threshold_set_all_debug_update(). */
static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug,
  innodb_merge_threshold_set_all_debug,
  PLUGIN_VAR_RQCMDARG,
  "Override current MERGE_THRESHOLD setting for all indexes at dictionary"
  " cache by the specified value dynamically, at the time.",
  NULL, innodb_merge_threshold_set_all_debug_update,
  DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0);
19332 #endif /* UNIV_DEBUG */
19333
/* Stored in srv_purge_batch_size. */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
  PLUGIN_VAR_OPCMDARG,
  "Number of UNDO log pages to purge in one batch from the history list.",
  NULL, NULL,
  300,			/* Default setting */
  1,			/* Minimum value */
  5000, 0);		/* Maximum value */

/* Read-only; stored in srv_n_purge_threads. */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Purge threads can be from 1 to 32. Default is 4.",
  NULL, NULL,
  4,			/* Default setting */
  1,			/* Minimum value */
  srv_max_purge_threads,/* Maximum value */
  0);

/* Read-only; stored in srv_sync_array_size. */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Size of the mutex/lock wait array.",
  NULL, NULL,
  1,			/* Default setting */
  1,			/* Minimum value */
  1024, 0);		/* Maximum value */

/* New values are checked by fast_shutdown_validate(). */
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible"
  " values are 0, 1 (faster), 2 (crash-like), 3 (fastest clean).",
  fast_shutdown_validate, NULL, 1, 0, 3, 0);

/* Stored in srv_file_per_table. */
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
  PLUGIN_VAR_NOCMDARG,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, TRUE);

/* New values are checked by innodb_stopword_table_validate(). */
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
  "The user supplied stopword table name.",
  innodb_stopword_table_validate,
  NULL,
  NULL);

/* Stored in srv_flush_log_at_timeout. */
static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
  PLUGIN_VAR_OPCMDARG,
  "Write and flush logs every (n) second.",
  NULL, NULL, 1, 0, 2700, 0);

/* Stored in srv_flush_log_at_trx_commit; the help text describes the
values 0 to 3. */
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
  "Controls the durability/speed trade-off for commits."
  " Set to 0 (write and flush redo log to disk only once per second),"
  " 1 (flush to disk at each commit),"
  " 2 (write to log at commit but flush to disk only once per second)"
  " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
  " 1 and 3 guarantees that after a crash, committed transactions will"
  " not be lost and will be consistent with the binlog and other transactional"
  " engines. 2 can get inconsistent and lose transactions if there is a"
  " power failure or kernel crash but not if mysqld crashes. 0 has no"
  " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
  NULL, NULL, 1, 0, 3, 0);

/* Read-only enum; the default is chosen with IF_WIN between
SRV_ALL_O_DIRECT_FSYNC and SRV_FSYNC. */
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.",
  NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
  &innodb_flush_method_typelib);

/* Deprecated, read-only, no effect. */
static MYSQL_SYSVAR_STR(file_format, innodb_file_format,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* Deprecated, read-only, no effect. */
static MYSQL_SYSVAR_STR(large_prefix, innodb_large_prefix,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* Read-only; stored in srv_load_corrupted. */
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to load metadata of corrupted table.",
  NULL, NULL, FALSE);

/* Deprecated, read-only. */
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. This option may be removed in future releases."
  " Please use READ COMMITTED transaction isolation level instead."
  " Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

/* Read-only; stored in srv_log_group_home_dir. */
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);
19425
19426 /** Update innodb_page_cleaners.
19427 @param[in] save the new value of innodb_page_cleaners */
19428 static
19429 void
innodb_page_cleaners_threads_update(THD *,struct st_mysql_sys_var *,void *,const void * save)19430 innodb_page_cleaners_threads_update(THD*, struct st_mysql_sys_var*, void*, const void *save)
19431 {
19432 buf_flush_set_page_cleaner_thread_cnt(*static_cast<const ulong*>(save));
19433 }
19434
19435 static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
19436 PLUGIN_VAR_RQCMDARG,
19437 "Page cleaner threads can be from 1 to 64. Default is 4.",
19438 NULL,
19439 innodb_page_cleaners_threads_update, 4, 1, 64, 0);
19440
19441 static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
19442 PLUGIN_VAR_RQCMDARG,
19443 "Percentage of dirty pages allowed in bufferpool.",
19444 NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0);
19445
19446 static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
19447 srv_max_dirty_pages_pct_lwm,
19448 PLUGIN_VAR_RQCMDARG,
19449 "Percentage of dirty pages at which flushing kicks in.",
19450 NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);
19451
19452 static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
19453 srv_adaptive_flushing_lwm,
19454 PLUGIN_VAR_RQCMDARG,
19455 "Percentage of log capacity below which no adaptive flushing happens.",
19456 NULL, NULL, 10.0, 0.0, 70.0, 0);
19457
19458 static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
19459 PLUGIN_VAR_NOCMDARG,
19460 "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
19461 NULL, NULL, TRUE);
19462
19463 static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync,
19464 PLUGIN_VAR_NOCMDARG,
19465 "Allow IO bursts at the checkpoints ignoring io_capacity setting.",
19466 NULL, NULL, TRUE);
19467
19468 static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
19469 srv_flushing_avg_loops,
19470 PLUGIN_VAR_RQCMDARG,
19471 "Number of iterations over which the background flushing is averaged.",
19472 NULL, NULL, 30, 1, 1000, 0);
19473
19474 static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
19475 PLUGIN_VAR_RQCMDARG,
19476 "Desired maximum length of the purge queue (0 = no limit)",
19477 NULL, NULL, 0, 0, ~0UL, 0);
19478
19479 static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
19480 PLUGIN_VAR_RQCMDARG,
19481 "Maximum delay of user threads in micro-seconds",
19482 NULL, NULL,
19483 0L, /* Default seting */
19484 0L, /* Minimum value */
19485 10000000UL, 0); /* Maximum value */
19486
19487 static MYSQL_SYSVAR_UINT(max_purge_lag_wait, innodb_max_purge_lag_wait,
19488 PLUGIN_VAR_RQCMDARG,
19489 "Wait until History list length is below the specified limit",
19490 NULL, innodb_max_purge_lag_wait_update, UINT_MAX, 0, UINT_MAX, 0);
19491
19492 static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
19493 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
19494 "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
19495 NULL, NULL, FALSE);
19496
19497 static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
19498 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
19499 "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file",
19500 NULL, NULL, FALSE);
19501
19502 static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
19503 PLUGIN_VAR_OPCMDARG,
19504 "Enable statistics gathering for metadata commands such as"
19505 " SHOW TABLE STATUS for tables that use transient statistics (off by default)",
19506 NULL, NULL, FALSE);
19507
19508 static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
19509 PLUGIN_VAR_RQCMDARG,
19510 "Deprecated, use innodb_stats_transient_sample_pages instead",
19511 NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0);
19512
19513 static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
19514 srv_stats_transient_sample_pages,
19515 PLUGIN_VAR_RQCMDARG,
19516 "The number of leaf index pages to sample when calculating transient"
19517 " statistics (if persistent statistics are not used, default 8)",
19518 NULL, NULL, 8, 1, ~0ULL, 0);
19519
19520 static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
19521 PLUGIN_VAR_OPCMDARG,
19522 "InnoDB persistent statistics enabled for all tables unless overridden"
19523 " at table level",
19524 NULL, NULL, TRUE);
19525
19526 static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
19527 PLUGIN_VAR_OPCMDARG,
19528 "InnoDB automatic recalculation of persistent statistics enabled for all"
19529 " tables unless overridden at table level (automatic recalculation is only"
19530 " done when InnoDB decides that the table has changed too much and needs a"
19531 " new statistics)",
19532 NULL, NULL, TRUE);
19533
19534 static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
19535 srv_stats_persistent_sample_pages,
19536 PLUGIN_VAR_RQCMDARG,
19537 "The number of leaf index pages to sample when calculating persistent"
19538 " statistics (by ANALYZE, default 20)",
19539 NULL, NULL, 20, 1, ~0ULL, 0);
19540
19541 static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
19542 PLUGIN_VAR_RQCMDARG,
19543 "The number of rows modified before we calculate new statistics (default 0 = current limits)",
19544 NULL, NULL, 0, 0, ~0ULL, 0);
19545
19546 static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
19547 PLUGIN_VAR_RQCMDARG,
19548 "Enable traditional statistic calculation based on number of configured pages (default true)",
19549 NULL, NULL, TRUE);
19550
19551 #ifdef BTR_CUR_HASH_ADAPT
19552 static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
19553 PLUGIN_VAR_OPCMDARG,
19554 "Enable InnoDB adaptive hash index (enabled by default). "
19555 " Disable with --skip-innodb-adaptive-hash-index.",
19556 NULL, innodb_adaptive_hash_index_update, true);
19557
19558 /** Number of distinct partitions of AHI.
19559 Each partition is protected by its own latch and so we have parts number
19560 of latches protecting complete search system. */
19561 static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts,
19562 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
19563 "Number of InnoDB Adaptive Hash Index Partitions (default 8)",
19564 NULL, NULL, 8, 1, 512, 0);
19565 #endif /* BTR_CUR_HASH_ADAPT */
19566
19567 static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
19568 PLUGIN_VAR_RQCMDARG,
19569 "Replication thread delay (ms) on the slave server if"
19570 " innodb_thread_concurrency is reached (0 by default)",
19571 NULL, NULL, 0, 0, ~0UL, 0);
19572
19573 static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
19574 PLUGIN_VAR_RQCMDARG,
19575 "Compression level used for zlib compression. 0 is no compression"
19576 ", 1 is fastest, 9 is best compression and default is 6.",
19577 NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
19578
19579 static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
19580 PLUGIN_VAR_OPCMDARG,
19581 "Enables/disables the logging of entire compressed page images."
19582 " InnoDB logs the compressed pages to prevent corruption if"
19583 " the zlib compression algorithm changes."
19584 " When turned OFF, InnoDB will assume that the zlib"
19585 " compression algorithm doesn't change.",
19586 NULL, NULL, TRUE);
19587
19588 static MYSQL_SYSVAR_BOOL(log_optimize_ddl, innodb_log_optimize_ddl,
19589 PLUGIN_VAR_OPCMDARG,
19590 "DEPRECATED. Ignored in MariaDB 10.5."
19591 " Reduce redo logging when natively creating indexes or rebuilding tables."
19592 " Enabling this may slow down backup and cause delay due to page flushing.",
19593 NULL, NULL, FALSE);
19594
19595 static MYSQL_SYSVAR_ULONG(autoextend_increment,
19596 sys_tablespace_auto_extend_increment,
19597 PLUGIN_VAR_RQCMDARG,
19598 "Data file autoextend increment in megabytes",
19599 NULL, NULL, 64L, 1L, 1000L, 0);
19600
19601 static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
19602 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19603 "Size of a single memory chunk within each buffer pool instance"
19604 " for resizing buffer pool. Online buffer pool resizing happens"
19605 " at this granularity. 0 means disable resizing buffer pool.",
19606 NULL, NULL,
19607 128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
19608
19609 #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
19610 static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
19611 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
19612 "Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
19613 NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0);
19614
19615 static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
19616 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
19617 "Number of pages reserved in doublewrite buffer for batch flushing",
19618 NULL, NULL, 120, 1, 127, 0);
19619 #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
19620
19621 static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
19622 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19623 "The algorithm Innodb uses for deciding which locks to grant next when"
19624 " a lock is released. Possible values are"
19625 " FCFS"
19626 " grant the locks in First-Come-First-Served order;"
19627 " VATS"
19628 " use the Variance-Aware-Transaction-Scheduling algorithm, which"
19629 " uses an Eldest-Transaction-First heuristic.",
19630 NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
19631 &innodb_lock_schedule_algorithm_typelib);
19632
19633 static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
19634 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19635 "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
19636 NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0);
19637
19638 static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
19639 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
19640 "Filename to/from which to dump/load the InnoDB buffer pool",
19641 innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
19642
19643 static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
19644 PLUGIN_VAR_RQCMDARG,
19645 "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename",
19646 NULL, buffer_pool_dump_now, FALSE);
19647
19648 static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
19649 PLUGIN_VAR_RQCMDARG,
19650 "Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
19651 NULL, NULL, TRUE);
19652
19653 static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
19654 PLUGIN_VAR_RQCMDARG,
19655 "Dump only the hottest N% of each buffer pool, defaults to 25",
19656 NULL, NULL, 25, 1, 100, 0);
19657
19658 #ifdef UNIV_DEBUG
19659 /* Added to test the innodb_buffer_pool_load_incomplete status variable. */
19660 static MYSQL_SYSVAR_ULONG(buffer_pool_load_pages_abort, srv_buf_pool_load_pages_abort,
19661 PLUGIN_VAR_RQCMDARG,
19662 "Number of pages during a buffer pool load to process before signaling innodb_buffer_pool_load_abort=1",
19663 NULL, NULL, LONG_MAX, 1, LONG_MAX, 0);
19664
19665 static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
19666 PLUGIN_VAR_RQCMDARG,
19667 "Evict pages from the buffer pool",
19668 NULL, innodb_buffer_pool_evict_update, "");
19669 #endif /* UNIV_DEBUG */
19670
19671 static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
19672 PLUGIN_VAR_RQCMDARG,
19673 "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
19674 NULL, buffer_pool_load_now, FALSE);
19675
19676 static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
19677 PLUGIN_VAR_RQCMDARG,
19678 "Abort a currently running load of the buffer pool",
19679 NULL, buffer_pool_load_abort, FALSE);
19680
19681 /* there is no point in changing this during runtime, thus readonly */
19682 static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
19683 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19684 "Load the buffer pool from a file named @@innodb_buffer_pool_filename",
19685 NULL, NULL, TRUE);
19686
19687 static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
19688 PLUGIN_VAR_RQCMDARG,
19689 "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
19690 "defragmentation will be paused. And new defragmentation command will fail."
19691 "Paused defragmentation commands will resume when this variable is set to "
19692 "true again.",
19693 NULL, NULL, FALSE);
19694
19695 static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
19696 PLUGIN_VAR_RQCMDARG,
19697 "Number of pages considered at once when merging multiple pages to "
19698 "defragment",
19699 NULL, NULL, 7, 2, 32, 0);
19700
19701 static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
19702 srv_defragment_stats_accuracy,
19703 PLUGIN_VAR_RQCMDARG,
19704 "How many defragment stats changes there are before the stats "
19705 "are written to persistent storage. Set to 0 meaning disable "
19706 "defragment stats tracking.",
19707 NULL, NULL, 0, 0, ~0U, 0);
19708
19709 static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
19710 srv_defragment_fill_factor_n_recs,
19711 PLUGIN_VAR_RQCMDARG,
19712 "How many records of space defragmentation should leave on the page. "
19713 "This variable, together with innodb_defragment_fill_factor, is introduced "
19714 "so defragmentation won't pack the page too full and cause page split on "
19715 "the next insert on every page. The variable indicating more defragmentation"
19716 " gain is the one effective.",
19717 NULL, NULL, 20, 1, 100, 0);
19718
19719 static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
19720 PLUGIN_VAR_RQCMDARG,
19721 "A number between [0.7, 1] that tells defragmentation how full it should "
19722 "fill a page. Default is 0.9. Number below 0.7 won't make much sense."
19723 "This variable, together with innodb_defragment_fill_factor_n_recs, is "
19724 "introduced so defragmentation won't pack the page too full and cause "
19725 "page split on the next insert on every page. The variable indicating more "
19726 "defragmentation gain is the one effective.",
19727 NULL, NULL, 0.9, 0.7, 1, 0);
19728
19729 static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
19730 PLUGIN_VAR_RQCMDARG,
19731 "Do not defragment a single index more than this number of time per second."
19732 "This controls the number of time defragmentation thread can request X_LOCK "
19733 "on an index. Defragmentation thread will check whether "
19734 "1/defragment_frequency (s) has passed since it worked on this index last "
19735 "time, and put the index back to the queue if not enough time has passed. "
19736 "The actual frequency can only be lower than this given number.",
19737 NULL, innodb_defragment_frequency_update,
19738 SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
19739
19740
19741 static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
19742 PLUGIN_VAR_RQCMDARG,
19743 "How deep to scan LRU to keep it clean",
19744 NULL, NULL, 1024, 100, ~0UL, 0);
19745
19746 static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
19747 PLUGIN_VAR_OPCMDARG,
19748 "Set to 0 (don't flush neighbors from buffer pool),"
19749 " 1 (flush contiguous neighbors from buffer pool)"
19750 " or 2 (flush neighbors from buffer pool),"
19751 " when flushing a block",
19752 NULL, NULL, 1, 0, 2, 0);
19753
19754 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
19755 PLUGIN_VAR_RQCMDARG,
19756 "Helps in performance tuning in heavily concurrent environments.",
19757 innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);
19758
19759 static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
19760 PLUGIN_VAR_RQCMDARG,
19761 "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
19762 NULL, NULL, 5000L, 1L, ~0UL, 0);
19763
19764 static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
19765 PLUGIN_VAR_NOCMDARG,
19766 "Enable/disable InnoDB deadlock detector (default ON)."
19767 " if set to OFF, deadlock detection is skipped,"
19768 " and we rely on innodb_lock_wait_timeout in case of deadlock.",
19769 NULL, NULL, TRUE);
19770
19771 static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor,
19772 PLUGIN_VAR_RQCMDARG,
19773 "Percentage of B-tree page filled during bulk insert",
19774 NULL, NULL, 100, 10, 100, 0);
19775
19776 static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
19777 PLUGIN_VAR_OPCMDARG,
19778 "Whether to enable additional FTS diagnostic printout ",
19779 NULL, NULL, FALSE);
19780
19781 static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
19782 PLUGIN_VAR_OPCMDARG,
19783 "Whether to disable OS system file cache for sort I/O",
19784 NULL, NULL, FALSE);
19785
19786 static MYSQL_SYSVAR_STR(ft_aux_table, innodb_ft_aux_table,
19787 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
19788 "FTS internal auxiliary table to be checked",
19789 innodb_ft_aux_table_validate, NULL, NULL);
19790
19791 static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
19792 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19793 "InnoDB Fulltext search cache size in bytes",
19794 NULL, NULL, 8000000, 1600000, 80000000, 0);
19795
19796 static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size,
19797 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19798 "Total memory allocated for InnoDB Fulltext Search cache",
19799 NULL, NULL, 640000000, 32000000, 1600000000, 0);
19800
19801 static MYSQL_SYSVAR_SIZE_T(ft_result_cache_limit, fts_result_cache_limit,
19802 PLUGIN_VAR_RQCMDARG,
19803 "InnoDB Fulltext search query result cache limit in bytes",
19804 NULL, NULL, 2000000000L, 1000000L, SIZE_T_MAX, 0);
19805
19806 static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
19807 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19808 "InnoDB Fulltext search minimum token size in characters",
19809 NULL, NULL, 3, 0, 16, 0);
19810
19811 static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
19812 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19813 "InnoDB Fulltext search maximum token size in characters",
19814 NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);
19815
19816 static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
19817 PLUGIN_VAR_OPCMDARG,
19818 "InnoDB Fulltext search number of words to optimize for each optimize table call ",
19819 NULL, NULL, 2000, 1000, 10000, 0);
19820
19821 static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
19822 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19823 "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number",
19824 NULL, NULL, 2, 1, 16, 0);
19825
19826 static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
19827 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19828 "Memory buffer size for index creation",
19829 NULL, NULL, 1048576, 65536, 64<<20, 0);
19830
19831 static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
19832 PLUGIN_VAR_RQCMDARG,
19833 "Maximum modification log file size for online index creation",
19834 NULL, NULL, 128<<20, 65536, ~0ULL, 0);
19835
19836 static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
19837 PLUGIN_VAR_NOCMDARG,
19838 "Only optimize the Fulltext index of the table",
19839 NULL, NULL, FALSE);
19840
19841 static MYSQL_SYSVAR_ULONG(read_io_threads, srv_n_read_io_threads,
19842 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19843 "Number of background read I/O threads in InnoDB.",
19844 NULL, NULL, 4, 1, 64, 0);
19845
19846 static MYSQL_SYSVAR_ULONG(write_io_threads, srv_n_write_io_threads,
19847 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19848 "Number of background write I/O threads in InnoDB.",
19849 NULL, NULL, 4, 1, 64, 0);
19850
19851 static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
19852 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19853 "Helps to save your data in case the disk image of the database becomes corrupt.",
19854 NULL, NULL, 0, 0, 6, 0);
19855
19856 static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
19857 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
19858 "Page size to use for all InnoDB tablespaces.",
19859 NULL, NULL, UNIV_PAGE_SIZE_DEF,
19860 UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0);
19861
19862 static MYSQL_SYSVAR_ULONG(log_buffer_size, srv_log_buffer_size,
19863 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19864 "The size of the buffer which InnoDB uses to write log to the log files on disk.",
19865 NULL, NULL, 16L << 20, 256L << 10, LONG_MAX, 1024);
19866
19867 static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
19868 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19869 "Size of each log file in a log group.",
19870 NULL, NULL, 48 << 20, 1 << 20, log_group_max_size, UNIV_PAGE_SIZE_MAX);
19871 /* OS_FILE_LOG_BLOCK_SIZE would be more appropriate than UNIV_PAGE_SIZE_MAX,
19872 but fil_space_t is being used for the redo log, and it uses data pages. */
19873
19874 static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
19875 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19876 "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
19877 NULL, NULL, 2, 1, SRV_N_LOG_FILES_MAX, 0);
19878
19879 static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size,
19880 PLUGIN_VAR_RQCMDARG,
19881 "Redo log write ahead unit size to avoid read-on-write,"
19882 " it should match the OS cache block IO size",
19883 NULL, innodb_log_write_ahead_size_update,
19884 8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE);
19885
19886 static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
19887 PLUGIN_VAR_RQCMDARG,
19888 "Percentage of the buffer pool to reserve for 'old' blocks.",
19889 NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);
19890
19891 static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
19892 PLUGIN_VAR_RQCMDARG,
19893 "Move blocks to the 'new' end of the buffer pool if the first access"
19894 " was at least this many milliseconds ago."
19895 " The timeout is disabled if 0.",
19896 NULL, NULL, 1000, 0, UINT_MAX32, 0);
19897
19898 static MYSQL_SYSVAR_ULONG(open_files, innobase_open_files,
19899 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19900 "How many files at the maximum InnoDB keeps open at the same time.",
19901 NULL, NULL, 0, 0, LONG_MAX, 0);
19902
19903 static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
19904 PLUGIN_VAR_RQCMDARG,
19905 "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
19906 NULL, NULL, 30L, 0L, ~0UL, 0);
19907
19908 static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
19909 PLUGIN_VAR_OPCMDARG,
19910 "Maximum delay between polling for a spin lock (4 by default)",
19911 NULL, NULL, 4, 0, 6000, 0);
19912
19913 static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
19914 PLUGIN_VAR_RQCMDARG,
19915 "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
19916 NULL, NULL, 0, 0, 1000, 0);
19917
19918 static MYSQL_SYSVAR_ULONG(
19919 adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay,
19920 PLUGIN_VAR_RQCMDARG,
19921 "The upper limit of the sleep delay in usec. Value of 0 disables it.",
19922 NULL, NULL,
19923 150000, /* Default setting */
19924 0, /* Minimum value */
19925 1000000, 0); /* Maximum value */
19926
19927 static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
19928 srv_prefix_index_cluster_optimization,
19929 PLUGIN_VAR_OPCMDARG,
19930 "Enable prefix optimization to sometimes avoid cluster index lookups.",
19931 NULL, NULL, FALSE);
19932
19933 static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
19934 PLUGIN_VAR_RQCMDARG,
19935 "Time of innodb thread sleeping before joining InnoDB queue (usec)."
19936 " Value 0 disable a sleep",
19937 NULL, NULL,
19938 10000L,
19939 0L,
19940 1000000L, 0);
19941
19942 static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
19943 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19944 "Path to individual files and their sizes.",
19945 NULL, NULL, "ibdata1:12M:autoextend");
19946
19947 static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
19948 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19949 "Path to files and their sizes making temp-tablespace.",
19950 NULL, NULL, "ibtmp1:12M:autoextend");
19951
19952 static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
19953 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19954 "Directory where undo tablespace files live, this path can be absolute.",
19955 NULL, NULL, NULL);
19956
19957 static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces,
19958 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19959 "Number of undo tablespaces to use.",
19960 NULL, NULL,
19961 0L, /* Default seting */
19962 0L, /* Minimum value */
19963 TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */
19964
19965 static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
19966 PLUGIN_VAR_OPCMDARG,
19967 "Number of undo logs to use.",
19968 NULL, NULL,
19969 TRX_SYS_N_RSEGS, /* Default setting */
19970 1, /* Minimum value */
19971 TRX_SYS_N_RSEGS, 0); /* Maximum value */
19972
19973 static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
19974 PLUGIN_VAR_OPCMDARG,
19975 "Desired maximum UNDO tablespace size in bytes",
19976 NULL, NULL,
19977 10 << 20, 10 << 20,
19978 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
19979
19980 static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
19981 srv_purge_rseg_truncate_frequency,
19982 PLUGIN_VAR_OPCMDARG,
19983 "Dictates rate at which UNDO records are purged. Value N means"
19984 " purge rollback segment(s) on every Nth iteration of purge invocation",
19985 NULL, NULL, 128, 1, 128, 0);
19986
19987 static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate,
19988 PLUGIN_VAR_OPCMDARG,
19989 "Enable or Disable Truncate of UNDO tablespace.",
19990 NULL, NULL, FALSE);
19991
19992 /* Alias for innodb_undo_logs, this config variable is deprecated. */
19993 static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
19994 PLUGIN_VAR_OPCMDARG,
19995 "Number of undo logs to use (deprecated).",
19996 NULL, NULL,
19997 TRX_SYS_N_RSEGS, /* Default setting */
19998 1, /* Minimum value */
19999 TRX_SYS_N_RSEGS, 0); /* Maximum value */
20000
20001 static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
20002 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
20003 "The AUTOINC lock modes supported by InnoDB:"
20004 " 0 => Old style AUTOINC locking (for backward compatibility);"
20005 " 1 => New style AUTOINC locking;"
20006 " 2 => No AUTOINC locking (unsafe for SBR)",
20007 NULL, NULL,
20008 AUTOINC_NEW_STYLE_LOCKING, /* Default setting */
20009 AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
20010 AUTOINC_NO_LOCKING, 0); /* Maximum value */
20011
20012 static MYSQL_SYSVAR_STR(version, innodb_version_str,
20013 PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
20014 "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
20015
20016 static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
20017 PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
20018 "Use native AIO if supported on this platform.",
20019 NULL, NULL, TRUE);
20020
20021 #ifdef HAVE_LIBNUMA
20022 static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
20023 PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
20024 "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
20025 NULL, NULL, FALSE);
20026 #endif /* HAVE_LIBNUMA */
20027
20028 static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
20029 PLUGIN_VAR_RQCMDARG,
20030 "Buffer changes to secondary indexes.",
20031 NULL, NULL, IBUF_USE_ALL, &innodb_change_buffering_typelib);
20032
20033 static MYSQL_SYSVAR_UINT(change_buffer_max_size,
20034 srv_change_buffer_max_size,
20035 PLUGIN_VAR_RQCMDARG,
20036 "Maximum on-disk size of change buffer in terms of percentage"
20037 " of the buffer pool.",
20038 NULL, innodb_change_buffer_max_size_update,
20039 CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);
20040
20041 static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
20042 PLUGIN_VAR_RQCMDARG,
20043 "Specifies how InnoDB index statistics collection code should"
20044 " treat NULLs. Possible values are NULLS_EQUAL (default),"
20045 " NULLS_UNEQUAL and NULLS_IGNORED",
20046 NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
20047
20048 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
20049 static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
20050 PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
20051 "Dump the change buffer at startup.",
20052 NULL, NULL, FALSE);
20053
20054 static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
20055 PLUGIN_VAR_RQCMDARG,
20056 "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
20057 NULL, NULL, 0, 0, 1, 0);
20058
20059 static MYSQL_SYSVAR_BOOL(disable_background_merge,
20060 srv_ibuf_disable_background_merge,
20061 PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
20062 "Disable change buffering merges by the master thread",
20063 NULL, NULL, FALSE);
20064 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
20065
20066 static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
20067 PLUGIN_VAR_RQCMDARG,
20068 "A number between [0, 100] that tells how oftern buffer pool dump status "
20069 "in percentages should be printed. E.g. 10 means that buffer pool dump "
20070 "status is printed when every 10% of number of buffer pool pages are "
20071 "dumped. Default is 0 (only start and end status is printed).",
20072 NULL, NULL, 0, 0, 100, 0);
20073
20074 #ifdef WITH_INNODB_DISALLOW_WRITES
20075 /*******************************************************
20076 * innobase_disallow_writes variable definition *
20077 *******************************************************/
20078
20079 /* Must always init to FALSE. */
20080 static my_bool innobase_disallow_writes = FALSE;
20081
20082 /**************************************************************************
20083 An "update" method for innobase_disallow_writes variable. */
20084 static
20085 void
innobase_disallow_writes_update(THD *,st_mysql_sys_var *,void * var_ptr,const void * save)20086 innobase_disallow_writes_update(THD*, st_mysql_sys_var*,
20087 void* var_ptr, const void* save)
20088 {
20089 const my_bool val = *static_cast<const my_bool*>(save);
20090 *static_cast<my_bool*>(var_ptr) = val;
20091 ut_a(srv_allow_writes_event);
20092 mysql_mutex_unlock(&LOCK_global_system_variables);
20093 if (val) {
20094 os_event_reset(srv_allow_writes_event);
20095 } else {
20096 os_event_set(srv_allow_writes_event);
20097 }
20098 mysql_mutex_lock(&LOCK_global_system_variables);
20099 }
20100
/** innodb_disallow_writes: boolean backed by innobase_disallow_writes,
default FALSE. Changes are applied through
innobase_disallow_writes_update(); there is no validate callback. */
static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
  PLUGIN_VAR_NOCMDOPT,
  "Tell InnoDB to stop any writes to disk",
  NULL, innobase_disallow_writes_update, FALSE);
20105 #endif /* WITH_INNODB_DISALLOW_WRITES */
20106
/** innodb_random_read_ahead: boolean backed by srv_random_read_ahead,
default FALSE, no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
  PLUGIN_VAR_NOCMDARG,
  "Whether to use read ahead for random access within an extent.",
  NULL, NULL, FALSE);

/** innodb_read_ahead_threshold: unsigned long backed by
srv_read_ahead_threshold, default 56, allowed range 0..64,
no validate/update callbacks. */
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages that must be accessed sequentially for InnoDB to"
  " trigger a readahead.",
  NULL, NULL, 56, 0, 64, 0);
20117
/* The four innodb_monitor_* string variables below share the validate
callback innodb_monitor_validate and differ only in their update callback
and backing variable. All default to NULL. */

/** innodb_monitor_enable: applied by innodb_enable_monitor_update(). */
static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn on a monitor counter",
  innodb_monitor_validate,
  innodb_enable_monitor_update, NULL);

/** innodb_monitor_disable: applied by innodb_disable_monitor_update(). */
static MYSQL_SYSVAR_STR(monitor_disable, innobase_disable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn off a monitor counter",
  innodb_monitor_validate,
  innodb_disable_monitor_update, NULL);

/** innodb_monitor_reset: applied by innodb_reset_monitor_update(). */
static MYSQL_SYSVAR_STR(monitor_reset, innobase_reset_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset a monitor counter",
  innodb_monitor_validate,
  innodb_reset_monitor_update, NULL);

/** innodb_monitor_reset_all: applied by
innodb_reset_all_monitor_update(). */
static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset all values for a monitor counter",
  innodb_monitor_validate,
  innodb_reset_all_monitor_update, NULL);
20141
/** innodb_status_output: boolean backed by srv_print_innodb_monitor,
default FALSE. Changes are applied through innodb_status_output_update(). */
static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_status_output_locks: boolean backed by
srv_print_innodb_lock_monitor, default FALSE. Shares the update callback
innodb_status_output_update() with innodb_status_output. */
static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log."
  " Requires innodb_status_output=ON.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_print_all_deadlocks: boolean backed by srv_print_all_deadlocks,
default FALSE, no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
  PLUGIN_VAR_OPCMDARG,
  "Print all deadlocks to MariaDB error log (off by default)",
  NULL, NULL, FALSE);
20155
/** innodb_compression_failure_threshold_pct: unsigned long backed by
zip_failure_threshold_pct, default 5, allowed range 0..100,
no validate/update callbacks. */
static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
  zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
  "If the compression failure rate of a table is greater than this number"
  " more padding is added to the pages to reduce the failures. A value of"
  " zero implies no padding",
  NULL, NULL, 5, 0, 100, 0);

/** innodb_compression_pad_pct_max: unsigned long backed by zip_pad_max,
default 50, allowed range 0..75, no validate/update callbacks. */
static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
  zip_pad_max, PLUGIN_VAR_OPCMDARG,
  "Percentage of empty space on a data page that can be reserved"
  " to make the page compressible.",
  NULL, NULL, 50, 0, 75, 0);
20168
/** innodb_read_only: read-only boolean backed by srv_read_only_mode,
default FALSE, no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Start InnoDB in read only mode (off by default)",
  NULL, NULL, FALSE);

/** innodb_cmp_per_index_enabled: boolean backed by
srv_cmp_per_index_enabled, default FALSE. Changes are applied through
innodb_cmp_per_index_update(). */
static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
  " may have negative impact on performance (off by default)",
  NULL, innodb_cmp_per_index_update, FALSE);

/** innodb_default_row_format: enumeration backed by
innodb_default_row_format, default DEFAULT_ROW_FORMAT_DYNAMIC. The
accepted names come from innodb_default_row_format_typelib. */
static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
  PLUGIN_VAR_RQCMDARG,
  "The default ROW FORMAT for all innodb tables created without explicit"
  " ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC."
  " The ROW_FORMAT value COMPRESSED is not allowed",
  NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC,
  &innodb_default_row_format_typelib);
20187
#ifdef UNIV_DEBUG
/* Debug-only system variables. They exist only in UNIV_DEBUG builds. */

/** innodb_trx_rseg_n_slots_debug: unsigned integer backed by
trx_rseg_n_slots_debug, default 0, allowed range 0..1024. */
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
  "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
  NULL, NULL, 0, 0, 1024, 0);

/** innodb_limit_optimistic_insert_debug: unsigned integer backed by
btr_cur_limit_optimistic_insert_debug, default 0,
allowed range 0..UINT_MAX32. */
static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
  btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
  "Artificially limit the number of records per B-tree page (0=unlimited).",
  NULL, NULL, 0, 0, UINT_MAX32, 0);

/** innodb_trx_purge_view_update_only_debug: boolean backed by
srv_purge_view_update_only_debug, default FALSE. */
static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
  srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
  "Pause actual purging any delete-marked records, but merely update the purge view."
  " It is to create artificially the situation the purge view have been updated"
  " but the each purges were not done yet.",
  NULL, NULL, FALSE);

/** innodb_evict_tables_on_commit_debug: boolean backed by
innodb_evict_tables_on_commit_debug, default FALSE. */
static MYSQL_SYSVAR_BOOL(evict_tables_on_commit_debug,
  innodb_evict_tables_on_commit_debug, PLUGIN_VAR_OPCMDARG,
  "On transaction commit, try to evict tables from the data dictionary cache.",
  NULL, NULL, FALSE);

/** innodb_data_file_size_debug: read-only unsigned integer backed by
srv_sys_space_size_debug, default 0, allowed range 0..(256U << 20). */
static MYSQL_SYSVAR_UINT(data_file_size_debug,
  srv_sys_space_size_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB system tablespace size to be set in recovery.",
  NULL, NULL, 0, 0, 256U << 20, 0);

/** innodb_fil_make_page_dirty_debug: unsigned long backed by
srv_fil_make_page_dirty_debug, default 0, allowed range 0..UINT_MAX32.
Changes are applied through innodb_make_page_dirty(). */
static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
  srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG,
  "Make the first page of the given tablespace dirty.",
  NULL, innodb_make_page_dirty, 0, 0, UINT_MAX32, 0);

/** innodb_saved_page_number_debug: unsigned long backed by
srv_saved_page_number_debug, default 0, allowed range 0..UINT_MAX32.
Changes are applied through innodb_save_page_no(). */
static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
  srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG,
  "An InnoDB page number.",
  NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);

/** innodb_disable_resize_buffer_pool_debug: boolean backed by
buf_disable_resize_buffer_pool_debug. Unlike the other debug booleans
in this group, its default is TRUE. */
static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug,
  buf_disable_resize_buffer_pool_debug, PLUGIN_VAR_NOCMDARG,
  "Disable resizing buffer pool to make assertion code not expensive.",
  NULL, NULL, TRUE);

/** innodb_page_cleaner_disabled_debug: boolean backed by
innodb_page_cleaner_disabled_debug, default FALSE. Changes are applied
through buf_flush_page_cleaner_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug,
  innodb_page_cleaner_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable page cleaner",
  NULL, buf_flush_page_cleaner_disabled_debug_update, FALSE);

/** innodb_sync_debug: read-only boolean backed by srv_sync_debug,
default FALSE. */
static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable the sync debug checks",
  NULL, NULL, FALSE);

/** innodb_dict_stats_disabled_debug: boolean backed by
innodb_dict_stats_disabled_debug, default FALSE. Changes are applied
through dict_stats_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug,
  innodb_dict_stats_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable dict_stats thread",
  NULL, dict_stats_disabled_debug_update, FALSE);

/** innodb_master_thread_disabled_debug: boolean backed by
srv_master_thread_disabled_debug, default FALSE. Changes are applied
through srv_master_thread_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(master_thread_disabled_debug,
  srv_master_thread_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable master thread",
  NULL, srv_master_thread_disabled_debug_update, FALSE);
#endif /* UNIV_DEBUG */
20255
/** innodb_force_primary_key: boolean backed by srv_force_primary_key,
default FALSE, no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(force_primary_key,
  srv_force_primary_key,
  PLUGIN_VAR_OPCMDARG,
  "Do not allow to create table without primary key (off by default)",
  NULL, NULL, FALSE);
20261
/** Names accepted by innodb_compression_algorithm. The array is
terminated by 0, which is why the typelib count below is
array_elements() - 1. */
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
/** Typelib wrapping page_compression_algorithms for the ENUM variable. */
static TYPELIB page_compression_algorithms_typelib=
{
  array_elements(page_compression_algorithms) - 1, 0,
  page_compression_algorithms, 0
};
/** innodb_compression_algorithm: enumeration backed by
innodb_compression_algorithm. New values are checked by
innodb_compression_algorithm_validate(); there is no update callback. */
static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
  PLUGIN_VAR_OPCMDARG,
  "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
  innodb_compression_algorithm_validate, NULL,
  /* Default value. All names in the typelib are accepted by the ENUM
  itself; per the original comment, whether a method is actually available
  is verified in innodb_compression_algorithm_validate().
  NOTE(review): the original comment said this argument is "the largest
  number of supported compression method", but the value passed is
  PAGE_ZLIB_ALGORITHM, which sits in the default-value position used by
  the other ENUM variables here. Confirm the comment was stale. */
  PAGE_ZLIB_ALGORITHM,
  &page_compression_algorithms_typelib);
20277
/** innodb_fatal_semaphore_wait_threshold: read-only unsigned long backed
by srv_fatal_semaphore_wait_threshold, no validate/update callbacks. */
static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Maximum number of seconds that semaphore times out in InnoDB.",
  NULL, NULL,
  DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, /* Default setting */
  1, /* Minimum setting */
  UINT_MAX32, /* Maximum setting */
  0);
20286
/** Names accepted by innodb_encrypt_tables, terminated by 0. With the
default of 0 below, the initial value is the first entry, "OFF". */
static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 };
/** Typelib wrapping srv_encrypt_tables_names; the count excludes the
terminating 0. */
static TYPELIB srv_encrypt_tables_typelib = {
	array_elements(srv_encrypt_tables_names)-1, 0, srv_encrypt_tables_names,
	NULL
};
/** innodb_encrypt_tables: enumeration backed by srv_encrypt_tables,
default 0. New values are checked by innodb_encrypt_tables_validate()
and applied by innodb_encrypt_tables_update(). */
static MYSQL_SYSVAR_ENUM(encrypt_tables, srv_encrypt_tables,
			 PLUGIN_VAR_OPCMDARG,
			 "Enable encryption for tables. "
			 "Don't forget to enable --innodb-encrypt-log too",
			 innodb_encrypt_tables_validate,
			 innodb_encrypt_tables_update,
			 0,
			 &srv_encrypt_tables_typelib);
20300
/** innodb_encryption_threads: unsigned integer backed by
srv_n_fil_crypt_threads, default 0, allowed range 0..255. Changes are
applied through innodb_encryption_threads_update(). */
static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads,
			 PLUGIN_VAR_RQCMDARG,
			 "Number of threads performing background key rotation and "
			 "scrubbing",
			 NULL,
			 innodb_encryption_threads_update,
			 0, 0, 255, 0);

/** innodb_encryption_rotate_key_age: unsigned integer backed by
srv_fil_crypt_rotate_key_age, default 1, allowed range 0..UINT_MAX32.
Changes are applied through innodb_encryption_rotate_key_age_update(). */
static MYSQL_SYSVAR_UINT(encryption_rotate_key_age,
			 srv_fil_crypt_rotate_key_age,
			 PLUGIN_VAR_RQCMDARG,
			 "Key rotation - re-encrypt in background "
			 "all pages that were encrypted with a key that "
			 "many (or more) versions behind. Value 0 indicates "
			 "that key rotation is disabled.",
			 NULL,
			 innodb_encryption_rotate_key_age_update,
			 1, 0, UINT_MAX32, 0);

/** innodb_encryption_rotation_iops: unsigned integer backed by
srv_n_fil_crypt_iops, allowed range 0..UINT_MAX32. Changes are applied
through innodb_encryption_rotation_iops_update().
NOTE(review): the default is given as the backing variable itself, so it
is whatever srv_n_fil_crypt_iops holds when this initializer runs,
presumably its compiled-in initial value; confirm. */
static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops,
			 PLUGIN_VAR_RQCMDARG,
			 "Use this many iops for background key rotation",
			 NULL,
			 innodb_encryption_rotation_iops_update,
			 srv_n_fil_crypt_iops, 0, UINT_MAX32, 0);
20326
/** innodb_scrub_log: read-only boolean backed by srv_scrub_log,
default 0 (off), no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing",
  0, 0, 0);

/** innodb_scrub_log_speed: unsigned long long backed by
innodb_scrub_log_speed, default 256, allowed range 1..50000,
no validate/update callbacks. */
static MYSQL_SYSVAR_ULONGLONG(scrub_log_speed, innodb_scrub_log_speed,
  PLUGIN_VAR_OPCMDARG,
  "Background redo log scrubbing speed in bytes/sec",
  NULL, NULL,
  256,              /* 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */
  1,                /* min */
  50000, 0);        /* 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */

/** innodb_encrypt_log: read-only boolean backed by srv_encrypt_log,
default FALSE, no validate/update callbacks. */
static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable redo log encryption",
  NULL, NULL, FALSE);
20344
/* Data scrubbing variables. All of them pass 0 as the flags argument and
have no validate/update callbacks. */

/** innodb_immediate_scrub_data_uncompressed: boolean backed by
srv_immediate_scrub_data_uncompressed, default FALSE. */
static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed,
			 srv_immediate_scrub_data_uncompressed,
			 0,
			 "Enable scrubbing of data",
			 NULL, NULL, FALSE);

/** innodb_background_scrub_data_uncompressed: boolean backed by
srv_background_scrub_data_uncompressed, default FALSE. */
static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed,
			 srv_background_scrub_data_uncompressed,
			 0,
			 "Enable scrubbing of uncompressed data by "
			 "background threads (same as encryption_threads)",
			 NULL, NULL, FALSE);

/** innodb_background_scrub_data_compressed: boolean backed by
srv_background_scrub_data_compressed, default FALSE. */
static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed,
			 srv_background_scrub_data_compressed,
			 0,
			 "Enable scrubbing of compressed data by "
			 "background threads (same as encryption_threads)",
			 NULL, NULL, FALSE);

/** innodb_background_scrub_data_check_interval: unsigned integer backed
by srv_background_scrub_data_check_interval, allowed range 1..UINT_MAX32.
The default is given as the backing variable itself (its value when this
initializer runs). */
static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval,
			 srv_background_scrub_data_check_interval,
			 0,
			 "check if spaces needs scrubbing every "
			 "innodb_background_scrub_data_check_interval "
			 "seconds",
			 NULL, NULL,
			 srv_background_scrub_data_check_interval,
			 1,
			 UINT_MAX32, 0);

/** innodb_background_scrub_data_interval: unsigned integer backed by
srv_background_scrub_data_interval, allowed range 1..UINT_MAX32.
The default is given as the backing variable itself (its value when this
initializer runs).
NOTE(review): the two help-string fragments join with a double space
("than  innodb_..."); harmless, but probably unintended. */
static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
			 srv_background_scrub_data_interval,
			 0,
			 "scrub spaces that were last scrubbed longer than "
			 " innodb_background_scrub_data_interval seconds ago",
			 NULL, NULL,
			 srv_background_scrub_data_interval,
			 1,
			 UINT_MAX32, 0);
20385
#ifdef UNIV_DEBUG
/** innodb_debug_force_scrubbing: boolean backed by
srv_scrub_force_testing, default FALSE. UNIV_DEBUG builds only. */
static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
			 srv_scrub_force_testing,
			 0,
			 "Perform extra scrubbing to increase test exposure",
			 NULL, NULL, FALSE);

/** Storage for the innodb_debug_sync variable. Unlike most backing
variables in this section it is not declared static. */
char *innobase_debug_sync;
/** innodb_debug_sync: string backed by innobase_debug_sync, default NULL.
Changes are applied through innobase_debug_sync_set(); there is no
validate callback. UNIV_DEBUG builds only. */
static MYSQL_SYSVAR_STR(debug_sync, innobase_debug_sync,
			PLUGIN_VAR_NOCMDARG,
			"debug_sync for innodb purge threads. "
			"Use it to set up sync points for all purge threads "
			"at once. The commands will be applied sequentially at"
			" the beginning of purging the next undo record.",
			NULL,
			innobase_debug_sync_set, NULL);
#endif /* UNIV_DEBUG */
20403
20404 static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables,
20405 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20406 "Enrypt the temporary table data.",
20407 NULL, NULL, false);
20408
/** NULL-terminated list of the system variables of this plugin. It is
referenced as the "system variables" member of the plugin declaration.
An entry guarded by a preprocessor condition must use the same condition
as the corresponding MYSQL_SYSVAR_* definition, otherwise
MYSQL_SYSVAR(name) refers to an object that does not exist. */
static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(buffer_pool_chunk_size),
  MYSQL_SYSVAR(buffer_pool_instances),
  MYSQL_SYSVAR(buffer_pool_filename),
  MYSQL_SYSVAR(buffer_pool_dump_now),
  MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
  MYSQL_SYSVAR(buffer_pool_dump_pct),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_evict),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_now),
  MYSQL_SYSVAR(buffer_pool_load_abort),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_load_pages_abort),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_at_startup),
  MYSQL_SYSVAR(defragment),
  MYSQL_SYSVAR(defragment_n_pages),
  MYSQL_SYSVAR(defragment_stats_accuracy),
  MYSQL_SYSVAR(defragment_fill_factor),
  MYSQL_SYSVAR(defragment_fill_factor_n_recs),
  MYSQL_SYSVAR(defragment_frequency),
  MYSQL_SYSVAR(lru_scan_depth),
  MYSQL_SYSVAR(flush_neighbors),
  MYSQL_SYSVAR(checksum_algorithm),
  MYSQL_SYSVAR(log_checksums),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(commit_concurrency),
  MYSQL_SYSVAR(concurrency_tickets),
  MYSQL_SYSVAR(compression_level),
  MYSQL_SYSVAR(data_file_path),
  MYSQL_SYSVAR(temp_data_file_path),
  MYSQL_SYSVAR(data_home_dir),
  MYSQL_SYSVAR(doublewrite),
  MYSQL_SYSVAR(stats_include_delete_marked),
  MYSQL_SYSVAR(use_atomic_writes),
  MYSQL_SYSVAR(fast_shutdown),
  MYSQL_SYSVAR(read_io_threads),
  MYSQL_SYSVAR(write_io_threads),
  MYSQL_SYSVAR(file_per_table),
  MYSQL_SYSVAR(file_format), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(flush_log_at_timeout),
  MYSQL_SYSVAR(flush_log_at_trx_commit),
  MYSQL_SYSVAR(flush_method),
  MYSQL_SYSVAR(force_recovery),
  MYSQL_SYSVAR(fill_factor),
  MYSQL_SYSVAR(ft_cache_size),
  MYSQL_SYSVAR(ft_total_cache_size),
  MYSQL_SYSVAR(ft_result_cache_limit),
  MYSQL_SYSVAR(ft_enable_stopword),
  MYSQL_SYSVAR(ft_max_token_size),
  MYSQL_SYSVAR(ft_min_token_size),
  MYSQL_SYSVAR(ft_num_word_optimize),
  MYSQL_SYSVAR(ft_sort_pll_degree),
  MYSQL_SYSVAR(large_prefix), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(force_load_corrupted),
  MYSQL_SYSVAR(lock_schedule_algorithm),
  MYSQL_SYSVAR(locks_unsafe_for_binlog),
  MYSQL_SYSVAR(lock_wait_timeout),
  MYSQL_SYSVAR(deadlock_detect),
  MYSQL_SYSVAR(page_size),
  MYSQL_SYSVAR(log_buffer_size),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_files_in_group),
  MYSQL_SYSVAR(log_write_ahead_size),
  MYSQL_SYSVAR(log_group_home_dir),
  MYSQL_SYSVAR(log_compressed_pages),
  MYSQL_SYSVAR(log_optimize_ddl),
  MYSQL_SYSVAR(max_dirty_pages_pct),
  MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
  MYSQL_SYSVAR(adaptive_flushing_lwm),
  MYSQL_SYSVAR(adaptive_flushing),
  MYSQL_SYSVAR(flush_sync),
  MYSQL_SYSVAR(flushing_avg_loops),
  MYSQL_SYSVAR(max_purge_lag),
  MYSQL_SYSVAR(max_purge_lag_delay),
  MYSQL_SYSVAR(max_purge_lag_wait),
  MYSQL_SYSVAR(old_blocks_pct),
  MYSQL_SYSVAR(old_blocks_time),
  MYSQL_SYSVAR(open_files),
  MYSQL_SYSVAR(optimize_fulltext_only),
  MYSQL_SYSVAR(rollback_on_timeout),
  MYSQL_SYSVAR(ft_aux_table),
  MYSQL_SYSVAR(ft_enable_diag_print),
  MYSQL_SYSVAR(ft_server_stopword_table),
  MYSQL_SYSVAR(ft_user_stopword_table),
  MYSQL_SYSVAR(disable_sort_file_cache),
  MYSQL_SYSVAR(stats_on_metadata),
  MYSQL_SYSVAR(stats_sample_pages),
  MYSQL_SYSVAR(stats_transient_sample_pages),
  MYSQL_SYSVAR(stats_persistent),
  MYSQL_SYSVAR(stats_persistent_sample_pages),
  MYSQL_SYSVAR(stats_auto_recalc),
  MYSQL_SYSVAR(stats_modified_counter),
  MYSQL_SYSVAR(stats_traditional),
#ifdef BTR_CUR_HASH_ADAPT
  MYSQL_SYSVAR(adaptive_hash_index),
  MYSQL_SYSVAR(adaptive_hash_index_parts),
#endif /* BTR_CUR_HASH_ADAPT */
  MYSQL_SYSVAR(stats_method),
  MYSQL_SYSVAR(replication_delay),
  MYSQL_SYSVAR(status_file),
  MYSQL_SYSVAR(strict_mode),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(online_alter_log_max_size),
  MYSQL_SYSVAR(sync_spin_loops),
  MYSQL_SYSVAR(spin_wait_delay),
  MYSQL_SYSVAR(table_locks),
  MYSQL_SYSVAR(thread_concurrency),
  MYSQL_SYSVAR(adaptive_max_sleep_delay),
  MYSQL_SYSVAR(prefix_index_cluster_optimization),
  MYSQL_SYSVAR(thread_sleep_delay),
  MYSQL_SYSVAR(tmpdir),
  MYSQL_SYSVAR(autoinc_lock_mode),
  MYSQL_SYSVAR(version),
  MYSQL_SYSVAR(use_native_aio),
#ifdef HAVE_LIBNUMA
  MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
  MYSQL_SYSVAR(change_buffering),
  MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
  MYSQL_SYSVAR(change_buffer_dump),
  MYSQL_SYSVAR(change_buffering_debug),
  MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef WITH_INNODB_DISALLOW_WRITES
  MYSQL_SYSVAR(disallow_writes),
#endif /* WITH_INNODB_DISALLOW_WRITES */
  MYSQL_SYSVAR(random_read_ahead),
  MYSQL_SYSVAR(read_ahead_threshold),
  MYSQL_SYSVAR(read_only),
  MYSQL_SYSVAR(instant_alter_column_allowed),
  MYSQL_SYSVAR(io_capacity),
  MYSQL_SYSVAR(io_capacity_max),
  MYSQL_SYSVAR(page_cleaners),
  MYSQL_SYSVAR(idle_flush_pct),
  MYSQL_SYSVAR(monitor_enable),
  MYSQL_SYSVAR(monitor_disable),
  MYSQL_SYSVAR(monitor_reset),
  MYSQL_SYSVAR(monitor_reset_all),
  MYSQL_SYSVAR(purge_threads),
  MYSQL_SYSVAR(purge_batch_size),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(background_drop_list_empty),
  MYSQL_SYSVAR(log_checkpoint_now),
  MYSQL_SYSVAR(buf_flush_list_now),
  MYSQL_SYSVAR(merge_threshold_set_all_debug),
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
  MYSQL_SYSVAR(page_hash_locks),
  MYSQL_SYSVAR(doublewrite_batch_size),
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
  MYSQL_SYSVAR(status_output),
  MYSQL_SYSVAR(status_output_locks),
  MYSQL_SYSVAR(print_all_deadlocks),
  MYSQL_SYSVAR(cmp_per_index_enabled),
  MYSQL_SYSVAR(undo_logs),
  MYSQL_SYSVAR(max_undo_log_size),
  MYSQL_SYSVAR(purge_rseg_truncate_frequency),
  MYSQL_SYSVAR(undo_log_truncate),
  MYSQL_SYSVAR(rollback_segments),
  MYSQL_SYSVAR(undo_directory),
  MYSQL_SYSVAR(undo_tablespaces),
  MYSQL_SYSVAR(sync_array_size),
  MYSQL_SYSVAR(compression_failure_threshold_pct),
  MYSQL_SYSVAR(compression_pad_pct_max),
  MYSQL_SYSVAR(default_row_format),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(trx_rseg_n_slots_debug),
  MYSQL_SYSVAR(limit_optimistic_insert_debug),
  MYSQL_SYSVAR(trx_purge_view_update_only_debug),
  MYSQL_SYSVAR(evict_tables_on_commit_debug),
  MYSQL_SYSVAR(data_file_size_debug),
  MYSQL_SYSVAR(fil_make_page_dirty_debug),
  MYSQL_SYSVAR(saved_page_number_debug),
  MYSQL_SYSVAR(disable_resize_buffer_pool_debug),
  MYSQL_SYSVAR(page_cleaner_disabled_debug),
  MYSQL_SYSVAR(dict_stats_disabled_debug),
  MYSQL_SYSVAR(master_thread_disabled_debug),
  MYSQL_SYSVAR(sync_debug),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(force_primary_key),
  MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
  /* Table page compression feature */
  MYSQL_SYSVAR(compression_default),
  MYSQL_SYSVAR(compression_algorithm),
  /* Encryption feature */
  MYSQL_SYSVAR(encrypt_tables),
  MYSQL_SYSVAR(encryption_threads),
  MYSQL_SYSVAR(encryption_rotate_key_age),
  MYSQL_SYSVAR(encryption_rotation_iops),
  MYSQL_SYSVAR(scrub_log),
  MYSQL_SYSVAR(scrub_log_speed),
  MYSQL_SYSVAR(encrypt_log),
  MYSQL_SYSVAR(default_encryption_key_id),
  /* Scrubbing feature */
  MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_compressed),
  MYSQL_SYSVAR(background_scrub_data_interval),
  MYSQL_SYSVAR(background_scrub_data_check_interval),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(debug_force_scrubbing),
  MYSQL_SYSVAR(debug_sync),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buf_dump_status_frequency),
  MYSQL_SYSVAR(background_thread),
  MYSQL_SYSVAR(encrypt_temporary_tables),

  /* End-of-list marker */
  NULL
};
20623
/* Plugin declaration. The first, brace-enclosed entry describes the
InnoDB storage engine itself; the i_s_innodb_* entries that follow are
further plugin descriptors defined elsewhere (by their names, the InnoDB
INFORMATION_SCHEMA tables). */
maria_declare_plugin(innobase)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &innobase_storage_engine,
  innobase_hton_name,
  plugin_author,
  "Supports transactions, row-level locking, foreign keys and encryption for tables",
  PLUGIN_LICENSE_GPL,
  innodb_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  INNODB_VERSION_SHORT,
  innodb_status_variables_export,/* status variables             */
  innobase_system_variables, /* system variables */
  INNODB_VERSION_STR, /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
},
i_s_innodb_trx,
i_s_innodb_locks,
i_s_innodb_lock_waits,
i_s_innodb_cmp,
i_s_innodb_cmp_reset,
i_s_innodb_cmpmem,
i_s_innodb_cmpmem_reset,
i_s_innodb_cmp_per_index,
i_s_innodb_cmp_per_index_reset,
i_s_innodb_buffer_page,
i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
i_s_innodb_metrics,
i_s_innodb_ft_default_stopword,
i_s_innodb_ft_deleted,
i_s_innodb_ft_being_deleted,
i_s_innodb_ft_config,
i_s_innodb_ft_index_cache,
i_s_innodb_ft_index_table,
i_s_innodb_sys_tables,
i_s_innodb_sys_tablestats,
i_s_innodb_sys_indexes,
i_s_innodb_sys_columns,
i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
i_s_innodb_sys_datafiles,
i_s_innodb_sys_virtual,
i_s_innodb_mutexes,
i_s_innodb_sys_semaphore_waits,
i_s_innodb_tablespaces_encryption,
i_s_innodb_tablespaces_scrubbing
maria_declare_plugin_end;
20674
20675 /** @brief Initialize the default value of innodb_commit_concurrency.
20676
20677 Once InnoDB is running, the innodb_commit_concurrency must not change
20678 from zero to nonzero. (Bug #42101)
20679
20680 The initial default value is 0, and without this extra initialization,
20681 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
20682 to 0, even if it was initially set to nonzero at the command line
20683 or configuration file. */
20684 static
20685 void
innobase_commit_concurrency_init_default()20686 innobase_commit_concurrency_init_default()
20687 /*======================================*/
20688 {
20689 MYSQL_SYSVAR_NAME(commit_concurrency).def_val
20690 = innobase_commit_concurrency;
20691 }
20692
20693 /** @brief Adjust some InnoDB startup parameters based on file contents
20694 or innodb_page_size. */
20695 static
20696 void
innodb_params_adjust()20697 innodb_params_adjust()
20698 {
20699 /* The default value and the max value of
20700 innodb_undo_logs must be equal to the available undo logs. */
20701 MYSQL_SYSVAR_NAME(undo_logs).max_val
20702 = MYSQL_SYSVAR_NAME(undo_logs).def_val
20703 = srv_available_undo_logs;
20704 MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20705 = 1ULL << (32U + srv_page_size_shift);
20706 MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
20707 = MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
20708 = ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
20709 << srv_page_size_shift;
20710 MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20711 = 1ULL << (32U + srv_page_size_shift);
20712 }
20713
20714 /****************************************************************************
20715 * DS-MRR implementation
20716 ***************************************************************************/
20717
20718 /**
20719 Multi Range Read interface, DS-MRR calls */
20720 int
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)20721 ha_innobase::multi_range_read_init(
20722 RANGE_SEQ_IF* seq,
20723 void* seq_init_param,
20724 uint n_ranges,
20725 uint mode,
20726 HANDLER_BUFFER* buf)
20727 {
20728 return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param,
20729 n_ranges, mode, buf));
20730 }
20731
20732 int
multi_range_read_next(range_id_t * range_info)20733 ha_innobase::multi_range_read_next(
20734 range_id_t* range_info)
20735 {
20736 return(m_ds_mrr.dsmrr_next(range_info));
20737 }
20738
20739 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)20740 ha_innobase::multi_range_read_info_const(
20741 uint keyno,
20742 RANGE_SEQ_IF* seq,
20743 void* seq_init_param,
20744 uint n_ranges,
20745 uint* bufsz,
20746 uint* flags,
20747 Cost_estimate* cost)
20748 {
20749 /* See comments in ha_myisam::multi_range_read_info_const */
20750 m_ds_mrr.init(this, table);
20751
20752 if (m_prebuilt->select_lock_type != LOCK_NONE) {
20753 *flags |= HA_MRR_USE_DEFAULT_IMPL;
20754 }
20755
20756 ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
20757 bufsz, flags, cost);
20758 return res;
20759 }
20760
20761 ha_rows
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)20762 ha_innobase::multi_range_read_info(
20763 uint keyno,
20764 uint n_ranges,
20765 uint keys,
20766 uint key_parts,
20767 uint* bufsz,
20768 uint* flags,
20769 Cost_estimate* cost)
20770 {
20771 m_ds_mrr.init(this, table);
20772 ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
20773 flags, cost);
20774 return res;
20775 }
20776
20777 int
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)20778 ha_innobase::multi_range_read_explain_info(
20779 uint mrr_mode,
20780 char *str,
20781 size_t size)
20782 {
20783 return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
20784 }
20785
20786 /**
20787 Index Condition Pushdown interface implementation */
20788
20789 /*************************************************************//**
20790 InnoDB index push-down condition check
20791 @return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
20792 ICP_RESULT
innobase_index_cond(void * file)20793 innobase_index_cond(
20794 /*================*/
20795 void* file) /*!< in/out: pointer to ha_innobase */
20796 {
20797 return handler_index_cond_check(file);
20798 }
20799
20800 /** Parse the table file name into table name and database name.
20801 @param[in] tbl_name InnoDB table name
20802 @param[out] dbname database name buffer (NAME_LEN + 1 bytes)
20803 @param[out] tblname table name buffer (NAME_LEN + 1 bytes)
20804 @param[out] dbnamelen database name length
20805 @param[out] tblnamelen table name length
20806 @return true if the table name is parsed properly. */
table_name_parse(const table_name_t & tbl_name,char * dbname,char * tblname,ulint & dbnamelen,ulint & tblnamelen)20807 static bool table_name_parse(
20808 const table_name_t& tbl_name,
20809 char* dbname,
20810 char* tblname,
20811 ulint& dbnamelen,
20812 ulint& tblnamelen)
20813 {
20814 dbnamelen = dict_get_db_name_len(tbl_name.m_name);
20815 char db_buf[MAX_DATABASE_NAME_LEN + 1];
20816 char tbl_buf[MAX_TABLE_NAME_LEN + 1];
20817
20818 ut_ad(dbnamelen > 0);
20819 ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
20820
20821 memcpy(db_buf, tbl_name.m_name, dbnamelen);
20822 db_buf[dbnamelen] = 0;
20823
20824 tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
20825 memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
20826 tbl_buf[tblnamelen] = 0;
20827
20828 dbnamelen = filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
20829
20830 if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
20831 && !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
20832 return false;
20833 }
20834
20835 if (char *is_part = strchr(tbl_buf, '#')) {
20836 *is_part = '\0';
20837 tblnamelen = is_part - tbl_buf;
20838 }
20839
20840 tblnamelen = filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
20841 return true;
20842 }
20843
20844
/** Acquire metadata lock and MariaDB table handle for an InnoDB table.
The InnoDB table reference passed in is released before the MariaDB table
is opened and is re-acquired by table id afterwards. If the table name
changed in between, the MariaDB table is closed and the whole sequence is
retried with the new name.
On every NULL return the InnoDB table reference has been released (or
could not be re-acquired). On success the function returns while holding
the reference obtained from dict_table_open_on_id().
@param[in,out]	thd	thread handle
@param[in,out]	table	InnoDB table
@return MariaDB table handle
@retval NULL if the table does not exist, is unaccessible or corrupted. */
static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
{
	/* db_buf/tbl_buf hold the name used for opening the MariaDB table;
	db_buf1/tbl_buf1 hold the name found after re-opening the InnoDB
	table, for detecting a rename. */
	char	db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
	char	tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
	ulint	db_buf_len, db_buf1_len;
	ulint	tbl_buf_len, tbl_buf1_len;

	if (!table_name_parse(table->name, db_buf, tbl_buf,
			      db_buf_len, tbl_buf_len)) {
		table->release();
		return NULL;
	}

	DEBUG_SYNC(thd, "ib_purge_virtual_latch_released");

	/* Remember the id: the table is looked up again by id after the
	reference has been released. */
	const table_id_t table_id = table->id;
retry_mdl:
	/* Evaluate the state before releasing the reference; "table" is
	not dereferenced again until it has been re-opened below. */
	const bool unaccessible = !table->is_readable() || table->corrupted;
	table->release();

	if (unaccessible) {
		return NULL;
	}

	TABLE*	mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
						 tbl_buf, tbl_buf_len);
	/* A failed open is not fatal at this point; the error is cleared
	and the checks below decide whether to give up or retry. */
	if (!mariadb_table)
		thd_clear_error(thd);

	DEBUG_SYNC(thd, "ib_purge_virtual_got_no_such_table");

	table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);

	if (table == NULL) {
		/* Table is dropped. */
		goto fail;
	}

	if (!fil_table_accessible(table)) {
		/* The two labels below are also jump targets from outside
		this block: release_fail drops the InnoDB reference first,
		fail only closes the MariaDB table, if one was opened. */
release_fail:
		table->release();
fail:
		if (mariadb_table) {
			close_thread_tables(thd);
		}

		return NULL;
	}

	if (!table_name_parse(table->name, db_buf1, tbl_buf1,
			      db_buf1_len, tbl_buf1_len)) {
		goto release_fail;
	}

	if (!mariadb_table) {
		/* Nothing was opened; fall through and retry with the
		name that was just parsed. */
	} else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
		/* The name is unchanged: the opened table is the right one. */
		return mariadb_table;
	} else {
		/* Table is renamed. So release MDL for old name and try
		to acquire the MDL for new table name. */
		close_thread_tables(thd);
	}

	/* Adopt the current name and start over. */
	strcpy(tbl_buf, tbl_buf1);
	strcpy(db_buf, db_buf1);
	tbl_buf_len = tbl_buf1_len;
	db_buf_len = db_buf1_len;
	goto retry_mdl;
}
20919
20920 /** Find or open a table handle for the virtual column template
20921 @param[in] thd thread handle
20922 @param[in,out] table InnoDB table whose virtual column template
20923 is to be updated
20924 @return table handle
20925 @retval NULL if the table is dropped, unaccessible or corrupted
20926 for purge thread */
innodb_find_table_for_vc(THD * thd,dict_table_t * table)20927 static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
20928 {
20929 DBUG_EXECUTE_IF(
20930 "ib_purge_virtual_mdev_16222_1",
20931 DBUG_ASSERT(!debug_sync_set_action(
20932 thd,
20933 STRING_WITH_LEN("ib_purge_virtual_latch_released "
20934 "SIGNAL latch_released "
20935 "WAIT_FOR drop_started"))););
20936 DBUG_EXECUTE_IF(
20937 "ib_purge_virtual_mdev_16222_2",
20938 DBUG_ASSERT(!debug_sync_set_action(
20939 thd,
20940 STRING_WITH_LEN("ib_purge_virtual_got_no_such_table "
20941 "SIGNAL got_no_such_table"))););
20942
20943 if (THDVAR(thd, background_thread)) {
20944 /* Purge thread acquires dict_operation_lock while
20945 processing undo log record. Release the dict_operation_lock
20946 before acquiring MDL on the table. */
20947 rw_lock_s_unlock(&dict_operation_lock);
20948 return innodb_acquire_mdl(thd, table);
20949 } else {
20950 if (table->vc_templ->mysql_table_query_id
20951 == thd_get_query_id(thd)) {
20952 return table->vc_templ->mysql_table;
20953 }
20954 }
20955
20956 char db_buf[NAME_LEN + 1];
20957 char tbl_buf[NAME_LEN + 1];
20958 ulint db_buf_len, tbl_buf_len;
20959
20960 if (!table_name_parse(table->name, db_buf, tbl_buf,
20961 db_buf_len, tbl_buf_len)) {
20962 ut_ad(!"invalid table name");
20963 return NULL;
20964 }
20965
20966 TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
20967 tbl_buf, tbl_buf_len);
20968
20969 table->vc_templ->mysql_table = mysql_table;
20970 table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
20971 return mysql_table;
20972 }
20973
20974 /** Get the computed value by supplying the base column values.
20975 @param[in,out] table table whose virtual column
20976 template to be built */
innobase_init_vc_templ(dict_table_t * table)20977 TABLE* innobase_init_vc_templ(dict_table_t* table)
20978 {
20979 if (table->vc_templ != NULL) {
20980 return NULL;
20981 }
20982 DBUG_ENTER("innobase_init_vc_templ");
20983
20984 table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
20985
20986 TABLE *mysql_table= innodb_find_table_for_vc(current_thd, table);
20987
20988 ut_ad(mysql_table);
20989 if (!mysql_table) {
20990 DBUG_RETURN(NULL);
20991 }
20992
20993 mutex_enter(&dict_sys->mutex);
20994 innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true);
20995 mutex_exit(&dict_sys->mutex);
20996 DBUG_RETURN(mysql_table);
20997 }
20998
20999 /** Change dbname and table name in table->vc_templ.
21000 @param[in,out] table the table whose virtual column template
21001 dbname and tbname to be renamed. */
21002 void
innobase_rename_vc_templ(dict_table_t * table)21003 innobase_rename_vc_templ(
21004 dict_table_t* table)
21005 {
21006 char dbname[MAX_DATABASE_NAME_LEN + 1];
21007 char tbname[MAX_DATABASE_NAME_LEN + 1];
21008 char* name = table->name.m_name;
21009 ulint dbnamelen = dict_get_db_name_len(name);
21010 ulint tbnamelen = strlen(name) - dbnamelen - 1;
21011 char t_dbname[MAX_DATABASE_NAME_LEN + 1];
21012 char t_tbname[MAX_TABLE_NAME_LEN + 1];
21013
21014 strncpy(dbname, name, dbnamelen);
21015 dbname[dbnamelen] = 0;
21016 strncpy(tbname, name + dbnamelen + 1, tbnamelen);
21017 tbname[tbnamelen] =0;
21018
21019 /* For partition table, remove the partition name and use the
21020 "main" table name to build the template */
21021 char* is_part = is_partition(tbname);
21022
21023 if (is_part != NULL) {
21024 *is_part = '\0';
21025 tbnamelen = ulint(is_part - tbname);
21026 }
21027
21028 dbnamelen = filename_to_tablename(dbname, t_dbname,
21029 MAX_DATABASE_NAME_LEN + 1);
21030 tbnamelen = filename_to_tablename(tbname, t_tbname,
21031 MAX_TABLE_NAME_LEN + 1);
21032
21033 table->vc_templ->db_name = t_dbname;
21034 table->vc_templ->tb_name = t_tbname;
21035 }
21036
21037
21038 /**
21039 Allocate a heap and record for calculating virtual fields
21040 Used mainly for virtual fields in indexes
21041
21042 @param[in] thd MariaDB THD
21043 @param[in] index Index in use
21044 @param[out] heap Heap that holds temporary row
21045 @param[in,out] table MariaDB table
21046 @param[out] record Pointer to allocated MariaDB record
21047 @param[out] storage Internal storage for blobs etc
21048
21049 @retval true on success
21050 @retval false on malloc failure or failed to open the maria table
21051 for purge thread.
21052 */
21053
innobase_allocate_row_for_vcol(THD * thd,dict_index_t * index,mem_heap_t ** heap,TABLE ** table,VCOL_STORAGE * storage)21054 bool innobase_allocate_row_for_vcol(THD *thd, dict_index_t *index,
21055 mem_heap_t **heap, TABLE **table,
21056 VCOL_STORAGE *storage)
21057 {
21058 TABLE *maria_table;
21059 String *blob_value_storage;
21060 if (!*table)
21061 *table = innodb_find_table_for_vc(thd, index->table);
21062
21063 /* For purge thread, there is a possiblity that table could have
21064 dropped, corrupted or unaccessible. */
21065 if (!*table)
21066 return false;
21067 maria_table = *table;
21068 if (!*heap && !(*heap = mem_heap_create(srv_page_size)))
21069 return false;
21070
21071 uchar *record = static_cast<byte *>(mem_heap_alloc(*heap,
21072 maria_table->s->reclength));
21073
21074 size_t len = maria_table->s->virtual_not_stored_blob_fields * sizeof(String);
21075 blob_value_storage = static_cast<String *>(mem_heap_alloc(*heap, len));
21076
21077 if (!record || !blob_value_storage)
21078 return false;
21079
21080 storage->maria_table = maria_table;
21081 storage->innobase_record = record;
21082 storage->maria_record = maria_table->field[0]->record_ptr();
21083 storage->blob_value_storage = blob_value_storage;
21084
21085 maria_table->move_fields(maria_table->field, record, storage->maria_record);
21086 maria_table->remember_blob_values(blob_value_storage);
21087
21088 return true;
21089 }
21090
21091
21092 /** Free memory allocated by innobase_allocate_row_for_vcol() */
21093
innobase_free_row_for_vcol(VCOL_STORAGE * storage)21094 void innobase_free_row_for_vcol(VCOL_STORAGE *storage)
21095 {
21096 TABLE *maria_table= storage->maria_table;
21097 maria_table->move_fields(maria_table->field, storage->maria_record,
21098 storage->innobase_record);
21099 maria_table->restore_blob_values(storage->blob_value_storage);
21100 }
21101
21102
innobase_report_computed_value_failed(dtuple_t * row)21103 void innobase_report_computed_value_failed(dtuple_t *row)
21104 {
21105 ib::error() << "Compute virtual column values failed for "
21106 << rec_printer(row).str();
21107 }
21108
21109
21110 /** Get the computed value by supplying the base column values.

The value of every base column of the virtual column is taken from the
update vector (when it carries a new value for that column) or else from
"row", and stored in MariaDB format into "mysql_rec". Then
mysql_table->update_virtual_field() is called for the column, and the
result is read back from "mysql_rec", converted to InnoDB format and
stored in the virtual field of "row".
@param[in,out] row the data row
@param[in] col virtual column
@param[in] index index
@param[in,out] local_heap heap memory for processing large data etc.;
created on demand when *local_heap is NULL
@param[in,out] heap memory heap that copies the actual index row;
may be NULL, in which case the result is not duplicated
@param[in] ifield index field
@param[in] thd MySQL thread handle (only checked by a debug
assertion in this function)
@param[in,out] mysql_table mysql table object
@param[in,out] mysql_rec MariaDB record buffer
@param[in] old_table during ALTER TABLE, this is the old table
or NULL.
@param[in] update update vector for the row, if any
@return the virtual column field of "row", filled with the computed value
@retval NULL if update_virtual_field() returned nonzero */
21126 dfield_t*
innobase_get_computed_value(dtuple_t * row,const dict_v_col_t * col,const dict_index_t * index,mem_heap_t ** local_heap,mem_heap_t * heap,const dict_field_t * ifield,THD * thd,TABLE * mysql_table,byte * mysql_rec,const dict_table_t * old_table,const upd_t * update)21127 innobase_get_computed_value(
21128 dtuple_t* row,
21129 const dict_v_col_t* col,
21130 const dict_index_t* index,
21131 mem_heap_t** local_heap,
21132 mem_heap_t* heap,
21133 const dict_field_t* ifield,
21134 THD* thd,
21135 TABLE* mysql_table,
21136 byte* mysql_rec,
21137 const dict_table_t* old_table,
21138 const upd_t* update)
21139 {
/* On-stack conversion buffer, used only when the conditions checked
further below allow it. */
21140 byte rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN];
21141 byte* buf;
21142 dfield_t* field;
21143 ulint len;
21144
/* Page size used for reading externally stored columns: taken from
old_table when one is given, otherwise from the table of the index. */
21145 const page_size_t page_size = (old_table == NULL)
21146 ? dict_table_page_size(index->table)
21147 : dict_table_page_size(old_table);
21148
21149 ulint ret = 0;
21150
21151 dict_index_t *clust_index= dict_table_get_first_index(index->table);
21152
21153 ut_ad(index->table->vc_templ);
21154 ut_ad(thd != NULL);
21155 ut_ad(mysql_table);
21156
21157 DBUG_ENTER("innobase_get_computed_value");
/* Template entry of the virtual column itself: it is looked up in
vtempl[] at position n_col + v_pos. */
21158 const mysql_row_templ_t*
21159 vctempl = index->table->vc_templ->vtempl[
21160 index->table->vc_templ->n_col + col->v_pos];
21161
/* Without a "heap" the result is not duplicated at the end of this
function, and a long record does not fit in rec_buf2: in both cases the
conversion buffer is allocated from *local_heap instead of the stack. */
21162 if (!heap || index->table->vc_templ->rec_len
21163 >= REC_VERSION_56_MAX_INDEX_COL_LEN) {
21164 if (*local_heap == NULL) {
21165 *local_heap = mem_heap_create(srv_page_size);
21166 }
21167
21168 buf = static_cast<byte*>(mem_heap_alloc(
21169 *local_heap, index->table->vc_templ->rec_len));
21170 } else {
21171 buf = rec_buf2;
21172 }
21173
/* Store the value of every base column into mysql_rec. */
21174 for (ulint i = 0; i < col->num_base; i++) {
21175 dict_col_t* base_col = col->base_col[i];
21176 const dfield_t* row_field = NULL;
21177 ulint col_no = base_col->ind;
21178 const mysql_row_templ_t* templ
21179 = index->table->vc_templ->vtempl[col_no];
21180 const byte* data;
21181
/* A new value in the update vector takes precedence over "row". */
21182 if (update) {
21183 ulint clust_no = dict_col_get_clust_pos(base_col,
21184 clust_index);
21185 if (const upd_field_t *uf = upd_get_field_by_field_no(
21186 update, clust_no, false)) {
21187 row_field = &uf->new_val;
21188 }
21189 }
21190
21191 if (!row_field) {
21192 row_field = dtuple_get_nth_field(row, col_no);
21193 }
21194
21195 data = static_cast<const byte*>(row_field->data);
21196 len = row_field->len;
21197
/* An externally stored value is fetched into *local_heap first;
"data" and "len" then refer to the fetched copy. */
21198 if (row_field->ext) {
21199 if (*local_heap == NULL) {
21200 *local_heap = mem_heap_create(srv_page_size);
21201 }
21202
21203 data = btr_copy_externally_stored_field(
21204 &len, data, page_size,
21205 dfield_get_len(row_field), *local_heap);
21206 }
21207
/* SQL NULL: set the NULL bit and fill the column bytes from the
template's default record. */
21208 if (len == UNIV_SQL_NULL) {
21209 mysql_rec[templ->mysql_null_byte_offset]
21210 |= (byte) templ->mysql_null_bit_mask;
21211 memcpy(mysql_rec + templ->mysql_col_offset,
21212 static_cast<const byte*>(
21213 index->table->vc_templ->default_rec
21214 + templ->mysql_col_offset),
21215 templ->mysql_col_len);
21216 } else {
21217
21218 row_sel_field_store_in_mysql_format(
21219 mysql_rec + templ->mysql_col_offset,
21220 templ, index, templ->clust_rec_field_no,
21221 (const byte*)data, len);
21222
21223 if (templ->mysql_null_bit_mask) {
21224 /* It is a nullable column with a
21225 non-NULL value */
21226 mysql_rec[templ->mysql_null_byte_offset]
21227 &= ~(byte) templ->mysql_null_bit_mask;
21228 }
21229 }
21230 }
21231
/* The result goes into the virtual field of "row". */
21232 field = dtuple_get_nth_v_field(row, col->v_pos);
21233
/* The write and read column maps of the table are swapped with the
dbug_tmp_* helpers around the evaluation and restored afterwards, in
reverse order.
NOTE(review): presumably this makes all columns accessible in debug
builds only -- confirm against the helpers' definitions. */
21234 MY_BITMAP *old_write_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->write_set);
21235 MY_BITMAP *old_read_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->read_set);
21236 ret = mysql_table->update_virtual_field(mysql_table->field[col->m_col.ind]);
21237 dbug_tmp_restore_column_map(&mysql_table->read_set, old_read_set);
21238 dbug_tmp_restore_column_map(&mysql_table->write_set, old_write_set);
21239
/* Evaluation failed: report it to the caller by returning NULL. */
21240 if (ret != 0) {
21241 DBUG_RETURN(NULL);
21242 }
21243
/* The computed value is SQL NULL when the NULL bit of the virtual
column is set in mysql_rec. */
21244 if (vctempl->mysql_null_bit_mask
21245 && (mysql_rec[vctempl->mysql_null_byte_offset]
21246 & vctempl->mysql_null_bit_mask)) {
21247 dfield_set_null(field);
21248 field->type.prtype |= DATA_VIRTUAL;
21249 DBUG_RETURN(field);
21250 }
21251
/* Convert the computed value from mysql_rec to InnoDB format, with
"buf" passed as the conversion buffer. */
21252 row_mysql_store_col_in_innobase_format(
21253 field, buf,
21254 TRUE, mysql_rec + vctempl->mysql_col_offset,
21255 vctempl->mysql_col_len, dict_table_is_comp(index->table));
21256 field->type.prtype |= DATA_VIRTUAL;
21257
21258 ulint max_prefix = col->m_col.max_prefix;
21259
/* When an index field is given whose prefix length is 0 or larger than
the column's max_prefix, the prefix length of the index field is used
instead; a resulting 0 skips the truncation below. */
21260 if (max_prefix && ifield
21261 && (ifield->prefix_len == 0
21262 || ifield->prefix_len > col->m_col.max_prefix)) {
21263 max_prefix = ifield->prefix_len;
21264 }
21265
21266 /* If this is a prefix index, we only need a portion of the field */
21267 if (max_prefix) {
21268 len = dtype_get_at_most_n_mbchars(
21269 col->m_col.prtype,
21270 col->m_col.mbminlen, col->m_col.mbmaxlen,
21271 max_prefix,
21272 field->len,
21273 static_cast<char*>(dfield_get_data(field)));
21274 dfield_set_len(field, len);
21275 }
21276
/* With a "heap", the field is duplicated into it.
NOTE(review): presumably so that the data no longer points into the
on-stack rec_buf2 after return -- confirm against dfield_dup(). */
21277 if (heap) {
21278 dfield_dup(field, heap);
21279 }
21280
21281 DBUG_RETURN(field);
21282 }
21283
21284
21285 /** Attempt to push down an index condition.
21286 @param[in] keyno MySQL key number
21287 @param[in] idx_cond Index condition to be checked
21288 @return Part of idx_cond which the handler will not evaluate */
21289
21290 class Item*
idx_cond_push(uint keyno,class Item * idx_cond)21291 ha_innobase::idx_cond_push(
21292 uint keyno,
21293 class Item* idx_cond)
21294 {
21295 DBUG_ENTER("ha_innobase::idx_cond_push");
21296 DBUG_ASSERT(keyno != MAX_KEY);
21297 DBUG_ASSERT(idx_cond != NULL);
21298
21299 /* We can only evaluate the condition if all columns are stored.*/
21300 dict_index_t* idx = innobase_get_index(keyno);
21301 if (idx && dict_index_has_virtual(idx)) {
21302 DBUG_RETURN(idx_cond);
21303 }
21304
21305 pushed_idx_cond = idx_cond;
21306 pushed_idx_cond_keyno = keyno;
21307 in_range_check_pushed_down = TRUE;
21308 /* We will evaluate the condition entirely */
21309 DBUG_RETURN(NULL);
21310 }
21311
21312 /******************************************************************//**
21313 Use this when the args are passed to the format string from
21314 errmsg-utf8.txt directly as is.
21315
21316 Push a warning message to the client, it is a wrapper around:
21317
21318 void push_warning_printf(
21319 THD *thd, Sql_condition::enum_condition_level level,
21320 uint code, const char *format, ...);
21321 */
21322 void
ib_senderrf(THD * thd,ib_log_level_t level,ib_uint32_t code,...)21323 ib_senderrf(
21324 /*========*/
21325 THD* thd, /*!< in/out: session */
21326 ib_log_level_t level, /*!< in: warning level */
21327 ib_uint32_t code, /*!< MySQL error code */
21328 ...) /*!< Args */
21329 {
21330 va_list args;
21331 const char* format = my_get_err_msg(code);
21332
21333 /* If the caller wants to push a message to the client then
21334 the caller must pass a valid session handle. */
21335
21336 ut_a(thd != 0);
21337
21338 /* The error code must exist in the errmsg-utf8.txt file. */
21339 ut_a(format != 0);
21340
21341 va_start(args, code);
21342
21343 myf l;
21344
21345 switch (level) {
21346 case IB_LOG_LEVEL_INFO:
21347 l = ME_JUST_INFO;
21348 break;
21349 case IB_LOG_LEVEL_WARN:
21350 l = ME_JUST_WARNING;
21351 break;
21352 default:
21353 l = 0;
21354 break;
21355 }
21356
21357 my_printv_error(code, format, MYF(l), args);
21358
21359 va_end(args);
21360
21361 if (level == IB_LOG_LEVEL_FATAL) {
21362 ut_error;
21363 }
21364 }
21365
21366 /******************************************************************//**
21367 Use this when the args are first converted to a formatted string and then
21368 passed to the format string from errmsg-utf8.txt. The error message format
21369 must be: "Some string ... %s".
21370
21371 Push a warning message to the client, it is a wrapper around:
21372
21373 void push_warning_printf(
21374 THD *thd, Sql_condition::enum_condition_level level,
21375 uint code, const char *format, ...);
21376 */
21377 void
ib_errf(THD * thd,ib_log_level_t level,ib_uint32_t code,const char * format,...)21378 ib_errf(
21379 /*====*/
21380 THD* thd, /*!< in/out: session */
21381 ib_log_level_t level, /*!< in: warning level */
21382 ib_uint32_t code, /*!< MySQL error code */
21383 const char* format, /*!< printf format */
21384 ...) /*!< Args */
21385 {
21386 char* str = NULL;
21387 va_list args;
21388
21389 /* If the caller wants to push a message to the client then
21390 the caller must pass a valid session handle. */
21391
21392 ut_a(thd != 0);
21393 ut_a(format != 0);
21394
21395 va_start(args, format);
21396
21397 #ifdef _WIN32
21398 int size = _vscprintf(format, args) + 1;
21399 if (size > 0) {
21400 str = static_cast<char*>(malloc(size));
21401 }
21402 if (str == NULL) {
21403 va_end(args);
21404 return; /* Watch for Out-Of-Memory */
21405 }
21406 str[size - 1] = 0x0;
21407 vsnprintf(str, size, format, args);
21408 #elif HAVE_VASPRINTF
21409 if (vasprintf(&str, format, args) == -1) {
21410 /* In case of failure use a fixed length string */
21411 str = static_cast<char*>(malloc(BUFSIZ));
21412 vsnprintf(str, BUFSIZ, format, args);
21413 }
21414 #else
21415 /* Use a fixed length string. */
21416 str = static_cast<char*>(malloc(BUFSIZ));
21417 if (str == NULL) {
21418 va_end(args);
21419 return; /* Watch for Out-Of-Memory */
21420 }
21421 vsnprintf(str, BUFSIZ, format, args);
21422 #endif /* _WIN32 */
21423
21424 ib_senderrf(thd, level, code, str);
21425
21426 va_end(args);
21427 free(str);
21428 }
21429
/* Pointers to documentation, appended to diagnostic messages. */

/* Keep the first 16 characters ("Please refer to ") as-is, since the
url is sometimes used as an offset from this. */
const char*	TROUBLESHOOTING_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/innodb-troubleshooting/"
	" for how to resolve the issue.";

const char*	TROUBLESHOOT_DATADICT_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
	" for how to resolve the issue.";

const char*	BUG_REPORT_MSG =
	"Submit a detailed bug report to "
	"https://jira.mariadb.org/";

const char*	FORCE_RECOVERY_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/innodb-recovery-modes/"
	" for information about forcing recovery.";

const char*	OPERATING_SYSTEM_ERROR_MSG =
	"Some operating system error numbers are described at "
	"https://mariadb.com/kb/en/library/operating-system-error-codes/";

const char*	FOREIGN_KEY_CONSTRAINTS_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/foreign-keys/"
	" for correct foreign key definition.";

const char*	SET_TRANSACTION_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/set-transaction/";

const char*	INNODB_PARAMETERS_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/innodb-system-variables/";
21461
21462 /**********************************************************************
21463 Converts an identifier from my_charset_filename to UTF-8 charset.
21464 @return result string length, as returned by strconvert() */
21465 uint
innobase_convert_to_filename_charset(char * to,const char * from,ulint len)21466 innobase_convert_to_filename_charset(
21467 /*=================================*/
21468 char* to, /* out: converted identifier */
21469 const char* from, /* in: identifier to convert */
21470 ulint len) /* in: length of 'to', in bytes */
21471 {
21472 uint errors;
21473 CHARSET_INFO* cs_to = &my_charset_filename;
21474 CHARSET_INFO* cs_from = system_charset_info;
21475
21476 return(static_cast<uint>(strconvert(
21477 cs_from, from, uint(strlen(from)),
21478 cs_to, to, static_cast<uint>(len), &errors)));
21479 }
21480
21481 /**********************************************************************
21482 Converts an identifier from my_charset_filename to UTF-8 charset.
21483 @return result string length, as returned by strconvert() */
21484 uint
innobase_convert_to_system_charset(char * to,const char * from,ulint len,uint * errors)21485 innobase_convert_to_system_charset(
21486 /*===============================*/
21487 char* to, /* out: converted identifier */
21488 const char* from, /* in: identifier to convert */
21489 ulint len, /* in: length of 'to', in bytes */
21490 uint* errors) /* out: error return */
21491 {
21492 CHARSET_INFO* cs1 = &my_charset_filename;
21493 CHARSET_INFO* cs2 = system_charset_info;
21494
21495 return(static_cast<uint>(strconvert(
21496 cs1, from, static_cast<uint>(strlen(from)),
21497 cs2, to, static_cast<uint>(len), errors)));
21498 }
21499
21500 /** Validate the requested buffer pool size. Also, reserve the necessary
21501 memory needed for buffer pool resize.
21502 @param[in] thd thread handle
21503 @param[out] save immediate result for update function
21504 @param[in] value incoming string
21505 @return 0 on success, 1 on failure.
21506 */
21507 static
21508 int
innodb_buffer_pool_size_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)21509 innodb_buffer_pool_size_validate(
21510 THD* thd,
21511 st_mysql_sys_var*,
21512 void* save,
21513 struct st_mysql_value* value)
21514 {
21515 longlong intbuf;
21516
21517 value->val_int(value, &intbuf);
21518
21519 if (static_cast<ulonglong>(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
21520 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21521 ER_WRONG_ARGUMENTS,
21522 "innodb_buffer_pool_size must be at least"
21523 " %lld for innodb_page_size=%lu",
21524 MYSQL_SYSVAR_NAME(buffer_pool_size).min_val,
21525 srv_page_size);
21526 return(1);
21527 }
21528
21529 if (!srv_was_started) {
21530 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21531 ER_WRONG_ARGUMENTS,
21532 "Cannot update innodb_buffer_pool_size,"
21533 " because InnoDB is not started.");
21534 return(1);
21535 }
21536
21537 #ifdef UNIV_DEBUG
21538 if (buf_disable_resize_buffer_pool_debug == TRUE) {
21539 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21540 ER_WRONG_ARGUMENTS,
21541 "Cannot update innodb_buffer_pool_size,"
21542 " because innodb_disable_resize_buffer_pool_debug"
21543 " is set.");
21544 ib::warn() << "Cannot update innodb_buffer_pool_size,"
21545 " because innodb_disable_resize_buffer_pool_debug"
21546 " is set.";
21547 return(1);
21548 }
21549 #endif /* UNIV_DEBUG */
21550
21551
21552 buf_pool_mutex_enter_all();
21553
21554 if (srv_buf_pool_old_size != srv_buf_pool_size) {
21555 buf_pool_mutex_exit_all();
21556 my_printf_error(ER_WRONG_ARGUMENTS,
21557 "Another buffer pool resize is already in progress.", MYF(0));
21558 return(1);
21559 }
21560
21561 if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) {
21562 buf_pool_mutex_exit_all();
21563
21564 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21565 ER_WRONG_ARGUMENTS,
21566 "Cannot update innodb_buffer_pool_size"
21567 " to less than 1GB if"
21568 " innodb_buffer_pool_instances > 1.");
21569 return(1);
21570 }
21571
21572 ulint requested_buf_pool_size = buf_pool_size_align(ulint(intbuf));
21573
21574 *static_cast<ulonglong*>(save) = requested_buf_pool_size;
21575
21576 if (srv_buf_pool_size == ulint(intbuf)) {
21577 buf_pool_mutex_exit_all();
21578 /* nothing to do */
21579 return(0);
21580 }
21581
21582 if (srv_buf_pool_size == requested_buf_pool_size) {
21583 buf_pool_mutex_exit_all();
21584 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21585 ER_WRONG_ARGUMENTS,
21586 "innodb_buffer_pool_size must be at least"
21587 " innodb_buffer_pool_chunk_size=%lu",
21588 srv_buf_pool_chunk_unit);
21589 /* nothing to do */
21590 return(0);
21591 }
21592
21593 srv_buf_pool_size = requested_buf_pool_size;
21594 buf_pool_mutex_exit_all();
21595
21596 if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
21597 char buf[64];
21598 int len = 64;
21599 value->val_str(value, buf, &len);
21600 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21601 ER_TRUNCATED_WRONG_VALUE,
21602 "Truncated incorrect %-.32s value: '%-.128s'",
21603 mysql_sysvar_buffer_pool_size.name,
21604 value->val_str(value, buf, &len));
21605 }
21606
21607 return(0);
21608 }
21609
21610 /*************************************************************//**
21611 Check for a valid value of innobase_compression_algorithm.
21612 @return 0 for valid innodb_compression_algorithm. */
21613 static
21614 int
innodb_compression_algorithm_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21615 innodb_compression_algorithm_validate(
21616 /*==================================*/
21617 THD* thd, /*!< in: thread handle */
21618 struct st_mysql_sys_var* var, /*!< in: pointer to system
21619 variable */
21620 void* save, /*!< out: immediate result
21621 for update function */
21622 struct st_mysql_value* value) /*!< in: incoming string */
21623 {
21624 ulong compression_algorithm;
21625 DBUG_ENTER("innobase_compression_algorithm_validate");
21626
21627 if (check_sysvar_enum(thd, var, save, value)) {
21628 DBUG_RETURN(1);
21629 }
21630
21631 compression_algorithm = *reinterpret_cast<ulong*>(save);
21632 (void)compression_algorithm;
21633
21634 #ifndef HAVE_LZ4
21635 if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
21636 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21637 HA_ERR_UNSUPPORTED,
21638 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21639 "InnoDB: liblz4 is not installed. \n",
21640 compression_algorithm);
21641 DBUG_RETURN(1);
21642 }
21643 #endif
21644
21645 #ifndef HAVE_LZO
21646 if (compression_algorithm == PAGE_LZO_ALGORITHM) {
21647 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21648 HA_ERR_UNSUPPORTED,
21649 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21650 "InnoDB: liblzo is not installed. \n",
21651 compression_algorithm);
21652 DBUG_RETURN(1);
21653 }
21654 #endif
21655
21656 #ifndef HAVE_LZMA
21657 if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
21658 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21659 HA_ERR_UNSUPPORTED,
21660 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21661 "InnoDB: liblzma is not installed. \n",
21662 compression_algorithm);
21663 DBUG_RETURN(1);
21664 }
21665 #endif
21666
21667 #ifndef HAVE_BZIP2
21668 if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
21669 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21670 HA_ERR_UNSUPPORTED,
21671 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21672 "InnoDB: libbz2 is not installed. \n",
21673 compression_algorithm);
21674 DBUG_RETURN(1);
21675 }
21676 #endif
21677
21678 #ifndef HAVE_SNAPPY
21679 if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
21680 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21681 HA_ERR_UNSUPPORTED,
21682 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21683 "InnoDB: libsnappy is not installed. \n",
21684 compression_algorithm);
21685 DBUG_RETURN(1);
21686 }
21687 #endif
21688 DBUG_RETURN(0);
21689 }
21690
21691 static
21692 int
innodb_encrypt_tables_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21693 innodb_encrypt_tables_validate(
21694 /*=================================*/
21695 THD* thd, /*!< in: thread handle */
21696 struct st_mysql_sys_var* var, /*!< in: pointer to system
21697 variable */
21698 void* save, /*!< out: immediate result
21699 for update function */
21700 struct st_mysql_value* value) /*!< in: incoming string */
21701 {
21702 if (check_sysvar_enum(thd, var, save, value)) {
21703 return 1;
21704 }
21705
21706 ulong encrypt_tables = *(ulong*)save;
21707
21708 if (encrypt_tables
21709 && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
21710 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21711 HA_ERR_UNSUPPORTED,
21712 "InnoDB: cannot enable encryption, "
21713 "encryption plugin is not available");
21714 return 1;
21715 }
21716
21717 return 0;
21718 }
21719
innodb_remember_check_sysvar_funcs()21720 static void innodb_remember_check_sysvar_funcs()
21721 {
21722 /* remember build-in sysvar check functions */
21723 ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
21724 check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
21725
21726 ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
21727 check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
21728 }
21729
21730 /********************************************************************//**
21731 Helper function to push warnings from InnoDB internals to SQL-layer. */
21732 UNIV_INTERN
21733 void
ib_push_warning(trx_t * trx,dberr_t error,const char * format,...)21734 ib_push_warning(
21735 trx_t* trx, /*!< in: trx */
21736 dberr_t error, /*!< in: error code to push as warning */
21737 const char *format,/*!< in: warning message */
21738 ...)
21739 {
21740 if (trx && trx->mysql_thd) {
21741 THD *thd = (THD *)trx->mysql_thd;
21742 va_list args;
21743 char *buf;
21744 #define MAX_BUF_SIZE 4*1024
21745
21746 va_start(args, format);
21747 buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21748 buf[MAX_BUF_SIZE - 1] = 0;
21749 vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21750 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21751 uint(convert_error_code_to_mysql(error, 0,
21752 thd)),
21753 buf);
21754 my_free(buf);
21755 va_end(args);
21756 }
21757 }
21758
21759 /********************************************************************//**
21760 Helper function to push warnings from InnoDB internals to SQL-layer. */
21761 UNIV_INTERN
21762 void
ib_push_warning(void * ithd,dberr_t error,const char * format,...)21763 ib_push_warning(
21764 void* ithd, /*!< in: thd */
21765 dberr_t error, /*!< in: error code to push as warning */
21766 const char *format,/*!< in: warning message */
21767 ...)
21768 {
21769 va_list args;
21770 THD *thd = (THD *)ithd;
21771 char *buf;
21772 #define MAX_BUF_SIZE 4*1024
21773
21774 if (ithd == NULL) {
21775 thd = current_thd;
21776 }
21777
21778 if (thd) {
21779 va_start(args, format);
21780 buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21781 buf[MAX_BUF_SIZE - 1] = 0;
21782 vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21783
21784 push_warning_printf(
21785 thd, Sql_condition::WARN_LEVEL_WARN,
21786 uint(convert_error_code_to_mysql(error, 0, thd)), buf);
21787 my_free(buf);
21788 va_end(args);
21789 }
21790 }
21791
21792 /********************************************************************//**
21793 Helper function to push frm mismatch error to error log and
21794 if needed to sql-layer. */
21795 UNIV_INTERN
21796 void
ib_push_frm_error(THD * thd,dict_table_t * ib_table,TABLE * table,ulint n_keys,bool push_warning)21797 ib_push_frm_error(
21798 /*==============*/
21799 THD* thd, /*!< in: MySQL thd */
21800 dict_table_t* ib_table, /*!< in: InnoDB table */
21801 TABLE* table, /*!< in: MySQL table */
21802 ulint n_keys, /*!< in: InnoDB #keys */
21803 bool push_warning) /*!< in: print warning ? */
21804 {
21805 switch (ib_table->dict_frm_mismatch) {
21806 case DICT_FRM_NO_PK:
21807 sql_print_error("Table %s has a primary key in "
21808 "InnoDB data dictionary, but not "
21809 "in MariaDB!"
21810 " Have you mixed up "
21811 ".frm files from different "
21812 "installations? See "
21813 "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21814 ib_table->name.m_name);
21815
21816 if (push_warning) {
21817 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21818 ER_NO_SUCH_INDEX,
21819 "InnoDB: Table %s has a "
21820 "primary key in InnoDB data "
21821 "dictionary, but not in "
21822 "MariaDB!", ib_table->name.m_name);
21823 }
21824 break;
21825 case DICT_NO_PK_FRM_HAS:
21826 sql_print_error(
21827 "Table %s has no primary key in InnoDB data "
21828 "dictionary, but has one in MariaDB! If you "
21829 "created the table with a MariaDB version < "
21830 "3.23.54 and did not define a primary key, "
21831 "but defined a unique key with all non-NULL "
21832 "columns, then MariaDB internally treats that "
21833 "key as the primary key. You can fix this "
21834 "error by dump + DROP + CREATE + reimport "
21835 "of the table.", ib_table->name.m_name);
21836
21837 if (push_warning) {
21838 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21839 ER_NO_SUCH_INDEX,
21840 "InnoDB: Table %s has no "
21841 "primary key in InnoDB data "
21842 "dictionary, but has one in "
21843 "MariaDB!",
21844 ib_table->name.m_name);
21845 }
21846 break;
21847
21848 case DICT_FRM_INCONSISTENT_KEYS:
21849 sql_print_error("InnoDB: Table %s contains " ULINTPF " "
21850 "indexes inside InnoDB, which "
21851 "is different from the number of "
21852 "indexes %u defined in the MariaDB "
21853 " Have you mixed up "
21854 ".frm files from different "
21855 "installations? See "
21856 "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21857 ib_table->name.m_name, n_keys,
21858 table->s->keys);
21859
21860 if (push_warning) {
21861 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21862 ER_NO_SUCH_INDEX,
21863 "InnoDB: Table %s contains " ULINTPF " "
21864 "indexes inside InnoDB, which "
21865 "is different from the number of "
21866 "indexes %u defined in the MariaDB ",
21867 ib_table->name.m_name, n_keys,
21868 table->s->keys);
21869 }
21870 break;
21871
21872 case DICT_FRM_CONSISTENT:
21873 default:
21874 sql_print_error("InnoDB: Table %s is consistent "
21875 "on InnoDB data dictionary and MariaDB "
21876 " FRM file.",
21877 ib_table->name.m_name);
21878 ut_error;
21879 break;
21880 }
21881 }
21882
21883 /** Writes 8 bytes to nth tuple field
21884 @param[in] tuple where to write
21885 @param[in] nth index in tuple
21886 @param[in] data what to write
21887 @param[in] buf field data buffer */
set_tuple_col_8(dtuple_t * tuple,int col,uint64_t data,byte * buf)21888 static void set_tuple_col_8(dtuple_t *tuple, int col, uint64_t data, byte *buf)
21889 {
21890 dfield_t *dfield= dtuple_get_nth_field(tuple, col);
21891 ut_ad(dfield->type.len == 8);
21892 if (dfield->len == UNIV_SQL_NULL)
21893 {
21894 dfield_set_data(dfield, buf, 8);
21895 }
21896 ut_ad(dfield->len == dfield->type.len && dfield->data);
21897 mach_write_to_8(dfield->data, data);
21898 }
21899
vers_update_end(row_prebuilt_t * prebuilt,bool history_row)21900 void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row)
21901 {
21902 ut_ad(prebuilt->ins_node == this);
21903 trx_t *trx= prebuilt->trx;
21904 #ifndef DBUG_OFF
21905 ut_ad(table->vers_start != table->vers_end);
21906 const mysql_row_templ_t *t= prebuilt->get_template_by_col(table->vers_end);
21907 ut_ad(t);
21908 ut_ad(t->mysql_col_len == 8);
21909 #endif
21910
21911 if (history_row)
21912 {
21913 set_tuple_col_8(row, table->vers_end, trx->id, vers_end_buf);
21914 }
21915 else /* ROW_INS_VERSIONED */
21916 {
21917 set_tuple_col_8(row, table->vers_end, TRX_ID_MAX, vers_end_buf);
21918 #ifndef DBUG_OFF
21919 t= prebuilt->get_template_by_col(table->vers_start);
21920 ut_ad(t);
21921 ut_ad(t->mysql_col_len == 8);
21922 #endif
21923 set_tuple_col_8(row, table->vers_start, trx->id, vers_start_buf);
21924 }
21925 dict_index_t *clust_index= dict_table_get_first_index(table);
21926 THD *thd= trx->mysql_thd;
21927 TABLE *mysql_table= prebuilt->m_mysql_table;
21928 mem_heap_t *local_heap= NULL;
21929 for (ulint col_no= 0; col_no < dict_table_get_n_v_cols(table); col_no++)
21930 {
21931 const dict_v_col_t *v_col= dict_table_get_nth_v_col(table, col_no);
21932 for (ulint i= 0; i < unsigned(v_col->num_base); i++)
21933 if (v_col->base_col[i]->ind == table->vers_end)
21934 innobase_get_computed_value(row, v_col, clust_index, &local_heap,
21935 table->heap, NULL, thd, mysql_table,
21936 mysql_table->record[0], NULL, NULL);
21937 }
21938 if (UNIV_LIKELY_NULL(local_heap))
21939 mem_heap_free(local_heap);
21940 }
21941
21942 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
21943 if needed.
21944 @param[in] size size in bytes
21945 @return aligned size */
21946 ulint
buf_pool_size_align(ulint size)21947 buf_pool_size_align(
21948 ulint size)
21949 {
21950 const ib_uint64_t m = ((ib_uint64_t)srv_buf_pool_instances) * srv_buf_pool_chunk_unit;
21951 size = ut_max((size_t) size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val);
21952
21953 if (size % m == 0) {
21954 return(size);
21955 } else {
21956 return (ulint)((size / m + 1) * m);
21957 }
21958 }
21959