1 /*****************************************************************************
2
3 Copyright (c) 2000, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 Copyright (c) 2012, Facebook Inc.
7 Copyright (c) 2013, 2022, MariaDB Corporation.
8
9 Portions of this file contain modifications contributed and copyrighted by
10 Google, Inc. Those modifications are gratefully acknowledged and are described
11 briefly in the InnoDB documentation. The contributions by Google are
12 incorporated with their permission, and subject to the conditions contained in
13 the file COPYING.Google.
14
15 Portions of this file contain modifications contributed and copyrighted
16 by Percona Inc.. Those modifications are
17 gratefully acknowledged and are described briefly in the InnoDB
18 documentation. The contributions by Percona Inc. are incorporated with
19 their permission, and subject to the conditions contained in the file
20 COPYING.Percona.
21
22 This program is free software; you can redistribute it and/or modify it under
23 the terms of the GNU General Public License as published by the Free Software
24 Foundation; version 2 of the License.
25
26 This program is distributed in the hope that it will be useful, but WITHOUT
27 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
28 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29
30 You should have received a copy of the GNU General Public License along with
31 this program; if not, write to the Free Software Foundation, Inc.,
32 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
33
34 *****************************************************************************/
35
36 /** @file ha_innodb.cc */
37
38 #include "univ.i"
39
40 /* Include necessary SQL headers */
41 #include "ha_prototypes.h"
42 #include <debug_sync.h>
43 #include <gstream.h>
44 #include <log.h>
45 #include <mysys_err.h>
46 #include <innodb_priv.h>
47 #include <strfunc.h>
48 #include <sql_acl.h>
49 #include <sql_class.h>
50 #include <sql_show.h>
51 #include <sql_table.h>
52 #include <table_cache.h>
53 #include <my_check_opt.h>
54 #include <my_bitmap.h>
55 #include <mysql/service_thd_alloc.h>
56 #include <mysql/service_thd_wait.h>
57 #include "field.h"
58 #include "scope.h"
59
60 // MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
61 // MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
62
63 #include <my_service_manager.h>
64 #include <key.h>
65
66 /* Include necessary InnoDB headers */
67 #include "btr0btr.h"
68 #include "btr0cur.h"
69 #include "btr0bulk.h"
70 #include "btr0sea.h"
71 #include "buf0dblwr.h"
72 #include "buf0dump.h"
73 #include "buf0flu.h"
74 #include "buf0lru.h"
75 #include "dict0boot.h"
76 #include "dict0load.h"
77 #include "btr0defragment.h"
78 #include "dict0crea.h"
79 #include "dict0dict.h"
80 #include "dict0stats.h"
81 #include "dict0stats_bg.h"
82 #include "fil0fil.h"
83 #include "fsp0fsp.h"
84 #include "fts0fts.h"
85 #include "fts0plugin.h"
86 #include "fts0priv.h"
87 #include "fts0types.h"
88 #include "ibuf0ibuf.h"
89 #include "lock0lock.h"
90 #include "log0crypt.h"
91 #include "mtr0mtr.h"
92 #include "os0file.h"
93 #include "page0zip.h"
94 #include "pars0pars.h"
95 #include "rem0types.h"
96 #include "row0import.h"
97 #include "row0ins.h"
98 #include "row0merge.h"
99 #include "row0mysql.h"
100 #include "row0quiesce.h"
101 #include "row0sel.h"
102 #include "row0upd.h"
103 #include "fil0crypt.h"
104 #include "srv0mon.h"
105 #include "srv0srv.h"
106 #include "srv0start.h"
107 #include "rem0rec.h"
108 #ifdef UNIV_DEBUG
109 #include "trx0purge.h"
110 #endif /* UNIV_DEBUG */
111 #include "trx0roll.h"
112 #include "trx0rseg.h"
113 #include "trx0trx.h"
114 #include "fil0pagecompress.h"
115 #include "ut0mem.h"
116 #include "row0ext.h"
117
118 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
119
120 extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
121 unsigned long long thd_get_query_id(const MYSQL_THD thd);
122 void thd_clear_error(MYSQL_THD thd);
123
124 TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len,
125 const char *table, size_t table_len);
126 MYSQL_THD create_thd();
127 void destroy_thd(MYSQL_THD thd);
128 void reset_thd(MYSQL_THD thd);
129 TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
130 const char *tb, size_t tblen);
131 void close_thread_tables(THD* thd);
132
133 #ifdef MYSQL_DYNAMIC_PLUGIN
134 #define tc_size 400
135 #define tdc_size 400
136 #endif
137
138 #include <mysql/plugin.h>
139 #include <mysql/service_wsrep.h>
140
141 #include "ha_innodb.h"
142 #include "i_s.h"
143 #include "sync0sync.h"
144
145 #include <string>
146 #include <sstream>
147
148 #ifdef WITH_WSREP
149 #include "dict0priv.h"
150 #include <mysql/service_md5.h>
151 #include "wsrep_sst.h"
152 #endif /* WITH_WSREP */
153
154 /** to force correct commit order in binlog */
155 static ulong commit_threads = 0;
156 static mysql_cond_t commit_cond;
157 static mysql_mutex_t commit_cond_m;
158 static mysql_mutex_t pending_checkpoint_mutex;
159
160 #define INSIDE_HA_INNOBASE_CC
161
162 #define EQ_CURRENT_THD(thd) ((thd) == current_thd)
163
164 struct handlerton* innodb_hton_ptr;
165
166 static const long AUTOINC_OLD_STYLE_LOCKING = 0;
167 static const long AUTOINC_NEW_STYLE_LOCKING = 1;
168 static const long AUTOINC_NO_LOCKING = 2;
169
170 static ulong innobase_open_files;
171 static long innobase_autoinc_lock_mode;
172 static ulong innobase_commit_concurrency;
173
174 static ulonglong innobase_buffer_pool_size;
175
176 /** Percentage of the buffer pool to reserve for 'old' blocks.
177 Connected to buf_LRU_old_ratio. */
178 static uint innobase_old_blocks_pct;
179
180 static char* innobase_data_file_path;
181 static char* innobase_temp_data_file_path;
182
183 /* The default values for the following char* start-up parameters
184 are determined in innodb_init_params(). */
185
186 static char* innobase_data_home_dir;
187 static char* innobase_enable_monitor_counter;
188 static char* innobase_disable_monitor_counter;
189 static char* innobase_reset_monitor_counter;
190 static char* innobase_reset_all_monitor_counter;
191
192 static ulong innodb_flush_method;
193
194 /** Deprecated; no effect other than issuing a deprecation warning. */
195 static char* innodb_file_format;
196 /** Deprecated; no effect other than issuing a deprecation warning. */
197 static char* innodb_large_prefix;
198
/* This variable can be set in the server configuration file to specify
the stopword table to be used */
201 static char* innobase_server_stopword_table;
202
203 static my_bool innobase_use_checksums;
204 static my_bool innobase_locks_unsafe_for_binlog;
205 static my_bool innobase_rollback_on_timeout;
206 static my_bool innobase_create_status_file;
207 my_bool innobase_stats_on_metadata;
208 static my_bool innodb_optimize_fulltext_only;
209
210 static char* innodb_version_str = (char*) INNODB_VERSION_STR;
211
212 extern uint srv_fil_crypt_rotate_key_age;
213 extern uint srv_n_fil_crypt_iops;
214
215 extern my_bool srv_immediate_scrub_data_uncompressed;
216 extern my_bool srv_background_scrub_data_uncompressed;
217 extern my_bool srv_background_scrub_data_compressed;
218 extern uint srv_background_scrub_data_interval;
219 extern uint srv_background_scrub_data_check_interval;
220 #ifdef UNIV_DEBUG
221 my_bool innodb_evict_tables_on_commit_debug;
222 extern my_bool srv_scrub_force_testing;
223 #endif
224
225 /** File format constraint for ALTER TABLE */
226 ulong innodb_instant_alter_column_allowed;
227
/** Values of the innodb_default_row_format option.
Note we cannot use rec_format_enum because we do not allow
COMPRESSED row format for innodb_default_row_format option.
The numeric values are in the same order as the entries of
innodb_default_row_format_names[] ("redundant", "compact", "dynamic"),
and get_row_format() translates them to rec_format_t. */
enum default_row_format_enum {
	/** translated to REC_FORMAT_REDUNDANT by get_row_format() */
	DEFAULT_ROW_FORMAT_REDUNDANT = 0,
	/** translated to REC_FORMAT_COMPACT by get_row_format() */
	DEFAULT_ROW_FORMAT_COMPACT = 1,
	/** translated to REC_FORMAT_DYNAMIC by get_row_format() */
	DEFAULT_ROW_FORMAT_DYNAMIC = 2,
};
235
236 /** A dummy variable */
237 static uint innodb_max_purge_lag_wait;
238
239 /** Wait for trx_sys_t::rseg_history_len to be below a limit. */
innodb_max_purge_lag_wait_update(THD * thd,st_mysql_sys_var *,void *,const void * limit)240 static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *,
241 void *, const void *limit)
242 {
243 const uint l= *static_cast<const uint*>(limit);
244 if (trx_sys.rseg_history_len <= l)
245 return;
246 mysql_mutex_unlock(&LOCK_global_system_variables);
247 while (trx_sys.rseg_history_len > l)
248 {
249 if (thd_kill_level(thd))
250 break;
251 srv_wake_purge_thread_if_not_active();
252 os_thread_sleep(100000);
253 }
254 mysql_mutex_lock(&LOCK_global_system_variables);
255 }
256
/** Store an error code in errno.
@param[in]	err	error code to assign to errno */
static
void
set_my_errno(
	int	err)
{
	errno = err;
}
262
263 /** Checks whether the file name belongs to a partition of a table.
264 @param[in] file_name file name
265 @return pointer to the end of the table name part of the file name, or NULL */
266 static
267 char*
is_partition(char * file_name)268 is_partition(
269 /*=========*/
270 char* file_name)
271 {
272 /* We look for pattern #P# to see if the table is partitioned
273 MariaDB table. */
274 return strstr(file_name, table_name_t::part_suffix);
275 }
276
277 /** Signal to shut down InnoDB (NULL if shutdown was signaled, or if
278 running in innodb_read_only mode, srv_read_only_mode) */
279 std::atomic <st_my_thread_var *> srv_running;
280 /** Service thread that waits for the server shutdown and stops purge threads.
281 Purge workers have THDs that are needed to calculate virtual columns.
These THDs must be destroyed rather early in the server shutdown sequence.
283 This service thread creates a THD and idly waits for it to get a signal to
284 die. Then it notifies all purge workers to shutdown.
285 */
286 static pthread_t thd_destructor_thread;
287
288 pthread_handler_t
thd_destructor_proxy(void *)289 thd_destructor_proxy(void *)
290 {
291 mysql_mutex_t thd_destructor_mutex;
292 mysql_cond_t thd_destructor_cond;
293
294 my_thread_init();
295 mysql_mutex_init(PSI_NOT_INSTRUMENTED, &thd_destructor_mutex, 0);
296 mysql_cond_init(PSI_NOT_INSTRUMENTED, &thd_destructor_cond, 0);
297
298 st_my_thread_var *myvar= _my_thread_var();
299 myvar->current_mutex = &thd_destructor_mutex;
300 myvar->current_cond = &thd_destructor_cond;
301
302 THD *thd= create_thd();
303 thd_proc_info(thd, "InnoDB shutdown handler");
304
305
306 mysql_mutex_lock(&thd_destructor_mutex);
307 srv_running.store(myvar, std::memory_order_relaxed);
308 /* wait until the server wakes the THD to abort and die */
309 while (!myvar->abort)
310 mysql_cond_wait(&thd_destructor_cond, &thd_destructor_mutex);
311 mysql_mutex_unlock(&thd_destructor_mutex);
312 srv_running.store(NULL, std::memory_order_relaxed);
313
314 while (srv_fast_shutdown == 0 &&
315 (trx_sys.any_active_transactions() ||
316 THD_count::value() > srv_n_purge_threads + 1)) {
317 thd_proc_info(thd, "InnoDB slow shutdown wait");
318 os_thread_sleep(1000);
319 }
320
321 /* Some background threads might generate undo pages that will
322 need to be purged, so they have to be shut down before purge
323 threads if slow shutdown is requested. */
324 srv_shutdown_bg_undo_sources();
325 srv_purge_shutdown();
326
327 destroy_thd(thd);
328 mysql_cond_destroy(&thd_destructor_cond);
329 mysql_mutex_destroy(&thd_destructor_mutex);
330 my_thread_end();
331 return 0;
332 }
333
334 /** Return the InnoDB ROW_FORMAT enum value
335 @param[in] row_format row_format from "innodb_default_row_format"
336 @return InnoDB ROW_FORMAT value from rec_format_t enum. */
337 static
338 rec_format_t
get_row_format(ulong row_format)339 get_row_format(
340 ulong row_format)
341 {
342 switch(row_format) {
343 case DEFAULT_ROW_FORMAT_REDUNDANT:
344 return(REC_FORMAT_REDUNDANT);
345 case DEFAULT_ROW_FORMAT_COMPACT:
346 return(REC_FORMAT_COMPACT);
347 case DEFAULT_ROW_FORMAT_DYNAMIC:
348 return(REC_FORMAT_DYNAMIC);
349 default:
350 ut_ad(0);
351 return(REC_FORMAT_DYNAMIC);
352 }
353 }
354
355 static ulong innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC;
356
357 /** Possible values for system variable "innodb_stats_method". The values
358 are defined the same as its corresponding MyISAM system variable
359 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
360 static const char* innodb_stats_method_names[] = {
361 "nulls_equal",
362 "nulls_unequal",
363 "nulls_ignored",
364 NullS
365 };
366
367 /** Used to define an enumerate type of the system variable innodb_stats_method.
368 This is the same as "myisam_stats_method_typelib" */
369 static TYPELIB innodb_stats_method_typelib = {
370 array_elements(innodb_stats_method_names) - 1,
371 "innodb_stats_method_typelib",
372 innodb_stats_method_names,
373 NULL
374 };
375
376 /** Possible values of the parameter innodb_checksum_algorithm */
377 const char* innodb_checksum_algorithm_names[] = {
378 "crc32",
379 "strict_crc32",
380 "innodb",
381 "strict_innodb",
382 "none",
383 "strict_none",
384 "full_crc32",
385 "strict_full_crc32",
386 NullS
387 };
388
389 /** Used to define an enumerate type of the system variable
390 innodb_checksum_algorithm. */
391 TYPELIB innodb_checksum_algorithm_typelib = {
392 array_elements(innodb_checksum_algorithm_names) - 1,
393 "innodb_checksum_algorithm_typelib",
394 innodb_checksum_algorithm_names,
395 NULL
396 };
397
398 /** Possible values for system variable "innodb_default_row_format". */
399 static const char* innodb_default_row_format_names[] = {
400 "redundant",
401 "compact",
402 "dynamic",
403 NullS
404 };
405
406 /** Used to define an enumerate type of the system variable
407 innodb_default_row_format. */
408 static TYPELIB innodb_default_row_format_typelib = {
409 array_elements(innodb_default_row_format_names) - 1,
410 "innodb_default_row_format_typelib",
411 innodb_default_row_format_names,
412 NULL
413 };
414
415 /** Possible values of the parameter innodb_lock_schedule_algorithm */
416 static const char* innodb_lock_schedule_algorithm_names[] = {
417 "fcfs",
418 "vats",
419 NullS
420 };
421
422 /** Used to define an enumerate type of the system variable
423 innodb_lock_schedule_algorithm. */
424 static TYPELIB innodb_lock_schedule_algorithm_typelib = {
425 array_elements(innodb_lock_schedule_algorithm_names) - 1,
426 "innodb_lock_schedule_algorithm_typelib",
427 innodb_lock_schedule_algorithm_names,
428 NULL
429 };
430
431 /** Names of allowed values of innodb_flush_method */
432 const char* innodb_flush_method_names[] = {
433 "fsync",
434 "O_DSYNC",
435 "littlesync",
436 "nosync",
437 "O_DIRECT",
438 "O_DIRECT_NO_FSYNC",
439 #ifdef _WIN32
440 "unbuffered",
441 "async_unbuffered" /* alias for "unbuffered" */,
442 "normal" /* alias for "fsync" */,
443 #endif
444 NullS
445 };
446
447 /** Enumeration of innodb_flush_method */
448 TYPELIB innodb_flush_method_typelib = {
449 array_elements(innodb_flush_method_names) - 1,
450 "innodb_flush_method_typelib",
451 innodb_flush_method_names,
452 NULL
453 };
454
455 /* The following counter is used to convey information to InnoDB
456 about server activity: in case of normal DML ops it is not
457 sensible to call srv_active_wake_master_thread after each
458 operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
459
460 #define INNOBASE_WAKE_INTERVAL 32
461 static ulong innobase_active_counter = 0;
462
463 /** Allowed values of innodb_change_buffering */
464 static const char* innodb_change_buffering_names[] = {
465 "none", /* IBUF_USE_NONE */
466 "inserts", /* IBUF_USE_INSERT */
467 "deletes", /* IBUF_USE_DELETE_MARK */
468 "changes", /* IBUF_USE_INSERT_DELETE_MARK */
469 "purges", /* IBUF_USE_DELETE */
470 "all", /* IBUF_USE_ALL */
471 NullS
472 };
473
474 /** Enumeration of innodb_change_buffering */
475 static TYPELIB innodb_change_buffering_typelib = {
476 array_elements(innodb_change_buffering_names) - 1,
477 "innodb_change_buffering_typelib",
478 innodb_change_buffering_names,
479 NULL
480 };
481
482 /** Allowed values of innodb_instant_alter_column_allowed */
483 const char* innodb_instant_alter_column_allowed_names[] = {
484 "never", /* compatible with MariaDB 5.5 to 10.2 */
485 "add_last",/* allow instant ADD COLUMN ... LAST */
486 "add_drop_reorder", /* allow instant ADD anywhere & DROP & reorder */
487 NullS
488 };
489
490 /** Enumeration of innodb_instant_alter_column_allowed */
491 static TYPELIB innodb_instant_alter_column_allowed_typelib = {
492 array_elements(innodb_instant_alter_column_allowed_names) - 1,
493 "innodb_instant_alter_column_allowed_typelib",
494 innodb_instant_alter_column_allowed_names,
495 NULL
496 };
497
498 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
499 of m_prebuilt->fts_doc_id
500 @param[in,out] fts_hdl FTS handler
501 @return the relevance ranking value */
502 static
503 float
504 innobase_fts_retrieve_ranking(
505 FT_INFO* fts_hdl);
506 /** Free the memory for the FTS handler
507 @param[in,out] fts_hdl FTS handler */
508 static
509 void
510 innobase_fts_close_ranking(
511 FT_INFO* fts_hdl);
512 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
513 of m_prebuilt->fts_doc_id
514 @param[in,out] fts_hdl FTS handler
515 @return the relevance ranking value */
516 static
517 float
518 innobase_fts_find_ranking(
519 FT_INFO* fts_hdl,
520 uchar*,
521 uint);
522
523 /* Call back function array defined by MySQL and used to
524 retrieve FTS results. */
525 const struct _ft_vft ft_vft_result = {NULL,
526 innobase_fts_find_ranking,
527 innobase_fts_close_ranking,
528 innobase_fts_retrieve_ranking,
529 NULL};
530
531 /** @return version of the extended FTS API */
532 static
533 uint
innobase_fts_get_version()534 innobase_fts_get_version()
535 {
536 /* Currently this doesn't make much sense as returning
537 HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
538 This supposed to ease future extensions. */
539 return(2);
540 }
541
542 /** @return Which part of the extended FTS API is supported */
543 static
544 ulonglong
innobase_fts_flags()545 innobase_fts_flags()
546 {
547 return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
548 }
549
550 /** Find and Retrieve the FTS doc_id for the current result row
551 @param[in,out] fts_hdl FTS handler
552 @return the document ID */
553 static
554 ulonglong
555 innobase_fts_retrieve_docid(
556 FT_INFO_EXT* fts_hdl);
557
558 /** Find and retrieve the size of the current result
559 @param[in,out] fts_hdl FTS handler
560 @return number of matching rows */
561 static
562 ulonglong
innobase_fts_count_matches(FT_INFO_EXT * fts_hdl)563 innobase_fts_count_matches(
564 FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
565 {
566 NEW_FT_INFO* handle = reinterpret_cast<NEW_FT_INFO*>(fts_hdl);
567
568 if (handle->ft_result->rankings_by_id != NULL) {
569 return(rbt_size(handle->ft_result->rankings_by_id));
570 } else {
571 return(0);
572 }
573 }
574
575 const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
576 innobase_fts_flags,
577 innobase_fts_retrieve_docid,
578 innobase_fts_count_matches};
579
580 #ifdef HAVE_PSI_INTERFACE
581 # define PSI_KEY(n) {&n##_key, #n, 0}
582 /* All RWLOCK used in Innodb are SX-locks */
583 # define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX}
584
585 /* Keys to register pthread mutexes/cond in the current file with
586 performance schema */
587 static mysql_pfs_key_t commit_cond_mutex_key;
588 static mysql_pfs_key_t commit_cond_key;
589 static mysql_pfs_key_t pending_checkpoint_mutex_key;
590 static mysql_pfs_key_t thd_destructor_thread_key;
591
592 static PSI_mutex_info all_pthread_mutexes[] = {
593 PSI_KEY(commit_cond_mutex),
594 PSI_KEY(pending_checkpoint_mutex),
595 };
596
597 static PSI_cond_info all_innodb_conds[] = {
598 PSI_KEY(commit_cond)
599 };
600
601 # ifdef UNIV_PFS_MUTEX
602 /* all_innodb_mutexes array contains mutexes that are
603 performance schema instrumented if "UNIV_PFS_MUTEX"
604 is defined */
605 static PSI_mutex_info all_innodb_mutexes[] = {
606 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
607 PSI_KEY(buffer_block_mutex),
608 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
609 PSI_KEY(buf_pool_mutex),
610 PSI_KEY(buf_pool_zip_mutex),
611 PSI_KEY(dict_foreign_err_mutex),
612 PSI_KEY(dict_sys_mutex),
613 PSI_KEY(recalc_pool_mutex),
614 PSI_KEY(fil_system_mutex),
615 PSI_KEY(flush_list_mutex),
616 PSI_KEY(fts_delete_mutex),
617 PSI_KEY(fts_doc_id_mutex),
618 PSI_KEY(log_flush_order_mutex),
619 PSI_KEY(hash_table_mutex),
620 PSI_KEY(ibuf_bitmap_mutex),
621 PSI_KEY(ibuf_mutex),
622 PSI_KEY(ibuf_pessimistic_insert_mutex),
623 PSI_KEY(index_online_log),
624 PSI_KEY(log_sys_mutex),
625 PSI_KEY(log_sys_write_mutex),
626 PSI_KEY(mutex_list_mutex),
627 PSI_KEY(page_zip_stat_per_index_mutex),
628 PSI_KEY(purge_sys_pq_mutex),
629 PSI_KEY(recv_sys_mutex),
630 PSI_KEY(recv_writer_mutex),
631 PSI_KEY(redo_rseg_mutex),
632 PSI_KEY(noredo_rseg_mutex),
633 # ifdef UNIV_DEBUG
634 PSI_KEY(rw_lock_debug_mutex),
635 # endif /* UNIV_DEBUG */
636 PSI_KEY(rw_lock_list_mutex),
637 PSI_KEY(rw_lock_mutex),
638 PSI_KEY(srv_innodb_monitor_mutex),
639 PSI_KEY(srv_misc_tmpfile_mutex),
640 PSI_KEY(srv_monitor_file_mutex),
641 PSI_KEY(buf_dblwr_mutex),
642 PSI_KEY(trx_pool_mutex),
643 PSI_KEY(trx_pool_manager_mutex),
644 PSI_KEY(srv_sys_mutex),
645 PSI_KEY(lock_mutex),
646 PSI_KEY(lock_wait_mutex),
647 PSI_KEY(trx_mutex),
648 PSI_KEY(srv_threads_mutex),
649 # ifndef PFS_SKIP_EVENT_MUTEX
650 PSI_KEY(event_mutex),
651 # endif /* PFS_SKIP_EVENT_MUTEX */
652 PSI_KEY(rtr_active_mutex),
653 PSI_KEY(rtr_match_mutex),
654 PSI_KEY(rtr_path_mutex),
655 PSI_KEY(trx_sys_mutex),
656 };
657 # endif /* UNIV_PFS_MUTEX */
658
659 # ifdef UNIV_PFS_RWLOCK
660 /* all_innodb_rwlocks array contains rwlocks that are
661 performance schema instrumented if "UNIV_PFS_RWLOCK"
662 is defined */
663 static PSI_rwlock_info all_innodb_rwlocks[] = {
664 PSI_RWLOCK_KEY(btr_search_latch),
665 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
666 PSI_RWLOCK_KEY(buf_block_lock),
667 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
668 # ifdef UNIV_DEBUG
669 PSI_RWLOCK_KEY(buf_block_debug_latch),
670 # endif /* UNIV_DEBUG */
671 PSI_RWLOCK_KEY(dict_operation_lock),
672 PSI_RWLOCK_KEY(fil_space_latch),
673 PSI_RWLOCK_KEY(checkpoint_lock),
674 PSI_RWLOCK_KEY(fts_cache_rw_lock),
675 PSI_RWLOCK_KEY(fts_cache_init_rw_lock),
676 PSI_RWLOCK_KEY(trx_i_s_cache_lock),
677 PSI_RWLOCK_KEY(trx_purge_latch),
678 PSI_RWLOCK_KEY(index_tree_rw_lock),
679 PSI_RWLOCK_KEY(hash_table_locks)
680 };
681 # endif /* UNIV_PFS_RWLOCK */
682
683 # ifdef UNIV_PFS_THREAD
684 /* all_innodb_threads array contains threads that are
685 performance schema instrumented if "UNIV_PFS_THREAD"
686 is defined */
687 static PSI_thread_info all_innodb_threads[] = {
688 PSI_KEY(buf_dump_thread),
689 PSI_KEY(dict_stats_thread),
690 PSI_KEY(io_handler_thread),
691 PSI_KEY(io_ibuf_thread),
692 PSI_KEY(io_log_thread),
693 PSI_KEY(io_read_thread),
694 PSI_KEY(io_write_thread),
695 PSI_KEY(page_cleaner_thread),
696 PSI_KEY(recv_writer_thread),
697 PSI_KEY(srv_error_monitor_thread),
698 PSI_KEY(srv_lock_timeout_thread),
699 PSI_KEY(srv_master_thread),
700 PSI_KEY(srv_monitor_thread),
701 PSI_KEY(srv_purge_thread),
702 PSI_KEY(srv_worker_thread),
703 PSI_KEY(trx_rollback_clean_thread),
704 PSI_KEY(thd_destructor_thread),
705 };
706 # endif /* UNIV_PFS_THREAD */
707
708 # ifdef UNIV_PFS_IO
709 /* all_innodb_files array contains the type of files that are
710 performance schema instrumented if "UNIV_PFS_IO" is defined */
711 static PSI_file_info all_innodb_files[] = {
712 PSI_KEY(innodb_data_file),
713 PSI_KEY(innodb_log_file),
714 PSI_KEY(innodb_temp_file)
715 };
716 # endif /* UNIV_PFS_IO */
717 #endif /* HAVE_PSI_INTERFACE */
718
719 static void innodb_remember_check_sysvar_funcs();
720 mysql_var_check_func check_sysvar_enum;
721 mysql_var_check_func check_sysvar_int;
722
723 // should page compression be used by default for new tables
724 static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
725 "Is compression the default for new tables",
726 NULL, NULL, FALSE);
727
728 /** Update callback for SET [SESSION] innodb_default_encryption_key_id */
729 static void
innodb_default_encryption_key_id_update(THD * thd,st_mysql_sys_var * var,void * var_ptr,const void * save)730 innodb_default_encryption_key_id_update(THD* thd, st_mysql_sys_var* var,
731 void* var_ptr, const void *save)
732 {
733 uint key_id = *static_cast<const uint*>(save);
734 if (key_id != FIL_DEFAULT_ENCRYPTION_KEY
735 && !encryption_key_id_exists(key_id)) {
736 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
737 ER_WRONG_ARGUMENTS,
738 "innodb_default_encryption_key=%u"
739 " is not available", key_id);
740 }
741 *static_cast<uint*>(var_ptr) = key_id;
742 }
743
744 static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG,
745 "Default encryption key id used for table encryption.",
746 NULL, innodb_default_encryption_key_id_update,
747 FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0);
748
749 /**
750 Structure for CREATE TABLE options (table options).
751 It needs to be called ha_table_option_struct.
752
753 The option values can be specified in the CREATE TABLE at the end:
754 CREATE TABLE ( ... ) *here*
755 */
756
757 ha_create_table_option innodb_table_option_list[]=
758 {
759 /* With this option user can enable page compression feature for the
760 table */
761 HA_TOPTION_SYSVAR("PAGE_COMPRESSED", page_compressed, compression_default),
762 /* With this option user can set zip compression level for page
763 compression for this table*/
764 HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
765 /* With this option the user can enable encryption for the table */
766 HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
/* With this option the user defines the key identifier used for encryption */
768 HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id),
769
770 HA_TOPTION_END
771 };
772
773 /*************************************************************//**
774 Check whether valid argument given to innodb_ft_*_stopword_table.
775 This function is registered as a callback with MySQL.
776 @return 0 for valid stopword table */
777 static
778 int
779 innodb_stopword_table_validate(
780 /*===========================*/
781 THD* thd, /*!< in: thread handle */
782 struct st_mysql_sys_var* var, /*!< in: pointer to system
783 variable */
784 void* save, /*!< out: immediate result
785 for update function */
786 struct st_mysql_value* value); /*!< in: incoming string */
787
788 static bool is_mysql_datadir_path(const char *path);
789
790 /** Validate passed-in "value" is a valid directory name.
791 This function is registered as a callback with MySQL.
792 @param[in,out] thd thread handle
793 @param[in] var pointer to system variable
794 @param[out] save immediate result for update
795 @param[in] value incoming string
796 @return 0 for valid name */
797 static
798 int
innodb_tmpdir_validate(THD * thd,struct st_mysql_sys_var *,void * save,struct st_mysql_value * value)799 innodb_tmpdir_validate(
800 THD* thd,
801 struct st_mysql_sys_var*,
802 void* save,
803 struct st_mysql_value* value)
804 {
805
806 char* alter_tmp_dir;
807 char* innodb_tmp_dir;
808 char buff[OS_FILE_MAX_PATH];
809 int len = sizeof(buff);
810 char tmp_abs_path[FN_REFLEN + 2];
811
812 ut_ad(save != NULL);
813 ut_ad(value != NULL);
814
815 if (check_global_access(thd, FILE_ACL)) {
816 push_warning_printf(
817 thd, Sql_condition::WARN_LEVEL_WARN,
818 ER_WRONG_ARGUMENTS,
819 "InnoDB: FILE Permissions required");
820 *static_cast<const char**>(save) = NULL;
821 return(1);
822 }
823
824 alter_tmp_dir = (char*) value->val_str(value, buff, &len);
825
826 if (!alter_tmp_dir) {
827 *static_cast<const char**>(save) = alter_tmp_dir;
828 return(0);
829 }
830
831 if (strlen(alter_tmp_dir) > FN_REFLEN) {
832 push_warning_printf(
833 thd, Sql_condition::WARN_LEVEL_WARN,
834 ER_WRONG_ARGUMENTS,
835 "Path length should not exceed %d bytes", FN_REFLEN);
836 *static_cast<const char**>(save) = NULL;
837 return(1);
838 }
839
840 os_normalize_path(alter_tmp_dir);
841 my_realpath(tmp_abs_path, alter_tmp_dir, 0);
842 size_t tmp_abs_len = strlen(tmp_abs_path);
843
844 if (my_access(tmp_abs_path, F_OK)) {
845
846 push_warning_printf(
847 thd, Sql_condition::WARN_LEVEL_WARN,
848 ER_WRONG_ARGUMENTS,
849 "InnoDB: Path doesn't exist.");
850 *static_cast<const char**>(save) = NULL;
851 return(1);
852 } else if (my_access(tmp_abs_path, R_OK | W_OK)) {
853 push_warning_printf(
854 thd, Sql_condition::WARN_LEVEL_WARN,
855 ER_WRONG_ARGUMENTS,
856 "InnoDB: Server doesn't have permission in "
857 "the given location.");
858 *static_cast<const char**>(save) = NULL;
859 return(1);
860 }
861
862 MY_STAT stat_info_dir;
863
864 if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) {
865 if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) {
866
867 push_warning_printf(
868 thd, Sql_condition::WARN_LEVEL_WARN,
869 ER_WRONG_ARGUMENTS,
870 "Given path is not a directory. ");
871 *static_cast<const char**>(save) = NULL;
872 return(1);
873 }
874 }
875
876 if (!is_mysql_datadir_path(tmp_abs_path)) {
877
878 push_warning_printf(
879 thd, Sql_condition::WARN_LEVEL_WARN,
880 ER_WRONG_ARGUMENTS,
881 "InnoDB: Path Location should not be same as "
882 "mysql data directory location.");
883 *static_cast<const char**>(save) = NULL;
884 return(1);
885 }
886
887 innodb_tmp_dir = static_cast<char*>(
888 thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1));
889 *static_cast<const char**>(save) = innodb_tmp_dir;
890 return(0);
891 }
892
893 /******************************************************************//**
894 Maps a MySQL trx isolation level code to the InnoDB isolation level code
895 @return InnoDB isolation level */
896 static inline
897 ulint
898 innobase_map_isolation_level(
899 /*=========================*/
900 enum_tx_isolation iso); /*!< in: MySQL isolation level code */
901
902 /** Gets field offset for a field in a table.
903 @param[in] table MySQL table object
904 @param[in] field MySQL field object (from table->field array)
905 @return offset */
906 static inline
907 uint
get_field_offset(const TABLE * table,const Field * field)908 get_field_offset(
909 const TABLE* table,
910 const Field* field)
911 {
912 return field->offset(table->record[0]);
913 }
914
915
916 /*************************************************************//**
917 Check for a valid value of innobase_compression_algorithm.
918 @return 0 for valid innodb_compression_algorithm. */
919 static
920 int
921 innodb_compression_algorithm_validate(
922 /*==================================*/
923 THD* thd, /*!< in: thread handle */
924 struct st_mysql_sys_var* var, /*!< in: pointer to system
925 variable */
926 void* save, /*!< out: immediate result
927 for update function */
928 struct st_mysql_value* value); /*!< in: incoming string */
929
930 static ibool innodb_have_lzo=IF_LZO(1, 0);
931 static ibool innodb_have_lz4=IF_LZ4(1, 0);
932 static ibool innodb_have_lzma=IF_LZMA(1, 0);
933 static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
934 static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
935 static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
936
937 static
938 int
939 innodb_encrypt_tables_validate(
940 /*==================================*/
941 THD* thd, /*!< in: thread handle */
942 struct st_mysql_sys_var* var, /*!< in: pointer to system
943 variable */
944 void* save, /*!< out: immediate result
945 for update function */
946 struct st_mysql_value* value); /*!< in: incoming string */
947
948 static const char innobase_hton_name[]= "InnoDB";
949
950 static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
951 "Enable InnoDB locking in LOCK TABLES",
952 /* check_func */ NULL, /* update_func */ NULL,
953 /* default */ TRUE);
954
955 static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
956 "Use strict mode when evaluating create options.",
957 NULL, NULL, TRUE);
958
959 static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
960 "Create FTS index with stopword.",
961 NULL, NULL,
962 /* default */ TRUE);
963
964 static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
965 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
966 NULL, NULL, 50, 0, 1024 * 1024 * 1024, 0);
967
968 static MYSQL_THDVAR_STR(ft_user_stopword_table,
969 PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
970 "User supplied stopword table name, effective in the session level.",
971 innodb_stopword_table_validate, NULL, NULL);
972
973 static MYSQL_THDVAR_STR(tmpdir,
974 PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
975 "Directory for temporary non-tablespace files.",
976 innodb_tmpdir_validate, NULL, NULL);
977
978 static SHOW_VAR innodb_status_variables[]= {
979 {"buffer_pool_dump_status",
980 (char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR},
981 {"buffer_pool_load_status",
982 (char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
983 {"buffer_pool_resize_status",
984 (char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR},
985 {"buffer_pool_load_incomplete",
986 &export_vars.innodb_buffer_pool_load_incomplete, SHOW_BOOL},
987 {"buffer_pool_pages_data",
988 (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
989 {"buffer_pool_bytes_data",
990 (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
991 {"buffer_pool_pages_dirty",
992 (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
993 {"buffer_pool_bytes_dirty",
994 (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
995 {"buffer_pool_pages_flushed",
996 (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
997 {"buffer_pool_pages_free",
998 (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
999 #ifdef UNIV_DEBUG
1000 {"buffer_pool_pages_latched",
1001 (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
1002 #endif /* UNIV_DEBUG */
1003 {"buffer_pool_pages_misc",
1004 (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
1005 {"buffer_pool_pages_total",
1006 (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
1007 {"buffer_pool_read_ahead_rnd",
1008 (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
1009 {"buffer_pool_read_ahead",
1010 (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
1011 {"buffer_pool_read_ahead_evicted",
1012 (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
1013 {"buffer_pool_read_requests",
1014 (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
1015 {"buffer_pool_reads",
1016 (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
1017 {"buffer_pool_wait_free",
1018 (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
1019 {"buffer_pool_write_requests",
1020 (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
1021 {"data_fsyncs",
1022 (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
1023 {"data_pending_fsyncs",
1024 (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
1025 {"data_pending_reads",
1026 (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
1027 {"data_pending_writes",
1028 (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
1029 {"data_read",
1030 (char*) &export_vars.innodb_data_read, SHOW_LONG},
1031 {"data_reads",
1032 (char*) &export_vars.innodb_data_reads, SHOW_LONG},
1033 {"data_writes",
1034 (char*) &export_vars.innodb_data_writes, SHOW_LONG},
1035 {"data_written",
1036 (char*) &export_vars.innodb_data_written, SHOW_LONG},
1037 {"dblwr_pages_written",
1038 (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
1039 {"dblwr_writes",
1040 (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
1041 {"log_waits",
1042 (char*) &export_vars.innodb_log_waits, SHOW_LONG},
1043 {"log_write_requests",
1044 (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
1045 {"log_writes",
1046 (char*) &export_vars.innodb_log_writes, SHOW_LONG},
1047 {"os_log_fsyncs",
1048 (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
1049 {"os_log_pending_fsyncs",
1050 (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
1051 {"os_log_pending_writes",
1052 (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
1053 {"os_log_written",
1054 (char*) &export_vars.innodb_os_log_written, SHOW_LONGLONG},
1055 {"page_size",
1056 (char*) &export_vars.innodb_page_size, SHOW_LONG},
1057 {"pages_created",
1058 (char*) &export_vars.innodb_pages_created, SHOW_LONG},
1059 {"pages_read",
1060 (char*) &export_vars.innodb_pages_read, SHOW_LONG},
1061 {"pages_written",
1062 (char*) &export_vars.innodb_pages_written, SHOW_LONG},
1063 {"row_lock_current_waits",
1064 (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
1065 {"row_lock_time",
1066 (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
1067 {"row_lock_time_avg",
1068 (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
1069 {"row_lock_time_max",
1070 (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
1071 {"row_lock_waits",
1072 (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
1073 {"rows_deleted",
1074 (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
1075 {"rows_inserted",
1076 (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
1077 {"rows_read",
1078 (char*) &export_vars.innodb_rows_read, SHOW_LONG},
1079 {"rows_updated",
1080 (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
1081 {"system_rows_deleted",
1082 (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG},
1083 {"system_rows_inserted",
1084 (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG},
1085 {"system_rows_read",
1086 (char*) &export_vars.innodb_system_rows_read, SHOW_LONG},
1087 {"system_rows_updated",
1088 (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG},
1089 {"num_open_files",
1090 (char*) &export_vars.innodb_num_open_files, SHOW_LONG},
1091 {"truncated_status_writes",
1092 (char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
1093 {"available_undo_logs",
1094 (char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
1095 {"undo_truncations",
1096 (char*) &export_vars.innodb_undo_truncations, SHOW_LONG},
1097
1098 /* Status variables for page compression */
1099 {"page_compression_saved",
1100 (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
1101 {"num_index_pages_written",
1102 (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
1103 {"num_non_index_pages_written",
1104 (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG},
1105 {"num_pages_page_compressed",
1106 (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
1107 {"num_page_compressed_trim_op",
1108 (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
1109 {"num_pages_page_decompressed",
1110 (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
1111 {"num_pages_page_compression_error",
1112 (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG},
1113 {"num_pages_encrypted",
1114 (char*) &export_vars.innodb_pages_encrypted, SHOW_LONGLONG},
1115 {"num_pages_decrypted",
1116 (char*) &export_vars.innodb_pages_decrypted, SHOW_LONGLONG},
1117 {"have_lz4",
1118 (char*) &innodb_have_lz4, SHOW_BOOL},
1119 {"have_lzo",
1120 (char*) &innodb_have_lzo, SHOW_BOOL},
1121 {"have_lzma",
1122 (char*) &innodb_have_lzma, SHOW_BOOL},
1123 {"have_bzip2",
1124 (char*) &innodb_have_bzip2, SHOW_BOOL},
1125 {"have_snappy",
1126 (char*) &innodb_have_snappy, SHOW_BOOL},
1127 {"have_punch_hole",
1128 (char*) &innodb_have_punch_hole, SHOW_BOOL},
1129
1130 /* Defragmentation */
1131 {"defragment_compression_failures",
1132 (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
1133 {"defragment_failures",
1134 (char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
1135 {"defragment_count",
1136 (char*) &export_vars.innodb_defragment_count, SHOW_LONG},
1137
1138 {"instant_alter_column",
1139 (char*) &export_vars.innodb_instant_alter_column, SHOW_LONG},
1140
1141 /* Online alter table status variables */
1142 {"onlineddl_rowlog_rows",
1143 (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
1144 {"onlineddl_rowlog_pct_used",
1145 (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG},
1146 {"onlineddl_pct_progress",
1147 (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG},
1148
1149 /* Times secondary index lookup triggered cluster lookup and
1150 times prefix optimization avoided triggering cluster lookup */
1151 {"secondary_index_triggered_cluster_reads",
1152 (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG},
1153 {"secondary_index_triggered_cluster_reads_avoided",
1154 (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
1155
1156 /* Encryption */
1157 {"encryption_rotation_pages_read_from_cache",
1158 (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
1159 SHOW_LONG},
1160 {"encryption_rotation_pages_read_from_disk",
1161 (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
1162 SHOW_LONG},
1163 {"encryption_rotation_pages_modified",
1164 (char*) &export_vars.innodb_encryption_rotation_pages_modified,
1165 SHOW_LONG},
1166 {"encryption_rotation_pages_flushed",
1167 (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
1168 SHOW_LONG},
1169 {"encryption_rotation_estimated_iops",
1170 (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
1171 SHOW_LONG},
1172 {"encryption_key_rotation_list_length",
1173 (char*)&export_vars.innodb_key_rotation_list_length,
1174 SHOW_LONGLONG},
1175 {"encryption_n_merge_blocks_encrypted",
1176 (char*)&export_vars.innodb_n_merge_blocks_encrypted,
1177 SHOW_LONGLONG},
1178 {"encryption_n_merge_blocks_decrypted",
1179 (char*)&export_vars.innodb_n_merge_blocks_decrypted,
1180 SHOW_LONGLONG},
1181 {"encryption_n_rowlog_blocks_encrypted",
1182 (char*)&export_vars.innodb_n_rowlog_blocks_encrypted,
1183 SHOW_LONGLONG},
1184 {"encryption_n_rowlog_blocks_decrypted",
1185 (char*)&export_vars.innodb_n_rowlog_blocks_decrypted,
1186 SHOW_LONGLONG},
1187 {"encryption_n_temp_blocks_encrypted",
1188 (char*)&export_vars.innodb_n_temp_blocks_encrypted,
1189 SHOW_LONGLONG},
1190 {"encryption_n_temp_blocks_decrypted",
1191 (char*)&export_vars.innodb_n_temp_blocks_decrypted,
1192 SHOW_LONGLONG},
1193
1194 /* scrubing */
1195 {"scrub_background_page_reorganizations",
1196 (char*) &export_vars.innodb_scrub_page_reorganizations,
1197 SHOW_LONG},
1198 {"scrub_background_page_splits",
1199 (char*) &export_vars.innodb_scrub_page_splits,
1200 SHOW_LONG},
1201 {"scrub_background_page_split_failures_underflow",
1202 (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
1203 SHOW_LONG},
1204 {"scrub_background_page_split_failures_out_of_filespace",
1205 (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
1206 SHOW_LONG},
1207 {"scrub_background_page_split_failures_missing_index",
1208 (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
1209 SHOW_LONG},
1210 {"scrub_background_page_split_failures_unknown",
1211 (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
1212 SHOW_LONG},
1213 {"scrub_log",
1214 (char*) &export_vars.innodb_scrub_log,
1215 SHOW_LONGLONG},
1216 {"encryption_num_key_requests",
1217 (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
1218
1219 {NullS, NullS, SHOW_LONG}
1220 };
1221
1222 /*****************************************************************//**
1223 Frees a possible InnoDB trx object associated with the current THD.
1224 @return 0 or error number */
1225 static
1226 int
1227 innobase_close_connection(
1228 /*======================*/
1229 handlerton* hton, /*!< in/out: InnoDB handlerton */
1230 THD* thd); /*!< in: MySQL thread handle for
1231 which to close the connection */
1232
1233 /** Cancel any pending lock request associated with the current THD.
1234 @sa THD::awake() @sa ha_kill_query() */
1235 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels);
1236 static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
1237
1238 /*****************************************************************//**
1239 Commits a transaction in an InnoDB database or marks an SQL statement
1240 ended.
1241 @return 0 */
1242 static
1243 int
1244 innobase_commit(
1245 /*============*/
1246 handlerton* hton, /*!< in/out: InnoDB handlerton */
1247 THD* thd, /*!< in: MySQL thread handle of the
1248 user for whom the transaction should
1249 be committed */
1250 bool commit_trx); /*!< in: true - commit transaction
1251 false - the current SQL statement
1252 ended */
1253
1254 /*****************************************************************//**
1255 Rolls back a transaction to a savepoint.
1256 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1257 given name */
1258 static
1259 int
1260 innobase_rollback(
1261 /*==============*/
1262 handlerton* hton, /*!< in/out: InnoDB handlerton */
1263 THD* thd, /*!< in: handle to the MySQL thread
1264 of the user whose transaction should
1265 be rolled back */
1266 bool rollback_trx); /*!< in: TRUE - rollback entire
1267 transaction FALSE - rollback the current
1268 statement only */
1269
1270 /*****************************************************************//**
1271 Rolls back a transaction to a savepoint.
1272 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1273 given name */
1274 static
1275 int
1276 innobase_rollback_to_savepoint(
1277 /*===========================*/
1278 handlerton* hton, /*!< in/out: InnoDB handlerton */
1279 THD* thd, /*!< in: handle to the MySQL thread of
1280 the user whose XA transaction should
1281 be rolled back to savepoint */
1282 void* savepoint); /*!< in: savepoint data */
1283
1284 /*****************************************************************//**
1285 Check whether innodb state allows to safely release MDL locks after
1286 rollback to savepoint.
1287 @return true if it is safe, false if its not safe. */
1288 static
1289 bool
1290 innobase_rollback_to_savepoint_can_release_mdl(
1291 /*===========================================*/
1292 handlerton* hton, /*!< in/out: InnoDB handlerton */
1293 THD* thd); /*!< in: handle to the MySQL thread of
1294 the user whose XA transaction should
1295 be rolled back to savepoint */
1296
1297 /*****************************************************************//**
1298 Sets a transaction savepoint.
1299 @return always 0, that is, always succeeds */
1300 static
1301 int
1302 innobase_savepoint(
1303 /*===============*/
1304 handlerton* hton, /*!< in/out: InnoDB handlerton */
1305 THD* thd, /*!< in: handle to the MySQL thread of
1306 the user's XA transaction for which
1307 we need to take a savepoint */
1308 void* savepoint); /*!< in: savepoint data */
1309
1310 /*****************************************************************//**
1311 Release transaction savepoint name.
1312 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1313 given name */
1314 static
1315 int
1316 innobase_release_savepoint(
1317 /*=======================*/
1318 handlerton* hton, /*!< in/out: handlerton for InnoDB */
1319 THD* thd, /*!< in: handle to the MySQL thread
1320 of the user whose transaction's
1321 savepoint should be released */
1322 void* savepoint); /*!< in: savepoint data */
1323
1324 static void innobase_checkpoint_request(handlerton *hton, void *cookie);
1325
1326 /** @brief Initialize the default value of innodb_commit_concurrency.
1327
1328 Once InnoDB is running, the innodb_commit_concurrency must not change
1329 from zero to nonzero. (Bug #42101)
1330
1331 The initial default value is 0, and without this extra initialization,
1332 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
1333 to 0, even if it was initially set to nonzero at the command line
1334 or configuration file. */
1335 static
1336 void
1337 innobase_commit_concurrency_init_default();
1338 /*=======================================*/
1339
1340 /** @brief Adjust some InnoDB startup parameters based on file contents
1341 or innodb_page_size. */
1342 static
1343 void
1344 innodb_params_adjust();
1345
1346 /*******************************************************************//**
1347 This function is used to prepare an X/Open XA distributed transaction.
1348 @return 0 or error number */
1349 static
1350 int
1351 innobase_xa_prepare(
1352 /*================*/
1353 handlerton* hton, /*!< in: InnoDB handlerton */
1354 THD* thd, /*!< in: handle to the MySQL thread of
1355 the user whose XA transaction should
1356 be prepared */
1357 bool all); /*!< in: true - prepare transaction
1358 false - the current SQL statement
1359 ended */
1360 /*******************************************************************//**
1361 This function is used to recover X/Open XA distributed transactions.
1362 @return number of prepared transactions stored in xid_list */
1363 static
1364 int
1365 innobase_xa_recover(
1366 /*================*/
1367 handlerton* hton, /*!< in: InnoDB handlerton */
1368 XID* xid_list, /*!< in/out: prepared transactions */
1369 uint len); /*!< in: number of slots in xid_list */
1370 /*******************************************************************//**
1371 This function is used to commit one X/Open XA distributed transaction
1372 which is in the prepared state
1373 @return 0 or error number */
1374 static
1375 int
1376 innobase_commit_by_xid(
1377 /*===================*/
1378 handlerton* hton, /*!< in: InnoDB handlerton */
1379 XID* xid); /*!< in: X/Open XA transaction
1380 identification */
1381 /** Remove all tables in the named database inside InnoDB.
1382 @param[in] hton handlerton from InnoDB
1383 @param[in] path Database path; Inside InnoDB the name of the last
1384 directory in the path is used as the database name.
1385 For example, in 'mysql/data/test' the database name is 'test'. */
1386 static
1387 void
1388 innobase_drop_database(
1389 handlerton* hton,
1390 char* path);
1391
1392 /** Shut down the InnoDB storage engine.
1393 @return 0 */
1394 static
1395 int
1396 innobase_end(handlerton*, ha_panic_function);
1397
1398 /*****************************************************************//**
1399 Creates an InnoDB transaction struct for the thd if it does not yet have one.
1400 Starts a new InnoDB transaction if a transaction is not yet started. And
1401 assigns a new snapshot for a consistent read if the transaction does not yet
1402 have one.
1403 @return 0 */
1404 static
1405 int
1406 innobase_start_trx_and_assign_read_view(
1407 /*====================================*/
1408 handlerton* hton, /* in: InnoDB handlerton */
1409 THD* thd); /* in: MySQL thread handle of the
1410 user for whom the transaction should
1411 be committed */
1412
1413 /** Flush InnoDB redo logs to the file system.
1414 @param[in] hton InnoDB handlerton
1415 @param[in] binlog_group_flush true if we got invoked by binlog
1416 group commit during flush stage, false in other cases.
1417 @return false */
1418 static
1419 bool
innobase_flush_logs(handlerton * hton,bool binlog_group_flush)1420 innobase_flush_logs(
1421 handlerton* hton,
1422 bool binlog_group_flush)
1423 {
1424 DBUG_ENTER("innobase_flush_logs");
1425 DBUG_ASSERT(hton == innodb_hton_ptr);
1426
1427 if (srv_read_only_mode) {
1428 DBUG_RETURN(false);
1429 }
1430
1431 /* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar.
1432 Else, we got invoked by binlog group commit during flush stage. */
1433
1434 if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) {
1435 /* innodb_flush_log_at_trx_commit=0
1436 (write and sync once per second).
1437 Do not flush the redo log during binlog group commit. */
1438 DBUG_RETURN(false);
1439 }
1440
1441 /* Flush the redo log buffer to the redo log file.
1442 Sync it to disc if we are in FLUSH LOGS, or if
1443 innodb_flush_log_at_trx_commit=1
1444 (write and sync at each commit). */
1445 log_buffer_flush_to_disk(!binlog_group_flush
1446 || srv_flush_log_at_trx_commit == 1);
1447
1448 DBUG_RETURN(false);
1449 }
1450
1451 /** Flush InnoDB redo logs to the file system.
1452 @param[in] hton InnoDB handlerton
1453 @param[in] binlog_group_flush true if we got invoked by binlog
1454 group commit during flush stage, false in other cases.
1455 @return false */
1456 static
1457 bool
innobase_flush_logs(handlerton * hton)1458 innobase_flush_logs(
1459 handlerton* hton)
1460 {
1461 return innobase_flush_logs(hton, true);
1462 }
1463
1464 /************************************************************************//**
1465 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
1466 InnoDB Monitor to the client.
1467 @return 0 on success */
1468 static
1469 int
1470 innodb_show_status(
1471 /*===============*/
1472 handlerton* hton, /*!< in: the innodb handlerton */
1473 THD* thd, /*!< in: the MySQL query thread of
1474 the caller */
1475 stat_print_fn* stat_print);
1476 /************************************************************************//**
1477 Return 0 on success and non-zero on failure. Note: the bool return type
1478 seems to be abused here, should be an int. */
1479 static
1480 bool
1481 innobase_show_status(
1482 /*=================*/
1483 handlerton* hton, /*!< in: the innodb handlerton */
1484 THD* thd, /*!< in: the MySQL query thread of
1485 the caller */
1486 stat_print_fn* stat_print,
1487 enum ha_stat_type stat_type);
1488
/** Parse and enable InnoDB monitor counters during server startup.
The user can enable monitor counters/groups by specifying
"loose-innodb_monitor_enable = monitor_name1;monitor_name2..."
in the server configuration file or at the command line.
@param[in]	str	monitor counter enable list */
static
void
innodb_enable_monitor_at_startup(
	char*	str);
1499
#ifdef MYSQL_STORE_FTS_DOC_ID
/** Store a doc_id value into the FTS_DOC_ID field of a table.
The write_set column map is switched with dbug_tmp_use_all_columns()
for the duration of the store and restored afterwards.
@param[in,out]	tbl	table containing FULLTEXT index
@param[in]	doc_id	FTS_DOC_ID value */
static
void
innobase_fts_store_docid(
	TABLE*		tbl,
	ulonglong	doc_id)
{
	my_bitmap_map*	saved_map = dbug_tmp_use_all_columns(
		tbl, tbl->write_set);
	const longlong	signed_doc_id = static_cast<longlong>(doc_id);

	tbl->fts_doc_id_field->store(signed_doc_id, true);

	dbug_tmp_restore_column_map(tbl->write_set, saved_map);
}
#endif
1518
1519 /*************************************************************//**
1520 Check for a valid value of innobase_commit_concurrency.
1521 @return 0 for valid innodb_commit_concurrency */
1522 static
1523 int
innobase_commit_concurrency_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)1524 innobase_commit_concurrency_validate(
1525 /*=================================*/
1526 THD*, st_mysql_sys_var*,
1527 void* save, /*!< out: immediate result
1528 for update function */
1529 struct st_mysql_value* value) /*!< in: incoming string */
1530 {
1531 long long intbuf;
1532 ulong commit_concurrency;
1533
1534 DBUG_ENTER("innobase_commit_concurrency_validate");
1535
1536 if (value->val_int(value, &intbuf)) {
1537 /* The value is NULL. That is invalid. */
1538 DBUG_RETURN(1);
1539 }
1540
1541 *reinterpret_cast<ulong*>(save) = commit_concurrency
1542 = static_cast<ulong>(intbuf);
1543
1544 /* Allow the value to be updated, as long as it remains zero
1545 or nonzero. */
1546 DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency));
1547 }
1548
1549 /*******************************************************************//**
1550 Function for constructing an InnoDB table handler instance. */
1551 static
1552 handler*
innobase_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)1553 innobase_create_handler(
1554 /*====================*/
1555 handlerton* hton, /*!< in: InnoDB handlerton */
1556 TABLE_SHARE* table,
1557 MEM_ROOT* mem_root)
1558 {
1559 return(new (mem_root) ha_innobase(hton, table));
1560 }
1561
1562 /* General functions */
1563
1564 /** Check that a page_size is correct for InnoDB.
1565 If correct, set the associated page_size_shift which is the power of 2
1566 for this page size.
1567 @param[in] page_size Page Size to evaluate
1568 @return an associated page_size_shift if valid, 0 if invalid. */
1569 inline
1570 ulong
innodb_page_size_validate(ulong page_size)1571 innodb_page_size_validate(
1572 ulong page_size)
1573 {
1574 ulong n;
1575
1576 DBUG_ENTER("innodb_page_size_validate");
1577
1578 for (n = UNIV_PAGE_SIZE_SHIFT_MIN;
1579 n <= UNIV_PAGE_SIZE_SHIFT_MAX;
1580 n++) {
1581 if (page_size == static_cast<ulong>(1 << n)) {
1582 DBUG_RETURN(n);
1583 }
1584 }
1585
1586 DBUG_RETURN(0);
1587 }
1588
1589 /******************************************************************//**
1590 Returns true if the thread is the replication thread on the slave
1591 server. Used in srv_conc_enter_innodb() to determine if the thread
1592 should be allowed to enter InnoDB - the replication thread is treated
1593 differently than other threads. Also used in
1594 srv_conc_force_exit_innodb().
1595 @return true if thd is the replication thread */
1596 ibool
thd_is_replication_slave_thread(THD * thd)1597 thd_is_replication_slave_thread(
1598 /*============================*/
1599 THD* thd) /*!< in: thread handle */
1600 {
1601 return thd && ((ibool) thd_slave_thread(thd));
1602 }
1603
1604 /******************************************************************//**
1605 Returns true if transaction should be flagged as read-only.
1606 @return true if the thd is marked as read-only */
1607 bool
thd_trx_is_read_only(THD * thd)1608 thd_trx_is_read_only(
1609 /*=================*/
1610 THD* thd) /*!< in: thread handle */
1611 {
1612 return(thd != 0 && thd_tx_is_read_only(thd));
1613 }
1614
1615 static MYSQL_THDVAR_BOOL(background_thread,
1616 PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOSYSVAR,
1617 "Internal (not user visible) flag to mark "
1618 "background purge threads", NULL, NULL, 0);
1619
1620 /** Create a MYSQL_THD for a background thread and mark it as such.
1621 @param name thread info for SHOW PROCESSLIST
1622 @return new MYSQL_THD */
1623 MYSQL_THD
innobase_create_background_thd(const char * name)1624 innobase_create_background_thd(const char* name)
1625 /*============================*/
1626 {
1627 MYSQL_THD thd= create_thd();
1628 thd_proc_info(thd, name);
1629 THDVAR(thd, background_thread) = true;
1630 return thd;
1631 }
1632
1633
1634 /** Destroy a background purge thread THD.
1635 @param[in] thd MYSQL_THD to destroy */
1636 void
innobase_destroy_background_thd(MYSQL_THD thd)1637 innobase_destroy_background_thd(
1638 /*============================*/
1639 MYSQL_THD thd)
1640 {
1641 /* need to close the connection explicitly, the server won't do it
1642 if innodb is in the PLUGIN_IS_DYING state */
1643 innobase_close_connection(innodb_hton_ptr, thd);
1644 thd_set_ha_data(thd, innodb_hton_ptr, NULL);
1645 destroy_thd(thd);
1646 }
1647
1648 /** Close opened tables, free memory, delete items for a MYSQL_THD.
1649 @param[in] thd MYSQL_THD to reset */
1650 void
innobase_reset_background_thd(MYSQL_THD thd)1651 innobase_reset_background_thd(MYSQL_THD thd)
1652 {
1653 if (!thd) {
1654 thd = current_thd;
1655 }
1656
1657 ut_ad(thd);
1658 ut_ad(THDVAR(thd, background_thread));
1659
1660 /* background purge thread */
1661 const char *proc_info= thd_proc_info(thd, "reset");
1662 reset_thd(thd);
1663 thd_proc_info(thd, proc_info);
1664 }
1665
1666
1667 /******************************************************************//**
1668 Check if the transaction is an auto-commit transaction. TRUE also
1669 implies that it is a SELECT (read-only) transaction.
1670 @return true if the transaction is an auto commit read-only transaction. */
1671 ibool
thd_trx_is_auto_commit(THD * thd)1672 thd_trx_is_auto_commit(
1673 /*===================*/
1674 THD* thd) /*!< in: thread handle, can be NULL */
1675 {
1676 return(thd != NULL
1677 && !thd_test_options(
1678 thd,
1679 OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
1680 && thd_is_select(thd));
1681 }
1682
1683 /** Enter InnoDB engine after checking the max number of user threads
1684 allowed, else the thread is put into sleep.
1685 @param[in,out] prebuilt row prebuilt handler */
innobase_srv_conc_enter_innodb(row_prebuilt_t * prebuilt)1686 static inline void innobase_srv_conc_enter_innodb(row_prebuilt_t *prebuilt)
1687 {
1688 trx_t* trx = prebuilt->trx;
1689
1690 #ifdef WITH_WSREP
1691 if (global_system_variables.wsrep_on &&
1692 (wsrep_thd_is_applying(trx->mysql_thd)
1693 || wsrep_thd_is_toi(trx->mysql_thd))) {
1694 return;
1695 }
1696 #endif /* WITH_WSREP */
1697
1698 if (srv_thread_concurrency) {
1699 if (trx->n_tickets_to_enter_innodb > 0) {
1700
1701 /* If trx has 'free tickets' to enter the engine left,
1702 then use one such ticket */
1703
1704 --trx->n_tickets_to_enter_innodb;
1705
1706 } else if (trx->mysql_thd != NULL
1707 && thd_is_replication_slave_thread(trx->mysql_thd)) {
1708 const ulonglong end = my_interval_timer()
1709 + ulonglong(srv_replication_delay) * 1000000;
1710 while ((srv_conc_get_active_threads()
1711 >= srv_thread_concurrency)
1712 && my_interval_timer() < end) {
1713 os_thread_sleep(2000 /* 2 ms */);
1714 }
1715 } else {
1716 srv_conc_enter_innodb(prebuilt);
1717 }
1718 }
1719 }
1720
1721 /** Note that the thread wants to leave InnoDB only if it doesn't have
1722 any spare tickets.
1723 @param[in,out] m_prebuilt row prebuilt handler */
innobase_srv_conc_exit_innodb(row_prebuilt_t * prebuilt)1724 static inline void innobase_srv_conc_exit_innodb(row_prebuilt_t *prebuilt)
1725 {
1726 ut_ad(!sync_check_iterate(sync_check()));
1727
1728 trx_t* trx = prebuilt->trx;
1729
1730 #ifdef WITH_WSREP
1731 if (global_system_variables.wsrep_on &&
1732 (wsrep_thd_is_applying(trx->mysql_thd)
1733 || wsrep_thd_is_toi(trx->mysql_thd))) {
1734 return;
1735 }
1736 #endif /* WITH_WSREP */
1737
1738 /* This is to avoid making an unnecessary function call. */
1739 if (trx->declared_to_be_inside_innodb
1740 && trx->n_tickets_to_enter_innodb == 0) {
1741
1742 srv_conc_force_exit_innodb(trx);
1743 }
1744 }
1745
1746 /******************************************************************//**
1747 Force a thread to leave InnoDB even if it has spare tickets. */
1748 static inline
1749 void
innobase_srv_conc_force_exit_innodb(trx_t * trx)1750 innobase_srv_conc_force_exit_innodb(
1751 /*================================*/
1752 trx_t* trx) /*!< in: transaction handle */
1753 {
1754 ut_ad(!sync_check_iterate(sync_check()));
1755
1756 /* This is to avoid making an unnecessary function call. */
1757 if (trx->declared_to_be_inside_innodb) {
1758 srv_conc_force_exit_innodb(trx);
1759 }
1760 }
1761
1762 /******************************************************************//**
1763 Returns the NUL terminated value of glob_hostname.
1764 @return pointer to glob_hostname. */
1765 const char*
server_get_hostname()1766 server_get_hostname()
1767 /*=================*/
1768 {
1769 return(glob_hostname);
1770 }
1771
1772 /******************************************************************//**
1773 Returns true if the transaction this thread is processing has edited
1774 non-transactional tables. Used by the deadlock detector when deciding
1775 which transaction to rollback in case of a deadlock - we try to avoid
1776 rolling back transactions that have edited non-transactional tables.
1777 @return true if non-transactional tables have been edited */
1778 ibool
thd_has_edited_nontrans_tables(THD * thd)1779 thd_has_edited_nontrans_tables(
1780 /*===========================*/
1781 THD* thd) /*!< in: thread handle */
1782 {
1783 return((ibool) thd_non_transactional_update(thd));
1784 }
1785
1786 /* Return high resolution timestamp for the start of the current query */
1787 UNIV_INTERN
1788 unsigned long long
thd_query_start_micro(const THD * thd)1789 thd_query_start_micro(
1790 const THD* thd) /*!< in: thread handle */
1791 {
1792 return thd_start_utime(thd);
1793 }
1794
1795 /******************************************************************//**
1796 Returns true if the thread is executing a SELECT statement.
1797 @return true if thd is executing SELECT */
1798 ibool
thd_is_select(const THD * thd)1799 thd_is_select(
1800 /*==========*/
1801 const THD* thd) /*!< in: thread handle */
1802 {
1803 return(thd_sql_command(thd) == SQLCOM_SELECT);
1804 }
1805
1806 /******************************************************************//**
1807 Returns the lock wait timeout for the current connection.
1808 @return the lock wait timeout, in seconds */
1809 ulong
thd_lock_wait_timeout(THD * thd)1810 thd_lock_wait_timeout(
1811 /*==================*/
1812 THD* thd) /*!< in: thread handle, or NULL to query
1813 the global innodb_lock_wait_timeout */
1814 {
1815 /* According to <mysql/plugin.h>, passing thd == NULL
1816 returns the global value of the session variable. */
1817 return(THDVAR(thd, lock_wait_timeout));
1818 }
1819
1820 /** Get the value of innodb_tmpdir.
1821 @param[in] thd thread handle, or NULL to query
1822 the global innodb_tmpdir.
1823 @retval NULL if innodb_tmpdir="" */
1824 const char*
thd_innodb_tmpdir(THD * thd)1825 thd_innodb_tmpdir(
1826 THD* thd)
1827 {
1828 ut_ad(!sync_check_iterate(sync_check()));
1829
1830 const char* tmp_dir = THDVAR(thd, tmpdir);
1831
1832 if (tmp_dir != NULL && *tmp_dir == '\0') {
1833 tmp_dir = NULL;
1834 }
1835
1836 return(tmp_dir);
1837 }
1838
1839 /** Obtain the InnoDB transaction of a MySQL thread.
1840 @param[in,out] thd thread handle
1841 @return reference to transaction pointer */
thd_to_trx(THD * thd)1842 static trx_t* thd_to_trx(THD* thd)
1843 {
1844 return reinterpret_cast<trx_t*>(thd_get_ha_data(thd, innodb_hton_ptr));
1845 }
1846
1847 #ifdef WITH_WSREP
1848 /********************************************************************//**
1849 Obtain the InnoDB transaction id of a MySQL thread.
1850 @return transaction id */
1851 __attribute__((warn_unused_result, nonnull))
1852 ulonglong
thd_to_trx_id(THD * thd)1853 thd_to_trx_id(
1854 THD* thd) /*!< in: MySQL thread */
1855 {
1856 return(thd_to_trx(thd)->id);
1857 }
1858
1859 static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool);
1860 static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
1861 static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
1862 #endif /* WITH_WSREP */
1863 /********************************************************************//**
1864 Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
1865 time calls srv_active_wake_master_thread. This function should be used
1866 when a single database operation may introduce a small need for
1867 server utility activity, like checkpointing. */
1868 inline
1869 void
innobase_active_small(void)1870 innobase_active_small(void)
1871 /*=======================*/
1872 {
1873 innobase_active_counter++;
1874
1875 if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
1876 srv_active_wake_master_thread();
1877 }
1878 }
1879
1880 /********************************************************************//**
1881 Converts an InnoDB error code to a MySQL error code and also tells to MySQL
1882 about a possible transaction rollback inside InnoDB caused by a lock wait
1883 timeout or a deadlock.
1884 @return MySQL error code */
1885 static int
convert_error_code_to_mysql(dberr_t error,ulint flags,THD * thd)1886 convert_error_code_to_mysql(
1887 /*========================*/
1888 dberr_t error, /*!< in: InnoDB error code */
1889 ulint flags, /*!< in: InnoDB table flags, or 0 */
1890 THD* thd) /*!< in: user thread handle or NULL */
1891 {
1892 switch (error) {
1893 case DB_SUCCESS:
1894 return(0);
1895
1896 case DB_INTERRUPTED:
1897 return(HA_ERR_ABORTED_BY_USER);
1898
1899 case DB_FOREIGN_EXCEED_MAX_CASCADE:
1900 ut_ad(thd);
1901 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1902 HA_ERR_ROW_IS_REFERENCED,
1903 "InnoDB: Cannot delete/update "
1904 "rows with cascading foreign key "
1905 "constraints that exceed max "
1906 "depth of %d. Please "
1907 "drop extra constraints and try "
1908 "again", DICT_FK_MAX_RECURSIVE_LOAD);
1909 return(HA_ERR_FK_DEPTH_EXCEEDED);
1910
1911 case DB_CANT_CREATE_GEOMETRY_OBJECT:
1912 my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
1913 return(HA_ERR_NULL_IN_SPATIAL);
1914
1915 case DB_ERROR:
1916 default:
1917 return(HA_ERR_GENERIC); /* unspecified error */
1918
1919 case DB_DUPLICATE_KEY:
1920 /* Be cautious with returning this error, since
1921 mysql could re-enter the storage layer to get
1922 duplicated key info, the operation requires a
1923 valid table handle and/or transaction information,
1924 which might not always be available in the error
1925 handling stage. */
1926 return(HA_ERR_FOUND_DUPP_KEY);
1927
1928 case DB_READ_ONLY:
1929 return(HA_ERR_TABLE_READONLY);
1930
1931 case DB_FOREIGN_DUPLICATE_KEY:
1932 return(HA_ERR_FOREIGN_DUPLICATE_KEY);
1933
1934 case DB_MISSING_HISTORY:
1935 return(HA_ERR_TABLE_DEF_CHANGED);
1936
1937 case DB_RECORD_NOT_FOUND:
1938 return(HA_ERR_NO_ACTIVE_RECORD);
1939
1940 case DB_DEADLOCK:
1941 /* Since we rolled back the whole transaction, we must
1942 tell it also to MySQL so that MySQL knows to empty the
1943 cached binlog for this transaction */
1944
1945 if (thd != NULL) {
1946 thd_mark_transaction_to_rollback(thd, 1);
1947 }
1948
1949 return(HA_ERR_LOCK_DEADLOCK);
1950
1951 case DB_LOCK_WAIT_TIMEOUT:
1952 /* Starting from 5.0.13, we let MySQL just roll back the
1953 latest SQL statement in a lock wait timeout. Previously, we
1954 rolled back the whole transaction. */
1955
1956 if (thd) {
1957 thd_mark_transaction_to_rollback(
1958 thd, (bool) row_rollback_on_timeout);
1959 }
1960
1961 return(HA_ERR_LOCK_WAIT_TIMEOUT);
1962
1963 case DB_NO_REFERENCED_ROW:
1964 return(HA_ERR_NO_REFERENCED_ROW);
1965
1966 case DB_ROW_IS_REFERENCED:
1967 return(HA_ERR_ROW_IS_REFERENCED);
1968
1969 case DB_NO_FK_ON_S_BASE_COL:
1970 case DB_CANNOT_ADD_CONSTRAINT:
1971 case DB_CHILD_NO_INDEX:
1972 case DB_PARENT_NO_INDEX:
1973 return(HA_ERR_CANNOT_ADD_FOREIGN);
1974
1975 case DB_CANNOT_DROP_CONSTRAINT:
1976
1977 return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
1978 misleading, a new MySQL error
1979 code should be introduced */
1980
1981 case DB_CORRUPTION:
1982 return(HA_ERR_CRASHED);
1983
1984 case DB_OUT_OF_FILE_SPACE:
1985 return(HA_ERR_RECORD_FILE_FULL);
1986
1987 case DB_TEMP_FILE_WRITE_FAIL:
1988 my_error(ER_GET_ERRMSG, MYF(0),
1989 DB_TEMP_FILE_WRITE_FAIL,
1990 ut_strerr(DB_TEMP_FILE_WRITE_FAIL),
1991 "InnoDB");
1992 return(HA_ERR_INTERNAL_ERROR);
1993
1994 case DB_TABLE_IN_FK_CHECK:
1995 return(HA_ERR_TABLE_IN_FK_CHECK);
1996
1997 case DB_TABLE_IS_BEING_USED:
1998 return(HA_ERR_WRONG_COMMAND);
1999
2000 case DB_TABLE_NOT_FOUND:
2001 return(HA_ERR_NO_SUCH_TABLE);
2002
2003 case DB_DECRYPTION_FAILED:
2004 return(HA_ERR_DECRYPTION_FAILED);
2005
2006 case DB_TABLESPACE_NOT_FOUND:
2007 return(HA_ERR_TABLESPACE_MISSING);
2008
2009 case DB_TOO_BIG_RECORD: {
2010 /* If prefix is true then a 768-byte prefix is stored
2011 locally for BLOB fields. Refer to dict_table_get_format().
2012 We limit max record size to 16k for 64k page size. */
2013 bool prefix = !DICT_TF_HAS_ATOMIC_BLOBS(flags);
2014 bool comp = !!(flags & DICT_TF_COMPACT);
2015 ulint free_space = page_get_free_space_of_empty(comp) / 2;
2016
2017 if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
2018 REDUNDANT_REC_MAX_DATA_SIZE)) {
2019 free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
2020 REDUNDANT_REC_MAX_DATA_SIZE) - 1;
2021 }
2022
2023 my_printf_error(ER_TOO_BIG_ROWSIZE,
2024 "Row size too large (> " ULINTPF "). Changing some columns "
2025 "to TEXT or BLOB %smay help. In current row "
2026 "format, BLOB prefix of %d bytes is stored inline.",
2027 MYF(0),
2028 free_space,
2029 prefix
2030 ? "or using ROW_FORMAT=DYNAMIC or"
2031 " ROW_FORMAT=COMPRESSED "
2032 : "",
2033 prefix
2034 ? DICT_MAX_FIXED_COL_LEN
2035 : 0);
2036 return(HA_ERR_TO_BIG_ROW);
2037 }
2038
2039 case DB_TOO_BIG_INDEX_COL:
2040 my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
2041 (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
2042 return(HA_ERR_INDEX_COL_TOO_LONG);
2043
2044 case DB_NO_SAVEPOINT:
2045 return(HA_ERR_NO_SAVEPOINT);
2046
2047 case DB_LOCK_TABLE_FULL:
2048 /* Since we rolled back the whole transaction, we must
2049 tell it also to MySQL so that MySQL knows to empty the
2050 cached binlog for this transaction */
2051
2052 if (thd) {
2053 thd_mark_transaction_to_rollback(thd, 1);
2054 }
2055
2056 return(HA_ERR_LOCK_TABLE_FULL);
2057
2058 case DB_FTS_INVALID_DOCID:
2059 return(HA_FTS_INVALID_DOCID);
2060 case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
2061 return(HA_ERR_OUT_OF_MEM);
2062 case DB_TOO_MANY_CONCURRENT_TRXS:
2063 return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
2064 case DB_UNSUPPORTED:
2065 return(HA_ERR_UNSUPPORTED);
2066 case DB_INDEX_CORRUPT:
2067 return(HA_ERR_INDEX_CORRUPT);
2068 case DB_UNDO_RECORD_TOO_BIG:
2069 return(HA_ERR_UNDO_REC_TOO_BIG);
2070 case DB_OUT_OF_MEMORY:
2071 return(HA_ERR_OUT_OF_MEM);
2072 case DB_TABLESPACE_EXISTS:
2073 return(HA_ERR_TABLESPACE_EXISTS);
2074 case DB_TABLESPACE_DELETED:
2075 return(HA_ERR_TABLESPACE_MISSING);
2076 case DB_IDENTIFIER_TOO_LONG:
2077 return(HA_ERR_INTERNAL_ERROR);
2078 case DB_TABLE_CORRUPT:
2079 return(HA_ERR_TABLE_CORRUPT);
2080 case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
2081 return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
2082 case DB_COMPUTE_VALUE_FAILED:
2083 return(HA_ERR_GENERIC); // impossible
2084 }
2085 }
2086
2087 /*************************************************************//**
2088 Prints info of a THD object (== user session thread) to the given file. */
2089 void
innobase_mysql_print_thd(FILE * f,THD * thd,uint max_query_len)2090 innobase_mysql_print_thd(
2091 /*=====================*/
2092 FILE* f, /*!< in: output stream */
2093 THD* thd, /*!< in: MySQL THD object */
2094 uint max_query_len) /*!< in: max query length to print, or 0 to
2095 use the default max length */
2096 {
2097 char buffer[1024];
2098
2099 fputs(thd_get_error_context_description(thd, buffer, sizeof buffer,
2100 max_query_len), f);
2101 putc('\n', f);
2102 }
2103
2104 /******************************************************************//**
2105 Get the variable length bounds of the given character set. */
2106 void
innobase_get_cset_width(ulint cset,ulint * mbminlen,ulint * mbmaxlen)2107 innobase_get_cset_width(
2108 /*====================*/
2109 ulint cset, /*!< in: MySQL charset-collation code */
2110 ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
2111 ulint* mbmaxlen) /*!< out: maximum length of a char (in bytes) */
2112 {
2113 CHARSET_INFO* cs;
2114 ut_ad(cset <= MAX_CHAR_COLL_NUM);
2115 ut_ad(mbminlen);
2116 ut_ad(mbmaxlen);
2117
2118 cs = all_charsets[cset];
2119 if (cs) {
2120 *mbminlen = cs->mbminlen;
2121 *mbmaxlen = cs->mbmaxlen;
2122 ut_ad(*mbminlen < DATA_MBMAX);
2123 ut_ad(*mbmaxlen < DATA_MBMAX);
2124 } else {
2125 THD* thd = current_thd;
2126
2127 if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
2128
2129 /* Fix bug#46256: allow tables to be dropped if the
2130 collation is not found, but issue a warning. */
2131 if (cset != 0) {
2132
2133 sql_print_warning(
2134 "Unknown collation #" ULINTPF ".",
2135 cset);
2136 }
2137 } else {
2138
2139 ut_a(cset == 0);
2140 }
2141
2142 *mbminlen = *mbmaxlen = 0;
2143 }
2144 }
2145
2146 /******************************************************************//**
2147 Converts an identifier to a table name. */
2148 void
innobase_convert_from_table_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2149 innobase_convert_from_table_id(
2150 /*===========================*/
2151 CHARSET_INFO* cs, /*!< in: the 'from' character set */
2152 char* to, /*!< out: converted identifier */
2153 const char* from, /*!< in: identifier to convert */
2154 ulint len) /*!< in: length of 'to', in bytes */
2155 {
2156 uint errors;
2157
2158 strconvert(cs, from, FN_REFLEN, &my_charset_filename, to, (uint) len, &errors);
2159 }
2160
2161 /**********************************************************************
2162 Check if the length of the identifier exceeds the maximum allowed.
2163 return true when length of identifier is too long. */
2164 my_bool
innobase_check_identifier_length(const char * id)2165 innobase_check_identifier_length(
2166 /*=============================*/
2167 const char* id) /* in: FK identifier to check excluding the
2168 database portion. */
2169 {
2170 int well_formed_error = 0;
2171 CHARSET_INFO *cs = system_charset_info;
2172 DBUG_ENTER("innobase_check_identifier_length");
2173
2174 size_t len = my_well_formed_length(
2175 cs, id, id + strlen(id),
2176 NAME_CHAR_LEN, &well_formed_error);
2177
2178 if (well_formed_error || len == NAME_CHAR_LEN) {
2179 my_error(ER_TOO_LONG_IDENT, MYF(0), id);
2180 DBUG_RETURN(true);
2181 }
2182 DBUG_RETURN(false);
2183 }
2184
2185 /******************************************************************//**
2186 Converts an identifier to UTF-8. */
2187 void
innobase_convert_from_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2188 innobase_convert_from_id(
2189 /*=====================*/
2190 CHARSET_INFO* cs, /*!< in: the 'from' character set */
2191 char* to, /*!< out: converted identifier */
2192 const char* from, /*!< in: identifier to convert */
2193 ulint len) /*!< in: length of 'to', in bytes */
2194 {
2195 uint errors;
2196
2197 strconvert(cs, from, FN_REFLEN, system_charset_info, to, (uint) len, &errors);
2198 }
2199
2200 /******************************************************************//**
2201 Compares NUL-terminated UTF-8 strings case insensitively.
2202 @return 0 if a=b, <0 if a<b, >1 if a>b */
2203 int
innobase_strcasecmp(const char * a,const char * b)2204 innobase_strcasecmp(
2205 /*================*/
2206 const char* a, /*!< in: first string to compare */
2207 const char* b) /*!< in: second string to compare */
2208 {
2209 if (!a) {
2210 if (!b) {
2211 return(0);
2212 } else {
2213 return(-1);
2214 }
2215 } else if (!b) {
2216 return(1);
2217 }
2218
2219 return(my_strcasecmp(system_charset_info, a, b));
2220 }
2221
2222 /******************************************************************//**
2223 Compares NUL-terminated UTF-8 strings case insensitively. The
2224 second string contains wildcards.
2225 @return 0 if a match is found, 1 if not */
2226 static
2227 int
innobase_wildcasecmp(const char * a,const char * b)2228 innobase_wildcasecmp(
2229 /*=================*/
2230 const char* a, /*!< in: string to compare */
2231 const char* b) /*!< in: wildcard string to compare */
2232 {
2233 return(wild_case_compare(system_charset_info, a, b));
2234 }
2235
2236 /** Strip dir name from a full path name and return only the file name
2237 @param[in] path_name full path name
2238 @return file name or "null" if no file name */
2239 const char*
innobase_basename(const char * path_name)2240 innobase_basename(
2241 const char* path_name)
2242 {
2243 const char* name = base_name(path_name);
2244
2245 return((name) ? name : "null");
2246 }
2247
2248 /******************************************************************//**
2249 Makes all characters in a NUL-terminated UTF-8 string lower case. */
2250 void
innobase_casedn_str(char * a)2251 innobase_casedn_str(
2252 /*================*/
2253 char* a) /*!< in/out: string to put in lower case */
2254 {
2255 my_casedn_str(system_charset_info, a);
2256 }
2257
2258 /** Determines the current SQL statement.
2259 Thread unsafe, can only be called from the thread owning the THD.
2260 @param[in] thd MySQL thread handle
2261 @param[out] length Length of the SQL statement
2262 @return SQL statement string */
2263 const char*
innobase_get_stmt_unsafe(THD * thd,size_t * length)2264 innobase_get_stmt_unsafe(
2265 THD* thd,
2266 size_t* length)
2267 {
2268 if (const LEX_STRING *stmt = thd_query_string(thd)) {
2269 *length = stmt->length;
2270 return stmt->str;
2271 }
2272
2273 *length = 0;
2274 return NULL;
2275 }
2276
2277 /**********************************************************************//**
2278 Get the current setting of the tdc_size global parameter. We do
2279 a dirty read because for one there is no synchronization object and
2280 secondly there is little harm in doing so even if we get a torn read.
2281 @return value of tdc_size */
2282 ulint
innobase_get_table_cache_size(void)2283 innobase_get_table_cache_size(void)
2284 /*===============================*/
2285 {
2286 return(tdc_size);
2287 }
2288
2289 /**********************************************************************//**
2290 Get the current setting of the lower_case_table_names global parameter from
2291 mysqld.cc. We do a dirty read because for one there is no synchronization
2292 object and secondly there is little harm in doing so even if we get a torn
2293 read.
2294 @return value of lower_case_table_names */
2295 ulint
innobase_get_lower_case_table_names(void)2296 innobase_get_lower_case_table_names(void)
2297 /*=====================================*/
2298 {
2299 return(lower_case_table_names);
2300 }
2301
2302 /**
2303 Test a file path whether it is same as mysql data directory path.
2304
2305 @param path null terminated character string
2306
2307 @return
2308 @retval TRUE The path is different from mysql data directory.
2309 @retval FALSE The path is same as mysql data directory.
2310 */
is_mysql_datadir_path(const char * path)2311 static bool is_mysql_datadir_path(const char *path)
2312 {
2313 if (path == NULL)
2314 return false;
2315
2316 char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
2317 convert_dirname(path_dir, path, NullS);
2318 convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
2319 size_t mysql_data_home_len= dirname_length(mysql_data_dir);
2320 size_t path_len = dirname_length(path_dir);
2321
2322 if (path_len < mysql_data_home_len)
2323 return true;
2324
2325 if (!lower_case_file_system)
2326 return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
2327
2328 return(files_charset_info->coll->strnncoll(files_charset_info,
2329 (uchar *) path_dir, path_len,
2330 (uchar *) mysql_data_dir,
2331 mysql_data_home_len,
2332 TRUE));
2333 }
2334
mysql_tmpfile_path(const char * path,const char * prefix)2335 static int mysql_tmpfile_path(const char *path, const char *prefix)
2336 {
2337 DBUG_ASSERT(path != NULL);
2338 DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
2339
2340 char filename[FN_REFLEN];
2341 File fd = create_temp_file(filename, path, prefix, O_BINARY | O_SEQUENTIAL,
2342 MYF(MY_WME | MY_TEMPORARY));
2343 return fd;
2344 }
2345
2346 /** Creates a temporary file in the location specified by the parameter
2347 path. If the path is NULL, then it will be created in tmpdir.
2348 @param[in] path location for creating temporary file
2349 @return temporary file descriptor, or < 0 on error */
2350 os_file_t
innobase_mysql_tmpfile(const char * path)2351 innobase_mysql_tmpfile(
2352 const char* path)
2353 {
2354 #ifdef WITH_INNODB_DISALLOW_WRITES
2355 os_event_wait(srv_allow_writes_event);
2356 #endif /* WITH_INNODB_DISALLOW_WRITES */
2357 File fd;
2358
2359 DBUG_EXECUTE_IF(
2360 "innobase_tmpfile_creation_failure",
2361 return(OS_FILE_CLOSED);
2362 );
2363
2364 if (path == NULL) {
2365 fd = mysql_tmpfile("ib");
2366 } else {
2367 fd = mysql_tmpfile_path(path, "ib");
2368 }
2369
2370 if (fd < 0)
2371 return OS_FILE_CLOSED;
2372
2373 /* Copy the file descriptor, so that the additional resources
2374 allocated by create_temp_file() can be freed by invoking
2375 my_close().
2376
2377 Because the file descriptor returned by this function
2378 will be passed to fdopen(), it will be closed by invoking
2379 fclose(), which in turn will invoke close() instead of
2380 my_close(). */
2381
2382 #ifdef _WIN32
2383 /* Note that on Windows, the integer returned by mysql_tmpfile
2384 has no relation to C runtime file descriptor. Here, we need
2385 to call my_get_osfhandle to get the HANDLE and then convert it
2386 to C runtime filedescriptor. */
2387
2388 HANDLE hFile = my_get_osfhandle(fd);
2389 HANDLE hDup;
2390 BOOL bOK = DuplicateHandle(
2391 GetCurrentProcess(),
2392 hFile, GetCurrentProcess(),
2393 &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
2394 my_close(fd, MYF(MY_WME));
2395
2396 if (!bOK) {
2397 my_osmaperr(GetLastError());
2398 goto error;
2399 }
2400 return hDup;
2401 #else
2402 #ifdef F_DUPFD_CLOEXEC
2403 int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2404 #else
2405 int fd2 = dup(fd);
2406 #endif
2407 my_close(fd, MYF(MY_WME));
2408 if (fd2 < 0) {
2409 set_my_errno(errno);
2410 goto error;
2411 }
2412 return fd2;
2413 #endif
2414
2415 error:
2416 char errbuf[MYSYS_STRERROR_SIZE];
2417
2418 my_error(EE_OUT_OF_FILERESOURCES,
2419 MYF(0),
2420 "ib*", errno,
2421 my_strerror(errbuf, sizeof(errbuf), errno));
2422 return (OS_FILE_CLOSED);
2423 }
2424
2425 /*********************************************************************//**
2426 Wrapper around MySQL's copy_and_convert function.
2427 @return number of bytes copied to 'to' */
2428 static
2429 ulint
innobase_convert_string(void * to,ulint to_length,CHARSET_INFO * to_cs,const void * from,ulint from_length,CHARSET_INFO * from_cs,uint * errors)2430 innobase_convert_string(
2431 /*====================*/
2432 void* to, /*!< out: converted string */
2433 ulint to_length, /*!< in: number of bytes reserved
2434 for the converted string */
2435 CHARSET_INFO* to_cs, /*!< in: character set to convert to */
2436 const void* from, /*!< in: string to convert */
2437 ulint from_length, /*!< in: number of bytes to convert */
2438 CHARSET_INFO* from_cs, /*!< in: character set to convert
2439 from */
2440 uint* errors) /*!< out: number of errors encountered
2441 during the conversion */
2442 {
2443 return(copy_and_convert(
2444 (char*) to, (uint32) to_length, to_cs,
2445 (const char*) from, (uint32) from_length, from_cs,
2446 errors));
2447 }
2448
2449 /*******************************************************************//**
2450 Formats the raw data in "data" (in InnoDB on-disk format) that is of
2451 type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
2452 the result to "buf". The result is converted to "system_charset_info".
2453 Not more than "buf_size" bytes are written to "buf".
2454 The result is always NUL-terminated (provided buf_size > 0) and the
2455 number of bytes that were written to "buf" is returned (including the
2456 terminating NUL).
2457 @return number of bytes that were written */
2458 ulint
innobase_raw_format(const char * data,ulint data_len,ulint charset_coll,char * buf,ulint buf_size)2459 innobase_raw_format(
2460 /*================*/
2461 const char* data, /*!< in: raw data */
2462 ulint data_len, /*!< in: raw data length
2463 in bytes */
2464 ulint charset_coll, /*!< in: charset collation */
2465 char* buf, /*!< out: output buffer */
2466 ulint buf_size) /*!< in: output buffer size
2467 in bytes */
2468 {
2469 /* XXX we use a hard limit instead of allocating
2470 but_size bytes from the heap */
2471 CHARSET_INFO* data_cs;
2472 char buf_tmp[8192];
2473 ulint buf_tmp_used;
2474 uint num_errors;
2475
2476 data_cs = all_charsets[charset_coll];
2477
2478 buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
2479 system_charset_info,
2480 data, data_len, data_cs,
2481 &num_errors);
2482
2483 return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
2484 }
2485
/*
  nlz(x) yields the number of leading zero bits in the 64-bit binary
  representation of "x". Three implementations are selected at compile
  time: a compiler built-in (GCC-compatible compilers), bit-scan
  intrinsics (MSVC), or a portable substitute that multiplies by a
  constant and uses the top bits of the product as an index into a
  lookup table.
*/
#ifdef __GNUC__
#define nlz(x) __builtin_clzll(x)
#elif defined(_MSC_VER) && !defined(_M_CEE_PURE) && \
  (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
#ifndef __INTRIN_H_
#pragma warning(push, 4)
#pragma warning(disable: 4255 4668)
#include <intrin.h>
#pragma warning(pop)
#endif
__forceinline unsigned int nlz (ulonglong x)
{
#if defined(_M_IX86) || defined(_M_X64)
  unsigned long bit_index;
#ifdef _M_X64
  _BitScanReverse64(&bit_index, x);
  return (unsigned int) bit_index ^ 63;
#else
  /* Scan the upper 32-bit half, or the lower half if the upper is 0. */
  unsigned long half = (unsigned long) (x >> 32);
  unsigned int mask = 31;
  if (half == 0)
  {
    half = (unsigned long) x;
    mask = 63;
  }
  _BitScanReverse(&bit_index, half);
  return (unsigned int) bit_index ^ mask;
#endif
#elif defined(_M_ARM64)
  return _CountLeadingZeros(x);
#endif
}
#else
inline unsigned int nlz (ulonglong x)
{
  static unsigned char lookup [48] = {
    32,  6,  5,  0,  4, 12,  0, 20,
    15,  3, 11,  0,  0, 18, 25, 31,
     8, 14,  2,  0, 10,  0,  0,  0,
     0,  0,  0, 21,  0,  0, 19, 26,
     7,  0, 13,  0, 16,  1, 22, 27,
     9,  0, 17, 23, 28, 24, 29, 30
  };
  /* Work on the upper 32-bit half, or on the lower half if the upper is 0. */
  unsigned int word= (unsigned int) (x >> 32);
  unsigned int base= 0;
  if (word == 0) {
    word= (unsigned int) x;
    base= 32;
  }
  word |= word >> 1;       // Propagate leftmost 1-bit to the right.
  word |= word >> 2;
  word |= word >> 4;
  word |= word >> 8;
  word &= ~(word >> 16);
  word *= 0x3EF5D037;
  return base + lookup[word >> 26];
}
#endif
2551
2552 /*********************************************************************//**
2553 Compute the next autoinc value.
2554
2555 For MySQL replication the autoincrement values can be partitioned among
2556 the nodes. The offset is the start or origin of the autoincrement value
2557 for a particular node. For n nodes the increment will be n and the offset
2558 will be in the interval [1, n]. The formula tries to allocate the next
2559 value for a particular node.
2560
2561 Note: This function is also called with increment set to the number of
2562 values we want to reserve for multi-value inserts e.g.,
2563
2564 INSERT INTO T VALUES(), (), ();
2565
2566 innobase_next_autoinc() will be called with increment set to 3 where
2567 autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
2568 the multi-value INSERT above.
2569 @return the next value */
2570 ulonglong
innobase_next_autoinc(ulonglong current,ulonglong need,ulonglong step,ulonglong offset,ulonglong max_value)2571 innobase_next_autoinc(
2572 /*==================*/
2573 ulonglong current, /*!< in: Current value */
2574 ulonglong need, /*!< in: count of values needed */
2575 ulonglong step, /*!< in: AUTOINC increment step */
2576 ulonglong offset, /*!< in: AUTOINC offset */
2577 ulonglong max_value) /*!< in: max value for type */
2578 {
2579 ulonglong next_value;
2580 ulonglong block;
2581
2582 /* Should never be 0. */
2583 ut_a(need > 0);
2584 ut_a(step > 0);
2585 ut_a(max_value > 0);
2586
2587 /*
2588 We need to calculate the "block" value equal to the product
2589 "step * need". However, when calculating this product, an integer
2590 overflow can occur, so we cannot simply use the usual multiplication
2591 operation. The snippet below calculates the product of two numbers
2592 and detects an unsigned integer overflow:
2593 */
2594 unsigned int m= nlz(need);
2595 unsigned int n= nlz(step);
2596 if (m + n <= 8 * sizeof(ulonglong) - 2) {
2597 // The bit width of the original values is too large,
2598 // therefore we are guaranteed to get an overflow.
2599 goto overflow;
2600 }
2601 block = need * (step >> 1);
2602 if ((longlong) block < 0) {
2603 goto overflow;
2604 }
2605 block += block;
2606 if (step & 1) {
2607 block += need;
2608 if (block < need) {
2609 goto overflow;
2610 }
2611 }
2612
2613 /* Check for overflow. Current can be > max_value if the value
2614 is in reality a negative value. Also, the visual studio compiler
2615 converts large double values (which hypothetically can then be
2616 passed here as the values of the "current" parameter) automatically
2617 into unsigned long long datatype maximum value: */
2618 if (current > max_value) {
2619 goto overflow;
2620 }
2621
2622 /* According to MySQL documentation, if the offset is greater than
2623 the step then the offset is ignored. */
2624 if (offset > step) {
2625 offset = 0;
2626 }
2627
2628 /*
2629 Let's round the current value to within a step-size block:
2630 */
2631 if (current > offset) {
2632 next_value = current - offset;
2633 } else {
2634 next_value = offset - current;
2635 }
2636 next_value -= next_value % step;
2637
2638 /*
2639 Add an offset to the next value and check that the addition
2640 does not cause an integer overflow:
2641 */
2642 next_value += offset;
2643 if (next_value < offset) {
2644 goto overflow;
2645 }
2646
2647 /*
2648 Add a block to the next value and check that the addition
2649 does not cause an integer overflow:
2650 */
2651 next_value += block;
2652 if (next_value < block) {
2653 goto overflow;
2654 }
2655
2656 return(next_value);
2657
2658 overflow:
2659 /*
2660 Allow auto_increment to go over max_value up to max ulonglong.
2661 This allows us to detect that all values are exhausted.
2662 If we don't do this, we will return max_value several times
2663 and get duplicate key errors instead of auto increment value
2664 out of range:
2665 */
2666 return(~(ulonglong) 0);
2667 }
2668
2669 /********************************************************************//**
2670 Reset the autoinc value in the table.
2671 @return DB_SUCCESS if all went well else error code */
2672 UNIV_INTERN
2673 dberr_t
innobase_reset_autoinc(ulonglong autoinc)2674 ha_innobase::innobase_reset_autoinc(
2675 /*================================*/
2676 ulonglong autoinc) /*!< in: value to store */
2677 {
2678 dberr_t error;
2679
2680 error = innobase_lock_autoinc();
2681
2682 if (error == DB_SUCCESS) {
2683
2684 dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
2685 m_prebuilt->table->autoinc_mutex.unlock();
2686 }
2687
2688 return(error);
2689 }
2690
2691 /*******************************************************************//**
2692 Reset the auto-increment counter to the given value, i.e. the next row
2693 inserted will get the given value. This is called e.g. after TRUNCATE
2694 is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
2695 returned by storage engines that don't support this operation.
2696 @return 0 or error code */
2697 UNIV_INTERN
2698 int
reset_auto_increment(ulonglong value)2699 ha_innobase::reset_auto_increment(
2700 /*==============================*/
2701 ulonglong value) /*!< in: new value for table autoinc */
2702 {
2703 DBUG_ENTER("ha_innobase::reset_auto_increment");
2704
2705 dberr_t error;
2706
2707 update_thd(ha_thd());
2708
2709 error = row_lock_table_autoinc_for_mysql(m_prebuilt);
2710
2711 if (error != DB_SUCCESS) {
2712 DBUG_RETURN(convert_error_code_to_mysql(
2713 error, m_prebuilt->table->flags, m_user_thd));
2714 }
2715
2716 /* The next value can never be 0. */
2717 if (value == 0) {
2718 value = 1;
2719 }
2720
2721 innobase_reset_autoinc(value);
2722
2723 DBUG_RETURN(0);
2724 }
2725
2726 /*********************************************************************//**
2727 Initializes some fields in an InnoDB transaction object. */
2728 static
2729 void
innobase_trx_init(THD * thd,trx_t * trx)2730 innobase_trx_init(
2731 /*==============*/
2732 THD* thd, /*!< in: user thread handle */
2733 trx_t* trx) /*!< in/out: InnoDB transaction handle */
2734 {
2735 DBUG_ENTER("innobase_trx_init");
2736 DBUG_ASSERT(thd == trx->mysql_thd);
2737
2738 /* Ensure that thd_lock_wait_timeout(), which may be called
2739 while holding lock_sys.mutex, by lock_rec_enqueue_waiting(),
2740 will not end up acquiring LOCK_global_system_variables in
2741 intern_sys_var_ptr(). */
2742 THDVAR(thd, lock_wait_timeout);
2743
2744 trx->check_foreigns = !thd_test_options(
2745 thd, OPTION_NO_FOREIGN_KEY_CHECKS);
2746
2747 trx->check_unique_secondary = !thd_test_options(
2748 thd, OPTION_RELAXED_UNIQUE_CHECKS);
2749 #ifdef WITH_WSREP
2750 trx->wsrep = wsrep_on(thd);
2751 #endif
2752
2753 DBUG_VOID_RETURN;
2754 }
2755
2756 /*********************************************************************//**
2757 Allocates an InnoDB transaction for a MySQL handler object for DML.
2758 @return InnoDB transaction handle */
2759 trx_t*
innobase_trx_allocate(THD * thd)2760 innobase_trx_allocate(
2761 /*==================*/
2762 THD* thd) /*!< in: user thread handle */
2763 {
2764 trx_t* trx;
2765
2766 DBUG_ENTER("innobase_trx_allocate");
2767 DBUG_ASSERT(thd != NULL);
2768 DBUG_ASSERT(EQ_CURRENT_THD(thd));
2769
2770 trx = trx_create();
2771
2772 trx->mysql_thd = thd;
2773
2774 innobase_trx_init(thd, trx);
2775
2776 DBUG_RETURN(trx);
2777 }
2778
2779 /*********************************************************************//**
2780 Gets the InnoDB transaction handle for a MySQL handler object, creates
2781 an InnoDB transaction struct if the corresponding MySQL thread struct still
2782 lacks one.
2783 @return InnoDB transaction handle */
2784 static inline
2785 trx_t*
check_trx_exists(THD * thd)2786 check_trx_exists(
2787 /*=============*/
2788 THD* thd) /*!< in: user thread handle */
2789 {
2790 if (trx_t* trx = thd_to_trx(thd)) {
2791 ut_a(trx->magic_n == TRX_MAGIC_N);
2792 innobase_trx_init(thd, trx);
2793 return trx;
2794 } else {
2795 trx = innobase_trx_allocate(thd);
2796 thd_set_ha_data(thd, innodb_hton_ptr, trx);
2797 return trx;
2798 }
2799 }
2800
2801 /**
2802 Gets current trx.
2803
2804 This function may be called during InnoDB initialisation, when
2805 innodb_hton_ptr->slot is not yet set to meaningful value.
2806 */
2807
current_trx()2808 trx_t *current_trx()
2809 {
2810 THD *thd=current_thd;
2811 if (likely(thd != 0) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) {
2812 return thd_to_trx(thd);
2813 } else {
2814 return(NULL);
2815 }
2816 }
2817
2818 /*********************************************************************//**
2819 Note that a transaction has been registered with MySQL.
2820 @return true if transaction is registered with MySQL 2PC coordinator */
2821 static inline
2822 bool
trx_is_registered_for_2pc(const trx_t * trx)2823 trx_is_registered_for_2pc(
2824 /*======================*/
2825 const trx_t* trx) /* in: transaction */
2826 {
2827 return(trx->is_registered == 1);
2828 }
2829
2830 /*********************************************************************//**
2831 Note that a transaction has been registered with MySQL 2PC coordinator. */
2832 static inline
2833 void
trx_register_for_2pc(trx_t * trx)2834 trx_register_for_2pc(
2835 /*==================*/
2836 trx_t* trx) /* in: transaction */
2837 {
2838 trx->is_registered = 1;
2839 ut_ad(!trx->active_commit_ordered);
2840 }
2841
2842 /*********************************************************************//**
2843 Note that a transaction has been deregistered. */
2844 static inline
2845 void
trx_deregister_from_2pc(trx_t * trx)2846 trx_deregister_from_2pc(
2847 /*====================*/
2848 trx_t* trx) /* in: transaction */
2849 {
2850 trx->is_registered= false;
2851 trx->active_commit_ordered= false;
2852 }
2853
2854 /*********************************************************************//**
2855 Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
2856 Those flags are stored in .frm file and end up in the MySQL table object,
2857 but are frequently used inside InnoDB so we keep their copies into the
2858 InnoDB table object. */
2859 static
2860 void
innobase_copy_frm_flags_from_create_info(dict_table_t * innodb_table,const HA_CREATE_INFO * create_info)2861 innobase_copy_frm_flags_from_create_info(
2862 /*=====================================*/
2863 dict_table_t* innodb_table, /*!< in/out: InnoDB table */
2864 const HA_CREATE_INFO* create_info) /*!< in: create info */
2865 {
2866 ibool ps_on;
2867 ibool ps_off;
2868
2869 if (innodb_table->is_temporary()
2870 || innodb_table->no_rollback()) {
2871 /* Temp tables do not use persistent stats. */
2872 ps_on = FALSE;
2873 ps_off = TRUE;
2874 } else {
2875 ps_on = create_info->table_options
2876 & HA_OPTION_STATS_PERSISTENT;
2877 ps_off = create_info->table_options
2878 & HA_OPTION_NO_STATS_PERSISTENT;
2879 }
2880
2881 dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2882
2883 dict_stats_auto_recalc_set(
2884 innodb_table,
2885 create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2886 create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2887
2888 innodb_table->stats_sample_pages = create_info->stats_sample_pages;
2889 }
2890
2891 /*********************************************************************//**
2892 Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
2893 Those flags are stored in .frm file and end up in the MySQL table object,
2894 but are frequently used inside InnoDB so we keep their copies into the
2895 InnoDB table object. */
2896 void
innobase_copy_frm_flags_from_table_share(dict_table_t * innodb_table,const TABLE_SHARE * table_share)2897 innobase_copy_frm_flags_from_table_share(
2898 /*=====================================*/
2899 dict_table_t* innodb_table, /*!< in/out: InnoDB table */
2900 const TABLE_SHARE* table_share) /*!< in: table share */
2901 {
2902 ibool ps_on;
2903 ibool ps_off;
2904
2905 if (innodb_table->is_temporary()) {
2906 /* Temp tables do not use persistent stats */
2907 ps_on = FALSE;
2908 ps_off = TRUE;
2909 } else {
2910 ps_on = table_share->db_create_options
2911 & HA_OPTION_STATS_PERSISTENT;
2912 ps_off = table_share->db_create_options
2913 & HA_OPTION_NO_STATS_PERSISTENT;
2914 }
2915
2916 dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2917
2918 dict_stats_auto_recalc_set(
2919 innodb_table,
2920 table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2921 table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2922
2923 innodb_table->stats_sample_pages = table_share->stats_sample_pages;
2924 }
2925
2926 /*********************************************************************//**
2927 Construct ha_innobase handler. */
2928
ha_innobase(handlerton * hton,TABLE_SHARE * table_arg)2929 ha_innobase::ha_innobase(
2930 /*=====================*/
2931 handlerton* hton,
2932 TABLE_SHARE* table_arg)
2933 :handler(hton, table_arg),
2934 m_prebuilt(),
2935 m_user_thd(),
2936 m_int_table_flags(HA_REC_NOT_IN_SEQ
2937 | HA_NULL_IN_KEY
2938 | HA_CAN_VIRTUAL_COLUMNS
2939 | HA_CAN_INDEX_BLOBS
2940 | HA_CAN_SQL_HANDLER
2941 | HA_REQUIRES_KEY_COLUMNS_FOR_DELETE
2942 | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
2943 | HA_PRIMARY_KEY_IN_READ_INDEX
2944 | HA_BINLOG_ROW_CAPABLE
2945 | HA_CAN_GEOMETRY
2946 | HA_PARTIAL_COLUMN_READ
2947 | HA_TABLE_SCAN_ON_INDEX
2948 | HA_CAN_FULLTEXT
2949 | HA_CAN_FULLTEXT_EXT
2950 /* JAN: TODO: MySQL 5.7
2951 | HA_CAN_FULLTEXT_HINTS
2952 */
2953 | HA_CAN_EXPORT
2954 | HA_CAN_RTREEKEYS
2955 | HA_CAN_TABLES_WITHOUT_ROLLBACK
2956 | HA_CAN_ONLINE_BACKUPS
2957 | HA_CONCURRENT_OPTIMIZE
2958 | (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0)
2959 ),
2960 m_start_of_scan(),
2961 m_mysql_has_locked()
2962 {}
2963
2964 /*********************************************************************//**
2965 Destruct ha_innobase handler. */
2966
~ha_innobase()2967 ha_innobase::~ha_innobase()
2968 /*======================*/
2969 {
2970 }
2971
2972 /*********************************************************************//**
2973 Updates the user_thd field in a handle and also allocates a new InnoDB
2974 transaction handle if needed, and updates the transaction fields in the
2975 m_prebuilt struct. */
2976 void
update_thd(THD * thd)2977 ha_innobase::update_thd(
2978 /*====================*/
2979 THD* thd) /*!< in: thd to use the handle */
2980 {
2981 DBUG_ENTER("ha_innobase::update_thd");
2982 DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
2983 m_user_thd, thd));
2984
2985 /* The table should have been opened in ha_innobase::open(). */
2986 DBUG_ASSERT(m_prebuilt->table->get_ref_count() > 0);
2987
2988 trx_t* trx = check_trx_exists(thd);
2989
2990 ut_ad(trx->dict_operation_lock_mode == 0);
2991 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
2992
2993 if (m_prebuilt->trx != trx) {
2994
2995 row_update_prebuilt_trx(m_prebuilt, trx);
2996 }
2997
2998 m_user_thd = thd;
2999
3000 DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
3001 DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd));
3002
3003 DBUG_VOID_RETURN;
3004 }
3005
3006 /*********************************************************************//**
3007 Updates the user_thd field in a handle and also allocates a new InnoDB
3008 transaction handle if needed, and updates the transaction fields in the
3009 m_prebuilt struct. */
3010
3011 void
update_thd()3012 ha_innobase::update_thd()
3013 /*=====================*/
3014 {
3015 THD* thd = ha_thd();
3016
3017 ut_ad(EQ_CURRENT_THD(thd));
3018 update_thd(thd);
3019 }
3020
3021 /*********************************************************************//**
3022 Registers an InnoDB transaction with the MySQL 2PC coordinator, so that
3023 the MySQL XA code knows to call the InnoDB prepare and commit, or rollback
3024 for the transaction. This MUST be called for every transaction for which
3025 the user may call commit or rollback. Calling this several times to register
3026 the same transaction is allowed, too. This function also registers the
3027 current SQL statement. */
3028 static inline
3029 void
innobase_register_trx(handlerton * hton,THD * thd,trx_t * trx)3030 innobase_register_trx(
3031 /*==================*/
3032 handlerton* hton, /* in: Innobase handlerton */
3033 THD* thd, /* in: MySQL thd (connection) object */
3034 trx_t* trx) /* in: transaction to register */
3035 {
3036 /* JAN: TODO: MySQL 5.7 PSI
3037 const ulonglong trx_id = static_cast<const ulonglong>(
3038 trx_get_id_for_print(trx));
3039
3040 trans_register_ha(thd, FALSE, hton, &trx_id);
3041 */
3042 trans_register_ha(thd, FALSE, hton);
3043
3044 if (!trx_is_registered_for_2pc(trx)
3045 && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
3046
3047 //trans_register_ha(thd, TRUE, hton, &trx_id);
3048 trans_register_ha(thd, TRUE, hton);
3049 }
3050
3051 trx_register_for_2pc(trx);
3052 }
3053
3054 /* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
3055 ------------------------------------------------------------
3056
3057 1) The use of the query cache for TBL is disabled when there is an
3058 uncommitted change to TBL.
3059
2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and only allows transactions whose
id >= INV_TRX_ID (or which have no id assigned yet) to use the query cache.
3064
3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.
3068
3069 How this is implemented inside InnoDB:
3070
3071 1) Since every modification always sets an IX type table lock on the InnoDB
3072 table, it is easy to check if there can be uncommitted modifications for a
3073 table: just check if there are locks in the lock list of the table.
3074
3075 2) When a transaction inside InnoDB commits, it reads the global trx id
3076 counter and stores the value INV_TRX_ID to the tables on which it had a lock.
3077
3078 3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
3079 InnoDB calls an invalidate method for the MySQL query cache for that table.
3080
3081 How this is implemented inside sql_cache.cc:
3082
3083 1) The query cache for an InnoDB table TBL is invalidated immediately at an
3084 INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
3085 invalidation to the transaction commit.
3086
3087 2) To store or retrieve a value from the query cache of an InnoDB table TBL,
3088 any query must first ask InnoDB's permission. We must pass the thd as a
3089 parameter because InnoDB will look at the trx id, if any, associated with
3090 that thd. Also the full_name which is used as key to search for the table
3091 object. The full_name is a string containing the normalized path to the
3092 table in the canonical format.
3093
3094 3) Use of the query cache for InnoDB tables is now allowed also when
3095 AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
3096 put restrictions on the use of the query cache.
3097 */
3098
3099 /** Check if mysql can allow the transaction to read from/store to
3100 the query cache.
3101 @param[in] table table object
3102 @param[in] trx transaction object
3103 @return whether the storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check_low(const dict_table_t * table,trx_t * trx)3104 static bool innobase_query_caching_table_check_low(
3105 const dict_table_t* table,
3106 trx_t* trx)
3107 {
3108 /* The following conditions will decide the query cache
3109 retrieval or storing into:
3110
3111 (1) There should not be any locks on the table.
3112 (2) Someother trx shouldn't invalidate the cache before this
3113 transaction started.
3114 (3) Read view shouldn't exist. If exists then the view
3115 low_limit_id should be greater than or equal to the transaction that
3116 invalidates the cache for the particular table.
3117
3118 For read-only transaction: should satisfy (1) and (3)
3119 For read-write transaction: should satisfy (1), (2), (3) */
3120
3121 if (lock_table_get_n_locks(table)) {
3122 return false;
3123 }
3124
3125 if (trx->id && trx->id < table->query_cache_inv_trx_id) {
3126 return false;
3127 }
3128
3129 return !trx->read_view.is_open()
3130 || trx->read_view.low_limit_id()
3131 >= table->query_cache_inv_trx_id;
3132 }
3133
3134 /** Checks if MySQL at the moment is allowed for this table to retrieve a
3135 consistent read result, or store it to the query cache.
3136 @param[in,out] trx transaction
3137 @param[in] norm_name concatenation of database name,
3138 '/' char, table name
3139 @return whether storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check(trx_t * trx,const char * norm_name)3140 static bool innobase_query_caching_table_check(
3141 trx_t* trx,
3142 const char* norm_name)
3143 {
3144 dict_table_t* table = dict_table_open_on_name(
3145 norm_name, FALSE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
3146
3147 if (table == NULL) {
3148 return false;
3149 }
3150
3151 /* Start the transaction if it is not started yet */
3152 trx_start_if_not_started(trx, false);
3153
3154 bool allow = innobase_query_caching_table_check_low(table, trx);
3155
3156 dict_table_close(table, FALSE, FALSE);
3157
3158 if (allow) {
3159 /* If the isolation level is high, assign a read view for the
3160 transaction if it does not yet have one */
3161
3162 if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
3163 && !srv_read_only_mode
3164 && !trx->read_view.is_open()) {
3165
3166 /* Start the transaction if it is not started yet */
3167 trx_start_if_not_started(trx, false);
3168
3169 trx->read_view.open(trx);
3170 }
3171 }
3172
3173 return allow;
3174 }
3175
3176 /******************************************************************//**
3177 The MySQL query cache uses this to check from InnoDB if the query cache at
3178 the moment is allowed to operate on an InnoDB table. The SQL query must
3179 be a non-locking SELECT.
3180
3181 The query cache is allowed to operate on certain query only if this function
3182 returns TRUE for all tables in the query.
3183
3184 If thd is not in the autocommit state, this function also starts a new
3185 transaction for thd if there is no active trx yet, and assigns a consistent
3186 read view to it if there is no read view yet.
3187
3188 Why a deadlock of threads is not possible: the query cache calls this function
3189 at the start of a SELECT processing. Then the calling thread cannot be
3190 holding any InnoDB semaphores. The calling thread is holding the
3191 query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
3192 Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
3193 the InnoDB trx_sys.mutex.
3194 @return TRUE if permitted, FALSE if not; note that the value FALSE
3195 does not mean we should invalidate the query cache: invalidation is
3196 called explicitly */
3197 static
3198 my_bool
innobase_query_caching_of_table_permitted(THD * thd,const char * full_name,uint full_name_len,ulonglong *)3199 innobase_query_caching_of_table_permitted(
3200 /*======================================*/
3201 THD* thd, /*!< in: thd of the user who is trying to
3202 store a result to the query cache or
3203 retrieve it */
3204 const char* full_name, /*!< in: normalized path to the table */
3205 uint full_name_len, /*!< in: length of the normalized path
3206 to the table */
3207 ulonglong *)
3208 {
3209 char norm_name[1000];
3210 trx_t* trx = check_trx_exists(thd);
3211
3212 ut_a(full_name_len < 999);
3213
3214 if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
3215 /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
3216 plain SELECT if AUTOCOMMIT is not on. */
3217
3218 return(false);
3219 }
3220
3221 innobase_srv_conc_force_exit_innodb(trx);
3222
3223 if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
3224 && trx->n_mysql_tables_in_use == 0) {
3225 /* We are going to retrieve the query result from the query
3226 cache. This cannot be a store operation to the query cache
3227 because then MySQL would have locks on tables already.
3228
3229 TODO: if the user has used LOCK TABLES to lock the table,
3230 then we open a transaction in the call of row_.. below.
3231 That trx can stay open until UNLOCK TABLES. The same problem
3232 exists even if we do not use the query cache. MySQL should be
3233 modified so that it ALWAYS calls some cleanup function when
3234 the processing of a query ends!
3235
3236 We can imagine we instantaneously serialize this consistent
3237 read trx to the current trx id counter. If trx2 would have
3238 changed the tables of a query result stored in the cache, and
3239 trx2 would have already committed, making the result obsolete,
3240 then trx2 would have already invalidated the cache. Thus we
3241 can trust the result in the cache is ok for this query. */
3242
3243 return(true);
3244 }
3245
3246 /* Normalize the table name to InnoDB format */
3247 normalize_table_name(norm_name, full_name);
3248
3249 innobase_register_trx(innodb_hton_ptr, thd, trx);
3250
3251 return innobase_query_caching_table_check(trx, norm_name);
3252 }
3253
3254 /*****************************************************************//**
3255 Invalidates the MySQL query cache for the table. */
3256 void
innobase_invalidate_query_cache(trx_t * trx,const char * full_name)3257 innobase_invalidate_query_cache(
3258 /*============================*/
3259 trx_t* trx, /*!< in: transaction which
3260 modifies the table */
3261 const char* full_name) /*!< in: concatenation of
3262 database name, path separator,
3263 table name, null char NUL;
3264 NOTE that in Windows this is
3265 always in LOWER CASE! */
3266 {
3267 /* Note that the sync0mutex.h rank of the query cache mutex is just
3268 above the InnoDB trx_sys_t->lock. The caller of this function must
3269 not have latches of a lower rank. */
3270
3271 #ifdef HAVE_QUERY_CACHE
3272 char qcache_key_name[2 * (NAME_LEN + 1)];
3273 char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
3274 const char *key_ptr;
3275 size_t tabname_len;
3276
3277 // Extract the database name.
3278 key_ptr= strchr(full_name, '/');
3279 DBUG_ASSERT(key_ptr != NULL); // Database name should be present
3280 size_t dbname_len= size_t(key_ptr - full_name);
3281 memcpy(db_name, full_name, dbname_len);
3282 db_name[dbname_len]= '\0';
3283
3284 /* Construct the key("db-name\0table$name\0") for the query cache using
3285 the path name("db@002dname\0table@0024name\0") of the table in its
3286 canonical form. */
3287 dbname_len = filename_to_tablename(db_name, qcache_key_name,
3288 sizeof(qcache_key_name));
3289 tabname_len = filename_to_tablename(++key_ptr,
3290 (qcache_key_name + dbname_len + 1),
3291 sizeof(qcache_key_name) -
3292 dbname_len - 1);
3293
3294 /* Argument TRUE below means we are using transactions */
3295 mysql_query_cache_invalidate4(trx->mysql_thd,
3296 qcache_key_name,
3297 uint(dbname_len + tabname_len + 2),
3298 TRUE);
3299 #endif
3300 }
3301
3302 /** Quote a standard SQL identifier like index or column name.
3303 @param[in] file output stream
3304 @param[in] trx InnoDB transaction, or NULL
3305 @param[in] id identifier to quote */
3306 void
innobase_quote_identifier(FILE * file,trx_t * trx,const char * id)3307 innobase_quote_identifier(
3308 FILE* file,
3309 trx_t* trx,
3310 const char* id)
3311 {
3312 const int q = trx != NULL && trx->mysql_thd != NULL
3313 ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3314 : '`';
3315
3316 if (q == EOF) {
3317 fputs(id, file);
3318 } else {
3319 putc(q, file);
3320
3321 while (int c = *id++) {
3322 if (c == q) {
3323 putc(c, file);
3324 }
3325 putc(c, file);
3326 }
3327
3328 putc(q, file);
3329 }
3330 }
3331
3332 /** Quote a standard SQL identifier like tablespace, index or column name.
3333 @param[in] trx InnoDB transaction, or NULL
3334 @param[in] id identifier to quote
3335 @return quoted identifier */
3336 std::string
innobase_quote_identifier(trx_t * trx,const char * id)3337 innobase_quote_identifier(
3338 /*======================*/
3339 trx_t* trx,
3340 const char* id)
3341 {
3342 std::string quoted_identifier;
3343 const int q = trx != NULL && trx->mysql_thd != NULL
3344 ? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3345 : '`';
3346
3347 if (q == EOF) {
3348 quoted_identifier.append(id);
3349 } else {
3350 quoted_identifier += char(q);
3351 quoted_identifier.append(id);
3352 quoted_identifier += char(q);
3353 }
3354
3355 return (quoted_identifier);
3356 }
3357
3358 /** Convert a table name to the MySQL system_charset_info (UTF-8)
3359 and quote it.
3360 @param[out] buf buffer for converted identifier
3361 @param[in] buflen length of buf, in bytes
3362 @param[in] id identifier to convert
3363 @param[in] idlen length of id, in bytes
3364 @param[in] thd MySQL connection thread, or NULL
3365 @return pointer to the end of buf */
3366 static
3367 char*
innobase_convert_identifier(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3368 innobase_convert_identifier(
3369 char* buf,
3370 ulint buflen,
3371 const char* id,
3372 ulint idlen,
3373 THD* thd)
3374 {
3375 const char* s = id;
3376
3377 char nz[MAX_TABLE_NAME_LEN + 1];
3378 char nz2[MAX_TABLE_NAME_LEN + 1];
3379
3380 /* Decode the table name. The MySQL function expects
3381 a NUL-terminated string. The input and output strings
3382 buffers must not be shared. */
3383 ut_a(idlen <= MAX_TABLE_NAME_LEN);
3384 memcpy(nz, id, idlen);
3385 nz[idlen] = 0;
3386
3387 s = nz2;
3388 idlen = explain_filename(thd, nz, nz2, sizeof nz2,
3389 EXPLAIN_PARTITIONS_AS_COMMENT);
3390 if (idlen > buflen) {
3391 idlen = buflen;
3392 }
3393 memcpy(buf, s, idlen);
3394 return(buf + idlen);
3395 }
3396
3397 /*****************************************************************//**
3398 Convert a table name to the MySQL system_charset_info (UTF-8).
3399 @return pointer to the end of buf */
3400 char*
innobase_convert_name(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3401 innobase_convert_name(
3402 /*==================*/
3403 char* buf, /*!< out: buffer for converted identifier */
3404 ulint buflen, /*!< in: length of buf, in bytes */
3405 const char* id, /*!< in: table name to convert */
3406 ulint idlen, /*!< in: length of id, in bytes */
3407 THD* thd) /*!< in: MySQL connection thread, or NULL */
3408 {
3409 char* s = buf;
3410 const char* bufend = buf + buflen;
3411
3412 const char* slash = (const char*) memchr(id, '/', idlen);
3413
3414 if (slash == NULL) {
3415 return(innobase_convert_identifier(
3416 buf, buflen, id, idlen, thd));
3417 }
3418
3419 /* Print the database name and table name separately. */
3420 s = innobase_convert_identifier(s, ulint(bufend - s),
3421 id, ulint(slash - id), thd);
3422 if (s < bufend) {
3423 *s++ = '.';
3424 s = innobase_convert_identifier(s, ulint(bufend - s),
3425 slash + 1, idlen
3426 - ulint(slash - id) - 1,
3427 thd);
3428 }
3429
3430 return(s);
3431 }
3432
3433 /*****************************************************************//**
3434 A wrapper function of innobase_convert_name(), convert a table name
3435 to the MySQL system_charset_info (UTF-8) and quote it if needed.
3436 @return pointer to the end of buf */
3437 void
innobase_format_name(char * buf,ulint buflen,const char * name)3438 innobase_format_name(
3439 /*==================*/
3440 char* buf, /*!< out: buffer for converted identifier */
3441 ulint buflen, /*!< in: length of buf, in bytes */
3442 const char* name) /*!< in: table name to format */
3443 {
3444 const char* bufend;
3445
3446 bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL);
3447
3448 ut_ad((ulint) (bufend - buf) < buflen);
3449
3450 buf[bufend - buf] = '\0';
3451 }
3452
3453 /**********************************************************************//**
3454 Determines if the currently running transaction has been interrupted.
3455 @return true if interrupted */
3456 bool
trx_is_interrupted(const trx_t * trx)3457 trx_is_interrupted(
3458 /*===============*/
3459 const trx_t* trx) /*!< in: transaction */
3460 {
3461 return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
3462 }
3463
3464 /**************************************************************//**
3465 Resets some fields of a m_prebuilt struct. The template is used in fast
3466 retrieval of just those column values MySQL needs in its processing. */
3467 void
reset_template(void)3468 ha_innobase::reset_template(void)
3469 /*=============================*/
3470 {
3471 ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
3472 ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n);
3473
3474 /* Force table to be freed in close_thread_table(). */
3475 DBUG_EXECUTE_IF("free_table_in_fts_query",
3476 if (m_prebuilt->in_fts_query) {
3477 table->mark_table_for_reopen();
3478 }
3479 );
3480
3481 m_prebuilt->keep_other_fields_on_keyread = false;
3482 m_prebuilt->read_just_key = 0;
3483 m_prebuilt->in_fts_query = 0;
3484
3485 /* Reset index condition pushdown state. */
3486 if (m_prebuilt->idx_cond) {
3487 m_prebuilt->idx_cond = NULL;
3488 m_prebuilt->idx_cond_n_cols = 0;
3489 /* Invalidate m_prebuilt->mysql_template
3490 in ha_innobase::write_row(). */
3491 m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3492 }
3493 if (m_prebuilt->pk_filter) {
3494 m_prebuilt->pk_filter = NULL;
3495 m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3496 }
3497 }
3498
3499 /*****************************************************************//**
3500 Call this when you have opened a new table handle in HANDLER, before you
3501 call index_read_map() etc. Actually, we can let the cursor stay open even
3502 over a transaction commit! Then you should call this before every operation,
3503 fetch next etc. This function inits the necessary things even after a
3504 transaction commit. */
3505
3506 void
init_table_handle_for_HANDLER(void)3507 ha_innobase::init_table_handle_for_HANDLER(void)
3508 /*============================================*/
3509 {
3510 /* If current thd does not yet have a trx struct, create one.
3511 If the current handle does not yet have a m_prebuilt struct, create
3512 one. Update the trx pointers in the m_prebuilt struct. Normally
3513 this operation is done in external_lock. */
3514
3515 update_thd(ha_thd());
3516
3517 /* Initialize the m_prebuilt struct much like it would be inited in
3518 external_lock */
3519
3520 innobase_srv_conc_force_exit_innodb(m_prebuilt->trx);
3521
3522 /* If the transaction is not started yet, start it */
3523
3524 trx_start_if_not_started_xa(m_prebuilt->trx, false);
3525
3526 /* Assign a read view if the transaction does not have it yet */
3527
3528 m_prebuilt->trx->read_view.open(m_prebuilt->trx);
3529
3530 innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
3531
3532 /* We did the necessary inits in this function, no need to repeat them
3533 in row_search_for_mysql */
3534
3535 m_prebuilt->sql_stat_start = FALSE;
3536
3537 /* We let HANDLER always to do the reads as consistent reads, even
3538 if the trx isolation level would have been specified as SERIALIZABLE */
3539
3540 m_prebuilt->select_lock_type = LOCK_NONE;
3541 m_prebuilt->stored_select_lock_type = LOCK_NONE;
3542
3543 /* Always fetch all columns in the index record */
3544
3545 m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
3546
3547 /* We want always to fetch all columns in the whole row? Or do
3548 we???? */
3549
3550 m_prebuilt->used_in_HANDLER = TRUE;
3551
3552 reset_template();
3553 }
3554
3555 /*********************************************************************//**
3556 Free any resources that were allocated and return failure.
3557 @return always return 1 */
innodb_init_abort()3558 static int innodb_init_abort()
3559 {
3560 DBUG_ENTER("innodb_init_abort");
3561
3562 if (fil_system.temp_space) {
3563 fil_system.temp_space->close();
3564 }
3565
3566 srv_sys_space.shutdown();
3567 if (srv_tmp_space.get_sanity_check_status()) {
3568 srv_tmp_space.delete_files();
3569 }
3570 srv_tmp_space.shutdown();
3571
3572 #ifdef WITH_INNODB_DISALLOW_WRITES
3573 os_event_destroy(srv_allow_writes_event);
3574 #endif /* WITH_INNODB_DISALLOW_WRITES */
3575 DBUG_RETURN(1);
3576 }
3577
3578 /** Return the minimum buffer pool size based on page size */
min_buffer_pool_size()3579 static inline ulint min_buffer_pool_size()
3580 {
3581 ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size;
3582 /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */
3583 ulint alignment= 1U << 20;
3584 return UT_CALC_ALIGN(s, alignment);
3585 }
3586
/** Validate the requested buffer pool size. Also, reserve the necessary
memory needed for buffer pool resize.
This is a forward declaration: the function is referenced as the check
callback of the buffer_pool_size system variable declared below.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	save	immediate result for update function
@param[in]	value	incoming string
@return 0 on success, 1 on failure.
*/
static
int
innodb_buffer_pool_size_validate(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				save,
	struct st_mysql_value*		value);

/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
This is a forward declaration: the function is referenced as the update
callback of the buffer_pool_size system variable declared below.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	var_ptr	where the formal string goes
@param[in]	save	immediate result from check function */
static
void
innodb_buffer_pool_size_update(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				var_ptr,
	const void*			save);
3616
/* If the default value of innodb_buffer_pool_size is increased to be more than
BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
can be removed and 8 used instead. The problem with the current setup is that
with 128MiB default buffer pool size and 8 instances by default we would emit
a warning when no options are specified. */
/* System variable buffer_pool_size, backed by innobase_buffer_pool_size and
wired to the validate/update callbacks declared above.
NOTE(review): the four trailing numbers are presumably default (128 MiB),
minimum (2 MiB), maximum (LLONG_MAX) and block size (1 MiB), following the
usual MYSQL_SYSVAR argument order; confirm against the plugin API. */
static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  innodb_buffer_pool_size_validate,
  innodb_buffer_pool_size_update,
  128ULL << 20,
  2ULL << 20,
  LLONG_MAX, 1024*1024L);
3630
/** Deprecation message about innodb_idle_flush_pct */
static const char*	deprecated_idle_flush_pct
	= "innodb_idle_flush_pct is DEPRECATED and has no effect.";

/** Deprecation message about innodb_checksum_algorithm values other
than the crc32 family; pushed as a warning by
innodb_checksum_algorithm_update(). */
static const char*	deprecated_innodb_checksum_algorithm
	= "Setting innodb_checksum_algorithm to values other than"
	" crc32, full_crc32, strict_crc32 or strict_full_crc32"
	" is UNSAFE and DEPRECATED."
	" These deprecated values will be disallowed in MariaDB 10.6.";

/** Storage for the innodb_idle_flush_pct setting, which according to
the deprecation message above has no effect. */
static ulong innodb_idle_flush_pct;
3642
3643 /** If applicable, emit a message that log checksums cannot be disabled.
3644 @param[in,out] thd client session, or NULL if at startup
3645 @param[in] check whether redo log block checksums are enabled
3646 @return whether redo log block checksums are enabled */
3647 static inline
3648 bool
innodb_log_checksums_func_update(THD * thd,bool check)3649 innodb_log_checksums_func_update(THD* thd, bool check)
3650 {
3651 static const char msg[] = "innodb_log_checksums is deprecated"
3652 " and has no effect outside recovery";
3653
3654 ut_ad(!thd == !srv_was_started);
3655
3656 if (!check) {
3657 if (thd) {
3658 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3659 HA_ERR_UNSUPPORTED, msg);
3660 check = true;
3661 } else {
3662 sql_print_warning(msg);
3663 }
3664 }
3665
3666 return(check);
3667 }
3668
innodb_checksum_algorithm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)3669 static void innodb_checksum_algorithm_update(THD *thd, st_mysql_sys_var*,
3670 void *, const void *save)
3671 {
3672 srv_checksum_algorithm= *static_cast<const ulong*>(save);
3673 switch (srv_checksum_algorithm) {
3674 case SRV_CHECKSUM_ALGORITHM_CRC32:
3675 case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
3676 case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
3677 case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
3678 break;
3679 default:
3680 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3681 HA_ERR_UNSUPPORTED,
3682 deprecated_innodb_checksum_algorithm);
3683 }
3684 }
3685
/****************************************************************//**
Gives the file extension of an InnoDB single-table tablespace.
The list consists of dot_ext[IBD] and dot_ext[ISL] and is terminated by
NullS; innodb_init() stores it in handlerton::tablefile_extensions. */
static const char* ha_innobase_exts[] = {
	dot_ext[IBD],
	dot_ext[ISL],
	NullS
};
3693
3694 /** Determine if system-versioned data was modified by the transaction.
3695 @param[in,out] thd current session
3696 @param[out] trx_id transaction start ID
3697 @return transaction commit ID
3698 @retval 0 if no system-versioned data was affected by the transaction */
innodb_prepare_commit_versioned(THD * thd,ulonglong * trx_id)3699 static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
3700 {
3701 if (const trx_t* trx = thd_to_trx(thd)) {
3702 *trx_id = trx->id;
3703
3704 for (trx_mod_tables_t::const_iterator t
3705 = trx->mod_tables.begin();
3706 t != trx->mod_tables.end(); t++) {
3707 if (t->second.is_versioned()) {
3708 DBUG_ASSERT(t->first->versioned_by_id());
3709 DBUG_ASSERT(trx->rsegs.m_redo.rseg);
3710
3711 return trx_sys.get_new_trx_id();
3712 }
3713 }
3714
3715 return 0;
3716 }
3717
3718 *trx_id = 0;
3719 return 0;
3720 }
3721
3722 /** Initialize and normalize innodb_buffer_pool_size. */
innodb_buffer_pool_size_init()3723 static void innodb_buffer_pool_size_init()
3724 {
3725 if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
3726
3727 if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
3728 #if defined(_WIN32) && !defined(_WIN64)
3729 /* Do not allocate too large of a buffer pool on
3730 Windows 32-bit systems, which can have trouble
3731 allocating larger single contiguous memory blocks. */
3732 srv_buf_pool_size = ulint(
3733 ut_uint64_align_up(srv_buf_pool_size,
3734 srv_buf_pool_chunk_unit));
3735 srv_buf_pool_instances = std::min<ulong>(
3736 MAX_BUFFER_POOLS,
3737 ulong(srv_buf_pool_size
3738 / srv_buf_pool_chunk_unit));
3739 #else /* defined(_WIN32) && !defined(_WIN64) */
3740 /* Default to 8 instances when size > 1GB. */
3741 srv_buf_pool_instances = 8;
3742 #endif /* defined(_WIN32) && !defined(_WIN64) */
3743 }
3744 } else {
3745 /* If buffer pool is less than 1 GiB, assume fewer
3746 threads. Also use only one buffer pool instance. */
3747 if (srv_buf_pool_instances != srv_buf_pool_instances_default
3748 && srv_buf_pool_instances != 1) {
3749 /* We can't distinguish whether the user has explicitly
3750 started mysqld with --innodb-buffer-pool-instances=0,
3751 (srv_buf_pool_instances_default is 0) or has not
3752 specified that option at all. Thus we have the
3753 limitation that if the user started with =0, we
3754 will not emit a warning here, but we should actually
3755 do so. */
3756 ib::info()
3757 << "Adjusting innodb_buffer_pool_instances"
3758 " from " << srv_buf_pool_instances << " to 1"
3759 " since innodb_buffer_pool_size is less than "
3760 << BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
3761 << " MiB";
3762 }
3763
3764 srv_buf_pool_instances = 1;
3765 }
3766
3767 if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
3768 > srv_buf_pool_size) {
3769 /* Size unit of buffer pool is larger than srv_buf_pool_size.
3770 adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
3771 srv_buf_pool_chunk_unit
3772 = static_cast<ulong>(srv_buf_pool_size)
3773 / srv_buf_pool_instances;
3774 if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
3775 ++srv_buf_pool_chunk_unit;
3776 }
3777 }
3778
3779 srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
3780 innobase_buffer_pool_size = srv_buf_pool_size;
3781 }
3782
/** Initialize, validate and normalize the InnoDB startup parameters.
Emits deprecation warnings, rejects unsupported or inconsistent settings,
copies option values into the srv_* variables and adjusts interdependent
settings. Called from innodb_init() before srv_start().
@return failure code
@retval 0	on success
@retval HA_ERR_OUT_OF_MEM	when innodb_buffer_pool_size exceeds
				UINT_MAX32 and sizeof(ulint) is 4
@retval HA_ERR_INITIALIZATION	when some parameters are out of range */
static int innodb_init_params()
{
	DBUG_ENTER("innodb_init_params");

	/* Holds FN_CURLIB, FN_LIBCHAR and a terminating 0; static because
	pointers to it are stored in global variables below. */
	static char	current_dir[3];
	char		*default_path;
	ulong		num_pll_degree;

	if (innodb_large_prefix || innodb_file_format) {
		/* Only one parameter is named in the warning;
		innodb_file_format takes precedence if both are set. */
		const char*	p = innodb_file_format
			? "file_format"
			: "large_prefix";
		sql_print_warning("The parameter innodb_%s is deprecated"
				  " and has no effect."
				  " It may be removed in future releases."
				  " See https://mariadb.com/kb/en/library/"
				  "xtradbinnodb-file-format/", p);
	}

	/* Check that values don't overflow on 32-bit systems. */
	if (sizeof(ulint) == 4) {
		if (innobase_buffer_pool_size > UINT_MAX32) {
			sql_print_error(
				"innodb_buffer_pool_size can't be over 4GB"
				" on 32-bit systems");
			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
		}
	}

	/* The buffer pool needs to be able to accommodate a sufficient
	number of pages, even for larger page sizes. */
	MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size();

	if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
		ib::error() << "innodb_page_size="
			<< srv_page_size << " requires "
			<< "innodb_buffer_pool_size >= "
			<< (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20)
			<< "MiB current " << (innobase_buffer_pool_size >> 20)
			<< "MiB";
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
		ib::warn() << "The parameter innodb_lock_schedule_algorithm"
			" is deprecated, and the setting"
			" innodb_lock_schedule_algorithm=vats"
			" may cause corruption. The parameter may be removed"
			" in future releases.";

#ifdef WITH_WSREP
		/* Currently, Galera does not support VATS lock schedule algorithm. */
		if (global_system_variables.wsrep_on) {
			ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
			innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
		}
#endif /* WITH_WSREP */
	}

#ifdef WITH_WSREP
	/* Print deprecation info if xtrabackup is used for SST method */
	/* NOTE(review): the two adjacent literals below produce a double
	space ("and the  support") in the logged message. */
	if (global_system_variables.wsrep_on
	    && wsrep_sst_method
	    && (!strcmp(wsrep_sst_method, "xtrabackup")
	        || !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
		ib::info() << "Galera SST method xtrabackup is deprecated and the "
			" support for it may be removed in future releases.";
	}
#endif /* WITH_WSREP */

	/* Refuse to start with a page compression algorithm whose library
	was not available at build time (HAVE_* not defined). */
#ifndef HAVE_LZ4
	if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblz4 is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_LZO
	if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblzo is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_LZMA
	if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblzma is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_BZIP2
	if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: libbz2 is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_SNAPPY
	if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: libsnappy is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

	/* Any form of encryption requires the default encryption key
	to exist. */
	if ((srv_encrypt_tables || srv_encrypt_log
	     || innodb_encrypt_temporary_tables)
	     && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
		sql_print_error("InnoDB: cannot enable encryption, "
				"encryption plugin is not available");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

#ifdef _WIN32
	if (!is_filename_allowed(srv_buf_dump_filename,
				 strlen(srv_buf_dump_filename), FALSE)) {
		sql_print_error("InnoDB: innodb_buffer_pool_filename"
			" cannot have colon (:) in the file name.");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

	if (mysqld_embedded) {
		default_path = mysql_real_data_home;
	} else {
		/* It's better to use current lib, to keep paths short */
		current_dir[0] = FN_CURLIB;
		current_dir[1] = FN_LIBCHAR;
		current_dir[2] = 0;
		default_path = current_dir;
	}

	ut_a(default_path);

	fil_path_to_mysql_datadir = default_path;

	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */

	/* The default dir for data files is the datadir of MySQL */

	srv_data_home = innobase_data_home_dir
		? innobase_data_home_dir : default_path;
#ifdef WITH_WSREP
	/* If we use the wsrep API, then we need to tell the server
	the path to the data files (for passing it to the SST scripts): */
	wsrep_set_data_home_dir(srv_data_home);
#endif /* WITH_WSREP */


	/*--------------- Shared tablespaces -------------------------*/

	/* Check that the value of system variable innodb_page_size was
	set correctly. Its value was put into srv_page_size. If valid,
	return the associated srv_page_size_shift. */
	srv_page_size_shift = innodb_page_size_validate(srv_page_size);
	if (!srv_page_size_shift) {
		sql_print_error("InnoDB: Invalid page size=%lu.\n",
				srv_page_size);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_sys_space.set_space_id(TRX_SYS_SPACE);

	/* The system tablespace flags depend on whether one of the
	full_crc32 checksum algorithms was chosen. */
	switch (srv_checksum_algorithm) {
	case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
		srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER
					| FSP_FLAGS_FCRC32_PAGE_SSIZE());
		break;
	default:
		srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
	}

	srv_sys_space.set_name("innodb_system");
	srv_sys_space.set_path(srv_data_home);

	/* Supports raw devices */
	if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
		ib::error() << "Unable to parse innodb_data_file_path="
			    << innobase_data_file_path;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_tmp_space.set_name("innodb_temporary");
	srv_tmp_space.set_path(srv_data_home);

	/* Temporary tablespace is in full crc32 format. */
	srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER
				| FSP_FLAGS_FCRC32_PAGE_SSIZE());

	if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
		ib::error() << "Unable to parse innodb_temp_data_file_path="
			    << innobase_temp_data_file_path;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	/* Perform all sanity checks before we take the action of
	deleting files. */
	if (srv_sys_space.intersection(&srv_tmp_space)) {
		sql_print_error("%s and %s file names seem to be the same.",
				srv_tmp_space.name(), srv_sys_space.name());
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_sys_space.normalize_size();
	srv_tmp_space.normalize_size();

	/* ------------ UNDO tablespaces files ---------------------*/
	if (!srv_undo_dir) {
		srv_undo_dir = default_path;
	}

	os_normalize_path(srv_undo_dir);

	if (strchr(srv_undo_dir, ';')) {
		sql_print_error("syntax error in innodb_undo_directory");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	/* -------------- All log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */

	if (!srv_log_group_home_dir) {
		srv_log_group_home_dir = default_path;
	}

	os_normalize_path(srv_log_group_home_dir);

	if (strchr(srv_log_group_home_dir, ';')) {
		sql_print_error("syntax error in innodb_log_group_home_dir");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	if (srv_n_log_files * srv_log_file_size >= log_group_max_size) {
		/* Log group size is limited by the size of page number.
		Remove this limitation when fil_io() is not used for
		recovery log io. */
		ib::error() << "Combined size of log files must be < "
			<< log_group_max_size;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);

	/* Check that interdependent parameters have sane values. */
	if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
		sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
				  " cannot be set higher than"
				  " innodb_max_dirty_pages_pct.\n"
				  "InnoDB: Setting"
				  " innodb_max_dirty_pages_pct_lwm to %lf\n",
				  srv_max_buf_pool_modified_pct);

		srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
	}

	if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {

		if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
			/* Avoid overflow. */
			srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
		} else {
			/* The user has not set the value. We should
			set it based on innodb_io_capacity. */
			srv_max_io_capacity =
				ut_max(2 * srv_io_capacity, 2000UL);
		}

	} else if (srv_max_io_capacity < srv_io_capacity) {
		/* NOTE(review): the literals below are concatenated without
		a space after "innodb_io_capacity_max.", so the logged text
		reads "innodb_io_capacity_max.Setting ...". */
		sql_print_warning("InnoDB: innodb_io_capacity"
				  " cannot be set higher than"
				  " innodb_io_capacity_max."
				  "Setting innodb_io_capacity=%lu",
				  srv_max_io_capacity);

		srv_io_capacity = srv_max_io_capacity;
	}

	if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
		ib::info() << "innodb_page_size=" << srv_page_size;

		/* Raise innodb_max_undo_log_size to at least
		SRV_UNDO_TABLESPACE_SIZE_IN_PAGES pages of the chosen
		page size. */
		srv_max_undo_log_size = std::max(
			srv_max_undo_log_size,
			ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
			<< srv_page_size_shift);
	}

	/* Cap innodb_log_write_ahead_size at the page size. Otherwise,
	if it is not OS_FILE_LOG_BLOCK_SIZE multiplied by a power of two,
	round it down to the nearest such value (this assumes the setting
	is at least OS_FILE_LOG_BLOCK_SIZE). */
	if (srv_log_write_ahead_size > srv_page_size) {
		srv_log_write_ahead_size = srv_page_size;
	} else {
		ulong	srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE;

		while (srv_log_write_ahead_size_tmp
		       < srv_log_write_ahead_size) {
			srv_log_write_ahead_size_tmp
				= srv_log_write_ahead_size_tmp * 2;
		}
		if (srv_log_write_ahead_size_tmp
		    != srv_log_write_ahead_size) {
			srv_log_write_ahead_size
				= srv_log_write_ahead_size_tmp / 2;
		}
	}

	srv_buf_pool_size = ulint(innobase_buffer_pool_size);

	if (!innobase_use_checksums) {
		/* innodb_checksums=OFF overrides
		innodb_checksum_algorithm. */
		ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
			" This option was removed in MariaDB 10.5.";
		srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
	} else {
		switch (srv_checksum_algorithm) {
		case SRV_CHECKSUM_ALGORITHM_CRC32:
		case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
		case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
		case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
			break;
		default:
			ib::warn() << deprecated_innodb_checksum_algorithm;
		}
	}

	/* thd == NULL: at startup the warning goes to the server log. */
	innodb_log_checksums = innodb_log_checksums_func_update(
		NULL, innodb_log_checksums);

	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;

	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
	if (innobase_locks_unsafe_for_binlog) {
		ib::warn() << "Using innodb_locks_unsafe_for_binlog is"
			" DEPRECATED. This option may be removed in future"
			" releases. Please use READ COMMITTED transaction"
			" isolation level instead; " << SET_TRANSACTION_MSG;
	}

	/* A value below 10 is replaced by 300, or by tc_size when
	file-per-table is enabled and 300 < tc_size < open_files_limit. */
	if (innobase_open_files < 10) {
		innobase_open_files = 300;
		if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) {
			innobase_open_files = tc_size;
		}
	}

	/* A value above open_files_limit is reported, and reduced to
	tc_size if it also exceeds tc_size. */
	if (innobase_open_files > open_files_limit) {
		ib::warn() << "innodb_open_files " << innobase_open_files
			   << " should not be greater"
			   << " than the open_files_limit " << open_files_limit;
		if (innobase_open_files > tc_size) {
			innobase_open_files = tc_size;
		}
	}

	srv_max_n_open_files = innobase_open_files;
	srv_innodb_status = (ibool) innobase_create_status_file;

	srv_print_verbose_log = mysqld_embedded ? 0 : 1;

	/* Round up fts_sort_pll_degree to nearest power of 2 number */
	for (num_pll_degree = 1;
	     num_pll_degree < fts_sort_pll_degree;
	     num_pll_degree <<= 1) {

		/* No op */
	}

	fts_sort_pll_degree = num_pll_degree;

	/* Store the default charset-collation number of this MySQL
	installation */

	data_mysql_default_charset_coll = (ulint) default_charset_info->number;

	innobase_commit_concurrency_init_default();

	if (innodb_idle_flush_pct != 100) {
		ib::warn() << deprecated_idle_flush_pct;
	}

#ifndef _WIN32
	if (srv_use_atomic_writes && my_may_have_atomic_write) {
		/*
		  Force O_DIRECT on Unixes (on Windows writes are always
		  unbuffered)
		*/
		switch (innodb_flush_method) {
		case SRV_O_DIRECT:
		case SRV_O_DIRECT_NO_FSYNC:
			break;
		default:
			innodb_flush_method = SRV_O_DIRECT;
			fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
		}
	}
#endif

	if (srv_read_only_mode) {
		ib::info() << "Started in read only mode";
		srv_use_doublewrite_buf = FALSE;
	}

#ifdef LINUX_NATIVE_AIO
	if (srv_use_native_aio) {
		ib::info() << "Using Linux native AIO";
	}
#elif !defined _WIN32
	/* Currently native AIO is supported only on windows and linux
	and that also when the support is compiled in. In all other
	cases, we ignore the setting of innodb_use_native_aio. */
	srv_use_native_aio = FALSE;
#endif

#ifndef _WIN32
	ut_ad(innodb_flush_method <= SRV_O_DIRECT_NO_FSYNC);
#else
	/* On Windows, map the two alias values that follow
	SRV_ALL_O_DIRECT_FSYNC to the settings they stand for. */
	switch (innodb_flush_method) {
	case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
		innodb_flush_method = SRV_ALL_O_DIRECT_FSYNC;
		break;
	case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
		innodb_flush_method = SRV_FSYNC;
		break;
	default:
		ut_ad(innodb_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
	}
#endif
	srv_file_flush_method = srv_flush_t(innodb_flush_method);

	/* This may change srv_buf_pool_instances and srv_buf_pool_size,
	both of which are used below. */
	innodb_buffer_pool_size_init();

	if (srv_n_page_cleaners > srv_buf_pool_instances) {
		/* limit of page_cleaner parallelizability
		is number of buffer pool instances. */
		srv_n_page_cleaners = srv_buf_pool_instances;
	}

	/* Five times the number of pages in the buffer pool. */
	srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
	DBUG_RETURN(0);
}
4242
/** Initialize the InnoDB storage engine plugin.
Fills in the handlerton callbacks, validates the startup parameters with
innodb_init_params(), registers performance schema keys, starts InnoDB
with srv_start() and initializes the commit and checkpoint
synchronization objects.
@param[in,out]	p	InnoDB handlerton
@return error code
@retval 0 on success */
static int innodb_init(void* p)
{
	DBUG_ENTER("innodb_init");
	handlerton* innobase_hton= static_cast<handlerton*>(p);
	innodb_hton_ptr = innobase_hton;

	innobase_hton->state = SHOW_OPTION_YES;
	innobase_hton->db_type = DB_TYPE_INNODB;
	innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
	innobase_hton->close_connection = innobase_close_connection;
	innobase_hton->kill_query = innobase_kill_query;
	innobase_hton->savepoint_set = innobase_savepoint;
	innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;

	innobase_hton->savepoint_rollback_can_release_mdl =
				innobase_rollback_to_savepoint_can_release_mdl;

	innobase_hton->savepoint_release = innobase_release_savepoint;
	innobase_hton->prepare_ordered= NULL;
	innobase_hton->commit_ordered= innobase_commit_ordered;
	innobase_hton->commit = innobase_commit;
	innobase_hton->rollback = innobase_rollback;
	innobase_hton->prepare = innobase_xa_prepare;
	innobase_hton->recover = innobase_xa_recover;
	innobase_hton->commit_by_xid = innobase_commit_by_xid;
	innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
	innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
	innobase_hton->create = innobase_create_handler;

	innobase_hton->drop_database = innobase_drop_database;
	innobase_hton->panic = innobase_end;

	innobase_hton->start_consistent_snapshot =
		innobase_start_trx_and_assign_read_view;

	innobase_hton->flush_logs = innobase_flush_logs;
	innobase_hton->show_status = innobase_show_status;
	innobase_hton->flags =
		HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS |
		HTON_NATIVE_SYS_VERSIONING |
		HTON_WSREP_REPLICATION |
		HTON_REQUIRES_CLOSE_AFTER_TRUNCATE;

#ifdef WITH_WSREP
	innobase_hton->abort_transaction=wsrep_abort_transaction;
	innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
	innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
#endif /* WITH_WSREP */

	innobase_hton->tablefile_extensions = ha_innobase_exts;
	innobase_hton->table_options = innodb_table_option_list;

	/* System Versioning */
	innobase_hton->prepare_commit_versioned
		= innodb_prepare_commit_versioned;

	innodb_remember_check_sysvar_funcs();

	compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);

#ifndef DBUG_OFF
	/* Debug-only self-check: converting the file name "-@" with
	filename_to_tablename() must yield srv_mysql50_table_name_prefix
	followed by the unchanged file name. */
	static const char	test_filename[] = "-@";
	char			test_tablename[sizeof test_filename
				+ sizeof(srv_mysql50_table_name_prefix) - 1];
	DBUG_ASSERT(sizeof test_tablename - 1
		    == filename_to_tablename(test_filename,
					     test_tablename,
					     sizeof test_tablename, true));
	DBUG_ASSERT(!strncmp(test_tablename,
			     srv_mysql50_table_name_prefix,
			     sizeof srv_mysql50_table_name_prefix - 1));
	DBUG_ASSERT(!strcmp(test_tablename
			    + sizeof srv_mysql50_table_name_prefix - 1,
			    test_filename));
#endif /* DBUG_OFF */

	os_file_set_umask(my_umask);

	/* Setup the memory alloc/free tracing mechanisms before calling
	any functions that could possibly allocate memory. */
	ut_new_boot();

	if (int error = innodb_init_params()) {
		DBUG_RETURN(error);
	}

	/* After this point, error handling has to use
	innodb_init_abort(). */

#ifdef HAVE_PSI_INTERFACE
	/* Register keys with MySQL performance schema */
	int	count;

	count = array_elements(all_pthread_mutexes);
	mysql_mutex_register("innodb", all_pthread_mutexes, count);

# ifdef UNIV_PFS_MUTEX
	count = array_elements(all_innodb_mutexes);
	mysql_mutex_register("innodb", all_innodb_mutexes, count);
# endif /* UNIV_PFS_MUTEX */

# ifdef UNIV_PFS_RWLOCK
	count = array_elements(all_innodb_rwlocks);
	mysql_rwlock_register("innodb", all_innodb_rwlocks, count);
# endif /* UNIV_PFS_RWLOCK */

# ifdef UNIV_PFS_THREAD
	count = array_elements(all_innodb_threads);
	mysql_thread_register("innodb", all_innodb_threads, count);
# endif /* UNIV_PFS_THREAD */

# ifdef UNIV_PFS_IO
	count = array_elements(all_innodb_files);
	mysql_file_register("innodb", all_innodb_files, count);
# endif /* UNIV_PFS_IO */

	count = array_elements(all_innodb_conds);
	mysql_cond_register("innodb", all_innodb_conds, count);
#endif /* HAVE_PSI_INTERFACE */

	bool	create_new_db = false;

	/* Check whether the data files exist. */
	dberr_t	err = srv_sys_space.check_file_spec(&create_new_db, 5U << 20);

	if (err != DB_SUCCESS) {
		DBUG_RETURN(innodb_init_abort());
	}

	err = srv_start(create_new_db);

	if (err != DB_SUCCESS) {
		innodb_shutdown();
		DBUG_RETURN(innodb_init_abort());
	} else if (!srv_read_only_mode) {
		/* Start the thd_destructor_proxy thread and poll, sleeping
		between checks, until srv_running becomes non-null.
		NOTE(review): the return value of mysql_thread_create() is
		not checked here. */
		mysql_thread_create(thd_destructor_thread_key,
				    &thd_destructor_thread,
				    NULL, thd_destructor_proxy, NULL);
		while (!srv_running.load(std::memory_order_relaxed))
			os_thread_sleep(20);
	}

	srv_was_started = true;
	innodb_params_adjust();

	innobase_old_blocks_pct = static_cast<uint>(
		buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));

	ibuf_max_size_update(srv_change_buffer_max_size);

	/* These objects are destroyed again in innobase_end(). */
	mysql_mutex_init(commit_cond_mutex_key,
			 &commit_cond_m, MY_MUTEX_INIT_FAST);
	mysql_cond_init(commit_cond_key, &commit_cond, 0);
	mysql_mutex_init(pending_checkpoint_mutex_key,
			 &pending_checkpoint_mutex,
			 MY_MUTEX_INIT_FAST);
#ifdef MYSQL_DYNAMIC_PLUGIN
	if (innobase_hton != p) {
		innobase_hton = reinterpret_cast<handlerton*>(p);
		*innobase_hton = *innodb_hton_ptr;
	}
#endif /* MYSQL_DYNAMIC_PLUGIN */

	memset(innodb_counter_value, 0, sizeof innodb_counter_value);

	/* Do this as late as possible, so that the server has fully
	started up, since we might get some initial stats if the user
	chose to turn on some counters from startup */
	if (innobase_enable_monitor_counter) {
		innodb_enable_monitor_at_startup(
			innobase_enable_monitor_counter);
	}

	/* Turn on monitor counters that are default on */
	srv_mon_default_on();

	/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
	unit_test_os_file_get_parent_dir();
#endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */

#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
	test_make_filepath();
#endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */

#ifdef UNIV_ENABLE_DICT_STATS_TEST
	test_dict_stats_all();
#endif /*UNIV_ENABLE_DICT_STATS_TEST */

#ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
# ifdef HAVE_UT_CHRONO_T
	test_row_raw_format_int();
# endif /* HAVE_UT_CHRONO_T */
#endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */

	DBUG_RETURN(0);
}
4444
4445 /** Shut down the InnoDB storage engine.
4446 @return 0 */
4447 static
4448 int
innobase_end(handlerton *,ha_panic_function)4449 innobase_end(handlerton*, ha_panic_function)
4450 {
4451 DBUG_ENTER("innobase_end");
4452
4453 if (srv_was_started) {
4454 THD *thd= current_thd;
4455 if (thd) { // may be UNINSTALL PLUGIN statement
4456 if (trx_t* trx = thd_to_trx(thd)) {
4457 trx->free();
4458 }
4459 }
4460
4461 if (auto r = srv_running.load(std::memory_order_relaxed)) {
4462 ut_ad(!srv_read_only_mode);
4463 if (!abort_loop) {
4464 // may be UNINSTALL PLUGIN statement
4465 mysql_mutex_lock(r->current_mutex);
4466 r->abort = 1;
4467 mysql_cond_broadcast(r->current_cond);
4468 mysql_mutex_unlock(r->current_mutex);
4469 }
4470 pthread_join(thd_destructor_thread, NULL);
4471 }
4472
4473 innodb_shutdown();
4474
4475 mysql_mutex_destroy(&commit_cond_m);
4476 mysql_cond_destroy(&commit_cond);
4477 mysql_mutex_destroy(&pending_checkpoint_mutex);
4478 }
4479
4480 DBUG_RETURN(0);
4481 }
4482
4483 /*****************************************************************//**
4484 Commits a transaction in an InnoDB database. */
4485 void
innobase_commit_low(trx_t * trx)4486 innobase_commit_low(
4487 /*================*/
4488 trx_t* trx) /*!< in: transaction handle */
4489 {
4490 #ifdef WITH_WSREP
4491 const char* tmp = 0;
4492 const bool is_wsrep = trx->is_wsrep();
4493 THD* thd = trx->mysql_thd;
4494 if (is_wsrep) {
4495 tmp = thd_proc_info(thd, "innobase_commit_low()");
4496 }
4497 #endif /* WITH_WSREP */
4498 if (trx_is_started(trx)) {
4499 trx_commit_for_mysql(trx);
4500 } else {
4501 trx->will_lock = false;
4502 #ifdef WITH_WSREP
4503 trx->wsrep = false;
4504 #endif /* WITH_WSREP */
4505 }
4506
4507 #ifdef WITH_WSREP
4508 if (is_wsrep) {
4509 thd_proc_info(thd, tmp);
4510 }
4511 #endif /* WITH_WSREP */
4512 }
4513
4514 /*****************************************************************//**
4515 Creates an InnoDB transaction struct for the thd if it does not yet have one.
4516 Starts a new InnoDB transaction if a transaction is not yet started. And
4517 assigns a new snapshot for a consistent read if the transaction does not yet
4518 have one.
4519 @return 0 */
4520 static
4521 int
innobase_start_trx_and_assign_read_view(handlerton * hton,THD * thd)4522 innobase_start_trx_and_assign_read_view(
4523 /*====================================*/
4524 handlerton* hton, /*!< in: InnoDB handlerton */
4525 THD* thd) /*!< in: MySQL thread handle of the user for
4526 whom the transaction should be committed */
4527 {
4528 DBUG_ENTER("innobase_start_trx_and_assign_read_view");
4529 DBUG_ASSERT(hton == innodb_hton_ptr);
4530
4531 /* Create a new trx struct for thd, if it does not yet have one */
4532
4533 trx_t* trx = check_trx_exists(thd);
4534
4535 innobase_srv_conc_force_exit_innodb(trx);
4536
4537 /* The transaction should not be active yet, start it */
4538
4539 ut_ad(!trx_is_started(trx));
4540
4541 trx_start_if_not_started_xa(trx, false);
4542
4543 /* Assign a read view if the transaction does not have it yet.
4544 Do this only if transaction is using REPEATABLE READ isolation
4545 level. */
4546 trx->isolation_level = innobase_map_isolation_level(
4547 thd_get_trx_isolation(thd));
4548
4549 if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
4550 trx->read_view.open(trx);
4551 } else {
4552 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4553 HA_ERR_UNSUPPORTED,
4554 "InnoDB: WITH CONSISTENT SNAPSHOT"
4555 " was ignored because this phrase"
4556 " can only be used with"
4557 " REPEATABLE READ isolation level.");
4558 }
4559
4560 /* Set the MySQL flag to mark that there is an active transaction */
4561
4562 innobase_register_trx(hton, current_thd, trx);
4563
4564 DBUG_RETURN(0);
4565 }
4566
4567 static
4568 void
innobase_commit_ordered_2(trx_t * trx,THD * thd)4569 innobase_commit_ordered_2(
4570 /*======================*/
4571 trx_t* trx, /*!< in: Innodb transaction */
4572 THD* thd) /*!< in: MySQL thread handle */
4573 {
4574 DBUG_ENTER("innobase_commit_ordered_2");
4575
4576 bool read_only = trx->read_only || trx->id == 0;
4577
4578 if (!read_only) {
4579
4580 while (innobase_commit_concurrency > 0) {
4581
4582 mysql_mutex_lock(&commit_cond_m);
4583
4584 ++commit_threads;
4585
4586 if (commit_threads
4587 <= innobase_commit_concurrency) {
4588
4589 mysql_mutex_unlock(&commit_cond_m);
4590 break;
4591 }
4592
4593 --commit_threads;
4594
4595 mysql_cond_wait(&commit_cond, &commit_cond_m);
4596
4597 mysql_mutex_unlock(&commit_cond_m);
4598 }
4599
4600 /* The following call reads the binary log position of
4601 the transaction being committed.
4602
4603 Binary logging of other engines is not relevant to
4604 InnoDB as all InnoDB requires is that committing
4605 InnoDB transactions appear in the same order in the
4606 MySQL binary log as they appear in InnoDB logs, which
4607 is guaranteed by the server.
4608
4609 If the binary log is not enabled, or the transaction
4610 is not written to the binary log, the file name will
4611 be a NULL pointer. */
4612 thd_binlog_pos(thd, &trx->mysql_log_file_name,
4613 &trx->mysql_log_offset);
4614
4615 /* Don't do write + flush right now. For group commit
4616 to work we want to do the flush later. */
4617 trx->flush_log_later = true;
4618 }
4619
4620 #ifdef WITH_WSREP
4621 /* If the transaction is not run in 2pc, we must assign wsrep
4622 XID here in order to get it written in rollback segment. */
4623 if (trx->is_wsrep()) {
4624 thd_get_xid(thd, (MYSQL_XID*)trx->xid);
4625 }
4626 #endif /* WITH_WSREP */
4627
4628 innobase_commit_low(trx);
4629
4630 if (!read_only) {
4631 trx->flush_log_later = false;
4632
4633 if (innobase_commit_concurrency > 0) {
4634
4635 mysql_mutex_lock(&commit_cond_m);
4636
4637 ut_ad(commit_threads > 0);
4638 --commit_threads;
4639
4640 mysql_cond_signal(&commit_cond);
4641
4642 mysql_mutex_unlock(&commit_cond_m);
4643 }
4644 }
4645
4646 DBUG_VOID_RETURN;
4647 }
4648
4649 /*****************************************************************//**
4650 Perform the first, fast part of InnoDB commit.
4651
4652 Doing it in this call ensures that we get the same commit order here
4653 as in binlog and any other participating transactional storage engines.
4654
4655 Note that we want to do as little as really needed here, as we run
4656 under a global mutex. The expensive fsync() is done later, in
4657 innobase_commit(), without a lock so group commit can take place.
4658
4659 Note also that this method can be called from a different thread than
4660 the one handling the rest of the transaction. */
4661 static
4662 void
innobase_commit_ordered(handlerton * hton,THD * thd,bool all)4663 innobase_commit_ordered(
4664 /*====================*/
4665 handlerton *hton, /*!< in: Innodb handlerton */
4666 THD* thd, /*!< in: MySQL thread handle of the user for whom
4667 the transaction should be committed */
4668 bool all) /*!< in: TRUE - commit transaction
4669 FALSE - the current SQL statement ended */
4670 {
4671 trx_t* trx;
4672 DBUG_ENTER("innobase_commit_ordered");
4673 DBUG_ASSERT(hton == innodb_hton_ptr);
4674
4675 trx = check_trx_exists(thd);
4676
4677 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4678 /* We cannot throw error here; instead we will catch this error
4679 again in innobase_commit() and report it from there. */
4680 DBUG_VOID_RETURN;
4681 }
4682
4683 /* commit_ordered is only called when committing the whole transaction
4684 (or an SQL statement when autocommit is on). */
4685 DBUG_ASSERT(all ||
4686 (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
4687
4688 innobase_commit_ordered_2(trx, thd);
4689 trx->active_commit_ordered = true;
4690
4691 DBUG_VOID_RETURN;
4692 }
4693
4694 /*****************************************************************//**
4695 Commits a transaction in an InnoDB database or marks an SQL statement
4696 ended.
4697 @return 0 or deadlock error if the transaction was aborted by another
4698 higher priority transaction. */
4699 static
4700 int
innobase_commit(handlerton * hton,THD * thd,bool commit_trx)4701 innobase_commit(
4702 /*============*/
4703 handlerton* hton, /*!< in: InnoDB handlerton */
4704 THD* thd, /*!< in: MySQL thread handle of the
4705 user for whom the transaction should
4706 be committed */
4707 bool commit_trx) /*!< in: true - commit transaction
4708 false - the current SQL statement
4709 ended */
4710 {
4711 DBUG_ENTER("innobase_commit");
4712 DBUG_PRINT("enter", ("commit_trx: %d", commit_trx));
4713 DBUG_ASSERT(hton == innodb_hton_ptr);
4714 DBUG_PRINT("trans", ("ending transaction"));
4715
4716 trx_t* trx = check_trx_exists(thd);
4717
4718 ut_ad(trx->dict_operation_lock_mode == 0);
4719 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4720
4721 /* Transaction is deregistered only in a commit or a rollback. If
4722 it is deregistered we know there cannot be resources to be freed
4723 and we could return immediately. For the time being, we play safe
4724 and do the cleanup though there should be nothing to clean up. */
4725
4726 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4727
4728 sql_print_error("Transaction not registered for MariaDB 2PC,"
4729 " but transaction is active");
4730 }
4731
4732 bool read_only = trx->read_only || trx->id == 0;
4733 DBUG_PRINT("info", ("readonly: %d", read_only));
4734
4735 if (commit_trx
4736 || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
4737
4738 DBUG_EXECUTE_IF("crash_innodb_before_commit",
4739 DBUG_SUICIDE(););
4740
4741 /* Run the fast part of commit if we did not already. */
4742 if (!trx->active_commit_ordered) {
4743 innobase_commit_ordered_2(trx, thd);
4744
4745 }
4746
4747 /* We were instructed to commit the whole transaction, or
4748 this is an SQL statement end and autocommit is on */
4749
4750 /* At this point commit order is fixed and transaction is
4751 visible to others. So we can wakeup other commits waiting for
4752 this one, to allow then to group commit with us. */
4753 thd_wakeup_subsequent_commits(thd, 0);
4754
4755 /* Now do a write + flush of logs. */
4756 trx_commit_complete_for_mysql(trx);
4757
4758 trx_deregister_from_2pc(trx);
4759 } else {
4760 /* We just mark the SQL statement ended and do not do a
4761 transaction commit */
4762
4763 /* If we had reserved the auto-inc lock for some
4764 table in this SQL statement we release it now */
4765
4766 if (!read_only) {
4767 lock_unlock_table_autoinc(trx);
4768 }
4769
4770 /* Store the current undo_no of the transaction so that we
4771 know where to roll back if we have to roll back the next
4772 SQL statement */
4773
4774 trx_mark_sql_stat_end(trx);
4775 }
4776
4777 /* Reset the number AUTO-INC rows required */
4778 trx->n_autoinc_rows = 0;
4779
4780 /* This is a statement level variable. */
4781 trx->fts_next_doc_id = 0;
4782
4783 innobase_srv_conc_force_exit_innodb(trx);
4784
4785 DBUG_RETURN(0);
4786 }
4787
4788 /*****************************************************************//**
4789 Rolls back a transaction or the latest SQL statement.
4790 @return 0 or error number */
4791 static
4792 int
innobase_rollback(handlerton * hton,THD * thd,bool rollback_trx)4793 innobase_rollback(
4794 /*==============*/
4795 handlerton* hton, /*!< in: InnoDB handlerton */
4796 THD* thd, /*!< in: handle to the MySQL thread
4797 of the user whose transaction should
4798 be rolled back */
4799 bool rollback_trx) /*!< in: TRUE - rollback entire
4800 transaction FALSE - rollback the current
4801 statement only */
4802 {
4803 DBUG_ENTER("innobase_rollback");
4804 DBUG_ASSERT(hton == innodb_hton_ptr);
4805 DBUG_PRINT("trans", ("aborting transaction"));
4806
4807 trx_t* trx = check_trx_exists(thd);
4808
4809 ut_ad(trx->dict_operation_lock_mode == 0);
4810 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4811
4812 innobase_srv_conc_force_exit_innodb(trx);
4813
4814 /* Reset the number AUTO-INC rows required */
4815
4816 trx->n_autoinc_rows = 0;
4817
4818 /* If we had reserved the auto-inc lock for some table (if
4819 we come here to roll back the latest SQL statement) we
4820 release it now before a possibly lengthy rollback */
4821 lock_unlock_table_autoinc(trx);
4822
4823 /* This is a statement level variable. */
4824
4825 trx->fts_next_doc_id = 0;
4826
4827 dberr_t error;
4828
4829 #ifdef WITH_WSREP
4830 /* If trx was assigned wsrep XID in prepare phase and the
4831 trx is being rolled back due to BF abort, clear XID in order
4832 to avoid writing it to rollback segment out of order. The XID
4833 will be reassigned when the transaction is replayed. */
4834 if (trx->state != TRX_STATE_NOT_STARTED && wsrep_is_wsrep_xid(trx->xid)) {
4835 trx->xid->null();
4836 }
4837 #endif /* WITH_WSREP */
4838 if (rollback_trx
4839 || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
4840
4841 error = trx_rollback_for_mysql(trx);
4842
4843 trx_deregister_from_2pc(trx);
4844 } else {
4845
4846 error = trx_rollback_last_sql_stat_for_mysql(trx);
4847 }
4848
4849 DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd));
4850 }
4851
4852 /*****************************************************************//**
4853 Rolls back a transaction
4854 @return 0 or error number */
4855 static
4856 int
innobase_rollback_trx(trx_t * trx)4857 innobase_rollback_trx(
4858 /*==================*/
4859 trx_t* trx) /*!< in: transaction */
4860 {
4861 DBUG_ENTER("innobase_rollback_trx");
4862 DBUG_PRINT("trans", ("aborting transaction"));
4863
4864 innobase_srv_conc_force_exit_innodb(trx);
4865
4866 /* If we had reserved the auto-inc lock for some table (if
4867 we come here to roll back the latest SQL statement) we
4868 release it now before a possibly lengthy rollback */
4869 lock_unlock_table_autoinc(trx);
4870
4871 if (!trx->has_logged()) {
4872 trx->will_lock = false;
4873 #ifdef WITH_WSREP
4874 trx->wsrep= false;
4875 trx->lock.was_chosen_as_wsrep_victim= false;
4876 #endif
4877 DBUG_RETURN(0);
4878 }
4879
4880 DBUG_RETURN(convert_error_code_to_mysql(trx_rollback_for_mysql(trx),
4881 0, trx->mysql_thd));
4882 }
4883
4884
4885 struct pending_checkpoint {
4886 struct pending_checkpoint *next;
4887 handlerton *hton;
4888 void *cookie;
4889 ib_uint64_t lsn;
4890 };
4891 static struct pending_checkpoint *pending_checkpoint_list;
4892 static struct pending_checkpoint *pending_checkpoint_list_end;
4893
4894 /*****************************************************************//**
4895 Handle a commit checkpoint request from server layer.
4896 We put the request in a queue, so that we can notify upper layer about
4897 checkpoint complete when we have flushed the redo log.
4898 If we have already flushed all relevant redo log, we notify immediately.*/
4899 static
4900 void
innobase_checkpoint_request(handlerton * hton,void * cookie)4901 innobase_checkpoint_request(
4902 handlerton *hton,
4903 void *cookie)
4904 {
4905 ib_uint64_t lsn;
4906 ib_uint64_t flush_lsn;
4907 struct pending_checkpoint * entry;
4908
4909 /* Do the allocation outside of lock to reduce contention. The normal
4910 case is that not everything is flushed, so we will need to enqueue. */
4911 entry = static_cast<struct pending_checkpoint *>
4912 (my_malloc(sizeof(*entry), MYF(MY_WME)));
4913 if (!entry) {
4914 sql_print_error("Failed to allocate %u bytes."
4915 " Commit checkpoint will be skipped.",
4916 static_cast<unsigned>(sizeof(*entry)));
4917 return;
4918 }
4919
4920 entry->next = NULL;
4921 entry->hton = hton;
4922 entry->cookie = cookie;
4923
4924 mysql_mutex_lock(&pending_checkpoint_mutex);
4925 lsn = log_get_lsn();
4926 flush_lsn = log_get_flush_lsn();
4927 if (lsn > flush_lsn) {
4928 /* Put the request in queue.
4929 When the log gets flushed past the lsn, we will remove the
4930 entry from the queue and notify the upper layer. */
4931 entry->lsn = lsn;
4932 if (pending_checkpoint_list_end) {
4933 pending_checkpoint_list_end->next = entry;
4934 /* There is no need to order the entries in the list
4935 by lsn. The upper layer can accept notifications in
4936 any order, and short delays in notifications do not
4937 significantly impact performance. */
4938 } else {
4939 pending_checkpoint_list = entry;
4940 }
4941 pending_checkpoint_list_end = entry;
4942 entry = NULL;
4943 }
4944 mysql_mutex_unlock(&pending_checkpoint_mutex);
4945
4946 if (entry) {
4947 /* We are already flushed. Notify the checkpoint immediately. */
4948 commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4949 my_free(entry);
4950 }
4951 }
4952
4953 /*****************************************************************//**
4954 Log code calls this whenever log has been written and/or flushed up
4955 to a new position. We use this to notify upper layer of a new commit
4956 checkpoint when necessary.*/
4957 UNIV_INTERN
4958 void
innobase_mysql_log_notify(ib_uint64_t flush_lsn)4959 innobase_mysql_log_notify(
4960 /*======================*/
4961 ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
4962 {
4963 struct pending_checkpoint * pending;
4964 struct pending_checkpoint * entry;
4965 struct pending_checkpoint * last_ready;
4966
4967 /* It is safe to do a quick check for NULL first without lock.
4968 Even if we should race, we will at most skip one checkpoint and
4969 take the next one, which is harmless. */
4970 if (!pending_checkpoint_list)
4971 return;
4972
4973 mysql_mutex_lock(&pending_checkpoint_mutex);
4974 pending = pending_checkpoint_list;
4975 if (!pending)
4976 {
4977 mysql_mutex_unlock(&pending_checkpoint_mutex);
4978 return;
4979 }
4980
4981 last_ready = NULL;
4982 for (entry = pending; entry != NULL; entry = entry -> next)
4983 {
4984 /* Notify checkpoints up until the first entry that has not
4985 been fully flushed to the redo log. Since we do not maintain
4986 the list ordered, in principle there could be more entries
4987 later than were also flushed. But there is no harm in
4988 delaying notifications for those a bit. And in practise, the
4989 list is unlikely to have more than one element anyway, as we
4990 flush the redo log at least once every second. */
4991 if (entry->lsn > flush_lsn)
4992 break;
4993 last_ready = entry;
4994 }
4995
4996 if (last_ready)
4997 {
4998 /* We found some pending checkpoints that are now flushed to
4999 disk. So remove them from the list. */
5000 pending_checkpoint_list = entry;
5001 if (!entry)
5002 pending_checkpoint_list_end = NULL;
5003 }
5004
5005 mysql_mutex_unlock(&pending_checkpoint_mutex);
5006
5007 if (!last_ready)
5008 return;
5009
5010 /* Now that we have released the lock, notify upper layer about all
5011 commit checkpoints that have now completed. */
5012 for (;;) {
5013 entry = pending;
5014 pending = pending->next;
5015
5016 commit_checkpoint_notify_ha(entry->hton, entry->cookie);
5017
5018 my_free(entry);
5019 if (entry == last_ready)
5020 break;
5021 }
5022 }
5023
5024 /*****************************************************************//**
5025 Rolls back a transaction to a savepoint.
5026 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5027 given name */
5028 static
5029 int
innobase_rollback_to_savepoint(handlerton * hton,THD * thd,void * savepoint)5030 innobase_rollback_to_savepoint(
5031 /*===========================*/
5032 handlerton* hton, /*!< in: InnoDB handlerton */
5033 THD* thd, /*!< in: handle to the MySQL thread
5034 of the user whose transaction should
5035 be rolled back to savepoint */
5036 void* savepoint) /*!< in: savepoint data */
5037 {
5038
5039 DBUG_ENTER("innobase_rollback_to_savepoint");
5040 DBUG_ASSERT(hton == innodb_hton_ptr);
5041
5042 trx_t* trx = check_trx_exists(thd);
5043
5044 innobase_srv_conc_force_exit_innodb(trx);
5045
5046 /* TODO: use provided savepoint data area to store savepoint data */
5047
5048 char name[64];
5049
5050 longlong2str(longlong(savepoint), name, 36);
5051
5052 int64_t mysql_binlog_cache_pos;
5053
5054 dberr_t error = trx_rollback_to_savepoint_for_mysql(
5055 trx, name, &mysql_binlog_cache_pos);
5056
5057 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5058 fts_savepoint_rollback(trx, name);
5059 }
5060
5061 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5062 }
5063
5064 /*****************************************************************//**
5065 Check whether innodb state allows to safely release MDL locks after
5066 rollback to savepoint.
5067 When binlog is on, MDL locks acquired after savepoint unit are not
5068 released if there are any locks held in InnoDB.
5069 @return true if it is safe, false if its not safe. */
5070 static
5071 bool
innobase_rollback_to_savepoint_can_release_mdl(handlerton * hton,THD * thd)5072 innobase_rollback_to_savepoint_can_release_mdl(
5073 /*===========================================*/
5074 handlerton* hton, /*!< in: InnoDB handlerton */
5075 THD* thd) /*!< in: handle to the MySQL thread
5076 of the user whose transaction should
5077 be rolled back to savepoint */
5078 {
5079 DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
5080 DBUG_ASSERT(hton == innodb_hton_ptr);
5081
5082 trx_t* trx = check_trx_exists(thd);
5083
5084 /* If transaction has not acquired any locks then it is safe
5085 to release MDL after rollback to savepoint */
5086 if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
5087
5088 DBUG_RETURN(true);
5089 }
5090
5091 DBUG_RETURN(false);
5092 }
5093
5094 /*****************************************************************//**
5095 Release transaction savepoint name.
5096 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5097 given name */
5098 static
5099 int
innobase_release_savepoint(handlerton * hton,THD * thd,void * savepoint)5100 innobase_release_savepoint(
5101 /*=======================*/
5102 handlerton* hton, /*!< in: handlerton for InnoDB */
5103 THD* thd, /*!< in: handle to the MySQL thread
5104 of the user whose transaction's
5105 savepoint should be released */
5106 void* savepoint) /*!< in: savepoint data */
5107 {
5108 dberr_t error;
5109 trx_t* trx;
5110 char name[64];
5111
5112 DBUG_ENTER("innobase_release_savepoint");
5113 DBUG_ASSERT(hton == innodb_hton_ptr);
5114
5115 trx = check_trx_exists(thd);
5116
5117 /* TODO: use provided savepoint data area to store savepoint data */
5118
5119 longlong2str(longlong(savepoint), name, 36);
5120
5121 error = trx_release_savepoint_for_mysql(trx, name);
5122
5123 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5124 fts_savepoint_release(trx, name);
5125 }
5126
5127 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5128 }
5129
5130 /*****************************************************************//**
5131 Sets a transaction savepoint.
5132 @return always 0, that is, always succeeds */
5133 static
5134 int
innobase_savepoint(handlerton * hton,THD * thd,void * savepoint)5135 innobase_savepoint(
5136 /*===============*/
5137 handlerton* hton, /*!< in: handle to the InnoDB handlerton */
5138 THD* thd, /*!< in: handle to the MySQL thread */
5139 void* savepoint)/*!< in: savepoint data */
5140 {
5141 DBUG_ENTER("innobase_savepoint");
5142 DBUG_ASSERT(hton == innodb_hton_ptr);
5143
5144 /* In the autocommit mode there is no sense to set a savepoint
5145 (unless we are in sub-statement), so SQL layer ensures that
5146 this method is never called in such situation. */
5147
5148 trx_t* trx = check_trx_exists(thd);
5149
5150 innobase_srv_conc_force_exit_innodb(trx);
5151
5152 /* Cannot happen outside of transaction */
5153 DBUG_ASSERT(trx_is_registered_for_2pc(trx));
5154
5155 /* TODO: use provided savepoint data area to store savepoint data */
5156 char name[64];
5157
5158 longlong2str(longlong(savepoint), name, 36);
5159
5160 dberr_t error = trx_savepoint_for_mysql(trx, name, 0);
5161
5162 if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5163 fts_savepoint_take(trx->fts_trx, name);
5164 }
5165
5166 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5167 }
5168
5169 /*****************************************************************//**
5170 Frees a possible InnoDB trx object associated with the current THD.
5171 @return 0 or error number */
5172 static
5173 int
innobase_close_connection(handlerton * hton,THD * thd)5174 innobase_close_connection(
5175 /*======================*/
5176 handlerton* hton, /*!< in: innobase handlerton */
5177 THD* thd) /*!< in: handle to the MySQL thread of the user
5178 whose resources should be free'd */
5179 {
5180
5181 DBUG_ENTER("innobase_close_connection");
5182 DBUG_ASSERT(hton == innodb_hton_ptr);
5183
5184 trx_t* trx = thd_to_trx(thd);
5185
5186 /* During server initialization MySQL layer will try to open
5187 some of the master-slave tables those residing in InnoDB.
5188 After MySQL layer is done with needed checks these tables
5189 are closed followed by invocation of close_connection on the
5190 associated thd.
5191
5192 close_connection rolls back the trx and then frees it.
5193 Once trx is freed thd should avoid maintaining reference to
5194 it else it can be classified as stale reference.
5195
5196 Re-invocation of innodb_close_connection on same thd should
5197 get trx as NULL. */
5198
5199 if (trx) {
5200
5201 thd_set_ha_data(thd, hton, NULL);
5202 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
5203
5204 sql_print_error("Transaction not registered for MariaDB 2PC, "
5205 "but transaction is active");
5206 }
5207
5208 /* Disconnect causes rollback in the following cases:
5209 - trx is not started, or
5210 - trx is in *not* in PREPARED state, or
5211 - trx has not updated any persistent data.
5212 TODO/FIXME: it does not make sense to initiate rollback
5213 in the 1st and 3rd case. */
5214 if (trx_is_started(trx)) {
5215 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
5216 if (trx->has_logged_persistent()) {
5217 trx_disconnect_prepared(trx);
5218 } else {
5219 trx_deregister_from_2pc(trx);
5220 goto rollback_and_free;
5221 }
5222 } else {
5223 sql_print_warning(
5224 "MariaDB is closing a connection that has an active "
5225 "InnoDB transaction. " TRX_ID_FMT " row modifications "
5226 "will roll back.",
5227 trx->undo_no);
5228 goto rollback_and_free;
5229 }
5230 } else {
5231 rollback_and_free:
5232 innobase_rollback_trx(trx);
5233 trx->free();
5234 }
5235 }
5236
5237 DBUG_RETURN(0);
5238 }
5239
5240 void lock_cancel_waiting_and_release(lock_t *lock);
5241
5242 /** Cancel any pending lock request associated with the current THD.
5243 @sa THD::awake() @sa ha_kill_query() */
innobase_kill_query(handlerton *,THD * thd,enum thd_kill_levels)5244 static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
5245 {
5246 DBUG_ENTER("innobase_kill_query");
5247
5248 if (trx_t* trx= thd_to_trx(thd))
5249 {
5250 ut_ad(trx->mysql_thd == thd);
5251 #ifdef WITH_WSREP
5252 if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim)
5253 /* if victim has been signaled by BF thread and/or aborting is already
5254 progressing, following query aborting is not necessary any more.
5255 Also, BF thread should own trx mutex for the victim. */
5256 DBUG_VOID_RETURN;
5257 #endif /* WITH_WSREP */
5258 lock_mutex_enter();
5259 if (lock_t *lock= trx->lock.wait_lock)
5260 {
5261 trx_mutex_enter(trx);
5262 if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
5263 trx->lock.was_chosen_as_deadlock_victim= TRUE;
5264 lock_cancel_waiting_and_release(lock);
5265 trx_mutex_exit(trx);
5266 }
5267 lock_mutex_exit();
5268 }
5269
5270 DBUG_VOID_RETURN;
5271 }
5272
5273
5274 /*************************************************************************//**
5275 ** InnoDB database tables
5276 *****************************************************************************/
5277
5278 /** Get the record format from the data dictionary.
5279 @return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
5280 ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
5281
5282 enum row_type
get_row_type() const5283 ha_innobase::get_row_type() const
5284 {
5285 if (m_prebuilt && m_prebuilt->table) {
5286 const ulint flags = m_prebuilt->table->flags;
5287
5288 switch (dict_tf_get_rec_format(flags)) {
5289 case REC_FORMAT_REDUNDANT:
5290 return(ROW_TYPE_REDUNDANT);
5291 case REC_FORMAT_COMPACT:
5292 return(ROW_TYPE_COMPACT);
5293 case REC_FORMAT_COMPRESSED:
5294 return(ROW_TYPE_COMPRESSED);
5295 case REC_FORMAT_DYNAMIC:
5296 return(ROW_TYPE_DYNAMIC);
5297 }
5298 }
5299 ut_ad(0);
5300 return(ROW_TYPE_NOT_USED);
5301 }
5302
5303 /****************************************************************//**
5304 Get the table flags to use for the statement.
5305 @return table flags */
5306
5307 handler::Table_flags
table_flags() const5308 ha_innobase::table_flags() const
5309 /*============================*/
5310 {
5311 THD* thd = ha_thd();
5312 handler::Table_flags flags = m_int_table_flags;
5313
5314 /* Need to use tx_isolation here since table flags is (also)
5315 called before prebuilt is inited. */
5316
5317 if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
5318 return(flags);
5319 }
5320
5321 return(flags | HA_BINLOG_STMT_CAPABLE);
5322 }
5323
5324 /****************************************************************//**
5325 Returns the table type (storage engine name).
5326 @return table type */
5327
5328 const char*
table_type() const5329 ha_innobase::table_type() const
5330 /*===========================*/
5331 {
5332 return(innobase_hton_name);
5333 }
5334
5335 /****************************************************************//**
5336 Returns the index type.
5337 @return index type */
5338
5339 const char*
index_type(uint keynr)5340 ha_innobase::index_type(
5341 /*====================*/
5342 uint keynr) /*!< : index number */
5343 {
5344 dict_index_t* index = innobase_get_index(keynr);
5345
5346 if (!index) {
5347 return "Corrupted";
5348 }
5349
5350 if (index->type & DICT_FTS) {
5351 return("FULLTEXT");
5352 }
5353
5354 if (dict_index_is_spatial(index)) {
5355 return("SPATIAL");
5356 }
5357
5358 return("BTREE");
5359 }
5360
5361 /****************************************************************//**
5362 Returns the operations supported for indexes.
5363 @return flags of supported operations */
5364
5365 ulong
index_flags(uint key,uint,bool) const5366 ha_innobase::index_flags(
5367 /*=====================*/
5368 uint key,
5369 uint,
5370 bool) const
5371 {
5372 if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
5373 return(0);
5374 }
5375
5376 /* For spatial index, we don't support descending scan
5377 and ICP so far. */
5378 if (table_share->key_info[key].flags & HA_SPATIAL) {
5379 return HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE
5380 | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
5381 }
5382
5383 ulong flags= key == table_share->primary_key
5384 ? HA_CLUSTERED_INDEX : 0;
5385
5386 flags |= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
5387 | HA_READ_RANGE | HA_KEYREAD_ONLY
5388 | HA_DO_INDEX_COND_PUSHDOWN
5389 | HA_DO_RANGE_FILTER_PUSHDOWN;
5390
5391 return(flags);
5392 }
5393
5394 /****************************************************************//**
5395 Returns the maximum number of keys.
5396 @return MAX_KEY */
5397
5398 uint
max_supported_keys() const5399 ha_innobase::max_supported_keys() const
5400 /*===================================*/
5401 {
5402 return(MAX_KEY);
5403 }
5404
5405 /****************************************************************//**
5406 Returns the maximum key length.
5407 @return maximum supported key length, in bytes */
5408
5409 uint
max_supported_key_length() const5410 ha_innobase::max_supported_key_length() const
5411 /*=========================================*/
5412 {
5413 /* An InnoDB page must store >= 2 keys; a secondary key record
5414 must also contain the primary key value. Therefore, if both
5415 the primary key and the secondary key are at this maximum length,
5416 it must be less than 1/4th of the free space on a page including
5417 record overhead.
5418
5419 MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072.
5420
5421 For page sizes = 16k, InnoDB historically reported 3500 bytes here,
5422 But the MySQL limit of 3072 was always used through the handler
5423 interface.
5424
5425 Note: Handle 16k and 32k pages the same here since the limits
5426 are higher than imposed by MySQL. */
5427
5428 switch (srv_page_size) {
5429 case 4096:
5430 /* Hack: allow mysql.innodb_index_stats to be created. */
5431 /* FIXME: rewrite this API, and in sql_table.cc consider
5432 that in index-organized tables (such as InnoDB), secondary
5433 index records will be padded with the PRIMARY KEY, instead
5434 of some short ROWID or record heap address. */
5435 return(1173);
5436 case 8192:
5437 return(1536);
5438 default:
5439 return(3500);
5440 }
5441 }
5442
5443 /****************************************************************//**
5444 Returns the key map of keys that are usable for scanning.
5445 @return key_map_full */
5446
5447 const key_map*
keys_to_use_for_scanning()5448 ha_innobase::keys_to_use_for_scanning()
5449 /*===================================*/
5450 {
5451 return(&key_map_full);
5452 }
5453
5454 /****************************************************************//**
5455 Ensures that if there's a concurrent inplace ADD INDEX, being-indexed virtual
5456 columns are computed. They are not marked as indexed in the old table, so the
5457 server won't add them to the read_set automatically */
5458 void
column_bitmaps_signal()5459 ha_innobase::column_bitmaps_signal()
5460 /*================================*/
5461 {
5462 if (!table->vfield || table->current_lock != F_WRLCK) {
5463 return;
5464 }
5465
5466 dict_index_t* clust_index = dict_table_get_first_index(m_prebuilt->table);
5467 uint num_v = 0;
5468 for (uint j = 0; j < table->s->virtual_fields; j++) {
5469 if (table->vfield[j]->stored_in_db()) {
5470 continue;
5471 }
5472
5473 dict_col_t* col = &m_prebuilt->table->v_cols[num_v].m_col;
5474 if (col->ord_part ||
5475 (dict_index_is_online_ddl(clust_index) &&
5476 row_log_col_is_indexed(clust_index, num_v))) {
5477 table->mark_virtual_column_with_deps(table->vfield[j]);
5478 }
5479 num_v++;
5480 }
5481 }
5482
5483
5484 /****************************************************************//**
5485 Determines if table caching is supported.
5486 @return HA_CACHE_TBL_ASKTRANSACT */
5487
5488 uint8
table_cache_type()5489 ha_innobase::table_cache_type()
5490 /*===========================*/
5491 {
5492 return(HA_CACHE_TBL_ASKTRANSACT);
5493 }
5494
5495 /****************************************************************//**
5496 Determines if the primary key is clustered index.
5497 @return true */
5498
5499 bool
primary_key_is_clustered()5500 ha_innobase::primary_key_is_clustered()
5501 /*===================================*/
5502 {
5503 return(true);
5504 }
5505
5506 /** Normalizes a table name string.
5507 A normalized name consists of the database name catenated to '/'
5508 and table name. For example: test/mytable.
5509 On Windows, normalization puts both the database name and the
5510 table name always to lower case if "set_lower_case" is set to TRUE.
5511 @param[out] norm_name Normalized name, null-terminated.
5512 @param[in] name Name to normalize.
5513 @param[in] set_lower_case True if we also should fold to lower case. */
5514 void
normalize_table_name_c_low(char * norm_name,const char * name,ibool set_lower_case)5515 normalize_table_name_c_low(
5516 /*=======================*/
5517 char* norm_name, /* out: normalized name as a
5518 null-terminated string */
5519 const char* name, /* in: table name string */
5520 ibool set_lower_case) /* in: TRUE if we want to set
5521 name to lower case */
5522 {
5523 char* name_ptr;
5524 ulint name_len;
5525 char* db_ptr;
5526 ulint db_len;
5527 char* ptr;
5528 ulint norm_len;
5529
5530 /* Scan name from the end */
5531
5532 ptr = strend(name) - 1;
5533
5534 /* seek to the last path separator */
5535 while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5536 ptr--;
5537 }
5538
5539 name_ptr = ptr + 1;
5540 name_len = strlen(name_ptr);
5541
5542 /* skip any number of path separators */
5543 while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
5544 ptr--;
5545 }
5546
5547 DBUG_ASSERT(ptr >= name);
5548
5549 /* seek to the last but one path separator or one char before
5550 the beginning of name */
5551 db_len = 0;
5552 while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5553 ptr--;
5554 db_len++;
5555 }
5556
5557 db_ptr = ptr + 1;
5558
5559 norm_len = db_len + name_len + sizeof "/";
5560 ut_a(norm_len < FN_REFLEN - 1);
5561
5562 memcpy(norm_name, db_ptr, db_len);
5563
5564 norm_name[db_len] = '/';
5565
5566 /* Copy the name and null-byte. */
5567 memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
5568
5569 if (set_lower_case) {
5570 innobase_casedn_str(norm_name);
5571 }
5572 }
5573
/** Constructor.
Only stores its arguments in the corresponding members; nothing else is
done here. In addition, m_table starts out as NULL,
m_drop_before_rollback as false, and m_default_row_format is taken from
the value of innodb_default_row_format at construction time.
@param[in]	thd		stored in m_thd
@param[in]	form		stored in m_form
@param[in]	create_info	stored in m_create_info
@param[in]	table_name	stored in m_table_name (the pointer is
				kept, the string is not copied)
@param[in]	remote_path	stored in m_remote_path (the pointer is
				kept, the string is not copied)
@param[in]	file_per_table	stored in m_innodb_file_per_table
@param[in]	trx		stored in m_trx */
create_table_info_t::create_table_info_t(
	THD*		thd,
	const TABLE*	form,
	HA_CREATE_INFO*	create_info,
	char*		table_name,
	char*		remote_path,
	bool		file_per_table,
	trx_t*		trx)
	: m_thd(thd),
	  m_trx(trx),
	  m_form(form),
	  m_default_row_format(innodb_default_row_format),
	  m_create_info(create_info),
	  m_table_name(table_name), m_table(NULL),
	  m_drop_before_rollback(false),
	  m_remote_path(remote_path),
	  m_innodb_file_per_table(file_per_table)
{
}
5593
/** Normalizes a table name string.
This is a thin wrapper that forwards all three arguments unchanged to
normalize_table_name_c_low().
A normalized name consists of the database name catenated to '/'
and table name. For example: test/mytable.
@param[out]	norm_name	Normalized name, null-terminated.
@param[in]	name		Name to normalize.
@param[in]	set_lower_case	True if we also should fold to lower case. */
void
create_table_info_t::normalize_table_name_low(
	char*		norm_name,
	const char*	name,
	ibool		set_lower_case)
{
	normalize_table_name_c_low(norm_name, name, set_lower_case);
}
5610
5611 #if !defined(DBUG_OFF)
5612 /*********************************************************************
5613 Test normalize_table_name_low(). */
5614 static
5615 void
test_normalize_table_name_low()5616 test_normalize_table_name_low()
5617 /*===========================*/
5618 {
5619 char norm_name[FN_REFLEN];
5620 const char* test_data[][2] = {
5621 /* input, expected result */
5622 {"./mysqltest/t1", "mysqltest/t1"},
5623 {"./test/#sql-842b_2", "test/#sql-842b_2"},
5624 {"./test/#sql-85a3_10", "test/#sql-85a3_10"},
5625 {"./test/#sql2-842b-2", "test/#sql2-842b-2"},
5626 {"./test/bug29807", "test/bug29807"},
5627 {"./test/foo", "test/foo"},
5628 {"./test/innodb_bug52663", "test/innodb_bug52663"},
5629 {"./test/t", "test/t"},
5630 {"./test/t1", "test/t1"},
5631 {"./test/t10", "test/t10"},
5632 {"/a/b/db/table", "db/table"},
5633 {"/a/b/db///////table", "db/table"},
5634 {"/a/b////db///////table", "db/table"},
5635 {"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5636 {"db/table", "db/table"},
5637 {"ddd/t", "ddd/t"},
5638 {"d/ttt", "d/ttt"},
5639 {"d/t", "d/t"},
5640 {".\\mysqltest\\t1", "mysqltest/t1"},
5641 {".\\test\\#sql-842b_2", "test/#sql-842b_2"},
5642 {".\\test\\#sql-85a3_10", "test/#sql-85a3_10"},
5643 {".\\test\\#sql2-842b-2", "test/#sql2-842b-2"},
5644 {".\\test\\bug29807", "test/bug29807"},
5645 {".\\test\\foo", "test/foo"},
5646 {".\\test\\innodb_bug52663", "test/innodb_bug52663"},
5647 {".\\test\\t", "test/t"},
5648 {".\\test\\t1", "test/t1"},
5649 {".\\test\\t10", "test/t10"},
5650 {"C:\\a\\b\\db\\table", "db/table"},
5651 {"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"},
5652 {"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"},
5653 {"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5654 {"db\\table", "db/table"},
5655 {"ddd\\t", "ddd/t"},
5656 {"d\\ttt", "d/ttt"},
5657 {"d\\t", "d/t"},
5658 };
5659
5660 for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5661 printf("test_normalize_table_name_low():"
5662 " testing \"%s\", expected \"%s\"... ",
5663 test_data[i][0], test_data[i][1]);
5664
5665 create_table_info_t::normalize_table_name_low(
5666 norm_name, test_data[i][0], FALSE);
5667
5668 if (strcmp(norm_name, test_data[i][1]) == 0) {
5669 printf("ok\n");
5670 } else {
5671 printf("got \"%s\"\n", norm_name);
5672 ut_error;
5673 }
5674 }
5675 }
5676
5677 /*********************************************************************
5678 Test ut_format_name(). */
5679 static
5680 void
test_ut_format_name()5681 test_ut_format_name()
5682 /*=================*/
5683 {
5684 char buf[NAME_LEN * 3];
5685
5686 struct {
5687 const char* name;
5688 ulint buf_size;
5689 const char* expected;
5690 } test_data[] = {
5691 {"test/t1", sizeof(buf), "`test`.`t1`"},
5692 {"test/t1", 12, "`test`.`t1`"},
5693 {"test/t1", 11, "`test`.`t1"},
5694 {"test/t1", 10, "`test`.`t"},
5695 {"test/t1", 9, "`test`.`"},
5696 {"test/t1", 8, "`test`."},
5697 {"test/t1", 7, "`test`"},
5698 {"test/t1", 6, "`test"},
5699 {"test/t1", 5, "`tes"},
5700 {"test/t1", 4, "`te"},
5701 {"test/t1", 3, "`t"},
5702 {"test/t1", 2, "`"},
5703 {"test/t1", 1, ""},
5704 {"test/t1", 0, "BUF_NOT_CHANGED"},
5705 {"table", sizeof(buf), "`table`"},
5706 {"ta'le", sizeof(buf), "`ta'le`"},
5707 {"ta\"le", sizeof(buf), "`ta\"le`"},
5708 {"ta`le", sizeof(buf), "`ta``le`"},
5709 };
5710
5711 for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5712
5713 memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
5714
5715 char* ret;
5716
5717 ret = ut_format_name(test_data[i].name,
5718 buf,
5719 test_data[i].buf_size);
5720
5721 ut_a(ret == buf);
5722
5723 if (strcmp(buf, test_data[i].expected) == 0) {
5724 ib::info() << "ut_format_name(" << test_data[i].name
5725 << ", buf, " << test_data[i].buf_size << "),"
5726 " expected " << test_data[i].expected
5727 << ", OK";
5728 } else {
5729 ib::error() << "ut_format_name(" << test_data[i].name
5730 << ", buf, " << test_data[i].buf_size << "),"
5731 " expected " << test_data[i].expected
5732 << ", ERROR: got " << buf;
5733 ut_error;
5734 }
5735 }
5736 }
5737 #endif /* !DBUG_OFF */
5738
5739 /** Match index columns between MySQL and InnoDB.
5740 This function checks whether the index column information
5741 is consistent between KEY info from mysql and that from innodb index.
5742 @param[in] key_info Index info from mysql
5743 @param[in] index_info Index info from InnoDB
5744 @return true if all column types match. */
5745 static
5746 bool
innobase_match_index_columns(const KEY * key_info,const dict_index_t * index_info)5747 innobase_match_index_columns(
5748 const KEY* key_info,
5749 const dict_index_t* index_info)
5750 {
5751 const KEY_PART_INFO* key_part;
5752 const KEY_PART_INFO* key_end;
5753 const dict_field_t* innodb_idx_fld;
5754 const dict_field_t* innodb_idx_fld_end;
5755
5756 DBUG_ENTER("innobase_match_index_columns");
5757
5758 /* Check whether user defined index column count matches */
5759 if (key_info->user_defined_key_parts !=
5760 index_info->n_user_defined_cols) {
5761 DBUG_RETURN(FALSE);
5762 }
5763
5764 key_part = key_info->key_part;
5765 key_end = key_part + key_info->user_defined_key_parts;
5766 innodb_idx_fld = index_info->fields;
5767 innodb_idx_fld_end = index_info->fields + index_info->n_fields;
5768
5769 /* Check each index column's datatype. We do not check
5770 column name because there exists case that index
5771 column name got modified in mysql but such change does not
5772 propagate to InnoDB.
5773 One hidden assumption here is that the index column sequences
5774 are matched up between those in mysql and InnoDB. */
5775 for (; key_part != key_end; ++key_part) {
5776 ulint col_type;
5777 ibool is_unsigned;
5778 ulint mtype = innodb_idx_fld->col->mtype;
5779
5780 /* Need to translate to InnoDB column type before
5781 comparison. */
5782 col_type = get_innobase_type_from_mysql_type(
5783 &is_unsigned, key_part->field);
5784
5785 /* Ignore InnoDB specific system columns. */
5786 while (mtype == DATA_SYS) {
5787 innodb_idx_fld++;
5788
5789 if (innodb_idx_fld >= innodb_idx_fld_end) {
5790 DBUG_RETURN(FALSE);
5791 }
5792 }
5793
5794 /* MariaDB-5.5 compatibility */
5795 if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
5796 key_part->field->real_type() == MYSQL_TYPE_SET) &&
5797 mtype == DATA_FIXBINARY) {
5798 col_type= DATA_FIXBINARY;
5799 }
5800
5801 if (col_type != mtype) {
5802 /* If the col_type we get from mysql type is a geometry
5803 data type, we should check if mtype is a legacy type
5804 from 5.6, either upgraded to DATA_GEOMETRY or not.
5805 This is indeed not an accurate check, but should be
5806 safe, since DATA_BLOB would be upgraded once we create
5807 spatial index on it and we intend to use DATA_GEOMETRY
5808 for legacy GIS data types which are of var-length. */
5809 switch (col_type) {
5810 case DATA_GEOMETRY:
5811 if (mtype == DATA_BLOB) {
5812 break;
5813 }
5814 /* Fall through */
5815 default:
5816 /* Column type mismatches */
5817 DBUG_RETURN(false);
5818 }
5819 }
5820
5821 innodb_idx_fld++;
5822 }
5823
5824 DBUG_RETURN(TRUE);
5825 }
5826
5827 /** Build a template for a base column for a virtual column
5828 @param[in] table MySQL TABLE
5829 @param[in] clust_index InnoDB clustered index
5830 @param[in] field field in MySQL table
5831 @param[in] col InnoDB column
5832 @param[in,out] templ template to fill
5833 @param[in] col_no field index for virtual col
5834 */
5835 static
5836 void
innobase_vcol_build_templ(const TABLE * table,dict_index_t * clust_index,Field * field,const dict_col_t * col,mysql_row_templ_t * templ,ulint col_no)5837 innobase_vcol_build_templ(
5838 const TABLE* table,
5839 dict_index_t* clust_index,
5840 Field* field,
5841 const dict_col_t* col,
5842 mysql_row_templ_t* templ,
5843 ulint col_no)
5844 {
5845 templ->col_no = col_no;
5846 templ->is_virtual = col->is_virtual();
5847
5848 if (templ->is_virtual) {
5849 templ->clust_rec_field_no = ULINT_UNDEFINED;
5850 templ->rec_field_no = col->ind;
5851 } else {
5852 templ->clust_rec_field_no = dict_col_get_clust_pos(
5853 col, clust_index);
5854 ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
5855
5856 templ->rec_field_no = templ->clust_rec_field_no;
5857 }
5858
5859 if (field->real_maybe_null()) {
5860 templ->mysql_null_byte_offset =
5861 field->null_offset();
5862
5863 templ->mysql_null_bit_mask = (ulint) field->null_bit;
5864 } else {
5865 templ->mysql_null_bit_mask = 0;
5866 }
5867
5868 templ->mysql_col_offset = static_cast<ulint>(
5869 get_field_offset(table, field));
5870 templ->mysql_col_len = static_cast<ulint>(field->pack_length());
5871 templ->type = col->mtype;
5872 templ->mysql_type = static_cast<ulint>(field->type());
5873
5874 if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
5875 templ->mysql_length_bytes = static_cast<ulint>(
5876 ((Field_varstring*) field)->length_bytes);
5877 }
5878
5879 templ->charset = dtype_get_charset_coll(col->prtype);
5880 templ->mbminlen = dict_col_get_mbminlen(col);
5881 templ->mbmaxlen = dict_col_get_mbmaxlen(col);
5882 templ->is_unsigned = col->prtype & DATA_UNSIGNED;
5883 }
5884
/** Build template for the virtual columns and their base columns. This
is done when the table first opened.

If s_templ->vtempl is already set, the function returns without
changing anything. Otherwise s_templ->vtempl is allocated with
(ncol + n_v_col) slots, where ncol is the number of InnoDB columns
minus DATA_N_SYS_COLS and n_v_col counts the virtual columns of
ib_table plus those in add_v:
- slots [0, ncol) are filled only for columns that are a base column
  of some virtual column; the remaining slots stay zero-initialized;
- slots [ncol, ncol + n_v_col) hold the virtual column templates.
A copy of table->s->default_values is stored in s_templ->default_rec.

@param[in]	table		MySQL TABLE
@param[in]	ib_table	InnoDB dict_table_t
@param[in,out]	s_templ		InnoDB template structure
@param[in]	add_v		new virtual columns added along with
				add index call, or NULL
@param[in]	locked		true if dict_sys mutex is held; if false,
				the mutex is acquired and released here */
void
innobase_build_v_templ(
	const TABLE*		table,
	const dict_table_t*	ib_table,
	dict_vcol_templ_t*	s_templ,
	const dict_add_v_col_t*	add_v,
	bool			locked)
{
	ulint	ncol = unsigned(ib_table->n_cols) - DATA_N_SYS_COLS;
	ulint	n_v_col = ib_table->n_v_cols;
	/* marker[i] is set when column i is a base column of at least
	one virtual column. Only the first ncol entries are used. */
	bool	marker[REC_MAX_N_FIELDS];

	DBUG_ENTER("innobase_build_v_templ");
	ut_ad(ncol < REC_MAX_N_FIELDS);

	if (add_v != NULL) {
		n_v_col += add_v->n_v_col;
	}

	ut_ad(n_v_col > 0);

	if (!locked) {
		mutex_enter(&dict_sys.mutex);
	}

	/* The template has been built already: nothing to do. */
	if (s_templ->vtempl) {
		if (!locked) {
			mutex_exit(&dict_sys.mutex);
		}
		DBUG_VOID_RETURN;
	}

	memset(marker, 0, sizeof(bool) * ncol);

	s_templ->vtempl = static_cast<mysql_row_templ_t**>(
		ut_zalloc_nokey((ncol + n_v_col)
				* sizeof *s_templ->vtempl));
	s_templ->n_col = ncol;
	s_templ->n_v_col = n_v_col;
	s_templ->rec_len = table->s->reclength;
	s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len);
	memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len);

	/* Mark those columns could be base columns */
	for (ulint i = 0; i < ib_table->n_v_cols; i++) {
		const dict_v_col_t*	vcol = dict_table_get_nth_v_col(
			ib_table, i);

		for (ulint j = vcol->num_base; j--; ) {
			marker[vcol->base_col[j]->ind] = true;
		}
	}

	/* Likewise for the base columns of the virtual columns that
	are being added. */
	if (add_v) {
		for (ulint i = 0; i < add_v->n_v_col; i++) {
			const dict_v_col_t*	vcol = &add_v->v_col[i];

			for (ulint j = vcol->num_base; j--; ) {
				marker[vcol->base_col[j]->ind] = true;
			}
		}
	}

	/* j counts the stored (non-virtual) fields seen so far,
	z counts the virtual fields seen so far. */
	ulint	j = 0;
	ulint	z = 0;

	dict_index_t*	clust_index = dict_table_get_first_index(ib_table);

	for (ulint i = 0; i < table->s->fields; i++) {
		Field*  field = table->field[i];

		/* Build template for virtual columns */
		if (!field->stored_in_db()) {
#ifdef UNIV_DEBUG
			const char*	name;

			/* Virtual columns beyond ib_table->n_v_def
			are looked up in add_v. */
			if (z >= ib_table->n_v_def) {
				name = add_v->v_col_name[z - ib_table->n_v_def];
			} else {
				name = dict_table_get_v_col_name(ib_table, z);
			}

			ut_ad(!my_strcasecmp(system_charset_info, name,
					     field->field_name.str));
#endif
			const dict_v_col_t*	vcol;

			if (z >= ib_table->n_v_def) {
				vcol = &add_v->v_col[z - ib_table->n_v_def];
			} else {
				vcol = dict_table_get_nth_v_col(ib_table, z);
			}

			/* Virtual column templates are placed after the
			n_col slots reserved for base columns. */
			s_templ->vtempl[z + s_templ->n_col]
				= static_cast<mysql_row_templ_t*>(
					ut_malloc_nokey(
					sizeof *s_templ->vtempl[j]));

			innobase_vcol_build_templ(
				table, clust_index, field,
				&vcol->m_col,
				s_templ->vtempl[z + s_templ->n_col],
				z);
			z++;
			continue;
		}

		ut_ad(j < ncol);

		/* Build template for base columns */
		if (marker[j]) {
			dict_col_t*   col = dict_table_get_nth_col(
						ib_table, j);

			ut_ad(!my_strcasecmp(system_charset_info,
					     dict_table_get_col_name(
						     ib_table, j),
					     field->field_name.str));

			s_templ->vtempl[j] = static_cast<
				mysql_row_templ_t*>(
					ut_malloc_nokey(
					sizeof *s_templ->vtempl[j]));

			innobase_vcol_build_templ(
				table, clust_index, field, col,
				s_templ->vtempl[j], j);
		}

		j++;
	}

	if (!locked) {
		mutex_exit(&dict_sys.mutex);
	}

	/* Only the pointers are stored; the strings are not copied. */
	s_templ->db_name = table->s->db.str;
	s_templ->tb_name = table->s->table_name.str;
	DBUG_VOID_RETURN;
}
6033
6034 /** Check consistency between .frm indexes and InnoDB indexes.
6035 @param[in] table table object formed from .frm
6036 @param[in] ib_table InnoDB table definition
6037 @retval true if not errors were found */
6038 static bool
check_index_consistency(const TABLE * table,const dict_table_t * ib_table)6039 check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
6040 {
6041 ulint mysql_num_index = table->s->keys;
6042 ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
6043 bool ret = true;
6044
6045 /* If there exists inconsistency between MySQL and InnoDB dictionary
6046 (metadata) information, the number of index defined in MySQL
6047 could exceed that in InnoDB, return error */
6048 if (ib_num_index < mysql_num_index) {
6049 ret = false;
6050 goto func_exit;
6051 }
6052
6053 /* For each index in the mysql key_info array, fetch its
6054 corresponding InnoDB index pointer into index_mapping
6055 array. */
6056 for (ulint count = 0; count < mysql_num_index; count++) {
6057 const dict_index_t* index = dict_table_get_index_on_name(
6058 ib_table, table->key_info[count].name.str);
6059
6060 if (index == NULL) {
6061 sql_print_error("Cannot find index %s in InnoDB"
6062 " index dictionary.",
6063 table->key_info[count].name.str);
6064 ret = false;
6065 goto func_exit;
6066 }
6067
6068 /* Double check fetched index has the same
6069 column info as those in mysql key_info. */
6070 if (!innobase_match_index_columns(&table->key_info[count],
6071 index)) {
6072 sql_print_error("Found index %s whose column info"
6073 " does not match that of MariaDB.",
6074 table->key_info[count].name.str);
6075 ret = false;
6076 goto func_exit;
6077 }
6078 }
6079
6080 func_exit:
6081 return ret;
6082 }
6083
6084 /********************************************************************//**
6085 Get the upper limit of the MySQL integral and floating-point type.
6086 @return maximum allowed value for the field */
6087 UNIV_INTERN
6088 ulonglong
innobase_get_int_col_max_value(const Field * field)6089 innobase_get_int_col_max_value(
6090 /*===========================*/
6091 const Field* field) /*!< in: MySQL field */
6092 {
6093 ulonglong max_value = 0;
6094
6095 switch (field->key_type()) {
6096 /* TINY */
6097 case HA_KEYTYPE_BINARY:
6098 max_value = 0xFFULL;
6099 break;
6100 case HA_KEYTYPE_INT8:
6101 max_value = 0x7FULL;
6102 break;
6103 /* SHORT */
6104 case HA_KEYTYPE_USHORT_INT:
6105 max_value = 0xFFFFULL;
6106 break;
6107 case HA_KEYTYPE_SHORT_INT:
6108 max_value = 0x7FFFULL;
6109 break;
6110 /* MEDIUM */
6111 case HA_KEYTYPE_UINT24:
6112 max_value = 0xFFFFFFULL;
6113 break;
6114 case HA_KEYTYPE_INT24:
6115 max_value = 0x7FFFFFULL;
6116 break;
6117 /* LONG */
6118 case HA_KEYTYPE_ULONG_INT:
6119 max_value = 0xFFFFFFFFULL;
6120 break;
6121 case HA_KEYTYPE_LONG_INT:
6122 max_value = 0x7FFFFFFFULL;
6123 break;
6124 /* BIG */
6125 case HA_KEYTYPE_ULONGLONG:
6126 max_value = 0xFFFFFFFFFFFFFFFFULL;
6127 break;
6128 case HA_KEYTYPE_LONGLONG:
6129 max_value = 0x7FFFFFFFFFFFFFFFULL;
6130 break;
6131 case HA_KEYTYPE_FLOAT:
6132 /* We use the maximum as per IEEE754-2008 standard, 2^24 */
6133 max_value = 0x1000000ULL;
6134 break;
6135 case HA_KEYTYPE_DOUBLE:
6136 /* We use the maximum as per IEEE754-2008 standard, 2^53 */
6137 max_value = 0x20000000000000ULL;
6138 break;
6139 default:
6140 ut_error;
6141 }
6142
6143 return(max_value);
6144 }
6145
6146 /** Initialize the AUTO_INCREMENT column metadata.
6147
6148 Since a partial table definition for a persistent table can already be
6149 present in the InnoDB dict_sys cache before it is accessed from SQL,
6150 we have to initialize the AUTO_INCREMENT counter on the first
6151 ha_innobase::open().
6152
6153 @param[in,out] table persistent table
6154 @param[in] field the AUTO_INCREMENT column */
6155 static
6156 void
initialize_auto_increment(dict_table_t * table,const Field * field)6157 initialize_auto_increment(dict_table_t* table, const Field* field)
6158 {
6159 ut_ad(!table->is_temporary());
6160
6161 const unsigned col_no = innodb_col_no(field);
6162
6163 table->autoinc_mutex.lock();
6164
6165 table->persistent_autoinc = 1
6166 + dict_table_get_nth_col_pos(table, col_no, NULL);
6167
6168 if (table->autoinc) {
6169 /* Already initialized. Our caller checked
6170 table->persistent_autoinc without
6171 autoinc_mutex protection, and there might be multiple
6172 ha_innobase::open() executing concurrently. */
6173 } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
6174 /* If the recovery level is set so high that writes
6175 are disabled we force the AUTOINC counter to 0
6176 value effectively disabling writes to the table.
6177 Secondly, we avoid reading the table in case the read
6178 results in failure due to a corrupted table/index.
6179
6180 We will not return an error to the client, so that the
6181 tables can be dumped with minimal hassle. If an error
6182 were returned in this case, the first attempt to read
6183 the table would fail and subsequent SELECTs would succeed. */
6184 } else if (table->persistent_autoinc) {
6185 table->autoinc = innobase_next_autoinc(
6186 btr_read_autoinc_with_fallback(table, col_no),
6187 1 /* need */,
6188 1 /* auto_increment_increment */,
6189 0 /* auto_increment_offset */,
6190 innobase_get_int_col_max_value(field));
6191 }
6192
6193 table->autoinc_mutex.unlock();
6194 }
6195
/** Open an InnoDB table.
Looks up the InnoDB dictionary object for the table, verifies that the
number of columns agrees with the MariaDB table definition, creates
m_prebuilt, builds the virtual column template if the table has virtual
columns, determines ref_length, initializes the AUTO_INCREMENT metadata
when needed and assigns the full-text parser to indexes that use one.
@param[in]	name	table name
@return error code
@retval 0 on success
@retval HA_ERR_NO_SUCH_TABLE if the table cannot be opened in the
InnoDB data dictionary
@retval HA_ERR_CRASHED_ON_USAGE if InnoDB and MariaDB disagree on the
number of columns
@retval HA_ERR_TABLESPACE_MISSING or HA_ERR_DECRYPTION_FAILED if the
table is not readable and thd_tablespace_op() does not hold */
int
ha_innobase::open(const char* name, int, uint)
{
	/* TODO: If trx_rollback_recovered(bool all=false) is ever
	removed, the first-time open() must hold (or acquire and release)
	a table lock that conflicts with trx_resurrect_table_locks(),
	to ensure that any recovered incomplete ALTER TABLE will have been
	rolled back. Otherwise, dict_table_t::instant could be cleared by
	the rollback invoking dict_index_t::clear_instant_alter() while
	open table handles exist in client connections. */

	char	norm_name[FN_REFLEN];

	DBUG_ENTER("ha_innobase::open");

	normalize_table_name(norm_name, name);

	m_user_thd = NULL;

	/* Will be allocated if it is needed in ::update_row() */
	m_upd_buf = NULL;
	m_upd_buf_size = 0;

	char*		is_part = is_partition(norm_name);
	THD*		thd = ha_thd();
	dict_table_t*	ib_table = open_dict_table(name, norm_name, is_part,
						   DICT_ERR_IGNORE_FK_NOKEY);

	DEBUG_SYNC(thd, "ib_open_after_dict_open");

	if (NULL == ib_table) {

		/* The failure is written to the error log only for
		partitions. */
		if (is_part) {
			sql_print_error("Failed to open table %s.\n",
					norm_name);
		}
		set_my_errno(ENOENT);

		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}

	/* Compare the number of columns known to MariaDB with the
	number known to InnoDB. The InnoDB count is reduced by one
	when DICT_TF2_FTS_HAS_DOC_ID is set. */
	size_t n_fields = omits_virtual_cols(*table_share)
		? table_share->stored_fields : table_share->fields;
	size_t n_cols = dict_table_get_n_user_cols(ib_table)
		+ dict_table_get_n_v_cols(ib_table)
		- !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);

	if (UNIV_UNLIKELY(n_cols != n_fields)) {
		ib::warn() << "Table " << norm_name << " contains "
			<< n_cols << " user"
			" defined columns in InnoDB, but " << n_fields
			<< " columns in MariaDB. Please check"
			" INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and"
			" https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
			" for how to resolve the issue.";

		/* Mark this table as corrupted, so the drop table
		or force recovery can still use it, but not others. */
		ib_table->file_unreadable = true;
		ib_table->corrupted = true;
		dict_table_close(ib_table, FALSE, FALSE);
		set_my_errno(ENOENT);
		DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
	}

	innobase_copy_frm_flags_from_table_share(ib_table, table->s);

	MONITOR_INC(MONITOR_TABLE_OPEN);

	if ((ib_table->flags2 & DICT_TF2_DISCARDED)) {

		ib_senderrf(thd,
			IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
			table->s->table_name.str);

		/* Allow an open because a proper DISCARD should have set
		all the flags and index root page numbers to FIL_NULL that
		should prevent any DML from running but it should allow DDL
		operations. */
	} else if (!ib_table->is_readable()) {
		const fil_space_t* space = ib_table->space;
		if (!space) {
			ib_senderrf(
				thd, IB_LOG_LEVEL_WARN,
				ER_TABLESPACE_MISSING, norm_name);
		}

		/* The open is refused unless thd_tablespace_op()
		holds for this session. */
		if (!thd_tablespace_op(thd)) {
			set_my_errno(ENOENT);
			int	ret_err = HA_ERR_TABLESPACE_MISSING;

			/* If the tablespace is encrypted, report a
			decryption failure instead. */
			if (space && space->crypt_data
			    && space->crypt_data->is_encrypted()) {
				push_warning_printf(
					thd,
					Sql_condition::WARN_LEVEL_WARN,
					HA_ERR_DECRYPTION_FAILED,
					"Table %s in file %s is encrypted"
					" but encryption service or"
					" used key_id %u is not available. "
					" Can't continue reading table.",
					table_share->table_name.str,
					space->chain.start->name,
					space->crypt_data->key_id);
				ret_err = HA_ERR_DECRYPTION_FAILED;
			}

			dict_table_close(ib_table, FALSE, FALSE);
			DBUG_RETURN(ret_err);
		}
	}

	m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);

	m_prebuilt->default_rec = table->s->default_values;
	ut_ad(m_prebuilt->default_rec);

	m_prebuilt->m_mysql_table = table;

	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
	m_primary_key = table->s->primary_key;

	key_used_on_scan = m_primary_key;

	/* Build the virtual column template unless ib_table already
	has one. The check and the build are both done while holding
	dict_sys.mutex, which is why locked=true is passed. */
	if (ib_table->n_v_cols) {
		mutex_enter(&dict_sys.mutex);
		if (ib_table->vc_templ == NULL) {
			ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
			innobase_build_v_templ(
				table, ib_table, ib_table->vc_templ, NULL,
				true);
		}

		mutex_exit(&dict_sys.mutex);
	}

	/* An inconsistency is only logged; the open continues. */
	if (!check_index_consistency(table, ib_table)) {
		sql_print_error("InnoDB indexes are inconsistent with what "
				"defined in .frm for table %s",
				name);
	}

	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
	a row in our table. Note that MySQL may also compare two row
	references for equality by doing a simple memcmp on the strings
	of length ref_length! */
	if (!(m_prebuilt->clust_index_was_generated
	      = dict_index_is_auto_gen_clust(ib_table->indexes.start))) {
		if (m_primary_key >= MAX_KEY) {
			ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;

			/* This mismatch could cause further problems
			if not attended, bring this to the user's attention
			by printing a warning in addition to log a message
			in the errorlog */

			ib_push_frm_error(thd, ib_table, table, 0, true);

			/* If m_primary_key >= MAX_KEY, its (m_primary_key)
			value could be out of bound if continue to index
			into key_info[] array. Find InnoDB primary index,
			and assign its key_length to ref_length.
			In addition, since MySQL indexes are sorted starting
			with primary index, unique index etc., initialize
			ref_length to the first index key length in
			case we fail to find InnoDB cluster index.

			Please note, this will not resolve the primary
			index mismatch problem, other side effects are
			possible if users continue to use the table.
			However, we allow this table to be opened so
			that user can adopt necessary measures for the
			mismatch while still being accessible to the table
			data. */
			if (!table->key_info) {
				ut_ad(!table->s->keys);
				ref_length = 0;
			} else {
				ref_length = table->key_info[0].key_length;
			}

			/* Find corresponding cluster index
			key length in MySQL's key_info[] array */
			for (uint i = 0; i < table->s->keys; i++) {
				dict_index_t*	index;
				index = innobase_get_index(i);
				if (dict_index_is_clust(index)) {
					ref_length =
						 table->key_info[i].key_length;
				}
			}
		} else {
			/* MySQL allocates the buffer for ref.
			key_info->key_length includes space for all key
			columns + one byte for each column that may be
			NULL. ref_length must be as exact as possible to
			save space, because all row reference buffers are
			allocated based on ref_length. */

			ref_length = table->key_info[m_primary_key].key_length;
		}
	} else {
		if (m_primary_key != MAX_KEY) {

			ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;

			/* This mismatch could cause further problems
			if not attended, bring this to the user attention
			by printing a warning in addition to log a message
			in the errorlog */
			ib_push_frm_error(thd, ib_table, table, 0, true);
		}

		ref_length = DATA_ROW_ID_LEN;

		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why key_used_on_scan
		is expected to be the undefined value MAX_KEY here; any
		other value is reported with a warning below.
		The column is the row id in the automatic generation case,
		and it will never be updated anyway. */

		if (key_used_on_scan != MAX_KEY) {
			sql_print_warning(
				"Table %s key_used_on_scan is %u even "
				"though there is no primary key inside "
				"InnoDB.", name, key_used_on_scan);
		}
	}

	/* Index block size in InnoDB: used by MySQL in query optimization */
	stats.block_size = srv_page_size;

	const my_bool for_vc_purge = THDVAR(thd, background_thread);

	/* Initialize the AUTO_INCREMENT metadata only if none of the
	conditions in the first branch holds and the table has an
	AUTO_INCREMENT column. */
	if (for_vc_purge || !m_prebuilt->table
	    || m_prebuilt->table->is_temporary()
	    || m_prebuilt->table->persistent_autoinc
	    || !m_prebuilt->table->is_readable()) {
	} else if (const Field* ai = table->found_next_number_field) {
		initialize_auto_increment(m_prebuilt->table, ai);
	}

	/* Set plugin parser for fulltext index */
	for (uint i = 0; i < table->s->keys; i++) {
		if (table->key_info[i].flags & HA_USES_PARSER) {
			dict_index_t*	index = innobase_get_index(i);
			plugin_ref	parser = table->key_info[i].parser;

			ut_ad(index->type & DICT_FTS);
			index->parser =
				static_cast<st_mysql_ftparser *>(
					plugin_decl(parser)->info);

			DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
				index->parser = &fts_default_parser;);
		}
	}

	ut_ad(!m_prebuilt->table
	      || table->versioned() == m_prebuilt->table->versioned());

	/* info() is not called when for_vc_purge is set. */
	if (!for_vc_purge) {
		info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST
		     | HA_STATUS_OPEN);
	}

	DBUG_RETURN(0);
}
6471
6472 /** Convert MySQL column number to dict_table_t::cols[] offset.
6473 @param[in] field non-virtual column
6474 @return column number relative to dict_table_t::cols[] */
6475 unsigned
innodb_col_no(const Field * field)6476 innodb_col_no(const Field* field)
6477 {
6478 ut_ad(!innobase_is_s_fld(field));
6479 const TABLE* table = field->table;
6480 unsigned col_no = 0;
6481 ut_ad(field == table->field[field->field_index]);
6482 for (unsigned i = 0; i < field->field_index; i++) {
6483 if (table->field[i]->stored_in_db()) {
6484 col_no++;
6485 }
6486 }
6487 return(col_no);
6488 }
6489
6490 /** Opens dictionary table object using table name. For partition, we need to
6491 try alternative lower/upper case names to support moving data files across
6492 platforms.
6493 @param[in] table_name name of the table/partition
6494 @param[in] norm_name normalized name of the table/partition
6495 @param[in] is_partition if this is a partition of a table
6496 @param[in] ignore_err error to ignore for loading dictionary object
6497 @return dictionary table object or NULL if not found */
6498 dict_table_t*
open_dict_table(const char * table_name,const char * norm_name,bool is_partition,dict_err_ignore_t ignore_err)6499 ha_innobase::open_dict_table(
6500 const char*
6501 #ifdef _WIN32
6502 table_name
6503 #endif
6504 ,
6505 const char* norm_name,
6506 bool is_partition,
6507 dict_err_ignore_t ignore_err)
6508 {
6509 DBUG_ENTER("ha_innobase::open_dict_table");
6510 dict_table_t* ib_table = dict_table_open_on_name(norm_name, FALSE,
6511 TRUE, ignore_err);
6512
6513 if (NULL == ib_table && is_partition) {
6514 /* MySQL partition engine hard codes the file name
6515 separator as "#P#". The text case is fixed even if
6516 lower_case_table_names is set to 1 or 2. This is true
6517 for sub-partition names as well. InnoDB always
6518 normalises file names to lower case on Windows, this
6519 can potentially cause problems when copying/moving
6520 tables between platforms.
6521
6522 1) If boot against an installation from Windows
6523 platform, then its partition table name could
6524 be in lower case in system tables. So we will
6525 need to check lower case name when load table.
6526
6527 2) If we boot an installation from other case
6528 sensitive platform in Windows, we might need to
6529 check the existence of table name without lower
6530 case in the system table. */
6531 if (innobase_get_lower_case_table_names() == 1) {
6532 char par_case_name[FN_REFLEN];
6533
6534 #ifndef _WIN32
6535 /* Check for the table using lower
6536 case name, including the partition
6537 separator "P" */
6538 strcpy(par_case_name, norm_name);
6539 innobase_casedn_str(par_case_name);
6540 #else
6541 /* On Windows platfrom, check
6542 whether there exists table name in
6543 system table whose name is
6544 not being normalized to lower case */
6545 create_table_info_t::
6546 normalize_table_name_low(
6547 par_case_name,
6548 table_name, FALSE);
6549 #endif
6550 ib_table = dict_table_open_on_name(
6551 par_case_name, FALSE, TRUE,
6552 ignore_err);
6553 }
6554
6555 if (ib_table != NULL) {
6556 #ifndef _WIN32
6557 sql_print_warning("Partition table %s opened"
6558 " after converting to lower"
6559 " case. The table may have"
6560 " been moved from a case"
6561 " in-sensitive file system."
6562 " Please recreate table in"
6563 " the current file system\n",
6564 norm_name);
6565 #else
6566 sql_print_warning("Partition table %s opened"
6567 " after skipping the step to"
6568 " lower case the table name."
6569 " The table may have been"
6570 " moved from a case sensitive"
6571 " file system. Please"
6572 " recreate table in the"
6573 " current file system\n",
6574 norm_name);
6575 #endif
6576 }
6577 }
6578
6579 DBUG_RETURN(ib_table);
6580 }
6581
6582 handler*
clone(const char * name,MEM_ROOT * mem_root)6583 ha_innobase::clone(
6584 /*===============*/
6585 const char* name, /*!< in: table name */
6586 MEM_ROOT* mem_root) /*!< in: memory context */
6587 {
6588 DBUG_ENTER("ha_innobase::clone");
6589
6590 ha_innobase* new_handler = static_cast<ha_innobase*>(
6591 handler::clone(m_prebuilt->table->name.m_name, mem_root));
6592
6593 if (new_handler != NULL) {
6594 DBUG_ASSERT(new_handler->m_prebuilt != NULL);
6595
6596 new_handler->m_prebuilt->select_lock_type
6597 = m_prebuilt->select_lock_type;
6598 }
6599
6600 DBUG_RETURN(new_handler);
6601 }
6602
6603
6604 uint
max_supported_key_part_length() const6605 ha_innobase::max_supported_key_part_length() const
6606 /*==============================================*/
6607 {
6608 /* A table format specific index column length check will be performed
6609 at ha_innobase::add_index() and row_create_index_for_mysql() */
6610 return(REC_VERSION_56_MAX_INDEX_COL_LEN);
6611 }
6612
6613 /******************************************************************//**
6614 Closes a handle to an InnoDB table.
6615 @return 0 */
6616
6617 int
close()6618 ha_innobase::close()
6619 /*================*/
6620 {
6621 DBUG_ENTER("ha_innobase::close");
6622
6623 row_prebuilt_free(m_prebuilt, FALSE);
6624
6625 if (m_upd_buf != NULL) {
6626 ut_ad(m_upd_buf_size != 0);
6627 my_free(m_upd_buf);
6628 m_upd_buf = NULL;
6629 m_upd_buf_size = 0;
6630 }
6631
6632 MONITOR_INC(MONITOR_TABLE_CLOSE);
6633
6634 /* Tell InnoDB server that there might be work for
6635 utility threads: */
6636
6637 srv_active_wake_master_thread();
6638
6639 DBUG_RETURN(0);
6640 }
6641
6642 /* The following accessor functions should really be inside MySQL code! */
6643
#ifdef WITH_WSREP
/** Transform a string key value, in place, with the strnxfrm() function of
its collation. Only the string-like MySQL types listed in the first group of
case labels are transformed; for every other type the buffer is left
untouched and str_length is returned.
NOTE(review): the argument order of coll->strnxfrm() is presumably
(cs, dst, dstlen, nweights, src, srclen, flags), which would make 'str' the
destination and the local copy the source -- confirm against the collation
handler declaration.
@return length to use for the contents of str: the strnxfrm() result when
wsrep_protocol_version >= 3, otherwise str_length unchanged */
UNIV_INTERN
ulint
wsrep_innobase_mysql_sort(
/*======================*/
					/* out: str contains sort string */
	int		mysql_type,	/* in: MySQL type */
	uint		charset_number,	/* in: number of the charset */
	unsigned char*	str,		/* in: data field */
	unsigned int	str_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned int	buf_length)	/* in: total str buffer length */

{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	/* Unless the protocol >= 3 branch below overrides it, the caller
	gets the input length back. */
	ulint			ret_length = str_length;

	DBUG_ASSERT(str_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
	{
		/* Scratch copy of the input; it is passed to strnxfrm()
		together with str in every call below. */
		uchar	tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		uint	tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;

		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				sql_print_error("InnoDB needs charset %lu for doing "
						"a comparison, but MariaDB cannot "
						"find that charset.",
						(ulong) charset_number);
				/* Unknown charset: abort unconditionally. */
				ut_a(0);
			}
		}

		/* The input must fit into the scratch buffer. */
		ut_a(str_length <= tmp_length);
		memcpy(tmp_str, str, str_length);

		/* First transformation; tmp_length is replaced by the
		length that strnxfrm() returns. */
		tmp_length = charset->coll->strnxfrm(charset, str, str_length,
						     str_length, tmp_str,
						     tmp_length, 0);
		DBUG_ASSERT(tmp_length <= str_length);
		if (wsrep_protocol_version < 3) {
			/* Older protocols: the call is repeated with the
			same arguments (tmp_length now holds the result of
			the first call) and ret_length stays str_length. */
			tmp_length = charset->coll->strnxfrm(
				charset, str, str_length,
				str_length, tmp_str, tmp_length, 0);
			DBUG_ASSERT(tmp_length <= str_length);
		} else {
			/* strnxfrm will expand the destination string,
			   protocols < 3 truncated the sorted string
			   protocols >= 3 gets full sorted string
			*/
			tmp_length = charset->coll->strnxfrm(
				charset, str, buf_length,
				str_length, tmp_str, str_length, 0);
			DBUG_ASSERT(tmp_length <= buf_length);
			ret_length = tmp_length;
		}

		break;
	}
	/* None of the following types is transformed. */
	case MYSQL_TYPE_DECIMAL :
	case MYSQL_TYPE_TINY :
	case MYSQL_TYPE_SHORT :
	case MYSQL_TYPE_LONG :
	case MYSQL_TYPE_FLOAT :
	case MYSQL_TYPE_DOUBLE :
	case MYSQL_TYPE_NULL :
	case MYSQL_TYPE_TIMESTAMP :
	case MYSQL_TYPE_LONGLONG :
	case MYSQL_TYPE_INT24 :
	case MYSQL_TYPE_DATE :
	case MYSQL_TYPE_TIME :
	case MYSQL_TYPE_DATETIME :
	case MYSQL_TYPE_YEAR :
	case MYSQL_TYPE_NEWDATE :
	case MYSQL_TYPE_NEWDECIMAL :
	case MYSQL_TYPE_ENUM :
	case MYSQL_TYPE_SET :
	case MYSQL_TYPE_GEOMETRY :
		break;
	default:
		break;
	}

	return ret_length;
}
#endif /* WITH_WSREP */
6754
6755 /******************************************************************//**
6756 compare two character string according to their charset. */
6757 int
innobase_fts_text_cmp(const void * cs,const void * p1,const void * p2)6758 innobase_fts_text_cmp(
6759 /*==================*/
6760 const void* cs, /*!< in: Character set */
6761 const void* p1, /*!< in: key */
6762 const void* p2) /*!< in: node */
6763 {
6764 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6765 const fts_string_t* s1 = (const fts_string_t*) p1;
6766 const fts_string_t* s2 = (const fts_string_t*) p2;
6767
6768 return(ha_compare_text(
6769 charset, s1->f_str, static_cast<uint>(s1->f_len),
6770 s2->f_str, static_cast<uint>(s2->f_len), 0));
6771 }
6772
6773 /******************************************************************//**
6774 compare two character string case insensitively according to their charset. */
6775 int
innobase_fts_text_case_cmp(const void * cs,const void * p1,const void * p2)6776 innobase_fts_text_case_cmp(
6777 /*=======================*/
6778 const void* cs, /*!< in: Character set */
6779 const void* p1, /*!< in: key */
6780 const void* p2) /*!< in: node */
6781 {
6782 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6783 const fts_string_t* s1 = (const fts_string_t*) p1;
6784 const fts_string_t* s2 = (const fts_string_t*) p2;
6785 ulint newlen;
6786
6787 my_casedn_str(charset, (char*) s2->f_str);
6788
6789 newlen = strlen((const char*) s2->f_str);
6790
6791 return(ha_compare_text(
6792 charset, s1->f_str, static_cast<uint>(s1->f_len),
6793 s2->f_str, static_cast<uint>(newlen), 0));
6794 }
6795
6796 /******************************************************************//**
6797 Get the first character's code position for FTS index partition. */
6798 ulint
innobase_strnxfrm(const CHARSET_INFO * cs,const uchar * str,const ulint len)6799 innobase_strnxfrm(
6800 /*==============*/
6801 const CHARSET_INFO*
6802 cs, /*!< in: Character set */
6803 const uchar* str, /*!< in: string */
6804 const ulint len) /*!< in: string length */
6805 {
6806 uchar mystr[2];
6807 ulint value;
6808
6809 if (!str || len == 0) {
6810 return(0);
6811 }
6812
6813 my_strnxfrm(cs, (uchar*) mystr, 2, str, len);
6814
6815 value = mach_read_from_2(mystr);
6816
6817 if (value > 255) {
6818 value = value / 256;
6819 }
6820
6821 return(value);
6822 }
6823
6824 /******************************************************************//**
6825 compare two character string according to their charset. */
6826 int
innobase_fts_text_cmp_prefix(const void * cs,const void * p1,const void * p2)6827 innobase_fts_text_cmp_prefix(
6828 /*=========================*/
6829 const void* cs, /*!< in: Character set */
6830 const void* p1, /*!< in: prefix key */
6831 const void* p2) /*!< in: value to compare */
6832 {
6833 const CHARSET_INFO* charset = (const CHARSET_INFO*) cs;
6834 const fts_string_t* s1 = (const fts_string_t*) p1;
6835 const fts_string_t* s2 = (const fts_string_t*) p2;
6836 int result;
6837
6838 result = ha_compare_text(
6839 charset, s2->f_str, static_cast<uint>(s2->f_len),
6840 s1->f_str, static_cast<uint>(s1->f_len), 1);
6841
6842 /* We switched s1, s2 position in ha_compare_text. So we need
6843 to negate the result */
6844 return(-result);
6845 }
6846
6847 /******************************************************************//**
6848 Makes all characters in a string lower case. */
6849 size_t
innobase_fts_casedn_str(CHARSET_INFO * cs,char * src,size_t src_len,char * dst,size_t dst_len)6850 innobase_fts_casedn_str(
6851 /*====================*/
6852 CHARSET_INFO* cs, /*!< in: Character set */
6853 char* src, /*!< in: string to put in lower case */
6854 size_t src_len,/*!< in: input string length */
6855 char* dst, /*!< in: buffer for result string */
6856 size_t dst_len)/*!< in: buffer size */
6857 {
6858 if (cs->casedn_multiply == 1) {
6859 memcpy(dst, src, src_len);
6860 dst[src_len] = 0;
6861 my_casedn_str(cs, dst);
6862
6863 return(strlen(dst));
6864 } else {
6865 return(cs->cset->casedn(cs, src, src_len, dst, dst_len));
6866 }
6867 }
6868
/** Test whether a character counts as part of a word: true when its ctype
flags contain any of _MY_U, _MY_L or _MY_NMR (presumably the upper case,
lower case and numeral classes -- confirm against the ctype definitions),
or when the byte itself is an underscore.
@param c	ctype flags of the character
@param ch	byte value to compare against '_' */
#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_')

/** Test for a "miscellaneous" word character. This always evaluates to 0,
so no character is ever treated as one. */
#define misc_word_char(X) 0
6872
6873 /*************************************************************//**
6874 Get the next token from the given string and store it in *token.
6875 It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
6876 @return length of string processed */
6877 ulint
innobase_mysql_fts_get_token(CHARSET_INFO * cs,const byte * start,const byte * end,fts_string_t * token)6878 innobase_mysql_fts_get_token(
6879 /*=========================*/
6880 CHARSET_INFO* cs, /*!< in: Character set */
6881 const byte* start, /*!< in: start of text */
6882 const byte* end, /*!< in: one character past end of
6883 text */
6884 fts_string_t* token) /*!< out: token's text */
6885 {
6886 int mbl;
6887 const uchar* doc = start;
6888
6889 ut_a(cs);
6890
6891 token->f_n_char = token->f_len = 0;
6892 token->f_str = NULL;
6893
6894 for (;;) {
6895
6896 if (doc >= end) {
6897 return ulint(doc - start);
6898 }
6899
6900 int ctype;
6901
6902 mbl = cs->cset->ctype(
6903 cs, &ctype, doc, (const uchar*) end);
6904
6905 if (true_word_char(ctype, *doc)) {
6906 break;
6907 }
6908
6909 doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6910 }
6911
6912 ulint mwc = 0;
6913 ulint length = 0;
6914
6915 token->f_str = const_cast<byte*>(doc);
6916
6917 while (doc < end) {
6918
6919 int ctype;
6920
6921 mbl = cs->cset->ctype(
6922 cs, &ctype, (uchar*) doc, (uchar*) end);
6923 if (true_word_char(ctype, *doc)) {
6924 mwc = 0;
6925 } else if (!misc_word_char(*doc) || mwc) {
6926 break;
6927 } else {
6928 ++mwc;
6929 }
6930
6931 ++length;
6932
6933 doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6934 }
6935
6936 token->f_len = (uint) (doc - token->f_str) - mwc;
6937 token->f_n_char = length;
6938
6939 return ulint(doc - start);
6940 }
6941
6942 /** Converts a MySQL type to an InnoDB type. Note that this function returns
6943 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
6944 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
6945 @param[out] unsigned_flag DATA_UNSIGNED if an 'unsigned type'; at least
6946 ENUM and SET, and unsigned integer types are 'unsigned types'
6947 @param[in] f MySQL Field
6948 @return DATA_BINARY, DATA_VARCHAR, ... */
6949 ulint
get_innobase_type_from_mysql_type(ulint * unsigned_flag,const void * f)6950 get_innobase_type_from_mysql_type(
6951 ulint* unsigned_flag,
6952 const void* f)
6953 {
6954 const class Field* field = reinterpret_cast<const class Field*>(f);
6955
6956 /* The following asserts try to check that the MySQL type code fits in
6957 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
6958 the type */
6959
6960 DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
6961 DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
6962 DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
6963 DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
6964 DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
6965
6966 if (field->flags & UNSIGNED_FLAG) {
6967
6968 *unsigned_flag = DATA_UNSIGNED;
6969 } else {
6970 *unsigned_flag = 0;
6971 }
6972
6973 if (field->real_type() == MYSQL_TYPE_ENUM
6974 || field->real_type() == MYSQL_TYPE_SET) {
6975
6976 /* MySQL has field->type() a string type for these, but the
6977 data is actually internally stored as an unsigned integer
6978 code! */
6979
6980 *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
6981 flag set to zero, even though
6982 internally this is an unsigned
6983 integer type */
6984 return(DATA_INT);
6985 }
6986
6987 switch (field->type()) {
6988 /* NOTE that we only allow string types in DATA_MYSQL and
6989 DATA_VARMYSQL */
6990 case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
6991 case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
6992 if (field->binary()) {
6993 return(DATA_BINARY);
6994 } else if (field->charset() == &my_charset_latin1) {
6995 return(DATA_VARCHAR);
6996 } else {
6997 return(DATA_VARMYSQL);
6998 }
6999 case MYSQL_TYPE_BIT:
7000 case MYSQL_TYPE_STRING:
7001 if (field->binary()) {
7002 return(DATA_FIXBINARY);
7003 } else if (field->charset() == &my_charset_latin1) {
7004 return(DATA_CHAR);
7005 } else {
7006 return(DATA_MYSQL);
7007 }
7008 case MYSQL_TYPE_NEWDECIMAL:
7009 return(DATA_FIXBINARY);
7010 case MYSQL_TYPE_LONG:
7011 case MYSQL_TYPE_LONGLONG:
7012 case MYSQL_TYPE_TINY:
7013 case MYSQL_TYPE_SHORT:
7014 case MYSQL_TYPE_INT24:
7015 case MYSQL_TYPE_DATE:
7016 case MYSQL_TYPE_YEAR:
7017 case MYSQL_TYPE_NEWDATE:
7018 return(DATA_INT);
7019 case MYSQL_TYPE_TIME:
7020 case MYSQL_TYPE_DATETIME:
7021 case MYSQL_TYPE_TIMESTAMP:
7022 if (field->key_type() == HA_KEYTYPE_BINARY) {
7023 return(DATA_FIXBINARY);
7024 } else {
7025 return(DATA_INT);
7026 }
7027 case MYSQL_TYPE_FLOAT:
7028 return(DATA_FLOAT);
7029 case MYSQL_TYPE_DOUBLE:
7030 return(DATA_DOUBLE);
7031 case MYSQL_TYPE_DECIMAL:
7032 return(DATA_DECIMAL);
7033 case MYSQL_TYPE_GEOMETRY:
7034 return(DATA_GEOMETRY);
7035 case MYSQL_TYPE_TINY_BLOB:
7036 case MYSQL_TYPE_MEDIUM_BLOB:
7037 case MYSQL_TYPE_BLOB:
7038 case MYSQL_TYPE_LONG_BLOB:
7039 return(DATA_BLOB);
7040 case MYSQL_TYPE_NULL:
7041 /* MySQL currently accepts "NULL" datatype, but will
7042 reject such datatype in the next release. We will cope
7043 with it and not trigger assertion failure in 5.1 */
7044 break;
7045 default:
7046 ut_error;
7047 }
7048
7049 return(0);
7050 }
7051
7052 /*******************************************************************//**
7053 Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
7054 storage format.
7055 @return value */
7056 static inline
7057 uint
innobase_read_from_2_little_endian(const uchar * buf)7058 innobase_read_from_2_little_endian(
7059 /*===============================*/
7060 const uchar* buf) /*!< in: from where to read */
7061 {
7062 return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
7063 }
7064
#ifdef WITH_WSREP
/*******************************************************************//**
Stores a key value for a row to a buffer.
The buffer is zero-filled first. For every user defined key part, an
optional NULL indicator byte is written (only if the key part is nullable),
followed by the value of the key part. String-like values are passed through
wsrep_innobase_mysql_sort() before they are stored. Whenever the remaining
buffer space is too small, the value is truncated and a message is logged.

All copies are bounded by the size of the local sort buffer and by the
remaining space of 'buff', and each value is copied to the start of its own
slot before the write position is advanced.
@return key value length as stored in buff */
UNIV_INTERN
uint
wsrep_store_key_val_for_row(
/*=========================*/
	THD*		thd,	/*!< in: connection; used here only for
				logging the query text */
	TABLE*		table,	/*!< in: MySQL table object */
	uint		keynr,	/*!< in: key number */
	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
				format) */
	uint		buff_len,/*!< in: buffer length */
	const uchar*	record,	/*!< in: row from which the key part
				values are read */
	ibool*		key_is_null)/*!< out: full key was null */
{
	KEY*		key_info	= table->key_info + keynr;
	KEY_PART_INFO*	key_part	= key_info->key_part;
	KEY_PART_INFO*	end		=
		key_part + key_info->user_defined_key_parts;
	char*		buff_start	= buff;
	enum_field_types mysql_type;
	Field*		field;
	/* Number of bytes that may still be written to buff. */
	uint		buff_space	= buff_len;

	DBUG_ENTER("wsrep_store_key_val_for_row");

	memset(buff, 0, buff_len);
	*key_is_null = TRUE;

	for (; key_part != end; key_part++) {

		/* Scratch buffer in which string values are sorted. */
		uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		ibool part_is_null = FALSE;

		if (key_part->null_bit) {
			/* Nullable key part: one indicator byte,
			1 for NULL and 0 for NOT NULL. */
			if (buff_space > 0) {
				if (record[key_part->null_offset]
				    & key_part->null_bit) {
					*buff = 1;
					part_is_null = TRUE;
				} else {
					*buff = 0;
				}
				buff++;
				buff_space--;
			} else {
				fprintf (stderr, "WSREP: key truncated: %s\n",
					 wsrep_thd_query(thd));
			}
		}
		if (!part_is_null) *key_is_null = FALSE;

		field = key_part->field;
		mysql_type = field->type();

		if (mysql_type == MYSQL_TYPE_VARCHAR) {
			/* >= 5.0.3 true VARCHAR */
			ulint		lenlen;
			ulint		len;
			const byte*	data;
			ulint		key_len;
			ulint		true_len;
			const CHARSET_INFO* cs;
			int		error=0;

			key_len = key_part->length;

			if (part_is_null) {
				/* Skip the space of a NULL value; the
				buffer is already zero-filled. */
				true_len = key_len + 2;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;
				continue;
			}
			cs = field->charset();

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

			data = row_mysql_read_true_varchar(&len,
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);

			true_len = len;

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) my_well_formed_length(cs,
						(const char *) data,
						(const char *) data + len,
						(uint) (key_len /
							cs->mbmaxlen),
						&error);
			}

			/* In a column prefix index, we may need to truncate
			the stored value: */
			if (true_len > key_len) {
				true_len = key_len;
			}
			/* cannot exceed max column length either, we may
			need to truncate the stored value: */
			if (true_len > sizeof(sorted)) {
				true_len = sizeof(sorted);
			}

			memcpy(sorted, data, true_len);
			true_len = wsrep_innobase_mysql_sort(
				mysql_type, cs->number, sorted, true_len,
				REC_VERSION_56_MAX_INDEX_COL_LEN);
			if (wsrep_protocol_version > 1) {
				/* Only the sorted bytes are stored. The
				rest of the buffer was reset to zero in
				the memset() call above. */
				if (true_len > buff_space) {
					WSREP_DEBUG (
						"write set key truncated for: %s\n",
						wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, sorted, true_len);
				buff       += true_len;
				buff_space -= true_len;
			} else {
				/* Protocol versions <= 1: only the space
				is reserved, no data is copied. */
				buff += key_len;
			}
		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB
			/* MYSQL_TYPE_GEOMETRY data is treated
			as BLOB data in innodb. */
			|| mysql_type == MYSQL_TYPE_GEOMETRY) {

			const CHARSET_INFO* cs;
			ulint		key_len;
			ulint		true_len;
			int		error=0;
			ulint		blob_len;
			const byte*	blob_data;

			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);

			key_len = key_part->length;

			if (part_is_null) {
				true_len = key_len + 2;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;

				continue;
			}

			cs = field->charset();

			blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
					(ulint) field->pack_length());

			true_len = blob_len;

			ut_a(get_field_offset(table, field)
			     == key_part->offset);

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (blob_len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) my_well_formed_length(cs,
						(const char *) blob_data,
						(const char *) blob_data
							+ blob_len,
						(uint) (key_len /
							cs->mbmaxlen),
						&error);
			}

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
			stored in the key value: */

			if (true_len > key_len) {
				true_len = key_len;
			}
			/* The value must also fit into the sort buffer,
			exactly as in the VARCHAR case above. */
			if (true_len > sizeof(sorted)) {
				true_len = sizeof(sorted);
			}

			memcpy(sorted, blob_data, true_len);
			true_len = wsrep_innobase_mysql_sort(
				mysql_type, cs->number, sorted, true_len,
				REC_VERSION_56_MAX_INDEX_COL_LEN);

			/* The value is copied to the current write
			position BEFORE that position is advanced. Copying
			after the advance would place the data outside of
			the slot that was just accounted for, and could
			write past the end of the buffer. */
			if (wsrep_protocol_version > 1) {
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, sorted, true_len);
				buff       += true_len;
				buff_space -= true_len;
			} else {
				/* Protocol versions <= 1 reserve the
				maximum possible length of the BLOB prefix
				in the key value. true_len <= key_len
				holds here, so the data stays inside the
				reserved slot. */
				memcpy(buff, sorted, true_len);
				buff += key_len;
			}
		} else {
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

			const CHARSET_INFO*	cs = NULL;
			ulint			true_len;
			ulint			key_len;
			const uchar*		src_start;
			int			error=0;
			enum_field_types	real_type;

			key_len = key_part->length;

			if (part_is_null) {
				true_len = key_len;
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				buff       += true_len;
				buff_space -= true_len;

				continue;
			}

			src_start = record + key_part->offset;
			real_type = field->real_type();
			true_len = key_len;

			/* Character set for the field is defined only
			to fields whose type is string and real field
			type is not enum or set. For these fields check
			if character set is multi byte. */

			if (real_type != MYSQL_TYPE_ENUM
				&& real_type != MYSQL_TYPE_SET
				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
					|| mysql_type == MYSQL_TYPE_STRING)) {

				cs = field->charset();

				/* For multi byte character sets we need to
				calculate the true length of the key */

				if (key_len > 0 && cs->mbmaxlen > 1) {

					true_len = (ulint)
						my_well_formed_length(cs,
							(const char *)src_start,
							(const char *)src_start
								+ key_len,
							(uint) (key_len /
								cs->mbmaxlen),
							&error);
				}
				/* The value must fit into the sort
				buffer. */
				if (true_len > sizeof(sorted)) {
					true_len = sizeof(sorted);
				}
				memcpy(sorted, src_start, true_len);
				true_len = wsrep_innobase_mysql_sort(
					mysql_type, cs->number, sorted, true_len,
					REC_VERSION_56_MAX_INDEX_COL_LEN);

				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, sorted, true_len);
			} else {
				/* Raw copy of the key part: never write
				more than what is left in the buffer. */
				if (true_len > buff_space) {
					fprintf (stderr,
						 "WSREP: key truncated: %s\n",
						 wsrep_thd_query(thd));
					true_len = buff_space;
				}
				memcpy(buff, src_start, true_len);
			}
			buff       += true_len;
			buff_space -= true_len;
		}
	}

	ut_a(buff <= buff_start + buff_len);

	DBUG_RETURN((uint)(buff - buff_start));
}
#endif /* WITH_WSREP */
7373 /**************************************************************//**
7374 Determines if a field is needed in a m_prebuilt struct 'template'.
7375 @return field to use, or NULL if the field is not needed */
7376 static
7377 const Field*
build_template_needs_field(bool index_contains,bool read_just_key,bool fetch_all_in_key,bool fetch_primary_key_cols,dict_index_t * index,const TABLE * table,ulint i,ulint num_v)7378 build_template_needs_field(
7379 /*=======================*/
7380 bool index_contains, /*!< in:
7381 dict_index_t::contains_col_or_prefix(
7382 i) */
7383 bool read_just_key, /*!< in: TRUE when MySQL calls
7384 ha_innobase::extra with the
7385 argument HA_EXTRA_KEYREAD; it is enough
7386 to read just columns defined in
7387 the index (i.e., no read of the
7388 clustered index record necessary) */
7389 bool fetch_all_in_key,
7390 /*!< in: true=fetch all fields in
7391 the index */
7392 bool fetch_primary_key_cols,
7393 /*!< in: true=fetch the
7394 primary key columns */
7395 dict_index_t* index, /*!< in: InnoDB index to use */
7396 const TABLE* table, /*!< in: MySQL table object */
7397 ulint i, /*!< in: field index in InnoDB table */
7398 ulint num_v) /*!< in: num virtual column so far */
7399 {
7400 const Field* field = table->field[i];
7401
7402 if (!field->stored_in_db()
7403 && ha_innobase::omits_virtual_cols(*table->s)) {
7404 return NULL;
7405 }
7406
7407 if (!index_contains) {
7408 if (read_just_key) {
7409 /* If this is a 'key read', we do not need
7410 columns that are not in the key */
7411
7412 return(NULL);
7413 }
7414 } else if (fetch_all_in_key) {
7415 /* This field is needed in the query */
7416
7417 return(field);
7418 }
7419
7420 if (bitmap_is_set(table->read_set, static_cast<uint>(i))
7421 || bitmap_is_set(table->write_set, static_cast<uint>(i))) {
7422 /* This field is needed in the query */
7423
7424 return(field);
7425 }
7426
7427 ut_ad(i >= num_v);
7428 if (fetch_primary_key_cols
7429 && dict_table_col_in_clustered_key(index->table, i - num_v)) {
7430 /* This field is needed in the query */
7431 return(field);
7432 }
7433
7434 /* This field is not needed in the query, skip it */
7435
7436 return(NULL);
7437 }
7438
7439 /**************************************************************//**
7440 Determines if a field is needed in a m_prebuilt struct 'template'.
7441 @return whether the field is needed for index condition pushdown */
7442 inline
7443 bool
build_template_needs_field_in_icp(const dict_index_t * index,const row_prebuilt_t * prebuilt,bool contains,ulint i,bool is_virtual)7444 build_template_needs_field_in_icp(
7445 /*==============================*/
7446 const dict_index_t* index, /*!< in: InnoDB index */
7447 const row_prebuilt_t* prebuilt,/*!< in: row fetch template */
7448 bool contains,/*!< in: whether the index contains
7449 column i */
7450 ulint i, /*!< in: column number */
7451 bool is_virtual)
7452 /*!< in: a virtual column or not */
7453 {
7454 ut_ad(contains == index->contains_col_or_prefix(i, is_virtual));
7455
7456 return(index == prebuilt->index
7457 ? contains
7458 : prebuilt->index->contains_col_or_prefix(i, is_virtual));
7459 }
7460
7461 /**************************************************************//**
7462 Adds a field to a m_prebuilt struct 'template'.
7463 @return the field template */
7464 static
7465 mysql_row_templ_t*
build_template_field(row_prebuilt_t * prebuilt,dict_index_t * clust_index,dict_index_t * index,TABLE * table,const Field * field,ulint i,ulint v_no)7466 build_template_field(
7467 /*=================*/
7468 row_prebuilt_t* prebuilt, /*!< in/out: template */
7469 dict_index_t* clust_index, /*!< in: InnoDB clustered index */
7470 dict_index_t* index, /*!< in: InnoDB index to use */
7471 TABLE* table, /*!< in: MySQL table object */
7472 const Field* field, /*!< in: field in MySQL table */
7473 ulint i, /*!< in: field index in InnoDB table */
7474 ulint v_no) /*!< in: field index for virtual col */
7475 {
7476 mysql_row_templ_t* templ;
7477 const dict_col_t* col;
7478
7479 ut_ad(clust_index->table == index->table);
7480
7481 templ = prebuilt->mysql_template + prebuilt->n_template++;
7482 MEM_UNDEFINED(templ, sizeof *templ);
7483 templ->rec_field_is_prefix = FALSE;
7484 templ->rec_prefix_field_no = ULINT_UNDEFINED;
7485 templ->is_virtual = !field->stored_in_db();
7486
7487 if (!templ->is_virtual) {
7488 templ->col_no = i;
7489 col = dict_table_get_nth_col(index->table, i);
7490 templ->clust_rec_field_no = dict_col_get_clust_pos(
7491 col, clust_index);
7492 /* If clustered index record field is not found, lets print out
7493 field names and all the rest to understand why field is not found. */
7494 if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
7495 const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
7496 dict_field_t* field=NULL;
7497 size_t size = 0;
7498
7499 for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7500 dict_field_t* ifield = &(clust_index->fields[j]);
7501 if (ifield && !memcmp(tb_col_name, ifield->name,
7502 strlen(tb_col_name))) {
7503 field = ifield;
7504 break;
7505 }
7506 }
7507
7508 ib::info() << "Looking for field " << i << " name "
7509 << (tb_col_name ? tb_col_name : "NULL")
7510 << " from table " << clust_index->table->name;
7511
7512
7513 for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7514 dict_field_t* ifield = &(clust_index->fields[j]);
7515 ib::info() << "InnoDB Table "
7516 << clust_index->table->name
7517 << "field " << j << " name "
7518 << (ifield ? ifield->name() : "NULL");
7519 }
7520
7521 for(ulint j=0; j < table->s->stored_fields; j++) {
7522 ib::info() << "MySQL table "
7523 << table->s->table_name.str
7524 << " field " << j << " name "
7525 << table->field[j]->field_name.str;
7526 }
7527
7528 ib::fatal() << "Clustered record field for column " << i
7529 << " not found table n_user_defined "
7530 << clust_index->n_user_defined_cols
7531 << " index n_user_defined "
7532 << clust_index->table->n_cols - DATA_N_SYS_COLS
7533 << " InnoDB table "
7534 << clust_index->table->name
7535 << " field name "
7536 << (field ? field->name() : "NULL")
7537 << " MySQL table "
7538 << table->s->table_name.str
7539 << " field name "
7540 << (tb_col_name ? tb_col_name : "NULL")
7541 << " n_fields "
7542 << table->s->stored_fields
7543 << " query "
7544 << innobase_get_stmt_unsafe(current_thd, &size);
7545 }
7546
7547 if (dict_index_is_clust(index)) {
7548 templ->rec_field_no = templ->clust_rec_field_no;
7549 } else {
7550 /* If we're in a secondary index, keep track
7551 * of the original index position even if this
7552 * is just a prefix index; we will use this
7553 * later to avoid a cluster index lookup in
7554 * some cases.*/
7555
7556 templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
7557 &templ->rec_prefix_field_no);
7558 }
7559 } else {
7560 DBUG_ASSERT(!ha_innobase::omits_virtual_cols(*table->s));
7561 col = &dict_table_get_nth_v_col(index->table, v_no)->m_col;
7562 templ->clust_rec_field_no = v_no;
7563
7564 if (dict_index_is_clust(index)) {
7565 templ->rec_field_no = templ->clust_rec_field_no;
7566 } else {
7567 templ->rec_field_no
7568 = dict_index_get_nth_col_or_prefix_pos(
7569 index, v_no, FALSE, true,
7570 &templ->rec_prefix_field_no);
7571 }
7572 templ->icp_rec_field_no = ULINT_UNDEFINED;
7573 }
7574
7575 if (field->real_maybe_null()) {
7576 templ->mysql_null_byte_offset =
7577 field->null_offset();
7578
7579 templ->mysql_null_bit_mask = (ulint) field->null_bit;
7580 } else {
7581 templ->mysql_null_bit_mask = 0;
7582 }
7583
7584
7585 templ->mysql_col_offset = (ulint) get_field_offset(table, field);
7586 templ->mysql_col_len = (ulint) field->pack_length();
7587 templ->type = col->mtype;
7588 templ->mysql_type = (ulint) field->type();
7589
7590 if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
7591 templ->mysql_length_bytes = (ulint)
7592 (((Field_varstring*) field)->length_bytes);
7593 } else {
7594 templ->mysql_length_bytes = 0;
7595 }
7596
7597 templ->charset = dtype_get_charset_coll(col->prtype);
7598 templ->mbminlen = dict_col_get_mbminlen(col);
7599 templ->mbmaxlen = dict_col_get_mbmaxlen(col);
7600 templ->is_unsigned = col->prtype & DATA_UNSIGNED;
7601
7602 if (!dict_index_is_clust(index)
7603 && templ->rec_field_no == ULINT_UNDEFINED) {
7604 prebuilt->need_to_access_clustered = TRUE;
7605
7606 if (templ->rec_prefix_field_no != ULINT_UNDEFINED) {
7607 dict_field_t* field = dict_index_get_nth_field(
7608 index,
7609 templ->rec_prefix_field_no);
7610 templ->rec_field_is_prefix = (field->prefix_len != 0);
7611 }
7612 }
7613
7614 /* For spatial index, we need to access cluster index. */
7615 if (dict_index_is_spatial(index)) {
7616 prebuilt->need_to_access_clustered = TRUE;
7617 }
7618
7619 if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
7620 + templ->mysql_col_len) {
7621 prebuilt->mysql_prefix_len = templ->mysql_col_offset
7622 + templ->mysql_col_len;
7623 }
7624
7625 if (DATA_LARGE_MTYPE(templ->type)) {
7626 prebuilt->templ_contains_blob = TRUE;
7627 }
7628
7629 return(templ);
7630 }
7631
7632 /**************************************************************//**
7633 Builds a 'template' to the m_prebuilt struct. The template is used in fast
7634 retrieval of just those column values MySQL needs in its processing. */
7635
7636 void
build_template(bool whole_row)7637 ha_innobase::build_template(
7638 /*========================*/
7639 bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW,
7640 false=ROW_MYSQL_REC_FIELDS */
7641 {
7642 dict_index_t* index;
7643 dict_index_t* clust_index;
7644 ibool fetch_all_in_key = FALSE;
7645 ibool fetch_primary_key_cols = FALSE;
7646
7647 if (m_prebuilt->select_lock_type == LOCK_X || m_prebuilt->table->no_rollback()) {
7648 /* We always retrieve the whole clustered index record if we
7649 use exclusive row level locks, for example, if the read is
7650 done in an UPDATE statement or if we are using a no rollback
7651 table */
7652
7653 whole_row = true;
7654 } else if (!whole_row) {
7655 if (m_prebuilt->hint_need_to_fetch_extra_cols
7656 == ROW_RETRIEVE_ALL_COLS) {
7657
7658 /* We know we must at least fetch all columns in the
7659 key, or all columns in the table */
7660
7661 if (m_prebuilt->read_just_key) {
7662 /* MySQL has instructed us that it is enough
7663 to fetch the columns in the key; looks like
7664 MySQL can set this flag also when there is
7665 only a prefix of the column in the key: in
7666 that case we retrieve the whole column from
7667 the clustered index */
7668
7669 fetch_all_in_key = TRUE;
7670 } else {
7671 whole_row = true;
7672 }
7673 } else if (m_prebuilt->hint_need_to_fetch_extra_cols
7674 == ROW_RETRIEVE_PRIMARY_KEY) {
7675 /* We must at least fetch all primary key cols. Note
7676 that if the clustered index was internally generated
7677 by InnoDB on the row id (no primary key was
7678 defined), then row_search_for_mysql() will always
7679 retrieve the row id to a special buffer in the
7680 m_prebuilt struct. */
7681
7682 fetch_primary_key_cols = TRUE;
7683 }
7684 }
7685
7686 clust_index = dict_table_get_first_index(m_prebuilt->table);
7687
7688 index = whole_row ? clust_index : m_prebuilt->index;
7689
7690 m_prebuilt->versioned_write = table->versioned_write(VERS_TRX_ID);
7691 m_prebuilt->need_to_access_clustered = (index == clust_index);
7692
7693 /* Either m_prebuilt->index should be a secondary index, or it
7694 should be the clustered index. */
7695 ut_ad(dict_index_is_clust(index) == (index == clust_index));
7696
7697 /* Below we check column by column if we need to access
7698 the clustered index. */
7699
7700 if (pushed_rowid_filter && rowid_filter_is_active) {
7701 fetch_primary_key_cols = TRUE;
7702 m_prebuilt->pk_filter = this;
7703 } else {
7704 m_prebuilt->pk_filter = NULL;
7705 }
7706
7707 const bool skip_virtual = omits_virtual_cols(*table_share);
7708 const ulint n_fields = table_share->fields;
7709
7710 if (!m_prebuilt->mysql_template) {
7711 m_prebuilt->mysql_template = (mysql_row_templ_t*)
7712 ut_malloc_nokey(n_fields * sizeof(mysql_row_templ_t));
7713 }
7714
7715 m_prebuilt->template_type = whole_row
7716 ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
7717 m_prebuilt->null_bitmap_len = table->s->null_bytes;
7718
7719 /* Prepare to build m_prebuilt->mysql_template[]. */
7720 m_prebuilt->templ_contains_blob = FALSE;
7721 m_prebuilt->mysql_prefix_len = 0;
7722 m_prebuilt->n_template = 0;
7723 m_prebuilt->idx_cond_n_cols = 0;
7724
7725 /* Note that in InnoDB, i is the column number in the table.
7726 MySQL calls columns 'fields'. */
7727
7728 ulint num_v = 0;
7729
7730 if ((active_index != MAX_KEY
7731 && active_index == pushed_idx_cond_keyno)
7732 || (pushed_rowid_filter && rowid_filter_is_active)) {
7733 /* Push down an index condition or an end_range check. */
7734 for (ulint i = 0; i < n_fields; i++) {
7735 const Field* field = table->field[i];
7736 const bool is_v = !field->stored_in_db();
7737 if (is_v && skip_virtual) {
7738 num_v++;
7739 continue;
7740 }
7741 bool index_contains = index->contains_col_or_prefix(
7742 is_v ? num_v : i - num_v, is_v);
7743 if (is_v && index_contains) {
7744 m_prebuilt->n_template = 0;
7745 num_v = 0;
7746 goto no_icp;
7747 }
7748
7749 /* Test if an end_range or an index condition
7750 refers to the field. Note that "index" and
7751 "index_contains" may refer to the clustered index.
7752 Index condition pushdown is relative to
7753 m_prebuilt->index (the index that is being
7754 looked up first). */
7755
7756 /* When join_read_always_key() invokes this
7757 code via handler::ha_index_init() and
7758 ha_innobase::index_init(), end_range is not
7759 yet initialized. Because of that, we must
7760 always check for index_contains, instead of
7761 the subset
7762 field->part_of_key.is_set(active_index)
7763 which would be acceptable if end_range==NULL. */
7764 if (build_template_needs_field_in_icp(
7765 index, m_prebuilt, index_contains,
7766 is_v ? num_v : i - num_v, is_v)) {
7767 if (!whole_row) {
7768 field = build_template_needs_field(
7769 index_contains,
7770 m_prebuilt->read_just_key,
7771 fetch_all_in_key,
7772 fetch_primary_key_cols,
7773 index, table, i, num_v);
7774 if (!field) {
7775 if (is_v) {
7776 num_v++;
7777 }
7778 continue;
7779 }
7780 }
7781
7782 ut_ad(!is_v);
7783
7784 mysql_row_templ_t* templ= build_template_field(
7785 m_prebuilt, clust_index, index,
7786 table, field, i - num_v, 0);
7787
7788 ut_ad(!templ->is_virtual);
7789
7790 m_prebuilt->idx_cond_n_cols++;
7791 ut_ad(m_prebuilt->idx_cond_n_cols
7792 == m_prebuilt->n_template);
7793
7794 if (index == m_prebuilt->index) {
7795 templ->icp_rec_field_no
7796 = templ->rec_field_no;
7797 } else {
7798 templ->icp_rec_field_no
7799 = dict_index_get_nth_col_pos(
7800 m_prebuilt->index,
7801 i - num_v,
7802 &templ->rec_prefix_field_no);
7803 }
7804
7805 if (dict_index_is_clust(m_prebuilt->index)) {
7806 ut_ad(templ->icp_rec_field_no
7807 != ULINT_UNDEFINED);
7808 /* If the primary key includes
7809 a column prefix, use it in
7810 index condition pushdown,
7811 because the condition is
7812 evaluated before fetching any
7813 off-page (externally stored)
7814 columns. */
7815 if (templ->icp_rec_field_no
7816 < m_prebuilt->index->n_uniq) {
7817 /* This is a key column;
7818 all set. */
7819 continue;
7820 }
7821 } else if (templ->icp_rec_field_no
7822 != ULINT_UNDEFINED) {
7823 continue;
7824 }
7825
7826 /* This is a column prefix index.
7827 The column prefix can be used in
7828 an end_range comparison. */
7829
7830 templ->icp_rec_field_no
7831 = dict_index_get_nth_col_or_prefix_pos(
7832 m_prebuilt->index, i - num_v,
7833 true, false,
7834 &templ->rec_prefix_field_no);
7835 ut_ad(templ->icp_rec_field_no
7836 != ULINT_UNDEFINED);
7837
7838 /* Index condition pushdown can be used on
7839 all columns of a secondary index, and on
7840 the PRIMARY KEY columns. On the clustered
7841 index, it must never be used on other than
7842 PRIMARY KEY columns, because those columns
7843 may be stored off-page, and we will not
7844 fetch externally stored columns before
7845 checking the index condition. */
7846 /* TODO: test the above with an assertion
7847 like this. Note that index conditions are
7848 currently pushed down as part of the
7849 "optimizer phase" while end_range is done
7850 as part of the execution phase. Therefore,
7851 we were unable to use an accurate condition
7852 for end_range in the "if" condition above,
7853 and the following assertion would fail.
7854 ut_ad(!dict_index_is_clust(m_prebuilt->index)
7855 || templ->rec_field_no
7856 < m_prebuilt->index->n_uniq);
7857 */
7858 }
7859
7860 if (is_v) {
7861 num_v++;
7862 }
7863 }
7864
7865 ut_ad(m_prebuilt->idx_cond_n_cols > 0);
7866 ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template);
7867
7868 num_v = 0;
7869
7870 /* Include the fields that are not needed in index condition
7871 pushdown. */
7872 for (ulint i = 0; i < n_fields; i++) {
7873 const Field* field = table->field[i];
7874 const bool is_v = !field->stored_in_db();
7875 if (is_v && skip_virtual) {
7876 num_v++;
7877 continue;
7878 }
7879
7880 bool index_contains = index->contains_col_or_prefix(
7881 is_v ? num_v : i - num_v, is_v);
7882
7883 if (!build_template_needs_field_in_icp(
7884 index, m_prebuilt, index_contains,
7885 is_v ? num_v : i - num_v, is_v)) {
7886 /* Not needed in ICP */
7887 if (!whole_row) {
7888 field = build_template_needs_field(
7889 index_contains,
7890 m_prebuilt->read_just_key,
7891 fetch_all_in_key,
7892 fetch_primary_key_cols,
7893 index, table, i, num_v);
7894 if (!field) {
7895 if (is_v) {
7896 num_v++;
7897 }
7898 continue;
7899 }
7900 }
7901
7902 ut_d(mysql_row_templ_t* templ =)
7903 build_template_field(
7904 m_prebuilt, clust_index, index,
7905 table, field, i - num_v, num_v);
7906 ut_ad(templ->is_virtual == (ulint)is_v);
7907
7908 if (is_v) {
7909 num_v++;
7910 }
7911 }
7912 }
7913 if (active_index == pushed_idx_cond_keyno) {
7914 m_prebuilt->idx_cond = this;
7915 }
7916 } else {
7917 no_icp:
7918 /* No index condition pushdown */
7919 m_prebuilt->idx_cond = NULL;
7920 ut_ad(num_v == 0);
7921
7922 for (ulint i = 0; i < n_fields; i++) {
7923 const Field* field = table->field[i];
7924 const bool is_v = !field->stored_in_db();
7925
7926 if (whole_row) {
7927 if (is_v && skip_virtual) {
7928 num_v++;
7929 continue;
7930 }
7931 /* Even this is whole_row, if the seach is
7932 on a virtual column, and read_just_key is
7933 set, and field is not in this index, we
7934 will not try to fill the value since they
7935 are not stored in such index nor in the
7936 cluster index. */
7937 if (is_v
7938 && m_prebuilt->read_just_key
7939 && !m_prebuilt->index->contains_col_or_prefix(
7940 num_v, true))
7941 {
7942 /* Turn off ROW_MYSQL_WHOLE_ROW */
7943 m_prebuilt->template_type =
7944 ROW_MYSQL_REC_FIELDS;
7945 num_v++;
7946 continue;
7947 }
7948 } else {
7949 if (is_v
7950 && (skip_virtual || index->is_primary())) {
7951 num_v++;
7952 continue;
7953 }
7954
7955 bool contain = index->contains_col_or_prefix(
7956 is_v ? num_v: i - num_v, is_v);
7957
7958 field = build_template_needs_field(
7959 contain,
7960 m_prebuilt->read_just_key,
7961 fetch_all_in_key,
7962 fetch_primary_key_cols,
7963 index, table, i, num_v);
7964 if (!field) {
7965 if (is_v) {
7966 num_v++;
7967 }
7968 continue;
7969 }
7970 }
7971
7972 ut_d(mysql_row_templ_t* templ =)
7973 build_template_field(
7974 m_prebuilt, clust_index, index,
7975 table, field, i - num_v, num_v);
7976 ut_ad(templ->is_virtual == (ulint)is_v);
7977 if (is_v) {
7978 num_v++;
7979 }
7980 }
7981 }
7982
7983 if (index != clust_index && m_prebuilt->need_to_access_clustered) {
7984 /* Change rec_field_no's to correspond to the clustered index
7985 record */
7986 for (ulint i = 0; i < m_prebuilt->n_template; i++) {
7987 mysql_row_templ_t* templ
7988 = &m_prebuilt->mysql_template[i];
7989
7990 templ->rec_field_no = templ->clust_rec_field_no;
7991 }
7992 }
7993 }
7994
7995 /********************************************************************//**
7996 This special handling is really to overcome the limitations of MySQL's
7997 binlogging. We need to eliminate the non-determinism that will arise in
7998 INSERT ... SELECT type of statements, since MySQL binlog only stores the
7999 min value of the autoinc interval. Once that is fixed we can get rid of
8000 the special lock handling.
8001 @return DB_SUCCESS if all OK else error code */
8002
8003 dberr_t
innobase_lock_autoinc(void)8004 ha_innobase::innobase_lock_autoinc(void)
8005 /*====================================*/
8006 {
8007 DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
8008 dberr_t error = DB_SUCCESS;
8009
8010 ut_ad(!srv_read_only_mode);
8011
8012 switch (innobase_autoinc_lock_mode) {
8013 case AUTOINC_NO_LOCKING:
8014 /* Acquire only the AUTOINC mutex. */
8015 m_prebuilt->table->autoinc_mutex.lock();
8016 break;
8017
8018 case AUTOINC_NEW_STYLE_LOCKING:
8019 /* For simple (single/multi) row INSERTs/REPLACEs and RBR
8020 events, we fallback to the old style only if another
8021 transaction has already acquired the AUTOINC lock on
8022 behalf of a LOAD FILE or INSERT ... SELECT etc. type of
8023 statement. */
8024 switch (thd_sql_command(m_user_thd)) {
8025 case SQLCOM_INSERT:
8026 case SQLCOM_REPLACE:
8027 case SQLCOM_END: // RBR event
8028 /* Acquire the AUTOINC mutex. */
8029 m_prebuilt->table->autoinc_mutex.lock();
8030 /* We need to check that another transaction isn't
8031 already holding the AUTOINC lock on the table. */
8032 if (!m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
8033 /* Do not fall back to old style locking. */
8034 DBUG_RETURN(error);
8035 }
8036 m_prebuilt->table->autoinc_mutex.unlock();
8037 }
8038 /* Use old style locking. */
8039 /* fall through */
8040 case AUTOINC_OLD_STYLE_LOCKING:
8041 DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
8042 ut_ad(0););
8043 error = row_lock_table_autoinc_for_mysql(m_prebuilt);
8044
8045 if (error == DB_SUCCESS) {
8046
8047 /* Acquire the AUTOINC mutex. */
8048 m_prebuilt->table->autoinc_mutex.lock();
8049 }
8050 break;
8051
8052 default:
8053 ut_error;
8054 }
8055
8056 DBUG_RETURN(error);
8057 }
8058
8059 /********************************************************************//**
8060 Store the autoinc value in the table. The autoinc value is only set if
8061 it's greater than the existing autoinc value in the table.
8062 @return DB_SUCCESS if all went well else error code */
8063
8064 dberr_t
innobase_set_max_autoinc(ulonglong auto_inc)8065 ha_innobase::innobase_set_max_autoinc(
8066 /*==================================*/
8067 ulonglong auto_inc) /*!< in: value to store */
8068 {
8069 dberr_t error;
8070
8071 error = innobase_lock_autoinc();
8072
8073 if (error == DB_SUCCESS) {
8074
8075 dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
8076 m_prebuilt->table->autoinc_mutex.unlock();
8077 }
8078
8079 return(error);
8080 }
8081
8082 /********************************************************************//**
8083 Stores a row in an InnoDB database, to the table specified in this
8084 handle.
8085 @return error code */
8086
8087 int
write_row(const uchar * record)8088 ha_innobase::write_row(
8089 /*===================*/
8090 const uchar* record) /*!< in: a row in MySQL format */
8091 {
8092 dberr_t error;
8093 #ifdef WITH_WSREP
8094 bool wsrep_auto_inc_inserted= false;
8095 #endif
8096 int error_result = 0;
8097 bool auto_inc_used = false;
8098
8099 DBUG_ENTER("ha_innobase::write_row");
8100
8101 trx_t* trx = thd_to_trx(m_user_thd);
8102
8103 /* Validation checks before we commence write_row operation. */
8104 if (high_level_read_only) {
8105 ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8106 DBUG_RETURN(HA_ERR_TABLE_READONLY);
8107 }
8108
8109 ut_a(m_prebuilt->trx == trx);
8110
8111 if (!trx_is_started(trx)) {
8112 trx->will_lock = true;
8113 }
8114
8115 ins_mode_t vers_set_fields;
8116 /* Handling of Auto-Increment Columns. */
8117 if (table->next_number_field && record == table->record[0]) {
8118
8119 /* Reset the error code before calling
8120 innobase_get_auto_increment(). */
8121 m_prebuilt->autoinc_error = DB_SUCCESS;
8122
8123 #ifdef WITH_WSREP
8124 wsrep_auto_inc_inserted = trx->is_wsrep()
8125 && wsrep_drupal_282555_workaround
8126 && table->next_number_field->val_int() == 0;
8127 #endif
8128
8129 if ((error_result = update_auto_increment())) {
8130 /* We don't want to mask autoinc overflow errors. */
8131
8132 /* Handle the case where the AUTOINC sub-system
8133 failed during initialization. */
8134 if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
8135 error_result = ER_AUTOINC_READ_FAILED;
8136 /* Set the error message to report too. */
8137 my_error(ER_AUTOINC_READ_FAILED, MYF(0));
8138 goto func_exit;
8139 } else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
8140 error = m_prebuilt->autoinc_error;
8141 goto report_error;
8142 }
8143
8144 /* MySQL errors are passed straight back. */
8145 goto func_exit;
8146 }
8147
8148 auto_inc_used = true;
8149 }
8150
8151 /* Prepare INSERT graph that will be executed for actual INSERT
8152 (This is a one time operation) */
8153 if (m_prebuilt->mysql_template == NULL
8154 || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
8155
8156 /* Build the template used in converting quickly between
8157 the two database formats */
8158
8159 build_template(true);
8160 }
8161
8162 innobase_srv_conc_enter_innodb(m_prebuilt);
8163
8164 vers_set_fields = table->versioned_write(VERS_TRX_ID) ?
8165 ROW_INS_VERSIONED : ROW_INS_NORMAL;
8166
8167 /* Execute insert graph that will result in actual insert. */
8168 error = row_insert_for_mysql((byte*) record, m_prebuilt, vers_set_fields);
8169
8170 DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
8171
8172 /* Handling of errors related to auto-increment. */
8173 if (auto_inc_used) {
8174 ulonglong auto_inc;
8175
8176 /* Note the number of rows processed for this statement, used
8177 by get_auto_increment() to determine the number of AUTO-INC
8178 values to reserve. This is only useful for a mult-value INSERT
8179 and is a statement level counter. */
8180 if (trx->n_autoinc_rows > 0) {
8181 --trx->n_autoinc_rows;
8182 }
8183
8184 /* Get the value that MySQL attempted to store in the table.*/
8185 auto_inc = table->next_number_field->val_uint();
8186
8187 switch (error) {
8188 case DB_DUPLICATE_KEY:
8189
8190 /* A REPLACE command and LOAD DATA INFILE REPLACE
8191 handle a duplicate key error themselves, but we
8192 must update the autoinc counter if we are performing
8193 those statements. */
8194
8195 switch (thd_sql_command(m_user_thd)) {
8196 case SQLCOM_LOAD:
8197 if (!trx->duplicates) {
8198 break;
8199 }
8200
8201 case SQLCOM_REPLACE:
8202 case SQLCOM_INSERT_SELECT:
8203 case SQLCOM_REPLACE_SELECT:
8204 goto set_max_autoinc;
8205
8206 #ifdef WITH_WSREP
8207 /* workaround for LP bug #355000, retrying the insert */
8208 case SQLCOM_INSERT:
8209
8210 WSREP_DEBUG("DUPKEY error for autoinc\n"
8211 "THD %ld, value %llu, off %llu inc %llu",
8212 thd_get_thread_id(m_user_thd),
8213 auto_inc,
8214 m_prebuilt->autoinc_offset,
8215 m_prebuilt->autoinc_increment);
8216
8217 if (wsrep_auto_inc_inserted &&
8218 wsrep_thd_retry_counter(m_user_thd) == 0 &&
8219 !thd_test_options(m_user_thd,
8220 OPTION_NOT_AUTOCOMMIT |
8221 OPTION_BEGIN)) {
8222 WSREP_DEBUG(
8223 "retrying insert: %s",
8224 wsrep_thd_query(m_user_thd));
8225 error= DB_SUCCESS;
8226 wsrep_thd_self_abort(m_user_thd);
8227 innobase_srv_conc_exit_innodb(
8228 m_prebuilt);
8229 /* jump straight to func exit over
8230 * later wsrep hooks */
8231 goto func_exit;
8232 }
8233 break;
8234 #endif /* WITH_WSREP */
8235
8236 default:
8237 break;
8238 }
8239
8240 break;
8241
8242 case DB_SUCCESS:
8243 /* If the actual value inserted is greater than
8244 the upper limit of the interval, then we try and
8245 update the table upper limit. Note: last_value
8246 will be 0 if get_auto_increment() was not called. */
8247
8248 if (auto_inc >= m_prebuilt->autoinc_last_value) {
8249 set_max_autoinc:
8250 /* We need the upper limit of the col type to check for
8251 whether we update the table autoinc counter or not. */
8252 ulonglong col_max_value =
8253 table->next_number_field->get_max_int_value();
8254
8255 /* This should filter out the negative
8256 values set explicitly by the user. */
8257 if (auto_inc <= col_max_value) {
8258 ut_ad(m_prebuilt->autoinc_increment > 0);
8259
8260 ulonglong offset;
8261 ulonglong increment;
8262 dberr_t err;
8263
8264 offset = m_prebuilt->autoinc_offset;
8265 increment = m_prebuilt->autoinc_increment;
8266
8267 auto_inc = innobase_next_autoinc(
8268 auto_inc, 1, increment, offset,
8269 col_max_value);
8270
8271 err = innobase_set_max_autoinc(
8272 auto_inc);
8273
8274 if (err != DB_SUCCESS) {
8275 error = err;
8276 }
8277 }
8278 }
8279 break;
8280 default:
8281 break;
8282 }
8283 }
8284
8285 innobase_srv_conc_exit_innodb(m_prebuilt);
8286
8287 report_error:
8288 /* Cleanup and exit. */
8289 if (error == DB_TABLESPACE_DELETED) {
8290 ib_senderrf(
8291 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
8292 ER_TABLESPACE_DISCARDED,
8293 table->s->table_name.str);
8294 }
8295
8296 error_result = convert_error_code_to_mysql(
8297 error, m_prebuilt->table->flags, m_user_thd);
8298
8299 #ifdef WITH_WSREP
8300 if (!error_result && trx->is_wsrep()
8301 && wsrep_thd_is_local(m_user_thd)
8302 && !wsrep_thd_ignore_table(m_user_thd)
8303 && !wsrep_consistency_check(m_user_thd)
8304 && (thd_sql_command(m_user_thd) != SQLCOM_CREATE_TABLE)
8305 && (thd_sql_command(m_user_thd) != SQLCOM_LOAD ||
8306 thd_binlog_format(m_user_thd) == BINLOG_FORMAT_ROW)) {
8307 if (wsrep_append_keys(m_user_thd, WSREP_SERVICE_KEY_EXCLUSIVE,
8308 record,
8309 NULL)) {
8310 DBUG_PRINT("wsrep", ("row key failed"));
8311 error_result = HA_ERR_INTERNAL_ERROR;
8312 goto func_exit;
8313 }
8314 }
8315 #endif /* WITH_WSREP */
8316
8317 if (error_result == HA_FTS_INVALID_DOCID) {
8318 my_error(HA_FTS_INVALID_DOCID, MYF(0));
8319 }
8320
8321 func_exit:
8322 innobase_active_small();
8323
8324 DBUG_RETURN(error_result);
8325 }
8326
8327 /** Fill the update vector's "old_vrow" field for those non-updated,
8328 but indexed columns. Such columns could stil present in the virtual
8329 index rec fields even if they are not updated (some other fields updated),
8330 so needs to be logged.
8331 @param[in] prebuilt InnoDB prebuilt struct
8332 @param[in,out] vfield field to filled
8333 @param[in] o_len actual column length
8334 @param[in,out] col column to be filled
8335 @param[in] old_mysql_row_col MySQL old field ptr
8336 @param[in] col_pack_len MySQL field col length
8337 @param[in,out] buf buffer for a converted integer value
8338 @return used buffer ptr from row_mysql_store_col_in_innobase_format() */
8339 static
8340 byte*
innodb_fill_old_vcol_val(row_prebuilt_t * prebuilt,dfield_t * vfield,ulint o_len,dict_col_t * col,const byte * old_mysql_row_col,ulint col_pack_len,byte * buf)8341 innodb_fill_old_vcol_val(
8342 row_prebuilt_t* prebuilt,
8343 dfield_t* vfield,
8344 ulint o_len,
8345 dict_col_t* col,
8346 const byte* old_mysql_row_col,
8347 ulint col_pack_len,
8348 byte* buf)
8349 {
8350 dict_col_copy_type(
8351 col, dfield_get_type(vfield));
8352 if (o_len != UNIV_SQL_NULL) {
8353
8354 buf = row_mysql_store_col_in_innobase_format(
8355 vfield,
8356 buf,
8357 TRUE,
8358 old_mysql_row_col,
8359 col_pack_len,
8360 dict_table_is_comp(prebuilt->table));
8361 } else {
8362 dfield_set_null(vfield);
8363 }
8364
8365 return(buf);
8366 }
8367
8368 /** Calculate an update vector corresponding to the changes
8369 between old_row and new_row.
8370 @param[out] uvect update vector
8371 @param[in] old_row current row in MySQL format
8372 @param[in] new_row intended updated row in MySQL format
8373 @param[in] table MySQL table handle
8374 @param[in,out] upd_buff buffer to use for converted values
8375 @param[in] buff_len length of upd_buff
8376 @param[in,out] prebuilt InnoDB execution context
8377 @param[out] auto_inc updated AUTO_INCREMENT value, or 0 if none
8378 @return DB_SUCCESS or error code */
8379 static
8380 dberr_t
calc_row_difference(upd_t * uvect,const uchar * old_row,const uchar * new_row,TABLE * table,uchar * upd_buff,ulint buff_len,row_prebuilt_t * prebuilt,ib_uint64_t & auto_inc)8381 calc_row_difference(
8382 upd_t* uvect,
8383 const uchar* old_row,
8384 const uchar* new_row,
8385 TABLE* table,
8386 uchar* upd_buff,
8387 ulint buff_len,
8388 row_prebuilt_t* prebuilt,
8389 ib_uint64_t& auto_inc)
8390 {
8391 uchar* original_upd_buff = upd_buff;
8392 Field* field;
8393 enum_field_types field_mysql_type;
8394 ulint o_len;
8395 ulint n_len;
8396 ulint col_pack_len;
8397 const byte* new_mysql_row_col;
8398 const byte* old_mysql_row_col;
8399 const byte* o_ptr;
8400 const byte* n_ptr;
8401 byte* buf;
8402 upd_field_t* ufield;
8403 ulint col_type;
8404 ulint n_changed = 0;
8405 dfield_t dfield;
8406 dict_index_t* clust_index;
8407 ibool changes_fts_column = FALSE;
8408 ibool changes_fts_doc_col = FALSE;
8409 trx_t* const trx = prebuilt->trx;
8410 doc_id_t doc_id = FTS_NULL_DOC_ID;
8411 ulint num_v = 0;
8412 const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s);
8413
8414 ut_ad(!srv_read_only_mode);
8415
8416 clust_index = dict_table_get_first_index(prebuilt->table);
8417 auto_inc = 0;
8418
8419 /* We use upd_buff to convert changed fields */
8420 buf = (byte*) upd_buff;
8421
8422 for (uint i = 0; i < table->s->fields; i++) {
8423 field = table->field[i];
8424 const bool is_virtual = !field->stored_in_db();
8425 if (is_virtual && skip_virtual) {
8426 num_v++;
8427 continue;
8428 }
8429 dict_col_t* col = is_virtual
8430 ? &prebuilt->table->v_cols[num_v].m_col
8431 : &prebuilt->table->cols[i - num_v];
8432
8433 o_ptr = (const byte*) old_row + get_field_offset(table, field);
8434 n_ptr = (const byte*) new_row + get_field_offset(table, field);
8435
8436 /* Use new_mysql_row_col and col_pack_len save the values */
8437
8438 new_mysql_row_col = n_ptr;
8439 old_mysql_row_col = o_ptr;
8440 col_pack_len = field->pack_length();
8441
8442 o_len = col_pack_len;
8443 n_len = col_pack_len;
8444
8445 /* We use o_ptr and n_ptr to dig up the actual data for
8446 comparison. */
8447
8448 field_mysql_type = field->type();
8449
8450 col_type = col->mtype;
8451
8452 switch (col_type) {
8453
8454 case DATA_BLOB:
8455 case DATA_GEOMETRY:
8456 o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
8457 n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
8458
8459 break;
8460
8461 case DATA_VARCHAR:
8462 case DATA_BINARY:
8463 case DATA_VARMYSQL:
8464 if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
8465 /* This is a >= 5.0.3 type true VARCHAR where
8466 the real payload data length is stored in
8467 1 or 2 bytes */
8468
8469 o_ptr = row_mysql_read_true_varchar(
8470 &o_len, o_ptr,
8471 (ulint)
8472 (((Field_varstring*) field)->length_bytes));
8473
8474 n_ptr = row_mysql_read_true_varchar(
8475 &n_len, n_ptr,
8476 (ulint)
8477 (((Field_varstring*) field)->length_bytes));
8478 }
8479
8480 break;
8481 default:
8482 ;
8483 }
8484
8485 if (field_mysql_type == MYSQL_TYPE_LONGLONG
8486 && prebuilt->table->fts
8487 && innobase_strcasecmp(
8488 field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
8489 doc_id = mach_read_uint64_little_endian(n_ptr);
8490 if (doc_id == 0) {
8491 return(DB_FTS_INVALID_DOCID);
8492 }
8493 }
8494
8495 if (field->real_maybe_null()) {
8496 if (field->is_null_in_record(old_row)) {
8497 o_len = UNIV_SQL_NULL;
8498 }
8499
8500 if (field->is_null_in_record(new_row)) {
8501 n_len = UNIV_SQL_NULL;
8502 }
8503 }
8504
8505 #ifdef UNIV_DEBUG
8506 bool online_ord_part = false;
8507 #endif
8508
8509 if (is_virtual) {
8510 /* If the virtual column is not indexed,
8511 we shall ignore it for update */
8512 if (!col->ord_part) {
8513 /* Check whether there is a table-rebuilding
8514 online ALTER TABLE in progress, and this
8515 virtual column could be newly indexed, thus
8516 it will be materialized. Then we will have
8517 to log its update.
8518 Note, we do not support online dropping virtual
8519 column while adding new index, nor with
8520 online alter column order while adding index,
8521 so the virtual column sequence must not change
8522 if it is online operation */
8523 if (dict_index_is_online_ddl(clust_index)
8524 && row_log_col_is_indexed(clust_index,
8525 num_v)) {
8526 #ifdef UNIV_DEBUG
8527 online_ord_part = true;
8528 #endif
8529 } else {
8530 num_v++;
8531 continue;
8532 }
8533 }
8534
8535 if (!uvect->old_vrow) {
8536 uvect->old_vrow = dtuple_create_with_vcol(
8537 uvect->heap, 0, prebuilt->table->n_v_cols);
8538 }
8539
8540 ulint max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(
8541 prebuilt->table);
8542
8543 /* for virtual columns, we only materialize
8544 its index, and index field length would not
8545 exceed max_field_len. So continue if the
8546 first max_field_len bytes are matched up */
8547 if (o_len != UNIV_SQL_NULL
8548 && n_len != UNIV_SQL_NULL
8549 && o_len >= max_field_len
8550 && n_len >= max_field_len
8551 && memcmp(o_ptr, n_ptr, max_field_len) == 0) {
8552 dfield_t* vfield = dtuple_get_nth_v_field(
8553 uvect->old_vrow, num_v);
8554 buf = innodb_fill_old_vcol_val(
8555 prebuilt, vfield, o_len,
8556 col, old_mysql_row_col,
8557 col_pack_len, buf);
8558 num_v++;
8559 continue;
8560 }
8561 }
8562
8563 if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
8564 && 0 != memcmp(o_ptr, n_ptr, o_len))) {
8565 /* The field has changed */
8566
8567 ufield = uvect->fields + n_changed;
8568 MEM_UNDEFINED(ufield, sizeof *ufield);
8569
8570 /* Let us use a dummy dfield to make the conversion
8571 from the MySQL column format to the InnoDB format */
8572
8573
8574 /* If the length of new geometry object is 0, means
8575 this object is invalid geometry object, we need
8576 to block it. */
8577 if (DATA_GEOMETRY_MTYPE(col_type)
8578 && o_len != 0 && n_len == 0) {
8579 return(DB_CANT_CREATE_GEOMETRY_OBJECT);
8580 }
8581
8582 if (n_len != UNIV_SQL_NULL) {
8583 dict_col_copy_type(
8584 col, dfield_get_type(&dfield));
8585
8586 buf = row_mysql_store_col_in_innobase_format(
8587 &dfield,
8588 (byte*) buf,
8589 TRUE,
8590 new_mysql_row_col,
8591 col_pack_len,
8592 dict_table_is_comp(prebuilt->table));
8593 dfield_copy(&ufield->new_val, &dfield);
8594 } else {
8595 dict_col_copy_type(
8596 col, dfield_get_type(&ufield->new_val));
8597 dfield_set_null(&ufield->new_val);
8598 }
8599
8600 ufield->exp = NULL;
8601 ufield->orig_len = 0;
8602 if (is_virtual) {
8603 dfield_t* vfield = dtuple_get_nth_v_field(
8604 uvect->old_vrow, num_v);
8605 upd_fld_set_virtual_col(ufield);
8606 ufield->field_no = num_v;
8607
8608 ut_ad(col->ord_part || online_ord_part);
8609 ufield->old_v_val = static_cast<dfield_t*>(
8610 mem_heap_alloc(
8611 uvect->heap,
8612 sizeof *ufield->old_v_val));
8613
8614 if (!field->is_null_in_record(old_row)) {
8615 if (n_len == UNIV_SQL_NULL) {
8616 dict_col_copy_type(
8617 col, dfield_get_type(
8618 &dfield));
8619 }
8620
8621 buf = row_mysql_store_col_in_innobase_format(
8622 &dfield,
8623 (byte*) buf,
8624 TRUE,
8625 old_mysql_row_col,
8626 col_pack_len,
8627 dict_table_is_comp(
8628 prebuilt->table));
8629 dfield_copy(ufield->old_v_val,
8630 &dfield);
8631 dfield_copy(vfield, &dfield);
8632 } else {
8633 dict_col_copy_type(
8634 col, dfield_get_type(
8635 ufield->old_v_val));
8636 dfield_set_null(ufield->old_v_val);
8637 dfield_set_null(vfield);
8638 }
8639 num_v++;
8640 ut_ad(field != table->found_next_number_field);
8641 } else {
8642 ufield->field_no = dict_col_get_clust_pos(
8643 &prebuilt->table->cols[i - num_v],
8644 clust_index);
8645 ufield->old_v_val = NULL;
8646 if (field != table->found_next_number_field
8647 || dfield_is_null(&ufield->new_val)) {
8648 } else {
8649 auto_inc = field->val_uint();
8650 }
8651 }
8652 n_changed++;
8653
8654 /* If an FTS indexed column was changed by this
8655 UPDATE then we need to inform the FTS sub-system.
8656
8657 NOTE: Currently we re-index all FTS indexed columns
8658 even if only a subset of the FTS indexed columns
8659 have been updated. That is the reason we are
8660 checking only once here. Later we will need to
8661 note which columns have been updated and do
8662 selective processing. */
8663 if (prebuilt->table->fts != NULL && !is_virtual) {
8664 ulint offset;
8665 dict_table_t* innodb_table;
8666
8667 innodb_table = prebuilt->table;
8668
8669 if (!changes_fts_column) {
8670 offset = row_upd_changes_fts_column(
8671 innodb_table, ufield);
8672
8673 if (offset != ULINT_UNDEFINED) {
8674 changes_fts_column = TRUE;
8675 }
8676 }
8677
8678 if (!changes_fts_doc_col) {
8679 changes_fts_doc_col =
8680 row_upd_changes_doc_id(
8681 innodb_table, ufield);
8682 }
8683 }
8684 } else if (is_virtual) {
8685 dfield_t* vfield = dtuple_get_nth_v_field(
8686 uvect->old_vrow, num_v);
8687 buf = innodb_fill_old_vcol_val(
8688 prebuilt, vfield, o_len,
8689 col, old_mysql_row_col,
8690 col_pack_len, buf);
8691 ut_ad(col->ord_part || online_ord_part);
8692 num_v++;
8693 }
8694 }
8695
8696 /* If the update changes a column with an FTS index on it, we
8697 then add an update column node with a new document id to the
8698 other changes. We piggy back our changes on the normal UPDATE
8699 to reduce processing and IO overhead. */
8700 if (!prebuilt->table->fts) {
8701 trx->fts_next_doc_id = 0;
8702 } else if (changes_fts_column || changes_fts_doc_col) {
8703 dict_table_t* innodb_table = prebuilt->table;
8704
8705 ufield = uvect->fields + n_changed;
8706
8707 if (!DICT_TF2_FLAG_IS_SET(
8708 innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) {
8709
8710 /* If Doc ID is managed by user, and if any
8711 FTS indexed column has been updated, its corresponding
8712 Doc ID must also be updated. Otherwise, return
8713 error */
8714 if (changes_fts_column && !changes_fts_doc_col) {
8715 ib::warn() << "A new Doc ID must be supplied"
8716 " while updating FTS indexed columns.";
8717 return(DB_FTS_INVALID_DOCID);
8718 }
8719
8720 /* Doc ID must monotonically increase */
8721 ut_ad(innodb_table->fts->cache);
8722 if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
8723
8724 ib::warn() << "FTS Doc ID must be larger than "
8725 << innodb_table->fts->cache->next_doc_id
8726 - 1 << " for table "
8727 << innodb_table->name;
8728
8729 return(DB_FTS_INVALID_DOCID);
8730 }
8731
8732
8733 trx->fts_next_doc_id = doc_id;
8734 } else {
8735 /* If the Doc ID is a hidden column, it can't be
8736 changed by user */
8737 ut_ad(!changes_fts_doc_col);
8738
8739 /* Doc ID column is hidden, a new Doc ID will be
8740 generated by following fts_update_doc_id() call */
8741 trx->fts_next_doc_id = 0;
8742 }
8743
8744 fts_update_doc_id(
8745 innodb_table, ufield, &trx->fts_next_doc_id);
8746
8747 ++n_changed;
8748 } else {
8749 /* We have a Doc ID column, but none of FTS indexed
8750 columns are touched, nor the Doc ID column, so set
8751 fts_next_doc_id to UINT64_UNDEFINED, which means do not
8752 update the Doc ID column */
8753 trx->fts_next_doc_id = UINT64_UNDEFINED;
8754 }
8755
8756 uvect->n_fields = n_changed;
8757 uvect->info_bits = 0;
8758
8759 ut_a(buf <= (byte*) original_upd_buff + buff_len);
8760
8761 ut_ad(uvect->validate());
8762 return(DB_SUCCESS);
8763 }
8764
8765 #ifdef WITH_WSREP
8766 static
8767 int
wsrep_calc_row_hash(byte * digest,const uchar * row,TABLE * table,row_prebuilt_t * prebuilt)8768 wsrep_calc_row_hash(
8769 /*================*/
8770 byte* digest, /*!< in/out: md5 sum */
8771 const uchar* row, /*!< in: row in MySQL format */
8772 TABLE* table, /*!< in: table in MySQL data
8773 dictionary */
8774 row_prebuilt_t* prebuilt) /*!< in: InnoDB prebuilt struct */
8775 {
8776 ulint len;
8777 const byte* ptr;
8778
8779 void *ctx = alloca(my_md5_context_size());
8780 my_md5_init(ctx);
8781
8782 for (uint i = 0; i < table->s->fields; i++) {
8783 byte null_byte=0;
8784 byte true_byte=1;
8785 ulint col_type;
8786 ulint is_unsigned;
8787
8788 const Field* field = table->field[i];
8789 if (!field->stored_in_db()) {
8790 continue;
8791 }
8792
8793 ptr = (const byte*) row + get_field_offset(table, field);
8794 len = field->pack_length();
8795 col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
8796
8797 switch (col_type) {
8798
8799 case DATA_BLOB:
8800 ptr = row_mysql_read_blob_ref(&len, ptr, len);
8801
8802 break;
8803
8804 case DATA_VARCHAR:
8805 case DATA_BINARY:
8806 case DATA_VARMYSQL:
8807 if (field->type() == MYSQL_TYPE_VARCHAR) {
8808 /* This is a >= 5.0.3 type true VARCHAR where
8809 the real payload data length is stored in
8810 1 or 2 bytes */
8811
8812 ptr = row_mysql_read_true_varchar(
8813 &len, ptr,
8814 (ulint)
8815 (((Field_varstring*)field)->length_bytes));
8816
8817 }
8818
8819 break;
8820 default:
8821 ;
8822 }
8823 /*
8824 if (field->null_ptr &&
8825 field_in_record_is_null(table, field, (char*) row)) {
8826 */
8827
8828 if (field->is_null_in_record(row)) {
8829 my_md5_input(ctx, &null_byte, 1);
8830 } else {
8831 my_md5_input(ctx, &true_byte, 1);
8832 my_md5_input(ctx, ptr, len);
8833 }
8834 }
8835
8836 my_md5_result(ctx, digest);
8837
8838 return(0);
8839 }
8840 #endif /* WITH_WSREP */
8841
/**
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
TODO: currently InnoDB does not prevent the 'Halloween problem':
in a searched update a single row can get updated several times
if its index columns are updated!
@param[in]	old_row		Old row contents in MySQL format
@param[in]	new_row		Updated row contents in MySQL format
@return error number or 0; HA_ERR_RECORD_IS_THE_SAME if the update
vector turned out to be empty */

int
ha_innobase::update_row(
	const uchar*	old_row,
	const uchar*	new_row)
{
	int		err;

	dberr_t		error;
	trx_t*		trx = thd_to_trx(m_user_thd);

	DBUG_ENTER("ha_innobase::update_row");

	ut_a(m_prebuilt->trx == trx);

	if (high_level_read_only) {
		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	} else if (!trx_is_started(trx)) {
		trx->will_lock = true;
	}

	/* The packing buffer is allocated on first use and kept in
	m_upd_buf / m_upd_buf_size for subsequent calls. */
	if (m_upd_buf == NULL) {
		ut_ad(m_upd_buf_size == 0);

		/* Create a buffer for packing the fields of a record. Why
		table->reclength did not work here? Obviously, because char
		fields when packed actually became 1 byte longer, when we also
		stored the string length as the first byte. */

		m_upd_buf_size = table->s->reclength + table->s->max_key_length
			+ MAX_REF_PARTS * 3;

		m_upd_buf = reinterpret_cast<uchar*>(
			my_malloc(//PSI_INSTRUMENT_ME,
				  m_upd_buf_size,
				  MYF(MY_WME)));

		if (m_upd_buf == NULL) {
			m_upd_buf_size = 0;
			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
		}
	}

	upd_t*		uvect = row_get_prebuilt_update_vector(m_prebuilt);
	/* Output of calc_row_difference(): the new value of the
	AUTO_INCREMENT column if that column was changed to a non-NULL
	value. NOTE(review): it is not initialized here; presumably
	calc_row_difference() always assigns it - confirm. */
	ib_uint64_t	autoinc;

	/* Build an update vector from the modified fields in the rows
	(uses m_upd_buf of the handle) */

	error = calc_row_difference(
		uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size,
		m_prebuilt, autoinc);

	if (error != DB_SUCCESS) {
		goto func_exit;
	}

	if (!uvect->n_fields) {
		/* This is the same as success, but instructs
		MySQL that the row is not really updated and it
		should not increase the count of updated rows.
		This is fix for http://bugs.mysql.com/29157 */
		if (m_prebuilt->versioned_write
		    && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
		    /* Multiple UPDATE of same rows in single transaction create
		    historical rows only once. */
		    && trx->id != table->vers_start_id()) {
			error = row_insert_for_mysql((byte*) old_row,
						     m_prebuilt,
						     ROW_INS_HISTORICAL);
			if (error != DB_SUCCESS) {
				goto func_exit;
			}
			innobase_srv_conc_exit_innodb(m_prebuilt);
			innobase_active_small();
		}
		/* This return bypasses func_exit, so the WSREP key
		append below is not executed for an unchanged row. */
		DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
	} else {
		const bool vers_set_fields = m_prebuilt->versioned_write
			&& m_prebuilt->upd_node->update->affects_versioned();
		const bool vers_ins_row = vers_set_fields
			&& thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE;

		/* Normally this is not a delete (NO_DELETE). It is flagged
		as VERSIONED_DELETE when versioned fields are affected but
		no historical row will be inserted (the ALTER TABLE case),
		or when the statement is a DELETE on a table versioned by
		timestamp. */
		m_prebuilt->upd_node->is_delete =
			(vers_set_fields && !vers_ins_row) ||
			(thd_sql_command(m_user_thd) == SQLCOM_DELETE &&
			 table->versioned(VERS_TIMESTAMP))
			? VERSIONED_DELETE
			: NO_DELETE;

		innobase_srv_conc_enter_innodb(m_prebuilt);

		error = row_update_for_mysql(m_prebuilt);

		if (error == DB_SUCCESS && vers_ins_row
		    /* Multiple UPDATE of same rows in single transaction create
		    historical rows only once. */
		    && trx->id != table->vers_start_id()) {
			error = row_insert_for_mysql((byte*) old_row,
						     m_prebuilt,
						     ROW_INS_HISTORICAL);
		}
	}

	if (error == DB_SUCCESS && autoinc) {
		/* A value for an AUTO_INCREMENT column
		was specified in the UPDATE statement. */

		/* We need the upper limit of the col type to check for
		whether we update the table autoinc counter or not. */
		ulonglong	col_max_value =
			table->found_next_number_field->get_max_int_value();

		/* This should filter out the negative
		values set explicitly by the user. */
		if (autoinc <= col_max_value) {
			ulonglong	offset;
			ulonglong	increment;

			offset = m_prebuilt->autoinc_offset;
			increment = m_prebuilt->autoinc_increment;

			autoinc = innobase_next_autoinc(
				autoinc, 1, increment, offset,
				col_max_value);

			error = innobase_set_max_autoinc(autoinc);

			if (m_prebuilt->table->persistent_autoinc) {
				/* Update the PAGE_ROOT_AUTO_INC. Yes, we do
				this even if dict_table_t::autoinc already was
				greater than autoinc, because we cannot know
				if any INSERT actually used (and wrote to
				PAGE_ROOT_AUTO_INC) a value bigger than our
				autoinc. */
				btr_write_autoinc(dict_table_get_first_index(
							  m_prebuilt->table),
						  autoinc);
			}
		}
	}

	innobase_srv_conc_exit_innodb(m_prebuilt);

	/* The early-failure paths above jump here directly, skipping the
	innobase_srv_conc_exit_innodb() call just before this label. */
func_exit:
	if (error == DB_FTS_INVALID_DOCID) {
		err = HA_FTS_INVALID_DOCID;
		my_error(HA_FTS_INVALID_DOCID, MYF(0));
	} else {
		err = convert_error_code_to_mysql(
			error, m_prebuilt->table->flags, m_user_thd);
	}

	/* Tell InnoDB server that there might be work for
	utility threads: */

	innobase_active_small();

#ifdef WITH_WSREP
	/* After a successful update by a local WSREP thread, append the
	keys of the old and new row. A failure to do so overrides the
	result with HA_ERR_INTERNAL_ERROR. */
	if (error == DB_SUCCESS && trx->is_wsrep()
	    && wsrep_thd_is_local(m_user_thd)
	    && !wsrep_thd_ignore_table(m_user_thd)) {
		DBUG_PRINT("wsrep", ("update row key"));

		if (wsrep_append_keys(m_user_thd,
				      wsrep_protocol_version >= 4
				      ? WSREP_SERVICE_KEY_UPDATE
				      : WSREP_SERVICE_KEY_EXCLUSIVE,
				      old_row, new_row)){
			WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
			DBUG_PRINT("wsrep", ("row key failed"));
			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
		}
	}
#endif /* WITH_WSREP */

	DBUG_RETURN(err);
}
9032
9033 /**********************************************************************//**
9034 Deletes a row given as the parameter.
9035 @return error number or 0 */
9036
9037 int
delete_row(const uchar * record)9038 ha_innobase::delete_row(
9039 /*====================*/
9040 const uchar* record) /*!< in: a row in MySQL format */
9041 {
9042 dberr_t error;
9043 trx_t* trx = thd_to_trx(m_user_thd);
9044
9045 DBUG_ENTER("ha_innobase::delete_row");
9046
9047 ut_a(m_prebuilt->trx == trx);
9048
9049 if (high_level_read_only) {
9050 ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
9051 DBUG_RETURN(HA_ERR_TABLE_READONLY);
9052 } else if (!trx_is_started(trx)) {
9053 trx->will_lock = true;
9054 }
9055
9056 if (!m_prebuilt->upd_node) {
9057 row_get_prebuilt_update_vector(m_prebuilt);
9058 }
9059
9060 /* This is a delete */
9061 m_prebuilt->upd_node->is_delete = table->versioned_write(VERS_TRX_ID)
9062 && table->vers_end_field()->is_max()
9063 && trx->id != table->vers_start_id()
9064 ? VERSIONED_DELETE
9065 : PLAIN_DELETE;
9066
9067 innobase_srv_conc_enter_innodb(m_prebuilt);
9068
9069 error = row_update_for_mysql(m_prebuilt);
9070
9071 innobase_srv_conc_exit_innodb(m_prebuilt);
9072
9073 /* Tell the InnoDB server that there might be work for
9074 utility threads: */
9075
9076 innobase_active_small();
9077
9078 #ifdef WITH_WSREP
9079 if (error == DB_SUCCESS && trx->is_wsrep()
9080 && wsrep_thd_is_local(m_user_thd)
9081 && !wsrep_thd_ignore_table(m_user_thd)) {
9082 if (wsrep_append_keys(m_user_thd, WSREP_SERVICE_KEY_EXCLUSIVE,
9083 record,
9084 NULL)) {
9085 DBUG_PRINT("wsrep", ("delete fail"));
9086 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9087 }
9088 }
9089 #endif /* WITH_WSREP */
9090 DBUG_RETURN(convert_error_code_to_mysql(
9091 error, m_prebuilt->table->flags, m_user_thd));
9092 }
9093
/** Delete all rows from the table.
This implementation does not delete anything: it unconditionally
reports HA_ERR_WRONG_COMMAND to the caller.
@return HA_ERR_WRONG_COMMAND always */

int
ha_innobase::delete_all_rows()
{
	DBUG_ENTER("ha_innobase::delete_all_rows");
	DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
9103
9104 /**********************************************************************//**
9105 Removes a new lock set on a row, if it was not read optimistically. This can
9106 be called after a row has been read in the processing of an UPDATE or a DELETE
9107 query, if the option innodb_locks_unsafe_for_binlog is set. */
9108
9109 void
unlock_row(void)9110 ha_innobase::unlock_row(void)
9111 /*=========================*/
9112 {
9113 DBUG_ENTER("ha_innobase::unlock_row");
9114
9115 if (m_prebuilt->select_lock_type == LOCK_NONE) {
9116 DBUG_VOID_RETURN;
9117 }
9118
9119 ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE, true));
9120
9121 switch (m_prebuilt->row_read_type) {
9122 case ROW_READ_WITH_LOCKS:
9123 if (!srv_locks_unsafe_for_binlog
9124 && m_prebuilt->trx->isolation_level
9125 > TRX_ISO_READ_COMMITTED) {
9126 break;
9127 }
9128 /* fall through */
9129 case ROW_READ_TRY_SEMI_CONSISTENT:
9130 row_unlock_for_mysql(m_prebuilt, FALSE);
9131 break;
9132 case ROW_READ_DID_SEMI_CONSISTENT:
9133 m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9134 break;
9135 }
9136
9137 DBUG_VOID_RETURN;
9138 }
9139
/* See handler.h and row0mysql.h for docs on this function.
@return whether the row read type of the prebuilt struct is
ROW_READ_DID_SEMI_CONSISTENT */

bool
ha_innobase::was_semi_consistent_read(void)
/*=======================================*/
{
	return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
}
9148
9149 /* See handler.h and row0mysql.h for docs on this function. */
9150
9151 void
try_semi_consistent_read(bool yes)9152 ha_innobase::try_semi_consistent_read(bool yes)
9153 /*===========================================*/
9154 {
9155 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9156
9157 /* Row read type is set to semi consistent read if this was
9158 requested by the MySQL and either innodb_locks_unsafe_for_binlog
9159 option is used or this session is using READ COMMITTED isolation
9160 level. */
9161
9162 if (yes
9163 && (srv_locks_unsafe_for_binlog
9164 || m_prebuilt->trx->isolation_level
9165 <= TRX_ISO_READ_COMMITTED)) {
9166
9167 m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9168
9169 } else {
9170 m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
9171 }
9172 }
9173
/******************************************************************//**
Initializes a handle to use an index. This simply makes the given key
the active index by calling change_active_index().
@return 0 or error number */

int
ha_innobase::index_init(
/*====================*/
	uint		keynr,	/*!< in: key (index) number */
	bool)			/*!< in: unnamed parameter that is
				ignored by this implementation */
{
	DBUG_ENTER("index_init");

	DBUG_RETURN(change_active_index(keynr));
}
9188
/******************************************************************//**
Ends the use of an index on this handle: resets active_index to MAX_KEY,
clears the in_range_check_pushed_down flag and closes the m_ds_mrr
object.
@return 0 */

int
ha_innobase::index_end(void)
/*========================*/
{
	DBUG_ENTER("index_end");

	/* No index is active any more. */
	active_index = MAX_KEY;

	in_range_check_pushed_down = FALSE;

	m_ds_mrr.dsmrr_close();

	DBUG_RETURN(0);
}
9207
/*********************************************************************//**
Converts a search mode flag understood by MySQL to a flag understood
by InnoDB.
@return the InnoDB page cursor mode for find_flag; PAGE_CUR_UNSUPP for
HA_READ_PREFIX, and also (after raising ER_CHECK_NOT_IMPLEMENTED) for
any value that is not handled by the switch */
page_cur_mode_t
convert_search_mode_to_innobase(
/*============================*/
	ha_rkey_function	find_flag)	/*!< in: MySQL search flag */
{
	switch (find_flag) {
	case HA_READ_KEY_EXACT:
		/* this does not require the index to be UNIQUE */
	case HA_READ_KEY_OR_NEXT:
		return(PAGE_CUR_GE);
	case HA_READ_AFTER_KEY:
		return(PAGE_CUR_G);
	case HA_READ_BEFORE_KEY:
		return(PAGE_CUR_L);
	case HA_READ_KEY_OR_PREV:
	case HA_READ_PREFIX_LAST:
	case HA_READ_PREFIX_LAST_OR_PREV:
		return(PAGE_CUR_LE);
	case HA_READ_MBR_CONTAIN:
		return(PAGE_CUR_CONTAIN);
	case HA_READ_MBR_INTERSECT:
		return(PAGE_CUR_INTERSECT);
	case HA_READ_MBR_WITHIN:
		return(PAGE_CUR_WITHIN);
	case HA_READ_MBR_DISJOINT:
		return(PAGE_CUR_DISJOINT);
	case HA_READ_MBR_EQUAL:
		return(PAGE_CUR_MBR_EQUAL);
	case HA_READ_PREFIX:
		/* Recognized, but reported as unsupported without
		raising an error here. */
		return(PAGE_CUR_UNSUPP);
	/* do not use "default:" in order to produce a gcc warning:
	enumeration value '...' not handled in switch
	(if -Wswitch or -Wall is used) */
	}

	/* Reached only for a find_flag value that matches none of the
	cases above. */
	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");

	return(PAGE_CUR_UNSUPP);
}
9250
9251 /*
9252 BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
9253 ---------------------------------------------------
9254 The following does not cover all the details, but explains how we determine
9255 the start of a new SQL statement, and what is associated with it.
9256
9257 For each table in the database the MySQL interpreter may have several
9258 table handle instances in use, also in a single SQL query. For each table
9259 handle instance there is an InnoDB 'm_prebuilt' struct which contains most
9260 of the InnoDB data associated with this table handle instance.
9261
9262 A) if the user has not explicitly set any MySQL table level locks:
9263
9264 1) MySQL calls ::external_lock to set an 'intention' table level lock on
9265 the table of the handle instance. There we set
9266 m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
9267 true if we are taking this table handle instance to use in a new SQL
9268 statement issued by the user. We also increment trx->n_mysql_tables_in_use.
9269
9270 2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
9271 instructions to m_prebuilt->template of the table handle instance in
9272 ::index_read. The template is used to save CPU time in large joins.
9273
9274 3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we
9275 allocate a new consistent read view for the trx if it does not yet have one,
9276 or in the case of a locking read, set an InnoDB 'intention' table level
9277 lock on the table.
9278
9279 4) We do the SELECT. MySQL may repeatedly call ::index_read for the
9280 same table handle instance, if it is a join.
9281
9282 5) When the SELECT ends, MySQL removes its intention table level locks
9283 in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
9284 (a) we execute a COMMIT there if the autocommit is on,
9285 (b) we also release possible 'SQL statement level resources' InnoDB may
9286 have for this SQL statement. The MySQL interpreter does NOT execute
9287 autocommit for pure read transactions, though it should. That is why the
9288 table handler in that case has to execute the COMMIT in ::external_lock.
9289
B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */
9298
9299
9300 /**********************************************************************//**
9301 Positions an index cursor to the index specified in the handle. Fetches the
9302 row if any.
9303 @return 0, HA_ERR_KEY_NOT_FOUND, or error number */
9304
9305 int
index_read(uchar * buf,const uchar * key_ptr,uint key_len,enum ha_rkey_function find_flag)9306 ha_innobase::index_read(
9307 /*====================*/
9308 uchar* buf, /*!< in/out: buffer for the returned
9309 row */
9310 const uchar* key_ptr, /*!< in: key value; if this is NULL
9311 we position the cursor at the
9312 start or end of index; this can
9313 also contain an InnoDB row id, in
9314 which case key_len is the InnoDB
9315 row id length; the key value can
9316 also be a prefix of a full key value,
9317 and the last column can be a prefix
9318 of a full column */
9319 uint key_len,/*!< in: key value length */
9320 enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
9321 {
9322 DBUG_ENTER("index_read");
9323 DEBUG_SYNC_C("ha_innobase_index_read_begin");
9324
9325 ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9326 ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
9327
9328 dict_index_t* index = m_prebuilt->index;
9329
9330 if (index == NULL || index->is_corrupted()) {
9331 m_prebuilt->index_usable = FALSE;
9332 DBUG_RETURN(HA_ERR_CRASHED);
9333 }
9334
9335 if (!m_prebuilt->index_usable) {
9336 DBUG_RETURN(index->is_corrupted()
9337 ? HA_ERR_INDEX_CORRUPT
9338 : HA_ERR_TABLE_DEF_CHANGED);
9339 }
9340
9341 if (index->type & DICT_FTS) {
9342 DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
9343 }
9344
9345 /* For R-Tree index, we will always place the page lock to
9346 pages being searched */
9347 if (index->is_spatial() && !m_prebuilt->trx->will_lock) {
9348 if (trx_is_started(m_prebuilt->trx)) {
9349 DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION);
9350 } else {
9351 m_prebuilt->trx->will_lock = true;
9352 }
9353 }
9354
9355 /* Note that if the index for which the search template is built is not
9356 necessarily m_prebuilt->index, but can also be the clustered index */
9357
9358 if (m_prebuilt->sql_stat_start) {
9359 build_template(false);
9360 }
9361
9362 if (key_ptr != NULL) {
9363 /* Convert the search key value to InnoDB format into
9364 m_prebuilt->search_tuple */
9365
9366 row_sel_convert_mysql_key_to_innobase(
9367 m_prebuilt->search_tuple,
9368 m_prebuilt->srch_key_val1,
9369 m_prebuilt->srch_key_val_len,
9370 index,
9371 (byte*) key_ptr,
9372 (ulint) key_len);
9373
9374 DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
9375 } else {
9376 /* We position the cursor to the last or the first entry
9377 in the index */
9378
9379 dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
9380 }
9381
9382 page_cur_mode_t mode = convert_search_mode_to_innobase(find_flag);
9383
9384 ulint match_mode = 0;
9385
9386 if (find_flag == HA_READ_KEY_EXACT) {
9387
9388 match_mode = ROW_SEL_EXACT;
9389
9390 } else if (find_flag == HA_READ_PREFIX_LAST) {
9391
9392 match_mode = ROW_SEL_EXACT_PREFIX;
9393 }
9394
9395 m_last_match_mode = (uint) match_mode;
9396
9397 dberr_t ret;
9398
9399 if (mode != PAGE_CUR_UNSUPP) {
9400
9401 innobase_srv_conc_enter_innodb(m_prebuilt);
9402
9403 ret = row_search_mvcc(
9404 buf, mode, m_prebuilt, match_mode, 0);
9405
9406 innobase_srv_conc_exit_innodb(m_prebuilt);
9407 } else {
9408
9409 ret = DB_UNSUPPORTED;
9410 }
9411
9412 DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;);
9413
9414 int error;
9415
9416 switch (ret) {
9417 case DB_SUCCESS:
9418 error = 0;
9419 table->status = 0;
9420 if (m_prebuilt->table->is_system_db) {
9421 srv_stats.n_system_rows_read.add(
9422 thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9423 } else {
9424 srv_stats.n_rows_read.add(
9425 thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9426 }
9427 break;
9428
9429 case DB_RECORD_NOT_FOUND:
9430 error = HA_ERR_KEY_NOT_FOUND;
9431 table->status = STATUS_NOT_FOUND;
9432 break;
9433
9434 case DB_END_OF_INDEX:
9435 error = HA_ERR_KEY_NOT_FOUND;
9436 table->status = STATUS_NOT_FOUND;
9437 break;
9438
9439 case DB_TABLESPACE_DELETED:
9440 ib_senderrf(
9441 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9442 ER_TABLESPACE_DISCARDED,
9443 table->s->table_name.str);
9444
9445 table->status = STATUS_NOT_FOUND;
9446 error = HA_ERR_TABLESPACE_MISSING;
9447 break;
9448
9449 case DB_TABLESPACE_NOT_FOUND:
9450
9451 ib_senderrf(
9452 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9453 ER_TABLESPACE_MISSING,
9454 table->s->table_name.str);
9455
9456 table->status = STATUS_NOT_FOUND;
9457 error = HA_ERR_TABLESPACE_MISSING;
9458 break;
9459
9460 default:
9461 error = convert_error_code_to_mysql(
9462 ret, m_prebuilt->table->flags, m_user_thd);
9463
9464 table->status = STATUS_NOT_FOUND;
9465 break;
9466 }
9467
9468 DBUG_RETURN(error);
9469 }
9470
/*******************************************************************//**
The following function works like index_read, but it finds the last
row with the current key value or prefix. It is implemented as a call to
index_read() with the search flag HA_READ_PREFIX_LAST.
@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */

int
ha_innobase::index_read_last(
/*=========================*/
	uchar*		buf,	/*!< out: fetched row */
	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
				key value */
	uint		key_len)/*!< in: length of the key val or prefix
				in bytes */
{
	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
}
9487
9488 /********************************************************************//**
9489 Get the index for a handle. Does not change active index.
9490 @return NULL or index instance. */
9491
9492 dict_index_t*
innobase_get_index(uint keynr)9493 ha_innobase::innobase_get_index(
9494 /*============================*/
9495 uint keynr) /*!< in: use this index; MAX_KEY means always
9496 clustered index, even if it was internally
9497 generated by InnoDB */
9498 {
9499 KEY* key = NULL;
9500 dict_table_t* ib_table = m_prebuilt->table;
9501 dict_index_t* index;
9502
9503 DBUG_ENTER("innobase_get_index");
9504
9505 if (keynr != MAX_KEY && table->s->keys > 0) {
9506 key = &table->key_info[keynr];
9507 index = dict_table_get_index_on_name(ib_table, key->name.str);
9508 } else {
9509 index = dict_table_get_first_index(ib_table);
9510 }
9511
9512 if (index == NULL) {
9513 sql_print_error(
9514 "InnoDB could not find key no %u with name %s"
9515 " from dict cache for table %s",
9516 keynr, key ? key->name.str : "NULL",
9517 ib_table->name.m_name);
9518 }
9519
9520 DBUG_RETURN(index);
9521 }
9522
9523 /********************************************************************//**
9524 Changes the active index of a handle.
9525 @return 0 or error code */
9526
9527 int
change_active_index(uint keynr)9528 ha_innobase::change_active_index(
9529 /*=============================*/
9530 uint keynr) /*!< in: use this index; MAX_KEY means always clustered
9531 index, even if it was internally generated by
9532 InnoDB */
9533 {
9534 DBUG_ENTER("change_active_index");
9535
9536 ut_ad(m_user_thd == ha_thd());
9537 ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9538
9539 active_index = keynr;
9540
9541 m_prebuilt->index = innobase_get_index(keynr);
9542
9543 if (m_prebuilt->index == NULL) {
9544 sql_print_warning("InnoDB: change_active_index(%u) failed",
9545 keynr);
9546 m_prebuilt->index_usable = FALSE;
9547 DBUG_RETURN(1);
9548 }
9549
9550 m_prebuilt->index_usable = row_merge_is_index_usable(
9551 m_prebuilt->trx, m_prebuilt->index);
9552
9553 if (!m_prebuilt->index_usable) {
9554 if (m_prebuilt->index->is_corrupted()) {
9555 char table_name[MAX_FULL_NAME_LEN + 1];
9556
9557 innobase_format_name(
9558 table_name, sizeof table_name,
9559 m_prebuilt->index->table->name.m_name);
9560
9561 if (m_prebuilt->index->is_primary()) {
9562 ut_ad(m_prebuilt->index->table->corrupted);
9563 push_warning_printf(
9564 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9565 ER_TABLE_CORRUPT,
9566 "InnoDB: Table %s is corrupted.",
9567 table_name);
9568 DBUG_RETURN(ER_TABLE_CORRUPT);
9569 } else {
9570 push_warning_printf(
9571 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9572 HA_ERR_INDEX_CORRUPT,
9573 "InnoDB: Index %s for table %s is"
9574 " marked as corrupted",
9575 m_prebuilt->index->name(),
9576 table_name);
9577 DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
9578 }
9579 } else {
9580 push_warning_printf(
9581 m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9582 HA_ERR_TABLE_DEF_CHANGED,
9583 "InnoDB: insufficient history for index %u",
9584 keynr);
9585 }
9586
9587 /* The caller seems to ignore this. Thus, we must check
9588 this again in row_search_for_mysql(). */
9589 DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY,
9590 0, NULL));
9591 }
9592
9593 ut_a(m_prebuilt->search_tuple != 0);
9594
9595 /* Initialization of search_tuple is not needed for FT index
9596 since FT search returns rank only. In addition engine should
9597 be able to retrieve FTS_DOC_ID column value if necessary. */
9598 if (m_prebuilt->index->type & DICT_FTS) {
9599 for (uint i = 0; i < table->s->fields; i++) {
9600 if (m_prebuilt->read_just_key
9601 && bitmap_is_set(table->read_set, i)
9602 && !strcmp(table->s->field[i]->field_name.str,
9603 FTS_DOC_ID_COL_NAME)) {
9604 m_prebuilt->fts_doc_id_in_read_set = true;
9605 break;
9606 }
9607 }
9608 } else {
9609 ulint n_fields = dict_index_get_n_unique_in_tree(
9610 m_prebuilt->index);
9611
9612 dtuple_set_n_fields(m_prebuilt->search_tuple, n_fields);
9613
9614 dict_index_copy_types(
9615 m_prebuilt->search_tuple, m_prebuilt->index,
9616 n_fields);
9617
9618 /* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is
9619 always added to read_set. */
9620 m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query
9621 && m_prebuilt->read_just_key
9622 && m_prebuilt->index->contains_col_or_prefix(
9623 m_prebuilt->table->fts->doc_col, false);
9624 }
9625
9626 /* MySQL changes the active index for a handle also during some
9627 queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
9628 and then calculates the sum. Previously we played safe and used
9629 the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
9630 copying. Starting from MySQL-4.1 we use a more efficient flag here. */
9631
9632 build_template(false);
9633
9634 DBUG_RETURN(0);
9635 }
9636
9637 /***********************************************************************//**
9638 Reads the next or previous row from a cursor, which must have previously been
9639 positioned using index_read.
9640 @return 0, HA_ERR_END_OF_FILE, or error number */
9641
9642 int
general_fetch(uchar * buf,uint direction,uint match_mode)9643 ha_innobase::general_fetch(
9644 /*=======================*/
9645 uchar* buf, /*!< in/out: buffer for next row in MySQL
9646 format */
9647 uint direction, /*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
9648 uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
9649 ROW_SEL_EXACT_PREFIX */
9650 {
9651 DBUG_ENTER("general_fetch");
9652
9653 const trx_t* trx = m_prebuilt->trx;
9654
9655 ut_ad(trx == thd_to_trx(m_user_thd));
9656
9657 if (m_prebuilt->table->is_readable()) {
9658 } else if (m_prebuilt->table->corrupted) {
9659 DBUG_RETURN(HA_ERR_CRASHED);
9660 } else {
9661 DBUG_RETURN(m_prebuilt->table->space
9662 ? HA_ERR_DECRYPTION_FAILED
9663 : HA_ERR_NO_SUCH_TABLE);
9664 }
9665
9666 innobase_srv_conc_enter_innodb(m_prebuilt);
9667
9668 dberr_t ret = row_search_mvcc(
9669 buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, direction);
9670
9671 innobase_srv_conc_exit_innodb(m_prebuilt);
9672
9673 int error;
9674
9675 switch (ret) {
9676 case DB_SUCCESS:
9677 error = 0;
9678 table->status = 0;
9679 if (m_prebuilt->table->is_system_db) {
9680 srv_stats.n_system_rows_read.add(
9681 thd_get_thread_id(trx->mysql_thd), 1);
9682 } else {
9683 srv_stats.n_rows_read.add(
9684 thd_get_thread_id(trx->mysql_thd), 1);
9685 }
9686 break;
9687 case DB_RECORD_NOT_FOUND:
9688 error = HA_ERR_END_OF_FILE;
9689 table->status = STATUS_NOT_FOUND;
9690 break;
9691 case DB_END_OF_INDEX:
9692 error = HA_ERR_END_OF_FILE;
9693 table->status = STATUS_NOT_FOUND;
9694 break;
9695 case DB_TABLESPACE_DELETED:
9696 ib_senderrf(
9697 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9698 ER_TABLESPACE_DISCARDED,
9699 table->s->table_name.str);
9700
9701 table->status = STATUS_NOT_FOUND;
9702 error = HA_ERR_TABLESPACE_MISSING;
9703 break;
9704 case DB_TABLESPACE_NOT_FOUND:
9705
9706 ib_senderrf(
9707 trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9708 ER_TABLESPACE_MISSING,
9709 table->s->table_name.str);
9710
9711 table->status = STATUS_NOT_FOUND;
9712 error = HA_ERR_TABLESPACE_MISSING;
9713 break;
9714 default:
9715 error = convert_error_code_to_mysql(
9716 ret, m_prebuilt->table->flags, m_user_thd);
9717
9718 table->status = STATUS_NOT_FOUND;
9719 break;
9720 }
9721
9722 DBUG_RETURN(error);
9723 }
9724
9725 /***********************************************************************//**
9726 Reads the next row from a cursor, which must have previously been
9727 positioned using index_read.
9728 @return 0, HA_ERR_END_OF_FILE, or error number */
9729
9730 int
index_next(uchar * buf)9731 ha_innobase::index_next(
9732 /*====================*/
9733 uchar* buf) /*!< in/out: buffer for next row in MySQL
9734 format */
9735 {
9736 return(general_fetch(buf, ROW_SEL_NEXT, 0));
9737 }
9738
9739 /*******************************************************************//**
9740 Reads the next row matching to the key value given as the parameter.
9741 @return 0, HA_ERR_END_OF_FILE, or error number */
9742
9743 int
index_next_same(uchar * buf,const uchar *,uint)9744 ha_innobase::index_next_same(
9745 /*=========================*/
9746 uchar* buf, /*!< in/out: buffer for the row */
9747 const uchar*, uint)
9748 {
9749 return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
9750 }
9751
9752 /***********************************************************************//**
9753 Reads the previous row from a cursor, which must have previously been
9754 positioned using index_read.
9755 @return 0, HA_ERR_END_OF_FILE, or error number */
9756
9757 int
index_prev(uchar * buf)9758 ha_innobase::index_prev(
9759 /*====================*/
9760 uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
9761 {
9762 return(general_fetch(buf, ROW_SEL_PREV, 0));
9763 }
9764
9765 /********************************************************************//**
9766 Positions a cursor on the first record in an index and reads the
9767 corresponding row to buf.
9768 @return 0, HA_ERR_END_OF_FILE, or error code */
9769
9770 int
index_first(uchar * buf)9771 ha_innobase::index_first(
9772 /*=====================*/
9773 uchar* buf) /*!< in/out: buffer for the row */
9774 {
9775 DBUG_ENTER("index_first");
9776
9777 int error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
9778
9779 /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9780
9781 if (error == HA_ERR_KEY_NOT_FOUND) {
9782 error = HA_ERR_END_OF_FILE;
9783 }
9784
9785 DBUG_RETURN(error);
9786 }
9787
9788 /********************************************************************//**
9789 Positions a cursor on the last record in an index and reads the
9790 corresponding row to buf.
9791 @return 0, HA_ERR_END_OF_FILE, or error code */
9792
9793 int
index_last(uchar * buf)9794 ha_innobase::index_last(
9795 /*====================*/
9796 uchar* buf) /*!< in/out: buffer for the row */
9797 {
9798 DBUG_ENTER("index_last");
9799
9800 int error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
9801
9802 /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9803
9804 if (error == HA_ERR_KEY_NOT_FOUND) {
9805 error = HA_ERR_END_OF_FILE;
9806 }
9807
9808 DBUG_RETURN(error);
9809 }
9810
9811 /****************************************************************//**
9812 Initialize a table scan.
9813 @return 0 or error number */
9814
9815 int
rnd_init(bool scan)9816 ha_innobase::rnd_init(
9817 /*==================*/
9818 bool scan) /*!< in: true if table/index scan FALSE otherwise */
9819 {
9820 int err;
9821
9822 /* Store the active index value so that we can restore the original
9823 value after a scan */
9824
9825 if (m_prebuilt->clust_index_was_generated) {
9826 err = change_active_index(MAX_KEY);
9827 } else {
9828 err = change_active_index(m_primary_key);
9829 }
9830
9831 /* Don't use semi-consistent read in random row reads (by position).
9832 This means we must disable semi_consistent_read if scan is false */
9833
9834 if (!scan) {
9835 try_semi_consistent_read(0);
9836 }
9837
9838 m_start_of_scan = true;
9839
9840 return(err);
9841 }
9842
9843 /*****************************************************************//**
9844 Ends a table scan.
9845 @return 0 or error number */
9846
9847 int
rnd_end(void)9848 ha_innobase::rnd_end(void)
9849 /*======================*/
9850 {
9851 return(index_end());
9852 }
9853
9854 /*****************************************************************//**
9855 Reads the next row in a table scan (also used to read the FIRST row
9856 in a table scan).
9857 @return 0, HA_ERR_END_OF_FILE, or error number */
9858
9859 int
rnd_next(uchar * buf)9860 ha_innobase::rnd_next(
9861 /*==================*/
9862 uchar* buf) /*!< in/out: returns the row in this buffer,
9863 in MySQL format */
9864 {
9865 int error;
9866
9867 DBUG_ENTER("rnd_next");
9868
9869 if (m_start_of_scan) {
9870 error = index_first(buf);
9871
9872 if (error == HA_ERR_KEY_NOT_FOUND) {
9873 error = HA_ERR_END_OF_FILE;
9874 }
9875
9876 m_start_of_scan = false;
9877 } else {
9878 error = general_fetch(buf, ROW_SEL_NEXT, 0);
9879 }
9880
9881 DBUG_RETURN(error);
9882 }
9883
9884 /**********************************************************************//**
9885 Fetches a row from the table based on a row reference.
9886 @return 0, HA_ERR_KEY_NOT_FOUND, or error code */
9887
9888 int
rnd_pos(uchar * buf,uchar * pos)9889 ha_innobase::rnd_pos(
9890 /*=================*/
9891 uchar* buf, /*!< in/out: buffer for the row */
9892 uchar* pos) /*!< in: primary key value of the row in the
9893 MySQL format, or the row id if the clustered
9894 index was internally generated by InnoDB; the
9895 length of data in pos has to be ref_length */
9896 {
9897 DBUG_ENTER("rnd_pos");
9898 DBUG_DUMP("key", pos, ref_length);
9899
9900 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9901
9902 /* Note that we assume the length of the row reference is fixed
9903 for the table, and it is == ref_length */
9904
9905 int error = index_read(buf, pos, (uint)ref_length, HA_READ_KEY_EXACT);
9906
9907 if (error != 0) {
9908 DBUG_PRINT("error", ("Got error: %d", error));
9909 }
9910
9911 DBUG_RETURN(error);
9912 }
9913
9914 /**********************************************************************//**
9915 Initialize FT index scan
9916 @return 0 or error number */
9917
9918 int
ft_init()9919 ha_innobase::ft_init()
9920 /*==================*/
9921 {
9922 DBUG_ENTER("ft_init");
9923
9924 trx_t* trx = check_trx_exists(ha_thd());
9925
9926 /* FTS queries are not treated as autocommit non-locking selects.
9927 This is because the FTS implementation can acquire locks behind
9928 the scenes. This has not been verified but it is safer to treat
9929 them as regular read only transactions for now. */
9930
9931 if (!trx_is_started(trx)) {
9932 trx->will_lock = true;
9933 }
9934
9935 DBUG_RETURN(rnd_init(false));
9936 }
9937
9938 /**********************************************************************//**
9939 Initialize FT index scan
9940 @return FT_INFO structure if successful or NULL */
9941
9942 FT_INFO*
ft_init_ext(uint flags,uint keynr,String * key)9943 ha_innobase::ft_init_ext(
9944 /*=====================*/
9945 uint flags, /* in: */
9946 uint keynr, /* in: */
9947 String* key) /* in: */
9948 {
9949 NEW_FT_INFO* fts_hdl = NULL;
9950 dict_index_t* index;
9951 fts_result_t* result;
9952 char buf_tmp[8192];
9953 ulint buf_tmp_used;
9954 uint num_errors;
9955 ulint query_len = key->length();
9956 const CHARSET_INFO* char_set = key->charset();
9957 const char* query = key->ptr();
9958
9959 if (UNIV_UNLIKELY(fts_enable_diag_print)) {
9960 {
9961 ib::info out;
9962 out << "keynr=" << keynr << ", '";
9963 out.write(key->ptr(), key->length());
9964 }
9965
9966 if (flags & FT_BOOL) {
9967 ib::info() << "BOOL search";
9968 } else {
9969 ib::info() << "NL search";
9970 }
9971 }
9972
9973 /* FIXME: utf32 and utf16 are not compatible with some
9974 string function used. So to convert them to uft8 before
9975 we proceed. */
9976 if (strcmp(char_set->csname, "utf32") == 0
9977 || strcmp(char_set->csname, "utf16") == 0) {
9978
9979 buf_tmp_used = innobase_convert_string(
9980 buf_tmp, sizeof(buf_tmp) - 1,
9981 &my_charset_utf8_general_ci,
9982 query, query_len, (CHARSET_INFO*) char_set,
9983 &num_errors);
9984
9985 buf_tmp[buf_tmp_used] = 0;
9986 query = buf_tmp;
9987 query_len = buf_tmp_used;
9988 }
9989
9990 trx_t* trx = m_prebuilt->trx;
9991
9992 /* FTS queries are not treated as autocommit non-locking selects.
9993 This is because the FTS implementation can acquire locks behind
9994 the scenes. This has not been verified but it is safer to treat
9995 them as regular read only transactions for now. */
9996
9997 if (!trx_is_started(trx)) {
9998 trx->will_lock = true;
9999 }
10000
10001 dict_table_t* ft_table = m_prebuilt->table;
10002
10003 /* Table does not have an FTS index */
10004 if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
10005 my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10006 return(NULL);
10007 }
10008
10009 /* If tablespace is discarded, we should return here */
10010 if (!ft_table->space) {
10011 my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str,
10012 table->s->table_name.str);
10013 return(NULL);
10014 }
10015
10016 if (keynr == NO_SUCH_KEY) {
10017 /* FIXME: Investigate the NO_SUCH_KEY usage */
10018 index = reinterpret_cast<dict_index_t*>
10019 (ib_vector_getp(ft_table->fts->indexes, 0));
10020 } else {
10021 index = innobase_get_index(keynr);
10022 }
10023
10024 if (index == NULL || index->type != DICT_FTS) {
10025 my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10026 return(NULL);
10027 }
10028
10029 if (!(ft_table->fts->added_synced)) {
10030 fts_init_index(ft_table, FALSE);
10031
10032 ft_table->fts->added_synced = true;
10033 }
10034
10035 const byte* q = reinterpret_cast<const byte*>(
10036 const_cast<char*>(query));
10037
10038 // FIXME: support ft_init_ext_with_hints(), pass LIMIT
10039 dberr_t error = fts_query(trx, index, flags, q, query_len, &result);
10040
10041 if (error != DB_SUCCESS) {
10042 my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
10043 return(NULL);
10044 }
10045
10046 /* Allocate FTS handler, and instantiate it before return */
10047 fts_hdl = reinterpret_cast<NEW_FT_INFO*>(
10048 my_malloc(/*PSI_INSTRUMENT_ME,*/ sizeof(NEW_FT_INFO), MYF(0)));
10049
10050 fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
10051 fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
10052 fts_hdl->ft_prebuilt = m_prebuilt;
10053 fts_hdl->ft_result = result;
10054
10055 /* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */
10056 m_prebuilt->in_fts_query = true;
10057
10058 return(reinterpret_cast<FT_INFO*>(fts_hdl));
10059 }
10060
10061 /*****************************************************************//**
10062 Set up search tuple for a query through FTS_DOC_ID_INDEX on
10063 supplied Doc ID. This is used by MySQL to retrieve the documents
10064 once the search result (Doc IDs) is available */
10065 static
10066 void
innobase_fts_create_doc_id_key(dtuple_t * tuple,const dict_index_t * index,doc_id_t * doc_id)10067 innobase_fts_create_doc_id_key(
10068 /*===========================*/
10069 dtuple_t* tuple, /* in/out: m_prebuilt->search_tuple */
10070 const dict_index_t*
10071 index, /* in: index (FTS_DOC_ID_INDEX) */
10072 doc_id_t* doc_id) /* in/out: doc id to search, value
10073 could be changed to storage format
10074 used for search. */
10075 {
10076 doc_id_t temp_doc_id;
10077 dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
10078
10079 ut_a(dict_index_get_n_unique(index) == 1);
10080
10081 dtuple_set_n_fields(tuple, index->n_fields);
10082 dict_index_copy_types(tuple, index, index->n_fields);
10083
10084 #ifdef UNIV_DEBUG
10085 /* The unique Doc ID field should be an eight-bytes integer */
10086 dict_field_t* field = dict_index_get_nth_field(index, 0);
10087 ut_a(field->col->mtype == DATA_INT);
10088 ut_ad(sizeof(*doc_id) == field->fixed_len);
10089 ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME));
10090 #endif /* UNIV_DEBUG */
10091
10092 /* Convert to storage byte order */
10093 mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
10094 *doc_id = temp_doc_id;
10095 dfield_set_data(dfield, doc_id, sizeof(*doc_id));
10096
10097 dtuple_set_n_fields_cmp(tuple, 1);
10098
10099 for (ulint i = 1; i < index->n_fields; i++) {
10100 dfield = dtuple_get_nth_field(tuple, i);
10101 dfield_set_null(dfield);
10102 }
10103 }
10104
10105 /**********************************************************************//**
10106 Fetch next result from the FT result set
10107 @return error code */
10108
10109 int
ft_read(uchar * buf)10110 ha_innobase::ft_read(
10111 /*=================*/
10112 uchar* buf) /*!< in/out: buf contain result row */
10113 {
10114 row_prebuilt_t* ft_prebuilt;
10115
10116 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_prebuilt;
10117
10118 ut_a(ft_prebuilt == m_prebuilt);
10119
10120 fts_result_t* result;
10121
10122 result = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_result;
10123
10124 if (result->current == NULL) {
10125 /* This is the case where the FTS query did not
10126 contain and matching documents. */
10127 if (result->rankings_by_id != NULL) {
10128 /* Now that we have the complete result, we
10129 need to sort the document ids on their rank
10130 calculation. */
10131
10132 fts_query_sort_result_on_rank(result);
10133
10134 result->current = const_cast<ib_rbt_node_t*>(
10135 rbt_first(result->rankings_by_rank));
10136 } else {
10137 ut_a(result->current == NULL);
10138 }
10139 } else {
10140 result->current = const_cast<ib_rbt_node_t*>(
10141 rbt_next(result->rankings_by_rank, result->current));
10142 }
10143
10144 next_record:
10145
10146 if (result->current != NULL) {
10147 doc_id_t search_doc_id;
10148 dtuple_t* tuple = m_prebuilt->search_tuple;
10149
10150 /* If we only need information from result we can return
10151 without fetching the table row */
10152 if (ft_prebuilt->read_just_key) {
10153 #ifdef MYSQL_STORE_FTS_DOC_ID
10154 if (m_prebuilt->fts_doc_id_in_read_set) {
10155 fts_ranking_t* ranking;
10156 ranking = rbt_value(fts_ranking_t,
10157 result->current);
10158 innobase_fts_store_docid(
10159 table, ranking->doc_id);
10160 }
10161 #endif
10162 table->status= 0;
10163 return(0);
10164 }
10165
10166 dict_index_t* index;
10167
10168 index = m_prebuilt->table->fts_doc_id_index;
10169
10170 /* Must find the index */
10171 ut_a(index != NULL);
10172
10173 /* Switch to the FTS doc id index */
10174 m_prebuilt->index = index;
10175
10176 fts_ranking_t* ranking = rbt_value(
10177 fts_ranking_t, result->current);
10178
10179 search_doc_id = ranking->doc_id;
10180
10181 /* We pass a pointer of search_doc_id because it will be
10182 converted to storage byte order used in the search
10183 tuple. */
10184 innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
10185
10186 innobase_srv_conc_enter_innodb(m_prebuilt);
10187
10188 dberr_t ret = row_search_for_mysql(
10189 (byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0);
10190
10191 innobase_srv_conc_exit_innodb(m_prebuilt);
10192
10193 int error;
10194
10195 switch (ret) {
10196 case DB_SUCCESS:
10197 error = 0;
10198 table->status = 0;
10199 break;
10200 case DB_RECORD_NOT_FOUND:
10201 result->current = const_cast<ib_rbt_node_t*>(
10202 rbt_next(result->rankings_by_rank,
10203 result->current));
10204
10205 if (!result->current) {
10206 /* exhaust the result set, should return
10207 HA_ERR_END_OF_FILE just like
10208 ha_innobase::general_fetch() and/or
10209 ha_innobase::index_first() etc. */
10210 error = HA_ERR_END_OF_FILE;
10211 table->status = STATUS_NOT_FOUND;
10212 } else {
10213 goto next_record;
10214 }
10215 break;
10216 case DB_END_OF_INDEX:
10217 error = HA_ERR_END_OF_FILE;
10218 table->status = STATUS_NOT_FOUND;
10219 break;
10220 case DB_TABLESPACE_DELETED:
10221
10222 ib_senderrf(
10223 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10224 ER_TABLESPACE_DISCARDED,
10225 table->s->table_name.str);
10226
10227 table->status = STATUS_NOT_FOUND;
10228 error = HA_ERR_TABLESPACE_MISSING;
10229 break;
10230 case DB_TABLESPACE_NOT_FOUND:
10231
10232 ib_senderrf(
10233 m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10234 ER_TABLESPACE_MISSING,
10235 table->s->table_name.str);
10236
10237 table->status = STATUS_NOT_FOUND;
10238 error = HA_ERR_TABLESPACE_MISSING;
10239 break;
10240 default:
10241 error = convert_error_code_to_mysql(
10242 ret, 0, m_user_thd);
10243
10244 table->status = STATUS_NOT_FOUND;
10245 break;
10246 }
10247
10248 return(error);
10249 }
10250
10251 return(HA_ERR_END_OF_FILE);
10252 }
10253
10254 #ifdef WITH_WSREP
10255 inline
10256 const char*
wsrep_key_type_to_str(Wsrep_service_key_type type)10257 wsrep_key_type_to_str(Wsrep_service_key_type type)
10258 {
10259 switch (type) {
10260 case WSREP_SERVICE_KEY_SHARED:
10261 return "shared";
10262 case WSREP_SERVICE_KEY_REFERENCE:
10263 return "reference";
10264 case WSREP_SERVICE_KEY_UPDATE:
10265 return "update";
10266 case WSREP_SERVICE_KEY_EXCLUSIVE:
10267 return "exclusive";
10268 };
10269 return "unknown";
10270 }
10271
10272 extern dberr_t
wsrep_append_foreign_key(trx_t * trx,dict_foreign_t * foreign,const rec_t * rec,dict_index_t * index,bool referenced,upd_node_t * upd_node,bool pa_disable,Wsrep_service_key_type key_type)10273 wsrep_append_foreign_key(
10274 /*===========================*/
10275 trx_t* trx, /*!< in: trx */
10276 dict_foreign_t* foreign, /*!< in: foreign key constraint */
10277 const rec_t* rec, /*!<in: clustered index record */
10278 dict_index_t* index, /*!<in: clustered index */
10279 bool referenced, /*!<in: is check for
10280 referenced table */
10281 upd_node_t* upd_node, /*<!in: update node */
10282 bool pa_disable, /*<!in: disable parallel apply ?*/
10283 Wsrep_service_key_type key_type) /*!< in: access type of this key
10284 (shared, exclusive, reference...) */
10285 {
10286 ut_ad(trx->is_wsrep());
10287
10288 if (!wsrep_thd_is_local(trx->mysql_thd))
10289 return DB_SUCCESS;
10290
10291 if (upd_node && wsrep_protocol_version < 4) {
10292 key_type = WSREP_SERVICE_KEY_SHARED;
10293 }
10294
10295 THD* thd = trx->mysql_thd;
10296
10297 if (!foreign ||
10298 (!foreign->referenced_table && !foreign->foreign_table)) {
10299 WSREP_INFO("FK: %s missing in: %s",
10300 (!foreign ? "constraint" :
10301 (!foreign->referenced_table ?
10302 "referenced table" : "foreign table")),
10303 wsrep_thd_query(thd));
10304 return DB_ERROR;
10305 }
10306
10307 ulint rcode = DB_SUCCESS;
10308 char cache_key[513] = {'\0'};
10309 int cache_key_len=0;
10310
10311 if ( !((referenced) ?
10312 foreign->referenced_table : foreign->foreign_table)) {
10313 WSREP_DEBUG("pulling %s table into cache",
10314 (referenced) ? "referenced" : "foreign");
10315 mutex_enter(&dict_sys.mutex);
10316
10317 if (referenced) {
10318 foreign->referenced_table =
10319 dict_table_get_low(
10320 foreign->referenced_table_name_lookup);
10321 if (foreign->referenced_table) {
10322 foreign->referenced_index =
10323 dict_foreign_find_index(
10324 foreign->referenced_table, NULL,
10325 foreign->referenced_col_names,
10326 foreign->n_fields,
10327 foreign->foreign_index,
10328 TRUE, FALSE);
10329 }
10330 } else {
10331 foreign->foreign_table =
10332 dict_table_get_low(
10333 foreign->foreign_table_name_lookup);
10334
10335 if (foreign->foreign_table) {
10336 foreign->foreign_index =
10337 dict_foreign_find_index(
10338 foreign->foreign_table, NULL,
10339 foreign->foreign_col_names,
10340 foreign->n_fields,
10341 foreign->referenced_index,
10342 TRUE, FALSE);
10343 }
10344 }
10345 mutex_exit(&dict_sys.mutex);
10346 }
10347
10348 if ( !((referenced) ?
10349 foreign->referenced_table : foreign->foreign_table)) {
10350 WSREP_WARN("FK: %s missing in query: %s",
10351 (!foreign->referenced_table) ?
10352 "referenced table" : "foreign table",
10353 wsrep_thd_query(thd));
10354 return DB_ERROR;
10355 }
10356
10357 byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10358 ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
10359
10360 dict_index_t *idx_target = (referenced) ?
10361 foreign->referenced_index : index;
10362 dict_index_t *idx = (referenced) ?
10363 UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
10364 UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
10365 int i = 0;
10366
10367 while (idx != NULL && idx != idx_target) {
10368 if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
10369 i++;
10370 }
10371 idx = UT_LIST_GET_NEXT(indexes, idx);
10372 }
10373
10374 ut_a(idx);
10375 key[0] = byte(i);
10376
10377 rcode = wsrep_rec_get_foreign_key(
10378 &key[1], &len, rec, index, idx,
10379 wsrep_protocol_version > 1);
10380
10381 if (rcode != DB_SUCCESS) {
10382 WSREP_ERROR(
10383 "FK key set failed: " ULINTPF
10384 " (" ULINTPF "%s), index: %s %s, %s",
10385 rcode, referenced, wsrep_key_type_to_str(key_type),
10386 (index) ? index->name() : "void index",
10387 (index && index->table) ? index->table->name.m_name :
10388 "void table",
10389 wsrep_thd_query(thd));
10390 return DB_ERROR;
10391 }
10392
10393 strncpy(cache_key,
10394 (wsrep_protocol_version > 1) ?
10395 ((referenced) ?
10396 foreign->referenced_table->name.m_name :
10397 foreign->foreign_table->name.m_name) :
10398 foreign->foreign_table->name.m_name, sizeof(cache_key) - 1);
10399 cache_key_len = strlen(cache_key);
10400
10401 #ifdef WSREP_DEBUG_PRINT
10402 ulint j;
10403 fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
10404 cache_key, wsrep_key_type_to_str(key_type), len+1);
10405 for (j=0; j<len+1; j++) {
10406 fprintf(stderr, " %hhX, ", key[j]);
10407 }
10408 fprintf(stderr, "\n");
10409 #endif
10410 char *p = strchr(cache_key, '/');
10411
10412 if (p) {
10413 *p = '\0';
10414 } else {
10415 WSREP_WARN("unexpected foreign key table %s %s",
10416 foreign->referenced_table->name.m_name,
10417 foreign->foreign_table->name.m_name);
10418 }
10419
10420 wsrep_buf_t wkey_part[3];
10421 wsrep_key_t wkey = {wkey_part, 3};
10422
10423 if (!wsrep_prepare_key_for_innodb(
10424 thd,
10425 (const uchar*)cache_key,
10426 cache_key_len + 1,
10427 (const uchar*)key, len+1,
10428 wkey_part,
10429 (size_t*)&wkey.key_parts_num)) {
10430 WSREP_WARN("key prepare failed for cascaded FK: %s",
10431 wsrep_thd_query(thd));
10432 return DB_ERROR;
10433 }
10434
10435 rcode = wsrep_thd_append_key(thd, &wkey, 1, key_type);
10436
10437 if (rcode) {
10438 WSREP_ERROR("Appending cascaded fk row key failed: %s, "
10439 ULINTPF,
10440 wsrep_thd_query(thd),
10441 rcode);
10442 return DB_ERROR;
10443 }
10444
10445 if (pa_disable) {
10446 wsrep_thd_set_PA_unsafe(trx->mysql_thd);
10447 }
10448
10449 return DB_SUCCESS;
10450 }
10451
10452 static int
wsrep_append_key(THD * thd,trx_t * trx,TABLE_SHARE * table_share,const char * key,uint16_t key_len,Wsrep_service_key_type key_type)10453 wsrep_append_key(
10454 /*=============*/
10455 THD *thd,
10456 trx_t *trx,
10457 TABLE_SHARE *table_share,
10458 const char* key,
10459 uint16_t key_len,
10460 Wsrep_service_key_type key_type /*!< in: access type of this key
10461 (shared, exclusive, semi...) */
10462 )
10463 {
10464 DBUG_ENTER("wsrep_append_key");
10465 DBUG_PRINT("enter",
10466 ("thd: %lu trx: %lld", thd_get_thread_id(thd),
10467 (long long)trx->id));
10468 #ifdef WSREP_DEBUG_PRINT
10469 fprintf(stderr, "%s conn %lu, trx " TRX_ID_FMT ", keylen %d, key %s.%s\n",
10470 wsrep_key_type_to_str(key_type),
10471 thd_get_thread_id(thd), trx->id, key_len,
10472 table_share->table_name.str, key);
10473 for (int i=0; i<key_len; i++) {
10474 fprintf(stderr, "%hhX, ", key[i]);
10475 }
10476 fprintf(stderr, "\n");
10477 #endif
10478 wsrep_buf_t wkey_part[3];
10479 wsrep_key_t wkey = {wkey_part, 3};
10480
10481 if (!wsrep_prepare_key_for_innodb(
10482 thd,
10483 (const uchar*)table_share->table_cache_key.str,
10484 table_share->table_cache_key.length,
10485 (const uchar*)key, key_len,
10486 wkey_part,
10487 (size_t*)&wkey.key_parts_num)) {
10488 WSREP_WARN("key prepare failed for: %s",
10489 (wsrep_thd_query(thd)) ?
10490 wsrep_thd_query(thd) : "void");
10491 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10492 }
10493
10494 int rcode = wsrep_thd_append_key(thd, &wkey, 1, key_type);
10495 if (rcode) {
10496 DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
10497 WSREP_WARN("Appending row key failed: %s, %d",
10498 (wsrep_thd_query(thd)) ?
10499 wsrep_thd_query(thd) : "void", rcode);
10500 DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10501 }
10502
10503 DBUG_RETURN(0);
10504 }
10505
10506 static bool
referenced_by_foreign_key2(dict_table_t * table,dict_index_t * index)10507 referenced_by_foreign_key2(
10508 /*=======================*/
10509 dict_table_t* table,
10510 dict_index_t* index)
10511 {
10512 ut_ad(table != NULL);
10513 ut_ad(index != NULL);
10514
10515 const dict_foreign_set* fks = &table->referenced_set;
10516
10517 for (dict_foreign_set::const_iterator it = fks->begin();
10518 it != fks->end();
10519 ++it) {
10520 dict_foreign_t* foreign = *it;
10521
10522 if (foreign->referenced_index != index) {
10523 continue;
10524 }
10525 ut_ad(table == foreign->referenced_table);
10526 return true;
10527 }
10528 return false;
10529 }
10530
10531 int
wsrep_append_keys(THD * thd,Wsrep_service_key_type key_type,const uchar * record0,const uchar * record1)10532 ha_innobase::wsrep_append_keys(
10533 /*===========================*/
10534 THD *thd,
10535 Wsrep_service_key_type key_type, /*!< in: access type of this row
10536 operation:
10537 (shared, exclusive, reference...) */
10538 const uchar* record0, /* in: row in MySQL format */
10539 const uchar* record1) /* in: row in MySQL format */
10540 {
10541 /* Sanity check: newly inserted records should always be passed with
10542 EXCLUSIVE key type, all the rest are expected to carry a pre-image
10543 */
10544 ut_a(record1 != NULL || key_type == WSREP_SERVICE_KEY_EXCLUSIVE);
10545
10546 int rcode;
10547 DBUG_ENTER("wsrep_append_keys");
10548
10549 bool key_appended = false;
10550 trx_t *trx = thd_to_trx(thd);
10551
10552 #ifdef WSREP_DEBUG_PRINT
10553 fprintf(stderr, "%s conn %lu, trx " TRX_ID_FMT ", table %s\nSQL: %s\n",
10554 wsrep_key_type_to_str(key_type),
10555 thd_get_thread_id(thd), trx->id,
10556 table_share->table_name.str, wsrep_thd_query(thd));
10557 #endif
10558
10559 if (table_share && table_share->tmp_table != NO_TMP_TABLE) {
10560 WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
10561 thd_get_thread_id(thd),
10562 table_share->tmp_table,
10563 (wsrep_thd_query(thd)) ?
10564 wsrep_thd_query(thd) : "void");
10565 DBUG_RETURN(0);
10566 }
10567
10568 if (wsrep_protocol_version == 0) {
10569 uint len;
10570 char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10571 char *key = &keyval[0];
10572 ibool is_null;
10573
10574 len = wsrep_store_key_val_for_row(
10575 thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
10576 record0, &is_null);
10577
10578 if (!is_null) {
10579 rcode = wsrep_append_key(
10580 thd, trx, table_share, keyval,
10581 len, key_type);
10582
10583 if (rcode) {
10584 DBUG_RETURN(rcode);
10585 }
10586 } else {
10587 WSREP_DEBUG("NULL key skipped (proto 0): %s",
10588 wsrep_thd_query(thd));
10589 }
10590 } else {
10591 ut_a(table->s->keys <= 256);
10592 uint i;
10593 bool hasPK= false;
10594
10595 for (i=0; i<table->s->keys; ++i) {
10596 KEY* key_info = table->key_info + i;
10597 if (key_info->flags & HA_NOSAME) {
10598 hasPK = true;
10599 break;
10600 }
10601 }
10602
10603 for (i=0; i<table->s->keys; ++i) {
10604 KEY* key_info = table->key_info + i;
10605
10606 dict_index_t* idx = innobase_get_index(i);
10607 dict_table_t* tab = (idx) ? idx->table : NULL;
10608
10609 /* keyval[] shall contain an ordinal number at byte 0
10610 and the actual key data shall be written at byte 1.
10611 Hence the total data length is the key length + 1 */
10612 char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]= {'\0'};
10613 char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]= {'\0'};
10614 keyval0[0] = (char)i;
10615 keyval1[0] = (char)i;
10616 char* key0 = &keyval0[1];
10617 char* key1 = &keyval1[1];
10618
10619 if (!tab) {
10620 WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
10621 table->s->table_name.str,
10622 key_info->name.str);
10623 }
10624 /* !hasPK == table with no PK,
10625 must append all non-unique keys */
10626 if (!hasPK || key_info->flags & HA_NOSAME ||
10627 ((tab &&
10628 referenced_by_foreign_key2(tab, idx)) ||
10629 (!tab && referenced_by_foreign_key()))) {
10630
10631 ibool is_null0;
10632 uint len0 = wsrep_store_key_val_for_row(
10633 thd, table, i, key0,
10634 WSREP_MAX_SUPPORTED_KEY_LENGTH,
10635 record0, &is_null0);
10636
10637 if (record1) {
10638 ibool is_null1;
10639 uint len1 = wsrep_store_key_val_for_row(
10640 thd, table, i, key1,
10641 WSREP_MAX_SUPPORTED_KEY_LENGTH,
10642 record1, &is_null1);
10643
10644 if (is_null0 != is_null1 ||
10645 len0 != len1 ||
10646 memcmp(key0, key1, len0)) {
10647 /* This key has chaged. If it
10648 is unique, this is an exclusive
10649 operation -> upgrade key type */
10650 if (key_info->flags & HA_NOSAME) {
10651 key_type = WSREP_SERVICE_KEY_EXCLUSIVE;
10652 }
10653
10654 if (!is_null1) {
10655 rcode = wsrep_append_key(
10656 thd, trx, table_share,
10657 keyval1,
10658 /* for len1+1 see keyval1
10659 initialization comment */
10660 len1+1, key_type);
10661 if (rcode)
10662 DBUG_RETURN(rcode);
10663 }
10664 }
10665 }
10666
10667 if (!is_null0) {
10668 rcode = wsrep_append_key(
10669 thd, trx, table_share,
10670 /* for len0+1 see keyval0
10671 initialization comment */
10672 keyval0, len0+1, key_type);
10673 if (rcode)
10674 DBUG_RETURN(rcode);
10675
10676 if (key_info->flags & HA_NOSAME ||
10677 key_type == WSREP_SERVICE_KEY_SHARED||
10678 key_type == WSREP_SERVICE_KEY_REFERENCE)
10679 key_appended = true;
10680 } else {
10681 WSREP_DEBUG("NULL key skipped: %s",
10682 wsrep_thd_query(thd));
10683 }
10684 }
10685 }
10686 }
10687
10688 /* if no PK, calculate hash of full row, to be the key value */
10689 if (!key_appended && wsrep_certify_nonPK) {
10690 uchar digest[16];
10691
10692 wsrep_calc_row_hash(digest, record0, table, m_prebuilt);
10693
10694 if (int rcode = wsrep_append_key(thd, trx, table_share,
10695 reinterpret_cast<char*>
10696 (digest), 16, key_type)) {
10697 DBUG_RETURN(rcode);
10698 }
10699
10700 if (record1) {
10701 wsrep_calc_row_hash(
10702 digest, record1, table, m_prebuilt);
10703 if (int rcode = wsrep_append_key(
10704 thd, trx, table_share,
10705 reinterpret_cast<char*>(digest), 16,
10706 key_type)) {
10707 DBUG_RETURN(rcode);
10708 }
10709 }
10710 DBUG_RETURN(0);
10711 }
10712
10713 DBUG_RETURN(0);
10714 }
10715 #endif /* WITH_WSREP */
10716
10717 /*********************************************************************//**
10718 Stores a reference to the current row to 'ref' field of the handle. Note
10719 that in the case where we have generated the clustered index for the
10720 table, the function parameter is illogical: we MUST ASSUME that 'record'
10721 is the current 'position' of the handle, because if row ref is actually
10722 the row id internally generated in InnoDB, then 'record' does not contain
10723 it. We just guess that the row id must be for the record where the handle
10724 was positioned the last time. */
10725
10726 void
position(const uchar * record)10727 ha_innobase::position(
10728 /*==================*/
10729 const uchar* record) /*!< in: row in MySQL format */
10730 {
10731 uint len;
10732
10733 ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
10734
10735 if (m_prebuilt->clust_index_was_generated) {
10736 /* No primary key was defined for the table and we
10737 generated the clustered index from row id: the
10738 row reference will be the row id, not any key value
10739 that MySQL knows of */
10740
10741 len = DATA_ROW_ID_LEN;
10742
10743 memcpy(ref, m_prebuilt->row_id, len);
10744 } else {
10745
10746 /* Copy primary key as the row reference */
10747 KEY* key_info = table->key_info + m_primary_key;
10748 key_copy(ref, (uchar*)record, key_info, key_info->key_length);
10749 len = key_info->key_length;
10750 }
10751
10752 ut_ad(len == ref_length);
10753 }
10754
10755 /*****************************************************************//**
10756 Check whether there exist a column named as "FTS_DOC_ID", which is
10757 reserved for InnoDB FTS Doc ID
10758 @return true if there exist a "FTS_DOC_ID" column */
10759 static
10760 bool
create_table_check_doc_id_col(trx_t * trx,const TABLE * form,ulint * doc_id_col)10761 create_table_check_doc_id_col(
10762 /*==========================*/
10763 trx_t* trx, /*!< in: InnoDB transaction handle */
10764 const TABLE* form, /*!< in: information on table
10765 columns and indexes */
10766 ulint* doc_id_col) /*!< out: Doc ID column number if
10767 there exist a FTS_DOC_ID column,
10768 ULINT_UNDEFINED if column is of the
10769 wrong type/name/size */
10770 {
10771 for (ulint i = 0; i < form->s->fields; i++) {
10772 const Field* field;
10773 ulint col_type;
10774 ulint col_len;
10775 ulint unsigned_type;
10776
10777 field = form->field[i];
10778 if (!field->stored_in_db()) {
10779 continue;
10780 }
10781
10782 col_type = get_innobase_type_from_mysql_type(
10783 &unsigned_type, field);
10784
10785 col_len = field->pack_length();
10786
10787 if (innobase_strcasecmp(field->field_name.str,
10788 FTS_DOC_ID_COL_NAME) == 0) {
10789
10790 /* Note the name is case sensitive due to
10791 our internal query parser */
10792 if (col_type == DATA_INT
10793 && !field->real_maybe_null()
10794 && col_len == sizeof(doc_id_t)
10795 && (strcmp(field->field_name.str,
10796 FTS_DOC_ID_COL_NAME) == 0)) {
10797 *doc_id_col = i;
10798 } else {
10799 push_warning_printf(
10800 trx->mysql_thd,
10801 Sql_condition::WARN_LEVEL_WARN,
10802 ER_ILLEGAL_HA_CREATE_OPTION,
10803 "InnoDB: FTS_DOC_ID column must be"
10804 " of BIGINT NOT NULL type, and named"
10805 " in all capitalized characters");
10806 my_error(ER_WRONG_COLUMN_NAME, MYF(0),
10807 field->field_name.str);
10808 *doc_id_col = ULINT_UNDEFINED;
10809 }
10810
10811 return(true);
10812 }
10813 }
10814
10815 return(false);
10816 }
10817
10818
10819 /** Finds all base columns needed to compute a given generated column.
10820 This is returned as a bitmap, in field->table->tmp_set.
10821 Works for both dict_v_col_t and dict_s_col_t columns.
10822 @param[in] table InnoDB table
10823 @param[in] field MySQL field
10824 @param[in,out] col virtual or stored column */
10825 template <typename T>
10826 void
prepare_vcol_for_base_setup(const dict_table_t * table,const Field * field,T * col)10827 prepare_vcol_for_base_setup(
10828 /*========================*/
10829 const dict_table_t* table,
10830 const Field* field,
10831 T* col)
10832 {
10833 ut_ad(col->num_base == 0);
10834 ut_ad(col->base_col == NULL);
10835
10836 MY_BITMAP *old_read_set = field->table->read_set;
10837
10838 field->table->read_set = &field->table->tmp_set;
10839
10840 bitmap_clear_all(&field->table->tmp_set);
10841 field->vcol_info->expr->walk(
10842 &Item::register_field_in_read_map, 1, field->table);
10843 col->num_base= bitmap_bits_set(&field->table->tmp_set);
10844 if (col->num_base != 0) {
10845 col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
10846 table->heap, col->num_base * sizeof(
10847 * col->base_col)));
10848 }
10849 field->table->read_set= old_read_set;
10850 }
10851
10852
10853 /** Set up base columns for virtual column
10854 @param[in] table InnoDB table
10855 @param[in] field MySQL field
10856 @param[in,out] v_col virtual column */
10857 void
innodb_base_col_setup(dict_table_t * table,const Field * field,dict_v_col_t * v_col)10858 innodb_base_col_setup(
10859 dict_table_t* table,
10860 const Field* field,
10861 dict_v_col_t* v_col)
10862 {
10863 ulint n = 0;
10864
10865 prepare_vcol_for_base_setup(table, field, v_col);
10866
10867 for (uint i= 0; i < field->table->s->fields; ++i) {
10868 const Field* base_field = field->table->field[i];
10869 if (base_field->stored_in_db()
10870 && bitmap_is_set(&field->table->tmp_set, i)) {
10871 ulint z;
10872
10873 for (z = 0; z < table->n_cols; z++) {
10874 const char* name = dict_table_get_col_name(table, z);
10875 if (!innobase_strcasecmp(name,
10876 base_field->field_name.str)) {
10877 break;
10878 }
10879 }
10880
10881 ut_ad(z != table->n_cols);
10882
10883 v_col->base_col[n] = dict_table_get_nth_col(table, z);
10884 ut_ad(v_col->base_col[n]->ind == z);
10885 n++;
10886 }
10887 }
10888 v_col->num_base= n;
10889 }
10890
10891 /** Set up base columns for stored column
10892 @param[in] table InnoDB table
10893 @param[in] field MySQL field
10894 @param[in,out] s_col stored column */
10895 void
innodb_base_col_setup_for_stored(const dict_table_t * table,const Field * field,dict_s_col_t * s_col)10896 innodb_base_col_setup_for_stored(
10897 const dict_table_t* table,
10898 const Field* field,
10899 dict_s_col_t* s_col)
10900 {
10901 ulint n = 0;
10902
10903 prepare_vcol_for_base_setup(table, field, s_col);
10904
10905 for (uint i= 0; i < field->table->s->fields; ++i) {
10906 const Field* base_field = field->table->field[i];
10907
10908 if (base_field->stored_in_db()
10909 && bitmap_is_set(&field->table->tmp_set, i)) {
10910 ulint z;
10911 for (z = 0; z < table->n_cols; z++) {
10912 const char* name = dict_table_get_col_name(
10913 table, z);
10914 if (!innobase_strcasecmp(
10915 name, base_field->field_name.str)) {
10916 break;
10917 }
10918 }
10919
10920 ut_ad(z != table->n_cols);
10921
10922 s_col->base_col[n] = dict_table_get_nth_col(table, z);
10923 n++;
10924
10925 if (n == s_col->num_base) {
10926 break;
10927 }
10928 }
10929 }
10930 s_col->num_base= n;
10931 }
10932
10933 /** Create a table definition to an InnoDB database.
10934 @return ER_* level error */
10935 inline MY_ATTRIBUTE((warn_unused_result))
10936 int
create_table_def()10937 create_table_info_t::create_table_def()
10938 {
10939 dict_table_t* table;
10940 ulint col_type;
10941 ulint col_len;
10942 ulint nulls_allowed;
10943 ulint unsigned_type;
10944 ulint binary_type;
10945 ulint long_true_varchar;
10946 ulint charset_no;
10947 ulint doc_id_col = 0;
10948 ibool has_doc_id_col = FALSE;
10949 mem_heap_t* heap;
10950 ha_table_option_struct *options= m_form->s->option_struct;
10951 dberr_t err = DB_SUCCESS;
10952
10953 DBUG_ENTER("create_table_def");
10954 DBUG_PRINT("enter", ("table_name: %s", m_table_name));
10955
10956 DBUG_ASSERT(m_trx->mysql_thd == m_thd);
10957 DBUG_ASSERT(!m_drop_before_rollback);
10958
10959 /* MySQL does the name length check. But we do additional check
10960 on the name length here */
10961 const size_t table_name_len = strlen(m_table_name);
10962 if (table_name_len > MAX_FULL_NAME_LEN) {
10963 push_warning_printf(
10964 m_thd, Sql_condition::WARN_LEVEL_WARN,
10965 ER_TABLE_NAME,
10966 "InnoDB: Table Name or Database Name is too long");
10967
10968 DBUG_RETURN(ER_TABLE_NAME);
10969 }
10970
10971 if (m_table_name[table_name_len - 1] == '/') {
10972 push_warning_printf(
10973 m_thd, Sql_condition::WARN_LEVEL_WARN,
10974 ER_TABLE_NAME,
10975 "InnoDB: Table name is empty");
10976
10977 DBUG_RETURN(ER_WRONG_TABLE_NAME);
10978 }
10979
10980 /* Find out the number of virtual columns. */
10981 ulint num_v = 0;
10982 const bool omit_virtual = ha_innobase::omits_virtual_cols(*m_form->s);
10983 const ulint n_cols = omit_virtual
10984 ? m_form->s->stored_fields : m_form->s->fields;
10985
10986 if (!omit_virtual) {
10987 for (ulint i = 0; i < n_cols; i++) {
10988 num_v += !m_form->field[i]->stored_in_db();
10989 }
10990 }
10991
10992 /* Check whether there already exists a FTS_DOC_ID column */
10993 if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){
10994
10995 /* Raise error if the Doc ID column is of wrong type or name */
10996 if (doc_id_col == ULINT_UNDEFINED) {
10997 DBUG_RETURN(HA_ERR_GENERIC);
10998 } else {
10999 has_doc_id_col = TRUE;
11000 }
11001 }
11002
11003 /* Adjust the number of columns for the FTS hidden field */
11004 const ulint actual_n_cols = n_cols
11005 + (m_flags2 & DICT_TF2_FTS && !has_doc_id_col);
11006
11007 table = dict_mem_table_create(m_table_name, NULL,
11008 actual_n_cols, num_v, m_flags, m_flags2);
11009
11010 /* Set the hidden doc_id column. */
11011 if (m_flags2 & DICT_TF2_FTS) {
11012 table->fts->doc_col = has_doc_id_col
11013 ? doc_id_col : n_cols - num_v;
11014 }
11015
11016 if (DICT_TF_HAS_DATA_DIR(m_flags)) {
11017 ut_a(strlen(m_remote_path));
11018
11019 table->data_dir_path = mem_heap_strdup(
11020 table->heap, m_remote_path);
11021
11022 } else {
11023 table->data_dir_path = NULL;
11024 }
11025
11026 heap = mem_heap_create(1000);
11027 auto _ = make_scope_exit([heap]() { mem_heap_free(heap); });
11028
11029 ut_d(bool have_vers_start = false);
11030 ut_d(bool have_vers_end = false);
11031
11032 for (ulint i = 0, j = 0; j < n_cols; i++) {
11033 Field* field = m_form->field[i];
11034 ulint vers_row = 0;
11035
11036 if (m_form->versioned()) {
11037 if (i == m_form->s->vers.start_fieldno) {
11038 vers_row = DATA_VERS_START;
11039 ut_d(have_vers_start = true);
11040 } else if (i == m_form->s->vers.end_fieldno) {
11041 vers_row = DATA_VERS_END;
11042 ut_d(have_vers_end = true);
11043 } else if (!(field->flags
11044 & VERS_UPDATE_UNVERSIONED_FLAG)) {
11045 vers_row = DATA_VERSIONED;
11046 }
11047 }
11048
11049 col_type = get_innobase_type_from_mysql_type(
11050 &unsigned_type, field);
11051
11052 if (!col_type) {
11053 push_warning_printf(
11054 m_thd, Sql_condition::WARN_LEVEL_WARN,
11055 ER_CANT_CREATE_TABLE,
11056 "Error creating table '%s' with"
11057 " column '%s'. Please check its"
11058 " column type and try to re-create"
11059 " the table with an appropriate"
11060 " column type.",
11061 table->name.m_name, field->field_name.str);
11062 goto err_col;
11063 }
11064
11065 nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
11066 binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
11067
11068 charset_no = 0;
11069
11070 if (dtype_is_string_type(col_type)) {
11071
11072 charset_no = (ulint) field->charset()->number;
11073
11074 DBUG_EXECUTE_IF("simulate_max_char_col",
11075 charset_no = MAX_CHAR_COLL_NUM + 1;
11076 );
11077
11078 if (charset_no > MAX_CHAR_COLL_NUM) {
11079 /* in data0type.h we assume that the
11080 number fits in one byte in prtype */
11081 push_warning_printf(
11082 m_thd, Sql_condition::WARN_LEVEL_WARN,
11083 ER_CANT_CREATE_TABLE,
11084 "In InnoDB, charset-collation codes"
11085 " must be below 256."
11086 " Unsupported code " ULINTPF ".",
11087 charset_no);
11088 dict_mem_table_free(table);
11089
11090 DBUG_RETURN(ER_CANT_CREATE_TABLE);
11091 }
11092 }
11093
11094 col_len = field->pack_length();
11095
11096 /* The MySQL pack length contains 1 or 2 bytes length field
11097 for a true VARCHAR. Let us subtract that, so that the InnoDB
11098 column length in the InnoDB data dictionary is the real
11099 maximum byte length of the actual data. */
11100
11101 long_true_varchar = 0;
11102
11103 if (field->type() == MYSQL_TYPE_VARCHAR) {
11104 col_len -= ((Field_varstring*) field)->length_bytes;
11105
11106 if (((Field_varstring*) field)->length_bytes == 2) {
11107 long_true_varchar = DATA_LONG_TRUE_VARCHAR;
11108 }
11109 }
11110
11111 /* First check whether the column to be added has a
11112 system reserved name. */
11113 if (dict_col_name_is_reserved(field->field_name.str)){
11114 my_error(ER_WRONG_COLUMN_NAME, MYF(0),
11115 field->field_name.str);
11116 err_col:
11117 dict_mem_table_free(table);
11118 ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED));
11119 DBUG_RETURN(HA_ERR_GENERIC);
11120 }
11121
11122 ulint is_virtual = !field->stored_in_db() ? DATA_VIRTUAL : 0;
11123
11124 if (!is_virtual) {
11125 dict_mem_table_add_col(table, heap,
11126 field->field_name.str, col_type,
11127 dtype_form_prtype(
11128 (ulint) field->type()
11129 | nulls_allowed | unsigned_type
11130 | binary_type | long_true_varchar
11131 | vers_row,
11132 charset_no),
11133 col_len);
11134 } else if (!omit_virtual) {
11135 dict_mem_table_add_v_col(table, heap,
11136 field->field_name.str, col_type,
11137 dtype_form_prtype(
11138 (ulint) field->type()
11139 | nulls_allowed | unsigned_type
11140 | binary_type | long_true_varchar
11141 | vers_row
11142 | is_virtual,
11143 charset_no),
11144 col_len, i, 0);
11145 }
11146
11147 if (innobase_is_s_fld(field)) {
11148 ut_ad(!is_virtual);
11149 /* Added stored column in m_s_cols list. */
11150 dict_mem_table_add_s_col(
11151 table, 0);
11152 }
11153
11154 if (is_virtual && omit_virtual) {
11155 continue;
11156 }
11157
11158 j++;
11159 }
11160
11161 ut_ad(have_vers_start == have_vers_end);
11162 ut_ad(table->versioned() == have_vers_start);
11163 ut_ad(!table->versioned() || table->vers_start != table->vers_end);
11164
11165 if (num_v) {
11166 for (ulint i = 0, j = 0; i < n_cols; i++) {
11167 dict_v_col_t* v_col;
11168
11169 const Field* field = m_form->field[i];
11170
11171 if (field->stored_in_db()) {
11172 continue;
11173 }
11174
11175 v_col = dict_table_get_nth_v_col(table, j);
11176
11177 j++;
11178
11179 innodb_base_col_setup(table, field, v_col);
11180 }
11181 }
11182
11183 /** Fill base columns for the stored column present in the list. */
11184 if (table->s_cols && !table->s_cols->empty()) {
11185 for (ulint i = 0; i < n_cols; i++) {
11186 Field* field = m_form->field[i];
11187
11188 if (!innobase_is_s_fld(field)) {
11189 continue;
11190 }
11191
11192 dict_s_col_list::iterator it;
11193 for (it = table->s_cols->begin();
11194 it != table->s_cols->end(); ++it) {
11195 dict_s_col_t s_col = *it;
11196
11197 if (s_col.s_pos == i) {
11198 innodb_base_col_setup_for_stored(
11199 table, field, &s_col);
11200 break;
11201 }
11202 }
11203 }
11204 }
11205
11206 /* Add the FTS doc_id hidden column. */
11207 if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
11208 fts_add_doc_id_column(table, heap);
11209 }
11210
11211 dict_table_add_system_columns(table, heap);
11212
11213 if (table->is_temporary()) {
11214 if ((options->encryption == 1
11215 && !innodb_encrypt_temporary_tables)
11216 || (options->encryption == 2
11217 && innodb_encrypt_temporary_tables)) {
11218 push_warning_printf(m_thd,
11219 Sql_condition::WARN_LEVEL_WARN,
11220 ER_ILLEGAL_HA_CREATE_OPTION,
11221 "Ignoring encryption parameter during "
11222 "temporary table creation.");
11223 }
11224
11225 m_trx->table_id = table->id
11226 = dict_sys.get_temporary_table_id();
11227 ut_ad(dict_tf_get_rec_format(table->flags)
11228 != REC_FORMAT_COMPRESSED);
11229 table->space_id = SRV_TMP_SPACE_ID;
11230 table->space = fil_system.temp_space;
11231 table->add_to_cache();
11232 } else {
11233 if (err == DB_SUCCESS) {
11234 err = row_create_table_for_mysql(
11235 table, m_trx,
11236 fil_encryption_t(options->encryption),
11237 uint32_t(options->encryption_key_id));
11238 m_drop_before_rollback = (err == DB_SUCCESS);
11239 }
11240
11241 DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption",
11242 DBUG_SUICIDE(););
11243 }
11244
11245 DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
11246 err = DB_TABLESPACE_EXISTS;);
11247
11248 switch (err) {
11249 case DB_SUCCESS:
11250 ut_ad(table);
11251 m_table = table;
11252 DBUG_RETURN(0);
11253 default:
11254 break;
11255 case DB_DUPLICATE_KEY:
11256 case DB_TABLESPACE_EXISTS:
11257 char display_name[FN_REFLEN];
11258 char* buf_end = innobase_convert_identifier(
11259 display_name, sizeof(display_name) - 1,
11260 m_table_name, strlen(m_table_name),
11261 m_thd);
11262
11263 *buf_end = '\0';
11264
11265 my_error(err == DB_DUPLICATE_KEY
11266 ? ER_TABLE_EXISTS_ERROR
11267 : ER_TABLESPACE_EXISTS, MYF(0), display_name);
11268 }
11269
11270 DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
11271 }
11272
11273 /*****************************************************************//**
11274 Creates an index in an InnoDB database. */
11275 inline
11276 int
create_index(trx_t * trx,const TABLE * form,dict_table_t * table,uint key_num)11277 create_index(
11278 /*=========*/
11279 trx_t* trx, /*!< in: InnoDB transaction handle */
11280 const TABLE* form, /*!< in: information on table
11281 columns and indexes */
11282 dict_table_t* table, /*!< in,out: table */
11283 uint key_num) /*!< in: index number */
11284 {
11285 dict_index_t* index;
11286 int error;
11287 const KEY* key;
11288 ulint* field_lengths;
11289
11290 DBUG_ENTER("create_index");
11291
11292 key = form->key_info + key_num;
11293
11294 /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
11295 ut_a(innobase_strcasecmp(key->name.str, innobase_index_reserve_name) != 0);
11296
11297 if (key->flags & (HA_SPATIAL | HA_FULLTEXT)) {
11298 /* Only one of these can be specified at a time. */
11299 ut_ad(~key->flags & (HA_SPATIAL | HA_FULLTEXT));
11300 ut_ad(!(key->flags & HA_NOSAME));
11301 index = dict_mem_index_create(table, key->name.str,
11302 (key->flags & HA_SPATIAL)
11303 ? DICT_SPATIAL : DICT_FTS,
11304 key->user_defined_key_parts);
11305
11306 for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11307 const Field* field = key->key_part[i].field;
11308
11309 /* We do not support special (Fulltext or Spatial)
11310 index on virtual columns */
11311 if (!field->stored_in_db()) {
11312 ut_ad(0);
11313 DBUG_RETURN(HA_ERR_UNSUPPORTED);
11314 }
11315
11316 dict_mem_index_add_field(index, field->field_name.str,
11317 0);
11318 }
11319
11320 DBUG_RETURN(convert_error_code_to_mysql(
11321 row_create_index_for_mysql(
11322 index, trx, NULL),
11323 table->flags, NULL));
11324 }
11325
11326 ulint ind_type = 0;
11327
11328 if (key_num == form->s->primary_key) {
11329 ind_type |= DICT_CLUSTERED;
11330 }
11331
11332 if (key->flags & HA_NOSAME) {
11333 ind_type |= DICT_UNIQUE;
11334 }
11335
11336 field_lengths = (ulint*) my_malloc(//PSI_INSTRUMENT_ME,
11337 key->user_defined_key_parts * sizeof *
11338 field_lengths, MYF(MY_FAE));
11339
11340 /* We pass 0 as the space id, and determine at a lower level the space
11341 id where to store the table */
11342
11343 index = dict_mem_index_create(table, key->name.str,
11344 ind_type, key->user_defined_key_parts);
11345
11346 for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11347 KEY_PART_INFO* key_part = key->key_part + i;
11348 ulint prefix_len;
11349 ulint col_type;
11350 ulint is_unsigned;
11351
11352
11353 /* (The flag HA_PART_KEY_SEG denotes in MySQL a
11354 column prefix field in an index: we only store a
11355 specified number of first bytes of the column to
11356 the index field.) The flag does not seem to be
11357 properly set by MySQL. Let us fall back on testing
11358 the length of the key part versus the column.
11359 We first reach to the table's column; if the index is on a
11360 prefix, key_part->field is not the table's column (it's a
11361 "fake" field forged in open_table_from_share() with length
11362 equal to the length of the prefix); so we have to go to
11363 form->fied. */
11364 Field* field= form->field[key_part->field->field_index];
11365 if (field == NULL)
11366 ut_error;
11367
11368 const char* field_name = key_part->field->field_name.str;
11369
11370 col_type = get_innobase_type_from_mysql_type(
11371 &is_unsigned, key_part->field);
11372
11373 if (DATA_LARGE_MTYPE(col_type)
11374 || (key_part->length < field->pack_length()
11375 && field->type() != MYSQL_TYPE_VARCHAR)
11376 || (field->type() == MYSQL_TYPE_VARCHAR
11377 && key_part->length < field->pack_length()
11378 - ((Field_varstring*) field)->length_bytes)) {
11379
11380 switch (col_type) {
11381 default:
11382 prefix_len = key_part->length;
11383 break;
11384 case DATA_INT:
11385 case DATA_FLOAT:
11386 case DATA_DOUBLE:
11387 case DATA_DECIMAL:
11388 sql_print_error(
11389 "MariaDB is trying to create a column"
11390 " prefix index field, on an"
11391 " inappropriate data type. Table"
11392 " name %s, column name %s.",
11393 form->s->table_name.str,
11394 key_part->field->field_name.str);
11395
11396 prefix_len = 0;
11397 }
11398 } else {
11399 prefix_len = 0;
11400 }
11401
11402 ut_ad(prefix_len % field->charset()->mbmaxlen == 0);
11403
11404 field_lengths[i] = key_part->length;
11405
11406 if (!key_part->field->stored_in_db()) {
11407 index->type |= DICT_VIRTUAL;
11408 }
11409
11410 dict_mem_index_add_field(index, field_name, prefix_len);
11411 }
11412
11413 ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
11414
11415 /* Even though we've defined max_supported_key_part_length, we
11416 still do our own checking using field_lengths to be absolutely
11417 sure we don't create too long indexes. */
11418 ulint flags = table->flags;
11419
11420 error = convert_error_code_to_mysql(
11421 row_create_index_for_mysql(index, trx, field_lengths),
11422 flags, NULL);
11423
11424 my_free(field_lengths);
11425
11426 DBUG_RETURN(error);
11427 }
11428
11429 /** Return a display name for the row format
11430 @param[in] row_format Row Format
11431 @return row format name */
11432 static
11433 const char*
get_row_format_name(enum row_type row_format)11434 get_row_format_name(
11435 enum row_type row_format)
11436 {
11437 switch (row_format) {
11438 case ROW_TYPE_COMPACT:
11439 return("COMPACT");
11440 case ROW_TYPE_COMPRESSED:
11441 return("COMPRESSED");
11442 case ROW_TYPE_DYNAMIC:
11443 return("DYNAMIC");
11444 case ROW_TYPE_REDUNDANT:
11445 return("REDUNDANT");
11446 case ROW_TYPE_DEFAULT:
11447 return("DEFAULT");
11448 case ROW_TYPE_FIXED:
11449 return("FIXED");
11450 case ROW_TYPE_PAGE:
11451 case ROW_TYPE_NOT_USED:
11452 break;
11453 }
11454 return("NOT USED");
11455 }
11456
11457 /** Validate DATA DIRECTORY option.
11458 @return true if valid, false if not. */
11459 bool
create_option_data_directory_is_valid()11460 create_table_info_t::create_option_data_directory_is_valid()
11461 {
11462 bool is_valid = true;
11463
11464 ut_ad(m_create_info->data_file_name
11465 && m_create_info->data_file_name[0] != '\0');
11466
11467 /* Use DATA DIRECTORY only with file-per-table. */
11468 if (!m_allow_file_per_table) {
11469 push_warning(
11470 m_thd, Sql_condition::WARN_LEVEL_WARN,
11471 ER_ILLEGAL_HA_CREATE_OPTION,
11472 "InnoDB: DATA DIRECTORY requires"
11473 " innodb_file_per_table.");
11474 is_valid = false;
11475 }
11476
11477 /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
11478 if (m_create_info->tmp_table()) {
11479 push_warning(
11480 m_thd, Sql_condition::WARN_LEVEL_WARN,
11481 ER_ILLEGAL_HA_CREATE_OPTION,
11482 "InnoDB: DATA DIRECTORY cannot be used"
11483 " for TEMPORARY tables.");
11484 is_valid = false;
11485 }
11486
11487 /* We check for a DATA DIRECTORY mixed with TABLESPACE in
11488 create_option_tablespace_is_valid(), no need to here. */
11489
11490 return(is_valid);
11491 }
11492
11493 /** Validate the create options. Check that the options KEY_BLOCK_SIZE,
11494 ROW_FORMAT, DATA DIRECTORY, TEMPORARY are compatible with
11495 each other and other settings. These CREATE OPTIONS are not validated
11496 here unless innodb_strict_mode is on. With strict mode, this function
11497 will report each problem it finds using a custom message with error
11498 code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
11499 @return NULL if valid, string name of bad option if not. */
11500 const char*
create_options_are_invalid()11501 create_table_info_t::create_options_are_invalid()
11502 {
11503 bool has_key_block_size = (m_create_info->key_block_size != 0);
11504
11505 const char* ret = NULL;
11506 enum row_type row_format = m_create_info->row_type;
11507 const bool is_temp = m_create_info->tmp_table();
11508
11509 ut_ad(m_thd != NULL);
11510
11511 /* If innodb_strict_mode is not set don't do any more validation. */
11512 if (!THDVAR(m_thd, strict_mode)) {
11513 return(NULL);
11514 }
11515
11516 /* Check if a non-zero KEY_BLOCK_SIZE was specified. */
11517 if (has_key_block_size) {
11518 if (is_temp) {
11519 my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11520 MYF(0));
11521 return("KEY_BLOCK_SIZE");
11522 }
11523
11524 switch (m_create_info->key_block_size) {
11525 ulint kbs_max;
11526 case 1:
11527 case 2:
11528 case 4:
11529 case 8:
11530 case 16:
11531 /* The maximum KEY_BLOCK_SIZE (KBS) is
11532 UNIV_PAGE_SIZE_MAX. But if srv_page_size is
11533 smaller than UNIV_PAGE_SIZE_MAX, the maximum
11534 KBS is also smaller. */
11535 kbs_max = ut_min(
11536 1U << (UNIV_PAGE_SSIZE_MAX - 1),
11537 1U << (PAGE_ZIP_SSIZE_MAX - 1));
11538 if (m_create_info->key_block_size > kbs_max) {
11539 push_warning_printf(
11540 m_thd, Sql_condition::WARN_LEVEL_WARN,
11541 ER_ILLEGAL_HA_CREATE_OPTION,
11542 "InnoDB: KEY_BLOCK_SIZE=%ld"
11543 " cannot be larger than %ld.",
11544 m_create_info->key_block_size,
11545 kbs_max);
11546 ret = "KEY_BLOCK_SIZE";
11547 }
11548
11549 /* Valid KEY_BLOCK_SIZE, check its dependencies. */
11550 if (!m_allow_file_per_table) {
11551 push_warning(
11552 m_thd, Sql_condition::WARN_LEVEL_WARN,
11553 ER_ILLEGAL_HA_CREATE_OPTION,
11554 "InnoDB: KEY_BLOCK_SIZE requires"
11555 " innodb_file_per_table.");
11556 ret = "KEY_BLOCK_SIZE";
11557 }
11558 break;
11559 default:
11560 push_warning_printf(
11561 m_thd, Sql_condition::WARN_LEVEL_WARN,
11562 ER_ILLEGAL_HA_CREATE_OPTION,
11563 "InnoDB: invalid KEY_BLOCK_SIZE = %u."
11564 " Valid values are [1, 2, 4, 8, 16]",
11565 (uint) m_create_info->key_block_size);
11566 ret = "KEY_BLOCK_SIZE";
11567 break;
11568 }
11569 }
11570
11571 /* Check for a valid InnoDB ROW_FORMAT specifier and
11572 other incompatibilities. */
11573 switch (row_format) {
11574 case ROW_TYPE_COMPRESSED:
11575 if (is_temp) {
11576 my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11577 MYF(0));
11578 return("ROW_FORMAT");
11579 }
11580 if (!m_allow_file_per_table) {
11581 push_warning_printf(
11582 m_thd, Sql_condition::WARN_LEVEL_WARN,
11583 ER_ILLEGAL_HA_CREATE_OPTION,
11584 "InnoDB: ROW_FORMAT=%s requires"
11585 " innodb_file_per_table.",
11586 get_row_format_name(row_format));
11587 ret = "ROW_FORMAT";
11588 }
11589 break;
11590 case ROW_TYPE_DYNAMIC:
11591 case ROW_TYPE_COMPACT:
11592 case ROW_TYPE_REDUNDANT:
11593 if (has_key_block_size) {
11594 push_warning_printf(
11595 m_thd, Sql_condition::WARN_LEVEL_WARN,
11596 ER_ILLEGAL_HA_CREATE_OPTION,
11597 "InnoDB: cannot specify ROW_FORMAT = %s"
11598 " with KEY_BLOCK_SIZE.",
11599 get_row_format_name(row_format));
11600 ret = "KEY_BLOCK_SIZE";
11601 }
11602 break;
11603 case ROW_TYPE_DEFAULT:
11604 break;
11605 case ROW_TYPE_FIXED:
11606 case ROW_TYPE_PAGE:
11607 case ROW_TYPE_NOT_USED:
11608 push_warning(
11609 m_thd, Sql_condition::WARN_LEVEL_WARN,
11610 ER_ILLEGAL_HA_CREATE_OPTION,
11611 "InnoDB: invalid ROW_FORMAT specifier.");
11612 ret = "ROW_TYPE";
11613 break;
11614 }
11615
11616 if (!m_create_info->data_file_name
11617 || !m_create_info->data_file_name[0]) {
11618 } else if (!my_use_symdir) {
11619 my_error(WARN_OPTION_IGNORED, MYF(ME_WARNING),
11620 "DATA DIRECTORY");
11621 } else if (!create_option_data_directory_is_valid()) {
11622 ret = "DATA DIRECTORY";
11623 }
11624
11625 /* Do not allow INDEX_DIRECTORY */
11626 if (m_create_info->index_file_name) {
11627 push_warning_printf(
11628 m_thd, Sql_condition::WARN_LEVEL_WARN,
11629 ER_ILLEGAL_HA_CREATE_OPTION,
11630 "InnoDB: INDEX DIRECTORY is not supported");
11631 ret = "INDEX DIRECTORY";
11632 }
11633
11634 /* Don't support compressed table when page size > 16k. */
11635 if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
11636 && srv_page_size > UNIV_PAGE_SIZE_DEF) {
11637 push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
11638 ER_ILLEGAL_HA_CREATE_OPTION,
11639 "InnoDB: Cannot create a COMPRESSED table"
11640 " when innodb_page_size > 16k.");
11641
11642 if (has_key_block_size) {
11643 ret = "KEY_BLOCK_SIZE";
11644 } else {
11645 ret = "ROW_TYPE";
11646 }
11647 }
11648
11649 return(ret);
11650 }
11651
11652 /*****************************************************************//**
11653 Check engine specific table options not handled by SQL-parser.
11654 @return NULL if valid, string if not */
11655 const char*
check_table_options()11656 create_table_info_t::check_table_options()
11657 {
11658 enum row_type row_format = m_create_info->row_type;
11659 const ha_table_option_struct *options= m_form->s->option_struct;
11660
11661 switch (options->encryption) {
11662 case FIL_ENCRYPTION_OFF:
11663 if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
11664 push_warning(
11665 m_thd, Sql_condition::WARN_LEVEL_WARN,
11666 HA_WRONG_CREATE_OPTION,
11667 "InnoDB: ENCRYPTED=NO implies"
11668 " ENCRYPTION_KEY_ID=1");
11669 compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
11670 }
11671 if (srv_encrypt_tables != 2) {
11672 break;
11673 }
11674 push_warning(
11675 m_thd, Sql_condition::WARN_LEVEL_WARN,
11676 HA_WRONG_CREATE_OPTION,
11677 "InnoDB: ENCRYPTED=NO cannot be used with"
11678 " innodb_encrypt_tables=FORCE");
11679 return "ENCRYPTED";
11680 case FIL_ENCRYPTION_DEFAULT:
11681 if (!srv_encrypt_tables) {
11682 break;
11683 }
11684 /* fall through */
11685 case FIL_ENCRYPTION_ON:
11686 const uint32_t key_id = uint32_t(options->encryption_key_id);
11687 if (!encryption_key_id_exists(key_id)) {
11688 push_warning_printf(
11689 m_thd, Sql_condition::WARN_LEVEL_WARN,
11690 HA_WRONG_CREATE_OPTION,
11691 "InnoDB: ENCRYPTION_KEY_ID %u not available",
11692 key_id);
11693 return "ENCRYPTION_KEY_ID";
11694 }
11695
11696 /* We do not support encryption for spatial indexes,
11697 except if innodb_checksum_algorithm=full_crc32.
11698 Do not allow ENCRYPTED=YES if any SPATIAL INDEX exists. */
11699 if (options->encryption != FIL_ENCRYPTION_ON
11700 || srv_checksum_algorithm
11701 >= SRV_CHECKSUM_ALGORITHM_FULL_CRC32) {
11702 break;
11703 }
11704 for (ulint i = 0; i < m_form->s->keys; i++) {
11705 if (m_form->key_info[i].flags & HA_SPATIAL) {
11706 push_warning(m_thd,
11707 Sql_condition::WARN_LEVEL_WARN,
11708 HA_ERR_UNSUPPORTED,
11709 "InnoDB: ENCRYPTED=YES is not"
11710 " supported for SPATIAL INDEX");
11711 return "ENCRYPTED";
11712 }
11713 }
11714 }
11715
11716 if (!m_allow_file_per_table
11717 && options->encryption != FIL_ENCRYPTION_DEFAULT) {
11718 push_warning(
11719 m_thd, Sql_condition::WARN_LEVEL_WARN,
11720 HA_WRONG_CREATE_OPTION,
11721 "InnoDB: ENCRYPTED requires innodb_file_per_table");
11722 return "ENCRYPTED";
11723 }
11724
11725 /* Check page compression requirements */
11726 if (options->page_compressed) {
11727
11728 if (row_format == ROW_TYPE_COMPRESSED) {
11729 push_warning(
11730 m_thd, Sql_condition::WARN_LEVEL_WARN,
11731 HA_WRONG_CREATE_OPTION,
11732 "InnoDB: PAGE_COMPRESSED table can't have"
11733 " ROW_TYPE=COMPRESSED");
11734 return "PAGE_COMPRESSED";
11735 }
11736
11737 switch (row_format) {
11738 default:
11739 break;
11740 case ROW_TYPE_DEFAULT:
11741 if (m_default_row_format
11742 != DEFAULT_ROW_FORMAT_REDUNDANT) {
11743 break;
11744 }
11745 /* fall through */
11746 case ROW_TYPE_REDUNDANT:
11747 push_warning(
11748 m_thd, Sql_condition::WARN_LEVEL_WARN,
11749 HA_WRONG_CREATE_OPTION,
11750 "InnoDB: PAGE_COMPRESSED table can't have"
11751 " ROW_TYPE=REDUNDANT");
11752 return "PAGE_COMPRESSED";
11753 }
11754
11755 if (!m_allow_file_per_table) {
11756 push_warning(
11757 m_thd, Sql_condition::WARN_LEVEL_WARN,
11758 HA_WRONG_CREATE_OPTION,
11759 "InnoDB: PAGE_COMPRESSED requires"
11760 " innodb_file_per_table.");
11761 return "PAGE_COMPRESSED";
11762 }
11763
11764 if (m_create_info->key_block_size) {
11765 push_warning(
11766 m_thd, Sql_condition::WARN_LEVEL_WARN,
11767 HA_WRONG_CREATE_OPTION,
11768 "InnoDB: PAGE_COMPRESSED table can't have"
11769 " key_block_size");
11770 return "PAGE_COMPRESSED";
11771 }
11772 }
11773
11774 /* Check page compression level requirements, some of them are
11775 already checked above */
11776 if (options->page_compression_level != 0) {
11777 if (options->page_compressed == false) {
11778 push_warning(
11779 m_thd, Sql_condition::WARN_LEVEL_WARN,
11780 HA_WRONG_CREATE_OPTION,
11781 "InnoDB: PAGE_COMPRESSION_LEVEL requires"
11782 " PAGE_COMPRESSED");
11783 return "PAGE_COMPRESSION_LEVEL";
11784 }
11785
11786 if (options->page_compression_level < 1 || options->page_compression_level > 9) {
11787 push_warning_printf(
11788 m_thd, Sql_condition::WARN_LEVEL_WARN,
11789 HA_WRONG_CREATE_OPTION,
11790 "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
11791 " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
11792 options->page_compression_level);
11793 return "PAGE_COMPRESSION_LEVEL";
11794 }
11795 }
11796
11797 return NULL;
11798 }
11799
11800 /*****************************************************************//**
11801 Update create_info. Used in SHOW CREATE TABLE et al. */
11802
11803 void
update_create_info(HA_CREATE_INFO * create_info)11804 ha_innobase::update_create_info(
11805 /*============================*/
11806 HA_CREATE_INFO* create_info) /*!< in/out: create info */
11807 {
11808 if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
11809 info(HA_STATUS_AUTO);
11810 create_info->auto_increment_value = stats.auto_increment_value;
11811 }
11812
11813 if (m_prebuilt->table->is_temporary()) {
11814 return;
11815 }
11816
11817 /* Update the DATA DIRECTORY name from SYS_DATAFILES. */
11818 dict_get_and_save_data_dir_path(m_prebuilt->table, false);
11819
11820 if (m_prebuilt->table->data_dir_path) {
11821 create_info->data_file_name = m_prebuilt->table->data_dir_path;
11822 }
11823 }
11824
11825 /*****************************************************************//**
11826 Initialize the table FTS stopword list
11827 @return TRUE if success */
11828 ibool
innobase_fts_load_stopword(dict_table_t * table,trx_t * trx,THD * thd)11829 innobase_fts_load_stopword(
11830 /*=======================*/
11831 dict_table_t* table, /*!< in: Table has the FTS */
11832 trx_t* trx, /*!< in: transaction */
11833 THD* thd) /*!< in: current thread */
11834 {
11835 const char *stopword_table= THDVAR(thd, ft_user_stopword_table);
11836 if (!stopword_table)
11837 {
11838 mysql_mutex_lock(&LOCK_global_system_variables);
11839 if (innobase_server_stopword_table)
11840 stopword_table= thd_strdup(thd, innobase_server_stopword_table);
11841 mysql_mutex_unlock(&LOCK_global_system_variables);
11842 }
11843
11844 return fts_load_stopword(table, trx, stopword_table,
11845 THDVAR(thd, ft_enable_stopword), false);
11846 }
11847
11848 /** Parse the table name into normal name and remote path if needed.
11849 @param[in] name Table name (db/table or full path).
11850 @return 0 if successful, otherwise, error number */
11851 int
parse_table_name(const char * name)11852 create_table_info_t::parse_table_name(
11853 const char*
11854 #ifdef _WIN32
11855 name
11856 #endif
11857 )
11858 {
11859 DBUG_ENTER("parse_table_name");
11860
11861 #ifdef _WIN32
11862 /* Names passed in from server are in two formats:
11863 1. <database_name>/<table_name>: for normal table creation
11864 2. full path: for temp table creation, or DATA DIRECTORY.
11865
11866 When srv_file_per_table is on and mysqld_embedded is off,
11867 check for full path pattern, i.e.
11868 X:\dir\..., X is a driver letter, or
11869 \\dir1\dir2\..., UNC path
11870 returns error if it is in full path format, but not creating a temp.
11871 table. Currently InnoDB does not support symbolic link on Windows. */
11872
11873 if (m_innodb_file_per_table
11874 && !mysqld_embedded
11875 && !m_create_info->tmp_table()) {
11876
11877 if ((name[1] == ':')
11878 || (name[0] == '\\' && name[1] == '\\')) {
11879 sql_print_error("Cannot create table %s\n", name);
11880 DBUG_RETURN(HA_ERR_GENERIC);
11881 }
11882 }
11883 #endif
11884
11885 m_remote_path[0] = '\0';
11886
11887 /* Make sure DATA DIRECTORY is compatible with other options
11888 and set the remote path. In the case of either;
11889 CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
11890 CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ;
11891 we ignore the DATA DIRECTORY. */
11892 if (m_create_info->data_file_name
11893 && m_create_info->data_file_name[0]
11894 && my_use_symdir) {
11895 if (!create_option_data_directory_is_valid()) {
11896 push_warning_printf(
11897 m_thd, Sql_condition::WARN_LEVEL_WARN,
11898 WARN_OPTION_IGNORED,
11899 ER_DEFAULT(WARN_OPTION_IGNORED),
11900 "DATA DIRECTORY");
11901
11902 m_flags &= ~DICT_TF_MASK_DATA_DIR;
11903 } else {
11904 strncpy(m_remote_path,
11905 m_create_info->data_file_name,
11906 FN_REFLEN - 1);
11907 }
11908 }
11909
11910 if (m_create_info->index_file_name) {
11911 my_error(WARN_OPTION_IGNORED, ME_WARNING,
11912 "INDEX DIRECTORY");
11913 }
11914
11915 DBUG_RETURN(0);
11916 }
11917
11918 /** @return whether innodb_strict_mode is active */
is_innodb_strict_mode(THD * thd)11919 bool ha_innobase::is_innodb_strict_mode(THD *thd)
11920 {
11921 return THDVAR(thd, strict_mode);
11922 }
11923
11924 /** Determine InnoDB table flags.
11925 If strict_mode=OFF, this will adjust the flags to what should be assumed.
11926 @retval true on success
11927 @retval false on error */
innobase_table_flags()11928 bool create_table_info_t::innobase_table_flags()
11929 {
11930 DBUG_ENTER("innobase_table_flags");
11931
11932 const char* fts_doc_id_index_bad = NULL;
11933 ulint zip_ssize = 0;
11934 enum row_type row_type;
11935 rec_format_t innodb_row_format =
11936 get_row_format(m_default_row_format);
11937 const bool is_temp = m_create_info->tmp_table();
11938 bool zip_allowed = !is_temp;
11939
11940 const ulint zip_ssize_max =
11941 ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
11942 static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
11943
11944 ha_table_option_struct *options= m_form->s->option_struct;
11945
11946 m_flags = 0;
11947 m_flags2 = 0;
11948
11949 /* Check if there are any FTS indexes defined on this table. */
11950 for (uint i = 0; i < m_form->s->keys; i++) {
11951 const KEY* key = &m_form->key_info[i];
11952
11953 if (key->flags & HA_FULLTEXT) {
11954 m_flags2 |= DICT_TF2_FTS;
11955
11956 /* We don't support FTS indexes in temporary
11957 tables. */
11958 if (is_temp) {
11959 my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
11960 DBUG_RETURN(false);
11961 }
11962
11963 if (fts_doc_id_index_bad) {
11964 goto index_bad;
11965 }
11966 }
11967
11968 if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
11969 continue;
11970 }
11971
11972 /* Do a pre-check on FTS DOC ID index */
11973 if (!(key->flags & HA_NOSAME)
11974 || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
11975 || strcmp(key->key_part[0].field->field_name.str,
11976 FTS_DOC_ID_COL_NAME)) {
11977 fts_doc_id_index_bad = key->name.str;
11978 }
11979
11980 if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
11981 index_bad:
11982 my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
11983 fts_doc_id_index_bad);
11984 DBUG_RETURN(false);
11985 }
11986 }
11987
11988 if (m_create_info->key_block_size > 0) {
11989 /* The requested compressed page size (key_block_size)
11990 is given in kilobytes. If it is a valid number, store
11991 that value as the number of log2 shifts from 512 in
11992 zip_ssize. Zero means it is not compressed. */
11993 ulint zssize; /* Zip Shift Size */
11994 ulint kbsize; /* Key Block Size */
11995 for (zssize = kbsize = 1;
11996 zssize <= zip_ssize_max;
11997 zssize++, kbsize <<= 1) {
11998 if (kbsize == m_create_info->key_block_size) {
11999 zip_ssize = zssize;
12000 break;
12001 }
12002 }
12003
12004 /* Make sure compressed row format is allowed. */
12005 if (is_temp) {
12006 push_warning(
12007 m_thd, Sql_condition::WARN_LEVEL_WARN,
12008 ER_ILLEGAL_HA_CREATE_OPTION,
12009 "InnoDB: KEY_BLOCK_SIZE is ignored"
12010 " for TEMPORARY TABLE.");
12011 zip_allowed = false;
12012 } else if (!m_allow_file_per_table) {
12013 push_warning(
12014 m_thd, Sql_condition::WARN_LEVEL_WARN,
12015 ER_ILLEGAL_HA_CREATE_OPTION,
12016 "InnoDB: KEY_BLOCK_SIZE requires"
12017 " innodb_file_per_table.");
12018 zip_allowed = false;
12019 }
12020
12021 if (!zip_allowed
12022 || zssize > zip_ssize_max) {
12023 push_warning_printf(
12024 m_thd, Sql_condition::WARN_LEVEL_WARN,
12025 ER_ILLEGAL_HA_CREATE_OPTION,
12026 "InnoDB: ignoring KEY_BLOCK_SIZE=%u.",
12027 (uint) m_create_info->key_block_size);
12028 }
12029 }
12030
12031 row_type = m_create_info->row_type;
12032
12033 if (zip_ssize && zip_allowed) {
12034 /* if ROW_FORMAT is set to default,
12035 automatically change it to COMPRESSED. */
12036 if (row_type == ROW_TYPE_DEFAULT) {
12037 row_type = ROW_TYPE_COMPRESSED;
12038 } else if (row_type != ROW_TYPE_COMPRESSED) {
12039 /* ROW_FORMAT other than COMPRESSED
12040 ignores KEY_BLOCK_SIZE. It does not
12041 make sense to reject conflicting
12042 KEY_BLOCK_SIZE and ROW_FORMAT, because
12043 such combinations can be obtained
12044 with ALTER TABLE anyway. */
12045 push_warning_printf(
12046 m_thd, Sql_condition::WARN_LEVEL_WARN,
12047 ER_ILLEGAL_HA_CREATE_OPTION,
12048 "InnoDB: ignoring KEY_BLOCK_SIZE=%u"
12049 " unless ROW_FORMAT=COMPRESSED.",
12050 (uint) m_create_info->key_block_size);
12051 zip_allowed = false;
12052 }
12053 } else {
12054 /* zip_ssize == 0 means no KEY_BLOCK_SIZE. */
12055 if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
12056 /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
12057 implies half the maximum KEY_BLOCK_SIZE(*1k) or
12058 srv_page_size, whichever is less. */
12059 zip_ssize = zip_ssize_max - 1;
12060 }
12061 }
12062
12063 /* Validate the row format. Correct it if necessary */
12064
12065 switch (row_type) {
12066 case ROW_TYPE_REDUNDANT:
12067 innodb_row_format = REC_FORMAT_REDUNDANT;
12068 break;
12069 case ROW_TYPE_COMPACT:
12070 innodb_row_format = REC_FORMAT_COMPACT;
12071 break;
12072 case ROW_TYPE_COMPRESSED:
12073 if (is_temp) {
12074 push_warning_printf(
12075 m_thd, Sql_condition::WARN_LEVEL_WARN,
12076 ER_ILLEGAL_HA_CREATE_OPTION,
12077 "InnoDB: ROW_FORMAT=%s is ignored for"
12078 " TEMPORARY TABLE.",
12079 get_row_format_name(row_type));
12080 } else if (!m_allow_file_per_table) {
12081 push_warning_printf(
12082 m_thd, Sql_condition::WARN_LEVEL_WARN,
12083 ER_ILLEGAL_HA_CREATE_OPTION,
12084 "InnoDB: ROW_FORMAT=COMPRESSED requires"
12085 " innodb_file_per_table.");
12086 } else {
12087 innodb_row_format = REC_FORMAT_COMPRESSED;
12088 break;
12089 }
12090 zip_allowed = false;
12091 /* Set ROW_FORMAT = COMPACT */
12092 /* fall through */
12093 case ROW_TYPE_NOT_USED:
12094 case ROW_TYPE_FIXED:
12095 case ROW_TYPE_PAGE:
12096 push_warning(
12097 m_thd, Sql_condition::WARN_LEVEL_WARN,
12098 ER_ILLEGAL_HA_CREATE_OPTION,
12099 "InnoDB: assuming ROW_FORMAT=DYNAMIC.");
12100 /* fall through */
12101 case ROW_TYPE_DYNAMIC:
12102 innodb_row_format = REC_FORMAT_DYNAMIC;
12103 break;
12104 case ROW_TYPE_DEFAULT:
12105 ;
12106 }
12107
12108 /* Don't support compressed table when page size > 16k. */
12109 if (zip_allowed && zip_ssize && srv_page_size > UNIV_PAGE_SIZE_DEF) {
12110 push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
12111 ER_ILLEGAL_HA_CREATE_OPTION,
12112 "InnoDB: Cannot create a COMPRESSED table"
12113 " when innodb_page_size > 16k."
12114 " Assuming ROW_FORMAT=DYNAMIC.");
12115 zip_allowed = false;
12116 }
12117
12118 ut_ad(!is_temp || !zip_allowed);
12119 ut_ad(!is_temp || innodb_row_format != REC_FORMAT_COMPRESSED);
12120
12121 /* Set the table flags */
12122 if (!zip_allowed) {
12123 zip_ssize = 0;
12124 }
12125
12126 if (is_temp) {
12127 m_flags2 |= DICT_TF2_TEMPORARY;
12128 } else if (m_use_file_per_table) {
12129 m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE;
12130 }
12131
12132 ulint level = ulint(options->page_compression_level);
12133 if (!level) {
12134 level = page_zip_level;
12135 if (!level && options->page_compressed) {
12136 push_warning_printf(
12137 m_thd, Sql_condition::WARN_LEVEL_WARN,
12138 ER_ILLEGAL_HA_CREATE_OPTION,
12139 "InnoDB: PAGE_COMPRESSED requires"
12140 " PAGE_COMPRESSION_LEVEL or"
12141 " innodb_compression_level > 0");
12142 DBUG_RETURN(false);
12143 }
12144 }
12145
12146 /* Set the table flags */
12147 dict_tf_set(&m_flags, innodb_row_format, zip_ssize,
12148 m_use_data_dir, options->page_compressed, level);
12149
12150 if (m_form->s->table_type == TABLE_TYPE_SEQUENCE) {
12151 m_flags |= DICT_TF_MASK_NO_ROLLBACK;
12152 }
12153
12154 /* Set the flags2 when create table or alter tables */
12155 m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
12156 DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
12157 m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
12158
12159 DBUG_RETURN(true);
12160 }
12161
12162 /** Parse MERGE_THRESHOLD value from the string.
12163 @param[in] thd connection
12164 @param[in] str string which might include 'MERGE_THRESHOLD='
12165 @return value parsed. 0 means not found or invalid value. */
12166 static
12167 ulint
innobase_parse_merge_threshold(THD * thd,const char * str)12168 innobase_parse_merge_threshold(
12169 THD* thd,
12170 const char* str)
12171 {
12172 static const char* label = "MERGE_THRESHOLD=";
12173 static const size_t label_len = strlen(label);
12174 const char* pos = str;
12175
12176 pos = strstr(str, label);
12177
12178 if (pos == NULL) {
12179 return(0);
12180 }
12181
12182 pos += label_len;
12183
12184 lint ret = atoi(pos);
12185
12186 if (ret > 0 && ret <= 50) {
12187 return(static_cast<ulint>(ret));
12188 }
12189
12190 push_warning_printf(
12191 thd, Sql_condition::WARN_LEVEL_WARN,
12192 ER_ILLEGAL_HA_CREATE_OPTION,
12193 "InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE"
12194 " statement. The value is ignored.");
12195
12196 return(0);
12197 }
12198
12199 /** Parse hint for table and its indexes, and update the information
12200 in dictionary.
12201 @param[in] thd connection
12202 @param[in,out] table target table
12203 @param[in] table_share table definition */
12204 void
innobase_parse_hint_from_comment(THD * thd,dict_table_t * table,const TABLE_SHARE * table_share)12205 innobase_parse_hint_from_comment(
12206 THD* thd,
12207 dict_table_t* table,
12208 const TABLE_SHARE* table_share)
12209 {
12210 ulint merge_threshold_table;
12211 ulint merge_threshold_index[MAX_KEY];
12212 bool is_found[MAX_KEY];
12213
12214 if (table_share->comment.str != NULL) {
12215 merge_threshold_table
12216 = innobase_parse_merge_threshold(
12217 thd, table_share->comment.str);
12218 } else {
12219 merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12220 }
12221
12222 if (merge_threshold_table == 0) {
12223 merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12224 }
12225
12226 for (uint i = 0; i < table_share->keys; i++) {
12227 KEY* key_info = &table_share->key_info[i];
12228
12229 ut_ad(i < sizeof(merge_threshold_index)
12230 / sizeof(merge_threshold_index[0]));
12231
12232 if (key_info->flags & HA_USES_COMMENT
12233 && key_info->comment.str != NULL) {
12234 merge_threshold_index[i]
12235 = innobase_parse_merge_threshold(
12236 thd, key_info->comment.str);
12237 } else {
12238 merge_threshold_index[i] = merge_threshold_table;
12239 }
12240
12241 if (merge_threshold_index[i] == 0) {
12242 merge_threshold_index[i] = merge_threshold_table;
12243 }
12244 }
12245
12246 /* update SYS_INDEX table */
12247 if (!table->is_temporary()) {
12248 for (uint i = 0; i < table_share->keys; i++) {
12249 is_found[i] = false;
12250 }
12251
12252 for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12253 index != NULL;
12254 index = UT_LIST_GET_NEXT(indexes, index)) {
12255
12256 if (dict_index_is_auto_gen_clust(index)) {
12257
12258 /* GEN_CLUST_INDEX should use
12259 merge_threshold_table */
12260 dict_index_set_merge_threshold(
12261 index, merge_threshold_table);
12262 continue;
12263 }
12264
12265 for (uint i = 0; i < table_share->keys; i++) {
12266 if (is_found[i]) {
12267 continue;
12268 }
12269
12270 KEY* key_info = &table_share->key_info[i];
12271
12272 if (innobase_strcasecmp(
12273 index->name, key_info->name.str) == 0) {
12274
12275 dict_index_set_merge_threshold(
12276 index,
12277 merge_threshold_index[i]);
12278 is_found[i] = true;
12279 break;
12280 }
12281 }
12282 }
12283 }
12284
12285 for (uint i = 0; i < table_share->keys; i++) {
12286 is_found[i] = false;
12287 }
12288
12289 /* update in memory */
12290 for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12291 index != NULL;
12292 index = UT_LIST_GET_NEXT(indexes, index)) {
12293
12294 if (dict_index_is_auto_gen_clust(index)) {
12295
12296 /* GEN_CLUST_INDEX should use merge_threshold_table */
12297
12298 /* x-lock index is needed to exclude concurrent
12299 pessimistic tree operations */
12300 rw_lock_x_lock(dict_index_get_lock(index));
12301 index->merge_threshold = merge_threshold_table;
12302 rw_lock_x_unlock(dict_index_get_lock(index));
12303
12304 continue;
12305 }
12306
12307 for (uint i = 0; i < table_share->keys; i++) {
12308 if (is_found[i]) {
12309 continue;
12310 }
12311
12312 KEY* key_info = &table_share->key_info[i];
12313
12314 if (innobase_strcasecmp(
12315 index->name, key_info->name.str) == 0) {
12316
12317 /* x-lock index is needed to exclude concurrent
12318 pessimistic tree operations */
12319 rw_lock_x_lock(dict_index_get_lock(index));
12320 index->merge_threshold
12321 = merge_threshold_index[i];
12322 rw_lock_x_unlock(dict_index_get_lock(index));
12323 is_found[i] = true;
12324
12325 break;
12326 }
12327 }
12328 }
12329 }
12330
12331 /** Set m_use_* flags. */
12332 void
set_tablespace_type(bool table_being_altered_is_file_per_table)12333 create_table_info_t::set_tablespace_type(
12334 bool table_being_altered_is_file_per_table)
12335 {
12336 /** Allow file_per_table for this table either because:
12337 1) the setting innodb_file_per_table=on,
12338 2) the table being altered is currently file_per_table */
12339 m_allow_file_per_table =
12340 m_innodb_file_per_table
12341 || table_being_altered_is_file_per_table;
12342
12343 /* Ignore the current innodb-file-per-table setting if we are
12344 creating a temporary table. */
12345 m_use_file_per_table = m_allow_file_per_table
12346 && !m_create_info->tmp_table();
12347
12348 /* DATA DIRECTORY must have m_use_file_per_table but cannot be
12349 used with TEMPORARY tables. */
12350 m_use_data_dir =
12351 m_use_file_per_table
12352 && m_create_info->data_file_name
12353 && m_create_info->data_file_name[0]
12354 && my_use_symdir;
12355 }
12356
12357 /** Initialize the create_table_info_t object.
12358 @return error number */
12359 int
initialize()12360 create_table_info_t::initialize()
12361 {
12362 DBUG_ENTER("create_table_info_t::initialize");
12363
12364 ut_ad(m_thd != NULL);
12365 ut_ad(m_create_info != NULL);
12366
12367 if (m_form->s->fields > REC_MAX_N_USER_FIELDS) {
12368 DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
12369 }
12370
12371 /* Check for name conflicts (with reserved name) for
12372 any user indices to be created. */
12373 if (innobase_index_name_is_reserved(m_thd, m_form->key_info,
12374 m_form->s->keys)) {
12375 DBUG_RETURN(HA_ERR_WRONG_INDEX);
12376 }
12377
12378 /* Get the transaction associated with the current thd, or create one
12379 if not yet created */
12380
12381 check_trx_exists(m_thd);
12382
12383 DBUG_RETURN(0);
12384 }
12385
12386
12387 /** Check if a virtual column is part of a fulltext or spatial index. */
12388 bool
gcols_in_fulltext_or_spatial()12389 create_table_info_t::gcols_in_fulltext_or_spatial()
12390 {
12391 for (ulint i = 0; i < m_form->s->keys; i++) {
12392 const KEY* key = m_form->key_info + i;
12393 if (!(key->flags & (HA_SPATIAL | HA_FULLTEXT))) {
12394 continue;
12395 }
12396 for (ulint j = 0; j < key->user_defined_key_parts; j++) {
12397 /* We do not support special (Fulltext or
12398 Spatial) index on virtual columns */
12399 if (!key->key_part[j].field->stored_in_db()) {
12400 my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0));
12401 return true;
12402 }
12403 }
12404 }
12405 return false;
12406 }
12407
12408
12409 /** Prepare to create a new table to an InnoDB database.
12410 @param[in] name Table name
12411 @return error number */
prepare_create_table(const char * name,bool strict)12412 int create_table_info_t::prepare_create_table(const char* name, bool strict)
12413 {
12414 DBUG_ENTER("prepare_create_table");
12415
12416 ut_ad(m_thd != NULL);
12417 ut_ad(m_create_info != NULL);
12418
12419 set_tablespace_type(false);
12420
12421 normalize_table_name(m_table_name, name);
12422
12423 /* Validate table options not handled by the SQL-parser */
12424 if (check_table_options()) {
12425 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12426 }
12427
12428 /* Validate the create options if innodb_strict_mode is set.
12429 Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION
12430 because InnoDB might actually support the option, but not under
12431 the current conditions. The messages revealing the specific
12432 problems are reported inside this function. */
12433 if (strict && create_options_are_invalid()) {
12434 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12435 }
12436
12437 /* Create the table flags and flags2 */
12438 if (!innobase_table_flags()) {
12439 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12440 }
12441
12442 if (high_level_read_only) {
12443 DBUG_RETURN(HA_ERR_TABLE_READONLY);
12444 }
12445
12446 if (gcols_in_fulltext_or_spatial()) {
12447 DBUG_RETURN(HA_ERR_UNSUPPORTED);
12448 }
12449
12450 for (uint i = 0; i < m_form->s->keys; i++) {
12451 const size_t max_field_len
12452 = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags);
12453 const KEY& key = m_form->key_info[i];
12454
12455 if (key.algorithm == HA_KEY_ALG_FULLTEXT) {
12456 continue;
12457 }
12458
12459 if (too_big_key_part_length(max_field_len, key)) {
12460 DBUG_RETURN(convert_error_code_to_mysql(
12461 DB_TOO_BIG_INDEX_COL, m_flags, NULL));
12462 }
12463 }
12464
12465 DBUG_RETURN(parse_table_name(name));
12466 }
12467
12468 /** Create the internal innodb table.
12469 @param create_fk whether to add FOREIGN KEY constraints */
create_table(bool create_fk)12470 int create_table_info_t::create_table(bool create_fk)
12471 {
12472 int error;
12473 int primary_key_no;
12474 uint i;
12475
12476 DBUG_ENTER("create_table");
12477
12478 /* Look for a primary key */
12479 primary_key_no = (m_form->s->primary_key != MAX_KEY ?
12480 (int) m_form->s->primary_key : -1);
12481
12482 /* Our function innobase_get_mysql_key_number_for_index assumes
12483 the primary key is always number 0, if it exists */
12484 ut_a(primary_key_no == -1 || primary_key_no == 0);
12485
12486 error = create_table_def();
12487
12488 if (error) {
12489 DBUG_RETURN(error);
12490 }
12491
12492 DBUG_ASSERT(m_drop_before_rollback
12493 == !(m_flags2 & DICT_TF2_TEMPORARY));
12494
12495 /* Create the keys */
12496
12497 if (m_form->s->keys == 0 || primary_key_no == -1) {
12498 /* Create an index which is used as the clustered index;
12499 order the rows by their row id which is internally generated
12500 by InnoDB */
12501 ulint flags = m_table->flags;
12502 dict_index_t* index = dict_mem_index_create(
12503 m_table, innobase_index_reserve_name,
12504 DICT_CLUSTERED, 0);
12505 error = convert_error_code_to_mysql(
12506 row_create_index_for_mysql(index, m_trx, NULL),
12507 flags, m_thd);
12508 if (error) {
12509 DBUG_RETURN(error);
12510 }
12511 }
12512
12513 if (primary_key_no != -1) {
12514 /* In InnoDB the clustered index must always be created
12515 first */
12516 if ((error = create_index(m_trx, m_form, m_table,
12517 (uint) primary_key_no))) {
12518 DBUG_RETURN(error);
12519 }
12520 }
12521
12522 /* Create the ancillary tables that are common to all FTS indexes on
12523 this table. */
12524 if (m_flags2 & DICT_TF2_FTS) {
12525 fts_doc_id_index_enum ret;
12526
12527 /* Check whether there already exists FTS_DOC_ID_INDEX */
12528 ret = innobase_fts_check_doc_id_index_in_def(
12529 m_form->s->keys, m_form->key_info);
12530
12531 switch (ret) {
12532 case FTS_INCORRECT_DOC_ID_INDEX:
12533 push_warning_printf(m_thd,
12534 Sql_condition::WARN_LEVEL_WARN,
12535 ER_WRONG_NAME_FOR_INDEX,
12536 " InnoDB: Index name %s is reserved"
12537 " for the unique index on"
12538 " FTS_DOC_ID column for FTS"
12539 " Document ID indexing"
12540 " on table %s. Please check"
12541 " the index definition to"
12542 " make sure it is of correct"
12543 " type\n",
12544 FTS_DOC_ID_INDEX_NAME,
12545 m_table->name.m_name);
12546
12547 if (m_table->fts) {
12548 fts_free(m_table);
12549 }
12550
12551 my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
12552 FTS_DOC_ID_INDEX_NAME);
12553 DBUG_RETURN(-1);
12554 case FTS_EXIST_DOC_ID_INDEX:
12555 case FTS_NOT_EXIST_DOC_ID_INDEX:
12556 break;
12557 }
12558
12559 dberr_t err = fts_create_common_tables(
12560 m_trx, m_table,
12561 (ret == FTS_EXIST_DOC_ID_INDEX));
12562
12563 error = convert_error_code_to_mysql(err, 0, NULL);
12564
12565 if (error) {
12566 DBUG_RETURN(error);
12567 }
12568 }
12569
12570 for (i = 0; i < m_form->s->keys; i++) {
12571 if (i != uint(primary_key_no)
12572 && (error = create_index(m_trx, m_form, m_table, i))) {
12573 DBUG_RETURN(error);
12574 }
12575 }
12576
12577 /* Cache all the FTS indexes on this table in the FTS specific
12578 structure. They are used for FTS indexed column update handling. */
12579 if (m_flags2 & DICT_TF2_FTS) {
12580 fts_t* fts = m_table->fts;
12581
12582 ut_a(fts != NULL);
12583
12584 dict_table_get_all_fts_indexes(m_table, fts->indexes);
12585 }
12586
12587 size_t stmt_len;
12588 if (const char* stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len)) {
12589 dberr_t err = create_fk
12590 ? dict_create_foreign_constraints(
12591 m_trx, stmt, stmt_len, m_table_name,
12592 m_flags2 & DICT_TF2_TEMPORARY)
12593 : DB_SUCCESS;
12594 if (err == DB_SUCCESS) {
12595 /* Check that also referencing constraints are ok */
12596 dict_names_t fk_tables;
12597 err = dict_load_foreigns(m_table_name, NULL,
12598 false, true,
12599 DICT_ERR_IGNORE_NONE,
12600 fk_tables);
12601 while (err == DB_SUCCESS && !fk_tables.empty()) {
12602 dict_load_table(fk_tables.front(),
12603 DICT_ERR_IGNORE_NONE);
12604 fk_tables.pop_front();
12605 }
12606 }
12607
12608 switch (err) {
12609 case DB_PARENT_NO_INDEX:
12610 push_warning_printf(
12611 m_thd, Sql_condition::WARN_LEVEL_WARN,
12612 HA_ERR_CANNOT_ADD_FOREIGN,
12613 "Create table '%s' with foreign key constraint"
12614 " failed. There is no index in the referenced"
12615 " table where the referenced columns appear"
12616 " as the first columns.\n", m_table_name);
12617 break;
12618
12619 case DB_CHILD_NO_INDEX:
12620 push_warning_printf(
12621 m_thd, Sql_condition::WARN_LEVEL_WARN,
12622 HA_ERR_CANNOT_ADD_FOREIGN,
12623 "Create table '%s' with foreign key constraint"
12624 " failed. There is no index in the referencing"
12625 " table where referencing columns appear"
12626 " as the first columns.\n", m_table_name);
12627 break;
12628 case DB_NO_FK_ON_S_BASE_COL:
12629 push_warning_printf(
12630 m_thd, Sql_condition::WARN_LEVEL_WARN,
12631 HA_ERR_CANNOT_ADD_FOREIGN,
12632 "Create table '%s' with foreign key constraint"
12633 " failed. Cannot add foreign key constraint"
12634 " placed on the base column of stored"
12635 " column. \n",
12636 m_table_name);
12637 default:
12638 break;
12639 }
12640
12641 if (err != DB_SUCCESS) {
12642 DBUG_RETURN(convert_error_code_to_mysql(
12643 err, m_flags, NULL));
12644 }
12645 }
12646
12647 /* In TRUNCATE TABLE, we will merely warn about the maximum
12648 row size being too large. */
12649 if (!row_size_is_acceptable(*m_table, create_fk)) {
12650 DBUG_RETURN(convert_error_code_to_mysql(
12651 DB_TOO_BIG_RECORD, m_flags, NULL));
12652 }
12653
12654 DBUG_RETURN(0);
12655 }
12656
row_size_is_acceptable(const dict_table_t & table,bool strict) const12657 bool create_table_info_t::row_size_is_acceptable(
12658 const dict_table_t &table, bool strict) const
12659 {
12660 for (dict_index_t *index= dict_table_get_first_index(&table); index;
12661 index= dict_table_get_next_index(index))
12662 if (!row_size_is_acceptable(*index, strict))
12663 return false;
12664 return true;
12665 }
12666
12667 /* FIXME: row size check has some flaws and should be improved */
record_size_info() const12668 dict_index_t::record_size_info_t dict_index_t::record_size_info() const
12669 {
12670 ut_ad(!(type & DICT_FTS));
12671
12672 /* maximum allowed size of a node pointer record */
12673 ulint page_ptr_max;
12674 const bool comp= table->not_redundant();
12675 /* table->space == NULL after DISCARD TABLESPACE */
12676 const ulint zip_size= dict_tf_get_zip_size(table->flags);
12677 record_size_info_t result;
12678
12679 if (zip_size && zip_size < srv_page_size)
12680 {
12681 /* On a ROW_FORMAT=COMPRESSED page, two records must fit in the
12682 uncompressed page modification log. On compressed pages
12683 with size.physical() == univ_page_size.physical(),
12684 this limit will never be reached. */
12685 ut_ad(comp);
12686 /* The maximum allowed record size is the size of
12687 an empty page, minus a byte for recoding the heap
12688 number in the page modification log. The maximum
12689 allowed node pointer size is half that. */
12690 result.max_leaf_size= page_zip_empty_size(n_fields, zip_size);
12691 if (result.max_leaf_size)
12692 {
12693 result.max_leaf_size--;
12694 }
12695 page_ptr_max= result.max_leaf_size / 2;
12696 /* On a compressed page, there is a two-byte entry in
12697 the dense page directory for every record. But there
12698 is no record header. */
12699 result.shortest_size= 2;
12700 }
12701 else
12702 {
12703 /* The maximum allowed record size is half a B-tree
12704 page(16k for 64k page size). No additional sparse
12705 page directory entry will be generated for the first
12706 few user records. */
12707 result.max_leaf_size= (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
12708 ? page_get_free_space_of_empty(comp) / 2
12709 : REDUNDANT_REC_MAX_DATA_SIZE;
12710
12711 page_ptr_max= result.max_leaf_size;
12712 /* Each record has a header. */
12713 result.shortest_size= comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;
12714 }
12715
12716 if (comp)
12717 {
12718 /* Include the "null" flags in the
12719 maximum possible record size. */
12720 result.shortest_size+= UT_BITS_IN_BYTES(n_nullable);
12721 }
12722 else
12723 {
12724 /* For each column, include a 2-byte offset and a
12725 "null" flag. The 1-byte format is only used in short
12726 records that do not contain externally stored columns.
12727 Such records could never exceed the page limit, even
12728 when using the 2-byte format. */
12729 result.shortest_size+= 2 * n_fields;
12730 }
12731
12732 const ulint max_local_len= table->get_overflow_field_local_len();
12733
12734 /* Compute the maximum possible record size. */
12735 for (unsigned i= 0; i < n_fields; i++)
12736 {
12737 const dict_field_t &f= fields[i];
12738 const dict_col_t &col= *f.col;
12739
12740 /* In dtuple_convert_big_rec(), variable-length columns
12741 that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE
12742 may be chosen for external storage.
12743
12744 Fixed-length columns, and all columns of secondary
12745 index records are always stored inline. */
12746
12747 /* Determine the maximum length of the index field.
12748 The field_ext_max_size should be computed as the worst
12749 case in rec_get_converted_size_comp() for
12750 REC_STATUS_ORDINARY records. */
12751
12752 size_t field_max_size= dict_col_get_fixed_size(&col, comp);
12753 if (field_max_size && f.fixed_len != 0)
12754 {
12755 /* dict_index_add_col() should guarantee this */
12756 ut_ad(!f.prefix_len || f.fixed_len == f.prefix_len);
12757 /* Fixed lengths are not encoded
12758 in ROW_FORMAT=COMPACT. */
12759 goto add_field_size;
12760 }
12761
12762 field_max_size= dict_col_get_max_size(&col);
12763
12764 if (f.prefix_len)
12765 {
12766 if (f.prefix_len < field_max_size)
12767 {
12768 field_max_size= f.prefix_len;
12769 }
12770
12771 /* those conditions were copied from dtuple_convert_big_rec()*/
12772 }
12773 else if (field_max_size > max_local_len &&
12774 field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
12775 DATA_BIG_COL(&col) && dict_index_is_clust(this))
12776 {
12777
12778 /* In the worst case, we have a locally stored
12779 column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes.
12780 The length can be stored in one byte. If the
12781 column were stored externally, the lengths in
12782 the clustered index page would be
12783 BTR_EXTERN_FIELD_REF_SIZE and 2. */
12784 field_max_size= max_local_len;
12785 }
12786
12787 if (comp)
12788 {
12789 /* Add the extra size for ROW_FORMAT=COMPACT.
12790 For ROW_FORMAT=REDUNDANT, these bytes were
12791 added to result.shortest_size before this loop. */
12792 result.shortest_size+= field_max_size < 256 ? 1 : 2;
12793 }
12794 add_field_size:
12795 result.shortest_size+= field_max_size;
12796
12797 /* Check the size limit on leaf pages. */
12798 if (result.shortest_size >= result.max_leaf_size)
12799 {
12800 result.set_too_big(i);
12801 }
12802
12803 /* Check the size limit on non-leaf pages. Records
12804 stored in non-leaf B-tree pages consist of the unique
12805 columns of the record (the key columns of the B-tree)
12806 and a node pointer field. When we have processed the
12807 unique columns, result.shortest_size equals the size of the
12808 node pointer record minus the node pointer column. */
12809 if (i + 1 == dict_index_get_n_unique_in_tree(this) &&
12810 result.shortest_size + REC_NODE_PTR_SIZE >= page_ptr_max)
12811 {
12812 result.set_too_big(i);
12813 }
12814 }
12815
12816 return result;
12817 }
12818
12819 /** Issue a warning that the row is too big. */
ib_warn_row_too_big(THD * thd,const dict_table_t * table)12820 static void ib_warn_row_too_big(THD *thd, const dict_table_t *table)
12821 {
12822 /* FIXME: this row size check should be improved */
12823 /* If prefix is true then a 768-byte prefix is stored
12824 locally for BLOB fields. Refer to dict_table_get_format() */
12825 const bool prefix= !dict_table_has_atomic_blobs(table);
12826
12827 const ulint free_space=
12828 page_get_free_space_of_empty(table->flags & DICT_TF_COMPACT) / 2;
12829
12830 push_warning_printf(
12831 thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
12832 "Row size too large (> " ULINTPF "). Changing some columns to TEXT"
12833 " or BLOB %smay help. In current row format, BLOB prefix of"
12834 " %d bytes is stored inline.",
12835 free_space,
12836 prefix ? "or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED " : "",
12837 prefix ? DICT_MAX_FIXED_COL_LEN : 0);
12838 }
12839
row_size_is_acceptable(const dict_index_t & index,bool strict) const12840 bool create_table_info_t::row_size_is_acceptable(
12841 const dict_index_t &index, bool strict) const
12842 {
12843 if ((index.type & DICT_FTS) || index.table->is_system_db)
12844 {
12845 /* Ignore system tables check because innodb_table_stats
12846 maximum row size can not fit on 4k page. */
12847 return true;
12848 }
12849
12850 const bool innodb_strict_mode= THDVAR(m_thd, strict_mode);
12851 dict_index_t::record_size_info_t info= index.record_size_info();
12852
12853 if (info.row_is_too_big())
12854 {
12855 ut_ad(info.get_overrun_size() != 0);
12856 ut_ad(info.max_leaf_size != 0);
12857
12858 const size_t idx= info.get_first_overrun_field_index();
12859 const dict_field_t *field= dict_index_get_nth_field(&index, idx);
12860
12861 ut_ad((!field->name) == field->col->is_dropped());
12862 if (innodb_strict_mode || global_system_variables.log_warnings > 2)
12863 {
12864 ib::error_or_warn eow(strict && innodb_strict_mode);
12865 if (field->name)
12866 eow << "Cannot add field " << field->name << " in table ";
12867 else
12868 eow << "Cannot add an instantly dropped column in table ";
12869 eow << index.table->name << " because after adding it, the row size is "
12870 << info.get_overrun_size()
12871 << " which is greater than maximum allowed size ("
12872 << info.max_leaf_size << " bytes) for a record on index leaf page.";
12873 }
12874
12875 if (strict && innodb_strict_mode)
12876 return false;
12877
12878 ib_warn_row_too_big(m_thd, index.table);
12879 }
12880
12881 return true;
12882 }
12883
/** Update a new table in an InnoDB database.
Opens the table named m_table_name and then, in this order: resolves the
FTS_DOC_ID index pointer if the table has FTS data, copies the .frm flags
from m_create_info, resets the statistics with DICT_STATS_EMPTY_TABLE,
loads the FTS stopword list if DICT_TF2_FTS is set in m_flags2,
initializes the AUTO_INCREMENT counter if the table has such a column,
and parses hints from the table comment. The table is closed again
before returning.
@return error number
@retval	0	on success
@retval	-1	if innobase_fts_load_stopword() failed */
int
create_table_info_t::create_table_update_dict()
{
	dict_table_t*	innobase_table;

	DBUG_ENTER("create_table_update_dict");

	innobase_table = dict_table_open_on_name(
		m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);

	/* The table is only checked for existence in debug builds. */
	DBUG_ASSERT(innobase_table != 0);
	if (innobase_table->fts != NULL) {
		if (innobase_table->fts_doc_id_index == NULL) {
			/* Look up the index by its reserved name
			and cache the pointer in the table object. */
			innobase_table->fts_doc_id_index
				= dict_table_get_index_on_name(
					innobase_table, FTS_DOC_ID_INDEX_NAME);
			DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL);
		} else {
			/* The cached pointer must agree with a lookup
			by name. */
			DBUG_ASSERT(innobase_table->fts_doc_id_index
				    == dict_table_get_index_on_name(
						innobase_table,
						FTS_DOC_ID_INDEX_NAME));
		}
	}

	/* A table has an FTS_DOC_ID index if and only if it has FTS data. */
	DBUG_ASSERT((innobase_table->fts == NULL)
		    == (innobase_table->fts_doc_id_index == NULL));

	innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info);

	dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);

	/* Load server stopword into FTS cache */
	if (m_flags2 & DICT_TF2_FTS) {
		if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
			/* Release the table reference before reporting
			the failure to the caller. */
			dict_table_close(innobase_table, FALSE, FALSE);
			srv_active_wake_master_thread();
			DBUG_RETURN(-1);
		}

		/* fts_optimize_add_table() is invoked while holding
		dict_sys.mutex. */
		mutex_enter(&dict_sys.mutex);
		fts_optimize_add_table(innobase_table);
		mutex_exit(&dict_sys.mutex);
	}

	if (const Field* ai = m_form->found_next_number_field) {
		ut_ad(ai->stored_in_db());

		ib_uint64_t	autoinc = m_create_info->auto_increment_value;

		/* A requested value of 0 is treated as 1. */
		if (autoinc == 0) {
			autoinc = 1;
		}

		/* The counter is initialized and, for persistent tables,
		written to the first index while holding autoinc_mutex. */
		innobase_table->autoinc_mutex.lock();
		dict_table_autoinc_initialize(innobase_table, autoinc);

		if (innobase_table->is_temporary()) {
			/* AUTO_INCREMENT is not persistent for
			TEMPORARY TABLE. Temporary tables are never
			evicted. Keep the counter in memory only. */
		} else {
			const unsigned	col_no = innodb_col_no(ai);

			/* persistent_autoinc is stored as 1 + the position
			returned by dict_table_get_nth_col_pos(). */
			innobase_table->persistent_autoinc = 1
				+ dict_table_get_nth_col_pos(
					innobase_table, col_no, NULL);

			/* Persist the "last used" value, which
			typically is AUTO_INCREMENT - 1.
			In btr_create(), the value 0 was already written. */
			if (--autoinc) {
				btr_write_autoinc(
					dict_table_get_first_index(
						innobase_table),
					autoinc);
			}
		}

		innobase_table->autoinc_mutex.unlock();
	}

	innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);

	dict_table_close(innobase_table, FALSE, FALSE);
	DBUG_RETURN(0);
}
12973
12974 /** Allocate a new trx. */
12975 void
allocate_trx()12976 create_table_info_t::allocate_trx()
12977 {
12978 m_trx = innobase_trx_allocate(m_thd);
12979
12980 m_trx->will_lock = true;
12981 m_trx->ddl = true;
12982 }
12983
/** Create a new table to an InnoDB database.
If trx is NULL, a transaction is allocated here, the data dictionary is
locked here, and the transaction is freed before returning. If trx is
supplied, this function does not lock the data dictionary itself, but it
does unlock it on every path (success and failure) and never frees trx.
@param[in]	name		Table name, format: "db/table_name".
@param[in]	form		Table format; columns and index information.
@param[in]	create_info	Create info (including create statement string).
@param[in]	file_per_table	whether to create .ibd file
@param[in,out]	trx		dictionary transaction, or NULL to create new
@return 0 if success else error number. */
inline int
ha_innobase::create(
	const char*	name,
	TABLE*		form,
	HA_CREATE_INFO*	create_info,
	bool		file_per_table,
	trx_t*		trx)
{
	int		error;
	char		norm_name[FN_REFLEN];	/* {database}/{tablename} */
	char		remote_path[FN_REFLEN];	/* Absolute path of table */

	DBUG_ENTER("ha_innobase::create");

	DBUG_ASSERT(form->s == table_share);
	DBUG_ASSERT(table_share->table_type == TABLE_TYPE_SEQUENCE
		    || table_share->table_type == TABLE_TYPE_NORMAL);

	create_table_info_t	info(ha_thd(),
				     form,
				     create_info,
				     norm_name,
				     remote_path,
				     file_per_table, trx);

	if ((error = info.initialize())
	    || (error = info.prepare_create_table(name, !trx))) {
		/* A caller-supplied transaction is rolled back and the
		data dictionary is unlocked before reporting the error.
		Without a caller-supplied transaction, nothing has been
		allocated or locked by this function yet. */
		if (trx) {
			trx_rollback_for_mysql(trx);
			row_mysql_unlock_data_dictionary(trx);
		}
		DBUG_RETURN(error);
	}

	/* Whether the transaction is created (and must be freed) here. */
	const bool own_trx = !trx;

	if (own_trx) {
		info.allocate_trx();
		trx = info.trx();
		/* Latch the InnoDB data dictionary exclusively so that no deadlocks
		or lock waits can happen in it during a table create operation.
		Drop table etc. do this latching in row0mysql.cc. */
		row_mysql_lock_data_dictionary(trx);
		DBUG_ASSERT(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
	}

	if ((error = info.create_table(own_trx))) {
		/* Drop the being-created table before rollback,
		so that rollback can possibly rename back a table
		that could have been renamed before the failed creation. */
		if (info.drop_before_rollback()) {
			trx->error_state = DB_SUCCESS;
			row_drop_table_for_mysql(info.table_name(),
						 trx, SQLCOM_TRUNCATE, true,
						 false);
		}
		trx_rollback_for_mysql(trx);
		row_mysql_unlock_data_dictionary(trx);
		goto func_exit;
	}

	innobase_commit_low(trx);
	row_mysql_unlock_data_dictionary(trx);

	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
	log_buffer_flush_to_disk();

	ut_ad(!srv_read_only_mode);

	error = info.create_table_update_dict();

func_exit:
	/* Only a transaction that was allocated above is freed here. */
	if (own_trx) {
		trx->free();
	}

	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

	DBUG_RETURN(error);
}
13076
13077 /** Create a new table to an InnoDB database.
13078 @param[in] name Table name, format: "db/table_name".
13079 @param[in] form Table format; columns and index information.
13080 @param[in] create_info Create info (including create statement string).
13081 @return 0 if success else error number. */
13082 int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)13083 ha_innobase::create(
13084 const char* name,
13085 TABLE* form,
13086 HA_CREATE_INFO* create_info)
13087 {
13088 return create(name, form, create_info, srv_file_per_table);
13089 }
13090
/*****************************************************************//**
Discards or imports an InnoDB tablespace.
The request is refused in read-only mode, for temporary tables, and for
tables that reside in the system tablespace. Otherwise an exclusive table
lock is acquired, the tablespace is discarded or imported, and the
transaction is committed in order to release the lock. After a successful
import, persistent statistics are recalculated if they are enabled for
the table; a failure to do so only produces a warning.
@return 0 on success, otherwise a handler error code */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
	my_bool		discard)	/*!< in: TRUE if discard, else import */
{

	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	ut_a(m_prebuilt->trx != NULL);
	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));

	if (high_level_read_only) {
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	}

	/* Temporary tables can be neither discarded nor imported. */
	if (m_prebuilt->table->is_temporary()) {
		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_CANNOT_DISCARD_TEMPORARY_TABLE);

		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
	}

	/* Neither can tables that are stored in the system tablespace. */
	if (m_prebuilt->table->space == fil_system.sys_space) {
		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_TABLE_IN_SYSTEM_TABLESPACE,
			m_prebuilt->table->name.m_name);

		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
	}

	trx_start_if_not_started(m_prebuilt->trx, true);

	/* Obtain an exclusive lock on the table. */
	dberr_t	err = row_mysql_lock_table(
		m_prebuilt->trx, m_prebuilt->table, LOCK_X,
		discard ? "setting table lock for DISCARD TABLESPACE"
			: "setting table lock for IMPORT TABLESPACE");

	if (err != DB_SUCCESS) {
		/* unable to lock the table: do nothing */
	} else if (discard) {

		/* Discarding an already discarded tablespace should be an
		idempotent operation. Also, if the .ibd file is missing the
		user may want to set the DISCARD flag in order to IMPORT
		a new tablespace. */

		if (!m_prebuilt->table->is_readable()) {
			ib_senderrf(
				m_prebuilt->trx->mysql_thd,
				IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
				m_prebuilt->table->name.m_name);
		}

		err = row_discard_tablespace_for_mysql(
			m_prebuilt->table->name.m_name, m_prebuilt->trx);

	} else if (m_prebuilt->table->is_readable()) {
		/* IMPORT was requested, but the table is readable:
		refuse to import over it. */

		/* Commit the transaction in order to
		release the table lock. */
		trx_commit_for_mysql(m_prebuilt->trx);

		ib::error() << "Unable to import tablespace "
			<< m_prebuilt->table->name << " because it already"
			" exists.  Please DISCARD the tablespace"
			" before IMPORT.";
		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_TABLESPACE_EXISTS, m_prebuilt->table->name.m_name);

		DBUG_RETURN(HA_ERR_TABLE_EXIST);
	} else {
		err = row_import_for_mysql(m_prebuilt->table, m_prebuilt);

		if (err == DB_SUCCESS) {

			/* Refresh the handler statistics with info()
			after a successful import. */
			info(HA_STATUS_TIME
			     | HA_STATUS_CONST
			     | HA_STATUS_VARIABLE
			     | HA_STATUS_AUTO);

			fil_crypt_set_encrypt_tables(srv_encrypt_tables);
		}
	}

	/* Commit the transaction in order to release the table lock. */
	trx_commit_for_mysql(m_prebuilt->trx);

	/* DISCARD is finished here, and so is any failed operation.
	Only a successful IMPORT continues to the statistics update. */
	if (discard || err != DB_SUCCESS) {
		DBUG_RETURN(convert_error_code_to_mysql(
				    err, m_prebuilt->table->flags, NULL));
	}

	if (dict_stats_is_persistent_enabled(m_prebuilt->table)) {
		dberr_t		ret;

		/* Adjust the persistent statistics. */
		ret = dict_stats_update(m_prebuilt->table,
					DICT_STATS_RECALC_PERSISTENT);

		/* A statistics failure is reported as a warning only;
		the import itself is still reported as successful. */
		if (ret != DB_SUCCESS) {
			push_warning_printf(
				ha_thd(),
				Sql_condition::WARN_LEVEL_WARN,
				ER_ALTER_INFO,
				"Error updating stats for table '%s'"
				" after table rebuild: %s",
				m_prebuilt->table->name.m_name,
				ut_strerr(ret));
		}
	}

	DBUG_RETURN(0);
}
13212
13213 /**
13214 Drops a table from an InnoDB database. Before calling this function,
13215 MySQL calls innobase_commit to commit the transaction of the current user.
13216 Then the current user cannot have locks set on the table. Drop table
13217 operation inside InnoDB will remove all locks any user has on the table
13218 inside InnoDB.
13219 @param[in] name table name
13220 @param[in] sqlcom SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ...
13221 @return error number */
delete_table(const char * name,enum_sql_command sqlcom)13222 inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
13223 {
13224 dberr_t err;
13225 THD* thd = ha_thd();
13226 char norm_name[FN_REFLEN];
13227
13228 DBUG_ENTER("ha_innobase::delete_table");
13229
13230 DBUG_EXECUTE_IF(
13231 "test_normalize_table_name_low",
13232 test_normalize_table_name_low();
13233 );
13234 DBUG_EXECUTE_IF(
13235 "test_ut_format_name",
13236 test_ut_format_name();
13237 );
13238
13239 /* Strangely, MySQL passes the table name without the '.frm'
13240 extension, in contrast to ::create */
13241 normalize_table_name(norm_name, name);
13242
13243 if (high_level_read_only) {
13244 DBUG_RETURN(HA_ERR_TABLE_READONLY);
13245 }
13246
13247 trx_t* parent_trx = check_trx_exists(thd);
13248
13249 /* Remove the to-be-dropped table from the list of modified tables
13250 by parent_trx. Otherwise we may end up with an orphaned pointer to
13251 the table object from parent_trx::mod_tables. This could happen in:
13252 SET AUTOCOMMIT=0;
13253 CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION
13254 ALL SELECT 1 AS a; */
13255 trx_mod_tables_t::const_iterator iter;
13256
13257 for (iter = parent_trx->mod_tables.begin();
13258 iter != parent_trx->mod_tables.end();
13259 ++iter) {
13260
13261 dict_table_t* table_to_drop = iter->first;
13262
13263 if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
13264 parent_trx->mod_tables.erase(table_to_drop);
13265 break;
13266 }
13267 }
13268
13269 trx_t* trx = innobase_trx_allocate(thd);
13270
13271 ulint name_len = strlen(name);
13272
13273 ut_a(name_len < 1000);
13274
13275 trx->will_lock = true;
13276
13277 /* Drop the table in InnoDB */
13278
13279 err = row_drop_table_for_mysql(norm_name, trx, sqlcom);
13280
13281 if (err == DB_TABLE_NOT_FOUND
13282 && innobase_get_lower_case_table_names() == 1) {
13283 char* is_part = is_partition(norm_name);
13284
13285 if (is_part) {
13286 char par_case_name[FN_REFLEN];
13287
13288 #ifndef __WIN__
13289 /* Check for the table using lower
13290 case name, including the partition
13291 separator "P" */
13292 strcpy(par_case_name, norm_name);
13293 innobase_casedn_str(par_case_name);
13294 #else
13295 /* On Windows platfrom, check
13296 whether there exists table name in
13297 system table whose name is
13298 not being normalized to lower case */
13299 normalize_table_name_c_low(
13300 par_case_name, name, FALSE);
13301 #endif
13302 err = row_drop_table_for_mysql(
13303 par_case_name, trx, sqlcom);
13304 }
13305 }
13306
13307 if (err == DB_TABLE_NOT_FOUND) {
13308 /* Test to drop all tables which matches db/tablename + '#'.
13309 Only partitions can have '#' as non-first character in
13310 the table name!
13311
13312 Temporary table names always start with '#', partitions are
13313 the only 'tables' that can have '#' after the first character
13314 and table name must have length > 0. User tables cannot have
13315 '#' since it would be translated to @0023. Therefor this should
13316 only match partitions. */
13317 uint len = (uint) strlen(norm_name);
13318 ulint num_partitions;
13319 ut_a(len < FN_REFLEN);
13320 norm_name[len] = '#';
13321 norm_name[len + 1] = 0;
13322 err = row_drop_database_for_mysql(norm_name, trx,
13323 &num_partitions);
13324 norm_name[len] = 0;
13325 table_name_t tbl_name(norm_name);
13326 if (num_partitions == 0 && !tbl_name.is_temporary()) {
13327 ib::error() << "Table " << tbl_name <<
13328 " does not exist in the InnoDB"
13329 " internal data dictionary though MariaDB is"
13330 " trying to drop it. Have you copied the .frm"
13331 " file of the table to the MariaDB database"
13332 " directory from another database? "
13333 << TROUBLESHOOTING_MSG;
13334 }
13335 if (num_partitions == 0) {
13336 err = DB_TABLE_NOT_FOUND;
13337 }
13338 }
13339
13340 if (err == DB_TABLE_NOT_FOUND
13341 && innobase_get_lower_case_table_names() == 1) {
13342 char* is_part = is_partition(norm_name);
13343
13344 if (is_part != NULL) {
13345 char par_case_name[FN_REFLEN];
13346
13347 #ifndef _WIN32
13348 /* Check for the table using lower
13349 case name, including the partition
13350 separator "P" */
13351 strcpy(par_case_name, norm_name);
13352 innobase_casedn_str(par_case_name);
13353 #else
13354 /* On Windows platfrom, check
13355 whether there exists table name in
13356 system table whose name is
13357 not being normalized to lower case */
13358 create_table_info_t::normalize_table_name_low(
13359 par_case_name, name, FALSE);
13360 #endif /* _WIN32 */
13361 err = row_drop_table_for_mysql(
13362 par_case_name, trx, sqlcom, true);
13363 }
13364 }
13365
13366 ut_ad(!srv_read_only_mode);
13367 /* Flush the log to reduce probability that the .frm files and
13368 the InnoDB data dictionary get out-of-sync if the user runs
13369 with innodb_flush_log_at_trx_commit = 0 */
13370
13371 log_buffer_flush_to_disk();
13372
13373 innobase_commit_low(trx);
13374
13375 trx->free();
13376
13377 DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
13378 }
13379
13380 /** Drop an InnoDB table.
13381 @param[in] name table name
13382 @return error number */
delete_table(const char * name)13383 int ha_innobase::delete_table(const char* name)
13384 {
13385 enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd()));
13386 /* SQLCOM_TRUNCATE should be passed via ha_innobase::truncate() only.
13387
13388 On client disconnect, when dropping temporary tables, the
13389 previous sqlcom would not be overwritten. In such a case, we
13390 will have thd_kill_level() != NOT_KILLED, !m_prebuilt can
13391 hold, and sqlcom could be anything, including TRUNCATE.
13392
13393 The sqlcom only matters for persistent tables; no persistent
13394 metadata or FOREIGN KEY metadata is kept for temporary
13395 tables. Therefore, we relax the assertion. If there is a bug
13396 that slips through this assertion due to !m_prebuilt, the
13397 worst impact should be that on DROP TABLE of a persistent
13398 table, FOREIGN KEY constraints will be ignored and their
13399 metadata will not be removed. */
13400 DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE
13401 || (thd_kill_level(ha_thd()) != THD_IS_NOT_KILLED
13402 && (!m_prebuilt
13403 || m_prebuilt->table->is_temporary())));
13404 return delete_table(name, sqlcom);
13405 }
13406
13407 /** Remove all tables in the named database inside InnoDB.
13408 @param[in] hton handlerton from InnoDB
13409 @param[in] path Database path; Inside InnoDB the name of the last
13410 directory in the path is used as the database name.
13411 For example, in 'mysql/data/test' the database name is 'test'. */
13412
13413 static
13414 void
innobase_drop_database(handlerton * hton,char * path)13415 innobase_drop_database(
13416 handlerton* hton,
13417 char* path)
13418 {
13419 char* namebuf;
13420
13421 /* Get the transaction associated with the current thd, or create one
13422 if not yet created */
13423
13424 DBUG_ASSERT(hton == innodb_hton_ptr);
13425
13426 if (high_level_read_only) {
13427 return;
13428 }
13429
13430 THD* thd = current_thd;
13431
13432 ulint len = 0;
13433 char* ptr = strend(path) - 2;
13434
13435 while (ptr >= path && *ptr != '\\' && *ptr != '/') {
13436 ptr--;
13437 len++;
13438 }
13439
13440 ptr++;
13441 namebuf = (char*) my_malloc(/*PSI_INSTRUMENT_ME,*/ (uint) len + 2, MYF(0));
13442
13443 memcpy(namebuf, ptr, len);
13444 namebuf[len] = '/';
13445 namebuf[len + 1] = '\0';
13446
13447 #ifdef _WIN32
13448 innobase_casedn_str(namebuf);
13449 #endif /* _WIN32 */
13450
13451 trx_t* trx = innobase_trx_allocate(thd);
13452 trx->will_lock = true;
13453
13454 ulint dummy;
13455
13456 row_drop_database_for_mysql(namebuf, trx, &dummy);
13457
13458 my_free(namebuf);
13459
13460 /* Flush the log to reduce probability that the .frm files and
13461 the InnoDB data dictionary get out-of-sync if the user runs
13462 with innodb_flush_log_at_trx_commit = 0 */
13463
13464 log_buffer_flush_to_disk();
13465
13466 innobase_commit_low(trx);
13467
13468 trx->free();
13469 }
13470
/** Rename an InnoDB table.
Before the rename, this waits for a pending FTS sync on any fulltext
index of the table to finish; if that does not happen in time,
DB_LOCK_WAIT_TIMEOUT is returned and nothing is renamed. If the rename
fails with DB_TABLE_NOT_FOUND and lower_case_table_names=1, it is retried
once for partition names with a differently cased source name.
The data dictionary is locked and unlocked here only if commit is set.
@param[in,out]	trx	InnoDB data dictionary transaction
@param[in]	from	old table name
@param[in]	to	new table name
@param[in]	commit	whether to commit trx (and to enforce FOREIGN KEY)
@return DB_SUCCESS or error code */
inline dberr_t innobase_rename_table(trx_t *trx, const char *from,
                                     const char *to, bool commit)
{
	dberr_t	error;
	char	norm_to[FN_REFLEN];
	char	norm_from[FN_REFLEN];

	DBUG_ENTER("innobase_rename_table");
	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX
		    || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);

	ut_ad(!srv_read_only_mode);

	normalize_table_name(norm_to, to);
	normalize_table_name(norm_from, from);

	DEBUG_SYNC_C("innodb_rename_table_ready");

	trx_start_if_not_started(trx, true);
	ut_ad(trx->will_lock);

	if (commit) {
		/* Serialize data dictionary operations with dictionary mutex:
		no deadlocks can occur then in these operations. */
		row_mysql_lock_data_dictionary(trx);
	}

	dict_table_t*   table = dict_table_open_on_name(
		norm_from, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);

	/* Since DICT_BG_YIELD has sleep for 250 milliseconds,
	Convert lock_wait_timeout unit from second to 250 milliseconds */
	long int lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd) * 4;
	if (table != NULL) {
		if (commit) {
			dict_stats_wait_bg_to_stop_using_table(table, trx);
		}
		for (dict_index_t* index = dict_table_get_first_index(table);
		     index != NULL;
		     index = dict_table_get_next_index(index)) {

			if (index->type & DICT_FTS) {
				/* Found */
				/* Wait for the FTS sync to finish. The
				post-decrement leaves lock_wait_timeout
				negative exactly when the budget ran out
				while the sync was still in progress. */
				while (index->index_fts_syncing
				       && !trx_is_interrupted(trx)
				       && (lock_wait_timeout--) > 0) {
					DICT_BG_YIELD(trx);
				}
			}
		}
		/* With commit, the table stays open until func_exit. */
		if (!commit) {
			dict_table_close(table, TRUE, FALSE);
		}
	}

	/* FTS sync is in progress. We shall timeout this operation */
	if (lock_wait_timeout < 0) {
		error = DB_LOCK_WAIT_TIMEOUT;
		goto func_exit;
	}

	error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit,
					   commit);

	if (error != DB_SUCCESS) {
		if (error == DB_TABLE_NOT_FOUND
		    && innobase_get_lower_case_table_names() == 1) {
			char*	is_part = is_partition(norm_from);

			if (is_part) {
				char	par_case_name[FN_REFLEN];
#ifndef _WIN32
				/* Check for the table using lower
				case name, including the partition
				separator "P" */
				strcpy(par_case_name, norm_from);
				innobase_casedn_str(par_case_name);
#else
				/* On Windows platform, check
				whether there exists table name in
				system table whose name is
				not being normalized to lower case */
				create_table_info_t::normalize_table_name_low(
					par_case_name, from, FALSE);
#endif /* _WIN32 */
				trx_start_if_not_started(trx, true);
				error = row_rename_table_for_mysql(
					par_case_name, norm_to, trx,
					true, false);
			}
		}

		/* error can be DB_SUCCESS here only if the retry
		above succeeded. */
		if (error == DB_SUCCESS) {
#ifndef _WIN32
			sql_print_warning("Rename partition table %s"
					  " succeeds after converting to lower"
					  " case. The table may have"
					  " been moved from a case"
					  " in-sensitive file system.\n",
					  norm_from);
#else
			sql_print_warning("Rename partition table %s"
					  " succeeds after skipping the step to"
					  " lower case the table name."
					  " The table may have been"
					  " moved from a case sensitive"
					  " file system.\n",
					  norm_from);
#endif /* _WIN32 */
		}
	}

func_exit:
	if (commit) {
		if (table) {
			/* Clear the BG_STAT_SHOULD_QUIT bit of the
			table before closing it. */
			table->stats_bg_flag &= ~BG_STAT_SHOULD_QUIT;
			dict_table_close(table, TRUE, FALSE);
		}
		row_mysql_unlock_data_dictionary(trx);
	}

	/* Flush the log to reduce probability that the .frm
	files and the InnoDB data dictionary get out-of-sync
	if the user runs with innodb_flush_log_at_trx_commit = 0 */

	log_buffer_flush_to_disk();

	DBUG_RETURN(error);
}
13606
/** TRUNCATE TABLE
The table is renamed to a temporary name, a new table is created under
the original name with the same row format, the handler is reopened on
the new table, and finally the renamed original table is dropped.
If the rename fails, the transaction is rolled back. If reopening the
new table fails, the handler keeps using its previous m_prebuilt.
@return	error code
@retval	0	on success */
int ha_innobase::truncate()
{
	DBUG_ENTER("ha_innobase::truncate");

	if (high_level_read_only) {
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	}

	update_thd();

	HA_CREATE_INFO	info;
	/* All strings built below are allocated from this heap,
	which is freed at the end of the function. */
	mem_heap_t*	heap = mem_heap_create(1000);
	dict_table_t*	ib_table = m_prebuilt->table;
	/* Remembered so that they can be carried over to the
	reopened table. */
	const time_t	update_time = ib_table->update_time;
	const ulint	stored_lock = m_prebuilt->stored_select_lock_type;
	info.init();
	update_create_info_from_table(&info, table);

	if (ib_table->is_temporary()) {
		info.options|= HA_LEX_CREATE_TMP_TABLE;
	} else {
		dict_get_and_save_data_dir_path(ib_table, false);
	}

	char* data_file_name = ib_table->data_dir_path;

	/* Pass a private copy of the data directory path, if any,
	to the table creation. */
	if (data_file_name) {
		info.data_file_name = data_file_name
			= mem_heap_strdup(heap, data_file_name);
	}

	const char* temp_name = dict_mem_create_temporary_tablename(
		heap, ib_table->name.m_name, ib_table->id);
	/* Copy the name, because it is used after the rename and
	after the old table object is no longer referenced. */
	const char* name = mem_heap_strdup(heap, ib_table->name.m_name);
	trx_t*	trx = innobase_trx_allocate(m_user_thd);
	trx->will_lock = true;
	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
	/* The data dictionary is locked here. It is unlocked either
	below (if the rename fails) or by create(), which unlocks it
	on all of its paths when a transaction is passed in. */
	row_mysql_lock_data_dictionary(trx);
	dict_stats_wait_bg_to_stop_using_table(ib_table, trx);

	/* Rename without committing (commit=false). */
	int err = convert_error_code_to_mysql(
		innobase_rename_table(trx, ib_table->name.m_name, temp_name,
				      false),
		ib_table->flags, m_user_thd);
	if (err) {
		trx_rollback_for_mysql(trx);
		row_mysql_unlock_data_dictionary(trx);
	} else {
		/* Create the replacement table in the row format
		of the original table. */
		switch (dict_tf_get_rec_format(ib_table->flags)) {
		case REC_FORMAT_REDUNDANT:
			info.row_type = ROW_TYPE_REDUNDANT;
			break;
		case REC_FORMAT_COMPACT:
			info.row_type = ROW_TYPE_COMPACT;
			break;
		case REC_FORMAT_COMPRESSED:
			info.row_type = ROW_TYPE_COMPRESSED;
			break;
		case REC_FORMAT_DYNAMIC:
			info.row_type = ROW_TYPE_DYNAMIC;
			break;
		}

		err = create(name, table, &info,
			     ib_table->is_temporary()
			     || dict_table_is_file_per_table(ib_table), trx);
	}

	trx->free();

	if (!err) {
		/* Reopen the newly created table, and drop the
		original table that was renamed to temp_name. */

		row_prebuilt_t* prebuilt = m_prebuilt;
		uchar* upd_buf = m_upd_buf;
		ulint upd_buf_size = m_upd_buf_size;
		/* Mimic ha_innobase::close(). */
		m_prebuilt = NULL;
		m_upd_buf = NULL;
		m_upd_buf_size = 0;
		err = open(name, 0, 0);
		if (!err) {
			/* Carry over the saved state, then release
			everything that belonged to the old table. */
			m_prebuilt->stored_select_lock_type = stored_lock;
			m_prebuilt->table->update_time = update_time;
			row_prebuilt_free(prebuilt, FALSE);
			delete_table(temp_name, SQLCOM_TRUNCATE);
			my_free(upd_buf);
		} else {
			/* Revert to the old table before truncation. */
			m_prebuilt = prebuilt;
			m_upd_buf = upd_buf;
			m_upd_buf_size = upd_buf_size;
		}
	}

	mem_heap_free(heap);
	DBUG_RETURN(err);
}
13709
13710 /*********************************************************************//**
13711 Renames an InnoDB table.
13712 @return 0 or error code */
13713
13714 int
rename_table(const char * from,const char * to)13715 ha_innobase::rename_table(
13716 /*======================*/
13717 const char* from, /*!< in: old name of the table */
13718 const char* to) /*!< in: new name of the table */
13719 {
13720 THD* thd = ha_thd();
13721
13722 DBUG_ENTER("ha_innobase::rename_table");
13723
13724 if (high_level_read_only) {
13725 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
13726 DBUG_RETURN(HA_ERR_TABLE_READONLY);
13727 }
13728
13729 trx_t* trx = innobase_trx_allocate(thd);
13730 trx->will_lock = true;
13731 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
13732
13733 dberr_t error = innobase_rename_table(trx, from, to, true);
13734
13735 DEBUG_SYNC(thd, "after_innobase_rename_table");
13736
13737 innobase_commit_low(trx);
13738
13739 trx->free();
13740
13741 if (error == DB_SUCCESS) {
13742 char norm_from[MAX_FULL_NAME_LEN];
13743 char norm_to[MAX_FULL_NAME_LEN];
13744 char errstr[512];
13745 dberr_t ret;
13746
13747 normalize_table_name(norm_from, from);
13748 normalize_table_name(norm_to, to);
13749
13750 ret = dict_stats_rename_table(norm_from, norm_to,
13751 errstr, sizeof(errstr));
13752
13753 if (ret != DB_SUCCESS) {
13754 ib::error() << errstr;
13755
13756 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
13757 ER_LOCK_WAIT_TIMEOUT, errstr);
13758 }
13759 }
13760
13761 /* Add a special case to handle the Duplicated Key error
13762 and return DB_ERROR instead.
13763 This is to avoid a possible SIGSEGV error from mysql error
13764 handling code. Currently, mysql handles the Duplicated Key
13765 error by re-entering the storage layer and getting dup key
13766 info by calling get_dup_key(). This operation requires a valid
13767 table handle ('row_prebuilt_t' structure) which could no
13768 longer be available in the error handling stage. The suggested
13769 solution is to report a 'table exists' error message (since
13770 the dup key error here is due to an existing table whose name
13771 is the one we are trying to rename to) and return the generic
13772 error code. */
13773 if (error == DB_DUPLICATE_KEY) {
13774 my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
13775
13776 error = DB_ERROR;
13777 } else if (error == DB_LOCK_WAIT_TIMEOUT) {
13778 my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0), to);
13779
13780 error = DB_LOCK_WAIT;
13781 }
13782
13783 DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
13784 }
13785
/*********************************************************************//**
Estimates the number of index records in a range.
The two range endpoints are converted from the server key format into
InnoDB tuples and handed to rtr_estimate_n_rows_in_range() for spatial
indexes or to btr_estimate_n_rows_in_range() otherwise. An estimate of 0
is always reported as 1. As a side effect, active_index is set to keynr.
@return estimated number of rows */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
	uint			keynr,		/*!< in: index number */
	key_range		*min_key,	/*!< in: start key value of the
						range, may also be 0 */
	key_range		*max_key)	/*!< in: range end key val, may
						also be 0 */
{
	KEY*		key;
	dict_index_t*	index;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ha_rows		n_rows;
	page_cur_mode_t	mode1;
	page_cur_mode_t	mode2;
	mem_heap_t*	heap;

	DBUG_ENTER("records_in_range");

	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));

	m_prebuilt->trx->op_info = "estimating records in index range";

	active_index = keynr;

	key = table->key_info + active_index;

	index = innobase_get_index(keynr);

	/* There exists possibility of not being able to find requested
	index due to inconsistency between MySQL and InnoDB dictionary info.
	Necessary message should have been printed in innobase_get_index() */

	/* All of the early exits below happen before the heap is created,
	so func_exit has nothing to free for them. */
	if (!m_prebuilt->table->space) {
		n_rows = HA_POS_ERROR;
		goto func_exit;
	}
	if (!index) {
		n_rows = HA_POS_ERROR;
		goto func_exit;
	}
	/* NOTE(review): the next two exits store HA_ERR_* error codes in
	n_rows, which is returned as a row count. Confirm that callers are
	meant to see these values as estimates. */
	if (index->is_corrupted()) {
		n_rows = HA_ERR_INDEX_CORRUPT;
		goto func_exit;
	}
	if (!row_merge_is_index_usable(m_prebuilt->trx, index)) {
		n_rows = HA_ERR_TABLE_DEF_CHANGED;
		goto func_exit;
	}

	/* One heap holds both endpoint tuples; it is sized for two tuples
	of key->ext_key_parts fields each. */
	heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t)
				    + sizeof(dtuple_t)));

	range_start = dtuple_create(heap, key->ext_key_parts);
	dict_index_copy_types(range_start, index, key->ext_key_parts);

	range_end = dtuple_create(heap, key->ext_key_parts);
	dict_index_copy_types(range_end, index, key->ext_key_parts);

	/* Convert the lower endpoint; a missing min_key is passed on as a
	null key of length 0. */
	row_sel_convert_mysql_key_to_innobase(
		range_start,
		m_prebuilt->srch_key_val1,
		m_prebuilt->srch_key_val_len,
		index,
		(byte*) (min_key ? min_key->key : (const uchar*) 0),
		(ulint) (min_key ? min_key->length : 0));

	DBUG_ASSERT(min_key
		    ? range_start->n_fields > 0
		    : range_start->n_fields == 0);

	/* Convert the upper endpoint in the same way, using the second
	search key buffer. */
	row_sel_convert_mysql_key_to_innobase(
		range_end,
		m_prebuilt->srch_key_val2,
		m_prebuilt->srch_key_val_len,
		index,
		(byte*) (max_key ? max_key->key : (const uchar*) 0),
		(ulint) (max_key ? max_key->length : 0));

	DBUG_ASSERT(max_key
		    ? range_end->n_fields > 0
		    : range_end->n_fields == 0);

	/* A missing endpoint is treated as HA_READ_KEY_EXACT when the
	search mode is chosen. */
	mode1 = convert_search_mode_to_innobase(
		min_key ? min_key->flag : HA_READ_KEY_EXACT);

	mode2 = convert_search_mode_to_innobase(
		max_key ? max_key->flag : HA_READ_KEY_EXACT);

	if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {

		if (dict_index_is_spatial(index)) {
			/*Only min_key used in spatial index. */
			n_rows = rtr_estimate_n_rows_in_range(
				index, range_start, mode1);
		} else {
			n_rows = btr_estimate_n_rows_in_range(
				index, range_start, mode1, range_end, mode2);
		}
	} else {

		/* At least one search mode has no InnoDB equivalent. */
		n_rows = HA_POS_ERROR;
	}

	mem_heap_free(heap);

	/* Debug injection point: report the estimate as a warning. */
	DBUG_EXECUTE_IF(
		"print_btr_estimate_n_rows_in_range_return_value",
		push_warning_printf(
			ha_thd(), Sql_condition::WARN_LEVEL_WARN,
			ER_NO_DEFAULT,
			"btr_estimate_n_rows_in_range(): %lld",
			(longlong) n_rows);
	);

func_exit:

	m_prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}
13921
13922 /*********************************************************************//**
13923 Gives an UPPER BOUND to the number of rows in a table. This is used in
13924 filesort.cc.
13925 @return upper bound of rows */
13926
13927 ha_rows
estimate_rows_upper_bound()13928 ha_innobase::estimate_rows_upper_bound()
13929 /*====================================*/
13930 {
13931 const dict_index_t* index;
13932 ulonglong estimate;
13933 ulonglong local_data_file_length;
13934
13935 DBUG_ENTER("estimate_rows_upper_bound");
13936
13937 /* We do not know if MySQL can call this function before calling
13938 external_lock(). To be safe, update the thd of the current table
13939 handle. */
13940
13941 update_thd(ha_thd());
13942
13943 m_prebuilt->trx->op_info = "calculating upper bound for table rows";
13944
13945 index = dict_table_get_first_index(m_prebuilt->table);
13946
13947 ulint stat_n_leaf_pages = index->stat_n_leaf_pages;
13948
13949 ut_a(stat_n_leaf_pages > 0);
13950
13951 local_data_file_length = ulonglong(stat_n_leaf_pages)
13952 << srv_page_size_shift;
13953
13954 /* Calculate a minimum length for a clustered index record and from
13955 that an upper bound for the number of rows. Since we only calculate
13956 new statistics in row0mysql.cc when a table has grown by a threshold
13957 factor, we must add a safety factor 2 in front of the formula below. */
13958
13959 estimate = 2 * local_data_file_length
13960 / dict_index_calc_min_rec_len(index);
13961
13962 m_prebuilt->trx->op_info = "";
13963
13964 /* Set num_rows less than MERGEBUFF to simulate the case where we do
13965 not have enough space to merge the externally sorted file blocks. */
13966 DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF",
13967 estimate = 2;
13968 DBUG_SET("-d,set_num_rows_lt_MERGEBUFF");
13969 );
13970
13971 DBUG_RETURN((ha_rows) estimate);
13972 }
13973
13974 /*********************************************************************//**
13975 How many seeks it will take to read through the table. This is to be
13976 comparable to the number returned by records_in_range so that we can
13977 decide if we should scan the table or use keys.
13978 @return estimated time measured in disk seeks */
13979
13980 double
scan_time()13981 ha_innobase::scan_time()
13982 /*====================*/
13983 {
13984 /* Since MySQL seems to favor table scans too much over index
13985 searches, we pretend that a sequential read takes the same time
13986 as a random disk read, that is, we do not divide the following
13987 by 10, which would be physically realistic. */
13988
13989 /* The locking below is disabled for performance reasons. Without
13990 it we could end up returning uninitialized value to the caller,
13991 which in the worst case could make some query plan go bogus or
13992 issue a Valgrind warning. */
13993 if (m_prebuilt == NULL) {
13994 /* In case of derived table, Optimizer will try to fetch stat
13995 for table even before table is create or open. In such
13996 cases return default value of 1.
13997 TODO: This will be further improved to return some approximate
13998 estimate but that would also needs pre-population of stats
13999 structure. As of now approach is in sync with MyISAM. */
14000 return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
14001 }
14002
14003 ulint stat_clustered_index_size;
14004
14005 ut_a(m_prebuilt->table->stat_initialized);
14006
14007 stat_clustered_index_size =
14008 m_prebuilt->table->stat_clustered_index_size;
14009
14010 return((double) stat_clustered_index_size);
14011 }
14012
14013 /******************************************************************//**
14014 Calculate the time it takes to read a set of ranges through an index
14015 This enables us to optimise reads for clustered indexes.
14016 @return estimated time measured in disk seeks */
14017
14018 double
read_time(uint index,uint ranges,ha_rows rows)14019 ha_innobase::read_time(
14020 /*===================*/
14021 uint index, /*!< in: key number */
14022 uint ranges, /*!< in: how many ranges */
14023 ha_rows rows) /*!< in: estimated number of rows in the ranges */
14024 {
14025 ha_rows total_rows;
14026
14027 if (index != table->s->primary_key) {
14028 /* Not clustered */
14029 return(handler::read_time(index, ranges, rows));
14030 }
14031
14032 /* Assume that the read time is proportional to the scan time for all
14033 rows + at most one seek per range. */
14034
14035 double time_for_scan = scan_time();
14036
14037 if ((total_rows = estimate_rows_upper_bound()) < rows) {
14038
14039 return(time_for_scan);
14040 }
14041
14042 return(ranges + (double) rows / (double) total_rows * time_for_scan);
14043 }
14044
14045 /** Update the system variable with the given value of the InnoDB
14046 buffer pool size.
14047 @param[in] buf_pool_size given value of buffer pool size.*/
14048 void
innodb_set_buf_pool_size(ulonglong buf_pool_size)14049 innodb_set_buf_pool_size(ulonglong buf_pool_size)
14050 {
14051 innobase_buffer_pool_size = buf_pool_size;
14052 }
14053
14054 /*********************************************************************//**
14055 Calculates the key number used inside MySQL for an Innobase index.
14056 @return the key number used inside MySQL */
14057 static
14058 unsigned
innobase_get_mysql_key_number_for_index(const TABLE * table,dict_table_t * ib_table,const dict_index_t * index)14059 innobase_get_mysql_key_number_for_index(
14060 /*====================================*/
14061 const TABLE* table, /*!< in: table in MySQL data
14062 dictionary */
14063 dict_table_t* ib_table,/*!< in: table in InnoDB data
14064 dictionary */
14065 const dict_index_t* index) /*!< in: index */
14066 {
14067 const dict_index_t* ind;
14068 unsigned int i;
14069
14070 /* If index does not belong to the table object of share structure
14071 (ib_table comes from the share structure) search the index->table
14072 object instead */
14073 if (index->table != ib_table) {
14074 i = 0;
14075 ind = dict_table_get_first_index(index->table);
14076
14077 while (index != ind) {
14078 ind = dict_table_get_next_index(ind);
14079 i++;
14080 }
14081
14082 if (dict_index_is_auto_gen_clust(index)) {
14083 ut_a(i > 0);
14084 i--;
14085 }
14086
14087 return(i);
14088 }
14089
14090 /* Directly find matching index with information from mysql TABLE
14091 structure and InnoDB dict_index_t list */
14092 for (i = 0; i < table->s->keys; i++) {
14093 ind = dict_table_get_index_on_name(
14094 ib_table, table->key_info[i].name.str);
14095
14096 if (index == ind) {
14097 return(i);
14098 }
14099 }
14100
14101 /* Loop through each index of the table and lock them */
14102 for (ind = dict_table_get_first_index(ib_table);
14103 ind != NULL;
14104 ind = dict_table_get_next_index(ind)) {
14105 if (index == ind) {
14106 /* Temp index is internal to InnoDB, that is
14107 not present in the MySQL index list, so no
14108 need to print such mismatch warning. */
14109 if (index->is_committed()) {
14110 sql_print_warning(
14111 "Found index %s in InnoDB index list"
14112 " but not its MariaDB index number."
14113 " It could be an InnoDB internal"
14114 " index.",
14115 index->name());
14116 }
14117 return(~0U);
14118 }
14119 }
14120
14121 ut_error;
14122
14123 return(~0U);
14124 }
14125
14126 /*********************************************************************//**
14127 Calculate Record Per Key value. Need to exclude the NULL value if
14128 innodb_stats_method is set to "nulls_ignored"
14129 @return estimated record per key value */
14130 rec_per_key_t
innodb_rec_per_key(dict_index_t * index,ulint i,ha_rows records)14131 innodb_rec_per_key(
14132 /*===============*/
14133 dict_index_t* index, /*!< in: dict_index_t structure */
14134 ulint i, /*!< in: the column we are
14135 calculating rec per key */
14136 ha_rows records) /*!< in: estimated total records */
14137 {
14138 rec_per_key_t rec_per_key;
14139 ib_uint64_t n_diff;
14140
14141 ut_a(index->table->stat_initialized);
14142
14143 ut_ad(i < dict_index_get_n_unique(index));
14144 ut_ad(!dict_index_is_spatial(index));
14145
14146 if (records == 0) {
14147 /* "Records per key" is meaningless for empty tables.
14148 Return 1.0 because that is most convenient to the Optimizer. */
14149 return(1.0);
14150 }
14151
14152 n_diff = index->stat_n_diff_key_vals[i];
14153
14154 if (n_diff == 0) {
14155
14156 rec_per_key = static_cast<rec_per_key_t>(records);
14157 } else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
14158 ib_uint64_t n_null;
14159 ib_uint64_t n_non_null;
14160
14161 n_non_null = index->stat_n_non_null_key_vals[i];
14162
14163 /* In theory, index->stat_n_non_null_key_vals[i]
14164 should always be less than the number of records.
14165 Since this is statistics value, the value could
14166 have slight discrepancy. But we will make sure
14167 the number of null values is not a negative number. */
14168 if (records < n_non_null) {
14169 n_null = 0;
14170 } else {
14171 n_null = records - n_non_null;
14172 }
14173
14174 /* If the number of NULL values is the same as or
14175 larger than that of the distinct values, we could
14176 consider that the table consists mostly of NULL value.
14177 Set rec_per_key to 1. */
14178 if (n_diff <= n_null) {
14179 rec_per_key = 1.0;
14180 } else {
14181 /* Need to exclude rows with NULL values from
14182 rec_per_key calculation */
14183 rec_per_key
14184 = static_cast<rec_per_key_t>(records - n_null)
14185 / (n_diff - n_null);
14186 }
14187 } else {
14188 DEBUG_SYNC_C("after_checking_for_0");
14189 rec_per_key = static_cast<rec_per_key_t>(records) / n_diff;
14190 }
14191
14192 if (rec_per_key < 1.0) {
14193 /* Values below 1.0 are meaningless and must be due to the
14194 stats being imprecise. */
14195 rec_per_key = 1.0;
14196 }
14197
14198 return(rec_per_key);
14199 }
14200
14201 /** Calculate how many KiB of new data we will be able to insert to the
14202 tablespace without running out of space. Start with a space object that has
14203 been acquired by the caller who holds it for the calculation,
14204 @param[in] space tablespace object from fil_space_acquire()
14205 @return available space in KiB */
14206 static uintmax_t
fsp_get_available_space_in_free_extents(const fil_space_t & space)14207 fsp_get_available_space_in_free_extents(const fil_space_t& space)
14208 {
14209 ulint size_in_header = space.size_in_header;
14210 if (size_in_header < FSP_EXTENT_SIZE) {
14211 return 0; /* TODO: count free frag pages and
14212 return a value based on that */
14213 }
14214
14215 /* Below we play safe when counting free extents above the free limit:
14216 some of them will contain extent descriptor pages, and therefore
14217 will not be free extents */
14218 ut_ad(size_in_header >= space.free_limit);
14219 ulint n_free_up =
14220 (size_in_header - space.free_limit) / FSP_EXTENT_SIZE;
14221
14222 const ulint size = space.physical_size();
14223 if (n_free_up > 0) {
14224 n_free_up--;
14225 n_free_up -= n_free_up / (size / FSP_EXTENT_SIZE);
14226 }
14227
14228 /* We reserve 1 extent + 0.5 % of the space size to undo logs
14229 and 1 extent + 0.5 % to cleaning operations; NOTE: this source
14230 code is duplicated in the function above! */
14231
14232 ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
14233 ulint n_free = space.free_len + n_free_up;
14234
14235 if (reserve > n_free) {
14236 return(0);
14237 }
14238
14239 return(static_cast<uintmax_t>(n_free - reserve)
14240 * FSP_EXTENT_SIZE * (size / 1024));
14241 }
14242
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
The bits of flag select what is filled in:
HA_STATUS_TIME: stats.update_time, optionally after refreshing the table
statistics; HA_STATUS_VARIABLE: row count and size figures;
HA_STATUS_CONST: stats.create_time and the rec_per_key[] values of every
key; HA_STATUS_ERRKEY: errkey; HA_STATUS_AUTO: stats.auto_increment_value.
@return HA_ERR_* error code or 0; HA_ERR_GENERIC is returned when
dict_stats_update() fails */

int
ha_innobase::info_low(
/*==================*/
	uint	flag,	/*!< in: what information is requested */
	bool	is_analyze)	/*!< in: true when called from analyze();
				the statistics are then recalculated */
{
	dict_table_t*	ib_table;
	ib_uint64_t	n_rows;
	char		path[FN_REFLEN];
	os_file_stat_t	stat_info;

	/* Note that the trace name is "info", not "info_low". */
	DBUG_ENTER("info");

	DEBUG_SYNC_C("ha_innobase_info_low");

	ut_ad(!mutex_own(&dict_sys.mutex));

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "returning various info to MariaDB";

	ib_table = m_prebuilt->table;
	DBUG_ASSERT(ib_table->get_ref_count() > 0);

	/* An unreadable table is marked as having initialized statistics,
	which satisfies the ut_a(stat_initialized) checks below. */
	if (!ib_table->is_readable()) {
		ib_table->stat_initialized = true;
	}

	if (flag & HA_STATUS_TIME) {
		/* Statistics are only refreshed for ANALYZE or when
		innobase_stats_on_metadata is set. */
		if (is_analyze || innobase_stats_on_metadata) {

			dict_stats_upd_option_t	opt;
			dberr_t			ret;

			m_prebuilt->trx->op_info = "updating table statistics";

			if (dict_stats_is_persistent_enabled(ib_table)) {

				if (is_analyze) {
					/* Under the data dictionary lock,
					remove the table from the recalc
					pool and wait for the background
					stats work to stop using it. */
					row_mysql_lock_data_dictionary(
						m_prebuilt->trx);
					dict_stats_recalc_pool_del(ib_table);
					dict_stats_wait_bg_to_stop_using_table(
						ib_table, m_prebuilt->trx);
					row_mysql_unlock_data_dictionary(
						m_prebuilt->trx);
					opt = DICT_STATS_RECALC_PERSISTENT;
				} else {
					/* This is e.g. 'SHOW INDEXES', fetch
					the persistent stats from disk. */
					opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
				}
			} else {
				opt = DICT_STATS_RECALC_TRANSIENT;
			}

			ret = dict_stats_update(ib_table, opt);

			/* Clear the BG_STAT_SHOULD_QUIT bit again after a
			persistent recalculation, whether or not the
			update succeeded. */
			if (opt == DICT_STATS_RECALC_PERSISTENT) {
				mutex_enter(&dict_sys.mutex);
				ib_table->stats_bg_flag
					&= byte(~BG_STAT_SHOULD_QUIT);
				mutex_exit(&dict_sys.mutex);
			}

			if (ret != DB_SUCCESS) {
				m_prebuilt->trx->op_info = "";
				DBUG_RETURN(HA_ERR_GENERIC);
			}

			m_prebuilt->trx->op_info =
				"returning various info to MariaDB";
		}


		stats.update_time = (ulong) ib_table->update_time;
	}

	/* Debug injection point: skip everything that would take
	dict_sys.mutex. */
	DBUG_EXECUTE_IF("dict_sys_mutex_avoid", goto func_exit;);

	dict_stats_init(ib_table);

	if (flag & HA_STATUS_VARIABLE) {

		ulint	stat_clustered_index_size;
		ulint	stat_sum_of_other_index_sizes;

		/* Copy the three statistics under dict_sys.mutex. */
		mutex_enter(&dict_sys.mutex);

		ut_a(ib_table->stat_initialized);

		n_rows = ib_table->stat_n_rows;

		stat_clustered_index_size
			= ib_table->stat_clustered_index_size;

		stat_sum_of_other_index_sizes
			= ib_table->stat_sum_of_other_index_sizes;

		mutex_exit(&dict_sys.mutex);

		/*
		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) {
			n_rows++;
		}

		/* Fix bug#40386: Not flushing query cache after truncate.
		n_rows can not be 0 unless the table is empty, set to 1
		instead. The original problem of bug#29507 is actually
		fixed in the server code. */
		if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 1;

			/* We need to reset the m_prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			m_prebuilt->autoinc_last_value = 0;
		}

		stats.records = (ha_rows) n_rows;
		stats.deleted = 0;
		/* The length fields are only assigned when the table has a
		tablespace object; otherwise they keep their previous
		values. */
		if (fil_space_t* space = ib_table->space) {
			const ulint size = space->physical_size();
			stats.data_file_length
				= ulonglong(stat_clustered_index_size)
				* size;
			stats.index_file_length
				= ulonglong(stat_sum_of_other_index_sizes)
				* size;
			/* The helper reports KiB; scale by 1024. */
			stats.delete_length = 1024
				* fsp_get_available_space_in_free_extents(
					*space);
		}
		stats.check_time = 0;
		stats.mrr_length_per_rec= (uint)ref_length +  8; // 8 = max(sizeof(void *));

		if (stats.records == 0) {
			stats.mean_rec_length = 0;
		} else {
			stats.mean_rec_length = (ulong)
				(stats.data_file_length / stats.records);
		}
	}

	if (flag & HA_STATUS_CONST) {
		ulong	i;
		/* Verify the number of index in InnoDB and MySQL
		matches up. If m_prebuilt->clust_index_was_generated
		holds, InnoDB defines GEN_CLUST_INDEX internally */
		ulint	num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
			- m_prebuilt->clust_index_was_generated;
		if (table->s->keys < num_innodb_index) {
			/* If there are too many indexes defined
			inside InnoDB, ignore those that are being
			created, because MySQL will only consider
			the fully built indexes here. */

			for (const dict_index_t* index
				     = UT_LIST_GET_FIRST(ib_table->indexes);
			     index != NULL;
			     index = UT_LIST_GET_NEXT(indexes, index)) {

				/* First, online index creation is
				completed inside InnoDB, and then
				MySQL attempts to upgrade the
				meta-data lock so that it can rebuild
				the .frm file. If we get here in that
				time frame, dict_index_is_online_ddl()
				would not hold and the index would
				still not be included in TABLE_SHARE. */
				if (!index->is_committed()) {
					num_innodb_index--;
				}
			}

			/* If InnoDB still has more indexes, discount the
			FTS doc id index when one exists. */
			if (table->s->keys < num_innodb_index
			    && innobase_fts_check_doc_id_index(
				    ib_table, NULL, NULL)
			    == FTS_EXIST_DOC_ID_INDEX) {
				num_innodb_index--;
			}
		}

		/* A remaining difference is recorded on the table and
		reported; processing continues regardless. */
		if (table->s->keys != num_innodb_index) {
			ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
			ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
		}

		/* Build the path of the table definition file so that its
		status can be queried for the creation time. */
		snprintf(path, sizeof(path), "%s/%s%s",
			 mysql_data_home, table->s->normalized_path.str,
			 reg_ext);

		unpack_filename(path,path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(
			    path, &stat_info, false,
			    srv_read_only_mode) == DB_SUCCESS) {
			stats.create_time = (ulong) stat_info.ctime;
		}

		/* Hold dict_sys.mutex until the end of this block; the
		destructor releases it on every way out of the scope. */
		struct Locking {
			Locking() { mutex_enter(&dict_sys.mutex); }
			~Locking() { mutex_exit(&dict_sys.mutex); }
		} locking;

		ut_a(ib_table->stat_initialized);

		for (i = 0; i < table->s->keys; i++) {
			ulong	j;

			dict_index_t* index = innobase_get_index(i);

			if (index == NULL) {
				ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
				ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
				break;
			}

			KEY*	key = &table->key_info[i];

			for (j = 0; j < key->ext_key_parts; j++) {

				if ((key->flags & HA_FULLTEXT)
				    || (key->flags & HA_SPATIAL)) {

					/* The record per key does not apply to
					FTS or Spatial indexes. */
				/*
					key->rec_per_key[j] = 1;
					key->set_records_per_key(j, 1.0);
				*/
					continue;
				}

				/* Stop at the first key part that lies
				beyond the unique columns of the InnoDB
				index. */
				if (j + 1 > index->n_uniq) {
					sql_print_error(
						"Index %s of %s has %u columns"
					        " unique inside InnoDB, but "
						"MySQL is asking statistics for"
					        " %lu columns. Have you mixed "
						"up .frm files from different "
					       	" installations? %s",
						index->name(),
						ib_table->name.m_name,
						index->n_uniq, j + 1,
						TROUBLESHOOTING_MSG);
					break;
				}

				/* innodb_rec_per_key() will use
				index->stat_n_diff_key_vals[] and the value we
				pass index->table->stat_n_rows. Both are
				calculated by ANALYZE and by the background
				stats gathering thread (which kicks in when too
				much of the table has been changed). In
				addition table->stat_n_rows is adjusted with
				each DML (e.g. ++ on row insert). Those
				adjustments are not MVCC'ed and not even
				reversed on rollback. So,
				index->stat_n_diff_key_vals[] and
				index->table->stat_n_rows could have been
				calculated at different time. This is
				acceptable. */

				ulong	rec_per_key_int = static_cast<ulong>(
					innodb_rec_per_key(index, j,
							   stats.records));

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key_int = rec_per_key_int / 2;

				if (rec_per_key_int == 0) {
					rec_per_key_int = 1;
				}

				key->rec_per_key[j] = rec_per_key_int;
			}
		}
	}

	/* At this forced recovery level the HA_STATUS_ERRKEY and
	HA_STATUS_AUTO handling below is skipped. */
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		goto func_exit;

	} else if (flag & HA_STATUS_ERRKEY) {
		const dict_index_t*	err_index;

		ut_a(m_prebuilt->trx);
		ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);

		err_index = trx_get_error_info(m_prebuilt->trx);

		/* Translate the failing index to a server key number, or
		fall back to the key number stored in the transaction. */
		if (err_index) {
			errkey = innobase_get_mysql_key_number_for_index(
					table, ib_table, err_index);
		} else {
			errkey = (unsigned int) (
				(m_prebuilt->trx->error_key_num
				 == ULINT_UNDEFINED)
					? ~0U
					: m_prebuilt->trx->error_key_num);
		}
	}

	if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
		stats.auto_increment_value = innobase_peek_autoinc();
	}

func_exit:
	m_prebuilt->trx->op_info = (char*)"";

	DBUG_RETURN(0);
}
14589
14590 /*********************************************************************//**
14591 Returns statistics information of the table to the MySQL interpreter,
14592 in various fields of the handle object.
14593 @return HA_ERR_* error code or 0 */
14594
14595 int
info(uint flag)14596 ha_innobase::info(
14597 /*==============*/
14598 uint flag) /*!< in: what information is requested */
14599 {
14600 return(info_low(flag, false /* not ANALYZE */));
14601 }
14602
14603 /*
14604 Updates index cardinalities of the table, based on random dives into
14605 each index tree. This does NOT calculate exact statistics on the table.
14606 @return HA_ADMIN_* error code or HA_ADMIN_OK */
14607
14608 int
analyze(THD *,HA_CHECK_OPT *)14609 ha_innobase::analyze(THD*, HA_CHECK_OPT*)
14610 {
14611 /* Simply call info_low() with all the flags
14612 and request recalculation of the statistics */
14613 int ret = info_low(
14614 HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
14615 true /* this is ANALYZE */);
14616
14617 if (ret != 0) {
14618 return(HA_ADMIN_FAILED);
14619 }
14620
14621 return(HA_ADMIN_OK);
14622 }
14623
14624 /*****************************************************************//**
14625 Defragment table.
14626 @return error number */
defragment_table(const char * name)14627 inline int ha_innobase::defragment_table(const char *name)
14628 {
14629 char norm_name[FN_REFLEN];
14630 dict_table_t* table = NULL;
14631 dict_index_t* index = NULL;
14632 int ret = 0;
14633 dberr_t err = DB_SUCCESS;
14634
14635 normalize_table_name(norm_name, name);
14636
14637 table = dict_table_open_on_name(norm_name, FALSE,
14638 FALSE, DICT_ERR_IGNORE_FK_NOKEY);
14639
14640 for (index = dict_table_get_first_index(table); index;
14641 index = dict_table_get_next_index(index)) {
14642
14643 if (index->is_corrupted()) {
14644 continue;
14645 }
14646
14647 if (dict_index_is_spatial(index)) {
14648 /* Do not try to defragment spatial indexes,
14649 because doing it properly would require
14650 appropriate logic around the SSN (split
14651 sequence number). */
14652 continue;
14653 }
14654
14655 if (index->page == FIL_NULL) {
14656 /* Do not defragment auxiliary tables related
14657 to FULLTEXT INDEX. */
14658 ut_ad(index->type & DICT_FTS);
14659 continue;
14660 }
14661
14662 if (btr_defragment_find_index(index)) {
14663 // We borrow this error code. When the same index is
14664 // already in the defragmentation queue, issue another
14665 // defragmentation only introduces overhead. We return
14666 // an error here to let the user know this is not
14667 // necessary. Note that this will fail a query that's
14668 // trying to defragment a full table if one of the
14669 // indicies in that table is already in defragmentation.
14670 // We choose this behavior so user is aware of this
14671 // rather than silently defragment other indicies of
14672 // that table.
14673 ret = ER_SP_ALREADY_EXISTS;
14674 break;
14675 }
14676
14677 os_event_t event = btr_defragment_add_index(index, &err);
14678
14679 if (err != DB_SUCCESS) {
14680 push_warning_printf(
14681 current_thd,
14682 Sql_condition::WARN_LEVEL_WARN,
14683 ER_NO_SUCH_TABLE,
14684 "Table %s is encrypted but encryption service or"
14685 " used key_id is not available. "
14686 " Can't continue checking table.",
14687 index->table->name.m_name);
14688
14689 ret = convert_error_code_to_mysql(err, 0, current_thd);
14690 break;
14691 }
14692
14693 if (event) {
14694 while(os_event_wait_time(event, 1000000)) {
14695 if (thd_killed(current_thd)) {
14696 btr_defragment_remove_index(index);
14697 ret = ER_QUERY_INTERRUPTED;
14698 break;
14699 }
14700 }
14701 os_event_destroy(event);
14702 }
14703
14704 if (ret) {
14705 break;
14706 }
14707 }
14708
14709 dict_table_close(table, FALSE, FALSE);
14710 return ret;
14711 }
14712
14713 /**********************************************************************//**
14714 This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
14715 the table in MySQL. */
14716
14717 int
optimize(THD * thd,HA_CHECK_OPT *)14718 ha_innobase::optimize(
14719 /*==================*/
14720 THD* thd, /*!< in: connection thread handle */
14721 HA_CHECK_OPT*)
14722 {
14723
14724 /* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
14725 we have to hijack some existing command in order to be able to test
14726 the new admin commands added in InnoDB's FTS support. For now, we
14727 use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in
14728 InnoDB (so it recreates the table anew), and map it to OPTIMIZE.
14729
14730 This works OK otherwise, but MySQL locks the entire table during
14731 calls to OPTIMIZE, which is undesirable. */
14732 bool try_alter = true;
14733
14734 if (!m_prebuilt->table->is_temporary() && srv_defragment) {
14735 int err = defragment_table(m_prebuilt->table->name.m_name);
14736
14737 if (err == 0) {
14738 try_alter = false;
14739 } else {
14740 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
14741 uint(err),
14742 "InnoDB: Cannot defragment table %s: returned error code %d\n",
14743 m_prebuilt->table->name.m_name, err);
14744
14745 if(err == ER_SP_ALREADY_EXISTS) {
14746 try_alter = false;
14747 }
14748 }
14749 }
14750
14751 if (innodb_optimize_fulltext_only) {
14752 if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
14753 && m_prebuilt->table->space) {
14754 fts_sync_table(m_prebuilt->table);
14755 fts_optimize_table(m_prebuilt->table);
14756 }
14757 try_alter = false;
14758 }
14759
14760 return try_alter ? HA_ADMIN_TRY_ALTER : HA_ADMIN_OK;
14761 }
14762
14763 /*******************************************************************//**
14764 Tries to check that an InnoDB table is not corrupted. If corruption is
14765 noticed, prints to stderr information about it. In case of corruption
14766 may also assert a failure and crash the server.
14767 @return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
14768
14769 int
check(THD * thd,HA_CHECK_OPT * check_opt)14770 ha_innobase::check(
14771 /*===============*/
14772 THD* thd, /*!< in: user thread handle */
14773 HA_CHECK_OPT* check_opt) /*!< in: check options */
14774 {
14775 dict_index_t* index;
14776 ulint n_rows;
14777 ulint n_rows_in_table = ULINT_UNDEFINED;
14778 bool is_ok = true;
14779 ulint old_isolation_level;
14780 dberr_t ret;
14781
14782 DBUG_ENTER("ha_innobase::check");
14783 DBUG_ASSERT(thd == ha_thd());
14784 ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
14785 ut_a(m_prebuilt->trx == thd_to_trx(thd));
14786
14787 if (m_prebuilt->mysql_template == NULL) {
14788 /* Build the template; we will use a dummy template
14789 in index scans done in checking */
14790
14791 build_template(true);
14792 }
14793
14794 if (!m_prebuilt->table->space) {
14795
14796 ib_senderrf(
14797 thd,
14798 IB_LOG_LEVEL_ERROR,
14799 ER_TABLESPACE_DISCARDED,
14800 table->s->table_name.str);
14801
14802 DBUG_RETURN(HA_ADMIN_CORRUPT);
14803
14804 } else if (!m_prebuilt->table->is_readable() &&
14805 !m_prebuilt->table->space) {
14806
14807 ib_senderrf(
14808 thd, IB_LOG_LEVEL_ERROR,
14809 ER_TABLESPACE_MISSING,
14810 table->s->table_name.str);
14811
14812 DBUG_RETURN(HA_ADMIN_CORRUPT);
14813 }
14814
14815 m_prebuilt->trx->op_info = "checking table";
14816
14817 if (m_prebuilt->table->corrupted) {
14818 /* If some previous operation has marked the table as
14819 corrupted in memory, and has not propagated such to
14820 clustered index, we will do so here */
14821 index = dict_table_get_first_index(m_prebuilt->table);
14822
14823 if (!index->is_corrupted()) {
14824 dict_set_corrupted(
14825 index, m_prebuilt->trx, "CHECK TABLE");
14826 }
14827
14828 push_warning_printf(m_user_thd,
14829 Sql_condition::WARN_LEVEL_WARN,
14830 HA_ERR_INDEX_CORRUPT,
14831 "InnoDB: Index %s is marked as"
14832 " corrupted",
14833 index->name());
14834
14835 /* Now that the table is already marked as corrupted,
14836 there is no need to check any index of this table */
14837 m_prebuilt->trx->op_info = "";
14838
14839 DBUG_RETURN(HA_ADMIN_CORRUPT);
14840 }
14841
14842 old_isolation_level = m_prebuilt->trx->isolation_level;
14843
14844 /* We must run the index record counts at an isolation level
14845 >= READ COMMITTED, because a dirty read can see a wrong number
14846 of records in some index; to play safe, we normally use
14847 REPEATABLE READ here */
14848 m_prebuilt->trx->isolation_level = high_level_read_only
14849 ? TRX_ISO_READ_UNCOMMITTED
14850 : TRX_ISO_REPEATABLE_READ;
14851
14852 ut_ad(!m_prebuilt->table->corrupted);
14853
14854 for (index = dict_table_get_first_index(m_prebuilt->table);
14855 index != NULL;
14856 index = dict_table_get_next_index(index)) {
14857 /* If this is an index being created or dropped, skip */
14858 if (!index->is_committed()) {
14859 continue;
14860 }
14861
14862 if (!(check_opt->flags & T_QUICK)
14863 && !index->is_corrupted()) {
14864
14865 dberr_t err = btr_validate_index(
14866 index, m_prebuilt->trx);
14867
14868 if (err != DB_SUCCESS) {
14869 is_ok = false;
14870
14871 if (err == DB_DECRYPTION_FAILED) {
14872 push_warning_printf(
14873 thd,
14874 Sql_condition::WARN_LEVEL_WARN,
14875 ER_NO_SUCH_TABLE,
14876 "Table %s is encrypted but encryption service or"
14877 " used key_id is not available. "
14878 " Can't continue checking table.",
14879 index->table->name.m_name);
14880 } else {
14881 push_warning_printf(
14882 thd,
14883 Sql_condition::WARN_LEVEL_WARN,
14884 ER_NOT_KEYFILE,
14885 "InnoDB: The B-tree of"
14886 " index %s is corrupted.",
14887 index->name());
14888 }
14889
14890 continue;
14891 }
14892 }
14893
14894 /* Instead of invoking change_active_index(), set up
14895 a dummy template for non-locking reads, disabling
14896 access to the clustered index. */
14897 m_prebuilt->index = index;
14898
14899 m_prebuilt->index_usable = row_merge_is_index_usable(
14900 m_prebuilt->trx, m_prebuilt->index);
14901
14902 DBUG_EXECUTE_IF(
14903 "dict_set_index_corrupted",
14904 if (!index->is_primary()) {
14905 m_prebuilt->index_usable = FALSE;
14906 // row_mysql_lock_data_dictionary(m_prebuilt->trx);
14907 dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");
14908 // row_mysql_unlock_data_dictionary(m_prebuilt->trx);
14909 });
14910
14911 if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
14912 if (index->is_corrupted()) {
14913 push_warning_printf(
14914 m_user_thd,
14915 Sql_condition::WARN_LEVEL_WARN,
14916 HA_ERR_INDEX_CORRUPT,
14917 "InnoDB: Index %s is marked as"
14918 " corrupted",
14919 index->name());
14920 is_ok = false;
14921 } else {
14922 push_warning_printf(
14923 m_user_thd,
14924 Sql_condition::WARN_LEVEL_WARN,
14925 HA_ERR_TABLE_DEF_CHANGED,
14926 "InnoDB: Insufficient history for"
14927 " index %s",
14928 index->name());
14929 }
14930 continue;
14931 }
14932
14933 m_prebuilt->sql_stat_start = TRUE;
14934 m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
14935 m_prebuilt->n_template = 0;
14936 m_prebuilt->need_to_access_clustered = FALSE;
14937
14938 dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
14939
14940 m_prebuilt->select_lock_type = LOCK_NONE;
14941
14942 /* Scan this index. */
14943 if (dict_index_is_spatial(index)) {
14944 ret = row_count_rtree_recs(m_prebuilt, &n_rows);
14945 } else {
14946 ret = row_scan_index_for_mysql(
14947 m_prebuilt, index, &n_rows);
14948 }
14949
14950 DBUG_EXECUTE_IF(
14951 "dict_set_index_corrupted",
14952 if (!index->is_primary()) {
14953 ret = DB_CORRUPTION;
14954 });
14955
14956 if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) {
14957 /* Do not report error since this could happen
14958 during shutdown */
14959 break;
14960 }
14961 if (ret != DB_SUCCESS) {
14962 /* Assume some kind of corruption. */
14963 push_warning_printf(
14964 thd, Sql_condition::WARN_LEVEL_WARN,
14965 ER_NOT_KEYFILE,
14966 "InnoDB: The B-tree of"
14967 " index %s is corrupted.",
14968 index->name());
14969 is_ok = false;
14970 dict_set_corrupted(
14971 index, m_prebuilt->trx, "CHECK TABLE-check index");
14972 }
14973
14974
14975 if (index == dict_table_get_first_index(m_prebuilt->table)) {
14976 n_rows_in_table = n_rows;
14977 } else if (!(index->type & DICT_FTS)
14978 && (n_rows != n_rows_in_table)) {
14979 push_warning_printf(
14980 thd, Sql_condition::WARN_LEVEL_WARN,
14981 ER_NOT_KEYFILE,
14982 "InnoDB: Index '%-.200s' contains " ULINTPF
14983 " entries, should be " ULINTPF ".",
14984 index->name(), n_rows, n_rows_in_table);
14985 is_ok = false;
14986 dict_set_corrupted(
14987 index, m_prebuilt->trx,
14988 "CHECK TABLE; Wrong count");
14989 }
14990 }
14991
14992 /* Restore the original isolation level */
14993 m_prebuilt->trx->isolation_level = old_isolation_level;
14994 #ifdef BTR_CUR_HASH_ADAPT
14995 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
14996 /* We validate the whole adaptive hash index for all tables
14997 at every CHECK TABLE only when QUICK flag is not present. */
14998
14999 if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
15000 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
15001 ER_NOT_KEYFILE,
15002 "InnoDB: The adaptive hash index is corrupted.");
15003 is_ok = false;
15004 }
15005 # endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
15006 #endif /* BTR_CUR_HASH_ADAPT */
15007 m_prebuilt->trx->op_info = "";
15008
15009 DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
15010 }
15011
15012 /*******************************************************************//**
15013 Gets the foreign key create info for a table stored in InnoDB.
15014 @return own: character string in the form which can be inserted to the
15015 CREATE TABLE statement, MUST be freed with
15016 ha_innobase::free_foreign_key_create_info */
15017
15018 char*
get_foreign_key_create_info(void)15019 ha_innobase::get_foreign_key_create_info(void)
15020 /*==========================================*/
15021 {
15022 ut_a(m_prebuilt != NULL);
15023
15024 /* We do not know if MySQL can call this function before calling
15025 external_lock(). To be safe, update the thd of the current table
15026 handle. */
15027
15028 update_thd(ha_thd());
15029
15030 m_prebuilt->trx->op_info = "getting info on foreign keys";
15031
15032 /* Output the data to a temporary string */
15033 std::string str = dict_print_info_on_foreign_keys(
15034 TRUE, m_prebuilt->trx,
15035 m_prebuilt->table);
15036
15037 m_prebuilt->trx->op_info = "";
15038
15039 /* Allocate buffer for the string */
15040 char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
15041
15042 /* JAN: TODO: MySQL 5.7
15043 fk_str = reinterpret_cast<char*>(
15044 my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0)));
15045 */
15046
15047
15048
15049 if (fk_str) {
15050 memcpy(fk_str, str.c_str(), str.length());
15051 fk_str[str.length()]='\0';
15052 }
15053
15054 return(fk_str);
15055 }
15056
15057
15058 /***********************************************************************//**
15059 Maps a InnoDB foreign key constraint to a equivalent MySQL foreign key info.
15060 @return pointer to foreign key info */
15061 static
15062 FOREIGN_KEY_INFO*
get_foreign_key_info(THD * thd,dict_foreign_t * foreign)15063 get_foreign_key_info(
15064 /*=================*/
15065 THD* thd, /*!< in: user thread handle */
15066 dict_foreign_t* foreign)/*!< in: foreign key constraint */
15067 {
15068 FOREIGN_KEY_INFO f_key_info;
15069 FOREIGN_KEY_INFO* pf_key_info;
15070 uint i = 0;
15071 size_t len;
15072 char tmp_buff[NAME_LEN+1];
15073 char name_buff[NAME_LEN+1];
15074 const char* ptr;
15075 LEX_CSTRING* referenced_key_name;
15076 LEX_CSTRING* name = NULL;
15077
15078 if (dict_table_t::is_temporary_name(foreign->foreign_table_name)) {
15079 return NULL;
15080 }
15081
15082 ptr = dict_remove_db_name(foreign->id);
15083 f_key_info.foreign_id = thd_make_lex_string(
15084 thd, 0, ptr, strlen(ptr), 1);
15085
15086 /* Name format: database name, '/', table name, '\0' */
15087
15088 /* Referenced (parent) database name */
15089 len = dict_get_db_name_len(foreign->referenced_table_name);
15090 ut_a(len < sizeof(tmp_buff));
15091 ut_memcpy(tmp_buff, foreign->referenced_table_name, len);
15092 tmp_buff[len] = 0;
15093
15094 len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15095 f_key_info.referenced_db = thd_make_lex_string(
15096 thd, 0, name_buff, len, 1);
15097
15098 /* Referenced (parent) table name */
15099 ptr = dict_remove_db_name(foreign->referenced_table_name);
15100 len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15101 f_key_info.referenced_table = thd_make_lex_string(
15102 thd, 0, name_buff, len, 1);
15103
15104 /* Dependent (child) database name */
15105 len = dict_get_db_name_len(foreign->foreign_table_name);
15106 ut_a(len < sizeof(tmp_buff));
15107 ut_memcpy(tmp_buff, foreign->foreign_table_name, len);
15108 tmp_buff[len] = 0;
15109
15110 len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15111 f_key_info.foreign_db = thd_make_lex_string(
15112 thd, 0, name_buff, len, 1);
15113
15114 /* Dependent (child) table name */
15115 ptr = dict_remove_db_name(foreign->foreign_table_name);
15116 len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15117 f_key_info.foreign_table = thd_make_lex_string(
15118 thd, 0, name_buff, len, 1);
15119
15120 do {
15121 ptr = foreign->foreign_col_names[i];
15122 name = thd_make_lex_string(thd, name, ptr,
15123 strlen(ptr), 1);
15124 f_key_info.foreign_fields.push_back(name);
15125 ptr = foreign->referenced_col_names[i];
15126 name = thd_make_lex_string(thd, name, ptr,
15127 strlen(ptr), 1);
15128 f_key_info.referenced_fields.push_back(name);
15129 } while (++i < foreign->n_fields);
15130
15131 if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
15132 f_key_info.delete_method = FK_OPTION_CASCADE;
15133 } else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
15134 f_key_info.delete_method = FK_OPTION_SET_NULL;
15135 } else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
15136 f_key_info.delete_method = FK_OPTION_NO_ACTION;
15137 } else {
15138 f_key_info.delete_method = FK_OPTION_RESTRICT;
15139 }
15140
15141
15142 if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
15143 f_key_info.update_method = FK_OPTION_CASCADE;
15144 } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
15145 f_key_info.update_method = FK_OPTION_SET_NULL;
15146 } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
15147 f_key_info.update_method = FK_OPTION_NO_ACTION;
15148 } else {
15149 f_key_info.update_method = FK_OPTION_RESTRICT;
15150 }
15151
15152 /* Load referenced table to update FK referenced key name. */
15153 if (foreign->referenced_table == NULL) {
15154
15155 dict_table_t* ref_table;
15156
15157 ut_ad(mutex_own(&dict_sys.mutex));
15158 ref_table = dict_table_open_on_name(
15159 foreign->referenced_table_name_lookup,
15160 TRUE, FALSE, DICT_ERR_IGNORE_NONE);
15161
15162 if (ref_table == NULL) {
15163
15164 if (!thd_test_options(
15165 thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
15166 ib::info()
15167 << "Foreign Key referenced table "
15168 << foreign->referenced_table_name
15169 << " not found for foreign table "
15170 << foreign->foreign_table_name;
15171 }
15172 } else {
15173
15174 dict_table_close(ref_table, TRUE, FALSE);
15175 }
15176 }
15177
15178 if (foreign->referenced_index
15179 && foreign->referenced_index->name != NULL) {
15180 referenced_key_name = thd_make_lex_string(
15181 thd,
15182 f_key_info.referenced_key_name,
15183 foreign->referenced_index->name,
15184 strlen(foreign->referenced_index->name),
15185 1);
15186 } else {
15187 referenced_key_name = NULL;
15188 }
15189
15190 f_key_info.referenced_key_name = referenced_key_name;
15191
15192 pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info,
15193 sizeof(FOREIGN_KEY_INFO));
15194
15195 return(pf_key_info);
15196 }
15197
15198 /*******************************************************************//**
15199 Gets the list of foreign keys in this table.
15200 @return always 0, that is, always succeeds */
15201
15202 int
get_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15203 ha_innobase::get_foreign_key_list(
15204 /*==============================*/
15205 THD* thd, /*!< in: user thread handle */
15206 List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
15207 {
15208 update_thd(ha_thd());
15209
15210 m_prebuilt->trx->op_info = "getting list of foreign keys";
15211
15212 mutex_enter(&dict_sys.mutex);
15213
15214 for (dict_foreign_set::iterator it
15215 = m_prebuilt->table->foreign_set.begin();
15216 it != m_prebuilt->table->foreign_set.end();
15217 ++it) {
15218
15219 FOREIGN_KEY_INFO* pf_key_info;
15220 dict_foreign_t* foreign = *it;
15221
15222 pf_key_info = get_foreign_key_info(thd, foreign);
15223
15224 if (pf_key_info != NULL) {
15225 f_key_list->push_back(pf_key_info);
15226 }
15227 }
15228
15229 mutex_exit(&dict_sys.mutex);
15230
15231 m_prebuilt->trx->op_info = "";
15232
15233 return(0);
15234 }
15235
15236 /*******************************************************************//**
15237 Gets the set of foreign keys where this table is the referenced table.
15238 @return always 0, that is, always succeeds */
15239
15240 int
get_parent_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15241 ha_innobase::get_parent_foreign_key_list(
15242 /*=====================================*/
15243 THD* thd, /*!< in: user thread handle */
15244 List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */
15245 {
15246 update_thd(ha_thd());
15247
15248 m_prebuilt->trx->op_info = "getting list of referencing foreign keys";
15249
15250 mutex_enter(&dict_sys.mutex);
15251
15252 for (dict_foreign_set::iterator it
15253 = m_prebuilt->table->referenced_set.begin();
15254 it != m_prebuilt->table->referenced_set.end();
15255 ++it) {
15256
15257 FOREIGN_KEY_INFO* pf_key_info;
15258 dict_foreign_t* foreign = *it;
15259
15260 pf_key_info = get_foreign_key_info(thd, foreign);
15261
15262 if (pf_key_info != NULL) {
15263 f_key_list->push_back(pf_key_info);
15264 }
15265 }
15266
15267 mutex_exit(&dict_sys.mutex);
15268
15269 m_prebuilt->trx->op_info = "";
15270
15271 return(0);
15272 }
15273
15274 /** Table list item structure is used to store only the table
15275 and name. It is used by get_cascade_foreign_key_table_list to store
15276 the intermediate result for fetching the table set. */
15277 struct table_list_item {
15278 /** InnoDB table object */
15279 const dict_table_t* table;
15280 /** Table name */
15281 const char* name;
15282 };
15283
15284 /*****************************************************************//**
15285 Checks if ALTER TABLE may change the storage engine of the table.
15286 Changing storage engines is not allowed for tables for which there
15287 are foreign key constraints (parent or child tables).
15288 @return TRUE if can switch engines */
15289
15290 bool
can_switch_engines(void)15291 ha_innobase::can_switch_engines(void)
15292 /*=================================*/
15293 {
15294 DBUG_ENTER("ha_innobase::can_switch_engines");
15295
15296 update_thd();
15297
15298 m_prebuilt->trx->op_info =
15299 "determining if there are foreign key constraints";
15300
15301 row_mysql_freeze_data_dictionary(m_prebuilt->trx);
15302
15303 bool can_switch = m_prebuilt->table->referenced_set.empty()
15304 && m_prebuilt->table->foreign_set.empty();
15305
15306 row_mysql_unfreeze_data_dictionary(m_prebuilt->trx);
15307 m_prebuilt->trx->op_info = "";
15308
15309 DBUG_RETURN(can_switch);
15310 }
15311
15312 /*******************************************************************//**
15313 Checks if a table is referenced by a foreign key. The MySQL manual states that
15314 a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
15315 delete is then allowed internally to resolve a duplicate key conflict in
15316 REPLACE, not an update.
15317 @return > 0 if referenced by a FOREIGN KEY */
15318
15319 uint
referenced_by_foreign_key(void)15320 ha_innobase::referenced_by_foreign_key(void)
15321 /*========================================*/
15322 {
15323 if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
15324
15325 return(1);
15326 }
15327
15328 return(0);
15329 }
15330
15331 /*******************************************************************//**
15332 Frees the foreign key create info for a table stored in InnoDB, if it is
15333 non-NULL. */
15334
15335 void
free_foreign_key_create_info(char * str)15336 ha_innobase::free_foreign_key_create_info(
15337 /*======================================*/
15338 char* str) /*!< in, own: create info string to free */
15339 {
15340 if (str != NULL) {
15341 my_free(str);
15342 }
15343 }
15344
15345 /*******************************************************************//**
15346 Tells something additional to the handler about how to do things.
15347 @return 0 or error number */
15348
15349 int
extra(enum ha_extra_function operation)15350 ha_innobase::extra(
15351 /*===============*/
15352 enum ha_extra_function operation)
15353 /*!< in: HA_EXTRA_FLUSH or some other flag */
15354 {
15355 check_trx_exists(ha_thd());
15356
15357 /* Warning: since it is not sure that MySQL calls external_lock
15358 before calling this function, the trx field in m_prebuilt can be
15359 obsolete! */
15360
15361 switch (operation) {
15362 case HA_EXTRA_FLUSH:
15363 if (m_prebuilt->blob_heap) {
15364 row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15365 }
15366 break;
15367 case HA_EXTRA_RESET_STATE:
15368 reset_template();
15369 thd_to_trx(ha_thd())->duplicates = 0;
15370 break;
15371 case HA_EXTRA_NO_KEYREAD:
15372 m_prebuilt->read_just_key = 0;
15373 break;
15374 case HA_EXTRA_KEYREAD:
15375 m_prebuilt->read_just_key = 1;
15376 break;
15377 case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
15378 m_prebuilt->keep_other_fields_on_keyread = 1;
15379 break;
15380
15381 /* IMPORTANT: m_prebuilt->trx can be obsolete in
15382 this method, because it is not sure that MySQL
15383 calls external_lock before this method with the
15384 parameters below. We must not invoke update_thd()
15385 either, because the calling threads may change.
15386 CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
15387 case HA_EXTRA_INSERT_WITH_UPDATE:
15388 thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
15389 break;
15390 case HA_EXTRA_NO_IGNORE_DUP_KEY:
15391 thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
15392 break;
15393 case HA_EXTRA_WRITE_CAN_REPLACE:
15394 thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
15395 break;
15396 case HA_EXTRA_WRITE_CANNOT_REPLACE:
15397 thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
15398 break;
15399 case HA_EXTRA_BEGIN_ALTER_COPY:
15400 m_prebuilt->table->skip_alter_undo = 1;
15401 if (m_prebuilt->table->is_temporary()
15402 || !m_prebuilt->table->versioned_by_id()) {
15403 break;
15404 }
15405 trx_start_if_not_started(m_prebuilt->trx, true);
15406 m_prebuilt->trx->mod_tables.insert(
15407 trx_mod_tables_t::value_type(
15408 const_cast<dict_table_t*>(m_prebuilt->table),
15409 0))
15410 .first->second.set_versioned(0);
15411 break;
15412 case HA_EXTRA_END_ALTER_COPY:
15413 m_prebuilt->table->skip_alter_undo = 0;
15414 break;
15415 default:/* Do nothing */
15416 ;
15417 }
15418
15419 return(0);
15420 }
15421
15422 /**
15423 MySQL calls this method at the end of each statement */
15424 int
reset()15425 ha_innobase::reset()
15426 {
15427 if (m_prebuilt->blob_heap) {
15428 row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15429 }
15430
15431 reset_template();
15432
15433 m_ds_mrr.dsmrr_close();
15434
15435 /* TODO: This should really be reset in reset_template() but for now
15436 it's safer to do it explicitly here. */
15437
15438 /* This is a statement level counter. */
15439 m_prebuilt->autoinc_last_value = 0;
15440
15441 return(0);
15442 }
15443
15444 /******************************************************************//**
15445 MySQL calls this function at the start of each SQL statement inside LOCK
15446 TABLES. Inside LOCK TABLES the ::external_lock method does not work to
15447 mark SQL statement borders. Note also a special case: if a temporary table
15448 is created inside LOCK TABLES, MySQL has not called external_lock() at all
15449 on that table.
15450 MySQL-5.0 also calls this before each statement in an execution of a stored
15451 procedure. To make the execution more deterministic for binlogging, MySQL-5.0
15452 locks all tables involved in a stored procedure with full explicit table
15453 locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
15454 procedure.
15455 @return 0 or error code */
15456
15457 int
start_stmt(THD * thd,thr_lock_type lock_type)15458 ha_innobase::start_stmt(
15459 /*====================*/
15460 THD* thd, /*!< in: handle to the user thread */
15461 thr_lock_type lock_type)
15462 {
15463 trx_t* trx = m_prebuilt->trx;
15464
15465 DBUG_ENTER("ha_innobase::start_stmt");
15466
15467 update_thd(thd);
15468
15469 ut_ad(m_prebuilt->table != NULL);
15470
15471 trx = m_prebuilt->trx;
15472
15473 innobase_srv_conc_force_exit_innodb(trx);
15474
15475 /* Reset the AUTOINC statement level counter for multi-row INSERTs. */
15476 trx->n_autoinc_rows = 0;
15477
15478 m_prebuilt->sql_stat_start = TRUE;
15479 m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15480 reset_template();
15481
15482 if (m_prebuilt->table->is_temporary()
15483 && m_mysql_has_locked
15484 && m_prebuilt->select_lock_type == LOCK_NONE) {
15485 dberr_t error;
15486
15487 switch (thd_sql_command(thd)) {
15488 case SQLCOM_INSERT:
15489 case SQLCOM_UPDATE:
15490 case SQLCOM_DELETE:
15491 case SQLCOM_REPLACE:
15492 init_table_handle_for_HANDLER();
15493 m_prebuilt->select_lock_type = LOCK_X;
15494 m_prebuilt->stored_select_lock_type = LOCK_X;
15495 error = row_lock_table(m_prebuilt);
15496
15497 if (error != DB_SUCCESS) {
15498 int st = convert_error_code_to_mysql(
15499 error, 0, thd);
15500 DBUG_RETURN(st);
15501 }
15502 break;
15503 }
15504 }
15505
15506 if (!m_mysql_has_locked) {
15507 /* This handle is for a temporary table created inside
15508 this same LOCK TABLES; since MySQL does NOT call external_lock
15509 in this case, we must use x-row locks inside InnoDB to be
15510 prepared for an update of a row */
15511
15512 m_prebuilt->select_lock_type = LOCK_X;
15513
15514 } else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
15515 && thd_sql_command(thd) == SQLCOM_SELECT
15516 && lock_type == TL_READ) {
15517
15518 /* For other than temporary tables, we obtain
15519 no lock for consistent read (plain SELECT). */
15520
15521 m_prebuilt->select_lock_type = LOCK_NONE;
15522 } else {
15523 /* Not a consistent read: restore the
15524 select_lock_type value. The value of
15525 stored_select_lock_type was decided in:
15526 1) ::store_lock(),
15527 2) ::external_lock(),
15528 3) ::init_table_handle_for_HANDLER(). */
15529
15530 ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
15531
15532 m_prebuilt->select_lock_type =
15533 m_prebuilt->stored_select_lock_type;
15534 }
15535
15536 *trx->detailed_error = 0;
15537
15538 innobase_register_trx(ht, thd, trx);
15539
15540 if (!trx_is_started(trx)) {
15541 trx->will_lock = true;
15542 }
15543
15544 DBUG_RETURN(0);
15545 }
15546
15547 /******************************************************************//**
15548 Maps a MySQL trx isolation level code to the InnoDB isolation level code
15549 @return InnoDB isolation level */
15550 static inline
15551 ulint
innobase_map_isolation_level(enum_tx_isolation iso)15552 innobase_map_isolation_level(
15553 /*=========================*/
15554 enum_tx_isolation iso) /*!< in: MySQL isolation level code */
15555 {
15556 if (UNIV_UNLIKELY(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN)
15557 || UNIV_UNLIKELY(srv_read_only_mode)) {
15558 return TRX_ISO_READ_UNCOMMITTED;
15559 }
15560 switch (iso) {
15561 case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
15562 case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED);
15563 case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE);
15564 case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED);
15565 }
15566
15567 ut_error;
15568
15569 return(0);
15570 }
15571
15572 /******************************************************************//**
15573 As MySQL will execute an external lock for every new table it uses when it
15574 starts to process an SQL statement (an exception is when MySQL calls
15575 start_stmt for the handle) we can use this function to store the pointer to
15576 the THD in the handle. We will also use this function to communicate
15577 to InnoDB that a new SQL statement has started and that we must store a
15578 savepoint to our transaction handle, so that we are able to roll back
15579 the SQL statement in case of an error.
15580 @return 0 */
15581
15582 int
external_lock(THD * thd,int lock_type)15583 ha_innobase::external_lock(
15584 /*=======================*/
15585 THD* thd, /*!< in: handle to the user thread */
15586 int lock_type) /*!< in: lock type */
15587 {
15588 DBUG_ENTER("ha_innobase::external_lock");
15589 DBUG_PRINT("enter",("lock_type: %d", lock_type));
15590
15591 update_thd(thd);
15592 trx_t* trx = m_prebuilt->trx;
15593 ut_ad(m_prebuilt->table);
15594
15595 /* Statement based binlogging does not work in isolation level
15596 READ UNCOMMITTED and READ COMMITTED since the necessary
15597 locks cannot be taken. In this case, we print an
15598 informative error message and return with an error.
15599 Note: decide_logging_format would give the same error message,
15600 except it cannot give the extra details. */
15601
15602 if (lock_type == F_WRLCK
15603 && !(table_flags() & HA_BINLOG_STMT_CAPABLE)
15604 && thd_binlog_format(thd) == BINLOG_FORMAT_STMT
15605 && thd_binlog_filter_ok(thd)
15606 && thd_sqlcom_can_generate_row_events(thd)) {
15607 bool skip = false;
15608 #ifdef WITH_WSREP
15609 skip = trx->is_wsrep() && !wsrep_thd_is_local(thd);
15610 #endif /* WITH_WSREP */
15611 /* used by test case */
15612 DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
15613
15614 if (!skip) {
15615 my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
15616 " InnoDB is limited to row-logging when"
15617 " transaction isolation level is"
15618 " READ COMMITTED or READ UNCOMMITTED.");
15619
15620 DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
15621 }
15622 }
15623
15624 /* Check for UPDATEs in read-only mode. */
15625 if (srv_read_only_mode) {
15626 switch (thd_sql_command(thd)) {
15627 case SQLCOM_CREATE_TABLE:
15628 if (lock_type != F_WRLCK) {
15629 break;
15630 }
15631 /* fall through */
15632 case SQLCOM_UPDATE:
15633 case SQLCOM_INSERT:
15634 case SQLCOM_REPLACE:
15635 case SQLCOM_DROP_TABLE:
15636 case SQLCOM_ALTER_TABLE:
15637 case SQLCOM_OPTIMIZE:
15638 case SQLCOM_CREATE_INDEX:
15639 case SQLCOM_DROP_INDEX:
15640 case SQLCOM_CREATE_SEQUENCE:
15641 case SQLCOM_DROP_SEQUENCE:
15642 case SQLCOM_DELETE:
15643 ib_senderrf(thd, IB_LOG_LEVEL_WARN,
15644 ER_READ_ONLY_MODE);
15645 DBUG_RETURN(HA_ERR_TABLE_READONLY);
15646 }
15647 }
15648
15649 m_prebuilt->sql_stat_start = TRUE;
15650 m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15651
15652 reset_template();
15653
15654 switch (m_prebuilt->table->quiesce) {
15655 case QUIESCE_START:
15656 /* Check for FLUSH TABLE t WITH READ LOCK; */
15657 if (!srv_read_only_mode
15658 && thd_sql_command(thd) == SQLCOM_FLUSH
15659 && lock_type == F_RDLCK) {
15660
15661 if (!m_prebuilt->table->space) {
15662 ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
15663 ER_TABLESPACE_DISCARDED,
15664 table->s->table_name.str);
15665
15666 DBUG_RETURN(HA_ERR_TABLESPACE_MISSING);
15667 }
15668
15669 row_quiesce_table_start(m_prebuilt->table, trx);
15670
15671 /* Use the transaction instance to track UNLOCK
15672 TABLES. It can be done via START TRANSACTION; too
15673 implicitly. */
15674
15675 ++trx->flush_tables;
15676 }
15677 break;
15678
15679 case QUIESCE_COMPLETE:
15680 /* Check for UNLOCK TABLES; implicit or explicit
15681 or trx interruption. */
15682 if (trx->flush_tables > 0
15683 && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
15684
15685 row_quiesce_table_complete(m_prebuilt->table, trx);
15686
15687 ut_a(trx->flush_tables > 0);
15688 --trx->flush_tables;
15689 }
15690
15691 break;
15692
15693 case QUIESCE_NONE:
15694 break;
15695 }
15696
15697 if (lock_type == F_WRLCK) {
15698
15699 /* If this is a SELECT, then it is in UPDATE TABLE ...
15700 or SELECT ... FOR UPDATE */
15701 m_prebuilt->select_lock_type = LOCK_X;
15702 m_prebuilt->stored_select_lock_type = LOCK_X;
15703 }
15704
15705 if (lock_type != F_UNLCK) {
15706 /* MySQL is setting a new table lock */
15707
15708 *trx->detailed_error = 0;
15709
15710 innobase_register_trx(ht, thd, trx);
15711
15712 if (trx->isolation_level == TRX_ISO_SERIALIZABLE
15713 && m_prebuilt->select_lock_type == LOCK_NONE
15714 && thd_test_options(
15715 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15716
15717 /* To get serializable execution, we let InnoDB
15718 conceptually add 'LOCK IN SHARE MODE' to all SELECTs
15719 which otherwise would have been consistent reads. An
15720 exception is consistent reads in the AUTOCOMMIT=1 mode:
15721 we know that they are read-only transactions, and they
15722 can be serialized also if performed as consistent
15723 reads. */
15724
15725 m_prebuilt->select_lock_type = LOCK_S;
15726 m_prebuilt->stored_select_lock_type = LOCK_S;
15727 }
15728
15729 /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
15730 TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
15731 an InnoDB table lock if it is released immediately at the end
15732 of LOCK TABLES, and InnoDB's table locks in that case cause
15733 VERY easily deadlocks.
15734
15735 We do not set InnoDB table locks if user has not explicitly
15736 requested a table lock. Note that thd_in_lock_tables(thd)
15737 can hold in some cases, e.g., at the start of a stored
15738 procedure call (SQLCOM_CALL). */
15739
15740 if (m_prebuilt->select_lock_type != LOCK_NONE) {
15741
15742 if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
15743 && THDVAR(thd, table_locks)
15744 && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
15745 && thd_in_lock_tables(thd)) {
15746
15747 dberr_t error = row_lock_table(m_prebuilt);
15748
15749 if (error != DB_SUCCESS) {
15750
15751 DBUG_RETURN(
15752 convert_error_code_to_mysql(
15753 error, 0, thd));
15754 }
15755 }
15756
15757 trx->mysql_n_tables_locked++;
15758 }
15759
15760 trx->n_mysql_tables_in_use++;
15761 m_mysql_has_locked = true;
15762
15763 if (!trx_is_started(trx)
15764 && (m_prebuilt->select_lock_type != LOCK_NONE
15765 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15766
15767 trx->will_lock = true;
15768 }
15769
15770 DBUG_RETURN(0);
15771 } else {
15772 DEBUG_SYNC_C("ha_innobase_end_statement");
15773 }
15774
15775 /* MySQL is releasing a table lock */
15776
15777 trx->n_mysql_tables_in_use--;
15778 m_mysql_has_locked = false;
15779
15780 innobase_srv_conc_force_exit_innodb(trx);
15781
15782 /* If the MySQL lock count drops to zero we know that the current SQL
15783 statement has ended */
15784
15785 if (trx->n_mysql_tables_in_use == 0) {
15786
15787 trx->mysql_n_tables_locked = 0;
15788 m_prebuilt->used_in_HANDLER = FALSE;
15789
15790 if (!thd_test_options(
15791 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15792
15793 if (trx_is_started(trx)) {
15794
15795 innobase_commit(ht, thd, TRUE);
15796 }
15797
15798 } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
15799 trx->read_view.close();
15800 }
15801 }
15802
15803 if (!trx_is_started(trx)
15804 && lock_type != F_UNLCK
15805 && (m_prebuilt->select_lock_type != LOCK_NONE
15806 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15807
15808 trx->will_lock = true;
15809 }
15810
15811 DBUG_RETURN(0);
15812 }
15813
15814 /************************************************************************//**
15815 Here we export InnoDB status variables to MySQL. */
15816 static
15817 void
innodb_export_status()15818 innodb_export_status()
15819 /*==================*/
15820 {
15821 if (srv_was_started) {
15822 srv_export_innodb_status();
15823 }
15824 }
15825
15826 /************************************************************************//**
15827 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
15828 InnoDB Monitor to the client.
15829 @return 0 on success */
15830 static
15831 int
innodb_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)15832 innodb_show_status(
15833 /*===============*/
15834 handlerton* hton, /*!< in: the innodb handlerton */
15835 THD* thd, /*!< in: the MySQL query thread of the caller */
15836 stat_print_fn* stat_print)
15837 {
15838 static const char truncated_msg[] = "... truncated...\n";
15839 const long MAX_STATUS_SIZE = 1048576;
15840 ulint trx_list_start = ULINT_UNDEFINED;
15841 ulint trx_list_end = ULINT_UNDEFINED;
15842 bool ret_val;
15843
15844 DBUG_ENTER("innodb_show_status");
15845 DBUG_ASSERT(hton == innodb_hton_ptr);
15846
15847 /* We don't create the temp files or associated
15848 mutexes in read-only-mode */
15849
15850 if (srv_read_only_mode) {
15851 DBUG_RETURN(0);
15852 }
15853
15854 srv_wake_purge_thread_if_not_active();
15855
15856 trx_t* trx = check_trx_exists(thd);
15857
15858 innobase_srv_conc_force_exit_innodb(trx);
15859
15860 /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
15861 bytes of text. */
15862
15863 char* str;
15864 size_t flen;
15865
15866 mutex_enter(&srv_monitor_file_mutex);
15867 rewind(srv_monitor_file);
15868
15869 srv_printf_innodb_monitor(srv_monitor_file, FALSE,
15870 &trx_list_start, &trx_list_end);
15871
15872 os_file_set_eof(srv_monitor_file);
15873
15874 flen = size_t(ftell(srv_monitor_file));
15875 if (ssize_t(flen) < 0) {
15876 flen = 0;
15877 }
15878
15879 size_t usable_len;
15880
15881 if (flen > MAX_STATUS_SIZE) {
15882 usable_len = MAX_STATUS_SIZE;
15883 srv_truncated_status_writes++;
15884 } else {
15885 usable_len = flen;
15886 }
15887
15888 /* allocate buffer for the string, and
15889 read the contents of the temporary file */
15890
15891 if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
15892 usable_len + 1, MYF(0)))) {
15893 mutex_exit(&srv_monitor_file_mutex);
15894 DBUG_RETURN(1);
15895 }
15896
15897 rewind(srv_monitor_file);
15898
15899 if (flen < MAX_STATUS_SIZE) {
15900 /* Display the entire output. */
15901 flen = fread(str, 1, flen, srv_monitor_file);
15902 } else if (trx_list_end < flen
15903 && trx_list_start < trx_list_end
15904 && trx_list_start + flen - trx_list_end
15905 < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
15906
15907 /* Omit the beginning of the list of active transactions. */
15908 size_t len = fread(str, 1, trx_list_start, srv_monitor_file);
15909
15910 memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
15911 len += sizeof truncated_msg - 1;
15912 usable_len = (MAX_STATUS_SIZE - 1) - len;
15913 fseek(srv_monitor_file, long(flen - usable_len), SEEK_SET);
15914 len += fread(str + len, 1, usable_len, srv_monitor_file);
15915 flen = len;
15916 } else {
15917 /* Omit the end of the output. */
15918 flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
15919 }
15920
15921 mutex_exit(&srv_monitor_file_mutex);
15922
15923 ret_val= stat_print(
15924 thd, innobase_hton_name,
15925 static_cast<uint>(strlen(innobase_hton_name)),
15926 STRING_WITH_LEN(""), str, static_cast<uint>(flen));
15927
15928 my_free(str);
15929
15930 DBUG_RETURN(ret_val);
15931 }
15932
15933 /** Callback for collecting mutex statistics */
15934 struct ShowStatus {
15935
15936 /** For tracking the mutex metrics */
15937 struct Value {
15938
15939 /** Constructor
15940 @param[in] name Name of the mutex
15941 @param[in] spins Number of spins
15942 @param[in] os_waits OS waits so far
15943 @param[in] calls Number of calls to enter() */
ValueShowStatus::Value15944 Value(const char* name,
15945 ulint spins,
15946 uint64_t waits,
15947 uint64_t calls)
15948 :
15949 m_name(name),
15950 m_spins(spins),
15951 m_waits(waits),
15952 m_calls(calls)
15953 {
15954 /* No op */
15955 }
15956
15957 /** Mutex name */
15958 std::string m_name;
15959
15960 /** Spins so far */
15961 ulint m_spins;
15962
15963 /** Waits so far */
15964 uint64_t m_waits;
15965
15966 /** Number of calls so far */
15967 uint64_t m_calls;
15968 };
15969
15970 /** Order by m_waits, in descending order. */
15971 struct OrderByWaits: public std::binary_function<Value, Value, bool>
15972 {
15973 /** @return true if rhs < lhs */
operator ()ShowStatus::OrderByWaits15974 bool operator()(
15975 const Value& lhs,
15976 const Value& rhs) const
15977 UNIV_NOTHROW
15978 {
15979 return(rhs.m_waits < lhs.m_waits);
15980 }
15981 };
15982
15983 typedef std::vector<Value, ut_allocator<Value> > Values;
15984
15985 /** Collect the individual latch counts */
15986 struct GetCount {
15987 typedef latch_meta_t::CounterType::Count Count;
15988
15989 /** Constructor
15990 @param[in] name Latch name
15991 @param[in,out] values Put the values here */
GetCountShowStatus::GetCount15992 GetCount(
15993 const char* name,
15994 Values* values)
15995 UNIV_NOTHROW
15996 :
15997 m_name(name),
15998 m_values(values)
15999 {
16000 /* No op */
16001 }
16002
16003 /** Collect the latch metrics. Ignore entries where the
16004 spins and waits are zero.
16005 @param[in] count The latch metrics */
operator ()ShowStatus::GetCount16006 void operator()(Count* count) const UNIV_NOTHROW
16007 {
16008 if (count->m_spins > 0 || count->m_waits > 0) {
16009
16010 m_values->push_back(Value(
16011 m_name,
16012 count->m_spins,
16013 count->m_waits,
16014 count->m_calls));
16015 }
16016 }
16017
16018 /** The latch name */
16019 const char* m_name;
16020
16021 /** For collecting the active mutex stats. */
16022 Values* m_values;
16023 };
16024
16025 /** Constructor */
ShowStatusShowStatus16026 ShowStatus() { }
16027
16028 /** Callback for collecting the stats
16029 @param[in] latch_meta Latch meta data
16030 @return always returns true */
operator ()ShowStatus16031 bool operator()(latch_meta_t& latch_meta)
16032 UNIV_NOTHROW
16033 {
16034 latch_meta.get_counter()->iterate(
16035 GetCount(latch_meta.get_name(), &m_values));
16036
16037 return(true);
16038 }
16039
16040 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16041 The table structure is like so: Engine | Mutex Name | Status
16042 We store the metrics in the "Status" column as:
16043
16044 spins=N,waits=N,calls=N"
16045
16046 The user has to parse the dataunfortunately
16047 @param[in,out] thd the MySQL query thread of the caller
16048 @param[in,out] stat_print function for printing statistics
16049 @return true on success. */
16050 bool to_string(
16051 THD* thd,
16052 stat_print_fn* stat_print)
16053 UNIV_NOTHROW;
16054
16055 /** For collecting the active mutex stats. */
16056 Values m_values;
16057 };
16058
16059 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16060 The table structure is like so: Engine | Mutex Name | Status
16061 We store the metrics in the "Status" column as:
16062
16063 spins=N,waits=N,calls=N"
16064
16065 The user has to parse the dataunfortunately
16066 @param[in,out] thd the MySQL query thread of the caller
16067 @param[in,out] stat_print function for printing statistics
16068 @return true on success. */
16069 bool
to_string(THD * thd,stat_print_fn * stat_print)16070 ShowStatus::to_string(
16071 THD* thd,
16072 stat_print_fn* stat_print)
16073 UNIV_NOTHROW
16074 {
16075 uint hton_name_len = (uint) strlen(innobase_hton_name);
16076
16077 std::sort(m_values.begin(), m_values.end(), OrderByWaits());
16078
16079 Values::iterator end = m_values.end();
16080
16081 for (Values::iterator it = m_values.begin(); it != end; ++it) {
16082
16083 int name_len;
16084 char name_buf[IO_SIZE];
16085
16086 name_len = snprintf(
16087 name_buf, sizeof(name_buf), "%s", it->m_name.c_str());
16088
16089 int status_len;
16090 char status_buf[IO_SIZE];
16091
16092 status_len = snprintf(
16093 status_buf, sizeof(status_buf),
16094 "spins=%lu,waits=%lu,calls=%llu",
16095 static_cast<ulong>(it->m_spins),
16096 static_cast<long>(it->m_waits),
16097 (ulonglong) it->m_calls);
16098
16099 if (stat_print(thd, innobase_hton_name,
16100 hton_name_len,
16101 name_buf, static_cast<uint>(name_len),
16102 status_buf, static_cast<uint>(status_len))) {
16103
16104 return(false);
16105 }
16106 }
16107
16108 return(true);
16109 }
16110
16111 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16112 @param[in,out] hton the innodb handlerton
16113 @param[in,out] thd the MySQL query thread of the caller
16114 @param[in,out] stat_print function for printing statistics
16115 @return 0 on success. */
16116 static
16117 int
innodb_show_mutex_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16118 innodb_show_mutex_status(
16119 handlerton*
16120 #ifdef DBUG_ASSERT_EXISTS
16121 hton
16122 #endif
16123 ,
16124 THD* thd,
16125 stat_print_fn* stat_print)
16126 {
16127 DBUG_ENTER("innodb_show_mutex_status");
16128
16129 ShowStatus collector;
16130
16131 DBUG_ASSERT(hton == innodb_hton_ptr);
16132
16133 mutex_monitor.iterate(collector);
16134
16135 if (!collector.to_string(thd, stat_print)) {
16136 DBUG_RETURN(1);
16137 }
16138
16139 DBUG_RETURN(0);
16140 }
16141
16142 /** Implements the SHOW MUTEX STATUS command.
16143 @param[in,out] hton the innodb handlerton
16144 @param[in,out] thd the MySQL query thread of the caller
16145 @param[in,out] stat_print function for printing statistics
16146 @return 0 on success. */
16147 static
16148 int
innodb_show_rwlock_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16149 innodb_show_rwlock_status(
16150 handlerton*
16151 #ifdef DBUG_ASSERT_EXISTS
16152 hton
16153 #endif
16154 ,
16155 THD* thd,
16156 stat_print_fn* stat_print)
16157 {
16158 DBUG_ENTER("innodb_show_rwlock_status");
16159
16160 rw_lock_t* block_rwlock = NULL;
16161 ulint block_rwlock_oswait_count = 0;
16162 uint hton_name_len = (uint) strlen(innobase_hton_name);
16163
16164 DBUG_ASSERT(hton == innodb_hton_ptr);
16165
16166 mutex_enter(&rw_lock_list_mutex);
16167
16168 for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
16169 rw_lock != NULL;
16170 rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
16171
16172 if (rw_lock->count_os_wait == 0) {
16173 continue;
16174 }
16175
16176 int buf1len;
16177 char buf1[IO_SIZE];
16178
16179 if (rw_lock->is_block_lock) {
16180
16181 block_rwlock = rw_lock;
16182 block_rwlock_oswait_count += rw_lock->count_os_wait;
16183
16184 continue;
16185 }
16186
16187 buf1len = snprintf(
16188 buf1, sizeof buf1, "rwlock: %s:%u",
16189 innobase_basename(rw_lock->cfile_name),
16190 rw_lock->cline);
16191
16192 int buf2len;
16193 char buf2[IO_SIZE];
16194
16195 buf2len = snprintf(
16196 buf2, sizeof buf2, "waits=%u",
16197 rw_lock->count_os_wait);
16198
16199 if (stat_print(thd, innobase_hton_name,
16200 hton_name_len,
16201 buf1, static_cast<uint>(buf1len),
16202 buf2, static_cast<uint>(buf2len))) {
16203
16204 mutex_exit(&rw_lock_list_mutex);
16205
16206 DBUG_RETURN(1);
16207 }
16208 }
16209
16210 if (block_rwlock != NULL) {
16211
16212 int buf1len;
16213 char buf1[IO_SIZE];
16214
16215 buf1len = snprintf(
16216 buf1, sizeof buf1, "sum rwlock: %s:%u",
16217 innobase_basename(block_rwlock->cfile_name),
16218 block_rwlock->cline);
16219
16220 int buf2len;
16221 char buf2[IO_SIZE];
16222
16223 buf2len = snprintf(
16224 buf2, sizeof buf2, "waits=" ULINTPF,
16225 block_rwlock_oswait_count);
16226
16227 if (stat_print(thd, innobase_hton_name,
16228 hton_name_len,
16229 buf1, static_cast<uint>(buf1len),
16230 buf2, static_cast<uint>(buf2len))) {
16231
16232 mutex_exit(&rw_lock_list_mutex);
16233
16234 DBUG_RETURN(1);
16235 }
16236 }
16237
16238 mutex_exit(&rw_lock_list_mutex);
16239
16240 DBUG_RETURN(0);
16241 }
16242
16243 /** Implements the SHOW MUTEX STATUS command.
16244 @param[in,out] hton the innodb handlerton
16245 @param[in,out] thd the MySQL query thread of the caller
16246 @param[in,out] stat_print function for printing statistics
16247 @return 0 on success. */
16248 static
16249 int
innodb_show_latch_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16250 innodb_show_latch_status(
16251 handlerton* hton,
16252 THD* thd,
16253 stat_print_fn* stat_print)
16254 {
16255 int ret = innodb_show_mutex_status(hton, thd, stat_print);
16256
16257 if (ret != 0) {
16258 return(ret);
16259 }
16260
16261 return(innodb_show_rwlock_status(hton, thd, stat_print));
16262 }
16263
16264 /************************************************************************//**
16265 Return 0 on success and non-zero on failure. Note: the bool return type
16266 seems to be abused here, should be an int. */
16267 static
16268 bool
innobase_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)16269 innobase_show_status(
16270 /*=================*/
16271 handlerton* hton, /*!< in: the innodb handlerton */
16272 THD* thd, /*!< in: the MySQL query thread
16273 of the caller */
16274 stat_print_fn* stat_print,
16275 enum ha_stat_type stat_type)
16276 {
16277 DBUG_ASSERT(hton == innodb_hton_ptr);
16278
16279 switch (stat_type) {
16280 case HA_ENGINE_STATUS:
16281 /* Non-zero return value means there was an error. */
16282 return(innodb_show_status(hton, thd, stat_print) != 0);
16283
16284 case HA_ENGINE_MUTEX:
16285 return(innodb_show_latch_status(hton, thd, stat_print) != 0);
16286
16287 case HA_ENGINE_LOGS:
16288 /* Not handled */
16289 break;
16290 }
16291
16292 /* Success */
16293 return(false);
16294 }
16295 /*********************************************************************//**
16296 Returns number of THR_LOCK locks used for one instance of InnoDB table.
16297 InnoDB no longer relies on THR_LOCK locks so 0 value is returned.
16298 Instead of THR_LOCK locks InnoDB relies on combination of metadata locks
16299 (e.g. for LOCK TABLES and DDL) and its own locking subsystem.
16300 Note that even though this method returns 0, SQL-layer still calls
16301 ::store_lock(), ::start_stmt() and ::external_lock() methods for InnoDB
16302 tables. */
16303
16304 uint
lock_count(void) const16305 ha_innobase::lock_count(void) const
16306 /*===============================*/
16307 {
16308 return 0;
16309 }
16310
16311 /*****************************************************************//**
16312 Supposed to convert a MySQL table lock stored in the 'lock' field of the
16313 handle to a proper type before storing pointer to the lock into an array
16314 of pointers.
16315 In practice, since InnoDB no longer relies on THR_LOCK locks and its
16316 lock_count() method returns 0 it just informs storage engine about type
16317 of THR_LOCK which SQL-layer would have acquired for this specific statement
16318 on this specific table.
16319 MySQL also calls this if it wants to reset some table locks to a not-locked
16320 state during the processing of an SQL query. An example is that during a
16321 SELECT the read lock is released early on the 'const' tables where we only
16322 fetch one row. MySQL does not call this when it releases all locks at the
16323 end of an SQL statement.
16324 @return pointer to the current element in the 'to' array. */
16325
16326 THR_LOCK_DATA**
store_lock(THD * thd,THR_LOCK_DATA ** to,thr_lock_type lock_type)16327 ha_innobase::store_lock(
16328 /*====================*/
16329 THD* thd, /*!< in: user thread handle */
16330 THR_LOCK_DATA** to, /*!< in: pointer to the current
16331 element in an array of pointers
16332 to lock structs;
16333 only used as return value */
16334 thr_lock_type lock_type) /*!< in: lock type to store in
16335 'lock'; this may also be
16336 TL_IGNORE */
16337 {
16338 /* Note that trx in this function is NOT necessarily m_prebuilt->trx
16339 because we call update_thd() later, in ::external_lock()! Failure to
16340 understand this caused a serious memory corruption bug in 5.1.11. */
16341
16342 trx_t* trx = check_trx_exists(thd);
16343
16344 /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
16345 Be careful to ignore TL_IGNORE if we are going to do something with
16346 only 'real' locks! */
16347
16348 /* If no MySQL table is in use, we need to set the isolation level
16349 of the transaction. */
16350
16351 if (lock_type != TL_IGNORE
16352 && trx->n_mysql_tables_in_use == 0) {
16353 trx->isolation_level = innobase_map_isolation_level(
16354 (enum_tx_isolation) thd_tx_isolation(thd));
16355
16356 if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
16357
16358 /* At low transaction isolation levels we let
16359 each consistent read set its own snapshot */
16360 trx->read_view.close();
16361 }
16362 }
16363
16364 DBUG_ASSERT(EQ_CURRENT_THD(thd));
16365 const bool in_lock_tables = thd_in_lock_tables(thd);
16366 const int sql_command = thd_sql_command(thd);
16367
16368 if (srv_read_only_mode
16369 && (sql_command == SQLCOM_UPDATE
16370 || sql_command == SQLCOM_INSERT
16371 || sql_command == SQLCOM_REPLACE
16372 || sql_command == SQLCOM_DROP_TABLE
16373 || sql_command == SQLCOM_ALTER_TABLE
16374 || sql_command == SQLCOM_OPTIMIZE
16375 || (sql_command == SQLCOM_CREATE_TABLE
16376 && (lock_type >= TL_WRITE_CONCURRENT_INSERT
16377 && lock_type <= TL_WRITE))
16378 || sql_command == SQLCOM_CREATE_INDEX
16379 || sql_command == SQLCOM_DROP_INDEX
16380 || sql_command == SQLCOM_CREATE_SEQUENCE
16381 || sql_command == SQLCOM_DROP_SEQUENCE
16382 || sql_command == SQLCOM_DELETE)) {
16383
16384 ib_senderrf(trx->mysql_thd,
16385 IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
16386
16387 } else if (sql_command == SQLCOM_FLUSH
16388 && lock_type == TL_READ_NO_INSERT) {
16389
16390 /* Check for FLUSH TABLES ... WITH READ LOCK */
16391
16392 /* Note: This call can fail, but there is no way to return
16393 the error to the caller. We simply ignore it for now here
16394 and push the error code to the caller where the error is
16395 detected in the function. */
16396
16397 dberr_t err = row_quiesce_set_state(
16398 m_prebuilt->table, QUIESCE_START, trx);
16399
16400 ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
16401
16402 if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
16403 m_prebuilt->select_lock_type = LOCK_S;
16404 m_prebuilt->stored_select_lock_type = LOCK_S;
16405 } else {
16406 m_prebuilt->select_lock_type = LOCK_NONE;
16407 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16408 }
16409
16410 /* Check for DROP TABLE */
16411 } else if (sql_command == SQLCOM_DROP_TABLE ||
16412 sql_command == SQLCOM_DROP_SEQUENCE) {
16413
16414 /* MySQL calls this function in DROP TABLE though this table
16415 handle may belong to another thd that is running a query. Let
16416 us in that case skip any changes to the m_prebuilt struct. */
16417
16418 /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
16419 } else if ((lock_type == TL_READ && in_lock_tables)
16420 || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
16421 || lock_type == TL_READ_WITH_SHARED_LOCKS
16422 || lock_type == TL_READ_NO_INSERT
16423 || (lock_type != TL_IGNORE
16424 && sql_command != SQLCOM_SELECT)) {
16425
16426 /* The OR cases above are in this order:
16427 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
16428 are processing a stored procedure or function, or
16429 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
16430 3) this is a SELECT ... IN SHARE MODE, or
16431 4) we are doing a complex SQL statement like
16432 INSERT INTO ... SELECT ... and the logical logging (MySQL
16433 binlog) requires the use of a locking read, or
16434 MySQL is doing LOCK TABLES ... READ.
16435 5) we let InnoDB do locking reads for all SQL statements that
16436 are not simple SELECTs; note that select_lock_type in this
16437 case may get strengthened in ::external_lock() to LOCK_X.
16438 Note that we MUST use a locking read in all data modifying
16439 SQL statements, because otherwise the execution would not be
16440 serializable, and also the results from the update could be
16441 unexpected if an obsolete consistent read view would be
16442 used. */
16443
16444 /* Use consistent read for checksum table */
16445
16446 if (sql_command == SQLCOM_CHECKSUM
16447 || sql_command == SQLCOM_CREATE_SEQUENCE
16448 || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
16449 || ((srv_locks_unsafe_for_binlog
16450 || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
16451 && trx->isolation_level != TRX_ISO_SERIALIZABLE
16452 && (lock_type == TL_READ
16453 || lock_type == TL_READ_NO_INSERT)
16454 && (sql_command == SQLCOM_INSERT_SELECT
16455 || sql_command == SQLCOM_REPLACE_SELECT
16456 || sql_command == SQLCOM_UPDATE
16457 || sql_command == SQLCOM_CREATE_SEQUENCE
16458 || sql_command == SQLCOM_CREATE_TABLE))) {
16459
16460 /* If we either have innobase_locks_unsafe_for_binlog
16461 option set or this session is using READ COMMITTED
16462 isolation level and isolation level of the transaction
16463 is not set to serializable and MySQL is doing
16464 INSERT INTO...SELECT or REPLACE INTO...SELECT
16465 or UPDATE ... = (SELECT ...) or CREATE ...
16466 SELECT... without FOR UPDATE or IN SHARE
16467 MODE in select, then we use consistent read
16468 for select. */
16469
16470 m_prebuilt->select_lock_type = LOCK_NONE;
16471 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16472 } else {
16473 m_prebuilt->select_lock_type = LOCK_S;
16474 m_prebuilt->stored_select_lock_type = LOCK_S;
16475 }
16476
16477 } else if (lock_type != TL_IGNORE) {
16478
16479 /* We set possible LOCK_X value in external_lock, not yet
16480 here even if this would be SELECT ... FOR UPDATE */
16481
16482 m_prebuilt->select_lock_type = LOCK_NONE;
16483 m_prebuilt->stored_select_lock_type = LOCK_NONE;
16484 }
16485
16486 if (!trx_is_started(trx)
16487 && (m_prebuilt->select_lock_type != LOCK_NONE
16488 || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
16489
16490 trx->will_lock = true;
16491 }
16492
16493 return(to);
16494 }
16495
16496 /*********************************************************************//**
16497 Read the next autoinc value. Acquire the relevant locks before reading
16498 the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
16499 on return and all relevant locks acquired.
16500 @return DB_SUCCESS or error code */
16501
16502 dberr_t
innobase_get_autoinc(ulonglong * value)16503 ha_innobase::innobase_get_autoinc(
16504 /*==============================*/
16505 ulonglong* value) /*!< out: autoinc value */
16506 {
16507 *value = 0;
16508
16509 m_prebuilt->autoinc_error = innobase_lock_autoinc();
16510
16511 if (m_prebuilt->autoinc_error == DB_SUCCESS) {
16512
16513 /* Determine the first value of the interval */
16514 *value = dict_table_autoinc_read(m_prebuilt->table);
16515
16516 /* It should have been initialized during open. */
16517 if (*value == 0) {
16518 m_prebuilt->autoinc_error = DB_UNSUPPORTED;
16519 m_prebuilt->table->autoinc_mutex.unlock();
16520 }
16521 }
16522
16523 return(m_prebuilt->autoinc_error);
16524 }
16525
16526 /*******************************************************************//**
16527 This function reads the global auto-inc counter. It doesn't use the
16528 AUTOINC lock even if the lock mode is set to TRADITIONAL.
16529 @return the autoinc value */
16530
16531 ulonglong
innobase_peek_autoinc(void)16532 ha_innobase::innobase_peek_autoinc(void)
16533 /*====================================*/
16534 {
16535 ulonglong auto_inc;
16536 dict_table_t* innodb_table;
16537
16538 ut_a(m_prebuilt != NULL);
16539 ut_a(m_prebuilt->table != NULL);
16540
16541 innodb_table = m_prebuilt->table;
16542
16543 innodb_table->autoinc_mutex.lock();
16544
16545 auto_inc = dict_table_autoinc_read(innodb_table);
16546
16547 if (auto_inc == 0) {
16548 ib::info() << "AUTOINC next value generation is disabled for"
16549 " '" << innodb_table->name << "'";
16550 }
16551
16552 innodb_table->autoinc_mutex.unlock();
16553
16554 return(auto_inc);
16555 }
16556
16557 /*********************************************************************//**
16558 Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
16559
16560 void
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)16561 ha_innobase::get_auto_increment(
16562 /*============================*/
16563 ulonglong offset, /*!< in: table autoinc offset */
16564 ulonglong increment, /*!< in: table autoinc
16565 increment */
16566 ulonglong nb_desired_values, /*!< in: number of values
16567 reqd */
16568 ulonglong* first_value, /*!< out: the autoinc value */
16569 ulonglong* nb_reserved_values) /*!< out: count of reserved
16570 values */
16571 {
16572 trx_t* trx;
16573 dberr_t error;
16574 ulonglong autoinc = 0;
16575
16576 /* Prepare m_prebuilt->trx in the table handle */
16577 update_thd(ha_thd());
16578
16579 error = innobase_get_autoinc(&autoinc);
16580
16581 if (error != DB_SUCCESS) {
16582 *first_value = (~(ulonglong) 0);
16583 return;
16584 }
16585
16586 /* This is a hack, since nb_desired_values seems to be accurate only
16587 for the first call to get_auto_increment() for multi-row INSERT and
16588 meaningless for other statements e.g, LOAD etc. Subsequent calls to
16589 this method for the same statement results in different values which
16590 don't make sense. Therefore we store the value the first time we are
16591 called and count down from that as rows are written (see write_row()).
16592 */
16593
16594 trx = m_prebuilt->trx;
16595
16596 /* Note: We can't rely on *first_value since some MySQL engines,
16597 in particular the partition engine, don't initialize it to 0 when
16598 invoking this method. So we are not sure if it's guaranteed to
16599 be 0 or not. */
16600
16601 /* We need the upper limit of the col type to check for
16602 whether we update the table autoinc counter or not. */
16603 ulonglong col_max_value =
16604 table->next_number_field->get_max_int_value();
16605
16606 /** The following logic is needed to avoid duplicate key error
16607 for autoincrement column.
16608
16609 (1) InnoDB gives the current autoincrement value with respect
16610 to increment and offset value.
16611
16612 (2) Basically it does compute_next_insert_id() logic inside InnoDB
16613 to avoid the current auto increment value changed by handler layer.
16614
16615 (3) It is restricted only for insert operations. */
16616
16617 if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
16618 && autoinc < col_max_value) {
16619
16620 ulonglong prev_auto_inc = autoinc;
16621
16622 autoinc = ((autoinc - 1) + increment - offset)/ increment;
16623
16624 autoinc = autoinc * increment + offset;
16625
16626 /* If autoinc exceeds the col_max_value then reset
16627 to old autoinc value. Because in case of non-strict
16628 sql mode, boundary value is not considered as error. */
16629
16630 if (autoinc >= col_max_value) {
16631 autoinc = prev_auto_inc;
16632 }
16633
16634 ut_ad(autoinc > 0);
16635 }
16636
16637 /* Called for the first time ? */
16638 if (trx->n_autoinc_rows == 0) {
16639
16640 trx->n_autoinc_rows = (ulint) nb_desired_values;
16641
16642 /* It's possible for nb_desired_values to be 0:
16643 e.g., INSERT INTO T1(C) SELECT C FROM T2; */
16644 if (nb_desired_values == 0) {
16645
16646 trx->n_autoinc_rows = 1;
16647 }
16648
16649 set_if_bigger(*first_value, autoinc);
16650 /* Not in the middle of a mult-row INSERT. */
16651 } else if (m_prebuilt->autoinc_last_value == 0) {
16652 set_if_bigger(*first_value, autoinc);
16653 }
16654
16655 if (*first_value > col_max_value) {
16656 /* Out of range number. Let handler::update_auto_increment()
16657 take care of this */
16658 m_prebuilt->autoinc_last_value = 0;
16659 m_prebuilt->table->autoinc_mutex.unlock();
16660 *nb_reserved_values= 0;
16661 return;
16662 }
16663
16664 *nb_reserved_values = trx->n_autoinc_rows;
16665
16666 /* With old style AUTOINC locking we only update the table's
16667 AUTOINC counter after attempting to insert the row. */
16668 if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
16669 ulonglong current;
16670 ulonglong next_value;
16671
16672 current = *first_value;
16673
16674 /* Compute the last value in the interval */
16675 next_value = innobase_next_autoinc(
16676 current, *nb_reserved_values, increment, offset,
16677 col_max_value);
16678
16679 m_prebuilt->autoinc_last_value = next_value;
16680
16681 if (m_prebuilt->autoinc_last_value < *first_value) {
16682 *first_value = (~(ulonglong) 0);
16683 } else {
16684 /* Update the table autoinc variable */
16685 dict_table_autoinc_update_if_greater(
16686 m_prebuilt->table,
16687 m_prebuilt->autoinc_last_value);
16688 }
16689 } else {
16690 /* This will force write_row() into attempting an update
16691 of the table's AUTOINC counter. */
16692 m_prebuilt->autoinc_last_value = 0;
16693 }
16694
16695 /* The increment to be used to increase the AUTOINC value, we use
16696 this in write_row() and update_row() to increase the autoinc counter
16697 for columns that are filled by the user. We need the offset and
16698 the increment. */
16699 m_prebuilt->autoinc_offset = offset;
16700 m_prebuilt->autoinc_increment = increment;
16701
16702 m_prebuilt->table->autoinc_mutex.unlock();
16703 }
16704
16705 /*******************************************************************//**
16706 See comment in handler.cc */
16707
16708 bool
get_error_message(int error,String * buf)16709 ha_innobase::get_error_message(
16710 /*===========================*/
16711 int error,
16712 String* buf)
16713 {
16714 trx_t* trx = check_trx_exists(ha_thd());
16715
16716 if (error == HA_ERR_DECRYPTION_FAILED) {
16717 const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.";
16718 buf->copy(msg, (uint)strlen(msg), system_charset_info);
16719 } else {
16720 buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
16721 system_charset_info);
16722 }
16723
16724 return(FALSE);
16725 }
16726
16727 /** Retrieves the names of the table and the key for which there was a
16728 duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
16729
16730 If any of the names is not available, then this method will return
16731 false and will not change any of child_table_name or child_key_name.
16732
16733 @param[out] child_table_name Table name
16734 @param[in] child_table_name_len Table name buffer size
16735 @param[out] child_key_name Key name
16736 @param[in] child_key_name_len Key name buffer size
16737
16738 @retval true table and key names were available and were written into the
16739 corresponding out parameters.
16740 @retval false table and key names were not available, the out parameters
16741 were not touched. */
16742 bool
get_foreign_dup_key(char * child_table_name,uint child_table_name_len,char * child_key_name,uint child_key_name_len)16743 ha_innobase::get_foreign_dup_key(
16744 /*=============================*/
16745 char* child_table_name,
16746 uint child_table_name_len,
16747 char* child_key_name,
16748 uint child_key_name_len)
16749 {
16750 const dict_index_t* err_index;
16751
16752 ut_a(m_prebuilt->trx != NULL);
16753 ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
16754
16755 err_index = trx_get_error_info(m_prebuilt->trx);
16756
16757 if (err_index == NULL) {
16758 return(false);
16759 }
16760 /* else */
16761
16762 /* copy table name (and convert from filename-safe encoding to
16763 system_charset_info) */
16764 char* p = strchr(err_index->table->name.m_name, '/');
16765
16766 /* strip ".../" prefix if any */
16767 if (p != NULL) {
16768 p++;
16769 } else {
16770 p = err_index->table->name.m_name;
16771 }
16772
16773 size_t len;
16774
16775 len = filename_to_tablename(p, child_table_name, child_table_name_len);
16776
16777 child_table_name[len] = '\0';
16778
16779 /* copy index name */
16780 snprintf(child_key_name, child_key_name_len, "%s",
16781 err_index->name());
16782
16783 return(true);
16784 }
16785
16786 /*******************************************************************//**
16787 Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
16788 If there is no explicitly declared non-null unique key or a primary key, then
16789 InnoDB internally uses the row id as the primary key.
16790 @return < 0 if ref1 < ref2, 0 if equal, else > 0 */
16791
16792 int
cmp_ref(const uchar * ref1,const uchar * ref2)16793 ha_innobase::cmp_ref(
16794 /*=================*/
16795 const uchar* ref1, /*!< in: an (internal) primary key value in the
16796 MySQL key value format */
16797 const uchar* ref2) /*!< in: an (internal) primary key value in the
16798 MySQL key value format */
16799 {
16800 enum_field_types mysql_type;
16801 Field* field;
16802 KEY_PART_INFO* key_part;
16803 KEY_PART_INFO* key_part_end;
16804 uint len1;
16805 uint len2;
16806 int result;
16807
16808 if (m_prebuilt->clust_index_was_generated) {
16809 /* The 'ref' is an InnoDB row id */
16810
16811 return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
16812 }
16813
16814 /* Do a type-aware comparison of primary key fields. PK fields
16815 are always NOT NULL, so no checks for NULL are performed. */
16816
16817 key_part = table->key_info[table->s->primary_key].key_part;
16818
16819 key_part_end = key_part
16820 + table->key_info[table->s->primary_key].user_defined_key_parts;
16821
16822 for (; key_part != key_part_end; ++key_part) {
16823 field = key_part->field;
16824 mysql_type = field->type();
16825
16826 if (mysql_type == MYSQL_TYPE_TINY_BLOB
16827 || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
16828 || mysql_type == MYSQL_TYPE_BLOB
16829 || mysql_type == MYSQL_TYPE_LONG_BLOB) {
16830
16831 /* In the MySQL key value format, a column prefix of
16832 a BLOB is preceded by a 2-byte length field */
16833
16834 len1 = innobase_read_from_2_little_endian(ref1);
16835 len2 = innobase_read_from_2_little_endian(ref2);
16836
16837 result = ((Field_blob*) field)->cmp(
16838 ref1 + 2, len1, ref2 + 2, len2);
16839 } else {
16840 result = field->key_cmp(ref1, ref2);
16841 }
16842
16843 if (result) {
16844
16845 return(result);
16846 }
16847
16848 ref1 += key_part->store_length;
16849 ref2 += key_part->store_length;
16850 }
16851
16852 return(0);
16853 }
16854
16855 /*******************************************************************//**
16856 Ask InnoDB if a query to a table can be cached.
16857 @return TRUE if query caching of the table is permitted */
16858
16859 my_bool
register_query_cache_table(THD * thd,const char * table_key,uint key_length,qc_engine_callback * call_back,ulonglong * engine_data)16860 ha_innobase::register_query_cache_table(
16861 /*====================================*/
16862 THD* thd, /*!< in: user thread handle */
16863 const char* table_key, /*!< in: normalized path to the
16864 table */
16865 uint key_length, /*!< in: length of the normalized
16866 path to the table */
16867 qc_engine_callback*
16868 call_back, /*!< out: pointer to function for
16869 checking if query caching
16870 is permitted */
16871 ulonglong *engine_data) /*!< in/out: data to call_back */
16872 {
16873 *engine_data = 0;
16874 *call_back = innobase_query_caching_of_table_permitted;
16875
16876 return(innobase_query_caching_of_table_permitted(
16877 thd, table_key,
16878 static_cast<uint>(key_length),
16879 engine_data));
16880 }
16881
16882 /******************************************************************//**
16883 This function is used to find the storage length in bytes of the first n
16884 characters for prefix indexes using a multibyte character set. The function
16885 finds charset information and returns length of prefix_len characters in the
16886 index field in bytes.
16887 @return number of bytes occupied by the first n characters */
16888 ulint
innobase_get_at_most_n_mbchars(ulint charset_id,ulint prefix_len,ulint data_len,const char * str)16889 innobase_get_at_most_n_mbchars(
16890 /*===========================*/
16891 ulint charset_id, /*!< in: character set id */
16892 ulint prefix_len, /*!< in: prefix length in bytes of the index
16893 (this has to be divided by mbmaxlen to get the
16894 number of CHARACTERS n in the prefix) */
16895 ulint data_len, /*!< in: length of the string in bytes */
16896 const char* str) /*!< in: character string */
16897 {
16898 ulint char_length; /*!< character length in bytes */
16899 ulint n_chars; /*!< number of characters in prefix */
16900 CHARSET_INFO* charset; /*!< charset used in the field */
16901
16902 charset = get_charset((uint) charset_id, MYF(MY_WME));
16903
16904 ut_ad(charset);
16905 ut_ad(charset->mbmaxlen);
16906
16907 /* Calculate how many characters at most the prefix index contains */
16908
16909 n_chars = prefix_len / charset->mbmaxlen;
16910
16911 /* If the charset is multi-byte, then we must find the length of the
16912 first at most n chars in the string. If the string contains less
16913 characters than n, then we return the length to the end of the last
16914 character. */
16915
16916 if (charset->mbmaxlen > 1) {
16917 /* my_charpos() returns the byte length of the first n_chars
16918 characters, or a value bigger than the length of str, if
16919 there were not enough full characters in str.
16920
16921 Why does the code below work:
16922 Suppose that we are looking for n UTF-8 characters.
16923
16924 1) If the string is long enough, then the prefix contains at
16925 least n complete UTF-8 characters + maybe some extra
16926 characters + an incomplete UTF-8 character. No problem in
16927 this case. The function returns the pointer to the
16928 end of the nth character.
16929
16930 2) If the string is not long enough, then the string contains
16931 the complete value of a column, that is, only complete UTF-8
16932 characters, and we can store in the column prefix index the
16933 whole string. */
16934
16935 char_length= my_charpos(charset, str, str + data_len, n_chars);
16936 if (char_length > data_len) {
16937 char_length = data_len;
16938 }
16939 } else if (data_len < prefix_len) {
16940
16941 char_length = data_len;
16942
16943 } else {
16944
16945 char_length = prefix_len;
16946 }
16947
16948 return(char_length);
16949 }
16950
16951 /*******************************************************************//**
16952 This function is used to prepare an X/Open XA distributed transaction.
16953 @return 0 or error number */
16954 static
16955 int
innobase_xa_prepare(handlerton * hton,THD * thd,bool prepare_trx)16956 innobase_xa_prepare(
16957 /*================*/
16958 handlerton* hton, /*!< in: InnoDB handlerton */
16959 THD* thd, /*!< in: handle to the MySQL thread of
16960 the user whose XA transaction should
16961 be prepared */
16962 bool prepare_trx) /*!< in: true - prepare transaction
16963 false - the current SQL statement
16964 ended */
16965 {
16966 trx_t* trx = check_trx_exists(thd);
16967
16968 DBUG_ASSERT(hton == innodb_hton_ptr);
16969
16970 thd_get_xid(thd, (MYSQL_XID*) trx->xid);
16971
16972 innobase_srv_conc_force_exit_innodb(trx);
16973
16974 if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
16975
16976 sql_print_error("Transaction not registered for MariaDB 2PC,"
16977 " but transaction is active");
16978 }
16979
16980 if (prepare_trx
16981 || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
16982
16983 /* We were instructed to prepare the whole transaction, or
16984 this is an SQL statement end and autocommit is on */
16985
16986 ut_ad(trx_is_registered_for_2pc(trx));
16987
16988 trx_prepare_for_mysql(trx);
16989 } else {
16990 /* We just mark the SQL statement ended and do not do a
16991 transaction prepare */
16992
16993 /* If we had reserved the auto-inc lock for some
16994 table in this SQL statement we release it now */
16995
16996 lock_unlock_table_autoinc(trx);
16997
16998 /* Store the current undo_no of the transaction so that we
16999 know where to roll back if we have to roll back the next
17000 SQL statement */
17001
17002 trx_mark_sql_stat_end(trx);
17003 }
17004
17005 if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
17006 && (prepare_trx
17007 || !thd_test_options(
17008 thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17009
17010 /* For mysqlbackup to work the order of transactions in binlog
17011 and InnoDB must be the same. Consider the situation
17012
17013 thread1> prepare; write to binlog; ...
17014 <context switch>
17015 thread2> prepare; write to binlog; commit
17016 thread1> ... commit
17017
17018 The server guarantees that writes to the binary log
17019 and commits are in the same order, so we do not have
17020 to handle this case. */
17021 }
17022
17023 return(0);
17024 }
17025
17026 /*******************************************************************//**
17027 This function is used to recover X/Open XA distributed transactions.
17028 @return number of prepared transactions stored in xid_list */
17029 static
17030 int
innobase_xa_recover(handlerton * hton,XID * xid_list,uint len)17031 innobase_xa_recover(
17032 /*================*/
17033 handlerton* hton, /*!< in: InnoDB handlerton */
17034 XID* xid_list,/*!< in/out: prepared transactions */
17035 uint len) /*!< in: number of slots in xid_list */
17036 {
17037 DBUG_ASSERT(hton == innodb_hton_ptr);
17038
17039 if (len == 0 || xid_list == NULL) {
17040
17041 return(0);
17042 }
17043
17044 return(trx_recover_for_mysql(xid_list, len));
17045 }
17046
17047 /*******************************************************************//**
17048 This function is used to commit one X/Open XA distributed transaction
17049 which is in the prepared state
17050 @return 0 or error number */
17051 static
17052 int
innobase_commit_by_xid(handlerton * hton,XID * xid)17053 innobase_commit_by_xid(
17054 /*===================*/
17055 handlerton* hton,
17056 XID* xid) /*!< in: X/Open XA transaction identification */
17057 {
17058 DBUG_ASSERT(hton == innodb_hton_ptr);
17059
17060 if (high_level_read_only) {
17061 return(XAER_RMFAIL);
17062 }
17063
17064 if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17065 /* use cases are: disconnected xa, slave xa, recovery */
17066 innobase_commit_low(trx);
17067 ut_ad(trx->mysql_thd == NULL);
17068 trx_deregister_from_2pc(trx);
17069 ut_ad(!trx->will_lock); /* trx cache requirement */
17070 trx->free();
17071
17072 return(XA_OK);
17073 } else {
17074 return(XAER_NOTA);
17075 }
17076 }
17077
17078 /** This function is used to rollback one X/Open XA distributed transaction
17079 which is in the prepared state
17080
17081 @param[in] hton InnoDB handlerton
17082 @param[in] xid X/Open XA transaction identification
17083
17084 @return 0 or error number */
innobase_rollback_by_xid(handlerton * hton,XID * xid)17085 int innobase_rollback_by_xid(handlerton* hton, XID* xid)
17086 {
17087 DBUG_ASSERT(hton == innodb_hton_ptr);
17088
17089 if (high_level_read_only) {
17090 return(XAER_RMFAIL);
17091 }
17092
17093 if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17094 #ifdef WITH_WSREP
17095 /* If a wsrep transaction is being rolled back during
17096 the recovery, we must clear the xid in order to avoid
17097 writing serialisation history for rolled back transaction. */
17098 if (wsrep_is_wsrep_xid(trx->xid)) {
17099 trx->xid->null();
17100 }
17101 #endif /* WITH_WSREP */
17102 int ret = innobase_rollback_trx(trx);
17103 trx_deregister_from_2pc(trx);
17104 ut_ad(!trx->will_lock);
17105 trx->free();
17106
17107 return(ret);
17108 } else {
17109 return(XAER_NOTA);
17110 }
17111 }
17112
17113 bool
check_if_incompatible_data(HA_CREATE_INFO * info,uint table_changes)17114 ha_innobase::check_if_incompatible_data(
17115 /*====================================*/
17116 HA_CREATE_INFO* info,
17117 uint table_changes)
17118 {
17119 ha_table_option_struct *param_old, *param_new;
17120
17121 /* Cache engine specific options */
17122 param_new = info->option_struct;
17123 param_old = table->s->option_struct;
17124
17125 innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info);
17126
17127 if (table_changes != IS_EQUAL_YES) {
17128
17129 return(COMPATIBLE_DATA_NO);
17130 }
17131
17132 /* Check that auto_increment value was not changed */
17133 if ((info->used_fields & HA_CREATE_USED_AUTO)
17134 && info->auto_increment_value != 0) {
17135
17136 return(COMPATIBLE_DATA_NO);
17137 }
17138
17139 /* Check that row format didn't change */
17140 if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
17141 && info->row_type != get_row_type()) {
17142
17143 return(COMPATIBLE_DATA_NO);
17144 }
17145
17146 /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
17147 if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
17148 return(COMPATIBLE_DATA_NO);
17149 }
17150
17151 /* Changes on engine specific table options requests a rebuild of the table. */
17152 if (param_new->page_compressed != param_old->page_compressed ||
17153 param_new->page_compression_level != param_old->page_compression_level)
17154 {
17155 return(COMPATIBLE_DATA_NO);
17156 }
17157
17158 return(COMPATIBLE_DATA_YES);
17159 }
17160
17161 /****************************************************************//**
17162 Update the system variable innodb_io_capacity_max using the "saved"
17163 value. This function is registered as a callback with MySQL. */
17164 static
17165 void
innodb_io_capacity_max_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17166 innodb_io_capacity_max_update(
17167 /*===========================*/
17168 THD* thd, /*!< in: thread handle */
17169 st_mysql_sys_var*, void*,
17170 const void* save) /*!< in: immediate result
17171 from check function */
17172 {
17173 ulong in_val = *static_cast<const ulong*>(save);
17174
17175 if (in_val < srv_io_capacity) {
17176 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17177 ER_WRONG_ARGUMENTS,
17178 "Setting innodb_io_capacity_max %lu"
17179 " lower than innodb_io_capacity %lu.",
17180 in_val, srv_io_capacity);
17181
17182 srv_io_capacity = in_val;
17183
17184 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17185 ER_WRONG_ARGUMENTS,
17186 "Setting innodb_io_capacity to %lu",
17187 srv_io_capacity);
17188 }
17189
17190 srv_max_io_capacity = in_val;
17191 }
17192
17193 /****************************************************************//**
17194 Update the system variable innodb_io_capacity using the "saved"
17195 value. This function is registered as a callback with MySQL. */
17196 static
17197 void
innodb_io_capacity_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17198 innodb_io_capacity_update(
17199 /*======================*/
17200 THD* thd, /*!< in: thread handle */
17201 st_mysql_sys_var*, void*,
17202 const void* save) /*!< in: immediate result
17203 from check function */
17204 {
17205 ulong in_val = *static_cast<const ulong*>(save);
17206
17207 if (in_val > srv_max_io_capacity) {
17208 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17209 ER_WRONG_ARGUMENTS,
17210 "Setting innodb_io_capacity to %lu"
17211 " higher than innodb_io_capacity_max %lu",
17212 in_val, srv_max_io_capacity);
17213
17214 srv_max_io_capacity = (in_val & ~(~0UL >> 1))
17215 ? in_val : in_val * 2;
17216
17217 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17218 ER_WRONG_ARGUMENTS,
17219 "Setting innodb_max_io_capacity to %lu",
17220 srv_max_io_capacity);
17221 }
17222
17223 srv_io_capacity = in_val;
17224 }
17225
17226 /****************************************************************//**
17227 Update the system variable innodb_max_dirty_pages_pct using the "saved"
17228 value. This function is registered as a callback with MySQL. */
17229 static
17230 void
innodb_max_dirty_pages_pct_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17231 innodb_max_dirty_pages_pct_update(
17232 /*==============================*/
17233 THD* thd, /*!< in: thread handle */
17234 st_mysql_sys_var*, void*,
17235 const void* save) /*!< in: immediate result
17236 from check function */
17237 {
17238 double in_val = *static_cast<const double*>(save);
17239 if (in_val < srv_max_dirty_pages_pct_lwm) {
17240 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17241 ER_WRONG_ARGUMENTS,
17242 "innodb_max_dirty_pages_pct cannot be"
17243 " set lower than"
17244 " innodb_max_dirty_pages_pct_lwm.");
17245 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17246 ER_WRONG_ARGUMENTS,
17247 "Lowering"
17248 " innodb_max_dirty_page_pct_lwm to %lf",
17249 in_val);
17250
17251 srv_max_dirty_pages_pct_lwm = in_val;
17252 }
17253
17254 srv_max_buf_pool_modified_pct = in_val;
17255 }
17256
17257 /****************************************************************//**
17258 Update the system variable innodb_max_dirty_pages_pct_lwm using the
17259 "saved" value. This function is registered as a callback with MySQL. */
17260 static
17261 void
innodb_max_dirty_pages_pct_lwm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17262 innodb_max_dirty_pages_pct_lwm_update(
17263 /*==================================*/
17264 THD* thd, /*!< in: thread handle */
17265 st_mysql_sys_var*, void*,
17266 const void* save) /*!< in: immediate result
17267 from check function */
17268 {
17269 double in_val = *static_cast<const double*>(save);
17270 if (in_val > srv_max_buf_pool_modified_pct) {
17271 in_val = srv_max_buf_pool_modified_pct;
17272 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17273 ER_WRONG_ARGUMENTS,
17274 "innodb_max_dirty_pages_pct_lwm"
17275 " cannot be set higher than"
17276 " innodb_max_dirty_pages_pct.");
17277 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17278 ER_WRONG_ARGUMENTS,
17279 "Setting innodb_max_dirty_page_pct_lwm"
17280 " to %lf",
17281 in_val);
17282 }
17283
17284 srv_max_dirty_pages_pct_lwm = in_val;
17285 }
17286
17287 /*************************************************************//**
17288 Don't allow to set innodb_fast_shutdown=0 if purge threads are
17289 already down.
17290 @return 0 if innodb_fast_shutdown can be set */
17291 static
17292 int
fast_shutdown_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)17293 fast_shutdown_validate(
17294 /*=============================*/
17295 THD* thd, /*!< in: thread handle */
17296 struct st_mysql_sys_var* var, /*!< in: pointer to system
17297 variable */
17298 void* save, /*!< out: immediate result
17299 for update function */
17300 struct st_mysql_value* value) /*!< in: incoming string */
17301 {
17302 if (check_sysvar_int(thd, var, save, value)) {
17303 return(1);
17304 }
17305
17306 uint new_val = *reinterpret_cast<uint*>(save);
17307
17308 if (srv_fast_shutdown && !new_val
17309 && !srv_running.load(std::memory_order_relaxed)) {
17310 return(1);
17311 }
17312
17313 return(0);
17314 }
17315
17316 /*************************************************************//**
17317 Check whether valid argument given to innobase_*_stopword_table.
17318 This function is registered as a callback with MySQL.
17319 @return 0 for valid stopword table */
17320 static
17321 int
innodb_stopword_table_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17322 innodb_stopword_table_validate(
17323 /*===========================*/
17324 THD* thd, /*!< in: thread handle */
17325 st_mysql_sys_var*,
17326 void* save, /*!< out: immediate result
17327 for update function */
17328 struct st_mysql_value* value) /*!< in: incoming string */
17329 {
17330 const char* stopword_table_name;
17331 char buff[STRING_BUFFER_USUAL_SIZE];
17332 int len = sizeof(buff);
17333 trx_t* trx;
17334
17335 ut_a(save != NULL);
17336 ut_a(value != NULL);
17337
17338 stopword_table_name = value->val_str(value, buff, &len);
17339
17340 trx = check_trx_exists(thd);
17341
17342 row_mysql_lock_data_dictionary(trx);
17343
17344 /* Validate the stopword table's (if supplied) existence and
17345 of the right format */
17346 int ret = stopword_table_name && !fts_valid_stopword_table(
17347 stopword_table_name);
17348
17349 row_mysql_unlock_data_dictionary(trx);
17350
17351 if (!ret) {
17352 if (stopword_table_name == buff) {
17353 ut_ad(static_cast<size_t>(len) < sizeof buff);
17354 stopword_table_name = thd_strmake(thd,
17355 stopword_table_name,
17356 len);
17357 }
17358
17359 *static_cast<const char**>(save) = stopword_table_name;
17360 }
17361
17362 return(ret);
17363 }
17364
17365 /** Update the system variable innodb_buffer_pool_size using the "saved"
17366 value. This function is registered as a callback with MySQL.
17367 @param[in] save immediate result from check function */
17368 static
17369 void
innodb_buffer_pool_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17370 innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
17371 {
17372 longlong in_val = *static_cast<const longlong*>(save);
17373
17374 snprintf(export_vars.innodb_buffer_pool_resize_status,
17375 sizeof(export_vars.innodb_buffer_pool_resize_status),
17376 "Requested to resize buffer pool.");
17377
17378 os_event_set(srv_buf_resize_event);
17379
17380 ib::info() << export_vars.innodb_buffer_pool_resize_status
17381 << " (new size: " << in_val << " bytes)";
17382 }
17383
/** The latest assigned innodb_ft_aux_table name.
NOTE(review): presumably the storage behind the innodb_ft_aux_table
system variable; it is not written directly in the nearby code, so
confirm at the variable's definition. */
static char* innodb_ft_aux_table;
17386
17387 /** Update innodb_ft_aux_table_id on SET GLOBAL innodb_ft_aux_table.
17388 @param[in,out] thd connection
17389 @param[out] save new value of innodb_ft_aux_table
17390 @param[in] value user-specified value */
innodb_ft_aux_table_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17391 static int innodb_ft_aux_table_validate(THD *thd, st_mysql_sys_var*,
17392 void* save, st_mysql_value* value)
17393 {
17394 char buf[STRING_BUFFER_USUAL_SIZE];
17395 int len = sizeof buf;
17396
17397 if (const char* table_name = value->val_str(value, buf, &len)) {
17398 if (dict_table_t* table = dict_table_open_on_name(
17399 table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE)) {
17400 const table_id_t id = dict_table_has_fts_index(table)
17401 ? table->id : 0;
17402 dict_table_close(table, FALSE, FALSE);
17403 if (id) {
17404 innodb_ft_aux_table_id = id;
17405 if (table_name == buf) {
17406 ut_ad(static_cast<size_t>(len)
17407 < sizeof buf);
17408 table_name = thd_strmake(thd,
17409 table_name,
17410 len);
17411 }
17412
17413
17414 *static_cast<const char**>(save) = table_name;
17415 return 0;
17416 }
17417 }
17418
17419 return 1;
17420 } else {
17421 *static_cast<char**>(save) = NULL;
17422 innodb_ft_aux_table_id = 0;
17423 return 0;
17424 }
17425 }
17426
17427 #ifdef BTR_CUR_HASH_ADAPT
17428 /****************************************************************//**
17429 Update the system variable innodb_adaptive_hash_index using the "saved"
17430 value. This function is registered as a callback with MySQL. */
17431 static
17432 void
innodb_adaptive_hash_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17433 innodb_adaptive_hash_index_update(THD*, st_mysql_sys_var*, void*,
17434 const void* save)
17435 {
17436 mysql_mutex_unlock(&LOCK_global_system_variables);
17437 if (*(my_bool*) save) {
17438 btr_search_enable();
17439 } else {
17440 btr_search_disable();
17441 }
17442 mysql_mutex_lock(&LOCK_global_system_variables);
17443 }
17444 #endif /* BTR_CUR_HASH_ADAPT */
17445
17446 /****************************************************************//**
17447 Update the system variable innodb_cmp_per_index using the "saved"
17448 value. This function is registered as a callback with MySQL. */
17449 static
17450 void
innodb_cmp_per_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17451 innodb_cmp_per_index_update(THD*, st_mysql_sys_var*, void*, const void* save)
17452 {
17453 /* Reset the stats whenever we enable the table
17454 INFORMATION_SCHEMA.innodb_cmp_per_index. */
17455 if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
17456 mysql_mutex_unlock(&LOCK_global_system_variables);
17457 page_zip_reset_stat_per_index();
17458 mysql_mutex_lock(&LOCK_global_system_variables);
17459 }
17460
17461 srv_cmp_per_index_enabled = !!(*(my_bool*) save);
17462 }
17463
17464 /****************************************************************//**
17465 Update the system variable innodb_old_blocks_pct using the "saved"
17466 value. This function is registered as a callback with MySQL. */
17467 static
17468 void
innodb_old_blocks_pct_update(THD *,st_mysql_sys_var *,void *,const void * save)17469 innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
17470 {
17471 mysql_mutex_unlock(&LOCK_global_system_variables);
17472 uint ratio = buf_LRU_old_ratio_update(*static_cast<const uint*>(save),
17473 true);
17474 mysql_mutex_lock(&LOCK_global_system_variables);
17475 innobase_old_blocks_pct = ratio;
17476 }
17477
17478 /****************************************************************//**
17479 Update the system variable innodb_old_blocks_pct using the "saved"
17480 value. This function is registered as a callback with MySQL. */
17481 static
17482 void
innodb_change_buffer_max_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17483 innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
17484 const void* save)
17485 {
17486 srv_change_buffer_max_size = *static_cast<const uint*>(save);
17487 mysql_mutex_unlock(&LOCK_global_system_variables);
17488 ibuf_max_size_update(srv_change_buffer_max_size);
17489 mysql_mutex_lock(&LOCK_global_system_variables);
17490 }
17491
17492 #ifdef UNIV_DEBUG
/** Debug-only variable. NOTE(review): presumably the storage behind a
debug system variable; it is not referenced in the nearby code, so
confirm at its definition site. */
static ulong srv_fil_make_page_dirty_debug = 0;
/** Page number stored by innodb_save_page_no() and read by
innodb_make_page_dirty() to select the page to dirty. */
static ulong srv_saved_page_number_debug = 0;
17495
17496 /****************************************************************//**
17497 Save an InnoDB page number. */
17498 static
17499 void
innodb_save_page_no(THD *,st_mysql_sys_var *,void *,const void * save)17500 innodb_save_page_no(THD*, st_mysql_sys_var*, void*, const void* save)
17501 {
17502 srv_saved_page_number_debug = *static_cast<const ulong*>(save);
17503
17504 ib::info() << "Saving InnoDB page number: "
17505 << srv_saved_page_number_debug;
17506 }
17507
17508 /****************************************************************//**
17509 Make the first page of given user tablespace dirty. */
17510 static
17511 void
innodb_make_page_dirty(THD *,st_mysql_sys_var *,void *,const void * save)17512 innodb_make_page_dirty(THD*, st_mysql_sys_var*, void*, const void* save)
17513 {
17514 mtr_t mtr;
17515 ulong space_id = *static_cast<const ulong*>(save);
17516 mysql_mutex_unlock(&LOCK_global_system_variables);
17517 fil_space_t* space = fil_space_acquire_silent(space_id);
17518
17519 if (space == NULL) {
17520 func_exit_no_space:
17521 mysql_mutex_lock(&LOCK_global_system_variables);
17522 return;
17523 }
17524
17525 if (srv_saved_page_number_debug >= space->size) {
17526 func_exit:
17527 space->release();
17528 goto func_exit_no_space;
17529 }
17530
17531 mtr.start();
17532 mtr.set_named_space(space);
17533
17534 buf_block_t* block = buf_page_get(
17535 page_id_t(space_id, srv_saved_page_number_debug),
17536 space->zip_size(), RW_X_LATCH, &mtr);
17537
17538 if (block != NULL) {
17539 byte* page = block->frame;
17540
17541 ib::info() << "Dirtying page: " << page_id_t(
17542 page_get_space_id(page), page_get_page_no(page));
17543
17544 mlog_write_ulint(page + FIL_PAGE_TYPE,
17545 fil_page_get_type(page),
17546 MLOG_2BYTES, &mtr);
17547 }
17548 mtr.commit();
17549 goto func_exit;
17550 }
17551 #endif // UNIV_DEBUG
17552 /*************************************************************//**
17553 Just emit a warning that the usage of the variable is deprecated.
17554 @return 0 */
17555 static
17556 void
innodb_stats_sample_pages_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17557 innodb_stats_sample_pages_update(
17558 /*=============================*/
17559 THD* thd, /*!< in: thread handle */
17560 st_mysql_sys_var*, void*,
17561 const void* save) /*!< in: immediate result
17562 from check function */
17563 {
17564
17565 const char* STATS_SAMPLE_PAGES_DEPRECATED_MSG =
17566 "Using innodb_stats_sample_pages is deprecated and"
17567 " the variable may be removed in future releases."
17568 " Please use innodb_stats_transient_sample_pages instead.";
17569
17570 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
17571 HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
17572
17573 ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG;
17574
17575 srv_stats_transient_sample_pages =
17576 *static_cast<const unsigned long long*>(save);
17577 }
17578
17579 /****************************************************************//**
17580 Update the monitor counter according to the "set_option", turn
17581 on/off or reset specified monitor counter. */
17582 static
17583 void
innodb_monitor_set_option(const monitor_info_t * monitor_info,mon_option_t set_option)17584 innodb_monitor_set_option(
17585 /*======================*/
17586 const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor
17587 to set */
17588 mon_option_t set_option) /*!< in: Turn on/off reset the
17589 counter */
17590 {
17591 monitor_id_t monitor_id = monitor_info->monitor_id;
17592
17593 /* If module type is MONITOR_GROUP_MODULE, it cannot be
17594 turned on/off individually. It should never use this
17595 function to set options */
17596 ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE));
17597
17598 switch (set_option) {
17599 case MONITOR_TURN_ON:
17600 MONITOR_ON(monitor_id);
17601 MONITOR_INIT(monitor_id);
17602 MONITOR_SET_START(monitor_id);
17603
17604 /* If the monitor to be turned on uses
17605 exisitng monitor counter (status variable),
17606 make special processing to remember existing
17607 counter value. */
17608 if (monitor_info->monitor_type & MONITOR_EXISTING) {
17609 srv_mon_process_existing_counter(
17610 monitor_id, MONITOR_TURN_ON);
17611 }
17612
17613 if (MONITOR_IS_ON(MONITOR_LATCHES)) {
17614
17615 mutex_monitor.enable();
17616 }
17617 break;
17618
17619 case MONITOR_TURN_OFF:
17620 if (monitor_info->monitor_type & MONITOR_EXISTING) {
17621 srv_mon_process_existing_counter(
17622 monitor_id, MONITOR_TURN_OFF);
17623 }
17624
17625 MONITOR_OFF(monitor_id);
17626 MONITOR_SET_OFF(monitor_id);
17627
17628 if (!MONITOR_IS_ON(MONITOR_LATCHES)) {
17629
17630 mutex_monitor.disable();
17631 }
17632 break;
17633
17634 case MONITOR_RESET_VALUE:
17635 srv_mon_reset(monitor_id);
17636
17637 if (monitor_id == (MONITOR_LATCHES)) {
17638
17639 mutex_monitor.reset();
17640 }
17641 break;
17642
17643 case MONITOR_RESET_ALL_VALUE:
17644 srv_mon_reset_all(monitor_id);
17645 mutex_monitor.reset();
17646 break;
17647
17648 default:
17649 ut_error;
17650 }
17651 }
17652
17653 /****************************************************************//**
17654 Find matching InnoDB monitor counters and update their status
17655 according to the "set_option", turn on/off or reset specified
17656 monitor counter. */
17657 static
17658 void
innodb_monitor_update_wildcard(const char * name,mon_option_t set_option)17659 innodb_monitor_update_wildcard(
17660 /*===========================*/
17661 const char* name, /*!< in: monitor name to match */
17662 mon_option_t set_option) /*!< in: the set option, whether
17663 to turn on/off or reset the counter */
17664 {
17665 ut_a(name);
17666
17667 for (ulint use = 0; use < NUM_MONITOR; use++) {
17668 ulint type;
17669 monitor_id_t monitor_id = static_cast<monitor_id_t>(use);
17670 monitor_info_t* monitor_info;
17671
17672 if (!innobase_wildcasecmp(
17673 srv_mon_get_name(monitor_id), name)) {
17674 monitor_info = srv_mon_get_info(monitor_id);
17675
17676 type = monitor_info->monitor_type;
17677
17678 /* If the monitor counter is of MONITOR_MODULE
17679 type, skip it. Except for those also marked with
17680 MONITOR_GROUP_MODULE flag, which can be turned
17681 on only as a module. */
17682 if (!(type & MONITOR_MODULE)
17683 && !(type & MONITOR_GROUP_MODULE)) {
17684 innodb_monitor_set_option(monitor_info,
17685 set_option);
17686 }
17687
17688 /* Need to special handle counters marked with
17689 MONITOR_GROUP_MODULE, turn on the whole module if
17690 any one of it comes here. Currently, only
17691 "module_buf_page" is marked with MONITOR_GROUP_MODULE */
17692 if (type & MONITOR_GROUP_MODULE) {
17693 if ((monitor_id >= MONITOR_MODULE_BUF_PAGE)
17694 && (monitor_id < MONITOR_MODULE_OS)) {
17695 if (set_option == MONITOR_TURN_ON
17696 && MONITOR_IS_ON(
17697 MONITOR_MODULE_BUF_PAGE)) {
17698 continue;
17699 }
17700
17701 srv_mon_set_module_control(
17702 MONITOR_MODULE_BUF_PAGE,
17703 set_option);
17704 } else {
17705 /* If new monitor is added with
17706 MONITOR_GROUP_MODULE, it needs
17707 to be added here. */
17708 ut_ad(0);
17709 }
17710 }
17711 }
17712 }
17713 }
17714
17715 /*************************************************************//**
17716 Given a configuration variable name, find corresponding monitor counter
17717 and return its monitor ID if found.
17718 @return monitor ID if found, MONITOR_NO_MATCH if there is no match */
17719 static
17720 ulint
innodb_monitor_id_by_name_get(const char * name)17721 innodb_monitor_id_by_name_get(
17722 /*==========================*/
17723 const char* name) /*!< in: monitor counter namer */
17724 {
17725 ut_a(name);
17726
17727 /* Search for wild character '%' in the name, if
17728 found, we treat it as a wildcard match. We do not search for
17729 single character wildcard '_' since our monitor names already contain
17730 such character. To avoid confusion, we request user must include
17731 at least one '%' character to activate the wildcard search. */
17732 if (strchr(name, '%')) {
17733 return(MONITOR_WILDCARD_MATCH);
17734 }
17735
17736 /* Not wildcard match, check for an exact match */
17737 for (ulint i = 0; i < NUM_MONITOR; i++) {
17738 if (!innobase_strcasecmp(
17739 name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) {
17740 return(i);
17741 }
17742 }
17743
17744 return(MONITOR_NO_MATCH);
17745 }
17746 /*************************************************************//**
17747 Validate that the passed in monitor name matches at least one
17748 monitor counter name with wildcard compare.
17749 @return TRUE if at least one monitor name matches */
17750 static
17751 ibool
innodb_monitor_validate_wildcard_name(const char * name)17752 innodb_monitor_validate_wildcard_name(
17753 /*==================================*/
17754 const char* name) /*!< in: monitor counter namer */
17755 {
17756 for (ulint i = 0; i < NUM_MONITOR; i++) {
17757 if (!innobase_wildcasecmp(
17758 srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) {
17759 return(TRUE);
17760 }
17761 }
17762
17763 return(FALSE);
17764 }
17765 /*************************************************************//**
17766 Validate the passed in monitor name, find and save the
17767 corresponding monitor name in the function parameter "save".
17768 @return 0 if monitor name is valid */
17769 static
17770 int
innodb_monitor_valid_byname(void * save,const char * name)17771 innodb_monitor_valid_byname(
17772 /*========================*/
17773 void* save, /*!< out: immediate result
17774 for update function */
17775 const char* name) /*!< in: incoming monitor name */
17776 {
17777 ulint use;
17778 monitor_info_t* monitor_info;
17779
17780 if (!name) {
17781 return(1);
17782 }
17783
17784 use = innodb_monitor_id_by_name_get(name);
17785
17786 /* No monitor name matches, nor it is wildcard match */
17787 if (use == MONITOR_NO_MATCH) {
17788 return(1);
17789 }
17790
17791 if (use < NUM_MONITOR) {
17792 monitor_info = srv_mon_get_info((monitor_id_t) use);
17793
17794 /* If the monitor counter is marked with
17795 MONITOR_GROUP_MODULE flag, then this counter
17796 cannot be turned on/off individually, instead
17797 it shall be turned on/off as a group using
17798 its module name */
17799 if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE)
17800 && (!(monitor_info->monitor_type & MONITOR_MODULE))) {
17801 sql_print_warning(
17802 "Monitor counter '%s' cannot"
17803 " be turned on/off individually."
17804 " Please use its module name"
17805 " to turn on/off the counters"
17806 " in the module as a group.\n",
17807 name);
17808
17809 return(1);
17810 }
17811
17812 } else {
17813 ut_a(use == MONITOR_WILDCARD_MATCH);
17814
17815 /* For wildcard match, if there is not a single monitor
17816 counter name that matches, treat it as an invalid
17817 value for the system configuration variables */
17818 if (!innodb_monitor_validate_wildcard_name(name)) {
17819 return(1);
17820 }
17821 }
17822
17823 /* Save the configure name for innodb_monitor_update() */
17824 *static_cast<const char**>(save) = name;
17825
17826 return(0);
17827 }
17828 /*************************************************************//**
17829 Validate passed-in "value" is a valid monitor counter name.
17830 This function is registered as a callback with MySQL.
17831 @return 0 for valid name */
17832 static
17833 int
innodb_monitor_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17834 innodb_monitor_validate(
17835 /*====================*/
17836 THD*, st_mysql_sys_var*,
17837 void* save, /*!< out: immediate result
17838 for update function */
17839 struct st_mysql_value* value) /*!< in: incoming string */
17840 {
17841 const char* name;
17842 char* monitor_name;
17843 char buff[STRING_BUFFER_USUAL_SIZE];
17844 int len = sizeof(buff);
17845 int ret;
17846
17847 ut_a(save != NULL);
17848 ut_a(value != NULL);
17849
17850 name = value->val_str(value, buff, &len);
17851
17852 /* monitor_name could point to memory from MySQL
17853 or buff[]. Always dup the name to memory allocated
17854 by InnoDB, so we can access it in another callback
17855 function innodb_monitor_update() and free it appropriately */
17856 if (name) {
17857 monitor_name = my_strdup(//PSI_INSTRUMENT_ME,
17858 name, MYF(0));
17859 } else {
17860 return(1);
17861 }
17862
17863 ret = innodb_monitor_valid_byname(save, monitor_name);
17864
17865 if (ret) {
17866 /* Validation failed */
17867 my_free(monitor_name);
17868 } else {
17869 /* monitor_name will be freed in separate callback function
17870 innodb_monitor_update(). Assert "save" point to
17871 the "monitor_name" variable */
17872 ut_ad(*static_cast<char**>(save) == monitor_name);
17873 }
17874
17875 return(ret);
17876 }
17877
17878 /****************************************************************//**
17879 Update the system variable innodb_enable(disable/reset/reset_all)_monitor
17880 according to the "set_option" and turn on/off or reset specified monitor
17881 counter. */
17882 static
17883 void
innodb_monitor_update(THD * thd,void * var_ptr,const void * save,mon_option_t set_option,ibool free_mem)17884 innodb_monitor_update(
17885 /*==================*/
17886 THD* thd, /*!< in: thread handle */
17887 void* var_ptr, /*!< out: where the
17888 formal string goes */
17889 const void* save, /*!< in: immediate result
17890 from check function */
17891 mon_option_t set_option, /*!< in: the set option,
17892 whether to turn on/off or
17893 reset the counter */
17894 ibool free_mem) /*!< in: whether we will
17895 need to free the memory */
17896 {
17897 monitor_info_t* monitor_info;
17898 ulint monitor_id;
17899 ulint err_monitor = 0;
17900 const char* name;
17901
17902 ut_a(save != NULL);
17903
17904 name = *static_cast<const char*const*>(save);
17905
17906 if (!name) {
17907 monitor_id = MONITOR_DEFAULT_START;
17908 } else {
17909 monitor_id = innodb_monitor_id_by_name_get(name);
17910
17911 /* Double check we have a valid monitor ID */
17912 if (monitor_id == MONITOR_NO_MATCH) {
17913 return;
17914 }
17915 }
17916
17917 if (monitor_id == MONITOR_DEFAULT_START) {
17918 /* If user set the variable to "default", we will
17919 print a message and make this set operation a "noop".
17920 The check is being made here is because "set default"
17921 does not go through validation function */
17922 if (thd) {
17923 push_warning_printf(
17924 thd, Sql_condition::WARN_LEVEL_WARN,
17925 ER_NO_DEFAULT,
17926 "Default value is not defined for"
17927 " this set option. Please specify"
17928 " correct counter or module name.");
17929 } else {
17930 sql_print_error(
17931 "Default value is not defined for"
17932 " this set option. Please specify"
17933 " correct counter or module name.\n");
17934 }
17935
17936 if (var_ptr) {
17937 *(const char**) var_ptr = NULL;
17938 }
17939 } else if (monitor_id == MONITOR_WILDCARD_MATCH) {
17940 innodb_monitor_update_wildcard(name, set_option);
17941 } else {
17942 monitor_info = srv_mon_get_info(
17943 static_cast<monitor_id_t>(monitor_id));
17944
17945 ut_a(monitor_info);
17946
17947 /* If monitor is already truned on, someone could already
17948 collect monitor data, exit and ask user to turn off the
17949 monitor before turn it on again. */
17950 if (set_option == MONITOR_TURN_ON
17951 && MONITOR_IS_ON(monitor_id)) {
17952 err_monitor = monitor_id;
17953 goto exit;
17954 }
17955
17956 if (var_ptr) {
17957 *(const char**) var_ptr = monitor_info->monitor_name;
17958 }
17959
17960 /* Depending on the monitor name is for a module or
17961 a counter, process counters in the whole module or
17962 individual counter. */
17963 if (monitor_info->monitor_type & MONITOR_MODULE) {
17964 srv_mon_set_module_control(
17965 static_cast<monitor_id_t>(monitor_id),
17966 set_option);
17967 } else {
17968 innodb_monitor_set_option(monitor_info, set_option);
17969 }
17970 }
17971 exit:
17972 /* Only if we are trying to turn on a monitor that already
17973 been turned on, we will set err_monitor. Print related
17974 information */
17975 if (err_monitor) {
17976 sql_print_warning("InnoDB: Monitor %s is already enabled.",
17977 srv_mon_get_name((monitor_id_t) err_monitor));
17978 }
17979
17980 if (free_mem && name) {
17981 my_free((void*) name);
17982 }
17983
17984 return;
17985 }
17986
17987 /** Validate SET GLOBAL innodb_buffer_pool_filename.
17988 On Windows, file names with colon (:) are not allowed.
17989 @param thd connection
17990 @param save &srv_buf_dump_filename
17991 @param value new value to be validated
17992 @return 0 for valid name */
innodb_srv_buf_dump_filename_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17993 static int innodb_srv_buf_dump_filename_validate(THD *thd, st_mysql_sys_var*,
17994 void *save,
17995 st_mysql_value *value)
17996 {
17997 char buff[OS_FILE_MAX_PATH];
17998 int len= sizeof buff;
17999
18000 if (const char *buf_name= value->val_str(value, buff, &len))
18001 {
18002 #ifdef _WIN32
18003 if (!is_filename_allowed(buf_name, len, FALSE))
18004 {
18005 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18006 ER_WRONG_ARGUMENTS,
18007 "InnoDB: innodb_buffer_pool_filename "
18008 "cannot have colon (:) in the file name.");
18009 return 1;
18010 }
18011 #endif /* _WIN32 */
18012 if (buf_name == buff)
18013 {
18014 ut_ad(static_cast<size_t>(len) < sizeof buff);
18015 buf_name= thd_strmake(thd, buf_name, len);
18016 }
18017
18018 *static_cast<const char**>(save)= buf_name;
18019 return 0;
18020 }
18021
18022 return 1;
18023 }
18024
18025 #ifdef UNIV_DEBUG
18026 static char* srv_buffer_pool_evict;
18027
18028 /****************************************************************//**
18029 Evict all uncompressed pages of compressed tables from the buffer pool.
18030 Keep the compressed pages in the buffer pool.
18031 @return whether all uncompressed pages were evicted */
innodb_buffer_pool_evict_uncompressed()18032 static bool innodb_buffer_pool_evict_uncompressed()
18033 {
18034 bool all_evicted = true;
18035
18036 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
18037 buf_pool_t* buf_pool = &buf_pool_ptr[i];
18038
18039 buf_pool_mutex_enter(buf_pool);
18040
18041 for (buf_block_t* block = UT_LIST_GET_LAST(
18042 buf_pool->unzip_LRU);
18043 block != NULL; ) {
18044 buf_block_t* prev_block = UT_LIST_GET_PREV(
18045 unzip_LRU, block);
18046 ut_ad(buf_block_get_state(block)
18047 == BUF_BLOCK_FILE_PAGE);
18048 ut_ad(block->in_unzip_LRU_list);
18049 ut_ad(block->page.in_LRU_list);
18050
18051 if (!buf_LRU_free_page(&block->page, false)) {
18052 all_evicted = false;
18053 block = prev_block;
18054 } else {
18055 /* Because buf_LRU_free_page() may release
18056 and reacquire buf_pool_t::mutex, prev_block
18057 may be invalid. */
18058 block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
18059 }
18060 }
18061
18062 buf_pool_mutex_exit(buf_pool);
18063 }
18064
18065 return(all_evicted);
18066 }
18067
18068 /****************************************************************//**
18069 Called on SET GLOBAL innodb_buffer_pool_evict=...
18070 Handles some values specially, to evict pages from the buffer pool.
18071 SET GLOBAL innodb_buffer_pool_evict='uncompressed'
18072 evicts all uncompressed page frames of compressed tablespaces. */
18073 static
18074 void
innodb_buffer_pool_evict_update(THD *,st_mysql_sys_var *,void *,const void * save)18075 innodb_buffer_pool_evict_update(THD*, st_mysql_sys_var*, void*,
18076 const void* save)
18077 {
18078 if (const char* op = *static_cast<const char*const*>(save)) {
18079 if (!strcmp(op, "uncompressed")) {
18080 mysql_mutex_unlock(&LOCK_global_system_variables);
18081 for (uint tries = 0; tries < 10000; tries++) {
18082 if (innodb_buffer_pool_evict_uncompressed()) {
18083 mysql_mutex_lock(
18084 &LOCK_global_system_variables);
18085 return;
18086 }
18087
18088 os_thread_sleep(10000);
18089 }
18090
18091 /* We failed to evict all uncompressed pages. */
18092 ut_ad(0);
18093 }
18094 }
18095 }
18096 #endif /* UNIV_DEBUG */
18097
18098 /****************************************************************//**
18099 Update the system variable innodb_monitor_enable and enable
18100 specified monitor counter.
18101 This function is registered as a callback with MySQL. */
18102 static
18103 void
innodb_enable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18104 innodb_enable_monitor_update(
18105 /*=========================*/
18106 THD* thd, /*!< in: thread handle */
18107 st_mysql_sys_var*,
18108 void* var_ptr,/*!< out: where the
18109 formal string goes */
18110 const void* save) /*!< in: immediate result
18111 from check function */
18112 {
18113 innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE);
18114 }
18115
18116 /****************************************************************//**
18117 Update the system variable innodb_monitor_disable and turn
18118 off specified monitor counter. */
18119 static
18120 void
innodb_disable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18121 innodb_disable_monitor_update(
18122 /*==========================*/
18123 THD* thd, /*!< in: thread handle */
18124 st_mysql_sys_var*,
18125 void* var_ptr,/*!< out: where the
18126 formal string goes */
18127 const void* save) /*!< in: immediate result
18128 from check function */
18129 {
18130 innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE);
18131 }
18132
18133 /****************************************************************//**
18134 Update the system variable innodb_monitor_reset and reset
18135 specified monitor counter(s).
18136 This function is registered as a callback with MySQL. */
18137 static
18138 void
innodb_reset_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18139 innodb_reset_monitor_update(
18140 /*========================*/
18141 THD* thd, /*!< in: thread handle */
18142 st_mysql_sys_var*,
18143 void* var_ptr,/*!< out: where the
18144 formal string goes */
18145 const void* save) /*!< in: immediate result
18146 from check function */
18147 {
18148 innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE);
18149 }
18150
18151 /****************************************************************//**
18152 Update the system variable innodb_monitor_reset_all and reset
18153 all value related monitor counter.
18154 This function is registered as a callback with MySQL. */
18155 static
18156 void
innodb_reset_all_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18157 innodb_reset_all_monitor_update(
18158 /*============================*/
18159 THD* thd, /*!< in: thread handle */
18160 st_mysql_sys_var*,
18161 void* var_ptr,/*!< out: where the
18162 formal string goes */
18163 const void* save) /*!< in: immediate result
18164 from check function */
18165 {
18166 innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE,
18167 TRUE);
18168 }
18169
18170 static
18171 void
innodb_defragment_frequency_update(THD *,st_mysql_sys_var *,void *,const void * save)18172 innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*,
18173 const void* save)
18174 {
18175 srv_defragment_frequency = (*static_cast<const uint*>(save));
18176 srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
18177 }
18178
my_strtok_r(char * str,const char * delim,char ** saveptr)18179 static inline char *my_strtok_r(char *str, const char *delim, char **saveptr)
18180 {
18181 #if defined _WIN32
18182 return strtok_s(str, delim, saveptr);
18183 #else
18184 return strtok_r(str, delim, saveptr);
18185 #endif
18186 }
18187
18188 /****************************************************************//**
18189 Parse and enable InnoDB monitor counters during server startup.
18190 User can list the monitor counters/groups to be enable by specifying
18191 "loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
18192 in server configuration file or at the command line. The string
18193 separate could be ";", "," or empty space. */
18194 static
18195 void
innodb_enable_monitor_at_startup(char * str)18196 innodb_enable_monitor_at_startup(
18197 /*=============================*/
18198 char* str) /*!< in/out: monitor counter enable list */
18199 {
18200 static const char* sep = " ;,";
18201 char* last;
18202
18203 ut_a(str);
18204
18205 /* Walk through the string, and separate each monitor counter
18206 and/or counter group name, and calling innodb_monitor_update()
18207 if successfully updated. Please note that the "str" would be
18208 changed by strtok_r() as it walks through it. */
18209 for (char* option = my_strtok_r(str, sep, &last);
18210 option;
18211 option = my_strtok_r(NULL, sep, &last)) {
18212 char* option_name;
18213 if (!innodb_monitor_valid_byname(&option_name, option)) {
18214 innodb_monitor_update(NULL, NULL, &option,
18215 MONITOR_TURN_ON, FALSE);
18216 } else {
18217 sql_print_warning("Invalid monitor counter"
18218 " name: '%s'", option);
18219 }
18220 }
18221 }
18222
18223 /****************************************************************//**
18224 Callback function for accessing the InnoDB variables from MySQL:
18225 SHOW VARIABLES. */
show_innodb_vars(THD *,SHOW_VAR * var,char *)18226 static int show_innodb_vars(THD*, SHOW_VAR* var, char*)
18227 {
18228 innodb_export_status();
18229 var->type = SHOW_ARRAY;
18230 var->value = (char*) &innodb_status_variables;
18231 //var->scope = SHOW_SCOPE_GLOBAL;
18232
18233 return(0);
18234 }
18235
18236 /****************************************************************//**
18237 This function checks each index name for a table against reserved
18238 system default primary index name 'GEN_CLUST_INDEX'. If a name
18239 matches, this function pushes an warning message to the client,
18240 and returns true.
18241 @return true if the index name matches the reserved name */
18242 bool
innobase_index_name_is_reserved(THD * thd,const KEY * key_info,ulint num_of_keys)18243 innobase_index_name_is_reserved(
18244 /*============================*/
18245 THD* thd, /*!< in/out: MySQL connection */
18246 const KEY* key_info, /*!< in: Indexes to be created */
18247 ulint num_of_keys) /*!< in: Number of indexes to
18248 be created. */
18249 {
18250 const KEY* key;
18251 uint key_num; /* index number */
18252
18253 for (key_num = 0; key_num < num_of_keys; key_num++) {
18254 key = &key_info[key_num];
18255
18256 if (innobase_strcasecmp(key->name.str,
18257 innobase_index_reserve_name) == 0) {
18258 /* Push warning to mysql */
18259 push_warning_printf(thd,
18260 Sql_condition::WARN_LEVEL_WARN,
18261 ER_WRONG_NAME_FOR_INDEX,
18262 "Cannot Create Index with name"
18263 " '%s'. The name is reserved"
18264 " for the system default primary"
18265 " index.",
18266 innobase_index_reserve_name);
18267
18268 my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
18269 innobase_index_reserve_name);
18270
18271 return(true);
18272 }
18273 }
18274
18275 return(false);
18276 }
18277
18278 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
18279 of m_prebuilt->fts_doc_id
18280 @param[in,out] fts_hdl FTS handler
18281 @return the relevance ranking value */
18282 static
18283 float
innobase_fts_retrieve_ranking(FT_INFO * fts_hdl)18284 innobase_fts_retrieve_ranking(
18285 FT_INFO* fts_hdl)
18286 {
18287 fts_result_t* result;
18288 row_prebuilt_t* ft_prebuilt;
18289
18290 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18291
18292 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18293
18294 fts_ranking_t* ranking = rbt_value(fts_ranking_t, result->current);
18295 ft_prebuilt->fts_doc_id= ranking->doc_id;
18296
18297 return(ranking->rank);
18298 }
18299
18300 /** Free the memory for the FTS handler
18301 @param[in,out] fts_hdl FTS handler */
18302 static
18303 void
innobase_fts_close_ranking(FT_INFO * fts_hdl)18304 innobase_fts_close_ranking(
18305 FT_INFO* fts_hdl)
18306 {
18307 fts_result_t* result;
18308
18309 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18310
18311 fts_query_free_result(result);
18312
18313 my_free((uchar*) fts_hdl);
18314 }
18315
18316 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
18317 of m_prebuilt->fts_doc_id
18318 @param[in,out] fts_hdl FTS handler
18319 @return the relevance ranking value */
18320 static
18321 float
innobase_fts_find_ranking(FT_INFO * fts_hdl,uchar *,uint)18322 innobase_fts_find_ranking(FT_INFO* fts_hdl, uchar*, uint)
18323 {
18324 fts_result_t* result;
18325 row_prebuilt_t* ft_prebuilt;
18326
18327 ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18328 result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18329
18330 /* Retrieve the ranking value for doc_id with value of
18331 m_prebuilt->fts_doc_id */
18332 return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
18333 }
18334
18335 #ifdef UNIV_DEBUG
18336 static my_bool innodb_background_drop_list_empty = TRUE;
18337 static my_bool innodb_log_checkpoint_now = TRUE;
18338 static my_bool innodb_buf_flush_list_now = TRUE;
18339 static uint innodb_merge_threshold_set_all_debug
18340 = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
18341
18342 /** Wait for the background drop list to become empty. */
18343 static
18344 void
wait_background_drop_list_empty(THD *,st_mysql_sys_var *,void *,const void *)18345 wait_background_drop_list_empty(THD*, st_mysql_sys_var*, void*, const void*)
18346 {
18347 row_wait_for_background_drop_list_empty();
18348 }
18349
18350 /****************************************************************//**
18351 Force innodb to checkpoint. */
18352 static
18353 void
checkpoint_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18354 checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18355 {
18356 if (*(my_bool*) save) {
18357 mysql_mutex_unlock(&LOCK_global_system_variables);
18358
18359 while (log_sys.last_checkpoint_lsn
18360 + SIZE_OF_MLOG_CHECKPOINT
18361 + (log_sys.append_on_checkpoint != NULL
18362 ? log_sys.append_on_checkpoint->size() : 0)
18363 < log_sys.lsn) {
18364 log_make_checkpoint();
18365 fil_flush_file_spaces(FIL_TYPE_LOG);
18366 }
18367
18368 dberr_t err = fil_write_flushed_lsn(log_sys.lsn);
18369
18370 if (err != DB_SUCCESS) {
18371 ib::warn() << "Checkpoint set failed " << err;
18372 }
18373
18374 mysql_mutex_lock(&LOCK_global_system_variables);
18375 }
18376 }
18377
18378 /****************************************************************//**
18379 Force a dirty pages flush now. */
18380 static
18381 void
buf_flush_list_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18382 buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18383 {
18384 if (*(my_bool*) save) {
18385 mysql_mutex_unlock(&LOCK_global_system_variables);
18386 buf_flush_sync_all_buf_pools();
18387 mysql_mutex_lock(&LOCK_global_system_variables);
18388 }
18389 }
18390
18391 /** Override current MERGE_THRESHOLD setting for all indexes at dictionary
18392 now.
18393 @param[in] save immediate result from check function */
18394 static
18395 void
innodb_merge_threshold_set_all_debug_update(THD *,st_mysql_sys_var *,void *,const void * save)18396 innodb_merge_threshold_set_all_debug_update(THD*, st_mysql_sys_var*, void*,
18397 const void* save)
18398 {
18399 innodb_merge_threshold_set_all_debug
18400 = (*static_cast<const uint*>(save));
18401 dict_set_merge_threshold_all_debug(
18402 innodb_merge_threshold_set_all_debug);
18403 }
18404 #endif /* UNIV_DEBUG */
18405
18406 /** Find and Retrieve the FTS doc_id for the current result row
18407 @param[in,out] fts_hdl FTS handler
18408 @return the document ID */
18409 static
18410 ulonglong
innobase_fts_retrieve_docid(FT_INFO_EXT * fts_hdl)18411 innobase_fts_retrieve_docid(
18412 FT_INFO_EXT* fts_hdl)
18413 {
18414 fts_result_t* result;
18415 row_prebuilt_t* ft_prebuilt;
18416
18417 ft_prebuilt = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_prebuilt;
18418 result = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_result;
18419
18420 if (ft_prebuilt->read_just_key) {
18421
18422 fts_ranking_t* ranking =
18423 rbt_value(fts_ranking_t, result->current);
18424
18425 return(ranking->doc_id);
18426 }
18427
18428 return(ft_prebuilt->fts_doc_id);
18429 }
18430
18431 /* These variables are never read by InnoDB or changed. They are a kind of
18432 dummies that are needed by the MySQL infrastructure to call
18433 buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort()
18434 by the user by doing:
18435 SET GLOBAL innodb_buffer_pool_dump_now=ON;
18436 SET GLOBAL innodb_buffer_pool_load_now=ON;
18437 SET GLOBAL innodb_buffer_pool_load_abort=ON;
18438 Their values are read by MySQL and displayed to the user when the variables
18439 are queried, e.g.:
18440 SELECT @@innodb_buffer_pool_dump_now;
18441 SELECT @@innodb_buffer_pool_load_now;
18442 SELECT @@innodb_buffer_pool_load_abort; */
18443 static my_bool innodb_buffer_pool_dump_now = FALSE;
18444 static my_bool innodb_buffer_pool_load_now = FALSE;
18445 static my_bool innodb_buffer_pool_load_abort = FALSE;
18446
18447 /****************************************************************//**
18448 Trigger a dump of the buffer pool if innodb_buffer_pool_dump_now is set
18449 to ON. This function is registered as a callback with MySQL. */
18450 static
18451 void
buffer_pool_dump_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18452 buffer_pool_dump_now(
18453 /*=================*/
18454 THD* thd /*!< in: thread handle */
18455 MY_ATTRIBUTE((unused)),
18456 struct st_mysql_sys_var* var /*!< in: pointer to system
18457 variable */
18458 MY_ATTRIBUTE((unused)),
18459 void* var_ptr /*!< out: where the formal
18460 string goes */
18461 MY_ATTRIBUTE((unused)),
18462 const void* save) /*!< in: immediate result from
18463 check function */
18464 {
18465 if (*(my_bool*) save && !srv_read_only_mode) {
18466 mysql_mutex_unlock(&LOCK_global_system_variables);
18467 buf_dump_start();
18468 mysql_mutex_lock(&LOCK_global_system_variables);
18469 }
18470 }
18471
18472 /****************************************************************//**
18473 Trigger a load of the buffer pool if innodb_buffer_pool_load_now is set
18474 to ON. This function is registered as a callback with MySQL. */
18475 static
18476 void
buffer_pool_load_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18477 buffer_pool_load_now(
18478 /*=================*/
18479 THD* thd /*!< in: thread handle */
18480 MY_ATTRIBUTE((unused)),
18481 struct st_mysql_sys_var* var /*!< in: pointer to system
18482 variable */
18483 MY_ATTRIBUTE((unused)),
18484 void* var_ptr /*!< out: where the formal
18485 string goes */
18486 MY_ATTRIBUTE((unused)),
18487 const void* save) /*!< in: immediate result from
18488 check function */
18489 {
18490 if (*(my_bool*) save && !srv_read_only_mode) {
18491 mysql_mutex_unlock(&LOCK_global_system_variables);
18492 buf_load_start();
18493 mysql_mutex_lock(&LOCK_global_system_variables);
18494 }
18495 }
18496
18497 /****************************************************************//**
18498 Abort a load of the buffer pool if innodb_buffer_pool_load_abort
18499 is set to ON. This function is registered as a callback with MySQL. */
18500 static
18501 void
buffer_pool_load_abort(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18502 buffer_pool_load_abort(
18503 /*===================*/
18504 THD* thd /*!< in: thread handle */
18505 MY_ATTRIBUTE((unused)),
18506 struct st_mysql_sys_var* var /*!< in: pointer to system
18507 variable */
18508 MY_ATTRIBUTE((unused)),
18509 void* var_ptr /*!< out: where the formal
18510 string goes */
18511 MY_ATTRIBUTE((unused)),
18512 const void* save) /*!< in: immediate result from
18513 check function */
18514 {
18515 if (*(my_bool*) save && !srv_read_only_mode) {
18516 mysql_mutex_unlock(&LOCK_global_system_variables);
18517 buf_load_abort();
18518 mysql_mutex_lock(&LOCK_global_system_variables);
18519 }
18520 }
18521
18522 /****************************************************************//**
18523 Update the system variable innodb_log_write_ahead_size using the "saved"
18524 value. This function is registered as a callback with MySQL. */
18525 static
18526 void
innodb_log_write_ahead_size_update(THD * thd,st_mysql_sys_var *,void *,const void * save)18527 innodb_log_write_ahead_size_update(
18528 /*===============================*/
18529 THD* thd, /*!< in: thread handle */
18530 st_mysql_sys_var*, void*,
18531 const void* save) /*!< in: immediate result
18532 from check function */
18533 {
18534 ulong val = OS_FILE_LOG_BLOCK_SIZE;
18535 ulong in_val = *static_cast<const ulong*>(save);
18536
18537 while (val < in_val) {
18538 val = val * 2;
18539 }
18540
18541 if (val > srv_page_size) {
18542 val = srv_page_size;
18543 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18544 ER_WRONG_ARGUMENTS,
18545 "innodb_log_write_ahead_size cannot"
18546 " be set higher than innodb_page_size.");
18547 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18548 ER_WRONG_ARGUMENTS,
18549 "Setting innodb_log_write_ahead_size"
18550 " to %lu",
18551 srv_page_size);
18552 } else if (val != in_val) {
18553 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18554 ER_WRONG_ARGUMENTS,
18555 "innodb_log_write_ahead_size should be"
18556 " set 2^n value and larger than 512.");
18557 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18558 ER_WRONG_ARGUMENTS,
18559 "Setting innodb_log_write_ahead_size"
18560 " to %lu",
18561 val);
18562 }
18563
18564 srv_log_write_ahead_size = val;
18565 }
18566
18567 /** Update innodb_status_output or innodb_status_output_locks,
18568 which control InnoDB "status monitor" output to the error log.
18569 @param[out] var current value
18570 @param[in] save to-be-assigned value */
18571 static
18572 void
innodb_status_output_update(THD *,st_mysql_sys_var *,void * var,const void * save)18573 innodb_status_output_update(THD*,st_mysql_sys_var*,void*var,const void*save)
18574 {
18575 *static_cast<my_bool*>(var)= *static_cast<const my_bool*>(save);
18576 if (srv_monitor_event)
18577 {
18578 mysql_mutex_unlock(&LOCK_global_system_variables);
18579 /* Wakeup server monitor thread. */
18580 os_event_set(srv_monitor_event);
18581 mysql_mutex_lock(&LOCK_global_system_variables);
18582 }
18583 }
18584
18585 /** Update the system variable innodb_encryption_threads.
18586 @param[in] save to-be-assigned value */
18587 static
18588 void
innodb_encryption_threads_update(THD *,st_mysql_sys_var *,void *,const void * save)18589 innodb_encryption_threads_update(THD*,st_mysql_sys_var*,void*,const void*save)
18590 {
18591 mysql_mutex_unlock(&LOCK_global_system_variables);
18592 fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
18593 mysql_mutex_lock(&LOCK_global_system_variables);
18594 }
18595
18596 /** Update the system variable innodb_encryption_rotate_key_age.
18597 @param[in] save to-be-assigned value */
18598 static
18599 void
innodb_encryption_rotate_key_age_update(THD *,st_mysql_sys_var *,void *,const void * save)18600 innodb_encryption_rotate_key_age_update(THD*, st_mysql_sys_var*, void*,
18601 const void* save)
18602 {
18603 mysql_mutex_unlock(&LOCK_global_system_variables);
18604 fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
18605 mysql_mutex_lock(&LOCK_global_system_variables);
18606 }
18607
18608 /** Update the system variable innodb_encryption_rotation_iops.
18609 @param[in] save to-be-assigned value */
18610 static
18611 void
innodb_encryption_rotation_iops_update(THD *,st_mysql_sys_var *,void *,const void * save)18612 innodb_encryption_rotation_iops_update(THD*, st_mysql_sys_var*, void*,
18613 const void* save)
18614 {
18615 mysql_mutex_unlock(&LOCK_global_system_variables);
18616 fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
18617 mysql_mutex_lock(&LOCK_global_system_variables);
18618 }
18619
18620 /** Update the system variable innodb_encrypt_tables.
18621 @param[in] save to-be-assigned value */
18622 static
18623 void
innodb_encrypt_tables_update(THD *,st_mysql_sys_var *,void *,const void * save)18624 innodb_encrypt_tables_update(THD*, st_mysql_sys_var*, void*, const void* save)
18625 {
18626 mysql_mutex_unlock(&LOCK_global_system_variables);
18627 fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
18628 mysql_mutex_lock(&LOCK_global_system_variables);
18629 }
18630
18631 /** Update the innodb_log_checksums parameter.
18632 @param[in,out] thd client connection
18633 @param[out] var_ptr current value
18634 @param[in] save immediate result from check function */
18635 static
18636 void
innodb_log_checksums_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18637 innodb_log_checksums_update(THD* thd, st_mysql_sys_var*, void* var_ptr,
18638 const void* save)
18639 {
18640 *static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
18641 thd, *static_cast<const my_bool*>(save));
18642 }
18643
18644 #ifdef UNIV_DEBUG
18645 static
18646 void
innobase_debug_sync_callback(srv_slot_t * slot,const void * value)18647 innobase_debug_sync_callback(srv_slot_t *slot, const void *value)
18648 {
18649 const char *value_str = *static_cast<const char* const*>(value);
18650 size_t len = strlen(value_str) + 1;
18651
18652
18653 // One allocation for list node object and value.
18654 void *buf = ut_malloc_nokey(sizeof(srv_slot_t::debug_sync_t) + len-1);
18655 srv_slot_t::debug_sync_t *sync = new(buf) srv_slot_t::debug_sync_t();
18656 strcpy(sync->str, value_str);
18657
18658 rw_lock_x_lock(&slot->debug_sync_lock);
18659 UT_LIST_ADD_LAST(slot->debug_sync, sync);
18660 rw_lock_x_unlock(&slot->debug_sync_lock);
18661 }
/** Update callback that forwards the assigned string to
innobase_debug_sync_callback() through srv_for_each_thread(), first for
SRV_WORKER and then for SRV_PURGE.
NOTE(review): presumably srv_for_each_thread() invokes the callback once per
thread slot of the given type -- confirm against its definition.
@param[in]	thd	client connection (not used in the body)
@param[in]	value	pointer to the string pointer being assigned */
static
void
innobase_debug_sync_set(THD *thd, st_mysql_sys_var*, void *, const void *value)
{
	srv_for_each_thread(SRV_WORKER, innobase_debug_sync_callback, value);
	srv_for_each_thread(SRV_PURGE, innobase_debug_sync_callback, value);
}
18669 #endif
18670
/* Status variable table: one "Innodb" entry of type SHOW_FUNC that refers to
show_innodb_vars, followed by an all-NullS entry that closes the array. */
static SHOW_VAR innodb_status_variables_export[]= {
	{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
	{NullS, NullS, SHOW_LONG}
};

/* Storage engine descriptor; carries only the handlerton interface
version. */
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
18678
18679 #ifdef WITH_WSREP
18680
18681 static
18682 void
wsrep_kill_victim(MYSQL_THD const bf_thd,MYSQL_THD thd,trx_t * victim_trx,my_bool signal)18683 wsrep_kill_victim(
18684 MYSQL_THD const bf_thd,
18685 MYSQL_THD thd,
18686 trx_t* victim_trx,
18687 my_bool signal)
18688 {
18689 DBUG_ENTER("wsrep_kill_victim");
18690
18691 /* Mark transaction as a victim for Galera abort */
18692 victim_trx->lock.was_chosen_as_wsrep_victim= true;
18693 if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
18694 {
18695 WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
18696 wsrep_thd_UNLOCK(thd);
18697 DBUG_VOID_RETURN;
18698 }
18699
18700 if (wsrep_thd_bf_abort(bf_thd, thd, signal))
18701 {
18702 lock_t* wait_lock= victim_trx->lock.wait_lock;
18703 if (wait_lock)
18704 {
18705 DBUG_ASSERT(victim_trx->is_wsrep());
18706 WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd));
18707 victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
18708 lock_cancel_waiting_and_release(wait_lock);
18709 }
18710 }
18711
18712 DBUG_VOID_RETURN;
18713 }
18714
18715 /** This function is used to kill one transaction.
18716
18717 This transaction was open on this node (not-yet-committed), and a
18718 conflicting writeset from some other node that was being applied
18719 caused a locking conflict. First committed (from other node)
18720 wins, thus open transaction is rolled back. BF stands for
18721 brute-force: any transaction can get aborted by galera any time
18722 it is necessary.
18723
18724 This conflict can happen only when the replicated writeset (from
18725 other node) is being applied, not when it’s waiting in the queue.
18726 If our local transaction reached its COMMIT and this conflicting
18727 writeset was in the queue, then it should fail the local
18728 certification test instead.
18729
18730 A brute force abort is only triggered by a locking conflict
18731 between a writeset being applied by an applier thread (slave thread)
18732 and an open transaction on the node, not by a Galera writeset
18733 comparison as in the local certification failure.
18734
18735 @param[in] bf_thd Brute force (BF) thread
@param[in,out] victim_trx Victim trx to be killed
18737 @param[in] signal Should victim be signaled */
18738 void
wsrep_innobase_kill_one_trx(MYSQL_THD const bf_thd,trx_t * victim_trx,my_bool signal)18739 wsrep_innobase_kill_one_trx(
18740 MYSQL_THD const bf_thd,
18741 trx_t *victim_trx,
18742 my_bool signal)
18743 {
18744 ut_ad(bf_thd);
18745 ut_ad(victim_trx);
18746 ut_ad(lock_mutex_own());
18747 ut_ad(trx_mutex_own(victim_trx));
18748
18749 DBUG_ENTER("wsrep_innobase_kill_one_trx");
18750 THD *thd= (THD *) victim_trx->mysql_thd;
18751 /* Note that bf_trx might not exist here e.g. on MDL conflict
18752 case (test: galera_concurrent_ctas).*/
18753 trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd);
18754
18755 if (!thd)
18756 {
18757 WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
18758 DBUG_VOID_RETURN;
18759 }
18760
18761 /* Here we need to lock THD::LOCK_thd_data to protect from
18762 concurrent usage or disconnect or delete. */
18763 DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
18764 wsrep_thd_LOCK(thd);
18765 DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
18766
18767 WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
18768
18769 WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
18770 "trx_id: " TRX_ID_FMT " thread: %ld "
18771 "seqno: %lld client_state: %s client_mode: %s "
18772 "trx_state %s query: %s",
18773 wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
18774 bf_trx ? bf_trx->id : TRX_ID_MAX,
18775 thd_get_thread_id(bf_thd),
18776 wsrep_thd_trx_seqno(bf_thd),
18777 wsrep_thd_client_state_str(bf_thd),
18778 wsrep_thd_client_mode_str(bf_thd),
18779 wsrep_thd_transaction_state_str(bf_thd),
18780 wsrep_thd_query(bf_thd));
18781
18782 WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
18783 "trx_id: " TRX_ID_FMT " thread: %ld "
18784 "seqno: %lld client_state: %s client_mode: %s "
18785 "trx_state %s query: %s",
18786 wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
18787 victim_trx->id,
18788 thd_get_thread_id(thd),
18789 wsrep_thd_trx_seqno(thd),
18790 wsrep_thd_client_state_str(thd),
18791 wsrep_thd_client_mode_str(thd),
18792 wsrep_thd_transaction_state_str(thd),
18793 wsrep_thd_query(thd));
18794
18795 wsrep_kill_victim(bf_thd, thd, victim_trx, signal);
18796 DBUG_VOID_RETURN;
18797 }
18798
18799 /** This function forces the victim transaction to abort. Aborting the
18800 transaction does NOT end it, it still has to be rolled back.
18801
18802 @param bf_thd brute force THD asking for the abort
18803 @param victim_thd victim THD to be aborted
18804
18805 @return 0 victim was aborted
18806 @return -1 victim thread was aborted (no transaction)
18807 */
18808 static
18809 void
wsrep_abort_transaction(handlerton *,THD * bf_thd,THD * victim_thd,my_bool signal)18810 wsrep_abort_transaction(
18811 handlerton*,
18812 THD *bf_thd,
18813 THD *victim_thd,
18814 my_bool signal)
18815 {
18816 /* Note that victim thd is protected with
18817 THD::LOCK_thd_data and THD::LOCK_thd_kill here. */
18818 trx_t* victim_trx= thd_to_trx(victim_thd);
18819 trx_t* bf_trx= thd_to_trx(bf_thd);
18820 WSREP_DEBUG("wsrep_abort_transaction: BF:"
18821 " thread %ld client_state %s client_mode %s"
18822 " trans_state %s query %s trx " TRX_ID_FMT,
18823 thd_get_thread_id(bf_thd),
18824 wsrep_thd_client_state_str(bf_thd),
18825 wsrep_thd_client_mode_str(bf_thd),
18826 wsrep_thd_transaction_state_str(bf_thd),
18827 wsrep_thd_query(bf_thd),
18828 bf_trx ? bf_trx->id : 0);
18829
18830 WSREP_DEBUG("wsrep_abort_transaction: victim:"
18831 " thread %ld client_state %s client_mode %s"
18832 " trans_state %s query %s trx " TRX_ID_FMT,
18833 thd_get_thread_id(victim_thd),
18834 wsrep_thd_client_state_str(victim_thd),
18835 wsrep_thd_client_mode_str(victim_thd),
18836 wsrep_thd_transaction_state_str(victim_thd),
18837 wsrep_thd_query(victim_thd),
18838 victim_trx ? victim_trx->id : 0);
18839
18840 if (victim_trx)
18841 {
18842 lock_mutex_enter();
18843 trx_mutex_enter(victim_trx);
18844 wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal);
18845 lock_mutex_exit();
18846 trx_mutex_exit(victim_trx);
18847 wsrep_srv_conc_cancel_wait(victim_trx);
18848 }
18849 else
18850 {
18851 wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
18852 }
18853 }
18854
18855 static
18856 int
innobase_wsrep_set_checkpoint(handlerton * hton,const XID * xid)18857 innobase_wsrep_set_checkpoint(
18858 /*==========================*/
18859 handlerton* hton,
18860 const XID* xid)
18861 {
18862 DBUG_ASSERT(hton == innodb_hton_ptr);
18863
18864 if (wsrep_is_wsrep_xid(xid)) {
18865
18866 trx_rseg_update_wsrep_checkpoint(xid);
18867 innobase_flush_logs(hton, false);
18868 return 0;
18869 } else {
18870 return 1;
18871 }
18872 }
18873
18874 static
18875 int
innobase_wsrep_get_checkpoint(handlerton * hton,XID * xid)18876 innobase_wsrep_get_checkpoint(
18877 /*==========================*/
18878 handlerton* hton,
18879 XID* xid)
18880 {
18881 DBUG_ASSERT(hton == innodb_hton_ptr);
18882 trx_rseg_read_wsrep_checkpoint(*xid);
18883 return 0;
18884 }
18885 #endif /* WITH_WSREP */
18886
innodb_idle_flush_pct_update(THD * thd,st_mysql_sys_var * var,void *,const void * save)18887 static void innodb_idle_flush_pct_update(THD *thd, st_mysql_sys_var *var,
18888 void*, const void *save)
18889 {
18890 innodb_idle_flush_pct = *static_cast<const ulong*>(save);
18891 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
18892 HA_ERR_WRONG_COMMAND, deprecated_idle_flush_pct);
18893 }
18894
/* plugin options */

/* In the declarations below, the two arguments that follow the help text
are the check callback and the update callback (NULL when there is none);
the default value comes next, then any limits the variable type takes. */

/* innodb_checksum_algorithm: enum backed by srv_checksum_algorithm; changes
go through innodb_checksum_algorithm_update; default is
SRV_CHECKSUM_ALGORITHM_CRC32. */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
  PLUGIN_VAR_RQCMDARG,
  "The algorithm InnoDB uses for page checksumming. Possible values are"
  " FULL_CRC32"
  " for new files, always use CRC-32C; for old, see CRC32 below;"
  " STRICT_FULL_CRC32"
  " for new files, always use CRC-32C; for old, see STRICT_CRC32 below;"
  " CRC32"
  " write crc32, allow any of the other checksums to match when reading;"
  " STRICT_CRC32"
  " write crc32, do not allow other algorithms to match when reading;"
  " INNODB"
  " write a software calculated checksum, allow any other checksums"
  " to match when reading;"
  " STRICT_INNODB"
  " write a software calculated checksum, do not allow other algorithms"
  " to match when reading;"
  " NONE"
  " write a constant magic number, do not do any checksum verification"
  " when reading (same as innodb_checksums=OFF);"
  " STRICT_NONE"
  " write a constant magic number, do not allow values other than that"
  " magic number when reading;"
  " Files updated when this option is set to crc32 or strict_crc32 will"
  " not be readable by MariaDB versions older than 10.0.4;"
  " new files created with full_crc32 are readable by MariaDB 10.4.3+",
  NULL, innodb_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_CRC32,
  &innodb_checksum_algorithm_typelib);

/* innodb_log_checksums (deprecated): changes go through
innodb_log_checksums_update; default TRUE. */
static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. Whether to require checksums for InnoDB redo log blocks.",
  NULL, innodb_log_checksums_update, TRUE);

/* innodb_checksums (deprecated): read-only, default TRUE. */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting"
  " this to OFF."
  " Enable InnoDB checksums validation (enabled by default)."
  " Disable with --skip-innodb-checksums.",
  NULL, NULL, TRUE);
18938
/* innodb_data_home_dir: read-only string, no default. */
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

/* innodb_doublewrite: read-only, default TRUE. */
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default)."
  " Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

/* innodb_use_atomic_writes: read-only, default TRUE. */
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable atomic writes, instead of using the doublewrite buffer, for files "
  "on devices that supports atomic writes. "
  "This option only works on Linux with either FusionIO cards using "
  "the directFS filesystem or with Shannon cards using any file system.",
  NULL, NULL, TRUE);

/* innodb_stats_include_delete_marked: default FALSE. */
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
  srv_stats_include_delete_marked,
  PLUGIN_VAR_OPCMDARG,
  "Include delete marked records when calculating persistent statistics",
  NULL, NULL, FALSE);

/* innodb_instant_alter_column_allowed: enum whose default is index 2 of
innodb_instant_alter_column_allowed_typelib (add_drop_reorder). */
static MYSQL_SYSVAR_ENUM(instant_alter_column_allowed,
  innodb_instant_alter_column_allowed,
  PLUGIN_VAR_RQCMDARG,
  "File format constraint for ALTER TABLE", NULL, NULL, 2/*add_drop_reorder*/,
  &innodb_instant_alter_column_allowed_typelib);
18969
/* innodb_io_capacity: changes go through innodb_io_capacity_update;
default 200, minimum 100, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Number of IOPs the server can do. Tunes the background IO rate",
  NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);

/* innodb_io_capacity_max: changes go through innodb_io_capacity_max_update;
default SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, minimum 100, maximum
SRV_MAX_IO_CAPACITY_LIMIT. */
static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Limit to which innodb_io_capacity can be inflated.",
  NULL, innodb_io_capacity_max_update,
  SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
  SRV_MAX_IO_CAPACITY_LIMIT, 0);

/* innodb_idle_flush_pct (deprecated, no effect): changes go through
innodb_idle_flush_pct_update; default 100, range 0..100. */
static MYSQL_SYSVAR_ULONG(idle_flush_pct, innodb_idle_flush_pct,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. This setting has no effect.",
  NULL, innodb_idle_flush_pct_update, 100, 0, 100, 0);
18986
#ifdef UNIV_DEBUG
/* Settings that exist only in UNIV_DEBUG builds. */

/* innodb_background_drop_list_empty: assignment invokes
wait_background_drop_list_empty; default FALSE. */
static MYSQL_SYSVAR_BOOL(background_drop_list_empty,
  innodb_background_drop_list_empty,
  PLUGIN_VAR_OPCMDARG,
  "Wait for the background drop list to become empty",
  NULL, wait_background_drop_list_empty, FALSE);

/* innodb_log_checkpoint_now: assignment invokes checkpoint_now_set;
default FALSE. */
static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
  PLUGIN_VAR_OPCMDARG,
  "Force checkpoint now",
  NULL, checkpoint_now_set, FALSE);

/* innodb_buf_flush_list_now: assignment invokes buf_flush_list_now_set;
default FALSE. */
static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
  PLUGIN_VAR_OPCMDARG,
  "Force dirty page flush now",
  NULL, buf_flush_list_now_set, FALSE);

/* innodb_merge_threshold_set_all_debug: assignment invokes
innodb_merge_threshold_set_all_debug_update; default
DICT_INDEX_MERGE_THRESHOLD_DEFAULT, range 1..50. */
static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug,
  innodb_merge_threshold_set_all_debug,
  PLUGIN_VAR_RQCMDARG,
  "Override current MERGE_THRESHOLD setting for all indexes at dictionary"
  " cache by the specified value dynamically, at the time.",
  NULL, innodb_merge_threshold_set_all_debug_update,
  DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0);
#endif /* UNIV_DEBUG */
19012
/* innodb_purge_batch_size: no callbacks. */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
  PLUGIN_VAR_OPCMDARG,
  "Number of UNDO log pages to purge in one batch from the history list.",
  NULL, NULL,
  300,			/* Default setting */
  1,			/* Minimum value */
  5000,			/* Maximum value */
  0);

/* innodb_purge_threads: read-only.
NOTE(review): the help text promises "1 to 32" while the enforced maximum
is srv_max_purge_threads -- confirm the two agree. */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Purge threads can be from 1 to 32. Default is 4.",
  NULL, NULL,
  4,			/* Default setting */
  1,			/* Minimum value */
  srv_max_purge_threads,/* Maximum value */
  0);

/* innodb_sync_array_size: read-only. */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Size of the mutex/lock wait array.",
  NULL, NULL,
  1,			/* Default setting */
  1,			/* Minimum value */
  1024,			/* Maximum value */
  0);
19037
/* innodb_fast_shutdown: new values are checked by fast_shutdown_validate;
default 1, range 0..3. */
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible"
  " values are 0, 1 (faster), 2 (crash-like), 3 (fastest clean).",
  fast_shutdown_validate, NULL, 1, 0, 3, 0);

/* innodb_file_per_table: default TRUE. */
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
  PLUGIN_VAR_NOCMDARG,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, TRUE);

/* innodb_ft_server_stopword_table: string declared with
PLUGIN_VAR_MEMALLOC; new values are checked by
innodb_stopword_table_validate; no default. */
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
  "The user supplied stopword table name.",
  innodb_stopword_table_validate,
  NULL,
  NULL);

/* innodb_flush_log_at_timeout: default 1, range 0..2700. */
static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
  PLUGIN_VAR_OPCMDARG,
  "Write and flush logs every (n) second.",
  NULL, NULL, 1, 0, 2700, 0);
19060
/* innodb_flush_log_at_trx_commit: default 1, range 0..3; the meaning of
each value is spelled out in the help text. */
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
  "Controls the durability/speed trade-off for commits."
  " Set to 0 (write and flush redo log to disk only once per second),"
  " 1 (flush to disk at each commit),"
  " 2 (write to log at commit but flush to disk only once per second)"
  " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
  " 1 and 3 guarantees that after a crash, committed transactions will"
  " not be lost and will be consistent with the binlog and other transactional"
  " engines. 2 can get inconsistent and lose transactions if there is a"
  " power failure or kernel crash but not if mysqld crashes. 0 has no"
  " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
  NULL, NULL, 1, 0, 3, 0);

/* innodb_flush_method: read-only enum; the default is chosen by IF_WIN
between SRV_ALL_O_DIRECT_FSYNC and SRV_FSYNC. */
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.",
  NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
  &innodb_flush_method_typelib);
19080
/* innodb_file_format: deprecated read-only string with no effect. */
static MYSQL_SYSVAR_STR(file_format, innodb_file_format,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_large_prefix: deprecated read-only string with no effect. */
static MYSQL_SYSVAR_STR(large_prefix, innodb_large_prefix,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_force_load_corrupted: read-only, default FALSE. */
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to load metadata of corrupted table.",
  NULL, NULL, FALSE);

/* innodb_locks_unsafe_for_binlog (deprecated): read-only, default FALSE. */
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. This option may be removed in future releases."
  " Please use READ COMMITTED transaction isolation level instead."
  " Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

/* innodb_log_group_home_dir: read-only string, no default. */
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);
19104
19105 /** Update innodb_page_cleaners.
19106 @param[in] save the new value of innodb_page_cleaners */
19107 static
19108 void
innodb_page_cleaners_threads_update(THD *,struct st_mysql_sys_var *,void *,const void * save)19109 innodb_page_cleaners_threads_update(THD*, struct st_mysql_sys_var*, void*, const void *save)
19110 {
19111 buf_flush_set_page_cleaner_thread_cnt(*static_cast<const ulong*>(save));
19112 }
19113
/* innodb_page_cleaners: changes go through
innodb_page_cleaners_threads_update; default 4, range 1..64. */
static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
  PLUGIN_VAR_RQCMDARG,
  "Page cleaner threads can be from 1 to 64. Default is 4.",
  NULL,
  innodb_page_cleaners_threads_update, 4, 1, 64, 0);

/* innodb_max_dirty_pages_pct: changes go through
innodb_max_dirty_pages_pct_update; default 75.0, range 0..99.999. */
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages allowed in bufferpool.",
  NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0);

/* innodb_max_dirty_pages_pct_lwm: changes go through
innodb_max_dirty_pages_pct_lwm_update; default 0, range 0..99.999. */
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
  srv_max_dirty_pages_pct_lwm,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages at which flushing kicks in.",
  NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);

/* innodb_adaptive_flushing_lwm: default 10.0, range 0.0..70.0. */
static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
  srv_adaptive_flushing_lwm,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of log capacity below which no adaptive flushing happens.",
  NULL, NULL, 10.0, 0.0, 70.0, 0);

/* innodb_adaptive_flushing: default TRUE. */
static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
  PLUGIN_VAR_NOCMDARG,
  "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
  NULL, NULL, TRUE);

/* innodb_flush_sync: default TRUE. */
static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync,
  PLUGIN_VAR_NOCMDARG,
  "Allow IO bursts at the checkpoints ignoring io_capacity setting.",
  NULL, NULL, TRUE);

/* innodb_flushing_avg_loops: default 30, range 1..1000. */
static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
  srv_flushing_avg_loops,
  PLUGIN_VAR_RQCMDARG,
  "Number of iterations over which the background flushing is averaged.",
  NULL, NULL, 30, 1, 1000, 0);
19152
/* innodb_max_purge_lag: default 0 (no limit), maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
  PLUGIN_VAR_RQCMDARG,
  "Desired maximum length of the purge queue (0 = no limit)",
  NULL, NULL, 0, 0, ~0UL, 0);

/* innodb_max_purge_lag_delay: value is in microseconds per the help text. */
static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
  PLUGIN_VAR_RQCMDARG,
  "Maximum delay of user threads in micro-seconds",
  NULL, NULL,
  0L,			/* Default setting */
  0L,			/* Minimum value */
  10000000UL,		/* Maximum value */
  0);

/* innodb_max_purge_lag_wait: assignment invokes
innodb_max_purge_lag_wait_update; default UINT_MAX, range 0..UINT_MAX. */
static MYSQL_SYSVAR_UINT(max_purge_lag_wait, innodb_max_purge_lag_wait,
  PLUGIN_VAR_RQCMDARG,
  "Wait until History list length is below the specified limit",
  NULL, innodb_max_purge_lag_wait_update, UINT_MAX, 0, UINT_MAX, 0);
19170
/* innodb_rollback_on_timeout: read-only, default FALSE. */
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
  NULL, NULL, FALSE);

/* innodb_status_file: declared with PLUGIN_VAR_NOSYSVAR; default FALSE. */
static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file",
  NULL, NULL, FALSE);

/* innodb_stats_on_metadata: default FALSE. */
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
  PLUGIN_VAR_OPCMDARG,
  "Enable statistics gathering for metadata commands such as"
  " SHOW TABLE STATUS for tables that use transient statistics (off by default)",
  NULL, NULL, FALSE);

/* innodb_stats_sample_pages (deprecated): backed by the same variable,
srv_stats_transient_sample_pages, as innodb_stats_transient_sample_pages
below; changes go through innodb_stats_sample_pages_update. */
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "Deprecated, use innodb_stats_transient_sample_pages instead",
  NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0);

/* innodb_stats_transient_sample_pages: default 8, minimum 1. */
static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
  srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating transient"
  " statistics (if persistent statistics are not used, default 8)",
  NULL, NULL, 8, 1, ~0ULL, 0);

/* innodb_stats_persistent: default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB persistent statistics enabled for all tables unless overridden"
  " at table level",
  NULL, NULL, TRUE);

/* innodb_stats_auto_recalc: default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB automatic recalculation of persistent statistics enabled for all"
  " tables unless overridden at table level (automatic recalculation is only"
  " done when InnoDB decides that the table has changed too much and needs a"
  " new statistics)",
  NULL, NULL, TRUE);

/* innodb_stats_persistent_sample_pages: default 20, minimum 1. */
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
  srv_stats_persistent_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating persistent"
  " statistics (by ANALYZE, default 20)",
  NULL, NULL, 20, 1, ~0ULL, 0);

/* innodb_stats_modified_counter: default 0. */
static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
  PLUGIN_VAR_RQCMDARG,
  "The number of rows modified before we calculate new statistics (default 0 = current limits)",
  NULL, NULL, 0, 0, ~0ULL, 0);

/* innodb_stats_traditional: default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
  PLUGIN_VAR_RQCMDARG,
  "Enable traditional statistic calculation based on number of configured pages (default true)",
  NULL, NULL, TRUE);
19229
#ifdef BTR_CUR_HASH_ADAPT
/* Adaptive hash index settings; compiled only when BTR_CUR_HASH_ADAPT is
defined. */

/* innodb_adaptive_hash_index: changes go through
innodb_adaptive_hash_index_update; default true. */
static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB adaptive hash index (enabled by default). "
  " Disable with --skip-innodb-adaptive-hash-index.",
  NULL, innodb_adaptive_hash_index_update, true);

/** Number of distinct partitions of AHI.
Each partition is protected by its own latch and so we have parts number
of latches protecting complete search system.
Read-only; default 8, range 1..512. */
static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of InnoDB Adaptive Hash Index Partitions (default 8)",
  NULL, NULL, 8, 1, 512, 0);
#endif /* BTR_CUR_HASH_ADAPT */
19245
19246 static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
19247 PLUGIN_VAR_RQCMDARG,
19248 "Replication thread delay (ms) on the slave server if"
19249 " innodb_thread_concurrency is reached (0 by default)",
19250 NULL, NULL, 0, 0, ~0UL, 0);
19251
/** innodb_compression_level: stored in page_zip_level.
Default DEFAULT_COMPRESSION_LEVEL (6 according to the help text),
minimum 0, maximum 9. */
static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
  PLUGIN_VAR_RQCMDARG,
  "Compression level used for zlib compression.  0 is no compression"
  ", 1 is fastest, 9 is best compression and default is 6.",
  NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);

/** innodb_log_compressed_pages: boolean stored in page_zip_log_pages
(default TRUE). */
static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
       PLUGIN_VAR_OPCMDARG,
  "Enables/disables the logging of entire compressed page images."
  " InnoDB logs the compressed pages to prevent corruption if"
  " the zlib compression algorithm changes."
  " When turned OFF, InnoDB will assume that the zlib"
  " compression algorithm doesn't change.",
  NULL, NULL, TRUE);

/** innodb_log_optimize_ddl: boolean stored in innodb_log_optimize_ddl
(default FALSE). The help text marks it as deprecated. */
static MYSQL_SYSVAR_BOOL(log_optimize_ddl, innodb_log_optimize_ddl,
  PLUGIN_VAR_OPCMDARG,
  "DEPRECATED. Ignored in MariaDB 10.5."
  " Reduce redo logging when natively creating indexes or rebuilding tables."
  " Enabling this may slow down backup and cause delay due to page flushing.",
  NULL, NULL, FALSE);
19273
/** innodb_autoextend_increment: stored in
sys_tablespace_auto_extend_increment. Default 64, minimum 1, maximum 1000
(megabytes, per the help text). */
static MYSQL_SYSVAR_ULONG(autoextend_increment,
  sys_tablespace_auto_extend_increment,
  PLUGIN_VAR_RQCMDARG,
  "Data file autoextend increment in megabytes",
  NULL, NULL, 64L, 1L, 1000L, 0);

/** innodb_buffer_pool_chunk_size: stored in srv_buf_pool_chunk_unit and
declared PLUGIN_VAR_READONLY. Default 128 MiB, minimum 1 MiB, maximum
LONG_MAX; the trailing 1 MiB argument is presumably the block size that
values are aligned to (TODO confirm against the plugin API). */
static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of a single memory chunk within each buffer pool instance"
  " for resizing buffer pool. Online buffer pool resizing happens"
  " at this granularity. 0 means disable resizing buffer pool.",
  NULL, NULL,
  128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
19287
/* The following two variables exist only in UNIV_DEBUG or
UNIV_PERF_DEBUG builds; both are declared PLUGIN_VAR_READONLY. */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
/** innodb_page_hash_locks: stored in srv_n_page_hash_locks.
Default 16, minimum 1, maximum MAX_PAGE_HASH_LOCKS. */
static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
  NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0);

/** innodb_doublewrite_batch_size: stored in srv_doublewrite_batch_size.
Default 120, minimum 1, maximum 127. */
static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of pages reserved in doublewrite buffer for batch flushing",
  NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
19299
/** innodb_lock_schedule_algorithm: enumeration stored in
innodb_lock_schedule_algorithm, with the value names taken from
innodb_lock_schedule_algorithm_typelib. Declared PLUGIN_VAR_READONLY;
the default is INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS. */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The algorithm Innodb uses for deciding which locks to grant next when"
  " a lock is released. Possible values are"
  " FCFS"
  " grant the locks in First-Come-First-Served order;"
  " VATS"
  " use the Variance-Aware-Transaction-Scheduling algorithm, which"
  " uses an Eldest-Transaction-First heuristic.",
  NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
  &innodb_lock_schedule_algorithm_typelib);
19311
/** innodb_buffer_pool_instances: stored in srv_buf_pool_instances and
declared PLUGIN_VAR_READONLY. Default srv_buf_pool_instances_default,
minimum 0, maximum MAX_BUFFER_POOLS. */
static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
  NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0);

/** innodb_buffer_pool_filename: string stored in srv_buf_dump_filename
(PLUGIN_VAR_MEMALLOC). New values are checked by
innodb_srv_buf_dump_filename_validate(); the default is
SRV_BUF_DUMP_FILENAME_DEFAULT. */
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "Filename to/from which to dump/load the InnoDB buffer pool",
  innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);

/** innodb_buffer_pool_dump_now: boolean trigger stored in
innodb_buffer_pool_dump_now; assignments are handled by
buffer_pool_dump_now(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_dump_now, FALSE);

/** innodb_buffer_pool_dump_at_shutdown: boolean stored in
srv_buffer_pool_dump_at_shutdown (default TRUE). */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
  PLUGIN_VAR_RQCMDARG,
  "Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);

/** innodb_buffer_pool_dump_pct: stored in srv_buf_pool_dump_pct.
Default 25, minimum 1, maximum 100. */
static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
  PLUGIN_VAR_RQCMDARG,
  "Dump only the hottest N% of each buffer pool, defaults to 25",
  NULL, NULL, 25, 1, 100, 0);

#ifdef UNIV_DEBUG
/* Added to test the innodb_buffer_pool_load_incomplete status variable.
Default and maximum are both LONG_MAX; minimum is 1. */
static MYSQL_SYSVAR_ULONG(buffer_pool_load_pages_abort, srv_buf_pool_load_pages_abort,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages during a buffer pool load to process before signaling innodb_buffer_pool_load_abort=1",
  NULL, NULL, LONG_MAX, 1, LONG_MAX, 0);

/** innodb_buffer_pool_evict (debug builds only): string stored in
srv_buffer_pool_evict; assignments are handled by
innodb_buffer_pool_evict_update(). The default is the empty string. */
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
  PLUGIN_VAR_RQCMDARG,
  "Evict pages from the buffer pool",
  NULL, innodb_buffer_pool_evict_update, "");
#endif /* UNIV_DEBUG */

/** innodb_buffer_pool_load_now: boolean trigger stored in
innodb_buffer_pool_load_now; assignments are handled by
buffer_pool_load_now(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_load_now, FALSE);

/** innodb_buffer_pool_load_abort: boolean trigger stored in
innodb_buffer_pool_load_abort; assignments are handled by
buffer_pool_load_abort(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
  PLUGIN_VAR_RQCMDARG,
  "Abort a currently running load of the buffer pool",
  NULL, buffer_pool_load_abort, FALSE);

/* there is no point in changing this during runtime, thus readonly */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Load the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);
19365
19366 static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
19367 PLUGIN_VAR_RQCMDARG,
19368 "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
19369 "defragmentation will be paused. And new defragmentation command will fail."
19370 "Paused defragmentation commands will resume when this variable is set to "
19371 "true again.",
19372 NULL, NULL, FALSE);
19373
/** innodb_defragment_n_pages: stored in srv_defragment_n_pages.
Default 7, minimum 2, maximum 32. */
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages considered at once when merging multiple pages to "
  "defragment",
  NULL, NULL, 7, 2, 32, 0);

/** innodb_defragment_stats_accuracy: stored in
srv_defragment_stats_accuracy. Default 0, minimum 0, maximum ~0U. */
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
  srv_defragment_stats_accuracy,
  PLUGIN_VAR_RQCMDARG,
  "How many defragment stats changes there are before the stats "
  "are written to persistent storage. Set to 0 meaning disable "
  "defragment stats tracking.",
  NULL, NULL, 0, 0, ~0U, 0);

/** innodb_defragment_fill_factor_n_recs: stored in
srv_defragment_fill_factor_n_recs. Default 20, minimum 1, maximum 100. */
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
  srv_defragment_fill_factor_n_recs,
  PLUGIN_VAR_RQCMDARG,
  "How many records of space defragmentation should leave on the page. "
  "This variable, together with innodb_defragment_fill_factor, is introduced "
  "so defragmentation won't pack the page too full and cause page split on "
  "the next insert on every page. The variable indicating more defragmentation"
  " gain is the one effective.",
  NULL, NULL, 20, 1, 100, 0);
19397
19398 static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
19399 PLUGIN_VAR_RQCMDARG,
19400 "A number between [0.7, 1] that tells defragmentation how full it should "
19401 "fill a page. Default is 0.9. Number below 0.7 won't make much sense."
19402 "This variable, together with innodb_defragment_fill_factor_n_recs, is "
19403 "introduced so defragmentation won't pack the page too full and cause "
19404 "page split on the next insert on every page. The variable indicating more "
19405 "defragmentation gain is the one effective.",
19406 NULL, NULL, 0.9, 0.7, 1, 0);
19407
19408 static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
19409 PLUGIN_VAR_RQCMDARG,
19410 "Do not defragment a single index more than this number of time per second."
19411 "This controls the number of time defragmentation thread can request X_LOCK "
19412 "on an index. Defragmentation thread will check whether "
19413 "1/defragment_frequency (s) has passed since it worked on this index last "
19414 "time, and put the index back to the queue if not enough time has passed. "
19415 "The actual frequency can only be lower than this given number.",
19416 NULL, innodb_defragment_frequency_update,
19417 SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
19418
19419
/** innodb_lru_scan_depth: stored in srv_LRU_scan_depth.
Default 1024, minimum 100, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
  PLUGIN_VAR_RQCMDARG,
  "How deep to scan LRU to keep it clean",
  NULL, NULL, 1024, 100, ~0UL, 0);

/** innodb_flush_neighbors: stored in srv_flush_neighbors.
Default 1, minimum 0, maximum 2; the help text explains each value. */
static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
  PLUGIN_VAR_OPCMDARG,
  "Set to 0 (don't flush neighbors from buffer pool),"
  " 1 (flush contiguous neighbors from buffer pool)"
  " or 2 (flush neighbors from buffer pool),"
  " when flushing a block",
  NULL, NULL, 1, 0, 2, 0);
19432
/** innodb_commit_concurrency: stored in innobase_commit_concurrency.
New values are checked by innobase_commit_concurrency_validate().
Default 0, minimum 0, maximum 1000. */
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
  innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);

/** innodb_concurrency_tickets: stored in srv_n_free_tickets_to_enter.
Default 5000, minimum 1, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
  PLUGIN_VAR_RQCMDARG,
  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
  NULL, NULL, 5000L, 1L, ~0UL, 0);

/** innodb_deadlock_detect: boolean stored in innobase_deadlock_detect
(default TRUE). */
static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
  PLUGIN_VAR_NOCMDARG,
  "Enable/disable InnoDB deadlock detector (default ON)."
  " if set to OFF, deadlock detection is skipped,"
  " and we rely on innodb_lock_wait_timeout in case of deadlock.",
  NULL, NULL, TRUE);

/** innodb_fill_factor: stored in innobase_fill_factor.
Default 100, minimum 10, maximum 100. */
static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of B-tree page filled during bulk insert",
  NULL, NULL, 100, 10, 100, 0);
19454
/* Full-text search (FTS) related variables, plus
innodb_disable_sort_file_cache. Numeric arguments after the two callbacks
are default, minimum, maximum and a trailing 0. */

/** innodb_ft_enable_diag_print: boolean stored in fts_enable_diag_print
(default FALSE). */
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
  PLUGIN_VAR_OPCMDARG,
  "Whether to enable additional FTS diagnostic printout ",
  NULL, NULL, FALSE);

/** innodb_disable_sort_file_cache: boolean stored in
srv_disable_sort_file_cache (default FALSE). */
static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
  PLUGIN_VAR_OPCMDARG,
  "Whether to disable OS system file cache for sort I/O",
  NULL, NULL, FALSE);

/** innodb_ft_aux_table: string stored in innodb_ft_aux_table
(PLUGIN_VAR_MEMALLOC). New values are checked by
innodb_ft_aux_table_validate(); the default is NULL. */
static MYSQL_SYSVAR_STR(ft_aux_table, innodb_ft_aux_table,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "FTS internal auxiliary table to be checked",
  innodb_ft_aux_table_validate, NULL, NULL);

/** innodb_ft_cache_size: stored in fts_max_cache_size; read-only.
Default 8000000, minimum 1600000, maximum 80000000. */
static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search cache size in bytes",
  NULL, NULL, 8000000, 1600000, 80000000, 0);

/** innodb_ft_total_cache_size: stored in fts_max_total_cache_size;
read-only. Default 640000000, minimum 32000000, maximum 1600000000. */
static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Total memory allocated for InnoDB Fulltext Search cache",
  NULL, NULL, 640000000, 32000000, 1600000000, 0);

/** innodb_ft_result_cache_limit: size_t stored in fts_result_cache_limit.
Default 2000000000, minimum 1000000, maximum SIZE_T_MAX. */
static MYSQL_SYSVAR_SIZE_T(ft_result_cache_limit, fts_result_cache_limit,
  PLUGIN_VAR_RQCMDARG,
  "InnoDB Fulltext search query result cache limit in bytes",
  NULL, NULL, 2000000000L, 1000000L, SIZE_T_MAX, 0);

/** innodb_ft_min_token_size: stored in fts_min_token_size; read-only.
Default 3, minimum 0, maximum 16. */
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search minimum token size in characters",
  NULL, NULL, 3, 0, 16, 0);

/** innodb_ft_max_token_size: stored in fts_max_token_size; read-only.
Default and maximum are both FTS_MAX_WORD_LEN_IN_CHAR; minimum is 10. */
static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search maximum token size in characters",
  NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);

/** innodb_ft_num_word_optimize: stored in fts_num_word_optimize.
Default 2000, minimum 1000, maximum 10000. */
static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB Fulltext search number of words to optimize for each optimize table call ",
  NULL, NULL, 2000, 1000, 10000, 0);

/** innodb_ft_sort_pll_degree: stored in fts_sort_pll_degree; read-only.
Default 2, minimum 1, maximum 16. */
static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number",
  NULL, NULL, 2, 1, 16, 0);
19504
/** innodb_sort_buffer_size: stored in srv_sort_buf_size; read-only.
Default 1048576 (1 MiB), minimum 65536 (64 KiB), maximum 64<<20 (64 MiB). */
static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Memory buffer size for index creation",
  NULL, NULL, 1048576, 65536, 64<<20, 0);

/** innodb_online_alter_log_max_size: 64-bit value stored in
srv_online_max_size. Default 128<<20 (128 MiB), minimum 65536,
maximum ~0ULL. */
static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
  PLUGIN_VAR_RQCMDARG,
  "Maximum modification log file size for online index creation",
  NULL, NULL, 128<<20, 65536, ~0ULL, 0);

/** innodb_optimize_fulltext_only: boolean stored in
innodb_optimize_fulltext_only (default FALSE). */
static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
  PLUGIN_VAR_NOCMDARG,
  "Only optimize the Fulltext index of the table",
  NULL, NULL, FALSE);
19519
/* Startup-only (PLUGIN_VAR_READONLY) I/O, recovery and page size
settings. */

/** innodb_read_io_threads: stored in srv_n_read_io_threads.
Default 4, minimum 1, maximum 64. */
static MYSQL_SYSVAR_ULONG(read_io_threads, srv_n_read_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background read I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/** innodb_write_io_threads: stored in srv_n_write_io_threads.
Default 4, minimum 1, maximum 64. */
static MYSQL_SYSVAR_ULONG(write_io_threads, srv_n_write_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background write I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/** innodb_force_recovery: stored in srv_force_recovery.
Default 0, minimum 0, maximum 6. */
static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Helps to save your data in case the disk image of the database becomes corrupt.",
  NULL, NULL, 0, 0, 6, 0);

/** innodb_page_size: stored in srv_page_size.
Default UNIV_PAGE_SIZE_DEF, minimum UNIV_PAGE_SIZE_MIN,
maximum UNIV_PAGE_SIZE_MAX. */
static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Page size to use for all InnoDB tablespaces.",
  NULL, NULL, UNIV_PAGE_SIZE_DEF,
  UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0);
19540
/** innodb_log_buffer_size: stored in srv_log_buffer_size; read-only.
Default 16L << 20 (16 MiB), minimum 256L << 10 (256 KiB), maximum
LONG_MAX; the trailing 1024 is presumably the block size that values are
aligned to (TODO confirm against the plugin API). */
static MYSQL_SYSVAR_ULONG(log_buffer_size, srv_log_buffer_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the buffer which InnoDB uses to write log to the log files on disk.",
  NULL, NULL, 16L << 20, 256L << 10, LONG_MAX, 1024);

/** innodb_log_file_size: 64-bit value stored in srv_log_file_size;
read-only. Default 48 << 20 (48 MiB), minimum 1 << 20 (1 MiB), maximum
log_group_max_size; the last argument is UNIV_PAGE_SIZE_MAX. */
static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of each log file in a log group.",
  NULL, NULL, 48 << 20, 1 << 20, log_group_max_size, UNIV_PAGE_SIZE_MAX);
/* OS_FILE_LOG_BLOCK_SIZE would be more appropriate than UNIV_PAGE_SIZE_MAX,
but fil_space_t is being used for the redo log, and it uses data pages. */

/** innodb_log_files_in_group: stored in srv_n_log_files; read-only.
Default 2, minimum 1, maximum SRV_N_LOG_FILES_MAX. */
static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
  NULL, NULL, 2, 1, SRV_N_LOG_FILES_MAX, 0);

/** innodb_log_write_ahead_size: stored in srv_log_write_ahead_size;
assignments are handled by innodb_log_write_ahead_size_update().
Default 8*1024, minimum OS_FILE_LOG_BLOCK_SIZE, maximum
UNIV_PAGE_SIZE_DEF; the last argument is OS_FILE_LOG_BLOCK_SIZE. */
static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size,
  PLUGIN_VAR_RQCMDARG,
  "Redo log write ahead unit size to avoid read-on-write,"
  " it should match the OS cache block IO size",
  NULL, innodb_log_write_ahead_size_update,
  8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE);
19564
/** innodb_old_blocks_pct: stored in innobase_old_blocks_pct; assignments
are handled by innodb_old_blocks_pct_update(). The default is the integer
expression 100 * 3 / 8 (= 37); minimum 5, maximum 95. */
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of the buffer pool to reserve for 'old' blocks.",
  NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);

/** innodb_old_blocks_time: stored in buf_LRU_old_threshold_ms.
Default 1000, minimum 0, maximum UINT_MAX32. */
static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
  PLUGIN_VAR_RQCMDARG,
  "Move blocks to the 'new' end of the buffer pool if the first access"
  " was at least this many milliseconds ago."
  " The timeout is disabled if 0.",
  NULL, NULL, 1000, 0, UINT_MAX32, 0);

/** innodb_open_files: stored in innobase_open_files; read-only.
Default 0, minimum 0, maximum LONG_MAX. */
static MYSQL_SYSVAR_ULONG(open_files, innobase_open_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "How many files at the maximum InnoDB keeps open at the same time.",
  NULL, NULL, 0, 0, LONG_MAX, 0);
19581
/** innodb_sync_spin_loops: stored in srv_n_spin_wait_rounds.
Default 30, minimum 0, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
  PLUGIN_VAR_RQCMDARG,
  "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
  NULL, NULL, 30L, 0L, ~0UL, 0);

/** innodb_spin_wait_delay: stored in srv_spin_wait_delay.
Default 4, minimum 0, maximum 6000. */
static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
  PLUGIN_VAR_OPCMDARG,
  "Maximum delay between polling for a spin lock (4 by default)",
  NULL, NULL, 4, 0, 6000, 0);

/** innodb_thread_concurrency: stored in srv_thread_concurrency.
Default 0, minimum 0, maximum 1000. */
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
  NULL, NULL, 0, 0, 1000, 0);

/** innodb_adaptive_max_sleep_delay: stored in
srv_adaptive_max_sleep_delay. */
static MYSQL_SYSVAR_ULONG(
  adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "The upper limit of the sleep delay in usec. Value of 0 disables it.",
  NULL, NULL,
  150000,			/* Default setting */
  0,				/* Minimum value */
  1000000, 0);			/* Maximum value */

/** innodb_prefix_index_cluster_optimization: boolean stored in
srv_prefix_index_cluster_optimization (default FALSE). */
static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
  srv_prefix_index_cluster_optimization,
  PLUGIN_VAR_OPCMDARG,
  "Enable prefix optimization to sometimes avoid cluster index lookups.",
  NULL, NULL, FALSE);

/** innodb_thread_sleep_delay: stored in srv_thread_sleep_delay. */
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "Time of innodb thread sleeping before joining InnoDB queue (usec)."
  " Value 0 disable a sleep",
  NULL, NULL,
  10000L,			/* Default setting */
  0L,				/* Minimum value */
  1000000L, 0);			/* Maximum value */
19620
/* Read-only path settings; the last argument of each declaration is the
default string (NULL for innodb_undo_directory). */

/** innodb_data_file_path: string stored in innobase_data_file_path. */
static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to individual files and their sizes.",
  NULL, NULL, "ibdata1:12M:autoextend");

/** innodb_temp_data_file_path: string stored in
innobase_temp_data_file_path. */
static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to files and their sizes making temp-tablespace.",
  NULL, NULL, "ibtmp1:12M:autoextend");

/** innodb_undo_directory: string stored in srv_undo_dir. */
static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Directory where undo tablespace files live, this path can be absolute.",
  NULL, NULL, NULL);
19635
/** innodb_undo_tablespaces: stored in srv_undo_tablespaces; read-only. */
static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of undo tablespaces to use.",
  NULL, NULL,
  0L,			/* Default setting */
  0L,			/* Minimum value */
  TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */

/** innodb_undo_logs: stored in srv_undo_logs. The same backing variable
is also bound to the deprecated innodb_rollback_segments alias. */
static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
  PLUGIN_VAR_OPCMDARG,
  "Number of undo logs to use.",
  NULL, NULL,
  TRX_SYS_N_RSEGS,	/* Default setting */
  1,			/* Minimum value */
  TRX_SYS_N_RSEGS, 0);	/* Maximum value */

/** innodb_max_undo_log_size: 64-bit value stored in
srv_max_undo_log_size. Default and minimum are both 10 << 20 (10 MiB);
the maximum is 1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX). */
static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
  PLUGIN_VAR_OPCMDARG,
  "Desired maximum UNDO tablespace size in bytes",
  NULL, NULL,
  10 << 20, 10 << 20,
  1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);

/** innodb_purge_rseg_truncate_frequency: stored in
srv_purge_rseg_truncate_frequency. Default 128, minimum 1, maximum 128. */
static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
  srv_purge_rseg_truncate_frequency,
  PLUGIN_VAR_OPCMDARG,
  "Dictates rate at which UNDO records are purged. Value N means"
  " purge rollback segment(s) on every Nth iteration of purge invocation",
  NULL, NULL, 128, 1, 128, 0);

/** innodb_undo_log_truncate: boolean stored in srv_undo_log_truncate
(default FALSE). */
static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate,
  PLUGIN_VAR_OPCMDARG,
  "Enable or Disable Truncate of UNDO tablespace.",
  NULL, NULL, FALSE);

/* Alias for innodb_undo_logs, this config variable is deprecated.
It is bound to the same backing variable (srv_undo_logs) with the same
default and limits. */
static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
  PLUGIN_VAR_OPCMDARG,
  "Number of undo logs to use (deprecated).",
  NULL, NULL,
  TRX_SYS_N_RSEGS,	/* Default setting */
  1,			/* Minimum value */
  TRX_SYS_N_RSEGS, 0);	/* Maximum value */
19679
/** innodb_autoinc_lock_mode: signed long stored in
innobase_autoinc_lock_mode; read-only. The help text lists the meaning of
the values 0, 1 and 2. */
static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The AUTOINC lock modes supported by InnoDB:"
  " 0 => Old style AUTOINC locking (for backward compatibility);"
  " 1 => New style AUTOINC locking;"
  " 2 => No AUTOINC locking (unsafe for SBR)",
  NULL, NULL,
  AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
  AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
  AUTOINC_NO_LOCKING, 0);	/* Maximum value */

/** innodb_version: read-only string stored in innodb_version_str, with
INNODB_VERSION_STR as its value. Flagged PLUGIN_VAR_NOCMDOPT. */
static MYSQL_SYSVAR_STR(version, innodb_version_str,
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "InnoDB version", NULL, NULL, INNODB_VERSION_STR);

/** innodb_use_native_aio: read-only boolean stored in srv_use_native_aio
(default TRUE). */
static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Use native AIO if supported on this platform.",
  NULL, NULL, TRUE);
19699
#ifdef HAVE_LIBNUMA
/** innodb_numa_interleave: read-only boolean stored in
srv_numa_interleave (default FALSE). Only declared when HAVE_LIBNUMA is
defined. */
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
  NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
19706
/** innodb_change_buffering: enumeration stored in
innodb_change_buffering, with value names from
innodb_change_buffering_typelib. The default is IBUF_USE_ALL. */
static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
  PLUGIN_VAR_RQCMDARG,
  "Buffer changes to secondary indexes.",
  NULL, NULL, IBUF_USE_ALL, &innodb_change_buffering_typelib);

/** innodb_change_buffer_max_size: stored in srv_change_buffer_max_size;
assignments are handled by innodb_change_buffer_max_size_update().
Default CHANGE_BUFFER_DEFAULT_SIZE, minimum 0, maximum 50. */
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
  srv_change_buffer_max_size,
  PLUGIN_VAR_RQCMDARG,
  "Maximum on-disk size of change buffer in terms of percentage"
  " of the buffer pool.",
  NULL, innodb_change_buffer_max_size_update,
  CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);

/** innodb_stats_method: enumeration stored in srv_innodb_stats_method,
with value names from innodb_stats_method_typelib. The default is
SRV_STATS_NULLS_EQUAL. */
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
   PLUGIN_VAR_RQCMDARG,
  "Specifies how InnoDB index statistics collection code should"
  " treat NULLs. Possible values are NULLS_EQUAL (default),"
  " NULLS_UNEQUAL and NULLS_IGNORED",
   NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
19726
19727 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** innodb_change_buffer_dump: read-only boolean stored in ibuf_dump
(default FALSE). Compiled only inside the enclosing
UNIV_DEBUG || UNIV_IBUF_DEBUG conditional. */
static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Dump the change buffer at startup.",
  NULL, NULL, FALSE);

/** innodb_change_buffering_debug: stored in ibuf_debug.
Default 0, minimum 0, maximum 1. */
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
  PLUGIN_VAR_RQCMDARG,
  "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
  NULL, NULL, 0, 0, 1, 0);
19737
19738 static MYSQL_SYSVAR_BOOL(disable_background_merge,
19739 srv_ibuf_disable_background_merge,
19740 PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
19741 "Disable change buffering merges by the master thread",
19742 NULL, NULL, FALSE);
19743 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
19744
19745 static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
19746 PLUGIN_VAR_RQCMDARG,
19747 "A number between [0, 100] that tells how oftern buffer pool dump status "
19748 "in percentages should be printed. E.g. 10 means that buffer pool dump "
19749 "status is printed when every 10% of number of buffer pool pages are "
19750 "dumped. Default is 0 (only start and end status is printed).",
19751 NULL, NULL, 0, 0, 100, 0);
19752
19753 #ifdef WITH_INNODB_DISALLOW_WRITES
/*******************************************************
 *    innobase_disallow_writes variable definition     *
 *******************************************************/

/* Backing storage for the innodb_disallow_writes system variable.
Must always init to FALSE. */
static my_bool	innobase_disallow_writes	= FALSE;
19760
19761 /**************************************************************************
19762 An "update" method for innobase_disallow_writes variable. */
19763 static
19764 void
innobase_disallow_writes_update(THD *,st_mysql_sys_var *,void * var_ptr,const void * save)19765 innobase_disallow_writes_update(THD*, st_mysql_sys_var*,
19766 void* var_ptr, const void* save)
19767 {
19768 const my_bool val = *static_cast<const my_bool*>(save);
19769 *static_cast<my_bool*>(var_ptr) = val;
19770 ut_a(srv_allow_writes_event);
19771 mysql_mutex_unlock(&LOCK_global_system_variables);
19772 if (val) {
19773 os_event_reset(srv_allow_writes_event);
19774 } else {
19775 os_event_set(srv_allow_writes_event);
19776 }
19777 mysql_mutex_lock(&LOCK_global_system_variables);
19778 }
19779
/** innodb_disallow_writes: boolean stored in innobase_disallow_writes
(default FALSE) and flagged PLUGIN_VAR_NOCMDOPT. Assignments are handled
by innobase_disallow_writes_update(). */
static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
  PLUGIN_VAR_NOCMDOPT,
  "Tell InnoDB to stop any writes to disk",
  NULL, innobase_disallow_writes_update, FALSE);
19784 #endif /* WITH_INNODB_DISALLOW_WRITES */
19785
/** innodb_random_read_ahead: boolean stored in srv_random_read_ahead
(default FALSE). */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
  PLUGIN_VAR_NOCMDARG,
  "Whether to use read ahead for random access within an extent.",
  NULL, NULL, FALSE);

/** innodb_read_ahead_threshold: stored in srv_read_ahead_threshold.
Default 56, minimum 0, maximum 64. */
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages that must be accessed sequentially for InnoDB to"
  " trigger a readahead.",
  NULL, NULL, 56, 0, 64, 0);
19796
/* Monitor counter control variables. All four are strings with a NULL
default, share innodb_monitor_validate() as the check callback, and each
registers its own update callback. */

/** innodb_monitor_enable: stored in innobase_enable_monitor_counter. */
static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn on a monitor counter",
  innodb_monitor_validate,
  innodb_enable_monitor_update, NULL);

/** innodb_monitor_disable: stored in innobase_disable_monitor_counter. */
static MYSQL_SYSVAR_STR(monitor_disable, innobase_disable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn off a monitor counter",
  innodb_monitor_validate,
  innodb_disable_monitor_update, NULL);

/** innodb_monitor_reset: stored in innobase_reset_monitor_counter. */
static MYSQL_SYSVAR_STR(monitor_reset, innobase_reset_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset a monitor counter",
  innodb_monitor_validate,
  innodb_reset_monitor_update, NULL);

/** innodb_monitor_reset_all: stored in
innobase_reset_all_monitor_counter. */
static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset all values for a monitor counter",
  innodb_monitor_validate,
  innodb_reset_all_monitor_update, NULL);
19820
/** innodb_status_output: boolean stored in srv_print_innodb_monitor
(default FALSE); assignments are handled by
innodb_status_output_update(). */
static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_status_output_locks: boolean stored in
srv_print_innodb_lock_monitor (default FALSE); shares
innodb_status_output_update() with innodb_status_output. */
static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log."
  " Requires innodb_status_output=ON.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_print_all_deadlocks: boolean stored in srv_print_all_deadlocks
(default FALSE). */
static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
  PLUGIN_VAR_OPCMDARG,
  "Print all deadlocks to MariaDB error log (off by default)",
  NULL, NULL, FALSE);
19834
/** innodb_compression_failure_threshold_pct: stored in
zip_failure_threshold_pct. Default 5, minimum 0, maximum 100. */
static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
  zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
  "If the compression failure rate of a table is greater than this number"
  " more padding is added to the pages to reduce the failures. A value of"
  " zero implies no padding",
  NULL, NULL, 5, 0, 100, 0);

/** innodb_compression_pad_pct_max: stored in zip_pad_max.
Default 50, minimum 0, maximum 75. */
static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
  zip_pad_max, PLUGIN_VAR_OPCMDARG,
  "Percentage of empty space on a data page that can be reserved"
  " to make the page compressible.",
  NULL, NULL, 50, 0, 75, 0);
19847
/** innodb_read_only: read-only boolean stored in srv_read_only_mode
(default FALSE). */
static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Start InnoDB in read only mode (off by default)",
  NULL, NULL, FALSE);

/** innodb_cmp_per_index_enabled: boolean stored in
srv_cmp_per_index_enabled (default FALSE); assignments are handled by
innodb_cmp_per_index_update(). */
static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
  " may have negative impact on performance (off by default)",
  NULL, innodb_cmp_per_index_update, FALSE);

/** innodb_default_row_format: enumeration stored in
innodb_default_row_format, with value names from
innodb_default_row_format_typelib. The default is
DEFAULT_ROW_FORMAT_DYNAMIC. */
static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
  PLUGIN_VAR_RQCMDARG,
  "The default ROW FORMAT for all innodb tables created without explicit"
  " ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC."
  " The ROW_FORMAT value COMPRESSED is not allowed",
  NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC,
  &innodb_default_row_format_typelib);
19866
/* Variables that exist only in UNIV_DEBUG builds. */
#ifdef UNIV_DEBUG
/** innodb_trx_rseg_n_slots_debug: stored in trx_rseg_n_slots_debug and
flagged PLUGIN_VAR_NOCMDOPT. Default 0, minimum 0, maximum 1024. */
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
  "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
  NULL, NULL, 0, 0, 1024, 0);

/** innodb_limit_optimistic_insert_debug: stored in
btr_cur_limit_optimistic_insert_debug. Default 0, minimum 0,
maximum UINT_MAX32. */
static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
  btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
  "Artificially limit the number of records per B-tree page (0=unlimited).",
  NULL, NULL, 0, 0, UINT_MAX32, 0);

/** innodb_trx_purge_view_update_only_debug: boolean stored in
srv_purge_view_update_only_debug (default FALSE), flagged
PLUGIN_VAR_NOCMDOPT. */
static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
  srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
  "Pause actual purging any delete-marked records, but merely update the purge view."
  " It is to create artificially the situation the purge view have been updated"
  " but the each purges were not done yet.",
  NULL, NULL, FALSE);

/** innodb_evict_tables_on_commit_debug: boolean stored in
innodb_evict_tables_on_commit_debug (default FALSE). */
static MYSQL_SYSVAR_BOOL(evict_tables_on_commit_debug,
  innodb_evict_tables_on_commit_debug, PLUGIN_VAR_OPCMDARG,
  "On transaction commit, try to evict tables from the data dictionary cache.",
  NULL, NULL, FALSE);

/** innodb_data_file_size_debug: read-only value stored in
srv_sys_space_size_debug. Default 0, minimum 0, maximum 256U << 20. */
static MYSQL_SYSVAR_UINT(data_file_size_debug,
  srv_sys_space_size_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB system tablespace size to be set in recovery.",
  NULL, NULL, 0, 0, 256U << 20, 0);

/** innodb_fil_make_page_dirty_debug: stored in
srv_fil_make_page_dirty_debug; assignments are handled by
innodb_make_page_dirty(). Default 0, minimum 0, maximum UINT_MAX32. */
static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
  srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG,
  "Make the first page of the given tablespace dirty.",
  NULL, innodb_make_page_dirty, 0, 0, UINT_MAX32, 0);

/** innodb_saved_page_number_debug: stored in
srv_saved_page_number_debug; assignments are handled by
innodb_save_page_no(). Default 0, minimum 0, maximum UINT_MAX32. */
static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
  srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG,
  "An InnoDB page number.",
  NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);

/** innodb_disable_resize_buffer_pool_debug: boolean stored in
buf_disable_resize_buffer_pool_debug (default TRUE). */
static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug,
  buf_disable_resize_buffer_pool_debug, PLUGIN_VAR_NOCMDARG,
  "Disable resizing buffer pool to make assertion code not expensive.",
  NULL, NULL, TRUE);

/** innodb_page_cleaner_disabled_debug: boolean stored in
innodb_page_cleaner_disabled_debug (default FALSE); assignments are
handled by buf_flush_page_cleaner_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug,
  innodb_page_cleaner_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable page cleaner",
  NULL, buf_flush_page_cleaner_disabled_debug_update, FALSE);

/** innodb_sync_debug: read-only boolean stored in srv_sync_debug
(default FALSE). */
static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable the sync debug checks",
  NULL, NULL, FALSE);

/** innodb_dict_stats_disabled_debug: boolean stored in
innodb_dict_stats_disabled_debug (default FALSE); assignments are handled
by dict_stats_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug,
  innodb_dict_stats_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable dict_stats thread",
  NULL, dict_stats_disabled_debug_update, FALSE);

/** innodb_master_thread_disabled_debug: boolean stored in
srv_master_thread_disabled_debug (default FALSE); assignments are handled
by srv_master_thread_disabled_debug_update(). */
static MYSQL_SYSVAR_BOOL(master_thread_disabled_debug,
  srv_master_thread_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable master thread",
  NULL, srv_master_thread_disabled_debug_update, FALSE);
#endif /* UNIV_DEBUG */
19934
/** innodb_force_primary_key: boolean stored in srv_force_primary_key
(default FALSE). */
static MYSQL_SYSVAR_BOOL(force_primary_key,
  srv_force_primary_key,
  PLUGIN_VAR_OPCMDARG,
  "Do not allow to create table without primary key (off by default)",
  NULL, NULL, FALSE);
19940
/** Names accepted by innodb_compression_algorithm, terminated by 0. */
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
/** TYPELIB over the names above; the count excludes the terminating 0. */
static TYPELIB page_compression_algorithms_typelib=
{
  array_elements(page_compression_algorithms) - 1, 0,
  page_compression_algorithms, 0
};
/** innodb_compression_algorithm: enumeration stored in
innodb_compression_algorithm. New values are checked by
innodb_compression_algorithm_validate(); the default is
PAGE_ZLIB_ALGORITHM. */
static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
  PLUGIN_VAR_OPCMDARG,
  "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
  innodb_compression_algorithm_validate, NULL,
  /* We use here the largest number of supported compression method to
  enable all those methods that are available. Availability of compression
  method is verified on innodb_compression_algorithm_validate function.
  NOTE(review): the value actually passed below is PAGE_ZLIB_ALGORITHM,
  which does not look like "the largest number" given that "zlib" is the
  second name in the list - this comment may be stale; confirm. */
  PAGE_ZLIB_ALGORITHM,
  &page_compression_algorithms_typelib);
19956
/** innodb_fatal_semaphore_wait_threshold: read-only value stored in
srv_fatal_semaphore_wait_threshold. */
static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Maximum number of seconds that semaphore times out in InnoDB.",
  NULL, NULL,
  DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, /* Default setting */
  1, /* Minimum setting */
  UINT_MAX32, /* Maximum setting */
  0);
19965
/** Names accepted by innodb_encrypt_tables, terminated by 0. */
static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 };
/** TYPELIB over the names above; the count excludes the terminating 0. */
static TYPELIB srv_encrypt_tables_typelib = {
	array_elements(srv_encrypt_tables_names)-1, 0, srv_encrypt_tables_names,
	NULL
};
/** innodb_encrypt_tables: enumeration stored in srv_encrypt_tables.
New values are checked by innodb_encrypt_tables_validate() and applied by
innodb_encrypt_tables_update(). The default is 0, i.e. the first name in
srv_encrypt_tables_names ("OFF"). */
static MYSQL_SYSVAR_ENUM(encrypt_tables, srv_encrypt_tables,
			 PLUGIN_VAR_OPCMDARG,
			 "Enable encryption for tables. "
			 "Don't forget to enable --innodb-encrypt-log too",
			 innodb_encrypt_tables_validate,
			 innodb_encrypt_tables_update,
			 0,
			 &srv_encrypt_tables_typelib);
19979
/* Unsigned int system variable backed by srv_n_fil_crypt_threads.
No check callback; changes are applied by
innodb_encryption_threads_update.
Default 0, minimum 0, maximum 255, block size 0. */
static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads,
  PLUGIN_VAR_RQCMDARG,
  "Number of threads performing background key rotation and "
  "scrubbing",
  NULL,
  innodb_encryption_threads_update,
  0, 0, 255, 0);
19987
/* Unsigned int system variable backed by srv_fil_crypt_rotate_key_age.
No check callback; changes are applied by
innodb_encryption_rotate_key_age_update.
Default 1, minimum 0, maximum UINT_MAX32, block size 0. */
static MYSQL_SYSVAR_UINT(encryption_rotate_key_age,
  srv_fil_crypt_rotate_key_age,
  PLUGIN_VAR_RQCMDARG,
  "Key rotation - re-encrypt in background "
  "all pages that were encrypted with a key that "
  "many (or more) versions behind. Value 0 indicates "
  "that key rotation is disabled.",
  NULL,
  innodb_encryption_rotate_key_age_update,
  1, 0, UINT_MAX32, 0);
19998
/* Unsigned int system variable backed by srv_n_fil_crypt_iops.
No check callback; changes are applied by
innodb_encryption_rotation_iops_update.
The default is given as srv_n_fil_crypt_iops itself, i.e. the value
that variable holds when this initializer is evaluated.
Minimum 0, maximum UINT_MAX32, block size 0. */
static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops,
  PLUGIN_VAR_RQCMDARG,
  "Use this many iops for background key rotation",
  NULL,
  innodb_encryption_rotation_iops_update,
  srv_n_fil_crypt_iops, 0, UINT_MAX32, 0);
20005
/* Boolean system variable backed by srv_scrub_log, flagged
PLUGIN_VAR_READONLY. The trailing 0, 0, 0 are: no check callback,
no update callback, default value 0 (off). */
static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing",
  0, 0, 0);
20010
/* Unsigned long long system variable backed by innodb_scrub_log_speed.
No check or update callback is registered. The last argument (0) is
the block size. */
static MYSQL_SYSVAR_ULONGLONG(scrub_log_speed, innodb_scrub_log_speed,
  PLUGIN_VAR_OPCMDARG,
  "Background redo log scrubbing speed in bytes/sec",
  NULL, NULL,
  256,              /* 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */
  1,                /* min */
  50000, 0);        /* 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */
20018
/* Boolean system variable backed by srv_encrypt_log, flagged
PLUGIN_VAR_READONLY. No check or update callback. Default: FALSE. */
static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable redo log encryption",
  NULL, NULL, FALSE);
20023
/* Boolean system variable backed by
srv_immediate_scrub_data_uncompressed. Declared with no option flags
(0) and no check or update callback. Default: FALSE. */
static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed,
  srv_immediate_scrub_data_uncompressed,
  0,
  "Enable scrubbing of data",
  NULL, NULL, FALSE);
20029
/* Boolean system variable backed by
srv_background_scrub_data_uncompressed. Declared with no option flags
(0) and no check or update callback. Default: FALSE. */
static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed,
  srv_background_scrub_data_uncompressed,
  0,
  "Enable scrubbing of uncompressed data by "
  "background threads (same as encryption_threads)",
  NULL, NULL, FALSE);
20036
/* Boolean system variable backed by
srv_background_scrub_data_compressed. Declared with no option flags
(0) and no check or update callback. Default: FALSE. */
static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed,
  srv_background_scrub_data_compressed,
  0,
  "Enable scrubbing of compressed data by "
  "background threads (same as encryption_threads)",
  NULL, NULL, FALSE);
20043
/* Unsigned int system variable backed by
srv_background_scrub_data_check_interval. No option flags, no check or
update callback. The default is given as the backing variable itself,
i.e. the value it holds when this initializer is evaluated.
Minimum 1, maximum UINT_MAX32, block size 0. */
static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval,
  srv_background_scrub_data_check_interval,
  0,
  "check if spaces needs scrubbing every "
  "innodb_background_scrub_data_check_interval "
  "seconds",
  NULL, NULL,
  srv_background_scrub_data_check_interval,
  1,
  UINT_MAX32, 0);
20054
/* Unsigned int system variable backed by
srv_background_scrub_data_interval. No option flags, no check or
update callback. The default is given as the backing variable itself,
i.e. the value it holds when this initializer is evaluated.
Minimum 1, maximum UINT_MAX32, block size 0. */
static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
  srv_background_scrub_data_interval,
  0,
  "scrub spaces that were last scrubbed longer than "
  " innodb_background_scrub_data_interval seconds ago",
  NULL, NULL,
  srv_background_scrub_data_interval,
  1,
  UINT_MAX32, 0);
20064
20065 #ifdef UNIV_DEBUG
/* Debug-only (inside #ifdef UNIV_DEBUG) boolean system variable backed
by srv_scrub_force_testing. No option flags, no check or update
callback. Default: FALSE. */
static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
  srv_scrub_force_testing,
  0,
  "Perform extra scrubbing to increase test exposure",
  NULL, NULL, FALSE);
20071
/* Backing storage for the debug-only debug_sync system variable.
It has external linkage (not declared static). */
char *innobase_debug_sync;
/* Debug-only (inside #ifdef UNIV_DEBUG) string system variable.
No check callback; new values are handed to innobase_debug_sync_set.
Default: NULL. */
static MYSQL_SYSVAR_STR(debug_sync, innobase_debug_sync,
  PLUGIN_VAR_NOCMDARG,
  "debug_sync for innodb purge threads. "
  "Use it to set up sync points for all purge threads "
  "at once. The commands will be applied sequentially at"
  " the beginning of purging the next undo record.",
  NULL,
  innobase_debug_sync_set, NULL);
20081 #endif /* UNIV_DEBUG */
20082
20083 static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables,
20084 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20085 "Enrypt the temporary table data.",
20086 NULL, NULL, false);
20087
/* NULL-terminated list of the system variables declared with the
MYSQL_SYSVAR_* macros in this file. The array is handed to the server
through the "system variables" slot of the plugin descriptor.
Entries inside #if/#ifdef sections are only present in builds where
the corresponding variable is declared. */
static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(buffer_pool_chunk_size),
  MYSQL_SYSVAR(buffer_pool_instances),
  MYSQL_SYSVAR(buffer_pool_filename),
  MYSQL_SYSVAR(buffer_pool_dump_now),
  MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
  MYSQL_SYSVAR(buffer_pool_dump_pct),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_evict),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_now),
  MYSQL_SYSVAR(buffer_pool_load_abort),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_load_pages_abort),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_at_startup),
  MYSQL_SYSVAR(defragment),
  MYSQL_SYSVAR(defragment_n_pages),
  MYSQL_SYSVAR(defragment_stats_accuracy),
  MYSQL_SYSVAR(defragment_fill_factor),
  MYSQL_SYSVAR(defragment_fill_factor_n_recs),
  MYSQL_SYSVAR(defragment_frequency),
  MYSQL_SYSVAR(lru_scan_depth),
  MYSQL_SYSVAR(flush_neighbors),
  MYSQL_SYSVAR(checksum_algorithm),
  MYSQL_SYSVAR(log_checksums),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(commit_concurrency),
  MYSQL_SYSVAR(concurrency_tickets),
  MYSQL_SYSVAR(compression_level),
  MYSQL_SYSVAR(data_file_path),
  MYSQL_SYSVAR(temp_data_file_path),
  MYSQL_SYSVAR(data_home_dir),
  MYSQL_SYSVAR(doublewrite),
  MYSQL_SYSVAR(stats_include_delete_marked),
  MYSQL_SYSVAR(use_atomic_writes),
  MYSQL_SYSVAR(fast_shutdown),
  MYSQL_SYSVAR(read_io_threads),
  MYSQL_SYSVAR(write_io_threads),
  MYSQL_SYSVAR(file_per_table),
  MYSQL_SYSVAR(file_format), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(flush_log_at_timeout),
  MYSQL_SYSVAR(flush_log_at_trx_commit),
  MYSQL_SYSVAR(flush_method),
  MYSQL_SYSVAR(force_recovery),
  MYSQL_SYSVAR(fill_factor),
  MYSQL_SYSVAR(ft_cache_size),
  MYSQL_SYSVAR(ft_total_cache_size),
  MYSQL_SYSVAR(ft_result_cache_limit),
  MYSQL_SYSVAR(ft_enable_stopword),
  MYSQL_SYSVAR(ft_max_token_size),
  MYSQL_SYSVAR(ft_min_token_size),
  MYSQL_SYSVAR(ft_num_word_optimize),
  MYSQL_SYSVAR(ft_sort_pll_degree),
  MYSQL_SYSVAR(large_prefix), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(force_load_corrupted),
  MYSQL_SYSVAR(lock_schedule_algorithm),
  MYSQL_SYSVAR(locks_unsafe_for_binlog),
  MYSQL_SYSVAR(lock_wait_timeout),
  MYSQL_SYSVAR(deadlock_detect),
  MYSQL_SYSVAR(page_size),
  MYSQL_SYSVAR(log_buffer_size),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_files_in_group),
  MYSQL_SYSVAR(log_write_ahead_size),
  MYSQL_SYSVAR(log_group_home_dir),
  MYSQL_SYSVAR(log_compressed_pages),
  MYSQL_SYSVAR(log_optimize_ddl),
  MYSQL_SYSVAR(max_dirty_pages_pct),
  MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
  MYSQL_SYSVAR(adaptive_flushing_lwm),
  MYSQL_SYSVAR(adaptive_flushing),
  MYSQL_SYSVAR(flush_sync),
  MYSQL_SYSVAR(flushing_avg_loops),
  MYSQL_SYSVAR(max_purge_lag),
  MYSQL_SYSVAR(max_purge_lag_delay),
  MYSQL_SYSVAR(max_purge_lag_wait),
  MYSQL_SYSVAR(old_blocks_pct),
  MYSQL_SYSVAR(old_blocks_time),
  MYSQL_SYSVAR(open_files),
  MYSQL_SYSVAR(optimize_fulltext_only),
  MYSQL_SYSVAR(rollback_on_timeout),
  MYSQL_SYSVAR(ft_aux_table),
  MYSQL_SYSVAR(ft_enable_diag_print),
  MYSQL_SYSVAR(ft_server_stopword_table),
  MYSQL_SYSVAR(ft_user_stopword_table),
  MYSQL_SYSVAR(disable_sort_file_cache),
  MYSQL_SYSVAR(stats_on_metadata),
  MYSQL_SYSVAR(stats_sample_pages),
  MYSQL_SYSVAR(stats_transient_sample_pages),
  MYSQL_SYSVAR(stats_persistent),
  MYSQL_SYSVAR(stats_persistent_sample_pages),
  MYSQL_SYSVAR(stats_auto_recalc),
  MYSQL_SYSVAR(stats_modified_counter),
  MYSQL_SYSVAR(stats_traditional),
#ifdef BTR_CUR_HASH_ADAPT
  MYSQL_SYSVAR(adaptive_hash_index),
  MYSQL_SYSVAR(adaptive_hash_index_parts),
#endif /* BTR_CUR_HASH_ADAPT */
  MYSQL_SYSVAR(stats_method),
  MYSQL_SYSVAR(replication_delay),
  MYSQL_SYSVAR(status_file),
  MYSQL_SYSVAR(strict_mode),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(online_alter_log_max_size),
  MYSQL_SYSVAR(sync_spin_loops),
  MYSQL_SYSVAR(spin_wait_delay),
  MYSQL_SYSVAR(table_locks),
  MYSQL_SYSVAR(thread_concurrency),
  MYSQL_SYSVAR(adaptive_max_sleep_delay),
  MYSQL_SYSVAR(prefix_index_cluster_optimization),
  MYSQL_SYSVAR(thread_sleep_delay),
  MYSQL_SYSVAR(tmpdir),
  MYSQL_SYSVAR(autoinc_lock_mode),
  MYSQL_SYSVAR(version),
  MYSQL_SYSVAR(use_native_aio),
#ifdef HAVE_LIBNUMA
  MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
  MYSQL_SYSVAR(change_buffering),
  MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
  MYSQL_SYSVAR(change_buffer_dump),
  MYSQL_SYSVAR(change_buffering_debug),
  MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef WITH_INNODB_DISALLOW_WRITES
  MYSQL_SYSVAR(disallow_writes),
#endif /* WITH_INNODB_DISALLOW_WRITES */
  MYSQL_SYSVAR(random_read_ahead),
  MYSQL_SYSVAR(read_ahead_threshold),
  MYSQL_SYSVAR(read_only),
  MYSQL_SYSVAR(instant_alter_column_allowed),
  MYSQL_SYSVAR(io_capacity),
  MYSQL_SYSVAR(io_capacity_max),
  MYSQL_SYSVAR(page_cleaners),
  MYSQL_SYSVAR(idle_flush_pct),
  MYSQL_SYSVAR(monitor_enable),
  MYSQL_SYSVAR(monitor_disable),
  MYSQL_SYSVAR(monitor_reset),
  MYSQL_SYSVAR(monitor_reset_all),
  MYSQL_SYSVAR(purge_threads),
  MYSQL_SYSVAR(purge_batch_size),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(background_drop_list_empty),
  MYSQL_SYSVAR(log_checkpoint_now),
  MYSQL_SYSVAR(buf_flush_list_now),
  MYSQL_SYSVAR(merge_threshold_set_all_debug),
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
  MYSQL_SYSVAR(page_hash_locks),
  MYSQL_SYSVAR(doublewrite_batch_size),
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
  MYSQL_SYSVAR(status_output),
  MYSQL_SYSVAR(status_output_locks),
  MYSQL_SYSVAR(print_all_deadlocks),
  MYSQL_SYSVAR(cmp_per_index_enabled),
  MYSQL_SYSVAR(undo_logs),
  MYSQL_SYSVAR(max_undo_log_size),
  MYSQL_SYSVAR(purge_rseg_truncate_frequency),
  MYSQL_SYSVAR(undo_log_truncate),
  MYSQL_SYSVAR(rollback_segments),
  MYSQL_SYSVAR(undo_directory),
  MYSQL_SYSVAR(undo_tablespaces),
  MYSQL_SYSVAR(sync_array_size),
  MYSQL_SYSVAR(compression_failure_threshold_pct),
  MYSQL_SYSVAR(compression_pad_pct_max),
  MYSQL_SYSVAR(default_row_format),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(trx_rseg_n_slots_debug),
  MYSQL_SYSVAR(limit_optimistic_insert_debug),
  MYSQL_SYSVAR(trx_purge_view_update_only_debug),
  MYSQL_SYSVAR(evict_tables_on_commit_debug),
  MYSQL_SYSVAR(data_file_size_debug),
  MYSQL_SYSVAR(fil_make_page_dirty_debug),
  MYSQL_SYSVAR(saved_page_number_debug),
  MYSQL_SYSVAR(disable_resize_buffer_pool_debug),
  MYSQL_SYSVAR(page_cleaner_disabled_debug),
  MYSQL_SYSVAR(dict_stats_disabled_debug),
  MYSQL_SYSVAR(master_thread_disabled_debug),
  MYSQL_SYSVAR(sync_debug),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(force_primary_key),
  MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
  /* Table page compression feature */
  MYSQL_SYSVAR(compression_default),
  MYSQL_SYSVAR(compression_algorithm),
  /* Encryption feature */
  MYSQL_SYSVAR(encrypt_tables),
  MYSQL_SYSVAR(encryption_threads),
  MYSQL_SYSVAR(encryption_rotate_key_age),
  MYSQL_SYSVAR(encryption_rotation_iops),
  MYSQL_SYSVAR(scrub_log),
  MYSQL_SYSVAR(scrub_log_speed),
  MYSQL_SYSVAR(encrypt_log),
  MYSQL_SYSVAR(default_encryption_key_id),
  /* Scrubbing feature */
  MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_compressed),
  MYSQL_SYSVAR(background_scrub_data_interval),
  MYSQL_SYSVAR(background_scrub_data_check_interval),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(debug_force_scrubbing),
  MYSQL_SYSVAR(debug_sync),
#endif
  MYSQL_SYSVAR(buf_dump_status_frequency),
  MYSQL_SYSVAR(background_thread),
  MYSQL_SYSVAR(encrypt_temporary_tables),

  /* List terminator */
  NULL
};
20302
/* Plugin descriptor list for this library. The first, brace-enclosed
entry describes the storage engine itself; the i_s_innodb_* entries
that follow are descriptors defined elsewhere (presumably the
INFORMATION_SCHEMA tables; their definitions are not in this part of
the file). */
maria_declare_plugin(innobase)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &innobase_storage_engine,
  innobase_hton_name,
  plugin_author,
  "Supports transactions, row-level locking, foreign keys and encryption for tables",
  PLUGIN_LICENSE_GPL,
  innodb_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  INNODB_VERSION_SHORT,
  innodb_status_variables_export,/* status variables */
  innobase_system_variables, /* system variables */
  INNODB_VERSION_STR, /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
},
i_s_innodb_trx,
i_s_innodb_locks,
i_s_innodb_lock_waits,
i_s_innodb_cmp,
i_s_innodb_cmp_reset,
i_s_innodb_cmpmem,
i_s_innodb_cmpmem_reset,
i_s_innodb_cmp_per_index,
i_s_innodb_cmp_per_index_reset,
i_s_innodb_buffer_page,
i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
i_s_innodb_metrics,
i_s_innodb_ft_default_stopword,
i_s_innodb_ft_deleted,
i_s_innodb_ft_being_deleted,
i_s_innodb_ft_config,
i_s_innodb_ft_index_cache,
i_s_innodb_ft_index_table,
i_s_innodb_sys_tables,
i_s_innodb_sys_tablestats,
i_s_innodb_sys_indexes,
i_s_innodb_sys_columns,
i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
i_s_innodb_sys_datafiles,
i_s_innodb_sys_virtual,
i_s_innodb_mutexes,
i_s_innodb_sys_semaphore_waits,
i_s_innodb_tablespaces_encryption,
i_s_innodb_tablespaces_scrubbing
maria_declare_plugin_end;
20353
20354 /** @brief Initialize the default value of innodb_commit_concurrency.
20355
20356 Once InnoDB is running, the innodb_commit_concurrency must not change
20357 from zero to nonzero. (Bug #42101)
20358
20359 The initial default value is 0, and without this extra initialization,
20360 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
20361 to 0, even if it was initially set to nonzero at the command line
20362 or configuration file. */
20363 static
20364 void
innobase_commit_concurrency_init_default()20365 innobase_commit_concurrency_init_default()
20366 /*======================================*/
20367 {
20368 MYSQL_SYSVAR_NAME(commit_concurrency).def_val
20369 = innobase_commit_concurrency;
20370 }
20371
20372 /** @brief Adjust some InnoDB startup parameters based on file contents
20373 or innodb_page_size. */
20374 static
20375 void
innodb_params_adjust()20376 innodb_params_adjust()
20377 {
20378 /* The default value and the max value of
20379 innodb_undo_logs must be equal to the available undo logs. */
20380 MYSQL_SYSVAR_NAME(undo_logs).max_val
20381 = MYSQL_SYSVAR_NAME(undo_logs).def_val
20382 = srv_available_undo_logs;
20383 MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20384 = 1ULL << (32U + srv_page_size_shift);
20385 MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
20386 = MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
20387 = ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
20388 << srv_page_size_shift;
20389 MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20390 = 1ULL << (32U + srv_page_size_shift);
20391 }
20392
20393 /****************************************************************************
20394 * DS-MRR implementation
20395 ***************************************************************************/
20396
20397 /**
20398 Multi Range Read interface, DS-MRR calls */
20399 int
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)20400 ha_innobase::multi_range_read_init(
20401 RANGE_SEQ_IF* seq,
20402 void* seq_init_param,
20403 uint n_ranges,
20404 uint mode,
20405 HANDLER_BUFFER* buf)
20406 {
20407 return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param,
20408 n_ranges, mode, buf));
20409 }
20410
20411 int
multi_range_read_next(range_id_t * range_info)20412 ha_innobase::multi_range_read_next(
20413 range_id_t* range_info)
20414 {
20415 return(m_ds_mrr.dsmrr_next(range_info));
20416 }
20417
20418 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)20419 ha_innobase::multi_range_read_info_const(
20420 uint keyno,
20421 RANGE_SEQ_IF* seq,
20422 void* seq_init_param,
20423 uint n_ranges,
20424 uint* bufsz,
20425 uint* flags,
20426 Cost_estimate* cost)
20427 {
20428 /* See comments in ha_myisam::multi_range_read_info_const */
20429 m_ds_mrr.init(this, table);
20430
20431 if (m_prebuilt->select_lock_type != LOCK_NONE) {
20432 *flags |= HA_MRR_USE_DEFAULT_IMPL;
20433 }
20434
20435 ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
20436 bufsz, flags, cost);
20437 return res;
20438 }
20439
20440 ha_rows
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)20441 ha_innobase::multi_range_read_info(
20442 uint keyno,
20443 uint n_ranges,
20444 uint keys,
20445 uint key_parts,
20446 uint* bufsz,
20447 uint* flags,
20448 Cost_estimate* cost)
20449 {
20450 m_ds_mrr.init(this, table);
20451 ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
20452 flags, cost);
20453 return res;
20454 }
20455
20456 int
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)20457 ha_innobase::multi_range_read_explain_info(
20458 uint mrr_mode,
20459 char *str,
20460 size_t size)
20461 {
20462 return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
20463 }
20464
20465 /** Parse the table file name into table name and database name.
20466 @param[in] tbl_name InnoDB table name
20467 @param[out] dbname database name buffer (NAME_LEN + 1 bytes)
20468 @param[out] tblname table name buffer (NAME_LEN + 1 bytes)
20469 @param[out] dbnamelen database name length
20470 @param[out] tblnamelen table name length
20471 @return true if the table name is parsed properly. */
table_name_parse(const table_name_t & tbl_name,char * dbname,char * tblname,ulint & dbnamelen,ulint & tblnamelen)20472 static bool table_name_parse(
20473 const table_name_t& tbl_name,
20474 char* dbname,
20475 char* tblname,
20476 ulint& dbnamelen,
20477 ulint& tblnamelen)
20478 {
20479 dbnamelen = dict_get_db_name_len(tbl_name.m_name);
20480 char db_buf[MAX_DATABASE_NAME_LEN + 1];
20481 char tbl_buf[MAX_TABLE_NAME_LEN + 1];
20482
20483 ut_ad(dbnamelen > 0);
20484 ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
20485
20486 memcpy(db_buf, tbl_name.m_name, dbnamelen);
20487 db_buf[dbnamelen] = 0;
20488
20489 tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
20490 memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
20491 tbl_buf[tblnamelen] = 0;
20492
20493 dbnamelen = filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
20494
20495 if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
20496 && !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
20497 return false;
20498 }
20499
20500 if (char *is_part = strchr(tbl_buf, '#')) {
20501 *is_part = '\0';
20502 tblnamelen = is_part - tbl_buf;
20503 }
20504
20505 tblnamelen = filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
20506 return true;
20507 }
20508
20509
/** Acquire metadata lock and MariaDB table handle for an InnoDB table.
The function releases the reference on the passed-in table, opens the
MariaDB table by name with open_purge_table(), and then re-opens the
InnoDB table by id to check that it still exists, is accessible and
still carries the name that was opened. If the name changed (or the
MariaDB table could not be opened), it retries with the current name.
@param[in,out]	thd	thread handle
@param[in,out]	table	InnoDB table
@return MariaDB table handle
@retval NULL if the table does not exist, is unaccessible or corrupted. */
static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
{
	/* db_buf/tbl_buf: the name currently being opened;
	db_buf1/tbl_buf1: the name found after re-opening by id. */
	char	db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
	char	tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
	ulint	db_buf_len, db_buf1_len;
	ulint	tbl_buf_len, tbl_buf1_len;

	if (!table_name_parse(table->name, db_buf, tbl_buf,
			      db_buf_len, tbl_buf_len)) {
		/* The name was rejected by table_name_parse(). */
		table->release();
		return NULL;
	}

	DEBUG_SYNC(thd, "ib_purge_virtual_latch_released");

	/* Remember the id: the table is re-opened by id after every
	attempt to open the MariaDB table. */
	const table_id_t table_id = table->id;
retry_mdl:
	/* Evaluate the state before dropping the reference. */
	const bool unaccessible = !table->is_readable() || table->corrupted;
	table->release();

	if (unaccessible) {
		return NULL;
	}

	TABLE*	mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
						 tbl_buf, tbl_buf_len);
	/* A failed open is not fatal here; clear the error on thd and
	let the checks below decide whether to retry or give up. */
	if (!mariadb_table)
		thd_clear_error(thd);

	DEBUG_SYNC(thd, "ib_purge_virtual_got_no_such_table");

	table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);

	if (table == NULL) {
		/* Table is dropped. */
		goto fail;
	}

	if (!fil_table_accessible(table)) {
		/* Shared failure exits. release_fail additionally drops
		the reference taken by dict_table_open_on_id(); fail
		closes the MariaDB table if one was opened. */
release_fail:
		table->release();
fail:
		if (mariadb_table) {
			close_thread_tables(thd);
		}

		return NULL;
	}

	if (!table_name_parse(table->name, db_buf1, tbl_buf1,
			      db_buf1_len, tbl_buf1_len)) {
		goto release_fail;
	}

	if (!mariadb_table) {
		/* Nothing was opened: fall through and retry with the
		name that was just parsed. */
	} else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
		/* The name is unchanged: success. The reference taken by
		dict_table_open_on_id() is not released on this path. */
		return mariadb_table;
	} else {
		/* Table is renamed. So release MDL for old name and try
		to acquire the MDL for new table name. */
		close_thread_tables(thd);
	}

	/* Adopt the current name and start over. */
	strcpy(tbl_buf, tbl_buf1);
	strcpy(db_buf, db_buf1);
	tbl_buf_len = tbl_buf1_len;
	db_buf_len = db_buf1_len;
	goto retry_mdl;
}
20584
20585 /** Find or open a table handle for the virtual column template
20586 @param[in] thd thread handle
20587 @param[in,out] table InnoDB table whose virtual column template
20588 is to be updated
20589 @return table handle
20590 @retval NULL if the table is dropped, unaccessible or corrupted
20591 for purge thread */
innodb_find_table_for_vc(THD * thd,dict_table_t * table)20592 static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
20593 {
20594 DBUG_EXECUTE_IF(
20595 "ib_purge_virtual_mdev_16222_1",
20596 DBUG_ASSERT(!debug_sync_set_action(
20597 thd,
20598 STRING_WITH_LEN("ib_purge_virtual_latch_released "
20599 "SIGNAL latch_released "
20600 "WAIT_FOR drop_started"))););
20601 DBUG_EXECUTE_IF(
20602 "ib_purge_virtual_mdev_16222_2",
20603 DBUG_ASSERT(!debug_sync_set_action(
20604 thd,
20605 STRING_WITH_LEN("ib_purge_virtual_got_no_such_table "
20606 "SIGNAL got_no_such_table"))););
20607
20608 if (THDVAR(thd, background_thread)) {
20609 /* Purge thread acquires dict_sys.latch while
20610 processing undo log record. Release it
20611 before acquiring MDL on the table. */
20612 rw_lock_s_unlock(&dict_sys.latch);
20613 return innodb_acquire_mdl(thd, table);
20614 } else {
20615 if (table->vc_templ->mysql_table_query_id
20616 == thd_get_query_id(thd)) {
20617 return table->vc_templ->mysql_table;
20618 }
20619 }
20620
20621 char db_buf[NAME_LEN + 1];
20622 char tbl_buf[NAME_LEN + 1];
20623 ulint db_buf_len, tbl_buf_len;
20624
20625 if (!table_name_parse(table->name, db_buf, tbl_buf,
20626 db_buf_len, tbl_buf_len)) {
20627 ut_ad(!"invalid table name");
20628 return NULL;
20629 }
20630
20631 TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
20632 tbl_buf, tbl_buf_len);
20633
20634 table->vc_templ->mysql_table = mysql_table;
20635 table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
20636 return mysql_table;
20637 }
20638
20639 /** Get the computed value by supplying the base column values.
20640 @param[in,out] table table whose virtual column
20641 template to be built */
innobase_init_vc_templ(dict_table_t * table)20642 TABLE* innobase_init_vc_templ(dict_table_t* table)
20643 {
20644 if (table->vc_templ != NULL) {
20645 return NULL;
20646 }
20647 DBUG_ENTER("innobase_init_vc_templ");
20648
20649 table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
20650
20651 TABLE *mysql_table= innodb_find_table_for_vc(current_thd, table);
20652
20653 ut_ad(mysql_table);
20654 if (!mysql_table) {
20655 DBUG_RETURN(NULL);
20656 }
20657
20658 mutex_enter(&dict_sys.mutex);
20659 innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true);
20660 mutex_exit(&dict_sys.mutex);
20661 DBUG_RETURN(mysql_table);
20662 }
20663
20664 /** Change dbname and table name in table->vc_templ.
20665 @param[in,out] table the table whose virtual column template
20666 dbname and tbname to be renamed. */
20667 void
innobase_rename_vc_templ(dict_table_t * table)20668 innobase_rename_vc_templ(
20669 dict_table_t* table)
20670 {
20671 char dbname[MAX_DATABASE_NAME_LEN + 1];
20672 char tbname[MAX_DATABASE_NAME_LEN + 1];
20673 char* name = table->name.m_name;
20674 ulint dbnamelen = dict_get_db_name_len(name);
20675 ulint tbnamelen = strlen(name) - dbnamelen - 1;
20676 char t_dbname[MAX_DATABASE_NAME_LEN + 1];
20677 char t_tbname[MAX_TABLE_NAME_LEN + 1];
20678
20679 strncpy(dbname, name, dbnamelen);
20680 dbname[dbnamelen] = 0;
20681 strncpy(tbname, name + dbnamelen + 1, tbnamelen);
20682 tbname[tbnamelen] =0;
20683
20684 /* For partition table, remove the partition name and use the
20685 "main" table name to build the template */
20686 char* is_part = is_partition(tbname);
20687
20688 if (is_part != NULL) {
20689 *is_part = '\0';
20690 tbnamelen = ulint(is_part - tbname);
20691 }
20692
20693 dbnamelen = filename_to_tablename(dbname, t_dbname,
20694 MAX_DATABASE_NAME_LEN + 1);
20695 tbnamelen = filename_to_tablename(tbname, t_tbname,
20696 MAX_TABLE_NAME_LEN + 1);
20697
20698 table->vc_templ->db_name = t_dbname;
20699 table->vc_templ->tb_name = t_tbname;
20700 }
20701
20702
20703 /**
20704 Allocate a heap and record for calculating virtual fields
20705 Used mainly for virtual fields in indexes
20706
20707 @param[in] thd MariaDB THD
20708 @param[in] index Index in use
20709 @param[out] heap Heap that holds temporary row
20710 @param[in,out] table MariaDB table
20711 @param[out] record Pointer to allocated MariaDB record
20712 @param[out] storage Internal storage for blobs etc
20713
20714 @retval true on success
20715 @retval false on malloc failure or failed to open the maria table
20716 for purge thread.
20717 */
20718
innobase_allocate_row_for_vcol(THD * thd,dict_index_t * index,mem_heap_t ** heap,TABLE ** table,VCOL_STORAGE * storage)20719 bool innobase_allocate_row_for_vcol(THD *thd, dict_index_t *index,
20720 mem_heap_t **heap, TABLE **table,
20721 VCOL_STORAGE *storage)
20722 {
20723 TABLE *maria_table;
20724 String *blob_value_storage;
20725 if (!*table)
20726 *table = innodb_find_table_for_vc(thd, index->table);
20727
20728 /* For purge thread, there is a possiblity that table could have
20729 dropped, corrupted or unaccessible. */
20730 if (!*table)
20731 return false;
20732 maria_table = *table;
20733 if (!*heap && !(*heap = mem_heap_create(srv_page_size)))
20734 return false;
20735
20736 uchar *record = static_cast<byte *>(mem_heap_alloc(*heap,
20737 maria_table->s->reclength));
20738
20739 size_t len = maria_table->s->virtual_not_stored_blob_fields * sizeof(String);
20740 blob_value_storage = static_cast<String *>(mem_heap_alloc(*heap, len));
20741
20742 if (!record || !blob_value_storage)
20743 return false;
20744
20745 storage->maria_table = maria_table;
20746 storage->innobase_record = record;
20747 storage->maria_record = maria_table->field[0]->record_ptr();
20748 storage->blob_value_storage = blob_value_storage;
20749
20750 maria_table->move_fields(maria_table->field, record, storage->maria_record);
20751 maria_table->remember_blob_values(blob_value_storage);
20752
20753 return true;
20754 }
20755
20756
20757 /** Free memory allocated by innobase_allocate_row_for_vcol() */
20758
innobase_free_row_for_vcol(VCOL_STORAGE * storage)20759 void innobase_free_row_for_vcol(VCOL_STORAGE *storage)
20760 {
20761 TABLE *maria_table= storage->maria_table;
20762 maria_table->move_fields(maria_table->field, storage->maria_record,
20763 storage->innobase_record);
20764 maria_table->restore_blob_values(storage->blob_value_storage);
20765 }
20766
20767
innobase_report_computed_value_failed(dtuple_t * row)20768 void innobase_report_computed_value_failed(dtuple_t *row)
20769 {
20770 ib::error() << "Compute virtual column values failed for "
20771 << rec_printer(row).str();
20772 }
20773
20774
20775 /** Get the computed value by supplying the base column values.
20776 @param[in,out] row the data row
20777 @param[in] col virtual column
20778 @param[in] index index
20779 @param[in,out] local_heap heap memory for processing large data etc.
20780 @param[in,out] heap memory heap that copies the actual index row
20781 @param[in] ifield index field
20782 @param[in] thd MySQL thread handle
20783 @param[in,out] mysql_table mysql table object
20784 @param[in,out] mysql_rec MariaDB record buffer
20785 @param[in] old_table during ALTER TABLE, this is the old table
20786 or NULL.
20787 @param[in] update update vector for the row, if any
20788 @param[in] foreign foreign key information
20789 @return the field filled with computed value, or NULL if just want
20790 to store the value in passed in "my_rec" */
20791 dfield_t*
innobase_get_computed_value(dtuple_t * row,const dict_v_col_t * col,const dict_index_t * index,mem_heap_t ** local_heap,mem_heap_t * heap,const dict_field_t * ifield,THD * thd,TABLE * mysql_table,byte * mysql_rec,const dict_table_t * old_table,const upd_t * update)20792 innobase_get_computed_value(
20793 dtuple_t* row,
20794 const dict_v_col_t* col,
20795 const dict_index_t* index,
20796 mem_heap_t** local_heap,
20797 mem_heap_t* heap,
20798 const dict_field_t* ifield,
20799 THD* thd,
20800 TABLE* mysql_table,
20801 byte* mysql_rec,
20802 const dict_table_t* old_table,
20803 const upd_t* update)
20804 {
20805 byte rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN];
20806 byte* buf;
20807 dfield_t* field;
20808 ulint len;
20809
20810 const ulint zip_size = old_table
20811 ? old_table->space->zip_size()
20812 : dict_tf_get_zip_size(index->table->flags);
20813
20814 ulint ret = 0;
20815
20816 dict_index_t *clust_index= dict_table_get_first_index(index->table);
20817
20818 ut_ad(index->table->vc_templ);
20819 ut_ad(thd != NULL);
20820 ut_ad(mysql_table);
20821
20822 DBUG_ENTER("innobase_get_computed_value");
20823 const mysql_row_templ_t*
20824 vctempl = index->table->vc_templ->vtempl[
20825 index->table->vc_templ->n_col + col->v_pos];
20826
20827 if (!heap || index->table->vc_templ->rec_len
20828 >= REC_VERSION_56_MAX_INDEX_COL_LEN) {
20829 if (*local_heap == NULL) {
20830 *local_heap = mem_heap_create(srv_page_size);
20831 }
20832
20833 buf = static_cast<byte*>(mem_heap_alloc(
20834 *local_heap, index->table->vc_templ->rec_len));
20835 } else {
20836 buf = rec_buf2;
20837 }
20838
20839 for (ulint i = 0; i < unsigned{col->num_base}; i++) {
20840 dict_col_t* base_col = col->base_col[i];
20841 const dfield_t* row_field = NULL;
20842 ulint col_no = base_col->ind;
20843 const mysql_row_templ_t* templ
20844 = index->table->vc_templ->vtempl[col_no];
20845 const byte* data;
20846
20847 if (update) {
20848 ulint clust_no = dict_col_get_clust_pos(base_col,
20849 clust_index);
20850 if (const upd_field_t *uf = upd_get_field_by_field_no(
20851 update, clust_no, false)) {
20852 row_field = &uf->new_val;
20853 }
20854 }
20855
20856 if (!row_field) {
20857 row_field = dtuple_get_nth_field(row, col_no);
20858 }
20859
20860 data = static_cast<const byte*>(row_field->data);
20861 len = row_field->len;
20862
20863 if (row_field->ext) {
20864 if (*local_heap == NULL) {
20865 *local_heap = mem_heap_create(srv_page_size);
20866 }
20867
20868 data = btr_copy_externally_stored_field(
20869 &len, data, zip_size,
20870 dfield_get_len(row_field), *local_heap);
20871 }
20872
20873 if (len == UNIV_SQL_NULL) {
20874 mysql_rec[templ->mysql_null_byte_offset]
20875 |= (byte) templ->mysql_null_bit_mask;
20876 memcpy(mysql_rec + templ->mysql_col_offset,
20877 static_cast<const byte*>(
20878 index->table->vc_templ->default_rec
20879 + templ->mysql_col_offset),
20880 templ->mysql_col_len);
20881 } else {
20882
20883 row_sel_field_store_in_mysql_format(
20884 mysql_rec + templ->mysql_col_offset,
20885 templ, index, templ->clust_rec_field_no,
20886 (const byte*)data, len);
20887
20888 if (templ->mysql_null_bit_mask) {
20889 /* It is a nullable column with a
20890 non-NULL value */
20891 mysql_rec[templ->mysql_null_byte_offset]
20892 &= ~(byte) templ->mysql_null_bit_mask;
20893 }
20894 }
20895 }
20896
20897 field = dtuple_get_nth_v_field(row, col->v_pos);
20898
20899 MY_BITMAP *old_write_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->write_set);
20900 MY_BITMAP *old_read_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->read_set);
20901 ret = mysql_table->update_virtual_field(mysql_table->field[col->m_col.ind]);
20902 dbug_tmp_restore_column_map(&mysql_table->read_set, old_read_set);
20903 dbug_tmp_restore_column_map(&mysql_table->write_set, old_write_set);
20904
20905 if (ret != 0) {
20906 DBUG_RETURN(NULL);
20907 }
20908
20909 if (vctempl->mysql_null_bit_mask
20910 && (mysql_rec[vctempl->mysql_null_byte_offset]
20911 & vctempl->mysql_null_bit_mask)) {
20912 dfield_set_null(field);
20913 field->type.prtype |= DATA_VIRTUAL;
20914 DBUG_RETURN(field);
20915 }
20916
20917 row_mysql_store_col_in_innobase_format(
20918 field, buf,
20919 TRUE, mysql_rec + vctempl->mysql_col_offset,
20920 vctempl->mysql_col_len, dict_table_is_comp(index->table));
20921 field->type.prtype |= DATA_VIRTUAL;
20922
20923 ulint max_prefix = col->m_col.max_prefix;
20924
20925 if (max_prefix && ifield
20926 && (ifield->prefix_len == 0
20927 || ifield->prefix_len > col->m_col.max_prefix)) {
20928 max_prefix = ifield->prefix_len;
20929 }
20930
20931 /* If this is a prefix index, we only need a portion of the field */
20932 if (max_prefix) {
20933 len = dtype_get_at_most_n_mbchars(
20934 col->m_col.prtype,
20935 col->m_col.mbminlen, col->m_col.mbmaxlen,
20936 max_prefix,
20937 field->len,
20938 static_cast<char*>(dfield_get_data(field)));
20939 dfield_set_len(field, len);
20940 }
20941
20942 if (heap) {
20943 dfield_dup(field, heap);
20944 }
20945
20946 DBUG_RETURN(field);
20947 }
20948
20949
20950 /** Attempt to push down an index condition.
20951 @param[in] keyno MySQL key number
20952 @param[in] idx_cond Index condition to be checked
20953 @return Part of idx_cond which the handler will not evaluate */
20954
20955 class Item*
idx_cond_push(uint keyno,class Item * idx_cond)20956 ha_innobase::idx_cond_push(
20957 uint keyno,
20958 class Item* idx_cond)
20959 {
20960 DBUG_ENTER("ha_innobase::idx_cond_push");
20961 DBUG_ASSERT(keyno != MAX_KEY);
20962 DBUG_ASSERT(idx_cond != NULL);
20963
20964 /* We can only evaluate the condition if all columns are stored.*/
20965 dict_index_t* idx = innobase_get_index(keyno);
20966 if (idx && dict_index_has_virtual(idx)) {
20967 DBUG_RETURN(idx_cond);
20968 }
20969
20970 pushed_idx_cond = idx_cond;
20971 pushed_idx_cond_keyno = keyno;
20972 in_range_check_pushed_down = TRUE;
20973 /* We will evaluate the condition entirely */
20974 DBUG_RETURN(NULL);
20975 }
20976
20977
/** Push a primary key filter.
The filter is only remembered in pushed_rowid_filter here; nothing is
evaluated by this call.
@param[in]	pk_filter	filter against which primary keys
				are to be checked; must not be NULL
@retval	false	if pushed (always) */
bool ha_innobase::rowid_filter_push(Rowid_filter* pk_filter)
{
	DBUG_ENTER("ha_innobase::rowid_filter_push");
	DBUG_ASSERT(pk_filter != NULL);
	/* Unconditionally accept the filter. */
	pushed_rowid_filter= pk_filter;
	DBUG_RETURN(false);
}
20989
is_part_of_a_key_prefix(const Field_longstr * field)20990 static bool is_part_of_a_key_prefix(const Field_longstr *field)
20991 {
20992 const TABLE_SHARE *s= field->table->s;
20993
20994 for (uint i= 0; i < s->keys; i++)
20995 {
20996 const KEY &key= s->key_info[i];
20997 for (uint j= 0; j < key.user_defined_key_parts; j++)
20998 {
20999 const KEY_PART_INFO &info= key.key_part[j];
21000 // When field is a part of some key, a key part and field will have the
21001 // same length. And their length will be different when only some prefix
21002 // of a field is used as a key part. That's what we're looking for here.
21003 if (info.field->field_index == field->field_index &&
21004 info.length != field->field_length)
21005 {
21006 DBUG_ASSERT(info.length < field->field_length);
21007 return true;
21008 }
21009 }
21010 }
21011
21012 return false;
21013 }
21014
21015 static bool
is_part_of_a_primary_key(const Field * field)21016 is_part_of_a_primary_key(const Field* field)
21017 {
21018 const TABLE_SHARE* s = field->table->s;
21019
21020 return s->primary_key != MAX_KEY
21021 && field->part_of_key.is_set(s->primary_key);
21022 }
21023
can_convert_string(const Field_string * field,const Column_definition & new_type) const21024 bool ha_innobase::can_convert_string(const Field_string *field,
21025 const Column_definition &new_type) const
21026 {
21027 DBUG_ASSERT(!field->compression_method());
21028 if (new_type.type_handler() != field->type_handler())
21029 return false;
21030
21031 if (new_type.char_length != field->char_length())
21032 return false;
21033
21034 const Charset field_cs(field->charset());
21035
21036 if (new_type.length != field->max_display_length() &&
21037 (!m_prebuilt->table->not_redundant() ||
21038 field_cs.mbminlen() == field_cs.mbmaxlen()))
21039 return false;
21040
21041 if (new_type.charset != field->charset())
21042 {
21043 if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21044 return false;
21045
21046 if (!field_cs.eq_collation_specific_names(new_type.charset))
21047 return !is_part_of_a_primary_key(field);
21048
21049 // Fully indexed case works instantly like
21050 // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21051 if (is_part_of_a_key_prefix(field))
21052 return false;
21053
21054 return true;
21055 }
21056
21057 return true;
21058 }
21059
21060 static bool
supports_enlarging(const dict_table_t * table,const Field_varstring * field,const Column_definition & new_type)21061 supports_enlarging(const dict_table_t* table, const Field_varstring* field,
21062 const Column_definition& new_type)
21063 {
21064 return field->field_length <= 127 || new_type.length <= 255
21065 || field->field_length > 255 || !table->not_redundant();
21066 }
21067
can_convert_varstring(const Field_varstring * field,const Column_definition & new_type) const21068 bool ha_innobase::can_convert_varstring(
21069 const Field_varstring *field, const Column_definition &new_type) const
21070 {
21071 if (new_type.length < field->field_length)
21072 return false;
21073
21074 if (new_type.char_length < field->char_length())
21075 return false;
21076
21077 if (!new_type.compression_method() != !field->compression_method())
21078 return false;
21079
21080 if (new_type.type_handler() != field->type_handler())
21081 return false;
21082
21083 if (new_type.charset != field->charset())
21084 {
21085 if (!supports_enlarging(m_prebuilt->table, field, new_type))
21086 return false;
21087
21088 Charset field_cs(field->charset());
21089 if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21090 return false;
21091
21092 if (!field_cs.eq_collation_specific_names(new_type.charset))
21093 return !is_part_of_a_primary_key(field);
21094
21095 // Fully indexed case works instantly like
21096 // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21097 if (is_part_of_a_key_prefix(field))
21098 return false;
21099
21100 return true;
21101 }
21102
21103 if (new_type.length != field->field_length)
21104 {
21105 if (!supports_enlarging(m_prebuilt->table, field, new_type))
21106 return false;
21107
21108 return true;
21109 }
21110
21111 return true;
21112 }
21113
is_part_of_a_key(const Field_blob * field)21114 static bool is_part_of_a_key(const Field_blob *field)
21115 {
21116 const TABLE_SHARE *s= field->table->s;
21117
21118 for (uint i= 0; i < s->keys; i++)
21119 {
21120 const KEY &key= s->key_info[i];
21121 for (uint j= 0; j < key.user_defined_key_parts; j++)
21122 {
21123 const KEY_PART_INFO &info= key.key_part[j];
21124 if (info.field->field_index == field->field_index)
21125 return true;
21126 }
21127 }
21128
21129 return false;
21130 }
21131
can_convert_blob(const Field_blob * field,const Column_definition & new_type) const21132 bool ha_innobase::can_convert_blob(const Field_blob *field,
21133 const Column_definition &new_type) const
21134 {
21135 if (new_type.type_handler() != field->type_handler())
21136 return false;
21137
21138 if (!new_type.compression_method() != !field->compression_method())
21139 return false;
21140
21141 if (new_type.pack_length != field->pack_length())
21142 return false;
21143
21144 if (new_type.charset != field->charset())
21145 {
21146 Charset field_cs(field->charset());
21147 if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21148 return false;
21149
21150 if (!field_cs.eq_collation_specific_names(new_type.charset))
21151 return !is_part_of_a_key(field);
21152
21153 // Fully indexed case works instantly like
21154 // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21155 if (is_part_of_a_key_prefix(field))
21156 return false;
21157
21158 return true;
21159 }
21160
21161 return true;
21162 }
21163
compare_key_parts(const Field & old_field,const Column_definition & new_field,const KEY_PART_INFO & old_part,const KEY_PART_INFO & new_part) const21164 Compare_keys ha_innobase::compare_key_parts(
21165 const Field &old_field, const Column_definition &new_field,
21166 const KEY_PART_INFO &old_part, const KEY_PART_INFO &new_part) const
21167 {
21168 const bool is_equal= old_field.is_equal(new_field);
21169 const CHARSET_INFO *old_cs= old_field.charset();
21170 const CHARSET_INFO *new_cs= new_field.charset;
21171
21172 if (!is_equal)
21173 {
21174 if (!old_field.can_be_converted_by_engine(new_field))
21175 return Compare_keys::NotEqual;
21176
21177 if (!Charset(old_cs).eq_collation_specific_names(new_cs))
21178 return Compare_keys::NotEqual;
21179 }
21180
21181 if (old_part.length / old_cs->mbmaxlen != new_part.length / new_cs->mbmaxlen)
21182 {
21183 if (old_part.length != old_field.field_length)
21184 return Compare_keys::NotEqual;
21185
21186 if (old_part.length >= new_part.length)
21187 return Compare_keys::NotEqual;
21188
21189 return Compare_keys::EqualButKeyPartLength;
21190 }
21191
21192 return Compare_keys::Equal;
21193 }
21194
21195 /******************************************************************//**
21196 Use this when the args are passed to the format string from
21197 errmsg-utf8.txt directly as is.
21198
21199 Push a warning message to the client, it is a wrapper around:
21200
21201 void push_warning_printf(
21202 THD *thd, Sql_condition::enum_condition_level level,
21203 uint code, const char *format, ...);
21204 */
21205 void
ib_senderrf(THD * thd,ib_log_level_t level,ib_uint32_t code,...)21206 ib_senderrf(
21207 /*========*/
21208 THD* thd, /*!< in/out: session */
21209 ib_log_level_t level, /*!< in: warning level */
21210 ib_uint32_t code, /*!< MySQL error code */
21211 ...) /*!< Args */
21212 {
21213 va_list args;
21214 const char* format = my_get_err_msg(code);
21215
21216 /* If the caller wants to push a message to the client then
21217 the caller must pass a valid session handle. */
21218
21219 ut_a(thd != 0);
21220
21221 /* The error code must exist in the errmsg-utf8.txt file. */
21222 ut_a(format != 0);
21223
21224 va_start(args, code);
21225
21226 myf l;
21227
21228 switch (level) {
21229 case IB_LOG_LEVEL_INFO:
21230 l = ME_NOTE;
21231 break;
21232 case IB_LOG_LEVEL_WARN:
21233 l = ME_WARNING;
21234 break;
21235 default:
21236 l = 0;
21237 break;
21238 }
21239
21240 my_printv_error(code, format, MYF(l), args);
21241
21242 va_end(args);
21243
21244 if (level == IB_LOG_LEVEL_FATAL) {
21245 ut_error;
21246 }
21247 }
21248
21249 /******************************************************************//**
21250 Use this when the args are first converted to a formatted string and then
21251 passed to the format string from errmsg-utf8.txt. The error message format
21252 must be: "Some string ... %s".
21253
21254 Push a warning message to the client, it is a wrapper around:
21255
21256 void push_warning_printf(
21257 THD *thd, Sql_condition::enum_condition_level level,
21258 uint code, const char *format, ...);
21259 */
21260 void
ib_errf(THD * thd,ib_log_level_t level,ib_uint32_t code,const char * format,...)21261 ib_errf(
21262 /*====*/
21263 THD* thd, /*!< in/out: session */
21264 ib_log_level_t level, /*!< in: warning level */
21265 ib_uint32_t code, /*!< MySQL error code */
21266 const char* format, /*!< printf format */
21267 ...) /*!< Args */
21268 {
21269 char* str = NULL;
21270 va_list args;
21271
21272 /* If the caller wants to push a message to the client then
21273 the caller must pass a valid session handle. */
21274
21275 ut_a(thd != 0);
21276 ut_a(format != 0);
21277
21278 va_start(args, format);
21279
21280 #ifdef _WIN32
21281 int size = _vscprintf(format, args) + 1;
21282 if (size > 0) {
21283 str = static_cast<char*>(malloc(size));
21284 }
21285 if (str == NULL) {
21286 va_end(args);
21287 return; /* Watch for Out-Of-Memory */
21288 }
21289 str[size - 1] = 0x0;
21290 vsnprintf(str, size, format, args);
21291 #elif HAVE_VASPRINTF
21292 if (vasprintf(&str, format, args) == -1) {
21293 /* In case of failure use a fixed length string */
21294 str = static_cast<char*>(malloc(BUFSIZ));
21295 vsnprintf(str, BUFSIZ, format, args);
21296 }
21297 #else
21298 /* Use a fixed length string. */
21299 str = static_cast<char*>(malloc(BUFSIZ));
21300 if (str == NULL) {
21301 va_end(args);
21302 return; /* Watch for Out-Of-Memory */
21303 }
21304 vsnprintf(str, BUFSIZ, format, args);
21305 #endif /* _WIN32 */
21306
21307 ib_senderrf(thd, level, code, str);
21308
21309 va_end(args);
21310 free(str);
21311 }
21312
/* Keep the first 16 characters as-is, since the url is sometimes used
as an offset from this. The 16 characters are the "Please refer to "
prefix, so the URL starts at offset 16 of the string. */
const char*	TROUBLESHOOTING_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-troubleshooting/"
	" for how to resolve the issue.";

/* Pointer to the data dictionary troubleshooting documentation. */
const char*	TROUBLESHOOT_DATADICT_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
	" for how to resolve the issue.";

/* Request to file a bug report in the MariaDB issue tracker. */
const char*	BUG_REPORT_MSG =
	"Submit a detailed bug report to https://jira.mariadb.org/";

/* Pointer to the documentation of the forced recovery modes. */
const char*	FORCE_RECOVERY_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/innodb-recovery-modes/"
	" for information about forcing recovery.";

/* Pointer to the list of operating system error numbers. */
const char*	OPERATING_SYSTEM_ERROR_MSG =
	"Some operating system error numbers are described at"
	" https://mariadb.com/kb/en/library/operating-system-error-codes/";

/* Pointer to the documentation of foreign key definitions. */
const char*	FOREIGN_KEY_CONSTRAINTS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/foreign-keys/"
	" for correct foreign key definition.";

/* Pointer to the documentation of SET TRANSACTION. */
const char*	SET_TRANSACTION_MSG =
	"Please refer to https://mariadb.com/kb/en/library/set-transaction/";

/* Pointer to the documentation of the InnoDB system variables. */
const char*	INNODB_PARAMETERS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/innodb-system-variables/";
21344
21345 /**********************************************************************
21346 Converts an identifier from my_charset_filename to UTF-8 charset.
21347 @return result string length, as returned by strconvert() */
21348 uint
innobase_convert_to_filename_charset(char * to,const char * from,ulint len)21349 innobase_convert_to_filename_charset(
21350 /*=================================*/
21351 char* to, /* out: converted identifier */
21352 const char* from, /* in: identifier to convert */
21353 ulint len) /* in: length of 'to', in bytes */
21354 {
21355 uint errors;
21356 CHARSET_INFO* cs_to = &my_charset_filename;
21357 CHARSET_INFO* cs_from = system_charset_info;
21358
21359 return(static_cast<uint>(strconvert(
21360 cs_from, from, uint(strlen(from)),
21361 cs_to, to, static_cast<uint>(len), &errors)));
21362 }
21363
21364 /**********************************************************************
21365 Converts an identifier from my_charset_filename to UTF-8 charset.
21366 @return result string length, as returned by strconvert() */
21367 uint
innobase_convert_to_system_charset(char * to,const char * from,ulint len,uint * errors)21368 innobase_convert_to_system_charset(
21369 /*===============================*/
21370 char* to, /* out: converted identifier */
21371 const char* from, /* in: identifier to convert */
21372 ulint len, /* in: length of 'to', in bytes */
21373 uint* errors) /* out: error return */
21374 {
21375 CHARSET_INFO* cs1 = &my_charset_filename;
21376 CHARSET_INFO* cs2 = system_charset_info;
21377
21378 return(static_cast<uint>(strconvert(
21379 cs1, from, static_cast<uint>(strlen(from)),
21380 cs2, to, static_cast<uint>(len), errors)));
21381 }
21382
/** Validate the requested buffer pool size. On success the size, aligned
by buf_pool_size_align(), is stored in *save and, when it differs from the
current one, also assigned to srv_buf_pool_size.
@param[in]	thd	thread handle
@param[out]	save	immediate result for update function
@param[in]	value	incoming value, read with val_int()
@return 0 on success, 1 on failure.
*/
static
int
innodb_buffer_pool_size_validate(
	THD*				thd,
	st_mysql_sys_var*,
	void*				save,
	struct st_mysql_value*		value)
{
	longlong	intbuf;

	value->val_int(value, &intbuf);

	/* Reject anything below the minimum of the system variable. The
	unsigned comparison lets a negative request through this check. */
	if (static_cast<ulonglong>(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "innodb_buffer_pool_size must be at least"
				    " %lld for innodb_page_size=%lu",
				    MYSQL_SYSVAR_NAME(buffer_pool_size).min_val,
				    srv_page_size);
		return(1);
	}

	if (!srv_was_started) {
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "Cannot update innodb_buffer_pool_size,"
				    " because InnoDB is not started.");
		return(1);
	}

#ifdef UNIV_DEBUG
	/* Debug builds can forbid resizing; this is reported both to the
	client and through ib::warn(). */
	if (buf_disable_resize_buffer_pool_debug == TRUE) {
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "Cannot update innodb_buffer_pool_size,"
				    " because innodb_disable_resize_buffer_pool_debug"
				    " is set.");
		ib::warn() << "Cannot update innodb_buffer_pool_size,"
			" because innodb_disable_resize_buffer_pool_debug"
			" is set.";
		return(1);
	}
#endif /* UNIV_DEBUG */


	/* From here on every return path must call
	buf_pool_mutex_exit_all() exactly once. */
	buf_pool_mutex_enter_all();

	/* Differing old and new sizes mean that an earlier request has
	not been completed yet. */
	if (srv_buf_pool_old_size != srv_buf_pool_size) {
		buf_pool_mutex_exit_all();
		my_printf_error(ER_WRONG_ARGUMENTS,
			"Another buffer pool resize is already in progress.", MYF(0));
		return(1);
	}

	if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) {
		buf_pool_mutex_exit_all();

		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "Cannot update innodb_buffer_pool_size"
				    " to less than 1GB if"
				    " innodb_buffer_pool_instances > 1.");
		return(1);
	}

	ulint	requested_buf_pool_size = buf_pool_size_align(ulint(intbuf));

	/* The aligned size is stored before the remaining checks, so *save
	is set on every path that is taken from here on. */
	*static_cast<ulonglong*>(save) = requested_buf_pool_size;

	if (srv_buf_pool_size == ulint(intbuf)) {
		buf_pool_mutex_exit_all();
		/* nothing to do */
		return(0);
	}

	/* The request differs from the current size but aligns to it:
	warn, but still report success. */
	if (srv_buf_pool_size == requested_buf_pool_size) {
		buf_pool_mutex_exit_all();
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "innodb_buffer_pool_size must be at least"
				    " innodb_buffer_pool_chunk_size=%lu",
				    srv_buf_pool_chunk_unit);
		/* nothing to do */
		return(0);
	}

	srv_buf_pool_size = requested_buf_pool_size;
	buf_pool_mutex_exit_all();

	/* Tell the client when alignment changed the requested value. */
	if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
		char	buf[64];
		int	len = 64;
		/* NOTE(review): the result of this first val_str() call is
		discarded; the string that is printed comes from the second
		call below. */
		value->val_str(value, buf, &len);
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    ER_TRUNCATED_WRONG_VALUE,
				    "Truncated incorrect %-.32s value: '%-.128s'",
				    mysql_sysvar_buffer_pool_size.name,
				    value->val_str(value, buf, &len));
	}

	return(0);
}
21492
21493 /*************************************************************//**
21494 Check for a valid value of innobase_compression_algorithm.
21495 @return 0 for valid innodb_compression_algorithm. */
21496 static
21497 int
innodb_compression_algorithm_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21498 innodb_compression_algorithm_validate(
21499 /*==================================*/
21500 THD* thd, /*!< in: thread handle */
21501 struct st_mysql_sys_var* var, /*!< in: pointer to system
21502 variable */
21503 void* save, /*!< out: immediate result
21504 for update function */
21505 struct st_mysql_value* value) /*!< in: incoming string */
21506 {
21507 ulong compression_algorithm;
21508 DBUG_ENTER("innobase_compression_algorithm_validate");
21509
21510 if (check_sysvar_enum(thd, var, save, value)) {
21511 DBUG_RETURN(1);
21512 }
21513
21514 compression_algorithm = *reinterpret_cast<ulong*>(save);
21515 (void)compression_algorithm;
21516
21517 #ifndef HAVE_LZ4
21518 if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
21519 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21520 HA_ERR_UNSUPPORTED,
21521 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21522 "InnoDB: liblz4 is not installed. \n",
21523 compression_algorithm);
21524 DBUG_RETURN(1);
21525 }
21526 #endif
21527
21528 #ifndef HAVE_LZO
21529 if (compression_algorithm == PAGE_LZO_ALGORITHM) {
21530 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21531 HA_ERR_UNSUPPORTED,
21532 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21533 "InnoDB: liblzo is not installed. \n",
21534 compression_algorithm);
21535 DBUG_RETURN(1);
21536 }
21537 #endif
21538
21539 #ifndef HAVE_LZMA
21540 if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
21541 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21542 HA_ERR_UNSUPPORTED,
21543 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21544 "InnoDB: liblzma is not installed. \n",
21545 compression_algorithm);
21546 DBUG_RETURN(1);
21547 }
21548 #endif
21549
21550 #ifndef HAVE_BZIP2
21551 if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
21552 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21553 HA_ERR_UNSUPPORTED,
21554 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21555 "InnoDB: libbz2 is not installed. \n",
21556 compression_algorithm);
21557 DBUG_RETURN(1);
21558 }
21559 #endif
21560
21561 #ifndef HAVE_SNAPPY
21562 if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
21563 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21564 HA_ERR_UNSUPPORTED,
21565 "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21566 "InnoDB: libsnappy is not installed. \n",
21567 compression_algorithm);
21568 DBUG_RETURN(1);
21569 }
21570 #endif
21571 DBUG_RETURN(0);
21572 }
21573
21574 static
21575 int
innodb_encrypt_tables_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21576 innodb_encrypt_tables_validate(
21577 /*=================================*/
21578 THD* thd, /*!< in: thread handle */
21579 struct st_mysql_sys_var* var, /*!< in: pointer to system
21580 variable */
21581 void* save, /*!< out: immediate result
21582 for update function */
21583 struct st_mysql_value* value) /*!< in: incoming string */
21584 {
21585 if (check_sysvar_enum(thd, var, save, value)) {
21586 return 1;
21587 }
21588
21589 ulong encrypt_tables = *(ulong*)save;
21590
21591 if (encrypt_tables
21592 && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
21593 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21594 HA_ERR_UNSUPPORTED,
21595 "InnoDB: cannot enable encryption, "
21596 "encryption plugin is not available");
21597 return 1;
21598 }
21599
21600 return 0;
21601 }
21602
/** Remember the check functions of two existing system variables in
check_sysvar_enum and check_sysvar_int, so that the validate functions
of other variables can call them. */
static void innodb_remember_check_sysvar_funcs()
{
	/* remember built-in sysvar check functions */
	/* checksum_algorithm is asserted to be an enum variable; its check
	function becomes the enum check. */
	ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
	check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;

	/* flush_log_at_timeout is asserted to be an int variable; its check
	function becomes the int check. */
	ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
	check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
}
21612
21613 /********************************************************************//**
21614 Helper function to push warnings from InnoDB internals to SQL-layer. */
21615 UNIV_INTERN
21616 void
ib_push_warning(trx_t * trx,dberr_t error,const char * format,...)21617 ib_push_warning(
21618 trx_t* trx, /*!< in: trx */
21619 dberr_t error, /*!< in: error code to push as warning */
21620 const char *format,/*!< in: warning message */
21621 ...)
21622 {
21623 if (trx && trx->mysql_thd) {
21624 THD *thd = (THD *)trx->mysql_thd;
21625 va_list args;
21626 char *buf;
21627 #define MAX_BUF_SIZE 4*1024
21628
21629 va_start(args, format);
21630 buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21631 buf[MAX_BUF_SIZE - 1] = 0;
21632 vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21633 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21634 uint(convert_error_code_to_mysql(error, 0,
21635 thd)),
21636 buf);
21637 my_free(buf);
21638 va_end(args);
21639 }
21640 }
21641
21642 /********************************************************************//**
21643 Helper function to push warnings from InnoDB internals to SQL-layer. */
21644 UNIV_INTERN
21645 void
ib_push_warning(void * ithd,dberr_t error,const char * format,...)21646 ib_push_warning(
21647 void* ithd, /*!< in: thd */
21648 dberr_t error, /*!< in: error code to push as warning */
21649 const char *format,/*!< in: warning message */
21650 ...)
21651 {
21652 va_list args;
21653 THD *thd = (THD *)ithd;
21654 char *buf;
21655 #define MAX_BUF_SIZE 4*1024
21656
21657 if (ithd == NULL) {
21658 thd = current_thd;
21659 }
21660
21661 if (thd) {
21662 va_start(args, format);
21663 buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21664 buf[MAX_BUF_SIZE - 1] = 0;
21665 vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21666
21667 push_warning_printf(
21668 thd, Sql_condition::WARN_LEVEL_WARN,
21669 uint(convert_error_code_to_mysql(error, 0, thd)), buf);
21670 my_free(buf);
21671 va_end(args);
21672 }
21673 }
21674
21675 /********************************************************************//**
21676 Helper function to push frm mismatch error to error log and
21677 if needed to sql-layer. */
21678 UNIV_INTERN
21679 void
ib_push_frm_error(THD * thd,dict_table_t * ib_table,TABLE * table,ulint n_keys,bool push_warning)21680 ib_push_frm_error(
21681 /*==============*/
21682 THD* thd, /*!< in: MySQL thd */
21683 dict_table_t* ib_table, /*!< in: InnoDB table */
21684 TABLE* table, /*!< in: MySQL table */
21685 ulint n_keys, /*!< in: InnoDB #keys */
21686 bool push_warning) /*!< in: print warning ? */
21687 {
21688 switch (ib_table->dict_frm_mismatch) {
21689 case DICT_FRM_NO_PK:
21690 sql_print_error("Table %s has a primary key in "
21691 "InnoDB data dictionary, but not "
21692 "in MariaDB!"
21693 " Have you mixed up "
21694 ".frm files from different "
21695 "installations? See "
21696 "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21697 ib_table->name.m_name);
21698
21699 if (push_warning) {
21700 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21701 ER_NO_SUCH_INDEX,
21702 "InnoDB: Table %s has a "
21703 "primary key in InnoDB data "
21704 "dictionary, but not in "
21705 "MariaDB!", ib_table->name.m_name);
21706 }
21707 break;
21708 case DICT_NO_PK_FRM_HAS:
21709 sql_print_error(
21710 "Table %s has no primary key in InnoDB data "
21711 "dictionary, but has one in MariaDB! If you "
21712 "created the table with a MariaDB version < "
21713 "3.23.54 and did not define a primary key, "
21714 "but defined a unique key with all non-NULL "
21715 "columns, then MariaDB internally treats that "
21716 "key as the primary key. You can fix this "
21717 "error by dump + DROP + CREATE + reimport "
21718 "of the table.", ib_table->name.m_name);
21719
21720 if (push_warning) {
21721 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21722 ER_NO_SUCH_INDEX,
21723 "InnoDB: Table %s has no "
21724 "primary key in InnoDB data "
21725 "dictionary, but has one in "
21726 "MariaDB!",
21727 ib_table->name.m_name);
21728 }
21729 break;
21730
21731 case DICT_FRM_INCONSISTENT_KEYS:
21732 sql_print_error("InnoDB: Table %s contains " ULINTPF " "
21733 "indexes inside InnoDB, which "
21734 "is different from the number of "
21735 "indexes %u defined in the MariaDB "
21736 " Have you mixed up "
21737 ".frm files from different "
21738 "installations? See "
21739 "https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21740 ib_table->name.m_name, n_keys,
21741 table->s->keys);
21742
21743 if (push_warning) {
21744 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21745 ER_NO_SUCH_INDEX,
21746 "InnoDB: Table %s contains " ULINTPF " "
21747 "indexes inside InnoDB, which "
21748 "is different from the number of "
21749 "indexes %u defined in the MariaDB ",
21750 ib_table->name.m_name, n_keys,
21751 table->s->keys);
21752 }
21753 break;
21754
21755 case DICT_FRM_CONSISTENT:
21756 default:
21757 sql_print_error("InnoDB: Table %s is consistent "
21758 "on InnoDB data dictionary and MariaDB "
21759 " FRM file.",
21760 ib_table->name.m_name);
21761 ut_error;
21762 break;
21763 }
21764 }
21765
21766 /** Writes 8 bytes to nth tuple field
21767 @param[in] tuple where to write
21768 @param[in] nth index in tuple
21769 @param[in] data what to write
21770 @param[in] buf field data buffer */
set_tuple_col_8(dtuple_t * tuple,int col,uint64_t data,byte * buf)21771 static void set_tuple_col_8(dtuple_t *tuple, int col, uint64_t data, byte *buf)
21772 {
21773 dfield_t *dfield= dtuple_get_nth_field(tuple, col);
21774 ut_ad(dfield->type.len == 8);
21775 if (dfield->len == UNIV_SQL_NULL)
21776 {
21777 dfield_set_data(dfield, buf, 8);
21778 }
21779 ut_ad(dfield->len == dfield->type.len && dfield->data);
21780 mach_write_to_8(dfield->data, data);
21781 }
21782
vers_update_end(row_prebuilt_t * prebuilt,bool history_row)21783 void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row)
21784 {
21785 ut_ad(prebuilt->ins_node == this);
21786 trx_t *trx= prebuilt->trx;
21787 #ifndef DBUG_OFF
21788 ut_ad(table->vers_start != table->vers_end);
21789 const mysql_row_templ_t *t= prebuilt->get_template_by_col(table->vers_end);
21790 ut_ad(t);
21791 ut_ad(t->mysql_col_len == 8);
21792 #endif
21793
21794 if (history_row)
21795 {
21796 set_tuple_col_8(row, table->vers_end, trx->id, vers_end_buf);
21797 }
21798 else /* ROW_INS_VERSIONED */
21799 {
21800 set_tuple_col_8(row, table->vers_end, TRX_ID_MAX, vers_end_buf);
21801 #ifndef DBUG_OFF
21802 t= prebuilt->get_template_by_col(table->vers_start);
21803 ut_ad(t);
21804 ut_ad(t->mysql_col_len == 8);
21805 #endif
21806 set_tuple_col_8(row, table->vers_start, trx->id, vers_start_buf);
21807 }
21808 dict_index_t *clust_index= dict_table_get_first_index(table);
21809 THD *thd= trx->mysql_thd;
21810 TABLE *mysql_table= prebuilt->m_mysql_table;
21811 mem_heap_t *local_heap= NULL;
21812 for (ulint col_no= 0; col_no < dict_table_get_n_v_cols(table); col_no++)
21813 {
21814 const dict_v_col_t *v_col= dict_table_get_nth_v_col(table, col_no);
21815 for (ulint i= 0; i < unsigned(v_col->num_base); i++)
21816 if (v_col->base_col[i]->ind == table->vers_end)
21817 innobase_get_computed_value(row, v_col, clust_index, &local_heap,
21818 table->heap, NULL, thd, mysql_table,
21819 mysql_table->record[0], NULL, NULL);
21820 }
21821 if (UNIV_LIKELY_NULL(local_heap))
21822 mem_heap_free(local_heap);
21823 }
21824
21825 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
21826 if needed.
21827 @param[in] size size in bytes
21828 @return aligned size */
21829 ulint
buf_pool_size_align(ulint size)21830 buf_pool_size_align(
21831 ulint size)
21832 {
21833 const ib_uint64_t m = ((ib_uint64_t)srv_buf_pool_instances) * srv_buf_pool_chunk_unit;
21834 size = ut_max((size_t) size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val);
21835
21836 if (size % m == 0) {
21837 return(size);
21838 } else {
21839 return (ulint)((size / m + 1) * m);
21840 }
21841 }
21842