1 /*****************************************************************************
2 
3 Copyright (c) 2000, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 Copyright (c) 2012, Facebook Inc.
7 Copyright (c) 2013, 2022, MariaDB Corporation.
8 
9 Portions of this file contain modifications contributed and copyrighted by
10 Google, Inc. Those modifications are gratefully acknowledged and are described
11 briefly in the InnoDB documentation. The contributions by Google are
12 incorporated with their permission, and subject to the conditions contained in
13 the file COPYING.Google.
14 
15 Portions of this file contain modifications contributed and copyrighted
16 by Percona Inc.. Those modifications are
17 gratefully acknowledged and are described briefly in the InnoDB
18 documentation. The contributions by Percona Inc. are incorporated with
19 their permission, and subject to the conditions contained in the file
20 COPYING.Percona.
21 
22 This program is free software; you can redistribute it and/or modify it under
23 the terms of the GNU General Public License as published by the Free Software
24 Foundation; version 2 of the License.
25 
26 This program is distributed in the hope that it will be useful, but WITHOUT
27 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
28 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29 
30 You should have received a copy of the GNU General Public License along with
31 this program; if not, write to the Free Software Foundation, Inc.,
32 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
33 
34 *****************************************************************************/
35 
36 /** @file ha_innodb.cc */
37 
38 #include "univ.i"
39 
40 /* Include necessary SQL headers */
41 #include "ha_prototypes.h"
42 #include <debug_sync.h>
43 #include <gstream.h>
44 #include <log.h>
45 #include <mysys_err.h>
46 #include <innodb_priv.h>
47 #include <strfunc.h>
48 #include <sql_acl.h>
49 #include <sql_class.h>
50 #include <sql_show.h>
51 #include <sql_table.h>
52 #include <table_cache.h>
53 #include <my_check_opt.h>
54 #include <my_bitmap.h>
55 #include <mysql/service_thd_alloc.h>
56 #include <mysql/service_thd_wait.h>
57 
58 // MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
59 // MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
60 
61 #include <my_service_manager.h>
62 #include <key.h>
63 
64 /* Include necessary InnoDB headers */
65 #include "btr0btr.h"
66 #include "btr0cur.h"
67 #include "btr0bulk.h"
68 #include "btr0sea.h"
69 #include "buf0dblwr.h"
70 #include "buf0dump.h"
71 #include "buf0flu.h"
72 #include "buf0lru.h"
73 #include "dict0boot.h"
74 #include "dict0load.h"
75 #include "btr0defragment.h"
76 #include "dict0crea.h"
77 #include "dict0dict.h"
78 #include "dict0stats.h"
79 #include "dict0stats_bg.h"
80 #include "fil0fil.h"
81 #include "fsp0fsp.h"
82 #include "fts0fts.h"
83 #include "fts0plugin.h"
84 #include "fts0priv.h"
85 #include "fts0types.h"
86 #include "ibuf0ibuf.h"
87 #include "lock0lock.h"
88 #include "log0crypt.h"
89 #include "mtr0mtr.h"
90 #include "os0file.h"
91 #include "page0zip.h"
92 #include "pars0pars.h"
93 #include "rem0types.h"
94 #include "row0import.h"
95 #include "row0ins.h"
96 #include "row0merge.h"
97 #include "row0mysql.h"
98 #include "row0quiesce.h"
99 #include "row0sel.h"
100 #include "row0trunc.h"
101 #include "row0upd.h"
102 #include "fil0crypt.h"
103 #include "srv0mon.h"
104 #include "srv0srv.h"
105 #include "srv0start.h"
106 #include "rem0rec.h"
107 #ifdef UNIV_DEBUG
108 #include "trx0purge.h"
109 #endif /* UNIV_DEBUG */
110 #include "trx0roll.h"
111 #include "trx0rseg.h"
112 #include "trx0trx.h"
113 #include "fil0pagecompress.h"
114 #include "ut0mem.h"
115 #include "row0ext.h"
116 
117 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
118 
119 extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
120 unsigned long long thd_get_query_id(const MYSQL_THD thd);
121 void thd_clear_error(MYSQL_THD thd);
122 
123 TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len,
124 			  const char *table, size_t table_len);
125 MYSQL_THD create_thd();
126 void destroy_thd(MYSQL_THD thd);
127 void reset_thd(MYSQL_THD thd);
128 TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
129 			const char *tb, size_t tblen);
130 void close_thread_tables(THD* thd);
131 
132 #ifdef MYSQL_DYNAMIC_PLUGIN
133 #define tc_size  400
134 #define tdc_size 400
135 #endif
136 
137 #include "ha_innodb.h"
138 #include "i_s.h"
139 #include "sync0sync.h"
140 
141 #include <string>
142 #include <sstream>
143 
144 #include <mysql/plugin.h>
145 #include <mysql/service_wsrep.h>
146 
147 #ifdef WITH_WSREP
148 #include "dict0priv.h"
149 #include <mysql/service_md5.h>
150 #include "wsrep_sst.h"
151 
152 static inline wsrep_ws_handle_t*
wsrep_ws_handle(THD * thd,const trx_t * trx)153 wsrep_ws_handle(THD* thd, const trx_t* trx) {
154 	return wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd),
155 				       (wsrep_trx_id_t)trx->id);
156 }
157 
158 extern void wsrep_cleanup_transaction(THD *thd);
159 static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool);
160 static void wsrep_fake_trx_id(handlerton* hton, THD *thd);
161 static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
162 static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
163 #endif /* WITH_WSREP */
164 
165 /** to force correct commit order in binlog */
166 static ulong commit_threads = 0;
167 static mysql_cond_t commit_cond;
168 static mysql_mutex_t commit_cond_m;
169 static mysql_mutex_t pending_checkpoint_mutex;
170 
171 #define INSIDE_HA_INNOBASE_CC
172 
173 #define EQ_CURRENT_THD(thd) ((thd) == current_thd)
174 
175 struct handlerton* innodb_hton_ptr;
176 
177 static const long AUTOINC_OLD_STYLE_LOCKING = 0;
178 static const long AUTOINC_NEW_STYLE_LOCKING = 1;
179 static const long AUTOINC_NO_LOCKING = 2;
180 
181 static ulong innobase_open_files;
182 static long innobase_autoinc_lock_mode;
183 static ulong innobase_commit_concurrency;
184 
185 static ulonglong innobase_buffer_pool_size;
186 
187 /** Percentage of the buffer pool to reserve for 'old' blocks.
188 Connected to buf_LRU_old_ratio. */
189 static uint innobase_old_blocks_pct;
190 
191 static char*	innobase_data_file_path;
192 static char*	innobase_temp_data_file_path;
193 
194 /* The default values for the following char* start-up parameters
195 are determined in innodb_init_params(). */
196 
197 static char*	innobase_data_home_dir;
198 static char*	innobase_enable_monitor_counter;
199 static char*	innobase_disable_monitor_counter;
200 static char*	innobase_reset_monitor_counter;
201 static char*	innobase_reset_all_monitor_counter;
202 
203 static ulong	innodb_flush_method;
204 
205 /** Deprecated; no effect other than issuing a deprecation warning. */
206 static char* innodb_file_format;
207 /** Deprecated; no effect other than issuing a deprecation warning. */
208 static char* innodb_large_prefix;
209 
210 /* This variable can be set in the server configure file, specifying
211 stopword table to be used */
212 static char*	innobase_server_stopword_table;
213 
214 static my_bool	innobase_use_checksums;
215 static my_bool	innobase_locks_unsafe_for_binlog;
216 static my_bool	innobase_rollback_on_timeout;
217 static my_bool	innobase_create_status_file;
218 my_bool	innobase_stats_on_metadata;
219 static my_bool	innodb_optimize_fulltext_only;
220 
221 static char*	innodb_version_str = (char*) INNODB_VERSION_STR;
222 
223 extern uint srv_fil_crypt_rotate_key_age;
224 extern uint srv_n_fil_crypt_iops;
225 
226 extern my_bool srv_immediate_scrub_data_uncompressed;
227 extern my_bool srv_background_scrub_data_uncompressed;
228 extern my_bool srv_background_scrub_data_compressed;
229 extern uint srv_background_scrub_data_interval;
230 extern uint srv_background_scrub_data_check_interval;
231 #ifdef UNIV_DEBUG
232 my_bool innodb_evict_tables_on_commit_debug;
233 extern my_bool srv_scrub_force_testing;
234 #endif
235 
236 /** File format constraint for ALTER TABLE */
237 ulong innodb_instant_alter_column_allowed;
238 
/** Values of the innodb_default_row_format option.
rec_format_enum cannot be used here, because the COMPRESSED row format
is not allowed for innodb_default_row_format. */
enum default_row_format_enum {
	DEFAULT_ROW_FORMAT_REDUNDANT = 0,
	DEFAULT_ROW_FORMAT_COMPACT,	/* = 1 */
	DEFAULT_ROW_FORMAT_DYNAMIC	/* = 2 */
};
246 
247 /** A dummy variable */
248 static uint innodb_max_purge_lag_wait;
249 
250 /** Wait for trx_sys_t::rseg_history_len to be below a limit. */
innodb_max_purge_lag_wait_update(THD * thd,st_mysql_sys_var *,void *,const void * limit)251 static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *,
252                                              void *, const void *limit)
253 {
254   const uint l= *static_cast<const uint*>(limit);
255   if (trx_sys.history_size() <= l)
256     return;
257   mysql_mutex_unlock(&LOCK_global_system_variables);
258   while (trx_sys.history_size() > l)
259   {
260     if (thd_kill_level(thd))
261       break;
262     srv_wake_purge_thread_if_not_active();
263     os_thread_sleep(100000);
264   }
265   mysql_mutex_lock(&LOCK_global_system_variables);
266 }
267 
/** Assign an error number to errno.
@param[in]	err	value to store in errno */
static void
set_my_errno(int err)
{
	errno = err;
}
273 
274 /** Checks whether the file name belongs to a partition of a table.
275 @param[in]	file_name	file name
276 @return pointer to the end of the table name part of the file name, or NULL */
277 static
278 char*
is_partition(char * file_name)279 is_partition(
280 /*=========*/
281 	char*		file_name)
282 {
283 	/* We look for pattern #P# to see if the table is partitioned
284 	MariaDB table. */
285 	return strstr(file_name, table_name_t::part_suffix);
286 }
287 
288 /** Signal to shut down InnoDB (NULL if shutdown was signaled, or if
289 running in innodb_read_only mode, srv_read_only_mode) */
290 st_my_thread_var *srv_running;
/** Service thread that waits for the server shutdown and stops purge threads.
Purge workers have THDs that are needed to calculate virtual columns.
These THDs must be destroyed rather early in the server shutdown sequence.
This service thread creates a THD and idly waits for it to get a signal to
die. Then it notifies all purge workers to shut down.
*/
297 static pthread_t thd_destructor_thread;
298 
299 pthread_handler_t
thd_destructor_proxy(void *)300 thd_destructor_proxy(void *)
301 {
302 	mysql_mutex_t thd_destructor_mutex;
303 	mysql_cond_t thd_destructor_cond;
304 
305 	my_thread_init();
306 	mysql_mutex_init(PSI_NOT_INSTRUMENTED, &thd_destructor_mutex, 0);
307 	mysql_cond_init(PSI_NOT_INSTRUMENTED, &thd_destructor_cond, 0);
308 
309 	st_my_thread_var *myvar= _my_thread_var();
310 	myvar->current_mutex = &thd_destructor_mutex;
311 	myvar->current_cond = &thd_destructor_cond;
312 
313 	THD *thd= create_thd();
314 	thd_proc_info(thd, "InnoDB shutdown handler");
315 
316 
317 	mysql_mutex_lock(&thd_destructor_mutex);
318 	my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
319 				    myvar,
320 				    MY_MEMORY_ORDER_RELAXED);
321 	/* wait until the server wakes the THD to abort and die */
322 	while (!srv_running->abort)
323 		mysql_cond_wait(&thd_destructor_cond, &thd_destructor_mutex);
324 	mysql_mutex_unlock(&thd_destructor_mutex);
325 	my_atomic_storeptr_explicit(reinterpret_cast<void**>(&srv_running),
326 				    NULL,
327 				    MY_MEMORY_ORDER_RELAXED);
328 
329 	while (srv_fast_shutdown == 0 &&
330 	       (trx_sys.any_active_transactions() ||
331 		(uint)thread_count > srv_n_purge_threads + 1)) {
332 		thd_proc_info(thd, "InnoDB slow shutdown wait");
333 		os_thread_sleep(1000);
334 	}
335 
336 	/* Some background threads might generate undo pages that will
337 	need to be purged, so they have to be shut down before purge
338 	threads if slow shutdown is requested.  */
339 	srv_shutdown_bg_undo_sources();
340 	srv_purge_shutdown();
341 
342 	destroy_thd(thd);
343 	mysql_cond_destroy(&thd_destructor_cond);
344 	mysql_mutex_destroy(&thd_destructor_mutex);
345 	my_thread_end();
346 	return 0;
347 }
348 
349 /** Return the InnoDB ROW_FORMAT enum value
350 @param[in]	row_format	row_format from "innodb_default_row_format"
351 @return InnoDB ROW_FORMAT value from rec_format_t enum. */
352 static
353 rec_format_t
get_row_format(ulong row_format)354 get_row_format(
355 	ulong row_format)
356 {
357 	switch(row_format) {
358 	case DEFAULT_ROW_FORMAT_REDUNDANT:
359 		return(REC_FORMAT_REDUNDANT);
360 	case DEFAULT_ROW_FORMAT_COMPACT:
361 		return(REC_FORMAT_COMPACT);
362 	case DEFAULT_ROW_FORMAT_DYNAMIC:
363 		return(REC_FORMAT_DYNAMIC);
364 	default:
365 		ut_ad(0);
366 		return(REC_FORMAT_DYNAMIC);
367 	}
368 }
369 
370 static ulong	innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC;
371 
372 /** Possible values for system variable "innodb_stats_method". The values
373 are defined the same as its corresponding MyISAM system variable
374 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
375 static const char* innodb_stats_method_names[] = {
376 	"nulls_equal",
377 	"nulls_unequal",
378 	"nulls_ignored",
379 	NullS
380 };
381 
382 /** Used to define an enumerate type of the system variable innodb_stats_method.
383 This is the same as "myisam_stats_method_typelib" */
384 static TYPELIB innodb_stats_method_typelib = {
385 	array_elements(innodb_stats_method_names) - 1,
386 	"innodb_stats_method_typelib",
387 	innodb_stats_method_names,
388 	NULL
389 };
390 
391 /** Possible values of the parameter innodb_checksum_algorithm */
392 const char* innodb_checksum_algorithm_names[] = {
393 	"crc32",
394 	"strict_crc32",
395 	"innodb",
396 	"strict_innodb",
397 	"none",
398 	"strict_none",
399 	NullS
400 };
401 
402 /** Used to define an enumerate type of the system variable
403 innodb_checksum_algorithm. */
404 TYPELIB innodb_checksum_algorithm_typelib = {
405 	array_elements(innodb_checksum_algorithm_names) - 1,
406 	"innodb_checksum_algorithm_typelib",
407 	innodb_checksum_algorithm_names,
408 	NULL
409 };
410 
411 /** Possible values for system variable "innodb_default_row_format". */
412 static const char* innodb_default_row_format_names[] = {
413 	"redundant",
414 	"compact",
415 	"dynamic",
416 	NullS
417 };
418 
419 /** Used to define an enumerate type of the system variable
420 innodb_default_row_format. */
421 static TYPELIB innodb_default_row_format_typelib = {
422 	array_elements(innodb_default_row_format_names) - 1,
423 	"innodb_default_row_format_typelib",
424 	innodb_default_row_format_names,
425 	NULL
426 };
427 
428 /** Possible values of the parameter innodb_lock_schedule_algorithm */
429 static const char* innodb_lock_schedule_algorithm_names[] = {
430 	"fcfs",
431 	"vats",
432 	NullS
433 };
434 
435 /** Used to define an enumerate type of the system variable
436 innodb_lock_schedule_algorithm. */
437 static TYPELIB innodb_lock_schedule_algorithm_typelib = {
438 	array_elements(innodb_lock_schedule_algorithm_names) - 1,
439 	"innodb_lock_schedule_algorithm_typelib",
440 	innodb_lock_schedule_algorithm_names,
441 	NULL
442 };
443 
444 /** Names of allowed values of innodb_flush_method */
445 const char* innodb_flush_method_names[] = {
446 	"fsync",
447 	"O_DSYNC",
448 	"littlesync",
449 	"nosync",
450 	"O_DIRECT",
451 	"O_DIRECT_NO_FSYNC",
452 #ifdef _WIN32
453 	"unbuffered",
454 	"async_unbuffered" /* alias for "unbuffered" */,
455 	"normal" /* alias for "fsync" */,
456 #endif
457 	NullS
458 };
459 
460 /** Enumeration of innodb_flush_method */
461 TYPELIB innodb_flush_method_typelib = {
462 	array_elements(innodb_flush_method_names) - 1,
463 	"innodb_flush_method_typelib",
464 	innodb_flush_method_names,
465 	NULL
466 };
467 
468 /* The following counter is used to convey information to InnoDB
469 about server activity: in case of normal DML ops it is not
470 sensible to call srv_active_wake_master_thread after each
471 operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
472 
473 #define INNOBASE_WAKE_INTERVAL	32
474 static ulong	innobase_active_counter	= 0;
475 
476 /** Allowed values of innodb_change_buffering */
477 static const char* innodb_change_buffering_names[] = {
478 	"none",		/* IBUF_USE_NONE */
479 	"inserts",	/* IBUF_USE_INSERT */
480 	"deletes",	/* IBUF_USE_DELETE_MARK */
481 	"changes",	/* IBUF_USE_INSERT_DELETE_MARK */
482 	"purges",	/* IBUF_USE_DELETE */
483 	"all",		/* IBUF_USE_ALL */
484 	NullS
485 };
486 
487 /** Enumeration of innodb_change_buffering */
488 static TYPELIB innodb_change_buffering_typelib = {
489 	array_elements(innodb_change_buffering_names) - 1,
490 	"innodb_change_buffering_typelib",
491 	innodb_change_buffering_names,
492 	NULL
493 };
494 
495 /** Allowed values of innodb_instant_alter_column_allowed */
496 const char* innodb_instant_alter_column_allowed_names[] = {
497 	"never", /* compatible with MariaDB 5.5 to 10.2 */
498 	"add_last",/* allow instant ADD COLUMN */
499 	NullS
500 };
501 
502 /** Enumeration of innodb_instant_alter_column_allowed */
503 static TYPELIB innodb_instant_alter_column_allowed_typelib = {
504 	array_elements(innodb_instant_alter_column_allowed_names) - 1,
505 	"innodb_instant_alter_column_allowed_typelib",
506 	innodb_instant_alter_column_allowed_names,
507 	NULL
508 };
509 
510 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
511 of m_prebuilt->fts_doc_id
512 @param[in,out]	fts_hdl	FTS handler
513 @return the relevance ranking value */
514 static
515 float
516 innobase_fts_retrieve_ranking(
517 	FT_INFO*	fts_hdl);
518 /** Free the memory for the FTS handler
519 @param[in,out]	fts_hdl	FTS handler */
520 static
521 void
522 innobase_fts_close_ranking(
523 	FT_INFO*	fts_hdl);
524 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
525 of m_prebuilt->fts_doc_id
526 @param[in,out]	fts_hdl	FTS handler
527 @return the relevance ranking value */
528 static
529 float
530 innobase_fts_find_ranking(
531 	FT_INFO*	fts_hdl,
532 	uchar*,
533 	uint);
534 
535 /* Call back function array defined by MySQL and used to
536 retrieve FTS results. */
537 const struct _ft_vft ft_vft_result = {NULL,
538 				      innobase_fts_find_ranking,
539 				      innobase_fts_close_ranking,
540 				      innobase_fts_retrieve_ranking,
541 				      NULL};
542 
543 /** @return version of the extended FTS API */
544 static
545 uint
innobase_fts_get_version()546 innobase_fts_get_version()
547 {
548 	/* Currently this doesn't make much sense as returning
549 	HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
550 	This supposed to ease future extensions.  */
551 	return(2);
552 }
553 
554 /** @return Which part of the extended FTS API is supported */
555 static
556 ulonglong
innobase_fts_flags()557 innobase_fts_flags()
558 {
559 	return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
560 }
561 
562 /** Find and Retrieve the FTS doc_id for the current result row
563 @param[in,out]	fts_hdl	FTS handler
564 @return the document ID */
565 static
566 ulonglong
567 innobase_fts_retrieve_docid(
568 	FT_INFO_EXT*	fts_hdl);
569 
570 /** Find and retrieve the size of the current result
571 @param[in,out]	fts_hdl	FTS handler
572 @return number of matching rows */
573 static
574 ulonglong
innobase_fts_count_matches(FT_INFO_EXT * fts_hdl)575 innobase_fts_count_matches(
576 	FT_INFO_EXT*	fts_hdl)	/*!< in: FTS handler */
577 {
578 	NEW_FT_INFO*	handle = reinterpret_cast<NEW_FT_INFO*>(fts_hdl);
579 
580 	if (handle->ft_result->rankings_by_id != NULL) {
581 		return(rbt_size(handle->ft_result->rankings_by_id));
582 	} else {
583 		return(0);
584 	}
585 }
586 
587 const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
588 					      innobase_fts_flags,
589 					      innobase_fts_retrieve_docid,
590 					      innobase_fts_count_matches};
591 
592 #ifdef HAVE_PSI_INTERFACE
593 # define PSI_KEY(n) {&n##_key, #n, 0}
594 /* All RWLOCK used in Innodb are SX-locks */
595 # define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX}
596 
597 /* Keys to register pthread mutexes/cond in the current file with
598 performance schema */
599 static mysql_pfs_key_t	commit_cond_mutex_key;
600 static mysql_pfs_key_t	commit_cond_key;
601 static mysql_pfs_key_t	pending_checkpoint_mutex_key;
602 static mysql_pfs_key_t  thd_destructor_thread_key;
603 
604 static PSI_mutex_info	all_pthread_mutexes[] = {
605 	PSI_KEY(commit_cond_mutex),
606 	PSI_KEY(pending_checkpoint_mutex),
607 };
608 
609 static PSI_cond_info	all_innodb_conds[] = {
610 	PSI_KEY(commit_cond)
611 };
612 
613 # ifdef UNIV_PFS_MUTEX
614 /* all_innodb_mutexes array contains mutexes that are
615 performance schema instrumented if "UNIV_PFS_MUTEX"
616 is defined */
617 static PSI_mutex_info all_innodb_mutexes[] = {
618 	PSI_KEY(autoinc_mutex),
619 #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
620 	PSI_KEY(buffer_block_mutex),
621 #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
622 	PSI_KEY(buf_pool_mutex),
623 	PSI_KEY(buf_pool_zip_mutex),
624 	PSI_KEY(cache_last_read_mutex),
625 	PSI_KEY(dict_foreign_err_mutex),
626 	PSI_KEY(dict_sys_mutex),
627 	PSI_KEY(recalc_pool_mutex),
628 	PSI_KEY(fil_system_mutex),
629 	PSI_KEY(flush_list_mutex),
630 	PSI_KEY(fts_delete_mutex),
631 	PSI_KEY(fts_doc_id_mutex),
632 	PSI_KEY(log_flush_order_mutex),
633 	PSI_KEY(hash_table_mutex),
634 	PSI_KEY(ibuf_bitmap_mutex),
635 	PSI_KEY(ibuf_mutex),
636 	PSI_KEY(ibuf_pessimistic_insert_mutex),
637 	PSI_KEY(index_online_log),
638 	PSI_KEY(log_sys_mutex),
639 	PSI_KEY(log_sys_write_mutex),
640 	PSI_KEY(mutex_list_mutex),
641 	PSI_KEY(page_zip_stat_per_index_mutex),
642 	PSI_KEY(purge_sys_pq_mutex),
643 	PSI_KEY(recv_sys_mutex),
644 	PSI_KEY(recv_writer_mutex),
645 	PSI_KEY(redo_rseg_mutex),
646 	PSI_KEY(noredo_rseg_mutex),
647 #  ifdef UNIV_DEBUG
648 	PSI_KEY(rw_lock_debug_mutex),
649 #  endif /* UNIV_DEBUG */
650 	PSI_KEY(rw_lock_list_mutex),
651 	PSI_KEY(rw_lock_mutex),
652 	PSI_KEY(srv_innodb_monitor_mutex),
653 	PSI_KEY(srv_misc_tmpfile_mutex),
654 	PSI_KEY(srv_monitor_file_mutex),
655 	PSI_KEY(buf_dblwr_mutex),
656 	PSI_KEY(trx_pool_mutex),
657 	PSI_KEY(trx_pool_manager_mutex),
658 	PSI_KEY(srv_sys_mutex),
659 	PSI_KEY(lock_mutex),
660 	PSI_KEY(lock_wait_mutex),
661 	PSI_KEY(trx_mutex),
662 	PSI_KEY(srv_threads_mutex),
663 #  ifndef PFS_SKIP_EVENT_MUTEX
664 	PSI_KEY(event_mutex),
665 #  endif /* PFS_SKIP_EVENT_MUTEX */
666 	PSI_KEY(rtr_active_mutex),
667 	PSI_KEY(rtr_match_mutex),
668 	PSI_KEY(rtr_path_mutex),
669 	PSI_KEY(trx_sys_mutex),
670 	PSI_KEY(zip_pad_mutex)
671 };
672 # endif /* UNIV_PFS_MUTEX */
673 
674 # ifdef UNIV_PFS_RWLOCK
675 /* all_innodb_rwlocks array contains rwlocks that are
676 performance schema instrumented if "UNIV_PFS_RWLOCK"
677 is defined */
678 static PSI_rwlock_info all_innodb_rwlocks[] = {
679 	PSI_RWLOCK_KEY(btr_search_latch),
680 #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
681 	PSI_RWLOCK_KEY(buf_block_lock),
682 #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
683 #  ifdef UNIV_DEBUG
684 	PSI_RWLOCK_KEY(buf_block_debug_latch),
685 #  endif /* UNIV_DEBUG */
686 	PSI_RWLOCK_KEY(dict_operation_lock),
687 	PSI_RWLOCK_KEY(fil_space_latch),
688 	PSI_RWLOCK_KEY(checkpoint_lock),
689 	PSI_RWLOCK_KEY(fts_cache_rw_lock),
690 	PSI_RWLOCK_KEY(fts_cache_init_rw_lock),
691 	PSI_RWLOCK_KEY(trx_i_s_cache_lock),
692 	PSI_RWLOCK_KEY(trx_purge_latch),
693 	PSI_RWLOCK_KEY(index_tree_rw_lock),
694 	PSI_RWLOCK_KEY(hash_table_locks)
695 };
696 # endif /* UNIV_PFS_RWLOCK */
697 
698 # ifdef UNIV_PFS_THREAD
699 /* all_innodb_threads array contains threads that are
700 performance schema instrumented if "UNIV_PFS_THREAD"
701 is defined */
702 static PSI_thread_info	all_innodb_threads[] = {
703 	PSI_KEY(buf_dump_thread),
704 	PSI_KEY(dict_stats_thread),
705 	PSI_KEY(io_handler_thread),
706 	PSI_KEY(io_ibuf_thread),
707 	PSI_KEY(io_log_thread),
708 	PSI_KEY(io_read_thread),
709 	PSI_KEY(io_write_thread),
710 	PSI_KEY(page_cleaner_thread),
711 	PSI_KEY(recv_writer_thread),
712 	PSI_KEY(srv_error_monitor_thread),
713 	PSI_KEY(srv_lock_timeout_thread),
714 	PSI_KEY(srv_master_thread),
715 	PSI_KEY(srv_monitor_thread),
716 	PSI_KEY(srv_purge_thread),
717 	PSI_KEY(srv_worker_thread),
718 	PSI_KEY(trx_rollback_clean_thread),
719 	PSI_KEY(thd_destructor_thread),
720 };
721 # endif /* UNIV_PFS_THREAD */
722 
723 # ifdef UNIV_PFS_IO
724 /* all_innodb_files array contains the type of files that are
725 performance schema instrumented if "UNIV_PFS_IO" is defined */
726 static PSI_file_info	all_innodb_files[] = {
727 	PSI_KEY(innodb_data_file),
728 	PSI_KEY(innodb_log_file),
729 	PSI_KEY(innodb_temp_file)
730 };
731 # endif /* UNIV_PFS_IO */
732 #endif /* HAVE_PSI_INTERFACE */
733 
734 static void innodb_remember_check_sysvar_funcs();
735 mysql_var_check_func check_sysvar_enum;
736 mysql_var_check_func check_sysvar_int;
737 
738 // should page compression be used by default for new tables
739 static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
740   "Is compression the default for new tables",
741   NULL, NULL, FALSE);
742 
743 /** Update callback for SET [SESSION] innodb_default_encryption_key_id */
744 static void
innodb_default_encryption_key_id_update(THD * thd,st_mysql_sys_var * var,void * var_ptr,const void * save)745 innodb_default_encryption_key_id_update(THD* thd, st_mysql_sys_var* var,
746 					void* var_ptr, const void *save)
747 {
748 	uint key_id = *static_cast<const uint*>(save);
749 	if (key_id != FIL_DEFAULT_ENCRYPTION_KEY
750 	    && !encryption_key_id_exists(key_id)) {
751 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
752 				    ER_WRONG_ARGUMENTS,
753 				    "innodb_default_encryption_key=%u"
754 				    " is not available", key_id);
755 	}
756 	*static_cast<uint*>(var_ptr) = key_id;
757 }
758 
759 static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG,
760 			 "Default encryption key id used for table encryption.",
761 			 NULL, innodb_default_encryption_key_id_update,
762 			 FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0);
763 
764 /**
765   Structure for CREATE TABLE options (table options).
766   It needs to be called ha_table_option_struct.
767 
768   The option values can be specified in the CREATE TABLE at the end:
769   CREATE TABLE ( ... ) *here*
770 */
771 
772 ha_create_table_option innodb_table_option_list[]=
773 {
774   /* With this option user can enable page compression feature for the
775   table */
776   HA_TOPTION_SYSVAR("PAGE_COMPRESSED", page_compressed, compression_default),
777   /* With this option user can set zip compression level for page
778   compression for this table*/
779   HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
780   /* With this option the user can enable encryption for the table */
781   HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
782   /* With this option the user defines the key identifier using for the encryption */
783   HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id),
784 
785   HA_TOPTION_END
786 };
787 
788 /*************************************************************//**
789 Check whether valid argument given to innodb_ft_*_stopword_table.
790 This function is registered as a callback with MySQL.
791 @return 0 for valid stopword table */
792 static
793 int
794 innodb_stopword_table_validate(
795 /*===========================*/
796 	THD*				thd,	/*!< in: thread handle */
797 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
798 						variable */
799 	void*				save,	/*!< out: immediate result
800 						for update function */
801 	struct st_mysql_value*		value);	/*!< in: incoming string */
802 
803 static bool is_mysql_datadir_path(const char *path);
804 
805 /** Validate passed-in "value" is a valid directory name.
806 This function is registered as a callback with MySQL.
807 @param[in,out]	thd	thread handle
808 @param[in]	var	pointer to system variable
809 @param[out]	save	immediate result for update
810 @param[in]	value	incoming string
811 @return 0 for valid name */
812 static
813 int
innodb_tmpdir_validate(THD * thd,struct st_mysql_sys_var *,void * save,struct st_mysql_value * value)814 innodb_tmpdir_validate(
815 	THD*				thd,
816 	struct st_mysql_sys_var*,
817 	void*				save,
818 	struct st_mysql_value*		value)
819 {
820 
821 	char*	alter_tmp_dir;
822 	char*	innodb_tmp_dir;
823 	char	buff[OS_FILE_MAX_PATH];
824 	int	len = sizeof(buff);
825 	char	tmp_abs_path[FN_REFLEN + 2];
826 
827 	ut_ad(save != NULL);
828 	ut_ad(value != NULL);
829 
830 	if (check_global_access(thd, FILE_ACL)) {
831 		push_warning_printf(
832 			thd, Sql_condition::WARN_LEVEL_WARN,
833 			ER_WRONG_ARGUMENTS,
834 			"InnoDB: FILE Permissions required");
835 		*static_cast<const char**>(save) = NULL;
836 		return(1);
837 	}
838 
839 	alter_tmp_dir = (char*) value->val_str(value, buff, &len);
840 
841 	if (!alter_tmp_dir) {
842 		*static_cast<const char**>(save) = alter_tmp_dir;
843 		return(0);
844 	}
845 
846 	if (strlen(alter_tmp_dir) > FN_REFLEN) {
847 		push_warning_printf(
848 			thd, Sql_condition::WARN_LEVEL_WARN,
849 			ER_WRONG_ARGUMENTS,
850 			"Path length should not exceed %d bytes", FN_REFLEN);
851 		*static_cast<const char**>(save) = NULL;
852 		return(1);
853 	}
854 
855 	os_normalize_path(alter_tmp_dir);
856 	my_realpath(tmp_abs_path, alter_tmp_dir, 0);
857 	size_t	tmp_abs_len = strlen(tmp_abs_path);
858 
859 	if (my_access(tmp_abs_path, F_OK)) {
860 
861 		push_warning_printf(
862 			thd, Sql_condition::WARN_LEVEL_WARN,
863 			ER_WRONG_ARGUMENTS,
864 			"InnoDB: Path doesn't exist.");
865 		*static_cast<const char**>(save) = NULL;
866 		return(1);
867 	} else if (my_access(tmp_abs_path, R_OK | W_OK)) {
868 		push_warning_printf(
869 			thd, Sql_condition::WARN_LEVEL_WARN,
870 			ER_WRONG_ARGUMENTS,
871 			"InnoDB: Server doesn't have permission in "
872 			"the given location.");
873 		*static_cast<const char**>(save) = NULL;
874 		return(1);
875 	}
876 
877 	MY_STAT stat_info_dir;
878 
879 	if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) {
880 		if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) {
881 
882 			push_warning_printf(
883 				thd, Sql_condition::WARN_LEVEL_WARN,
884 				ER_WRONG_ARGUMENTS,
885 				"Given path is not a directory. ");
886 			*static_cast<const char**>(save) = NULL;
887 			return(1);
888 		}
889 	}
890 
891 	if (!is_mysql_datadir_path(tmp_abs_path)) {
892 
893 		push_warning_printf(
894 			thd, Sql_condition::WARN_LEVEL_WARN,
895 			ER_WRONG_ARGUMENTS,
896 			"InnoDB: Path Location should not be same as "
897 			"mysql data directory location.");
898 		*static_cast<const char**>(save) = NULL;
899 		return(1);
900 	}
901 
902 	innodb_tmp_dir = static_cast<char*>(
903 		thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1));
904 	*static_cast<const char**>(save) = innodb_tmp_dir;
905 	return(0);
906 }
907 
908 /******************************************************************//**
909 Maps a MySQL trx isolation level code to the InnoDB isolation level code
910 @return	InnoDB isolation level */
911 static inline
912 ulint
913 innobase_map_isolation_level(
914 /*=========================*/
915 	enum_tx_isolation	iso);	/*!< in: MySQL isolation level code */
916 
917 /** Gets field offset for a field in a table.
918 @param[in]	table	MySQL table object
919 @param[in]	field	MySQL field object (from table->field array)
920 @return offset */
921 static inline
922 uint
get_field_offset(const TABLE * table,const Field * field)923 get_field_offset(
924 	const TABLE*	table,
925 	const Field*	field)
926 {
927 	return field->offset(table->record[0]);
928 }
929 
930 
931 /*************************************************************//**
932 Check for a valid value of innobase_compression_algorithm.
933 @return	0 for valid innodb_compression_algorithm. */
934 static
935 int
936 innodb_compression_algorithm_validate(
937 /*==================================*/
938 	THD*				thd,	/*!< in: thread handle */
939 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
940 						variable */
941 	void*				save,	/*!< out: immediate result
942 						for update function */
943 	struct st_mysql_value*		value);	/*!< in: incoming string */
944 
945 static ibool innodb_have_lzo=IF_LZO(1, 0);
946 static ibool innodb_have_lz4=IF_LZ4(1, 0);
947 static ibool innodb_have_lzma=IF_LZMA(1, 0);
948 static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
949 static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
950 static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
951 
952 static
953 int
954 innodb_encrypt_tables_validate(
955 /*==================================*/
956 	THD*				thd,	/*!< in: thread handle */
957 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
958 						variable */
959 	void*				save,	/*!< out: immediate result
960 						for update function */
961 	struct st_mysql_value*		value);	/*!< in: incoming string */
962 
963 static const char innobase_hton_name[]= "InnoDB";
964 
965 static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
966   "Enable InnoDB locking in LOCK TABLES",
967   /* check_func */ NULL, /* update_func */ NULL,
968   /* default */ TRUE);
969 
970 static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
971   "Use strict mode when evaluating create options.",
972   NULL, NULL, TRUE);
973 
974 static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
975   "Create FTS index with stopword.",
976   NULL, NULL,
977   /* default */ TRUE);
978 
979 static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
980   "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
981   NULL, NULL, 50, 0, 1024 * 1024 * 1024, 0);
982 
983 static MYSQL_THDVAR_STR(ft_user_stopword_table,
984   PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
985   "User supplied stopword table name, effective in the session level.",
986   innodb_stopword_table_validate, NULL, NULL);
987 
988 static MYSQL_THDVAR_STR(tmpdir,
989   PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
990   "Directory for temporary non-tablespace files.",
991   innodb_tmpdir_validate, NULL, NULL);
992 
993 static SHOW_VAR innodb_status_variables[]= {
994   {"buffer_pool_dump_status",
995   (char*) &export_vars.innodb_buffer_pool_dump_status,	  SHOW_CHAR},
996   {"buffer_pool_load_status",
997   (char*) &export_vars.innodb_buffer_pool_load_status,	  SHOW_CHAR},
998   {"buffer_pool_resize_status",
999   (char*) &export_vars.innodb_buffer_pool_resize_status,  SHOW_CHAR},
1000   {"buffer_pool_load_incomplete",
1001   &export_vars.innodb_buffer_pool_load_incomplete,        SHOW_BOOL},
1002   {"buffer_pool_pages_data",
1003   (char*) &export_vars.innodb_buffer_pool_pages_data,	  SHOW_LONG},
1004   {"buffer_pool_bytes_data",
1005   (char*) &export_vars.innodb_buffer_pool_bytes_data,	  SHOW_LONG},
1006   {"buffer_pool_pages_dirty",
1007   (char*) &export_vars.innodb_buffer_pool_pages_dirty,	  SHOW_LONG},
1008   {"buffer_pool_bytes_dirty",
1009   (char*) &export_vars.innodb_buffer_pool_bytes_dirty,	  SHOW_LONG},
1010   {"buffer_pool_pages_flushed",
1011   (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
1012   {"buffer_pool_pages_free",
1013   (char*) &export_vars.innodb_buffer_pool_pages_free,	  SHOW_LONG},
1014 #ifdef UNIV_DEBUG
1015   {"buffer_pool_pages_latched",
1016   (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
1017 #endif /* UNIV_DEBUG */
1018   {"buffer_pool_pages_misc",
1019   (char*) &export_vars.innodb_buffer_pool_pages_misc,	  SHOW_LONG},
1020   {"buffer_pool_pages_total",
1021   (char*) &export_vars.innodb_buffer_pool_pages_total,	  SHOW_LONG},
1022   {"buffer_pool_read_ahead_rnd",
1023   (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
1024   {"buffer_pool_read_ahead",
1025   (char*) &export_vars.innodb_buffer_pool_read_ahead,	  SHOW_LONG},
1026   {"buffer_pool_read_ahead_evicted",
1027   (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
1028   {"buffer_pool_read_requests",
1029   (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
1030   {"buffer_pool_reads",
1031   (char*) &export_vars.innodb_buffer_pool_reads,	  SHOW_LONG},
1032   {"buffer_pool_wait_free",
1033   (char*) &export_vars.innodb_buffer_pool_wait_free,	  SHOW_LONG},
1034   {"buffer_pool_write_requests",
1035   (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
1036   {"data_fsyncs",
1037   (char*) &export_vars.innodb_data_fsyncs,		  SHOW_LONG},
1038   {"data_pending_fsyncs",
1039   (char*) &export_vars.innodb_data_pending_fsyncs,	  SHOW_LONG},
1040   {"data_pending_reads",
1041   (char*) &export_vars.innodb_data_pending_reads,	  SHOW_LONG},
1042   {"data_pending_writes",
1043   (char*) &export_vars.innodb_data_pending_writes,	  SHOW_LONG},
1044   {"data_read",
1045   (char*) &export_vars.innodb_data_read,		  SHOW_LONG},
1046   {"data_reads",
1047   (char*) &export_vars.innodb_data_reads,		  SHOW_LONG},
1048   {"data_writes",
1049   (char*) &export_vars.innodb_data_writes,		  SHOW_LONG},
1050   {"data_written",
1051   (char*) &export_vars.innodb_data_written,		  SHOW_LONG},
1052   {"dblwr_pages_written",
1053   (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
1054   {"dblwr_writes",
1055   (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
1056   {"log_waits",
1057   (char*) &export_vars.innodb_log_waits,		  SHOW_LONG},
1058   {"log_write_requests",
1059   (char*) &export_vars.innodb_log_write_requests,	  SHOW_LONG},
1060   {"log_writes",
1061   (char*) &export_vars.innodb_log_writes,		  SHOW_LONG},
1062   {"os_log_fsyncs",
1063   (char*) &export_vars.innodb_os_log_fsyncs,		  SHOW_LONG},
1064   {"os_log_pending_fsyncs",
1065   (char*) &export_vars.innodb_os_log_pending_fsyncs,	  SHOW_LONG},
1066   {"os_log_pending_writes",
1067   (char*) &export_vars.innodb_os_log_pending_writes,	  SHOW_LONG},
1068   {"os_log_written",
1069   (char*) &export_vars.innodb_os_log_written,		  SHOW_LONGLONG},
1070   {"page_size",
1071   (char*) &export_vars.innodb_page_size,		  SHOW_LONG},
1072   {"pages_created",
1073   (char*) &export_vars.innodb_pages_created,		  SHOW_LONG},
1074   {"pages_read",
1075   (char*) &export_vars.innodb_pages_read,		  SHOW_LONG},
1076   {"pages0_read",
1077   (char*) &export_vars.innodb_page0_read,		  SHOW_LONG},
1078   {"pages_written",
1079   (char*) &export_vars.innodb_pages_written,		  SHOW_LONG},
1080   {"row_lock_current_waits",
1081   (char*) &export_vars.innodb_row_lock_current_waits,	  SHOW_LONG},
1082   {"row_lock_time",
1083   (char*) &export_vars.innodb_row_lock_time,		  SHOW_LONGLONG},
1084   {"row_lock_time_avg",
1085   (char*) &export_vars.innodb_row_lock_time_avg,	  SHOW_LONG},
1086   {"row_lock_time_max",
1087   (char*) &export_vars.innodb_row_lock_time_max,	  SHOW_LONG},
1088   {"row_lock_waits",
1089   (char*) &export_vars.innodb_row_lock_waits,		  SHOW_LONG},
1090   {"rows_deleted",
1091   (char*) &export_vars.innodb_rows_deleted,		  SHOW_LONG},
1092   {"rows_inserted",
1093   (char*) &export_vars.innodb_rows_inserted,		  SHOW_LONG},
1094   {"rows_read",
1095   (char*) &export_vars.innodb_rows_read,		  SHOW_LONG},
1096   {"rows_updated",
1097   (char*) &export_vars.innodb_rows_updated,		  SHOW_LONG},
1098   {"system_rows_deleted",
1099   (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG},
1100   {"system_rows_inserted",
1101   (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG},
1102   {"system_rows_read",
1103   (char*) &export_vars.innodb_system_rows_read, SHOW_LONG},
1104   {"system_rows_updated",
1105   (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG},
1106   {"num_open_files",
1107   (char*) &export_vars.innodb_num_open_files,		  SHOW_LONG},
1108   {"truncated_status_writes",
1109   (char*) &export_vars.innodb_truncated_status_writes,	  SHOW_LONG},
1110   {"available_undo_logs",
1111   (char*) &export_vars.innodb_available_undo_logs,        SHOW_LONG},
1112   {"undo_truncations",
1113   (char*) &export_vars.innodb_undo_truncations,           SHOW_LONG},
1114 
1115   /* Status variables for page compression */
1116   {"page_compression_saved",
1117    (char*) &export_vars.innodb_page_compression_saved,    SHOW_LONGLONG},
1118   {"num_index_pages_written",
1119    (char*) &export_vars.innodb_index_pages_written,       SHOW_LONGLONG},
1120   {"num_non_index_pages_written",
1121    (char*) &export_vars.innodb_non_index_pages_written,       SHOW_LONGLONG},
1122   {"num_pages_page_compressed",
1123    (char*) &export_vars.innodb_pages_page_compressed,     SHOW_LONGLONG},
1124   {"num_page_compressed_trim_op",
1125    (char*) &export_vars.innodb_page_compressed_trim_op,     SHOW_LONGLONG},
1126   {"num_pages_page_decompressed",
1127    (char*) &export_vars.innodb_pages_page_decompressed,   SHOW_LONGLONG},
1128   {"num_pages_page_compression_error",
1129    (char*) &export_vars.innodb_pages_page_compression_error,   SHOW_LONGLONG},
1130   {"num_pages_encrypted",
1131    (char*) &export_vars.innodb_pages_encrypted,   SHOW_LONGLONG},
1132   {"num_pages_decrypted",
1133    (char*) &export_vars.innodb_pages_decrypted,   SHOW_LONGLONG},
1134   {"have_lz4",
1135   (char*) &innodb_have_lz4,		  SHOW_BOOL},
1136   {"have_lzo",
1137   (char*) &innodb_have_lzo,		  SHOW_BOOL},
1138   {"have_lzma",
1139   (char*) &innodb_have_lzma,		  SHOW_BOOL},
1140   {"have_bzip2",
1141   (char*) &innodb_have_bzip2,		  SHOW_BOOL},
1142   {"have_snappy",
1143   (char*) &innodb_have_snappy,		  SHOW_BOOL},
1144   {"have_punch_hole",
1145   (char*) &innodb_have_punch_hole,	  SHOW_BOOL},
1146 
1147   /* Defragmentation */
1148   {"defragment_compression_failures",
1149   (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
1150   {"defragment_failures",
1151   (char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
1152   {"defragment_count",
1153   (char*) &export_vars.innodb_defragment_count, SHOW_LONG},
1154 
1155   {"instant_alter_column",
1156   (char*) &export_vars.innodb_instant_alter_column, SHOW_LONG},
1157 
1158   /* Online alter table status variables */
1159   {"onlineddl_rowlog_rows",
1160   (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
1161   {"onlineddl_rowlog_pct_used",
1162   (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG},
1163   {"onlineddl_pct_progress",
1164   (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG},
1165 
1166   /* Times secondary index lookup triggered cluster lookup and
1167   times prefix optimization avoided triggering cluster lookup */
1168   {"secondary_index_triggered_cluster_reads",
1169   (char*) &export_vars.innodb_sec_rec_cluster_reads,	  SHOW_LONG},
1170   {"secondary_index_triggered_cluster_reads_avoided",
1171   (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
1172 
1173   /* Encryption */
1174   {"encryption_rotation_pages_read_from_cache",
1175    (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
1176    SHOW_LONG},
1177   {"encryption_rotation_pages_read_from_disk",
1178   (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
1179    SHOW_LONG},
1180   {"encryption_rotation_pages_modified",
1181   (char*) &export_vars.innodb_encryption_rotation_pages_modified,
1182    SHOW_LONG},
1183   {"encryption_rotation_pages_flushed",
1184   (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
1185    SHOW_LONG},
1186   {"encryption_rotation_estimated_iops",
1187   (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
1188    SHOW_LONG},
1189   {"encryption_key_rotation_list_length",
1190   (char*)&export_vars.innodb_key_rotation_list_length,
1191    SHOW_LONGLONG},
1192   {"encryption_n_merge_blocks_encrypted",
1193   (char*)&export_vars.innodb_n_merge_blocks_encrypted,
1194    SHOW_LONGLONG},
1195   {"encryption_n_merge_blocks_decrypted",
1196   (char*)&export_vars.innodb_n_merge_blocks_decrypted,
1197    SHOW_LONGLONG},
1198   {"encryption_n_rowlog_blocks_encrypted",
1199   (char*)&export_vars.innodb_n_rowlog_blocks_encrypted,
1200    SHOW_LONGLONG},
1201   {"encryption_n_rowlog_blocks_decrypted",
1202   (char*)&export_vars.innodb_n_rowlog_blocks_decrypted,
1203    SHOW_LONGLONG},
1204   {"encryption_n_temp_blocks_encrypted",
1205   (char*)&export_vars.innodb_n_temp_blocks_encrypted,
1206    SHOW_LONGLONG},
1207   {"encryption_n_temp_blocks_decrypted",
1208   (char*)&export_vars.innodb_n_temp_blocks_decrypted,
1209    SHOW_LONGLONG},
1210 
1211   /* scrubing */
1212   {"scrub_background_page_reorganizations",
1213    (char*) &export_vars.innodb_scrub_page_reorganizations,
1214    SHOW_LONG},
1215   {"scrub_background_page_splits",
1216    (char*) &export_vars.innodb_scrub_page_splits,
1217    SHOW_LONG},
1218   {"scrub_background_page_split_failures_underflow",
1219    (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
1220    SHOW_LONG},
1221   {"scrub_background_page_split_failures_out_of_filespace",
1222    (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
1223    SHOW_LONG},
1224   {"scrub_background_page_split_failures_missing_index",
1225    (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
1226    SHOW_LONG},
1227   {"scrub_background_page_split_failures_unknown",
1228    (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
1229    SHOW_LONG},
1230   {"scrub_log",
1231    (char*) &export_vars.innodb_scrub_log,
1232    SHOW_LONGLONG},
1233   {"encryption_num_key_requests",
1234    (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
1235 
1236   {NullS, NullS, SHOW_LONG}
1237 };
1238 
1239 /*****************************************************************//**
1240 Frees a possible InnoDB trx object associated with the current THD.
1241 @return 0 or error number */
1242 static
1243 int
1244 innobase_close_connection(
1245 /*======================*/
1246 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1247 	THD*		thd);		/*!< in: MySQL thread handle for
1248 					which to close the connection */
1249 
1250 /** Cancel any pending lock request associated with the current THD.
1251 @sa THD::awake() @sa ha_kill_query() */
1252 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels);
1253 static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
1254 
1255 /*****************************************************************//**
1256 Commits a transaction in an InnoDB database or marks an SQL statement
1257 ended.
1258 @return 0 */
1259 static
1260 int
1261 innobase_commit(
1262 /*============*/
1263 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1264 	THD*		thd,		/*!< in: MySQL thread handle of the
1265 					user for whom the transaction should
1266 					be committed */
1267 	bool		commit_trx);	/*!< in: true - commit transaction
1268 					false - the current SQL statement
1269 					ended */
1270 
1271 /*****************************************************************//**
1272 Rolls back a transaction to a savepoint.
1273 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1274 given name */
1275 static
1276 int
1277 innobase_rollback(
1278 /*==============*/
1279 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1280 	THD*		thd,		/*!< in: handle to the MySQL thread
1281 					of the user whose transaction should
1282 					be rolled back */
1283 	bool		rollback_trx);	/*!< in: TRUE - rollback entire
1284 					transaction FALSE - rollback the current
1285 					statement only */
1286 
1287 /*****************************************************************//**
1288 Rolls back a transaction to a savepoint.
1289 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1290 given name */
1291 static
1292 int
1293 innobase_rollback_to_savepoint(
1294 /*===========================*/
1295 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1296 	THD*		thd,		/*!< in: handle to the MySQL thread of
1297 					the user whose XA transaction should
1298 					be rolled back to savepoint */
1299 	void*		savepoint);	/*!< in: savepoint data */
1300 
1301 /*****************************************************************//**
1302 Check whether innodb state allows to safely release MDL locks after
1303 rollback to savepoint.
1304 @return true if it is safe, false if its not safe. */
1305 static
1306 bool
1307 innobase_rollback_to_savepoint_can_release_mdl(
1308 /*===========================================*/
1309 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1310 	THD*		thd);		/*!< in: handle to the MySQL thread of
1311 					the user whose XA transaction should
1312 					be rolled back to savepoint */
1313 
1314 /*****************************************************************//**
1315 Sets a transaction savepoint.
1316 @return always 0, that is, always succeeds */
1317 static
1318 int
1319 innobase_savepoint(
1320 /*===============*/
1321 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1322 	THD*		thd,		/*!< in: handle to the MySQL thread of
1323 					the user's XA transaction for which
1324 					we need to take a savepoint */
1325 	void*		savepoint);	/*!< in: savepoint data */
1326 
1327 /*****************************************************************//**
1328 Release transaction savepoint name.
1329 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1330 given name */
1331 static
1332 int
1333 innobase_release_savepoint(
1334 /*=======================*/
1335 	handlerton*	hton,		/*!< in/out: handlerton for InnoDB */
1336 	THD*		thd,		/*!< in: handle to the MySQL thread
1337 					of the user whose transaction's
1338 					savepoint should be released */
1339 	void*		savepoint);	/*!< in: savepoint data */
1340 
1341 static void innobase_checkpoint_request(handlerton *hton, void *cookie);
1342 
/** @brief Initialize the default value of innodb_commit_concurrency
(forward declaration).

While InnoDB is running, innodb_commit_concurrency must never switch
from zero to nonzero (Bug #42101).

The built-in default is 0. Without this extra initialization,
SET GLOBAL innodb_commit_concurrency=DEFAULT would reset the parameter
to 0 even when a nonzero value had been given on the command line or in
the configuration file. */
static void innobase_commit_concurrency_init_default();
1356 
/** @brief Adjust some InnoDB startup parameters based on file contents
or innodb_page_size (forward declaration). */
static void innodb_params_adjust();
1362 
1363 /*******************************************************************//**
1364 This function is used to prepare an X/Open XA distributed transaction.
1365 @return 0 or error number */
1366 static
1367 int
1368 innobase_xa_prepare(
1369 /*================*/
1370 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1371 	THD*		thd,		/*!< in: handle to the MySQL thread of
1372 					the user whose XA transaction should
1373 					be prepared */
1374 	bool		all);		/*!< in: true - prepare transaction
1375 					false - the current SQL statement
1376 					ended */
1377 /*******************************************************************//**
1378 This function is used to recover X/Open XA distributed transactions.
1379 @return number of prepared transactions stored in xid_list */
1380 static
1381 int
1382 innobase_xa_recover(
1383 /*================*/
1384 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1385 	XID*		xid_list,	/*!< in/out: prepared transactions */
1386 	uint		len);		/*!< in: number of slots in xid_list */
1387 /*******************************************************************//**
1388 This function is used to commit one X/Open XA distributed transaction
1389 which is in the prepared state
1390 @return 0 or error number */
1391 static
1392 int
1393 innobase_commit_by_xid(
1394 /*===================*/
1395 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1396 	XID*		xid);		/*!< in: X/Open XA transaction
1397 					identification */
1398 /** Remove all tables in the named database inside InnoDB.
1399 @param[in]	hton	handlerton from InnoDB
1400 @param[in]	path	Database path; Inside InnoDB the name of the last
1401 directory in the path is used as the database name.
1402 For example, in 'mysql/data/test' the database name is 'test'. */
1403 static
1404 void
1405 innobase_drop_database(
1406 	handlerton*	hton,
1407 	char*		path);
1408 
1409 /** Shut down the InnoDB storage engine.
1410 @return	0 */
1411 static
1412 int
1413 innobase_end(handlerton*, ha_panic_function);
1414 
1415 /*****************************************************************//**
1416 Creates an InnoDB transaction struct for the thd if it does not yet have one.
1417 Starts a new InnoDB transaction if a transaction is not yet started. And
1418 assigns a new snapshot for a consistent read if the transaction does not yet
1419 have one.
1420 @return 0 */
1421 static
1422 int
1423 innobase_start_trx_and_assign_read_view(
1424 /*====================================*/
1425 	handlerton*	hton,		/* in: InnoDB handlerton */
1426 	THD*		thd);		/* in: MySQL thread handle of the
1427 					user for whom the transaction should
1428 					be committed */
1429 
1430 /** Flush InnoDB redo logs to the file system.
1431 @param[in]	hton			InnoDB handlerton
1432 @param[in]	binlog_group_flush	true if we got invoked by binlog
1433 group commit during flush stage, false in other cases.
1434 @return false */
1435 static
1436 bool
innobase_flush_logs(handlerton * hton,bool binlog_group_flush)1437 innobase_flush_logs(
1438 	handlerton*	hton,
1439 	bool		binlog_group_flush)
1440 {
1441 	DBUG_ENTER("innobase_flush_logs");
1442 	DBUG_ASSERT(hton == innodb_hton_ptr);
1443 
1444 	if (srv_read_only_mode) {
1445 		DBUG_RETURN(false);
1446 	}
1447 
1448 	/* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar.
1449 	Else, we got invoked by binlog group commit during flush stage. */
1450 
1451 	if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) {
1452 		/* innodb_flush_log_at_trx_commit=0
1453 		(write and sync once per second).
1454 		Do not flush the redo log during binlog group commit. */
1455 		DBUG_RETURN(false);
1456 	}
1457 
1458 	/* Flush the redo log buffer to the redo log file.
1459 	Sync it to disc if we are in FLUSH LOGS, or if
1460 	innodb_flush_log_at_trx_commit=1
1461 	(write and sync at each commit). */
1462 	log_buffer_flush_to_disk(!binlog_group_flush
1463 				 || srv_flush_log_at_trx_commit == 1);
1464 
1465 	DBUG_RETURN(false);
1466 }
1467 
1468 /** Flush InnoDB redo logs to the file system.
1469 @param[in]	hton			InnoDB handlerton
1470 @param[in]	binlog_group_flush	true if we got invoked by binlog
1471 group commit during flush stage, false in other cases.
1472 @return false */
1473 static
1474 bool
innobase_flush_logs(handlerton * hton)1475 innobase_flush_logs(
1476 	handlerton*	hton)
1477 {
1478 	return innobase_flush_logs(hton, true);
1479 }
1480 
1481 /************************************************************************//**
1482 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
1483 InnoDB Monitor to the client.
1484 @return 0 on success */
1485 static
1486 int
1487 innodb_show_status(
1488 /*===============*/
1489 	handlerton*	hton,		/*!< in: the innodb handlerton */
1490 	THD*		thd,		/*!< in: the MySQL query thread of
1491 					the caller */
1492 	stat_print_fn*	stat_print);
1493 /************************************************************************//**
1494 Return 0 on success and non-zero on failure. Note: the bool return type
1495 seems to be abused here, should be an int. */
1496 static
1497 bool
1498 innobase_show_status(
1499 /*=================*/
1500 	handlerton*		hton,	/*!< in: the innodb handlerton */
1501 	THD*			thd,	/*!< in: the MySQL query thread of
1502 					the caller */
1503 	stat_print_fn*		stat_print,
1504 	enum ha_stat_type	stat_type);
1505 
/** Parse and enable InnoDB monitor counters during server startup
(forward declaration). Monitor counters/groups can be enabled by
specifying
"loose-innodb_monitor_enable = monitor_name1;monitor_name2..."
in the server configuration file or at the command line.
@param[in]	str	monitor counter enable list */
static void innodb_enable_monitor_at_startup(char* str);
1516 
#ifdef MYSQL_STORE_FTS_DOC_ID
/** Write a document id into the FTS_DOC_ID field of a table.
@param[in,out]	tbl	table containing FULLTEXT index
@param[in]	doc_id	FTS_DOC_ID value */
static void innobase_fts_store_docid(TABLE* tbl, ulonglong doc_id)
{
	/* Swap the write set's column map (an "all columns" map, going
	by the helper's name) and remember the previous one so that it
	can be put back once the value has been stored. */
	my_bitmap_map* const	saved_map
		= dbug_tmp_use_all_columns(tbl, tbl->write_set);
	const longlong		signed_id = static_cast<longlong>(doc_id);

	/* The second argument is passed as true, as in the original
	call. */
	tbl->fts_doc_id_field->store(signed_id, true);

	dbug_tmp_restore_column_map(tbl->write_set, saved_map);
}
#endif /* MYSQL_STORE_FTS_DOC_ID */
1535 
1536 /*************************************************************//**
1537 Check for a valid value of innobase_commit_concurrency.
1538 @return 0 for valid innodb_commit_concurrency */
1539 static
1540 int
innobase_commit_concurrency_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)1541 innobase_commit_concurrency_validate(
1542 /*=================================*/
1543 	THD*, st_mysql_sys_var*,
1544 	void*				save,	/*!< out: immediate result
1545 						for update function */
1546 	struct st_mysql_value*		value)	/*!< in: incoming string */
1547 {
1548 	long long	intbuf;
1549 	ulong		commit_concurrency;
1550 
1551 	DBUG_ENTER("innobase_commit_concurrency_validate");
1552 
1553 	if (value->val_int(value, &intbuf)) {
1554 		/* The value is NULL. That is invalid. */
1555 		DBUG_RETURN(1);
1556 	}
1557 
1558 	*reinterpret_cast<ulong*>(save) = commit_concurrency
1559 		= static_cast<ulong>(intbuf);
1560 
1561 	/* Allow the value to be updated, as long as it remains zero
1562 	or nonzero. */
1563 	DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency));
1564 }
1565 
1566 /*******************************************************************//**
1567 Function for constructing an InnoDB table handler instance. */
1568 static
1569 handler*
innobase_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)1570 innobase_create_handler(
1571 /*====================*/
1572 	handlerton*	hton,	/*!< in: InnoDB handlerton */
1573 	TABLE_SHARE*	table,
1574 	MEM_ROOT*	mem_root)
1575 {
1576 	return(new (mem_root) ha_innobase(hton, table));
1577 }
1578 
1579 /* General functions */
1580 
1581 /** Check that a page_size is correct for InnoDB.
1582 If correct, set the associated page_size_shift which is the power of 2
1583 for this page size.
1584 @param[in]	page_size	Page Size to evaluate
1585 @return an associated page_size_shift if valid, 0 if invalid. */
1586 inline
1587 ulong
innodb_page_size_validate(ulong page_size)1588 innodb_page_size_validate(
1589 	ulong	page_size)
1590 {
1591 	ulong		n;
1592 
1593 	DBUG_ENTER("innodb_page_size_validate");
1594 
1595 	for (n = UNIV_PAGE_SIZE_SHIFT_MIN;
1596 	     n <= UNIV_PAGE_SIZE_SHIFT_MAX;
1597 	     n++) {
1598 		if (page_size == static_cast<ulong>(1 << n)) {
1599 			DBUG_RETURN(n);
1600 		}
1601 	}
1602 
1603 	DBUG_RETURN(0);
1604 }
1605 
1606 /******************************************************************//**
1607 Returns true if the thread is the replication thread on the slave
1608 server. Used in srv_conc_enter_innodb() to determine if the thread
1609 should be allowed to enter InnoDB - the replication thread is treated
1610 differently than other threads. Also used in
1611 srv_conc_force_exit_innodb().
1612 @return true if thd is the replication thread */
1613 ibool
thd_is_replication_slave_thread(THD * thd)1614 thd_is_replication_slave_thread(
1615 /*============================*/
1616 	THD*	thd)	/*!< in: thread handle */
1617 {
1618 	return thd && ((ibool) thd_slave_thread(thd));
1619 }
1620 
1621 /******************************************************************//**
1622 Returns true if transaction should be flagged as read-only.
1623 @return true if the thd is marked as read-only */
1624 bool
thd_trx_is_read_only(THD * thd)1625 thd_trx_is_read_only(
1626 /*=================*/
1627 	THD*	thd)	/*!< in: thread handle */
1628 {
1629 	return(thd != 0 && thd_tx_is_read_only(thd));
1630 }
1631 
1632 static MYSQL_THDVAR_BOOL(background_thread,
1633 			 PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOSYSVAR,
1634 			 "Internal (not user visible) flag to mark "
1635 			 "background purge threads", NULL, NULL, 0);
1636 
1637 /** Create a MYSQL_THD for a background thread and mark it as such.
1638 @param name thread info for SHOW PROCESSLIST
1639 @return new MYSQL_THD */
1640 MYSQL_THD
innobase_create_background_thd(const char * name)1641 innobase_create_background_thd(const char* name)
1642 /*============================*/
1643 {
1644 	MYSQL_THD thd= create_thd();
1645 	thd_proc_info(thd, name);
1646 	THDVAR(thd, background_thread) = true;
1647 	return thd;
1648 }
1649 
1650 
1651 /** Destroy a background purge thread THD.
1652 @param[in]	thd	MYSQL_THD to destroy */
1653 void
innobase_destroy_background_thd(MYSQL_THD thd)1654 innobase_destroy_background_thd(
1655 /*============================*/
1656 	MYSQL_THD thd)
1657 {
1658 	/* need to close the connection explicitly, the server won't do it
1659 	if innodb is in the PLUGIN_IS_DYING state */
1660 	innobase_close_connection(innodb_hton_ptr, thd);
1661 	thd_set_ha_data(thd, innodb_hton_ptr, NULL);
1662 	destroy_thd(thd);
1663 }
1664 
1665 /** Close opened tables, free memory, delete items for a MYSQL_THD.
1666 @param[in]	thd	MYSQL_THD to reset */
1667 void
innobase_reset_background_thd(MYSQL_THD thd)1668 innobase_reset_background_thd(MYSQL_THD thd)
1669 {
1670 	if (!thd) {
1671 		thd = current_thd;
1672 	}
1673 
1674 	ut_ad(thd);
1675 	ut_ad(THDVAR(thd, background_thread));
1676 
1677 	/* background purge thread */
1678 	const char *proc_info= thd_proc_info(thd, "reset");
1679 	reset_thd(thd);
1680 	thd_proc_info(thd, proc_info);
1681 }
1682 
1683 
1684 /******************************************************************//**
1685 Check if the transaction is an auto-commit transaction. TRUE also
1686 implies that it is a SELECT (read-only) transaction.
1687 @return true if the transaction is an auto commit read-only transaction. */
1688 ibool
thd_trx_is_auto_commit(THD * thd)1689 thd_trx_is_auto_commit(
1690 /*===================*/
1691 	THD*	thd)	/*!< in: thread handle, can be NULL */
1692 {
1693 	return(thd != NULL
1694 	       && !thd_test_options(
1695 		       thd,
1696 		       OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
1697 	       && thd_is_select(thd));
1698 }
1699 
1700 /** Enter InnoDB engine after checking the max number of user threads
1701 allowed, else the thread is put into sleep.
1702 @param[in,out]	prebuilt	row prebuilt handler */
innobase_srv_conc_enter_innodb(row_prebuilt_t * prebuilt)1703 static inline void innobase_srv_conc_enter_innodb(row_prebuilt_t *prebuilt)
1704 {
1705 	trx_t* trx = prebuilt->trx;
1706 
1707 #ifdef WITH_WSREP
1708 	if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
1709 #endif /* WITH_WSREP */
1710 
1711 	if (srv_thread_concurrency) {
1712 		if (trx->n_tickets_to_enter_innodb > 0) {
1713 
1714 			/* If trx has 'free tickets' to enter the engine left,
1715 			then use one such ticket */
1716 
1717 			--trx->n_tickets_to_enter_innodb;
1718 
1719 		} else if (trx->mysql_thd != NULL
1720 			   && thd_is_replication_slave_thread(trx->mysql_thd)) {
1721 			const ulonglong end = my_interval_timer()
1722 				+ ulonglong(srv_replication_delay) * 1000000;
1723 			while ((srv_conc_get_active_threads()
1724 			        >= srv_thread_concurrency)
1725 			       && my_interval_timer() < end) {
1726 				os_thread_sleep(2000 /* 2 ms */);
1727 			}
1728 		} else {
1729 			srv_conc_enter_innodb(prebuilt);
1730 		}
1731 	}
1732 }
1733 
1734 /** Note that the thread wants to leave InnoDB only if it doesn't have
1735 any spare tickets.
1736 @param[in,out]	m_prebuilt	row prebuilt handler */
innobase_srv_conc_exit_innodb(row_prebuilt_t * prebuilt)1737 static inline void innobase_srv_conc_exit_innodb(row_prebuilt_t *prebuilt)
1738 {
1739 	ut_ad(!sync_check_iterate(sync_check()));
1740 
1741 	trx_t* trx = prebuilt->trx;
1742 
1743 #ifdef WITH_WSREP
1744 	if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return;
1745 #endif /* WITH_WSREP */
1746 
1747 	/* This is to avoid making an unnecessary function call. */
1748 	if (trx->declared_to_be_inside_innodb
1749 	    && trx->n_tickets_to_enter_innodb == 0) {
1750 
1751 		srv_conc_force_exit_innodb(trx);
1752 	}
1753 }
1754 
1755 /******************************************************************//**
1756 Force a thread to leave InnoDB even if it has spare tickets. */
1757 static inline
1758 void
innobase_srv_conc_force_exit_innodb(trx_t * trx)1759 innobase_srv_conc_force_exit_innodb(
1760 /*================================*/
1761 	trx_t*	trx)	/*!< in: transaction handle */
1762 {
1763 	ut_ad(!sync_check_iterate(sync_check()));
1764 
1765 	/* This is to avoid making an unnecessary function call. */
1766 	if (trx->declared_to_be_inside_innodb) {
1767 		srv_conc_force_exit_innodb(trx);
1768 	}
1769 }
1770 
1771 /******************************************************************//**
1772 Returns the NUL terminated value of glob_hostname.
1773 @return pointer to glob_hostname. */
1774 const char*
server_get_hostname()1775 server_get_hostname()
1776 /*=================*/
1777 {
1778 	return(glob_hostname);
1779 }
1780 
1781 /******************************************************************//**
1782 Returns true if the transaction this thread is processing has edited
1783 non-transactional tables. Used by the deadlock detector when deciding
1784 which transaction to rollback in case of a deadlock - we try to avoid
1785 rolling back transactions that have edited non-transactional tables.
1786 @return true if non-transactional tables have been edited */
1787 ibool
thd_has_edited_nontrans_tables(THD * thd)1788 thd_has_edited_nontrans_tables(
1789 /*===========================*/
1790 	THD*	thd)	/*!< in: thread handle */
1791 {
1792 	return((ibool) thd_non_transactional_update(thd));
1793 }
1794 
1795 /* Return high resolution timestamp for the start of the current query */
1796 UNIV_INTERN
1797 unsigned long long
thd_query_start_micro(const THD * thd)1798 thd_query_start_micro(
1799 	const THD*	thd)	/*!< in: thread handle */
1800 {
1801 	return thd_start_utime(thd);
1802 }
1803 
1804 /******************************************************************//**
1805 Returns true if the thread is executing a SELECT statement.
1806 @return true if thd is executing SELECT */
1807 ibool
thd_is_select(const THD * thd)1808 thd_is_select(
1809 /*==========*/
1810 	const THD*	thd)	/*!< in: thread handle */
1811 {
1812 	return(thd_sql_command(thd) == SQLCOM_SELECT);
1813 }
1814 
1815 /******************************************************************//**
1816 Returns the lock wait timeout for the current connection.
1817 @return the lock wait timeout, in seconds */
1818 ulong
thd_lock_wait_timeout(THD * thd)1819 thd_lock_wait_timeout(
1820 /*==================*/
1821 	THD*	thd)	/*!< in: thread handle, or NULL to query
1822 			the global innodb_lock_wait_timeout */
1823 {
1824 	/* According to <mysql/plugin.h>, passing thd == NULL
1825 	returns the global value of the session variable. */
1826 	return(THDVAR(thd, lock_wait_timeout));
1827 }
1828 
1829 /** Get the value of innodb_tmpdir.
1830 @param[in]	thd	thread handle, or NULL to query
1831 			the global innodb_tmpdir.
1832 @retval NULL if innodb_tmpdir="" */
1833 const char*
thd_innodb_tmpdir(THD * thd)1834 thd_innodb_tmpdir(
1835 	THD*	thd)
1836 {
1837 	ut_ad(!sync_check_iterate(sync_check()));
1838 
1839 	const char*	tmp_dir = THDVAR(thd, tmpdir);
1840 
1841 	if (tmp_dir != NULL && *tmp_dir == '\0') {
1842 		tmp_dir = NULL;
1843 	}
1844 
1845 	return(tmp_dir);
1846 }
1847 
1848 /** Obtain the InnoDB transaction of a MySQL thread.
1849 @param[in,out]	thd	thread handle
1850 @return reference to transaction pointer */
thd_to_trx(THD * thd)1851 static trx_t* thd_to_trx(THD* thd)
1852 {
1853 	return reinterpret_cast<trx_t*>(thd_get_ha_data(thd, innodb_hton_ptr));
1854 }
1855 
#ifdef WITH_WSREP
/** Obtain the InnoDB transaction id of a MySQL thread.
The transaction pointer returned by thd_to_trx() is dereferenced
without a NULL check.
@param[in]	thd	MySQL thread
@return transaction id */
__attribute__((warn_unused_result, nonnull))
ulonglong
thd_to_trx_id(
	THD*	thd)
{
	const trx_t*	trx = thd_to_trx(thd);

	return(trx->id);
}
#endif /* WITH_WSREP */
1868 
1869 /********************************************************************//**
1870 Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
1871 time calls srv_active_wake_master_thread. This function should be used
1872 when a single database operation may introduce a small need for
1873 server utility activity, like checkpointing. */
1874 inline
1875 void
innobase_active_small(void)1876 innobase_active_small(void)
1877 /*=======================*/
1878 {
1879 	innobase_active_counter++;
1880 
1881 	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
1882 		srv_active_wake_master_thread();
1883 	}
1884 }
1885 
1886 /********************************************************************//**
1887 Converts an InnoDB error code to a MySQL error code and also tells to MySQL
1888 about a possible transaction rollback inside InnoDB caused by a lock wait
1889 timeout or a deadlock.
1890 @return MySQL error code */
1891 static int
convert_error_code_to_mysql(dberr_t error,ulint flags,THD * thd)1892 convert_error_code_to_mysql(
1893 /*========================*/
1894 	dberr_t	error,	/*!< in: InnoDB error code */
1895 	ulint	flags,  /*!< in: InnoDB table flags, or 0 */
1896 	THD*	thd)	/*!< in: user thread handle or NULL */
1897 {
1898 	switch (error) {
1899 	case DB_SUCCESS:
1900 		return(0);
1901 
1902 	case DB_INTERRUPTED:
1903 		return(HA_ERR_ABORTED_BY_USER);
1904 
1905 	case DB_FOREIGN_EXCEED_MAX_CASCADE:
1906 		ut_ad(thd);
1907 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1908 				    HA_ERR_ROW_IS_REFERENCED,
1909 				    "InnoDB: Cannot delete/update "
1910 				    "rows with cascading foreign key "
1911 				    "constraints that exceed max "
1912 				    "depth of %d. Please "
1913 				    "drop extra constraints and try "
1914 				    "again", DICT_FK_MAX_RECURSIVE_LOAD);
1915 		return(HA_ERR_FK_DEPTH_EXCEEDED);
1916 
1917 	case DB_CANT_CREATE_GEOMETRY_OBJECT:
1918 		my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
1919 		return(HA_ERR_NULL_IN_SPATIAL);
1920 
1921 	case DB_ERROR:
1922 	default:
1923 		return(HA_ERR_GENERIC); /* unspecified error */
1924 
1925 	case DB_DUPLICATE_KEY:
1926 		/* Be cautious with returning this error, since
1927 		mysql could re-enter the storage layer to get
1928 		duplicated key info, the operation requires a
1929 		valid table handle and/or transaction information,
1930 		which might not always be available in the error
1931 		handling stage. */
1932 		return(HA_ERR_FOUND_DUPP_KEY);
1933 
1934 	case DB_READ_ONLY:
1935 		return(HA_ERR_TABLE_READONLY);
1936 
1937 	case DB_FOREIGN_DUPLICATE_KEY:
1938 		return(HA_ERR_FOREIGN_DUPLICATE_KEY);
1939 
1940 	case DB_MISSING_HISTORY:
1941 		return(HA_ERR_TABLE_DEF_CHANGED);
1942 
1943 	case DB_RECORD_NOT_FOUND:
1944 		return(HA_ERR_NO_ACTIVE_RECORD);
1945 
1946 	case DB_DEADLOCK:
1947 		/* Since we rolled back the whole transaction, we must
1948 		tell it also to MySQL so that MySQL knows to empty the
1949 		cached binlog for this transaction */
1950 
1951 		if (thd != NULL) {
1952 			thd_mark_transaction_to_rollback(thd, 1);
1953 		}
1954 
1955 		return(HA_ERR_LOCK_DEADLOCK);
1956 
1957 	case DB_LOCK_WAIT_TIMEOUT:
1958 		/* Starting from 5.0.13, we let MySQL just roll back the
1959 		latest SQL statement in a lock wait timeout. Previously, we
1960 		rolled back the whole transaction. */
1961 
1962 		if (thd) {
1963 			thd_mark_transaction_to_rollback(
1964 				thd, (bool) row_rollback_on_timeout);
1965 		}
1966 
1967 		return(HA_ERR_LOCK_WAIT_TIMEOUT);
1968 
1969 	case DB_NO_REFERENCED_ROW:
1970 		return(HA_ERR_NO_REFERENCED_ROW);
1971 
1972 	case DB_ROW_IS_REFERENCED:
1973 		return(HA_ERR_ROW_IS_REFERENCED);
1974 
1975 	case DB_NO_FK_ON_S_BASE_COL:
1976 	case DB_CANNOT_ADD_CONSTRAINT:
1977 	case DB_CHILD_NO_INDEX:
1978 	case DB_PARENT_NO_INDEX:
1979 		return(HA_ERR_CANNOT_ADD_FOREIGN);
1980 
1981 	case DB_CANNOT_DROP_CONSTRAINT:
1982 
1983 		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
1984 						misleading, a new MySQL error
1985 						code should be introduced */
1986 
1987 	case DB_CORRUPTION:
1988 		return(HA_ERR_CRASHED);
1989 
1990 	case DB_OUT_OF_FILE_SPACE:
1991 		return(HA_ERR_RECORD_FILE_FULL);
1992 
1993 	case DB_TEMP_FILE_WRITE_FAIL:
1994 		my_error(ER_GET_ERRMSG, MYF(0),
1995                          DB_TEMP_FILE_WRITE_FAIL,
1996                          ut_strerr(DB_TEMP_FILE_WRITE_FAIL),
1997                          "InnoDB");
1998 		return(HA_ERR_INTERNAL_ERROR);
1999 
2000 	case DB_TABLE_IN_FK_CHECK:
2001 		return(HA_ERR_TABLE_IN_FK_CHECK);
2002 
2003 	case DB_TABLE_IS_BEING_USED:
2004 		return(HA_ERR_WRONG_COMMAND);
2005 
2006 	case DB_TABLE_NOT_FOUND:
2007 		return(HA_ERR_NO_SUCH_TABLE);
2008 
2009 	case DB_DECRYPTION_FAILED:
2010 		return(HA_ERR_DECRYPTION_FAILED);
2011 
2012 	case DB_TABLESPACE_NOT_FOUND:
2013 		return(HA_ERR_TABLESPACE_MISSING);
2014 
2015 	case DB_TOO_BIG_RECORD: {
2016 		/* If prefix is true then a 768-byte prefix is stored
2017 		locally for BLOB fields. Refer to dict_table_get_format().
2018 		We limit max record size to 16k for 64k page size. */
2019 		bool prefix = !DICT_TF_HAS_ATOMIC_BLOBS(flags);
2020 		bool comp = !!(flags & DICT_TF_COMPACT);
2021 		ulint free_space = page_get_free_space_of_empty(comp) / 2;
2022 
2023 		if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
2024 				          REDUNDANT_REC_MAX_DATA_SIZE)) {
2025 			free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
2026 				REDUNDANT_REC_MAX_DATA_SIZE) - 1;
2027 		}
2028 
2029 		my_printf_error(ER_TOO_BIG_ROWSIZE,
2030 			"Row size too large (> " ULINTPF "). Changing some columns "
2031 			"to TEXT or BLOB %smay help. In current row "
2032 			"format, BLOB prefix of %d bytes is stored inline.",
2033 			MYF(0),
2034 			free_space,
2035 			prefix
2036 			? "or using ROW_FORMAT=DYNAMIC or"
2037 			  " ROW_FORMAT=COMPRESSED "
2038 			: "",
2039 			prefix
2040 			? DICT_MAX_FIXED_COL_LEN
2041 			: 0);
2042 		return(HA_ERR_TO_BIG_ROW);
2043 	}
2044 
2045 	case DB_TOO_BIG_INDEX_COL:
2046 		my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
2047 			 (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
2048 		return(HA_ERR_INDEX_COL_TOO_LONG);
2049 
2050 	case DB_NO_SAVEPOINT:
2051 		return(HA_ERR_NO_SAVEPOINT);
2052 
2053 	case DB_LOCK_TABLE_FULL:
2054 		/* Since we rolled back the whole transaction, we must
2055 		tell it also to MySQL so that MySQL knows to empty the
2056 		cached binlog for this transaction */
2057 
2058 		if (thd) {
2059 			thd_mark_transaction_to_rollback(thd, 1);
2060 		}
2061 
2062 		return(HA_ERR_LOCK_TABLE_FULL);
2063 
2064 	case DB_FTS_INVALID_DOCID:
2065 		return(HA_FTS_INVALID_DOCID);
2066 	case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
2067 		return(HA_ERR_OUT_OF_MEM);
2068 	case DB_TOO_MANY_CONCURRENT_TRXS:
2069 		return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
2070 	case DB_UNSUPPORTED:
2071 		return(HA_ERR_UNSUPPORTED);
2072 	case DB_INDEX_CORRUPT:
2073 		return(HA_ERR_INDEX_CORRUPT);
2074 	case DB_UNDO_RECORD_TOO_BIG:
2075 		return(HA_ERR_UNDO_REC_TOO_BIG);
2076 	case DB_OUT_OF_MEMORY:
2077 		return(HA_ERR_OUT_OF_MEM);
2078 	case DB_TABLESPACE_EXISTS:
2079 		return(HA_ERR_TABLESPACE_EXISTS);
2080 	case DB_TABLESPACE_DELETED:
2081 		return(HA_ERR_TABLESPACE_MISSING);
2082 	case DB_IDENTIFIER_TOO_LONG:
2083 		return(HA_ERR_INTERNAL_ERROR);
2084 	case DB_TABLE_CORRUPT:
2085 		return(HA_ERR_TABLE_CORRUPT);
2086 	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
2087 		return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
2088 	case DB_COMPUTE_VALUE_FAILED:
2089 		return(HA_ERR_GENERIC); // impossible
2090 	}
2091 }
2092 
2093 /*************************************************************//**
2094 Prints info of a THD object (== user session thread) to the given file. */
2095 void
innobase_mysql_print_thd(FILE * f,THD * thd,uint max_query_len)2096 innobase_mysql_print_thd(
2097 /*=====================*/
2098 	FILE*	f,		/*!< in: output stream */
2099 	THD*	thd,		/*!< in: MySQL THD object */
2100 	uint	max_query_len)	/*!< in: max query length to print, or 0 to
2101 				use the default max length */
2102 {
2103 	char	buffer[1024];
2104 
2105 	fputs(thd_get_error_context_description(thd, buffer, sizeof buffer,
2106 						max_query_len), f);
2107 	putc('\n', f);
2108 }
2109 
2110 /******************************************************************//**
2111 Get the variable length bounds of the given character set. */
2112 void
innobase_get_cset_width(ulint cset,ulint * mbminlen,ulint * mbmaxlen)2113 innobase_get_cset_width(
2114 /*====================*/
2115 	ulint	cset,		/*!< in: MySQL charset-collation code */
2116 	ulint*	mbminlen,	/*!< out: minimum length of a char (in bytes) */
2117 	ulint*	mbmaxlen)	/*!< out: maximum length of a char (in bytes) */
2118 {
2119 	CHARSET_INFO*	cs;
2120 	ut_ad(cset <= MAX_CHAR_COLL_NUM);
2121 	ut_ad(mbminlen);
2122 	ut_ad(mbmaxlen);
2123 
2124 	cs = all_charsets[cset];
2125 	if (cs) {
2126 		*mbminlen = cs->mbminlen;
2127 		*mbmaxlen = cs->mbmaxlen;
2128 		ut_ad(*mbminlen < DATA_MBMAX);
2129 		ut_ad(*mbmaxlen < DATA_MBMAX);
2130 	} else {
2131 		THD*	thd = current_thd;
2132 
2133 		if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
2134 
2135 			/* Fix bug#46256: allow tables to be dropped if the
2136 			collation is not found, but issue a warning. */
2137 			if (cset != 0) {
2138 
2139 				sql_print_warning(
2140 					"Unknown collation #" ULINTPF ".",
2141 					cset);
2142 			}
2143 		} else {
2144 
2145 			ut_a(cset == 0);
2146 		}
2147 
2148 		*mbminlen = *mbmaxlen = 0;
2149 	}
2150 }
2151 
2152 /******************************************************************//**
2153 Converts an identifier to a table name. */
2154 void
innobase_convert_from_table_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2155 innobase_convert_from_table_id(
2156 /*===========================*/
2157 	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
2158 	char*		to,	/*!< out: converted identifier */
2159 	const char*	from,	/*!< in: identifier to convert */
2160 	ulint		len)	/*!< in: length of 'to', in bytes */
2161 {
2162 	uint	errors;
2163 
2164 	strconvert(cs, from, FN_REFLEN, &my_charset_filename, to, (uint) len, &errors);
2165 }
2166 
2167 /**********************************************************************
2168 Check if the length of the identifier exceeds the maximum allowed.
2169 return true when length of identifier is too long. */
2170 my_bool
innobase_check_identifier_length(const char * id)2171 innobase_check_identifier_length(
2172 /*=============================*/
2173 	const char*	id)	/* in: FK identifier to check excluding the
2174 				database portion. */
2175 {
2176 	int		well_formed_error = 0;
2177 	CHARSET_INFO	*cs = system_charset_info;
2178 	DBUG_ENTER("innobase_check_identifier_length");
2179 
2180 	size_t len = my_well_formed_length(
2181 		cs, id, id + strlen(id),
2182 		NAME_CHAR_LEN, &well_formed_error);
2183 
2184 	if (well_formed_error || len == NAME_CHAR_LEN) {
2185 		my_error(ER_TOO_LONG_IDENT, MYF(0), id);
2186 		DBUG_RETURN(true);
2187 	}
2188 	DBUG_RETURN(false);
2189 }
2190 
2191 /******************************************************************//**
2192 Converts an identifier to UTF-8. */
2193 void
innobase_convert_from_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2194 innobase_convert_from_id(
2195 /*=====================*/
2196 	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
2197 	char*		to,	/*!< out: converted identifier */
2198 	const char*	from,	/*!< in: identifier to convert */
2199 	ulint		len)	/*!< in: length of 'to', in bytes */
2200 {
2201 	uint	errors;
2202 
2203 	strconvert(cs, from, FN_REFLEN, system_charset_info, to, (uint) len, &errors);
2204 }
2205 
2206 /******************************************************************//**
2207 Compares NUL-terminated UTF-8 strings case insensitively.
2208 @return 0 if a=b, <0 if a<b, >1 if a>b */
2209 int
innobase_strcasecmp(const char * a,const char * b)2210 innobase_strcasecmp(
2211 /*================*/
2212 	const char*	a,	/*!< in: first string to compare */
2213 	const char*	b)	/*!< in: second string to compare */
2214 {
2215 	if (!a) {
2216 		if (!b) {
2217 			return(0);
2218 		} else {
2219 			return(-1);
2220 		}
2221 	} else if (!b) {
2222 		return(1);
2223 	}
2224 
2225 	return(my_strcasecmp(system_charset_info, a, b));
2226 }
2227 
2228 /******************************************************************//**
2229 Compares NUL-terminated UTF-8 strings case insensitively. The
2230 second string contains wildcards.
2231 @return 0 if a match is found, 1 if not */
2232 static
2233 int
innobase_wildcasecmp(const char * a,const char * b)2234 innobase_wildcasecmp(
2235 /*=================*/
2236 	const char*	a,	/*!< in: string to compare */
2237 	const char*	b)	/*!< in: wildcard string to compare */
2238 {
2239 	return(wild_case_compare(system_charset_info, a, b));
2240 }
2241 
2242 /** Strip dir name from a full path name and return only the file name
2243 @param[in]	path_name	full path name
2244 @return file name or "null" if no file name */
2245 const char*
innobase_basename(const char * path_name)2246 innobase_basename(
2247 	const char*	path_name)
2248 {
2249 	const char*	name = base_name(path_name);
2250 
2251 	return((name) ? name : "null");
2252 }
2253 
2254 /******************************************************************//**
2255 Makes all characters in a NUL-terminated UTF-8 string lower case. */
2256 void
innobase_casedn_str(char * a)2257 innobase_casedn_str(
2258 /*================*/
2259 	char*	a)	/*!< in/out: string to put in lower case */
2260 {
2261 	my_casedn_str(system_charset_info, a);
2262 }
2263 
2264 /** Determines the current SQL statement.
2265 Thread unsafe, can only be called from the thread owning the THD.
2266 @param[in]	thd	MySQL thread handle
2267 @param[out]	length	Length of the SQL statement
2268 @return			SQL statement string */
2269 const char*
innobase_get_stmt_unsafe(THD * thd,size_t * length)2270 innobase_get_stmt_unsafe(
2271 	THD*	thd,
2272 	size_t*	length)
2273 {
2274 	if (const LEX_STRING *stmt = thd_query_string(thd)) {
2275 		*length = stmt->length;
2276 		return stmt->str;
2277 	}
2278 
2279 	*length = 0;
2280 	return NULL;
2281 }
2282 
2283 /**********************************************************************//**
2284 Get the current setting of the tdc_size global parameter. We do
2285 a dirty read because for one there is no synchronization object and
2286 secondly there is little harm in doing so even if we get a torn read.
2287 @return	value of tdc_size */
2288 ulint
innobase_get_table_cache_size(void)2289 innobase_get_table_cache_size(void)
2290 /*===============================*/
2291 {
2292 	return(tdc_size);
2293 }
2294 
2295 /**********************************************************************//**
2296 Get the current setting of the lower_case_table_names global parameter from
2297 mysqld.cc. We do a dirty read because for one there is no synchronization
2298 object and secondly there is little harm in doing so even if we get a torn
2299 read.
2300 @return value of lower_case_table_names */
2301 ulint
innobase_get_lower_case_table_names(void)2302 innobase_get_lower_case_table_names(void)
2303 /*=====================================*/
2304 {
2305 	return(lower_case_table_names);
2306 }
2307 
2308 /**
2309   Test a file path whether it is same as mysql data directory path.
2310 
2311   @param path null terminated character string
2312 
2313   @return
2314     @retval TRUE The path is different from mysql data directory.
2315     @retval FALSE The path is same as mysql data directory.
2316 */
is_mysql_datadir_path(const char * path)2317 static bool is_mysql_datadir_path(const char *path)
2318 {
2319   if (path == NULL)
2320     return false;
2321 
2322   char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
2323   convert_dirname(path_dir, path, NullS);
2324   convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
2325   size_t mysql_data_home_len= dirname_length(mysql_data_dir);
2326   size_t path_len = dirname_length(path_dir);
2327 
2328   if (path_len < mysql_data_home_len)
2329     return true;
2330 
2331   if (!lower_case_file_system)
2332     return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
2333 
2334   return(files_charset_info->coll->strnncoll(files_charset_info,
2335                                             (uchar *) path_dir, path_len,
2336                                             (uchar *) mysql_data_dir,
2337                                             mysql_data_home_len,
2338                                             TRUE));
2339 }
2340 
mysql_tmpfile_path(const char * path,const char * prefix)2341 static int mysql_tmpfile_path(const char *path, const char *prefix)
2342 {
2343   DBUG_ASSERT(path != NULL);
2344   DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
2345 
2346   char filename[FN_REFLEN];
2347   File fd = create_temp_file(filename, path, prefix, O_BINARY | O_SEQUENTIAL,
2348                              MYF(MY_WME | MY_TEMPORARY));
2349   return fd;
2350 }
2351 
2352 /** Creates a temporary file in the location specified by the parameter
2353 path. If the path is NULL, then it will be created in tmpdir.
2354 @param[in]	path	location for creating temporary file
2355 @return temporary file descriptor, or < 0 on error */
2356 os_file_t
innobase_mysql_tmpfile(const char * path)2357 innobase_mysql_tmpfile(
2358 	const char*	path)
2359 {
2360 #ifdef WITH_INNODB_DISALLOW_WRITES
2361 	os_event_wait(srv_allow_writes_event);
2362 #endif /* WITH_INNODB_DISALLOW_WRITES */
2363 	File	fd;
2364 
2365 	DBUG_EXECUTE_IF(
2366 		"innobase_tmpfile_creation_failure",
2367 		return(OS_FILE_CLOSED);
2368 	);
2369 
2370 	if (path == NULL) {
2371 		fd = mysql_tmpfile("ib");
2372 	} else {
2373 		fd = mysql_tmpfile_path(path, "ib");
2374 	}
2375 
2376 	if (fd < 0)
2377 		return OS_FILE_CLOSED;
2378 
2379 	/* Copy the file descriptor, so that the additional resources
2380 	allocated by create_temp_file() can be freed by invoking
2381 	my_close().
2382 
2383 	Because the file descriptor returned by this function
2384 	will be passed to fdopen(), it will be closed by invoking
2385 	fclose(), which in turn will invoke close() instead of
2386 	my_close(). */
2387 
2388 #ifdef _WIN32
2389 	/* Note that on Windows, the integer returned by mysql_tmpfile
2390 	has no relation to C runtime file descriptor. Here, we need
2391 	to call my_get_osfhandle to get the HANDLE and then convert it
2392 	to C runtime filedescriptor. */
2393 
2394 	HANDLE hFile = my_get_osfhandle(fd);
2395 	HANDLE hDup;
2396 	BOOL bOK = DuplicateHandle(
2397 			GetCurrentProcess(),
2398 			hFile, GetCurrentProcess(),
2399 			&hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
2400 	my_close(fd, MYF(MY_WME));
2401 
2402 	if (!bOK) {
2403 		my_osmaperr(GetLastError());
2404 		goto error;
2405 	}
2406 	return hDup;
2407 #else
2408 #ifdef F_DUPFD_CLOEXEC
2409 	int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2410 #else
2411 	int fd2 = dup(fd);
2412 #endif
2413 	my_close(fd, MYF(MY_WME));
2414 	if (fd2 < 0) {
2415 		set_my_errno(errno);
2416 		goto error;
2417 	}
2418 	return fd2;
2419 #endif
2420 
2421 error:
2422 	char errbuf[MYSYS_STRERROR_SIZE];
2423 
2424 	my_error(EE_OUT_OF_FILERESOURCES,
2425 		MYF(0),
2426 		"ib*", errno,
2427 		my_strerror(errbuf, sizeof(errbuf), errno));
2428 	return (OS_FILE_CLOSED);
2429 }
2430 
2431 /*********************************************************************//**
2432 Wrapper around MySQL's copy_and_convert function.
2433 @return number of bytes copied to 'to' */
2434 static
2435 ulint
innobase_convert_string(void * to,ulint to_length,CHARSET_INFO * to_cs,const void * from,ulint from_length,CHARSET_INFO * from_cs,uint * errors)2436 innobase_convert_string(
2437 /*====================*/
2438 	void*		to,		/*!< out: converted string */
2439 	ulint		to_length,	/*!< in: number of bytes reserved
2440 					for the converted string */
2441 	CHARSET_INFO*	to_cs,		/*!< in: character set to convert to */
2442 	const void*	from,		/*!< in: string to convert */
2443 	ulint		from_length,	/*!< in: number of bytes to convert */
2444 	CHARSET_INFO*	from_cs,	/*!< in: character set to convert
2445 					from */
2446 	uint*		errors)		/*!< out: number of errors encountered
2447 					during the conversion */
2448 {
2449 	return(copy_and_convert(
2450 			(char*) to, (uint32) to_length, to_cs,
2451 			(const char*) from, (uint32) from_length, from_cs,
2452 			errors));
2453 }
2454 
2455 /*******************************************************************//**
2456 Formats the raw data in "data" (in InnoDB on-disk format) that is of
2457 type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
2458 the result to "buf". The result is converted to "system_charset_info".
2459 Not more than "buf_size" bytes are written to "buf".
2460 The result is always NUL-terminated (provided buf_size > 0) and the
2461 number of bytes that were written to "buf" is returned (including the
2462 terminating NUL).
2463 @return number of bytes that were written */
2464 ulint
innobase_raw_format(const char * data,ulint data_len,ulint charset_coll,char * buf,ulint buf_size)2465 innobase_raw_format(
2466 /*================*/
2467 	const char*	data,		/*!< in: raw data */
2468 	ulint		data_len,	/*!< in: raw data length
2469 					in bytes */
2470 	ulint		charset_coll,	/*!< in: charset collation */
2471 	char*		buf,		/*!< out: output buffer */
2472 	ulint		buf_size)	/*!< in: output buffer size
2473 					in bytes */
2474 {
2475 	/* XXX we use a hard limit instead of allocating
2476 	but_size bytes from the heap */
2477 	CHARSET_INFO*	data_cs;
2478 	char		buf_tmp[8192];
2479 	ulint		buf_tmp_used;
2480 	uint		num_errors;
2481 
2482 	data_cs = all_charsets[charset_coll];
2483 
2484 	buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
2485 					       system_charset_info,
2486 					       data, data_len, data_cs,
2487 					       &num_errors);
2488 
2489 	return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
2490 }
2491 
/*
The helper function nlz(x) calculates the number of leading zeros
in the binary representation of the 64-bit number "x", either using a
built-in compiler function or a substitute trick based on the use
of the multiplication operation and a table indexed by the prefix
of the multiplication result.

Three implementations are selected at compile time:
 - GCC-compatible compilers: a macro over __builtin_clzll();
 - MSVC on x86, x64 or ARM64: the bit-scan / count-leading-zeros
   intrinsics from <intrin.h>;
 - anything else: a portable table lookup.

NOTE(review): the builtin and bit-scan variants are presumably
undefined for x == 0 (see the compiler documentation); the callers
visible in this file only pass values asserted to be nonzero.
*/
#ifdef __GNUC__
#define nlz(x) __builtin_clzll(x)
#elif defined(_MSC_VER) && !defined(_M_CEE_PURE) && \
  (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
#ifndef __INTRIN_H_
#pragma warning(push, 4)
#pragma warning(disable: 4255 4668)
#include <intrin.h>
#pragma warning(pop)
#endif
__forceinline unsigned int nlz (ulonglong x)
{
#if defined(_M_IX86) || defined(_M_X64)
  /* n receives the bit index found by the bit-scan intrinsic. */
  unsigned long n;
#ifdef _M_X64
  _BitScanReverse64(&n, x);
  /* For n in [0, 63], n ^ 63 equals 63 - n. */
  return (unsigned int) n ^ 63;
#else
  /* 32-bit x86: scan the upper half first, then the lower half. */
  unsigned long y = (unsigned long) (x >> 32);
  unsigned int m = 31;
  if (y == 0)
  {
    y = (unsigned long) x;
    m = 63;
  }
  _BitScanReverse(&n, y);
  /* For n in [0, 31], n ^ m equals m - n for m = 31 and m = 63. */
  return (unsigned int) n ^ m;
#endif
#elif defined(_M_ARM64)
  return _CountLeadingZeros(x);
#endif
}
#else
inline unsigned int nlz (ulonglong x)
{
  /* Lookup table indexed by the top 6 bits of the multiplication
  result below.
  NOTE(review): the table has 48 entries; presumably the multiplier
  guarantees that the index stays below 48 - confirm. */
  static unsigned char table [48] = {
    32,  6,  5,  0,  4, 12,  0, 20,
    15,  3, 11,  0,  0, 18, 25, 31,
     8, 14,  2,  0, 10,  0,  0,  0,
     0,  0,  0, 21,  0,  0, 19, 26,
     7,  0, 13,  0, 16,  1, 22, 27,
     9,  0, 17, 23, 28, 24, 29, 30
  };
  /* Work on the upper 32 bits; if they are all zero, switch to the
  lower 32 bits and account for the 32 leading zeros in n. */
  unsigned int y= (unsigned int) (x >> 32);
  unsigned int n= 0;
  if (y == 0) {
    y= (unsigned int) x;
    n= 32;
  }
  y = y | (y >> 1); // Propagate leftmost 1-bit to the right.
  y = y | (y >> 2);
  y = y | (y >> 4);
  y = y | (y >> 8);
  y = y & ~(y >> 16);
  y = y * 0x3EF5D037;
  /* For x == 0, y is still 0 here, so this returns 32 + table[0] = 64. */
  return n + table[y >> 26];
}
#endif
2557 
/*********************************************************************//**
Compute the next autoinc value.

For MySQL replication the autoincrement values can be partitioned among
the nodes. The offset is the start or origin of the autoincrement value
for a particular node. For n nodes the increment will be n and the offset
will be in the interval [1, n]. The formula tries to allocate the next
value for a particular node.

Note: This function is also called with increment set to the number of
values we want to reserve for multi-value inserts e.g.,

	INSERT INTO T VALUES(), (), ();

innobase_next_autoinc() will be called with increment set to 3 where
autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
the multi-value INSERT above.

The result is computed as: round the distance between "current" and
"offset" down to a multiple of "step", add "offset", then add
"need * step". Any detected unsigned overflow, as well as
current > max_value, yields the maximum ulonglong value instead.
need, step and max_value must all be nonzero (enforced with ut_a()).
@return the next value, or ~(ulonglong) 0 on overflow */
ulonglong
innobase_next_autoinc(
/*==================*/
	ulonglong	current,	/*!< in: Current value */
	ulonglong	need,		/*!< in: count of values needed */
	ulonglong	step,		/*!< in: AUTOINC increment step */
	ulonglong	offset,		/*!< in: AUTOINC offset */
	ulonglong	max_value)	/*!< in: max value for type */
{
	ulonglong	next_value;
	ulonglong	block;

	/* Should never be 0. */
	ut_a(need > 0);
	ut_a(step > 0);
	ut_a(max_value > 0);

	/*
	  We need to calculate the "block" value equal to the product
	  "step * need". However, when calculating this product, an integer
	  overflow can occur, so we cannot simply use the usual multiplication
	  operation. The snippet below calculates the product of two numbers
	  and detects an unsigned integer overflow:
	*/
	unsigned int	m= nlz(need);
	unsigned int	n= nlz(step);
	if (m + n <= 8 * sizeof(ulonglong) - 2) {
		// The bit width of the original values is too large,
		// therefore we are guaranteed to get an overflow.
		goto overflow;
	}
	/* Multiply by half of the step first, then double the result;
	a set top bit before the doubling means that it would overflow. */
	block = need * (step >> 1);
	if ((longlong) block < 0) {
		goto overflow;
	}
	block += block;
	if (step & 1) {
		/* Odd step: add back the "need" that was lost when the
		step was halved; a wrap-around shows as block < need. */
		block += need;
		if (block < need) {
			goto overflow;
		}
	}

	/* Check for overflow. Current can be > max_value if the value
	is in reality a negative value. Also, the visual studio compiler
	converts large double values (which hypothetically can then be
	passed here as the values of the "current" parameter) automatically
	into unsigned long long datatype maximum value: */
	if (current > max_value) {
		goto overflow;
	}

	/* According to MySQL documentation, if the offset is greater than
	the step then the offset is ignored. */
	if (offset > step) {
		offset = 0;
	}

	/*
	  Let's round the current value to within a step-size block:
	*/
	if (current > offset) {
		next_value = current - offset;
	} else {
		next_value = offset - current;
	}
	next_value -= next_value % step;

	/*
	  Add an offset to the next value and check that the addition
	  does not cause an integer overflow:
	*/
	next_value += offset;
	if (next_value < offset) {
		goto overflow;
	}

	/*
	  Add a block to the next value and check that the addition
	  does not cause an integer overflow:
	*/
	next_value += block;
	if (next_value < block) {
		goto overflow;
	}

	return(next_value);

overflow:
	/*
	  Allow auto_increment to go over max_value up to max ulonglong.
	  This allows us to detect that all values are exhausted.
	  If we don't do this, we will return max_value several times
	  and get duplicate key errors instead of auto increment value
	  out of range:
	*/
	return(~(ulonglong) 0);
}
2674 
2675 /********************************************************************//**
2676 Reset the autoinc value in the table.
2677 @return	DB_SUCCESS if all went well else error code */
2678 UNIV_INTERN
2679 dberr_t
innobase_reset_autoinc(ulonglong autoinc)2680 ha_innobase::innobase_reset_autoinc(
2681 /*================================*/
2682 	ulonglong	autoinc)	/*!< in: value to store */
2683 {
2684 	dberr_t		error;
2685 
2686 	error = innobase_lock_autoinc();
2687 
2688 	if (error == DB_SUCCESS) {
2689 
2690 		dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
2691 
2692 		dict_table_autoinc_unlock(m_prebuilt->table);
2693 	}
2694 
2695 	return(error);
2696 }
2697 
2698 /*******************************************************************//**
2699 Reset the auto-increment counter to the given value, i.e. the next row
2700 inserted will get the given value. This is called e.g. after TRUNCATE
2701 is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
2702 returned by storage engines that don't support this operation.
2703 @return	0 or error code */
2704 UNIV_INTERN
2705 int
reset_auto_increment(ulonglong value)2706 ha_innobase::reset_auto_increment(
2707 /*==============================*/
2708 	ulonglong	value)		/*!< in: new value for table autoinc */
2709 {
2710 	DBUG_ENTER("ha_innobase::reset_auto_increment");
2711 
2712 	dberr_t	error;
2713 
2714 	update_thd(ha_thd());
2715 
2716 	error = row_lock_table_autoinc_for_mysql(m_prebuilt);
2717 
2718 	if (error != DB_SUCCESS) {
2719 		DBUG_RETURN(convert_error_code_to_mysql(
2720 				    error, m_prebuilt->table->flags, m_user_thd));
2721 	}
2722 
2723 	/* The next value can never be 0. */
2724 	if (value == 0) {
2725 		value = 1;
2726 	}
2727 
2728 	innobase_reset_autoinc(value);
2729 
2730 	DBUG_RETURN(0);
2731 }
2732 
2733 /*********************************************************************//**
2734 Initializes some fields in an InnoDB transaction object. */
2735 static
2736 void
innobase_trx_init(THD * thd,trx_t * trx)2737 innobase_trx_init(
2738 /*==============*/
2739 	THD*	thd,	/*!< in: user thread handle */
2740 	trx_t*	trx)	/*!< in/out: InnoDB transaction handle */
2741 {
2742 	DBUG_ENTER("innobase_trx_init");
2743 	DBUG_ASSERT(thd == trx->mysql_thd);
2744 
2745 	/* Ensure that thd_lock_wait_timeout(), which may be called
2746 	while holding lock_sys.mutex, by lock_rec_enqueue_waiting(),
2747 	will not end up acquiring LOCK_global_system_variables in
2748 	intern_sys_var_ptr(). */
2749 	THDVAR(thd, lock_wait_timeout);
2750 
2751 	trx->check_foreigns = !thd_test_options(
2752 		thd, OPTION_NO_FOREIGN_KEY_CHECKS);
2753 
2754 	trx->check_unique_secondary = !thd_test_options(
2755 		thd, OPTION_RELAXED_UNIQUE_CHECKS);
2756 #ifdef WITH_WSREP
2757 	trx->wsrep = wsrep_on(thd);
2758 #endif
2759 
2760 	DBUG_VOID_RETURN;
2761 }
2762 
2763 /*********************************************************************//**
2764 Allocates an InnoDB transaction for a MySQL handler object for DML.
2765 @return InnoDB transaction handle */
2766 trx_t*
innobase_trx_allocate(THD * thd)2767 innobase_trx_allocate(
2768 /*==================*/
2769 	THD*	thd)	/*!< in: user thread handle */
2770 {
2771 	trx_t*	trx;
2772 
2773 	DBUG_ENTER("innobase_trx_allocate");
2774 	DBUG_ASSERT(thd != NULL);
2775 	DBUG_ASSERT(EQ_CURRENT_THD(thd));
2776 
2777 	trx = trx_create();
2778 
2779 	trx->mysql_thd = thd;
2780 
2781 	innobase_trx_init(thd, trx);
2782 
2783 	DBUG_RETURN(trx);
2784 }
2785 
2786 /*********************************************************************//**
2787 Gets the InnoDB transaction handle for a MySQL handler object, creates
2788 an InnoDB transaction struct if the corresponding MySQL thread struct still
2789 lacks one.
2790 @return InnoDB transaction handle */
2791 static inline
2792 trx_t*
check_trx_exists(THD * thd)2793 check_trx_exists(
2794 /*=============*/
2795 	THD*	thd)	/*!< in: user thread handle */
2796 {
2797 	if (trx_t* trx = thd_to_trx(thd)) {
2798 		ut_a(trx->magic_n == TRX_MAGIC_N);
2799 		innobase_trx_init(thd, trx);
2800 		return trx;
2801 	} else {
2802 		trx = innobase_trx_allocate(thd);
2803 		thd_set_ha_data(thd, innodb_hton_ptr, trx);
2804 		return trx;
2805 	}
2806 }
2807 
2808 /**
2809   Gets current trx.
2810 
2811   This function may be called during InnoDB initialisation, when
2812   innodb_hton_ptr->slot is not yet set to meaningful value.
2813 */
2814 
current_trx()2815 trx_t *current_trx()
2816 {
2817 	THD *thd=current_thd;
2818 	if (likely(thd != 0) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) {
2819 		return thd_to_trx(thd);
2820 	} else {
2821 		return(NULL);
2822 	}
2823 }
2824 
2825 /*********************************************************************//**
2826 Note that a transaction has been registered with MySQL.
2827 @return true if transaction is registered with MySQL 2PC coordinator */
2828 static inline
2829 bool
trx_is_registered_for_2pc(const trx_t * trx)2830 trx_is_registered_for_2pc(
2831 /*======================*/
2832 	const trx_t*	trx)	/* in: transaction */
2833 {
2834 	return(trx->is_registered == 1);
2835 }
2836 
2837 /*********************************************************************//**
2838 Note that a transaction has been registered with MySQL 2PC coordinator. */
2839 static inline
2840 void
trx_register_for_2pc(trx_t * trx)2841 trx_register_for_2pc(
2842 /*==================*/
2843 	trx_t*	trx)	/* in: transaction */
2844 {
2845 	trx->is_registered = 1;
2846 	ut_ad(!trx->active_commit_ordered);
2847 }
2848 
2849 /*********************************************************************//**
2850 Note that a transaction has been deregistered. */
2851 static inline
2852 void
trx_deregister_from_2pc(trx_t * trx)2853 trx_deregister_from_2pc(
2854 /*====================*/
2855 	trx_t*	trx)	/* in: transaction */
2856 {
2857   trx->is_registered= false;
2858   trx->active_commit_ordered= false;
2859 }
2860 
2861 /*********************************************************************//**
2862 Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
2863 Those flags are stored in .frm file and end up in the MySQL table object,
2864 but are frequently used inside InnoDB so we keep their copies into the
2865 InnoDB table object. */
2866 static
2867 void
innobase_copy_frm_flags_from_create_info(dict_table_t * innodb_table,const HA_CREATE_INFO * create_info)2868 innobase_copy_frm_flags_from_create_info(
2869 /*=====================================*/
2870 	dict_table_t*		innodb_table,	/*!< in/out: InnoDB table */
2871 	const HA_CREATE_INFO*	create_info)	/*!< in: create info */
2872 {
2873 	ibool	ps_on;
2874 	ibool	ps_off;
2875 
2876 	if (innodb_table->is_temporary()
2877 	    || innodb_table->no_rollback()) {
2878 		/* Temp tables do not use persistent stats. */
2879 		ps_on = FALSE;
2880 		ps_off = TRUE;
2881 	} else {
2882 		ps_on = create_info->table_options
2883 			& HA_OPTION_STATS_PERSISTENT;
2884 		ps_off = create_info->table_options
2885 			& HA_OPTION_NO_STATS_PERSISTENT;
2886 	}
2887 
2888 	dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2889 
2890 	dict_stats_auto_recalc_set(
2891 		innodb_table,
2892 		create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2893 		create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2894 
2895 	innodb_table->stats_sample_pages = create_info->stats_sample_pages;
2896 }
2897 
2898 /*********************************************************************//**
2899 Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
2900 Those flags are stored in .frm file and end up in the MySQL table object,
2901 but are frequently used inside InnoDB so we keep their copies into the
2902 InnoDB table object. */
2903 void
innobase_copy_frm_flags_from_table_share(dict_table_t * innodb_table,const TABLE_SHARE * table_share)2904 innobase_copy_frm_flags_from_table_share(
2905 /*=====================================*/
2906 	dict_table_t*		innodb_table,	/*!< in/out: InnoDB table */
2907 	const TABLE_SHARE*	table_share)	/*!< in: table share */
2908 {
2909 	ibool	ps_on;
2910 	ibool	ps_off;
2911 
2912 	if (innodb_table->is_temporary()) {
2913 		/* Temp tables do not use persistent stats */
2914 		ps_on = FALSE;
2915 		ps_off = TRUE;
2916 	} else {
2917 		ps_on = table_share->db_create_options
2918 			& HA_OPTION_STATS_PERSISTENT;
2919 		ps_off = table_share->db_create_options
2920 			& HA_OPTION_NO_STATS_PERSISTENT;
2921 	}
2922 
2923 	dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2924 
2925 	dict_stats_auto_recalc_set(
2926 		innodb_table,
2927 		table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2928 		table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2929 
2930 	innodb_table->stats_sample_pages = table_share->stats_sample_pages;
2931 }
2932 
2933 /*********************************************************************//**
2934 Construct ha_innobase handler. */
2935 
ha_innobase(handlerton * hton,TABLE_SHARE * table_arg)2936 ha_innobase::ha_innobase(
2937 /*=====================*/
2938 	handlerton*	hton,
2939 	TABLE_SHARE*	table_arg)
2940 	:handler(hton, table_arg),
2941 	m_prebuilt(),
2942 	m_user_thd(),
2943 	m_int_table_flags(HA_REC_NOT_IN_SEQ
2944 			  | HA_NULL_IN_KEY
2945 			  | HA_CAN_VIRTUAL_COLUMNS
2946 			  | HA_CAN_INDEX_BLOBS
2947 			  | HA_CAN_SQL_HANDLER
2948 			  | HA_REQUIRES_KEY_COLUMNS_FOR_DELETE
2949 			  | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
2950 			  | HA_PRIMARY_KEY_IN_READ_INDEX
2951 			  | HA_BINLOG_ROW_CAPABLE
2952 			  | HA_CAN_GEOMETRY
2953 			  | HA_PARTIAL_COLUMN_READ
2954 			  | HA_TABLE_SCAN_ON_INDEX
2955 			  | HA_CAN_FULLTEXT
2956 			  | HA_CAN_FULLTEXT_EXT
2957 		/* JAN: TODO: MySQL 5.7
2958 			  | HA_CAN_FULLTEXT_HINTS
2959 		*/
2960 			  | HA_CAN_EXPORT
2961 			  | HA_CAN_RTREEKEYS
2962                           | HA_CAN_TABLES_WITHOUT_ROLLBACK
2963 			  | HA_CONCURRENT_OPTIMIZE
2964 			  |  (srv_force_primary_key ? HA_WANTS_PRIMARY_KEY : 0)
2965 		  ),
2966 	m_start_of_scan(),
2967         m_mysql_has_locked()
2968 {}
2969 
2970 /*********************************************************************//**
2971 Destruct ha_innobase handler. */
2972 
~ha_innobase()2973 ha_innobase::~ha_innobase()
2974 /*======================*/
2975 {
2976 }
2977 
2978 /*********************************************************************//**
2979 Updates the user_thd field in a handle and also allocates a new InnoDB
2980 transaction handle if needed, and updates the transaction fields in the
2981 m_prebuilt struct. */
2982 void
update_thd(THD * thd)2983 ha_innobase::update_thd(
2984 /*====================*/
2985 	THD*	thd)	/*!< in: thd to use the handle */
2986 {
2987 	DBUG_ENTER("ha_innobase::update_thd");
2988 	DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
2989 		   m_user_thd, thd));
2990 
2991 	/* The table should have been opened in ha_innobase::open(). */
2992 	DBUG_ASSERT(m_prebuilt->table->get_ref_count() > 0);
2993 
2994 	trx_t*	trx = check_trx_exists(thd);
2995 
2996 	ut_ad(trx->dict_operation_lock_mode == 0);
2997 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
2998 
2999 	if (m_prebuilt->trx != trx) {
3000 
3001 		row_update_prebuilt_trx(m_prebuilt, trx);
3002 	}
3003 
3004 	m_user_thd = thd;
3005 
3006 	DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
3007 	DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd));
3008 
3009 	DBUG_VOID_RETURN;
3010 }
3011 
3012 /*********************************************************************//**
3013 Updates the user_thd field in a handle and also allocates a new InnoDB
3014 transaction handle if needed, and updates the transaction fields in the
3015 m_prebuilt struct. */
3016 
3017 void
update_thd()3018 ha_innobase::update_thd()
3019 /*=====================*/
3020 {
3021 	THD*	thd = ha_thd();
3022 
3023 	ut_ad(EQ_CURRENT_THD(thd));
3024 	update_thd(thd);
3025 }
3026 
3027 /*********************************************************************//**
3028 Registers an InnoDB transaction with the MySQL 2PC coordinator, so that
3029 the MySQL XA code knows to call the InnoDB prepare and commit, or rollback
3030 for the transaction. This MUST be called for every transaction for which
3031 the user may call commit or rollback. Calling this several times to register
3032 the same transaction is allowed, too. This function also registers the
3033 current SQL statement. */
3034 static inline
3035 void
innobase_register_trx(handlerton * hton,THD * thd,trx_t * trx)3036 innobase_register_trx(
3037 /*==================*/
3038 	handlerton*	hton,	/* in: Innobase handlerton */
3039 	THD*		thd,	/* in: MySQL thd (connection) object */
3040 	trx_t*		trx)	/* in: transaction to register */
3041 {
3042 	/* JAN: TODO: MySQL 5.7 PSI
3043 	const ulonglong	trx_id = static_cast<const ulonglong>(
3044 		trx_get_id_for_print(trx));
3045 
3046 	trans_register_ha(thd, FALSE, hton, &trx_id);
3047 	*/
3048 	trans_register_ha(thd, FALSE, hton);
3049 
3050 	if (!trx_is_registered_for_2pc(trx)
3051 	    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
3052 
3053 		//trans_register_ha(thd, TRUE, hton, &trx_id);
3054 		trans_register_ha(thd, TRUE, hton);
3055 	}
3056 
3057 	trx_register_for_2pc(trx);
3058 }
3059 
3060 /*	BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
3061 	------------------------------------------------------------
3062 
3063 1) The use of the query cache for TBL is disabled when there is an
3064 uncommitted change to TBL.
3065 
2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and only allows transactions whose
id >= INV_TRX_ID (or which have no id assigned yet) to use the query cache.
3070 
3071 3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
3072 modification because an ON DELETE CASCADE, we invalidate the MySQL query cache
3073 of TBL immediately.
3074 
3075 How this is implemented inside InnoDB:
3076 
3077 1) Since every modification always sets an IX type table lock on the InnoDB
3078 table, it is easy to check if there can be uncommitted modifications for a
3079 table: just check if there are locks in the lock list of the table.
3080 
3081 2) When a transaction inside InnoDB commits, it reads the global trx id
3082 counter and stores the value INV_TRX_ID to the tables on which it had a lock.
3083 
3084 3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
3085 InnoDB calls an invalidate method for the MySQL query cache for that table.
3086 
3087 How this is implemented inside sql_cache.cc:
3088 
3089 1) The query cache for an InnoDB table TBL is invalidated immediately at an
3090 INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
3091 invalidation to the transaction commit.
3092 
3093 2) To store or retrieve a value from the query cache of an InnoDB table TBL,
3094 any query must first ask InnoDB's permission. We must pass the thd as a
3095 parameter because InnoDB will look at the trx id, if any, associated with
3096 that thd. Also the full_name which is used as key to search for the table
3097 object. The full_name is a string containing the normalized path to the
3098 table in the canonical format.
3099 
3100 3) Use of the query cache for InnoDB tables is now allowed also when
3101 AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
3102 put restrictions on the use of the query cache.
3103 */
3104 
3105 /** Check if mysql can allow the transaction to read from/store to
3106 the query cache.
3107 @param[in]	table	table object
3108 @param[in]	trx	transaction object
3109 @return whether the storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check_low(const dict_table_t * table,trx_t * trx)3110 static bool innobase_query_caching_table_check_low(
3111 	const dict_table_t*	table,
3112 	trx_t*			trx)
3113 {
3114 	/* The following conditions will decide the query cache
3115 	retrieval or storing into:
3116 
3117 	(1) There should not be any locks on the table.
3118 	(2) Someother trx shouldn't invalidate the cache before this
3119 	transaction started.
3120 	(3) Read view shouldn't exist. If exists then the view
3121 	low_limit_id should be greater than or equal to the transaction that
3122 	invalidates the cache for the particular table.
3123 
3124 	For read-only transaction: should satisfy (1) and (3)
3125 	For read-write transaction: should satisfy (1), (2), (3) */
3126 
3127 	if (lock_table_get_n_locks(table)) {
3128 		return false;
3129 	}
3130 
3131 	if (trx->id && trx->id < table->query_cache_inv_trx_id) {
3132 		return false;
3133 	}
3134 
3135 	return !trx->read_view.is_open()
3136 		|| trx->read_view.low_limit_id()
3137 		>= table->query_cache_inv_trx_id;
3138 }
3139 
3140 /** Checks if MySQL at the moment is allowed for this table to retrieve a
3141 consistent read result, or store it to the query cache.
3142 @param[in,out]	trx		transaction
3143 @param[in]	norm_name	concatenation of database name,
3144 				'/' char, table name
3145 @return whether storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check(trx_t * trx,const char * norm_name)3146 static bool innobase_query_caching_table_check(
3147 	trx_t*		trx,
3148 	const char*	norm_name)
3149 {
3150 	dict_table_t*   table = dict_table_open_on_name(
3151 		norm_name, FALSE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
3152 
3153 	if (table == NULL) {
3154 		return false;
3155 	}
3156 
3157 	/* Start the transaction if it is not started yet */
3158 	trx_start_if_not_started(trx, false);
3159 
3160 	bool allow = innobase_query_caching_table_check_low(table, trx);
3161 
3162 	dict_table_close(table, FALSE, FALSE);
3163 
3164 	if (allow) {
3165 		/* If the isolation level is high, assign a read view for the
3166 		transaction if it does not yet have one */
3167 
3168 		if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
3169 		    && !srv_read_only_mode
3170 		    && !trx->read_view.is_open()) {
3171 
3172 			/* Start the transaction if it is not started yet */
3173 			trx_start_if_not_started(trx, false);
3174 
3175 			trx->read_view.open(trx);
3176 		}
3177 	}
3178 
3179 	return allow;
3180 }
3181 
3182 /******************************************************************//**
3183 The MySQL query cache uses this to check from InnoDB if the query cache at
3184 the moment is allowed to operate on an InnoDB table. The SQL query must
3185 be a non-locking SELECT.
3186 
3187 The query cache is allowed to operate on certain query only if this function
3188 returns TRUE for all tables in the query.
3189 
3190 If thd is not in the autocommit state, this function also starts a new
3191 transaction for thd if there is no active trx yet, and assigns a consistent
3192 read view to it if there is no read view yet.
3193 
3194 Why a deadlock of threads is not possible: the query cache calls this function
3195 at the start of a SELECT processing. Then the calling thread cannot be
3196 holding any InnoDB semaphores. The calling thread is holding the
3197 query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
3198 Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
3199 the InnoDB trx_sys.mutex.
3200 @return TRUE if permitted, FALSE if not; note that the value FALSE
3201 does not mean we should invalidate the query cache: invalidation is
3202 called explicitly */
3203 static
3204 my_bool
innobase_query_caching_of_table_permitted(THD * thd,const char * full_name,uint full_name_len,ulonglong *)3205 innobase_query_caching_of_table_permitted(
3206 /*======================================*/
3207 	THD*	thd,		/*!< in: thd of the user who is trying to
3208 				store a result to the query cache or
3209 				retrieve it */
3210 	const char* full_name,	/*!< in: normalized path to the table */
3211 	uint	full_name_len,	/*!< in: length of the normalized path
3212 				to the table */
3213 	ulonglong *)
3214 {
3215 	char	norm_name[1000];
3216 	trx_t*	trx = check_trx_exists(thd);
3217 
3218 	ut_a(full_name_len < 999);
3219 
3220 	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
3221 		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
3222 		plain SELECT if AUTOCOMMIT is not on. */
3223 
3224 		return(false);
3225 	}
3226 
3227 	innobase_srv_conc_force_exit_innodb(trx);
3228 
3229 	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
3230 	    && trx->n_mysql_tables_in_use == 0) {
3231 		/* We are going to retrieve the query result from the query
3232 		cache. This cannot be a store operation to the query cache
3233 		because then MySQL would have locks on tables already.
3234 
3235 		TODO: if the user has used LOCK TABLES to lock the table,
3236 		then we open a transaction in the call of row_.. below.
3237 		That trx can stay open until UNLOCK TABLES. The same problem
3238 		exists even if we do not use the query cache. MySQL should be
3239 		modified so that it ALWAYS calls some cleanup function when
3240 		the processing of a query ends!
3241 
3242 		We can imagine we instantaneously serialize this consistent
3243 		read trx to the current trx id counter. If trx2 would have
3244 		changed the tables of a query result stored in the cache, and
3245 		trx2 would have already committed, making the result obsolete,
3246 		then trx2 would have already invalidated the cache. Thus we
3247 		can trust the result in the cache is ok for this query. */
3248 
3249 		return(true);
3250 	}
3251 
3252 	/* Normalize the table name to InnoDB format */
3253 	normalize_table_name(norm_name, full_name);
3254 
3255 	innobase_register_trx(innodb_hton_ptr, thd, trx);
3256 
3257 	return innobase_query_caching_table_check(trx, norm_name);
3258 }
3259 
3260 /*****************************************************************//**
3261 Invalidates the MySQL query cache for the table. */
3262 void
innobase_invalidate_query_cache(trx_t * trx,const char * full_name)3263 innobase_invalidate_query_cache(
3264 /*============================*/
3265 	trx_t*		trx,		/*!< in: transaction which
3266 					modifies the table */
3267 	const char*	full_name)	/*!< in: concatenation of
3268 					database name, path separator,
3269 					table name, null char NUL;
3270 					NOTE that in Windows this is
3271 					always in LOWER CASE! */
3272 {
3273 	/* Note that the sync0mutex.h rank of the query cache mutex is just
3274 	above the InnoDB trx_sys_t->lock. The caller of this function must
3275 	not have latches of a lower rank. */
3276 
3277 #ifdef HAVE_QUERY_CACHE
3278         char    qcache_key_name[2 * (NAME_LEN + 1)];
3279         char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
3280         const char *key_ptr;
3281         size_t  tabname_len;
3282 
3283         // Extract the database name.
3284         key_ptr= strchr(full_name, '/');
3285         DBUG_ASSERT(key_ptr != NULL); // Database name should be present
3286         size_t  dbname_len= size_t(key_ptr - full_name);
3287         memcpy(db_name, full_name, dbname_len);
3288         db_name[dbname_len]= '\0';
3289 
3290         /* Construct the key("db-name\0table$name\0") for the query cache using
3291         the path name("db@002dname\0table@0024name\0") of the table in its
3292         canonical form. */
3293         dbname_len = filename_to_tablename(db_name, qcache_key_name,
3294                                            sizeof(qcache_key_name));
3295         tabname_len = filename_to_tablename(++key_ptr,
3296                                             (qcache_key_name + dbname_len + 1),
3297                                             sizeof(qcache_key_name) -
3298                                             dbname_len - 1);
3299 
3300         /* Argument TRUE below means we are using transactions */
3301         mysql_query_cache_invalidate4(trx->mysql_thd,
3302                                       qcache_key_name,
3303                                       uint(dbname_len + tabname_len + 2),
3304                                       TRUE);
3305 #endif
3306 }
3307 
3308 /** Quote a standard SQL identifier like index or column name.
3309 @param[in]	file	output stream
3310 @param[in]	trx	InnoDB transaction, or NULL
3311 @param[in]	id	identifier to quote */
3312 void
innobase_quote_identifier(FILE * file,trx_t * trx,const char * id)3313 innobase_quote_identifier(
3314 	FILE*		file,
3315 	trx_t*		trx,
3316 	const char*	id)
3317 {
3318 	const int	q = trx != NULL && trx->mysql_thd != NULL
3319 		? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3320 		: '`';
3321 
3322 	if (q == EOF) {
3323 		fputs(id, file);
3324 	} else {
3325 		putc(q, file);
3326 
3327 		while (int c = *id++) {
3328 			if (c == q) {
3329 				putc(c, file);
3330 			}
3331 			putc(c, file);
3332 		}
3333 
3334 		putc(q, file);
3335 	}
3336 }
3337 
3338 /** Quote a standard SQL identifier like tablespace, index or column name.
3339 @param[in]	trx	InnoDB transaction, or NULL
3340 @param[in]	id	identifier to quote
3341 @return quoted identifier */
3342 std::string
innobase_quote_identifier(trx_t * trx,const char * id)3343 innobase_quote_identifier(
3344 /*======================*/
3345 	trx_t*		trx,
3346 	const char*	id)
3347 {
3348 	std::string quoted_identifier;
3349 	const int	q = trx != NULL && trx->mysql_thd != NULL
3350 		? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3351 		: '`';
3352 
3353 	if (q == EOF) {
3354 		quoted_identifier.append(id);
3355 	} else {
3356 		quoted_identifier += char(q);
3357 		quoted_identifier.append(id);
3358 		quoted_identifier += char(q);
3359 	}
3360 
3361 	return (quoted_identifier);
3362 }
3363 
3364 /** Convert a table name to the MySQL system_charset_info (UTF-8)
3365 and quote it.
3366 @param[out]	buf	buffer for converted identifier
3367 @param[in]	buflen	length of buf, in bytes
3368 @param[in]	id	identifier to convert
3369 @param[in]	idlen	length of id, in bytes
3370 @param[in]	thd	MySQL connection thread, or NULL
3371 @return pointer to the end of buf */
3372 static
3373 char*
innobase_convert_identifier(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3374 innobase_convert_identifier(
3375 	char*		buf,
3376 	ulint		buflen,
3377 	const char*	id,
3378 	ulint		idlen,
3379 	THD*		thd)
3380 {
3381 	const char*	s	= id;
3382 
3383 	char nz[MAX_TABLE_NAME_LEN + 1];
3384 	char nz2[MAX_TABLE_NAME_LEN + 1];
3385 
3386 	/* Decode the table name.  The MySQL function expects
3387 	a NUL-terminated string.  The input and output strings
3388 	buffers must not be shared. */
3389 	ut_a(idlen <= MAX_TABLE_NAME_LEN);
3390 	memcpy(nz, id, idlen);
3391 	nz[idlen] = 0;
3392 
3393 	s = nz2;
3394 	idlen = explain_filename(thd, nz, nz2, sizeof nz2,
3395 				 EXPLAIN_PARTITIONS_AS_COMMENT);
3396 	if (idlen > buflen) {
3397 		idlen = buflen;
3398 	}
3399 	memcpy(buf, s, idlen);
3400 	return(buf + idlen);
3401 }
3402 
3403 /*****************************************************************//**
3404 Convert a table name to the MySQL system_charset_info (UTF-8).
3405 @return pointer to the end of buf */
3406 char*
innobase_convert_name(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3407 innobase_convert_name(
3408 /*==================*/
3409 	char*		buf,	/*!< out: buffer for converted identifier */
3410 	ulint		buflen,	/*!< in: length of buf, in bytes */
3411 	const char*	id,	/*!< in: table name to convert */
3412 	ulint		idlen,	/*!< in: length of id, in bytes */
3413 	THD*		thd)	/*!< in: MySQL connection thread, or NULL */
3414 {
3415 	char*		s	= buf;
3416 	const char*	bufend	= buf + buflen;
3417 
3418 	const char*	slash = (const char*) memchr(id, '/', idlen);
3419 
3420 	if (slash == NULL) {
3421 		return(innobase_convert_identifier(
3422 				buf, buflen, id, idlen, thd));
3423 	}
3424 
3425 	/* Print the database name and table name separately. */
3426 	s = innobase_convert_identifier(s, ulint(bufend - s),
3427 					id, ulint(slash - id), thd);
3428 	if (s < bufend) {
3429 		*s++ = '.';
3430 		s = innobase_convert_identifier(s, ulint(bufend - s),
3431 						slash + 1, idlen
3432 						- ulint(slash - id) - 1,
3433 						thd);
3434 	}
3435 
3436 	return(s);
3437 }
3438 
3439 /*****************************************************************//**
3440 A wrapper function of innobase_convert_name(), convert a table name
3441 to the MySQL system_charset_info (UTF-8) and quote it if needed.
3442 @return pointer to the end of buf */
3443 void
innobase_format_name(char * buf,ulint buflen,const char * name)3444 innobase_format_name(
3445 /*==================*/
3446 	char*		buf,	/*!< out: buffer for converted identifier */
3447 	ulint		buflen,	/*!< in: length of buf, in bytes */
3448 	const char*	name)	/*!< in: table name to format */
3449 {
3450 	const char*     bufend;
3451 
3452 	bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL);
3453 
3454 	ut_ad((ulint) (bufend - buf) < buflen);
3455 
3456 	buf[bufend - buf] = '\0';
3457 }
3458 
3459 /**********************************************************************//**
3460 Determines if the currently running transaction has been interrupted.
3461 @return true if interrupted */
3462 bool
trx_is_interrupted(const trx_t * trx)3463 trx_is_interrupted(
3464 /*===============*/
3465 	const trx_t*	trx)	/*!< in: transaction */
3466 {
3467 	return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
3468 }
3469 
3470 /**************************************************************//**
3471 Resets some fields of a m_prebuilt struct. The template is used in fast
3472 retrieval of just those column values MySQL needs in its processing. */
3473 void
reset_template(void)3474 ha_innobase::reset_template(void)
3475 /*=============================*/
3476 {
3477 	ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
3478 	ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n);
3479 
3480 	/* Force table to be freed in close_thread_table(). */
3481 	DBUG_EXECUTE_IF("free_table_in_fts_query",
3482 		if (m_prebuilt->in_fts_query) {
3483                   table->mark_table_for_reopen();
3484 		}
3485 	);
3486 
3487 	m_prebuilt->keep_other_fields_on_keyread = false;
3488 	m_prebuilt->read_just_key = 0;
3489 	m_prebuilt->in_fts_query = 0;
3490 
3491 	/* Reset index condition pushdown state. */
3492 	if (m_prebuilt->idx_cond) {
3493 		m_prebuilt->idx_cond = NULL;
3494 		m_prebuilt->idx_cond_n_cols = 0;
3495 		/* Invalidate m_prebuilt->mysql_template
3496 		in ha_innobase::write_row(). */
3497 		m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3498 	}
3499 }
3500 
3501 /*****************************************************************//**
3502 Call this when you have opened a new table handle in HANDLER, before you
3503 call index_read_map() etc. Actually, we can let the cursor stay open even
3504 over a transaction commit! Then you should call this before every operation,
3505 fetch next etc. This function inits the necessary things even after a
3506 transaction commit. */
3507 
3508 void
init_table_handle_for_HANDLER(void)3509 ha_innobase::init_table_handle_for_HANDLER(void)
3510 /*============================================*/
3511 {
3512 	/* If current thd does not yet have a trx struct, create one.
3513 	If the current handle does not yet have a m_prebuilt struct, create
3514 	one. Update the trx pointers in the m_prebuilt struct. Normally
3515 	this operation is done in external_lock. */
3516 
3517 	update_thd(ha_thd());
3518 
3519 	/* Initialize the m_prebuilt struct much like it would be inited in
3520 	external_lock */
3521 
3522 	innobase_srv_conc_force_exit_innodb(m_prebuilt->trx);
3523 
3524 	/* If the transaction is not started yet, start it */
3525 
3526 	trx_start_if_not_started_xa(m_prebuilt->trx, false);
3527 
3528 	/* Assign a read view if the transaction does not have it yet */
3529 
3530 	m_prebuilt->trx->read_view.open(m_prebuilt->trx);
3531 
3532 	innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
3533 
3534 	/* We did the necessary inits in this function, no need to repeat them
3535 	in row_search_for_mysql */
3536 
3537 	m_prebuilt->sql_stat_start = FALSE;
3538 
3539 	/* We let HANDLER always to do the reads as consistent reads, even
3540 	if the trx isolation level would have been specified as SERIALIZABLE */
3541 
3542 	m_prebuilt->select_lock_type = LOCK_NONE;
3543 	m_prebuilt->stored_select_lock_type = LOCK_NONE;
3544 
3545 	/* Always fetch all columns in the index record */
3546 
3547 	m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
3548 
3549 	/* We want always to fetch all columns in the whole row? Or do
3550 	we???? */
3551 
3552 	m_prebuilt->used_in_HANDLER = TRUE;
3553 
3554 	reset_template();
3555 }
3556 
3557 /*********************************************************************//**
3558 Free any resources that were allocated and return failure.
3559 @return always return 1 */
innodb_init_abort()3560 static int innodb_init_abort()
3561 {
3562 	DBUG_ENTER("innodb_init_abort");
3563 
3564 	if (fil_system.temp_space) {
3565 		fil_system.temp_space->close();
3566 	}
3567 
3568 	srv_sys_space.shutdown();
3569 	if (srv_tmp_space.get_sanity_check_status()) {
3570 		srv_tmp_space.delete_files();
3571 	}
3572 	srv_tmp_space.shutdown();
3573 
3574 #ifdef WITH_INNODB_DISALLOW_WRITES
3575 	os_event_destroy(srv_allow_writes_event);
3576 #endif /* WITH_INNODB_DISALLOW_WRITES */
3577 	DBUG_RETURN(1);
3578 }
3579 
3580 /** Return the minimum buffer pool size based on page size */
min_buffer_pool_size()3581 static inline ulint min_buffer_pool_size()
3582 {
3583   ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size;
3584   /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */
3585   ulint alignment= 1U << 20;
3586   return UT_CALC_ALIGN(s, alignment);
3587 }
3588 
/** Validate the requested buffer pool size.  Also, reserve the necessary
memory needed for buffer pool resize.
This is only a forward declaration; it allows the definition of the
buffer_pool_size system variable further down to name this function.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	save	immediate result for update function
@param[in]	value	incoming string
@return 0 on success, 1 on failure.
*/
static
int
innodb_buffer_pool_size_validate(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				save,
	struct st_mysql_value*		value);
3604 
/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
This is only a forward declaration; it allows the definition of the
buffer_pool_size system variable further down to name this function.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	var_ptr	where the formal string goes
@param[in]	save	immediate result from check function */
static
void
innodb_buffer_pool_size_update(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				var_ptr,
	const void*			save);
3618 
/* Definition of the innodb_buffer_pool_size system variable, backed by
innobase_buffer_pool_size and wired to the validate/update callbacks
declared above.

If the default value of innodb_buffer_pool_size is increased to be more than
BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
can be removed and 8 used instead. The problem with the current setup is that
with 128MiB default buffer pool size and 8 instances by default we would emit
a warning when no options are specified.

NOTE: the min_val of this variable is overwritten at startup by
innodb_init_params() with the result of min_buffer_pool_size(), so the
2ULL << 20 below (presumably the static minimum; confirm against the
MYSQL_SYSVAR_ULONGLONG argument order) is not the effective lower bound. */
static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  innodb_buffer_pool_size_validate,
  innodb_buffer_pool_size_update,
  128ULL << 20,
  2ULL << 20,
  LLONG_MAX, 1024*1024L);
3632 
/** Deprecation message about innodb_idle_flush_pct */
static const char*	deprecated_idle_flush_pct
	= "innodb_idle_flush_pct is DEPRECATED and has no effect.";

/** Deprecation message about innodb_checksum_algorithm values other than
crc32 and strict_crc32; emitted both at startup and when the variable is
changed at runtime */
static const char*	deprecated_innodb_checksum_algorithm
	= "Setting innodb_checksum_algorithm to values other than"
	" crc32 or strict_crc32 is UNSAFE and DEPRECATED."
	" These deprecated values will be disallowed in MariaDB 10.6.";

/** Value of the deprecated innodb_idle_flush_pct setting (presumably bound
to the system variable of the same name elsewhere in this file).
innodb_init_params() only compares it with 100 in order to decide whether
to print deprecated_idle_flush_pct. */
static ulong innodb_idle_flush_pct;
3643 
3644 /** If applicable, emit a message that log checksums cannot be disabled.
3645 @param[in,out]	thd	client session, or NULL if at startup
3646 @param[in]	check	whether redo log block checksums are enabled
3647 @return whether redo log block checksums are enabled */
3648 static inline
3649 bool
innodb_log_checksums_func_update(THD * thd,bool check)3650 innodb_log_checksums_func_update(THD* thd, bool check)
3651 {
3652 	static const char msg[] = "innodb_log_checksums is deprecated"
3653 		" and has no effect outside recovery";
3654 
3655 	ut_ad(!thd == !srv_was_started);
3656 
3657 	if (!check) {
3658 		if (thd) {
3659 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3660 					    HA_ERR_UNSUPPORTED, msg);
3661 			check = true;
3662 		} else {
3663 			sql_print_warning(msg);
3664 		}
3665 	}
3666 
3667 	return(check);
3668 }
3669 
innodb_checksum_algorithm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)3670 static void innodb_checksum_algorithm_update(THD *thd, st_mysql_sys_var*,
3671                                              void *, const void *save)
3672 {
3673   srv_checksum_algorithm= *static_cast<const ulong*>(save);
3674   switch (srv_checksum_algorithm) {
3675   case SRV_CHECKSUM_ALGORITHM_CRC32:
3676   case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
3677     break;
3678   default:
3679     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3680                         HA_ERR_UNSUPPORTED,
3681                         deprecated_innodb_checksum_algorithm);
3682   }
3683 }
3684 
/****************************************************************//**
File name extensions of an InnoDB single-table tablespace, as a list
terminated by NullS. innodb_init() installs this array as the handlerton's
tablefile_extensions. */
static const char* ha_innobase_exts[] = {
	dot_ext[IBD],
	dot_ext[ISL],
	NullS
};
3692 
3693 /** Determine if system-versioned data was modified by the transaction.
3694 @param[in,out]	thd	current session
3695 @param[out]	trx_id	transaction start ID
3696 @return	transaction commit ID
3697 @retval	0	if no system-versioned data was affected by the transaction */
innodb_prepare_commit_versioned(THD * thd,ulonglong * trx_id)3698 static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
3699 {
3700 	if (const trx_t* trx = thd_to_trx(thd)) {
3701 		*trx_id = trx->id;
3702 
3703 		for (trx_mod_tables_t::const_iterator t
3704 			     = trx->mod_tables.begin();
3705 		     t != trx->mod_tables.end(); t++) {
3706 			if (t->second.is_versioned()) {
3707 				DBUG_ASSERT(t->first->versioned_by_id());
3708 				DBUG_ASSERT(trx->rsegs.m_redo.rseg);
3709 
3710 				return trx_sys.get_new_trx_id();
3711 			}
3712 		}
3713 
3714 		return 0;
3715 	}
3716 
3717 	*trx_id = 0;
3718 	return 0;
3719 }
3720 
3721 /** Initialize and normalize innodb_buffer_pool_size. */
innodb_buffer_pool_size_init()3722 static void innodb_buffer_pool_size_init()
3723 {
3724 	if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
3725 
3726 		if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
3727 #if defined(_WIN32) && !defined(_WIN64)
3728 			/* Do not allocate too large of a buffer pool on
3729 			Windows 32-bit systems, which can have trouble
3730 			allocating larger single contiguous memory blocks. */
3731 			srv_buf_pool_size = ulint(
3732 				ut_uint64_align_up(srv_buf_pool_size,
3733 						   srv_buf_pool_chunk_unit));
3734 			srv_buf_pool_instances = std::min<ulong>(
3735 				MAX_BUFFER_POOLS,
3736 				ulong(srv_buf_pool_size
3737 				      / srv_buf_pool_chunk_unit));
3738 #else /* defined(_WIN32) && !defined(_WIN64) */
3739 			/* Default to 8 instances when size > 1GB. */
3740 			srv_buf_pool_instances = 8;
3741 #endif /* defined(_WIN32) && !defined(_WIN64) */
3742 		}
3743 	} else {
3744 		/* If buffer pool is less than 1 GiB, assume fewer
3745 		threads. Also use only one buffer pool instance. */
3746 		if (srv_buf_pool_instances != srv_buf_pool_instances_default
3747 		    && srv_buf_pool_instances != 1) {
3748 			/* We can't distinguish whether the user has explicitly
3749 			started mysqld with --innodb-buffer-pool-instances=0,
3750 			(srv_buf_pool_instances_default is 0) or has not
3751 			specified that option at all. Thus we have the
3752 			limitation that if the user started with =0, we
3753 			will not emit a warning here, but we should actually
3754 			do so. */
3755 			ib::info()
3756 				<< "Adjusting innodb_buffer_pool_instances"
3757 				" from " << srv_buf_pool_instances << " to 1"
3758 				" since innodb_buffer_pool_size is less than "
3759 				<< BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
3760 				<< " MiB";
3761 		}
3762 
3763 		srv_buf_pool_instances = 1;
3764 	}
3765 
3766 	if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
3767 	    > srv_buf_pool_size) {
3768 		/* Size unit of buffer pool is larger than srv_buf_pool_size.
3769 		adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
3770 		srv_buf_pool_chunk_unit
3771 			= static_cast<ulong>(srv_buf_pool_size)
3772 			  / srv_buf_pool_instances;
3773 		if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
3774 			++srv_buf_pool_chunk_unit;
3775 		}
3776 	}
3777 
3778 	srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
3779 	innobase_buffer_pool_size = srv_buf_pool_size;
3780 }
3781 
3782 /** Initialize, validate and normalize the InnoDB startup parameters.
3783 @return failure code
3784 @retval 0 on success
3785 @retval HA_ERR_OUT_OF_MEM	when out of memory
3786 @retval HA_ERR_INITIALIZATION	when some parameters are out of range */
innodb_init_params()3787 static int innodb_init_params()
3788 {
3789 	DBUG_ENTER("innodb_init_params");
3790 
3791 	static char	current_dir[3];
3792 	char		*default_path;
3793 	ulong		num_pll_degree;
3794 
3795 	if (innodb_large_prefix || innodb_file_format) {
3796 		const char* p = innodb_file_format
3797 			? "file_format"
3798 			: "large_prefix";
3799 		sql_print_warning("The parameter innodb_%s is deprecated"
3800 				  " and has no effect."
3801 				  " It may be removed in future releases."
3802 				  " See https://mariadb.com/kb/en/library/"
3803 				  "xtradbinnodb-file-format/", p);
3804 	}
3805 
3806 	/* Check that values don't overflow on 32-bit systems. */
3807 	if (sizeof(ulint) == 4) {
3808 		if (innobase_buffer_pool_size > UINT_MAX32) {
3809 			sql_print_error(
3810 				"innodb_buffer_pool_size can't be over 4GB"
3811 				" on 32-bit systems");
3812 			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
3813 		}
3814 	}
3815 
3816 	/* The buffer pool needs to be able to accommodate enough many
3817 	pages, even for larger pages */
3818 	MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size();
3819 
3820 	if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
3821 		ib::error() << "innodb_page_size="
3822 			<< srv_page_size << " requires "
3823 			<< "innodb_buffer_pool_size >= "
3824 			<< (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20)
3825 			<< "MiB current " << (innobase_buffer_pool_size >> 20)
3826 			<< "MiB";
3827 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3828 	}
3829 
3830 	if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
3831 		ib::warn() << "The parameter innodb_lock_schedule_algorithm"
3832 			" is deprecated, and the setting"
3833 			" innodb_lock_schedule_algorithm=vats"
3834 			" may cause corruption. The parameter may be removed"
3835 			" in future releases.";
3836 
3837 #ifdef WITH_WSREP
3838 		/* Currently, Galera does not support VATS lock schedule algorithm. */
3839 		if (global_system_variables.wsrep_on) {
3840 			ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
3841 			innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
3842 		}
3843 #endif /* WITH_WSREP */
3844 	}
3845 
3846 #ifdef WITH_WSREP
3847 	/* Print deprecation info if xtrabackup is used for SST method */
3848 	if (global_system_variables.wsrep_on
3849 	    && wsrep_sst_method
3850 	    && (!strcmp(wsrep_sst_method, "xtrabackup")
3851 	        || !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
3852 		ib::info() << "Galera SST method xtrabackup is deprecated and the "
3853 			" support for it may be removed in future releases.";
3854 	}
3855 #endif /* WITH_WSREP */
3856 
3857 #ifndef HAVE_LZ4
3858 	if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
3859 		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3860 				"InnoDB: liblz4 is not installed. \n",
3861 				innodb_compression_algorithm);
3862 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3863 	}
3864 #endif
3865 
3866 #ifndef HAVE_LZO
3867 	if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
3868 		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3869 				"InnoDB: liblzo is not installed. \n",
3870 				innodb_compression_algorithm);
3871 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3872 	}
3873 #endif
3874 
3875 #ifndef HAVE_LZMA
3876 	if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
3877 		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3878 				"InnoDB: liblzma is not installed. \n",
3879 				innodb_compression_algorithm);
3880 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3881 	}
3882 #endif
3883 
3884 #ifndef HAVE_BZIP2
3885 	if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
3886 		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3887 				"InnoDB: libbz2 is not installed. \n",
3888 				innodb_compression_algorithm);
3889 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3890 	}
3891 #endif
3892 
3893 #ifndef HAVE_SNAPPY
3894 	if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
3895 		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
3896 				"InnoDB: libsnappy is not installed. \n",
3897 				innodb_compression_algorithm);
3898 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3899 	}
3900 #endif
3901 
3902 	if ((srv_encrypt_tables || srv_encrypt_log
3903 	     || innodb_encrypt_temporary_tables)
3904 	     && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
3905 		sql_print_error("InnoDB: cannot enable encryption, "
3906 				"encryption plugin is not available");
3907 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3908 	}
3909 
3910 #ifdef _WIN32
3911 	if (!is_filename_allowed(srv_buf_dump_filename,
3912 				 strlen(srv_buf_dump_filename), FALSE)) {
3913 		sql_print_error("InnoDB: innodb_buffer_pool_filename"
3914 			" cannot have colon (:) in the file name.");
3915 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3916 	}
3917 #endif
3918 
3919 	/* First calculate the default path for innodb_data_home_dir etc.,
3920 	in case the user has not given any value.
3921 
3922 	Note that when using the embedded server, the datadirectory is not
3923 	necessarily the current directory of this program. */
3924 
3925 	if (mysqld_embedded) {
3926 		default_path = mysql_real_data_home;
3927 	} else {
3928 		/* It's better to use current lib, to keep paths short */
3929 		current_dir[0] = FN_CURLIB;
3930 		current_dir[1] = FN_LIBCHAR;
3931 		current_dir[2] = 0;
3932 		default_path = current_dir;
3933 	}
3934 
3935 	ut_a(default_path);
3936 
3937 	fil_path_to_mysql_datadir = default_path;
3938 
3939 	/* Set InnoDB initialization parameters according to the values
3940 	read from MySQL .cnf file */
3941 
3942 	/* The default dir for data files is the datadir of MySQL */
3943 
3944 	srv_data_home = innobase_data_home_dir
3945 		? innobase_data_home_dir : default_path;
3946 #ifdef WITH_WSREP
3947 	/* If we use the wsrep API, then we need to tell the server
3948 	the path to the data files (for passing it to the SST scripts): */
3949 	wsrep_set_data_home_dir(srv_data_home);
3950 #endif /* WITH_WSREP */
3951 
3952 
3953 	/*--------------- Shared tablespaces -------------------------*/
3954 
3955 	/* Check that the value of system variable innodb_page_size was
3956 	set correctly.  Its value was put into srv_page_size. If valid,
3957 	return the associated srv_page_size_shift. */
3958 	srv_page_size_shift = innodb_page_size_validate(srv_page_size);
3959 	if (!srv_page_size_shift) {
3960 		sql_print_error("InnoDB: Invalid page size=%lu.\n",
3961 				srv_page_size);
3962 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3963 	}
3964 
3965 	/* This is the first time univ_page_size is used.
3966 	It was initialized to 16k pages before srv_page_size was set */
3967 	univ_page_size.copy_from(
3968 		page_size_t(srv_page_size, srv_page_size, false));
3969 
3970 	srv_sys_space.set_space_id(TRX_SYS_SPACE);
3971 	srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
3972 	srv_sys_space.set_name("innodb_system");
3973 	srv_sys_space.set_path(srv_data_home);
3974 
3975 	/* Supports raw devices */
3976 	if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
3977 		ib::error() << "Unable to parse innodb_data_file_path="
3978 			    << innobase_data_file_path;
3979 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3980 	}
3981 
3982 	srv_tmp_space.set_name("innodb_temporary");
3983 	srv_tmp_space.set_path(srv_data_home);
3984 	srv_tmp_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
3985 
3986 	if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
3987 		ib::error() << "Unable to parse innodb_temp_data_file_path="
3988 			    << innobase_temp_data_file_path;
3989 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3990 	}
3991 
3992 	/* Perform all sanity check before we take action of deleting files*/
3993 	if (srv_sys_space.intersection(&srv_tmp_space)) {
3994 		sql_print_error("%s and %s file names seem to be the same.",
3995 			srv_tmp_space.name(), srv_sys_space.name());
3996 		DBUG_RETURN(HA_ERR_INITIALIZATION);
3997 	}
3998 
3999 	srv_sys_space.normalize_size();
4000 	srv_tmp_space.normalize_size();
4001 
4002 	/* ------------ UNDO tablespaces files ---------------------*/
4003 	if (!srv_undo_dir) {
4004 		srv_undo_dir = default_path;
4005 	}
4006 
4007 	os_normalize_path(srv_undo_dir);
4008 
4009 	if (strchr(srv_undo_dir, ';')) {
4010 		sql_print_error("syntax error in innodb_undo_directory");
4011 		DBUG_RETURN(HA_ERR_INITIALIZATION);
4012 	}
4013 
4014 	/* -------------- All log files ---------------------------*/
4015 
4016 	/* The default dir for log files is the datadir of MySQL */
4017 
4018 	if (!srv_log_group_home_dir) {
4019 		srv_log_group_home_dir = default_path;
4020 	}
4021 
4022 	os_normalize_path(srv_log_group_home_dir);
4023 
4024 	if (strchr(srv_log_group_home_dir, ';')) {
4025 		sql_print_error("syntax error in innodb_log_group_home_dir");
4026 		DBUG_RETURN(HA_ERR_INITIALIZATION);
4027 	}
4028 
4029 	if (srv_n_log_files * srv_log_file_size >= log_group_max_size) {
4030 		/* Log group size is limited by the size of page number.
4031 		Remove this limitation when fil_io() is not used for
4032 		recovery log io. */
4033 		ib::error() << "Combined size of log files must be < "
4034 			<< log_group_max_size;
4035 		DBUG_RETURN(HA_ERR_INITIALIZATION);
4036 	}
4037 
4038 	DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);
4039 
4040 	/* Check that interdependent parameters have sane values. */
4041 	if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
4042 		sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
4043 				  " cannot be set higher than"
4044 				  " innodb_max_dirty_pages_pct.\n"
4045 				  "InnoDB: Setting"
4046 				  " innodb_max_dirty_pages_pct_lwm to %lf\n",
4047 				  srv_max_buf_pool_modified_pct);
4048 
4049 		srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
4050 	}
4051 
4052 	if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {
4053 
4054 		if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
4055 			/* Avoid overflow. */
4056 			srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
4057 		} else {
4058 			/* The user has not set the value. We should
4059 			set it based on innodb_io_capacity. */
4060 			srv_max_io_capacity =
4061 				ut_max(2 * srv_io_capacity, 2000UL);
4062 		}
4063 
4064 	} else if (srv_max_io_capacity < srv_io_capacity) {
4065 		sql_print_warning("InnoDB: innodb_io_capacity"
4066 				  " cannot be set higher than"
4067 				  " innodb_io_capacity_max."
4068 				  "Setting innodb_io_capacity=%lu",
4069 				  srv_max_io_capacity);
4070 
4071 		srv_io_capacity = srv_max_io_capacity;
4072 	}
4073 
4074 	if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
4075 		ib::info() << "innodb_page_size=" << srv_page_size;
4076 
4077 		srv_max_undo_log_size = std::max(
4078 			srv_max_undo_log_size,
4079 			ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
4080 			<< srv_page_size_shift);
4081 	}
4082 
4083 	if (srv_log_write_ahead_size > srv_page_size) {
4084 		srv_log_write_ahead_size = srv_page_size;
4085 	} else {
4086 		ulong	srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE;
4087 
4088 		while (srv_log_write_ahead_size_tmp
4089 		       < srv_log_write_ahead_size) {
4090 			srv_log_write_ahead_size_tmp
4091 				= srv_log_write_ahead_size_tmp * 2;
4092 		}
4093 		if (srv_log_write_ahead_size_tmp
4094 		    != srv_log_write_ahead_size) {
4095 			srv_log_write_ahead_size
4096 				= srv_log_write_ahead_size_tmp / 2;
4097 		}
4098 	}
4099 
4100 	srv_buf_pool_size = ulint(innobase_buffer_pool_size);
4101 
4102 	if (!innobase_use_checksums) {
4103 		ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
4104 			" This option was removed in MariaDB 10.5.";
4105 		srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
4106 	} else {
4107 		switch (srv_checksum_algorithm) {
4108 		case SRV_CHECKSUM_ALGORITHM_CRC32:
4109 		case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
4110 			break;
4111 		default:
4112 			ib::warn() << deprecated_innodb_checksum_algorithm;
4113 		}
4114 	}
4115 
4116 	innodb_log_checksums = innodb_log_checksums_func_update(
4117 		NULL, innodb_log_checksums);
4118 
4119 	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
4120 
4121 	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
4122 	if (innobase_locks_unsafe_for_binlog) {
4123 		ib::warn() << "Using innodb_locks_unsafe_for_binlog is"
4124 			" DEPRECATED. This option may be removed in future"
4125 			" releases. Please use READ COMMITTED transaction"
4126 			" isolation level instead; " << SET_TRANSACTION_MSG;
4127 	}
4128 
4129 	if (innobase_open_files < 10) {
4130 		innobase_open_files = 300;
4131 		if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) {
4132 			innobase_open_files = tc_size;
4133 		}
4134 	}
4135 
4136 	if (innobase_open_files > open_files_limit) {
4137 		ib::warn() << "innodb_open_files " << innobase_open_files
4138 			   << " should not be greater"
4139 			   << " than the open_files_limit " << open_files_limit;
4140 		if (innobase_open_files > tc_size) {
4141 			innobase_open_files = tc_size;
4142 		}
4143 	}
4144 
4145 	srv_max_n_open_files = innobase_open_files;
4146 	srv_innodb_status = (ibool) innobase_create_status_file;
4147 
4148 	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
4149 
4150 	/* Round up fts_sort_pll_degree to nearest power of 2 number */
4151 	for (num_pll_degree = 1;
4152 	     num_pll_degree < fts_sort_pll_degree;
4153 	     num_pll_degree <<= 1) {
4154 
4155 		/* No op */
4156 	}
4157 
4158 	fts_sort_pll_degree = num_pll_degree;
4159 
4160 	/* Store the default charset-collation number of this MySQL
4161 	installation */
4162 
4163 	data_mysql_default_charset_coll = (ulint) default_charset_info->number;
4164 
4165 	innobase_commit_concurrency_init_default();
4166 
4167 	if (innodb_idle_flush_pct != 100) {
4168 		ib::warn() << deprecated_idle_flush_pct;
4169 	}
4170 
4171 #ifndef _WIN32
4172 	if (srv_use_atomic_writes && my_may_have_atomic_write) {
4173 		/*
4174                   Force O_DIRECT on Unixes (on Windows writes are always
4175                   unbuffered)
4176                 */
4177 		switch (innodb_flush_method) {
4178 		case SRV_O_DIRECT:
4179 		case SRV_O_DIRECT_NO_FSYNC:
4180 			break;
4181 		default:
4182 			innodb_flush_method = SRV_O_DIRECT;
4183 			fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
4184 		}
4185 	}
4186 #endif
4187 
4188 	if (srv_read_only_mode) {
4189 		ib::info() << "Started in read only mode";
4190 		srv_use_doublewrite_buf = FALSE;
4191 	}
4192 
4193 #ifdef LINUX_NATIVE_AIO
4194 	if (srv_use_native_aio) {
4195 		ib::info() << "Using Linux native AIO";
4196 	}
4197 #elif !defined _WIN32
4198 	/* Currently native AIO is supported only on windows and linux
4199 	and that also when the support is compiled in. In all other
4200 	cases, we ignore the setting of innodb_use_native_aio. */
4201 	srv_use_native_aio = FALSE;
4202 #endif
4203 
4204 #ifndef _WIN32
4205 	ut_ad(innodb_flush_method <= SRV_O_DIRECT_NO_FSYNC);
4206 #else
4207 	switch (innodb_flush_method) {
4208 	case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
4209 		innodb_flush_method = SRV_ALL_O_DIRECT_FSYNC;
4210 		break;
4211 	case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
4212 		innodb_flush_method = SRV_FSYNC;
4213 		break;
4214 	default:
4215 		ut_ad(innodb_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
4216 	}
4217 #endif
4218 	srv_file_flush_method = srv_flush_t(innodb_flush_method);
4219 
4220 	innodb_buffer_pool_size_init();
4221 
4222 	if (srv_n_page_cleaners > srv_buf_pool_instances) {
4223 		/* limit of page_cleaner parallelizability
4224 		is number of buffer pool instances. */
4225 		srv_n_page_cleaners = srv_buf_pool_instances;
4226 	}
4227 
4228 	srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
4229 	DBUG_RETURN(0);
4230 }
4231 
4232 /** Initialize the InnoDB storage engine plugin.
4233 @param[in,out]	p	InnoDB handlerton
4234 @return error code
4235 @retval 0 on success */
innodb_init(void * p)4236 static int innodb_init(void* p)
4237 {
4238 	DBUG_ENTER("innodb_init");
4239 	handlerton* innobase_hton= static_cast<handlerton*>(p);
4240 	innodb_hton_ptr = innobase_hton;
4241 
4242 	innobase_hton->state = SHOW_OPTION_YES;
4243 	innobase_hton->db_type = DB_TYPE_INNODB;
4244 	innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
4245 	innobase_hton->close_connection = innobase_close_connection;
4246 	innobase_hton->kill_query = innobase_kill_query;
4247 	innobase_hton->savepoint_set = innobase_savepoint;
4248 	innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
4249 
4250 	innobase_hton->savepoint_rollback_can_release_mdl =
4251 				innobase_rollback_to_savepoint_can_release_mdl;
4252 
4253 	innobase_hton->savepoint_release = innobase_release_savepoint;
4254 	innobase_hton->prepare_ordered= NULL;
4255 	innobase_hton->commit_ordered= innobase_commit_ordered;
4256 	innobase_hton->commit = innobase_commit;
4257 	innobase_hton->rollback = innobase_rollback;
4258 	innobase_hton->prepare = innobase_xa_prepare;
4259 	innobase_hton->recover = innobase_xa_recover;
4260 	innobase_hton->commit_by_xid = innobase_commit_by_xid;
4261 	innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
4262 	innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
4263 	innobase_hton->create = innobase_create_handler;
4264 
4265 	innobase_hton->drop_database = innobase_drop_database;
4266 	innobase_hton->panic = innobase_end;
4267 
4268 	innobase_hton->start_consistent_snapshot =
4269 		innobase_start_trx_and_assign_read_view;
4270 
4271 	innobase_hton->flush_logs = innobase_flush_logs;
4272 	innobase_hton->show_status = innobase_show_status;
4273 	innobase_hton->flags =
4274 		HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS |
4275 		HTON_NATIVE_SYS_VERSIONING |
4276 		HTON_REQUIRES_CLOSE_AFTER_TRUNCATE;
4277 
4278 #ifdef WITH_WSREP
4279 	innobase_hton->abort_transaction=wsrep_abort_transaction;
4280 	innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
4281 	innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
4282 	innobase_hton->fake_trx_id=wsrep_fake_trx_id;
4283 #endif /* WITH_WSREP */
4284 
4285 	innobase_hton->tablefile_extensions = ha_innobase_exts;
4286 	innobase_hton->table_options = innodb_table_option_list;
4287 
4288 	/* System Versioning */
4289 	innobase_hton->prepare_commit_versioned
4290 		= innodb_prepare_commit_versioned;
4291 
4292 	innodb_remember_check_sysvar_funcs();
4293 
4294 	compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);
4295 
4296 #ifndef DBUG_OFF
4297 	static const char	test_filename[] = "-@";
4298 	char			test_tablename[sizeof test_filename
4299 				+ sizeof(srv_mysql50_table_name_prefix) - 1];
4300 	DBUG_ASSERT(sizeof test_tablename - 1
4301 		    == filename_to_tablename(test_filename,
4302 					     test_tablename,
4303 					     sizeof test_tablename, true));
4304 	DBUG_ASSERT(!strncmp(test_tablename,
4305 			     srv_mysql50_table_name_prefix,
4306 			     sizeof srv_mysql50_table_name_prefix - 1));
4307 	DBUG_ASSERT(!strcmp(test_tablename
4308 			    + sizeof srv_mysql50_table_name_prefix - 1,
4309 			    test_filename));
4310 #endif /* DBUG_OFF */
4311 
4312 	os_file_set_umask(my_umask);
4313 
4314 	/* Setup the memory alloc/free tracing mechanisms before calling
4315 	any functions that could possibly allocate memory. */
4316 	ut_new_boot();
4317 
4318 	if (int error = innodb_init_params()) {
4319 		DBUG_RETURN(error);
4320 	}
4321 
4322 	/* After this point, error handling has to use
4323 	innodb_init_abort(). */
4324 
4325 #ifdef HAVE_PSI_INTERFACE
4326 	/* Register keys with MySQL performance schema */
4327 	int	count;
4328 
4329 	count = array_elements(all_pthread_mutexes);
4330 	mysql_mutex_register("innodb", all_pthread_mutexes, count);
4331 
4332 # ifdef UNIV_PFS_MUTEX
4333 	count = array_elements(all_innodb_mutexes);
4334 	mysql_mutex_register("innodb", all_innodb_mutexes, count);
4335 # endif /* UNIV_PFS_MUTEX */
4336 
4337 # ifdef UNIV_PFS_RWLOCK
4338 	count = array_elements(all_innodb_rwlocks);
4339 	mysql_rwlock_register("innodb", all_innodb_rwlocks, count);
4340 # endif /* UNIV_PFS_MUTEX */
4341 
4342 # ifdef UNIV_PFS_THREAD
4343 	count = array_elements(all_innodb_threads);
4344 	mysql_thread_register("innodb", all_innodb_threads, count);
4345 # endif /* UNIV_PFS_THREAD */
4346 
4347 # ifdef UNIV_PFS_IO
4348 	count = array_elements(all_innodb_files);
4349 	mysql_file_register("innodb", all_innodb_files, count);
4350 # endif /* UNIV_PFS_IO */
4351 
4352 	count = array_elements(all_innodb_conds);
4353 	mysql_cond_register("innodb", all_innodb_conds, count);
4354 #endif /* HAVE_PSI_INTERFACE */
4355 
4356 	bool	create_new_db = false;
4357 
4358 	/* Check whether the data files exist. */
4359 	dberr_t	err = srv_sys_space.check_file_spec(&create_new_db, 5U << 20);
4360 
4361 	if (err != DB_SUCCESS) {
4362 		DBUG_RETURN(innodb_init_abort());
4363 	}
4364 
4365 	err = srv_start(create_new_db);
4366 
4367 	if (err != DB_SUCCESS) {
4368 		innodb_shutdown();
4369 		DBUG_RETURN(innodb_init_abort());
4370 	} else if (!srv_read_only_mode) {
4371 		mysql_thread_create(thd_destructor_thread_key,
4372 				    &thd_destructor_thread,
4373 				    NULL, thd_destructor_proxy, NULL);
4374 		while (!my_atomic_loadptr_explicit(reinterpret_cast<void**>
4375 						   (&srv_running),
4376 						   MY_MEMORY_ORDER_RELAXED))
4377 			os_thread_sleep(20);
4378 	}
4379 
4380 	srv_was_started = true;
4381 	innodb_params_adjust();
4382 
4383 	innobase_old_blocks_pct = static_cast<uint>(
4384 		buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));
4385 
4386 	ibuf_max_size_update(srv_change_buffer_max_size);
4387 
4388 	mysql_mutex_init(commit_cond_mutex_key,
4389 			 &commit_cond_m, MY_MUTEX_INIT_FAST);
4390 	mysql_cond_init(commit_cond_key, &commit_cond, 0);
4391 	mysql_mutex_init(pending_checkpoint_mutex_key,
4392 			 &pending_checkpoint_mutex,
4393 			 MY_MUTEX_INIT_FAST);
4394 #ifdef MYSQL_DYNAMIC_PLUGIN
4395 	if (innobase_hton != p) {
4396 		innobase_hton = reinterpret_cast<handlerton*>(p);
4397 		*innobase_hton = *innodb_hton_ptr;
4398 	}
4399 #endif /* MYSQL_DYNAMIC_PLUGIN */
4400 
4401 	/* Currently, monitor counter information are not persistent. */
4402 	memset(monitor_set_tbl, 0, sizeof monitor_set_tbl);
4403 
4404 	memset(innodb_counter_value, 0, sizeof innodb_counter_value);
4405 
4406 	/* Do this as late as possible so server is fully starts up,
4407 	since  we might get some initial stats if user choose to turn
4408 	on some counters from start up */
4409 	if (innobase_enable_monitor_counter) {
4410 		innodb_enable_monitor_at_startup(
4411 			innobase_enable_monitor_counter);
4412 	}
4413 
4414 	/* Turn on monitor counters that are default on */
4415 	srv_mon_default_on();
4416 
4417 	/* Unit Tests */
4418 #ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
4419 	unit_test_os_file_get_parent_dir();
4420 #endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
4421 
4422 #ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
4423 	test_make_filepath();
4424 #endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
4425 
4426 #ifdef UNIV_ENABLE_DICT_STATS_TEST
4427 	test_dict_stats_all();
4428 #endif /*UNIV_ENABLE_DICT_STATS_TEST */
4429 
4430 #ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
4431 # ifdef HAVE_UT_CHRONO_T
4432 	test_row_raw_format_int();
4433 # endif /* HAVE_UT_CHRONO_T */
4434 #endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */
4435 
4436 	DBUG_RETURN(0);
4437 }
4438 
4439 /** Shut down the InnoDB storage engine.
4440 @return	0 */
4441 static
4442 int
innobase_end(handlerton *,ha_panic_function)4443 innobase_end(handlerton*, ha_panic_function)
4444 {
4445 	DBUG_ENTER("innobase_end");
4446 
4447 	if (srv_was_started) {
4448 		THD *thd= current_thd;
4449 		if (thd) { // may be UNINSTALL PLUGIN statement
4450 		 	if (trx_t* trx = thd_to_trx(thd)) {
4451 				trx->free();
4452 		 	}
4453 		}
4454 
4455 		st_my_thread_var* r = reinterpret_cast<st_my_thread_var*>(
4456 			my_atomic_loadptr_explicit(
4457 			reinterpret_cast<void**>(&srv_running),
4458 			MY_MEMORY_ORDER_RELAXED));
4459 		if (r) {
4460 			ut_ad(!srv_read_only_mode);
4461 			if (!abort_loop) {
4462 				// may be UNINSTALL PLUGIN statement
4463 				mysql_mutex_lock(r->current_mutex);
4464 				r->abort = 1;
4465 				mysql_cond_broadcast(r->current_cond);
4466 				mysql_mutex_unlock(r->current_mutex);
4467 			}
4468 			pthread_join(thd_destructor_thread, NULL);
4469 		}
4470 
4471 		innodb_shutdown();
4472 
4473 		mysql_mutex_destroy(&commit_cond_m);
4474 		mysql_cond_destroy(&commit_cond);
4475 		mysql_mutex_destroy(&pending_checkpoint_mutex);
4476 	}
4477 
4478 	DBUG_RETURN(0);
4479 }
4480 
4481 /*****************************************************************//**
4482 Commits a transaction in an InnoDB database. */
4483 void
innobase_commit_low(trx_t * trx)4484 innobase_commit_low(
4485 /*================*/
4486 	trx_t*	trx)	/*!< in: transaction handle */
4487 {
4488 #ifdef WITH_WSREP
4489 	const char* tmp = 0;
4490 	const bool is_wsrep = trx->is_wsrep();
4491 	THD* thd = trx->mysql_thd;
4492 	if (is_wsrep) {
4493 		tmp = thd_proc_info(thd, "innobase_commit_low()");
4494 	}
4495 #endif /* WITH_WSREP */
4496 	if (trx_is_started(trx)) {
4497 		trx_commit_for_mysql(trx);
4498 	} else {
4499 		trx->will_lock = false;
4500 #ifdef WITH_WSREP
4501 		trx->wsrep = false;
4502 #endif /* WITH_WSREP */
4503 	}
4504 
4505 #ifdef WITH_WSREP
4506 	if (is_wsrep) {
4507 		thd_proc_info(thd, tmp);
4508 	}
4509 #endif /* WITH_WSREP */
4510 }
4511 
4512 /*****************************************************************//**
4513 Creates an InnoDB transaction struct for the thd if it does not yet have one.
4514 Starts a new InnoDB transaction if a transaction is not yet started. And
4515 assigns a new snapshot for a consistent read if the transaction does not yet
4516 have one.
4517 @return 0 */
4518 static
4519 int
innobase_start_trx_and_assign_read_view(handlerton * hton,THD * thd)4520 innobase_start_trx_and_assign_read_view(
4521 /*====================================*/
4522 	handlerton*	hton,	/*!< in: InnoDB handlerton */
4523 	THD*		thd)	/*!< in: MySQL thread handle of the user for
4524 				whom the transaction should be committed */
4525 {
4526 	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
4527 	DBUG_ASSERT(hton == innodb_hton_ptr);
4528 
4529 	/* Create a new trx struct for thd, if it does not yet have one */
4530 
4531 	trx_t*	trx = check_trx_exists(thd);
4532 
4533 	innobase_srv_conc_force_exit_innodb(trx);
4534 
4535 	/* The transaction should not be active yet, start it */
4536 
4537 	ut_ad(!trx_is_started(trx));
4538 
4539 	trx_start_if_not_started_xa(trx, false);
4540 
4541 	/* Assign a read view if the transaction does not have it yet.
4542 	Do this only if transaction is using REPEATABLE READ isolation
4543 	level. */
4544 	trx->isolation_level = innobase_map_isolation_level(
4545 		thd_get_trx_isolation(thd));
4546 
4547 	if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
4548 		trx->read_view.open(trx);
4549 	} else {
4550 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4551 				    HA_ERR_UNSUPPORTED,
4552 				    "InnoDB: WITH CONSISTENT SNAPSHOT"
4553 				    " was ignored because this phrase"
4554 				    " can only be used with"
4555 				    " REPEATABLE READ isolation level.");
4556 	}
4557 
4558 	/* Set the MySQL flag to mark that there is an active transaction */
4559 
4560 	innobase_register_trx(hton, current_thd, trx);
4561 
4562 	DBUG_RETURN(0);
4563 }
4564 
4565 static
4566 void
innobase_commit_ordered_2(trx_t * trx,THD * thd)4567 innobase_commit_ordered_2(
4568 /*======================*/
4569 	trx_t*	trx, 	/*!< in: Innodb transaction */
4570 	THD*	thd)	/*!< in: MySQL thread handle */
4571 {
4572 	DBUG_ENTER("innobase_commit_ordered_2");
4573 
4574 	bool	read_only = trx->read_only || trx->id == 0;
4575 
4576 	if (!read_only) {
4577 
4578 		while (innobase_commit_concurrency > 0) {
4579 
4580 			mysql_mutex_lock(&commit_cond_m);
4581 
4582 			++commit_threads;
4583 
4584 			if (commit_threads
4585 				<= innobase_commit_concurrency) {
4586 
4587 				mysql_mutex_unlock(&commit_cond_m);
4588 				break;
4589 			}
4590 
4591 			--commit_threads;
4592 
4593 			mysql_cond_wait(&commit_cond, &commit_cond_m);
4594 
4595 			mysql_mutex_unlock(&commit_cond_m);
4596 		}
4597 
4598 		/* The following call reads the binary log position of
4599 		the transaction being committed.
4600 
4601 		Binary logging of other engines is not relevant to
4602 		InnoDB as all InnoDB requires is that committing
4603 		InnoDB transactions appear in the same order in the
4604 		MySQL binary log as they appear in InnoDB logs, which
4605 		is guaranteed by the server.
4606 
4607 		If the binary log is not enabled, or the transaction
4608 		is not written to the binary log, the file name will
4609 		be a NULL pointer. */
4610 		thd_binlog_pos(thd, &trx->mysql_log_file_name,
4611 			       &trx->mysql_log_offset);
4612 
4613 		/* Don't do write + flush right now. For group commit
4614 		to work we want to do the flush later. */
4615 		trx->flush_log_later = true;
4616 	}
4617 
4618 	innobase_commit_low(trx);
4619 
4620 	if (!read_only) {
4621 		trx->flush_log_later = false;
4622 
4623 		if (innobase_commit_concurrency > 0) {
4624 
4625 			mysql_mutex_lock(&commit_cond_m);
4626 
4627 			ut_ad(commit_threads > 0);
4628 			--commit_threads;
4629 
4630 			mysql_cond_signal(&commit_cond);
4631 
4632 			mysql_mutex_unlock(&commit_cond_m);
4633 		}
4634 	}
4635 
4636 	DBUG_VOID_RETURN;
4637 }
4638 
4639 /*****************************************************************//**
4640 Perform the first, fast part of InnoDB commit.
4641 
4642 Doing it in this call ensures that we get the same commit order here
4643 as in binlog and any other participating transactional storage engines.
4644 
4645 Note that we want to do as little as really needed here, as we run
4646 under a global mutex. The expensive fsync() is done later, in
4647 innobase_commit(), without a lock so group commit can take place.
4648 
4649 Note also that this method can be called from a different thread than
4650 the one handling the rest of the transaction. */
4651 static
4652 void
innobase_commit_ordered(handlerton * hton,THD * thd,bool all)4653 innobase_commit_ordered(
4654 /*====================*/
4655 	handlerton *hton, /*!< in: Innodb handlerton */
4656 	THD*	thd,	/*!< in: MySQL thread handle of the user for whom
4657 			the transaction should be committed */
4658 	bool	all)	/*!< in:	TRUE - commit transaction
4659 				FALSE - the current SQL statement ended */
4660 {
4661 	trx_t*		trx;
4662 	DBUG_ENTER("innobase_commit_ordered");
4663 	DBUG_ASSERT(hton == innodb_hton_ptr);
4664 
4665 	trx = check_trx_exists(thd);
4666 
4667 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4668 		/* We cannot throw error here; instead we will catch this error
4669 		again in innobase_commit() and report it from there. */
4670 		DBUG_VOID_RETURN;
4671 	}
4672 
4673 	/* commit_ordered is only called when committing the whole transaction
4674 	(or an SQL statement when autocommit is on). */
4675 	DBUG_ASSERT(all ||
4676 		(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
4677 
4678 	innobase_commit_ordered_2(trx, thd);
4679 	trx->active_commit_ordered = true;
4680 
4681 	DBUG_VOID_RETURN;
4682 }
4683 
4684 /*****************************************************************//**
4685 Commits a transaction in an InnoDB database or marks an SQL statement
4686 ended.
4687 @return 0 or deadlock error if the transaction was aborted by another
4688 	higher priority transaction. */
4689 static
4690 int
innobase_commit(handlerton * hton,THD * thd,bool commit_trx)4691 innobase_commit(
4692 /*============*/
4693 	handlerton*	hton,		/*!< in: InnoDB handlerton */
4694 	THD*		thd,		/*!< in: MySQL thread handle of the
4695 					user for whom the transaction should
4696 					be committed */
4697 	bool		commit_trx)	/*!< in: true - commit transaction
4698 					false - the current SQL statement
4699 					ended */
4700 {
4701 	DBUG_ENTER("innobase_commit");
4702 	DBUG_PRINT("enter", ("commit_trx: %d", commit_trx));
4703 	DBUG_ASSERT(hton == innodb_hton_ptr);
4704 	DBUG_PRINT("trans", ("ending transaction"));
4705 
4706 	trx_t*	trx = check_trx_exists(thd);
4707 
4708 	ut_ad(trx->dict_operation_lock_mode == 0);
4709 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4710 
4711 	/* Transaction is deregistered only in a commit or a rollback. If
4712 	it is deregistered we know there cannot be resources to be freed
4713 	and we could return immediately.  For the time being, we play safe
4714 	and do the cleanup though there should be nothing to clean up. */
4715 
4716 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4717 
4718 		sql_print_error("Transaction not registered for MariaDB 2PC,"
4719 				" but transaction is active");
4720 	}
4721 
4722 	bool	read_only = trx->read_only || trx->id == 0;
4723 	DBUG_PRINT("info", ("readonly: %d", read_only));
4724 
4725 	if (commit_trx
4726 	    || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
4727 
4728 		DBUG_EXECUTE_IF("crash_innodb_before_commit",
4729 				DBUG_SUICIDE(););
4730 
4731 		/* Run the fast part of commit if we did not already. */
4732 		if (!trx->active_commit_ordered) {
4733 			innobase_commit_ordered_2(trx, thd);
4734 
4735 		}
4736 
4737 		/* We were instructed to commit the whole transaction, or
4738 		this is an SQL statement end and autocommit is on */
4739 
4740 		/* At this point commit order is fixed and transaction is
4741 		visible to others. So we can wakeup other commits waiting for
4742 		this one, to allow then to group commit with us. */
4743 		thd_wakeup_subsequent_commits(thd, 0);
4744 
4745 		/* Now do a write + flush of logs. */
4746 		trx_commit_complete_for_mysql(trx);
4747 
4748 		trx_deregister_from_2pc(trx);
4749 	} else {
4750 		/* We just mark the SQL statement ended and do not do a
4751 		transaction commit */
4752 
4753 		/* If we had reserved the auto-inc lock for some
4754 		table in this SQL statement we release it now */
4755 
4756 		if (!read_only) {
4757 			lock_unlock_table_autoinc(trx);
4758 		}
4759 
4760 		/* Store the current undo_no of the transaction so that we
4761 		know where to roll back if we have to roll back the next
4762 		SQL statement */
4763 
4764 		trx_mark_sql_stat_end(trx);
4765 	}
4766 
4767 	/* Reset the number AUTO-INC rows required */
4768 	trx->n_autoinc_rows = 0;
4769 
4770 	/* This is a statement level variable. */
4771 	trx->fts_next_doc_id = 0;
4772 
4773 	innobase_srv_conc_force_exit_innodb(trx);
4774 
4775 	DBUG_RETURN(0);
4776 }
4777 
4778 /*****************************************************************//**
4779 Rolls back a transaction or the latest SQL statement.
4780 @return 0 or error number */
4781 static
4782 int
innobase_rollback(handlerton * hton,THD * thd,bool rollback_trx)4783 innobase_rollback(
4784 /*==============*/
4785 	handlerton*	hton,		/*!< in: InnoDB handlerton */
4786 	THD*		thd,		/*!< in: handle to the MySQL thread
4787 					of the user whose transaction should
4788 					be rolled back */
4789 	bool		rollback_trx)	/*!< in: TRUE - rollback entire
4790 					transaction FALSE - rollback the current
4791 					statement only */
4792 {
4793 	DBUG_ENTER("innobase_rollback");
4794 	DBUG_ASSERT(hton == innodb_hton_ptr);
4795 	DBUG_PRINT("trans", ("aborting transaction"));
4796 
4797 	trx_t*	trx = check_trx_exists(thd);
4798 
4799 	ut_ad(trx->dict_operation_lock_mode == 0);
4800 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4801 
4802 	innobase_srv_conc_force_exit_innodb(trx);
4803 
4804 	/* Reset the number AUTO-INC rows required */
4805 
4806 	trx->n_autoinc_rows = 0;
4807 
4808 	/* If we had reserved the auto-inc lock for some table (if
4809 	we come here to roll back the latest SQL statement) we
4810 	release it now before a possibly lengthy rollback */
4811 	lock_unlock_table_autoinc(trx);
4812 
4813 	/* This is a statement level variable. */
4814 
4815 	trx->fts_next_doc_id = 0;
4816 
4817 	dberr_t		error;
4818 
4819 	if (rollback_trx
4820 	    || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
4821 
4822 		error = trx_rollback_for_mysql(trx);
4823 
4824 		trx_deregister_from_2pc(trx);
4825 	} else {
4826 
4827 		error = trx_rollback_last_sql_stat_for_mysql(trx);
4828 	}
4829 
4830 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd));
4831 }
4832 
4833 /*****************************************************************//**
4834 Rolls back a transaction
4835 @return 0 or error number */
4836 static
4837 int
innobase_rollback_trx(trx_t * trx)4838 innobase_rollback_trx(
4839 /*==================*/
4840 	trx_t*	trx)	/*!< in: transaction */
4841 {
4842 	DBUG_ENTER("innobase_rollback_trx");
4843 	DBUG_PRINT("trans", ("aborting transaction"));
4844 
4845 	innobase_srv_conc_force_exit_innodb(trx);
4846 
4847 	/* If we had reserved the auto-inc lock for some table (if
4848 	we come here to roll back the latest SQL statement) we
4849 	release it now before a possibly lengthy rollback */
4850 	lock_unlock_table_autoinc(trx);
4851 
4852 	if (!trx->has_logged()) {
4853 		trx->will_lock = false;
4854 #ifdef WITH_WSREP
4855 		trx->wsrep = false;
4856 #endif
4857 		DBUG_RETURN(0);
4858 	}
4859 
4860 	DBUG_RETURN(convert_error_code_to_mysql(trx_rollback_for_mysql(trx),
4861 						0, trx->mysql_thd));
4862 }
4863 
4864 
4865 struct pending_checkpoint {
4866 	struct pending_checkpoint *next;
4867 	handlerton *hton;
4868 	void *cookie;
4869 	ib_uint64_t lsn;
4870 };
4871 static struct pending_checkpoint *pending_checkpoint_list;
4872 static struct pending_checkpoint *pending_checkpoint_list_end;
4873 
4874 /*****************************************************************//**
4875 Handle a commit checkpoint request from server layer.
4876 We put the request in a queue, so that we can notify upper layer about
4877 checkpoint complete when we have flushed the redo log.
4878 If we have already flushed all relevant redo log, we notify immediately.*/
4879 static
4880 void
innobase_checkpoint_request(handlerton * hton,void * cookie)4881 innobase_checkpoint_request(
4882 	handlerton *hton,
4883 	void *cookie)
4884 {
4885 	ib_uint64_t			lsn;
4886 	ib_uint64_t			flush_lsn;
4887 	struct pending_checkpoint *	entry;
4888 
4889 	/* Do the allocation outside of lock to reduce contention. The normal
4890 	case is that not everything is flushed, so we will need to enqueue. */
4891 	entry = static_cast<struct pending_checkpoint *>
4892 		(my_malloc(sizeof(*entry), MYF(MY_WME)));
4893 	if (!entry) {
4894 		sql_print_error("Failed to allocate %u bytes."
4895 				" Commit checkpoint will be skipped.",
4896 				static_cast<unsigned>(sizeof(*entry)));
4897 		return;
4898 	}
4899 
4900 	entry->next = NULL;
4901 	entry->hton = hton;
4902 	entry->cookie = cookie;
4903 
4904 	mysql_mutex_lock(&pending_checkpoint_mutex);
4905 	lsn = log_get_lsn();
4906 	flush_lsn = log_get_flush_lsn();
4907 	if (lsn > flush_lsn) {
4908 		/* Put the request in queue.
4909 		When the log gets flushed past the lsn, we will remove the
4910 		entry from the queue and notify the upper layer. */
4911 		entry->lsn = lsn;
4912 		if (pending_checkpoint_list_end) {
4913 			pending_checkpoint_list_end->next = entry;
4914 			/* There is no need to order the entries in the list
4915 			by lsn. The upper layer can accept notifications in
4916 			any order, and short delays in notifications do not
4917 			significantly impact performance. */
4918 		} else {
4919 			pending_checkpoint_list = entry;
4920 		}
4921 		pending_checkpoint_list_end = entry;
4922 		entry = NULL;
4923 	}
4924 	mysql_mutex_unlock(&pending_checkpoint_mutex);
4925 
4926 	if (entry) {
4927 		/* We are already flushed. Notify the checkpoint immediately. */
4928 		commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4929 		my_free(entry);
4930 	}
4931 }
4932 
4933 /*****************************************************************//**
4934 Log code calls this whenever log has been written and/or flushed up
4935 to a new position. We use this to notify upper layer of a new commit
4936 checkpoint when necessary.*/
4937 UNIV_INTERN
4938 void
innobase_mysql_log_notify(ib_uint64_t flush_lsn)4939 innobase_mysql_log_notify(
4940 /*======================*/
4941 	ib_uint64_t	flush_lsn)	/*!< in: LSN flushed to disk */
4942 {
4943 	struct pending_checkpoint *	pending;
4944 	struct pending_checkpoint *	entry;
4945 	struct pending_checkpoint *	last_ready;
4946 
4947 	/* It is safe to do a quick check for NULL first without lock.
4948 	Even if we should race, we will at most skip one checkpoint and
4949 	take the next one, which is harmless. */
4950 	if (!pending_checkpoint_list)
4951 		return;
4952 
4953 	mysql_mutex_lock(&pending_checkpoint_mutex);
4954 	pending = pending_checkpoint_list;
4955 	if (!pending)
4956 	{
4957 		mysql_mutex_unlock(&pending_checkpoint_mutex);
4958 		return;
4959 	}
4960 
4961 	last_ready = NULL;
4962 	for (entry = pending; entry != NULL; entry = entry -> next)
4963 	{
4964 		/* Notify checkpoints up until the first entry that has not
4965 		been fully flushed to the redo log. Since we do not maintain
4966 		the list ordered, in principle there could be more entries
4967 		later than were also flushed. But there is no harm in
4968 		delaying notifications for those a bit. And in practise, the
4969 		list is unlikely to have more than one element anyway, as we
4970 		flush the redo log at least once every second. */
4971 		if (entry->lsn > flush_lsn)
4972 			break;
4973 		last_ready = entry;
4974 	}
4975 
4976 	if (last_ready)
4977 	{
4978 		/* We found some pending checkpoints that are now flushed to
4979 		disk. So remove them from the list. */
4980 		pending_checkpoint_list = entry;
4981 		if (!entry)
4982 			pending_checkpoint_list_end = NULL;
4983 	}
4984 
4985 	mysql_mutex_unlock(&pending_checkpoint_mutex);
4986 
4987 	if (!last_ready)
4988 		return;
4989 
4990 	/* Now that we have released the lock, notify upper layer about all
4991 	commit checkpoints that have now completed. */
4992 	for (;;) {
4993 		entry = pending;
4994 		pending = pending->next;
4995 
4996 		commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4997 
4998 		my_free(entry);
4999 		if (entry == last_ready)
5000 			break;
5001 	}
5002 }
5003 
5004 /*****************************************************************//**
5005 Rolls back a transaction to a savepoint.
5006 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5007 given name */
5008 static
5009 int
innobase_rollback_to_savepoint(handlerton * hton,THD * thd,void * savepoint)5010 innobase_rollback_to_savepoint(
5011 /*===========================*/
5012 	handlerton*	hton,		/*!< in: InnoDB handlerton */
5013 	THD*		thd,		/*!< in: handle to the MySQL thread
5014 					of the user whose transaction should
5015 					be rolled back to savepoint */
5016 	void*		savepoint)	/*!< in: savepoint data */
5017 {
5018 
5019 	DBUG_ENTER("innobase_rollback_to_savepoint");
5020 	DBUG_ASSERT(hton == innodb_hton_ptr);
5021 
5022 	trx_t*	trx = check_trx_exists(thd);
5023 
5024 	innobase_srv_conc_force_exit_innodb(trx);
5025 
5026 	/* TODO: use provided savepoint data area to store savepoint data */
5027 
5028 	char	name[64];
5029 
5030 	longlong2str(longlong(savepoint), name, 36);
5031 
5032 	int64_t	mysql_binlog_cache_pos;
5033 
5034 	dberr_t	error = trx_rollback_to_savepoint_for_mysql(
5035 		trx, name, &mysql_binlog_cache_pos);
5036 
5037 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5038 		fts_savepoint_rollback(trx, name);
5039 	}
5040 
5041 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5042 }
5043 
5044 /*****************************************************************//**
5045 Check whether innodb state allows to safely release MDL locks after
5046 rollback to savepoint.
5047 When binlog is on, MDL locks acquired after savepoint unit are not
5048 released if there are any locks held in InnoDB.
5049 @return true if it is safe, false if its not safe. */
5050 static
5051 bool
innobase_rollback_to_savepoint_can_release_mdl(handlerton * hton,THD * thd)5052 innobase_rollback_to_savepoint_can_release_mdl(
5053 /*===========================================*/
5054 	handlerton*	hton,		/*!< in: InnoDB handlerton */
5055 	THD*		thd)		/*!< in: handle to the MySQL thread
5056 					of the user whose transaction should
5057 					be rolled back to savepoint */
5058 {
5059 	DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
5060 	DBUG_ASSERT(hton == innodb_hton_ptr);
5061 
5062 	trx_t*	trx = check_trx_exists(thd);
5063 
5064 	/* If transaction has not acquired any locks then it is safe
5065 	to release MDL after rollback to savepoint */
5066 	if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
5067 
5068 		DBUG_RETURN(true);
5069 	}
5070 
5071 	DBUG_RETURN(false);
5072 }
5073 
5074 /*****************************************************************//**
5075 Release transaction savepoint name.
5076 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5077 given name */
5078 static
5079 int
innobase_release_savepoint(handlerton * hton,THD * thd,void * savepoint)5080 innobase_release_savepoint(
5081 /*=======================*/
5082 	handlerton*	hton,		/*!< in: handlerton for InnoDB */
5083 	THD*		thd,		/*!< in: handle to the MySQL thread
5084 					of the user whose transaction's
5085 					savepoint should be released */
5086 	void*		savepoint)	/*!< in: savepoint data */
5087 {
5088 	dberr_t		error;
5089 	trx_t*		trx;
5090 	char		name[64];
5091 
5092 	DBUG_ENTER("innobase_release_savepoint");
5093 	DBUG_ASSERT(hton == innodb_hton_ptr);
5094 
5095 	trx = check_trx_exists(thd);
5096 
5097 	/* TODO: use provided savepoint data area to store savepoint data */
5098 
5099 	longlong2str(longlong(savepoint), name, 36);
5100 
5101 	error = trx_release_savepoint_for_mysql(trx, name);
5102 
5103 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5104 		fts_savepoint_release(trx, name);
5105 	}
5106 
5107 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5108 }
5109 
5110 /*****************************************************************//**
5111 Sets a transaction savepoint.
5112 @return always 0, that is, always succeeds */
5113 static
5114 int
innobase_savepoint(handlerton * hton,THD * thd,void * savepoint)5115 innobase_savepoint(
5116 /*===============*/
5117 	handlerton*	hton,	/*!< in: handle to the InnoDB handlerton */
5118 	THD*		thd,	/*!< in: handle to the MySQL thread */
5119 	void*		savepoint)/*!< in: savepoint data */
5120 {
5121 	DBUG_ENTER("innobase_savepoint");
5122 	DBUG_ASSERT(hton == innodb_hton_ptr);
5123 
5124 	/* In the autocommit mode there is no sense to set a savepoint
5125 	(unless we are in sub-statement), so SQL layer ensures that
5126 	this method is never called in such situation.  */
5127 
5128 	trx_t*	trx = check_trx_exists(thd);
5129 
5130 	innobase_srv_conc_force_exit_innodb(trx);
5131 
5132 	/* Cannot happen outside of transaction */
5133 	DBUG_ASSERT(trx_is_registered_for_2pc(trx));
5134 
5135 	/* TODO: use provided savepoint data area to store savepoint data */
5136 	char	name[64];
5137 
5138 	longlong2str(longlong(savepoint), name, 36);
5139 
5140 	dberr_t	error = trx_savepoint_for_mysql(trx, name, 0);
5141 
5142 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5143 		fts_savepoint_take(trx->fts_trx, name);
5144 	}
5145 
5146 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5147 }
5148 
5149 /*****************************************************************//**
5150 Frees a possible InnoDB trx object associated with the current THD.
5151 @return 0 or error number */
5152 static
5153 int
innobase_close_connection(handlerton * hton,THD * thd)5154 innobase_close_connection(
5155 /*======================*/
5156 	handlerton*	hton,	/*!< in: innobase handlerton */
5157 	THD*		thd)	/*!< in: handle to the MySQL thread of the user
5158 				whose resources should be free'd */
5159 {
5160 
5161 	DBUG_ENTER("innobase_close_connection");
5162 	DBUG_ASSERT(hton == innodb_hton_ptr);
5163 
5164 	trx_t*	trx = thd_to_trx(thd);
5165 
5166 	/* During server initialization MySQL layer will try to open
5167 	some of the master-slave tables those residing in InnoDB.
5168 	After MySQL layer is done with needed checks these tables
5169 	are closed followed by invocation of close_connection on the
5170 	associated thd.
5171 
5172 	close_connection rolls back the trx and then frees it.
5173 	Once trx is freed thd should avoid maintaining reference to
5174 	it else it can be classified as stale reference.
5175 
5176 	Re-invocation of innodb_close_connection on same thd should
5177 	get trx as NULL. */
5178 
5179 	if (trx) {
5180 
5181 		thd_set_ha_data(thd, hton, NULL);
5182 		if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
5183 
5184 			sql_print_error("Transaction not registered for MariaDB 2PC, "
5185 				"but transaction is active");
5186 		}
5187 
5188 		/* Disconnect causes rollback in the following cases:
5189 		- trx is not started, or
5190 		- trx is in *not* in PREPARED state, or
5191 		- trx has not updated any persistent data.
5192 		TODO/FIXME: it does not make sense to initiate rollback
5193 		in the 1st and 3rd case. */
5194 		if (trx_is_started(trx)) {
5195 			if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
5196 				if (trx->has_logged_persistent()) {
5197 					trx_disconnect_prepared(trx);
5198 				} else {
5199 					trx_deregister_from_2pc(trx);
5200 					goto rollback_and_free;
5201 				}
5202 			} else {
5203 			sql_print_warning(
5204 				"MariaDB is closing a connection that has an active "
5205 				"InnoDB transaction.  " TRX_ID_FMT " row modifications "
5206 				"will roll back.",
5207 					trx->undo_no);
5208 				goto rollback_and_free;
5209 			}
5210 		} else {
5211 rollback_and_free:
5212 			innobase_rollback_trx(trx);
5213 			trx->free();
5214 		}
5215 	}
5216 
5217 	DBUG_RETURN(0);
5218 }
5219 
5220 UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
5221 
5222 /** Cancel any pending lock request associated with the current THD.
5223 @sa THD::awake() @sa ha_kill_query() */
innobase_kill_query(handlerton *,THD * thd,enum thd_kill_levels)5224 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
5225 {
5226   DBUG_ENTER("innobase_kill_query");
5227 #ifdef WITH_WSREP
5228   if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT)
5229   {
5230     /* if victim has been signaled by BF thread and/or aborting
5231     is already progressing, following query aborting is not necessary
5232     any more. */
5233     WSREP_DEBUG("Victim thread %ld bail out conflict_state %s query %s",
5234 		thd_get_thread_id(thd),
5235 		wsrep_thd_conflict_state_str(thd), wsrep_thd_query(thd));
5236     DBUG_VOID_RETURN;
5237   }
5238 #endif /* WITH_WSREP */
5239 
5240   if (trx_t* trx= thd_to_trx(thd))
5241   {
5242     ut_ad(trx->mysql_thd == thd);
5243     lock_mutex_enter();
5244     if (lock_t *lock= trx->lock.wait_lock)
5245     {
5246       trx_mutex_enter(trx);
5247       lock_cancel_waiting_and_release(lock);
5248       trx_mutex_exit(trx);
5249     }
5250     lock_mutex_exit();
5251   }
5252 
5253   DBUG_VOID_RETURN;
5254 }
5255 
5256 
5257 /*************************************************************************//**
5258 ** InnoDB database tables
5259 *****************************************************************************/
5260 
5261 /** Get the record format from the data dictionary.
5262 @return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
5263 ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
5264 
5265 enum row_type
get_row_type() const5266 ha_innobase::get_row_type() const
5267 {
5268 	if (m_prebuilt && m_prebuilt->table) {
5269 		const ulint	flags = m_prebuilt->table->flags;
5270 
5271 		switch (dict_tf_get_rec_format(flags)) {
5272 		case REC_FORMAT_REDUNDANT:
5273 			return(ROW_TYPE_REDUNDANT);
5274 		case REC_FORMAT_COMPACT:
5275 			return(ROW_TYPE_COMPACT);
5276 		case REC_FORMAT_COMPRESSED:
5277 			return(ROW_TYPE_COMPRESSED);
5278 		case REC_FORMAT_DYNAMIC:
5279 			return(ROW_TYPE_DYNAMIC);
5280 		}
5281 	}
5282 	ut_ad(0);
5283 	return(ROW_TYPE_NOT_USED);
5284 }
5285 
5286 /****************************************************************//**
5287 Get the table flags to use for the statement.
5288 @return table flags */
5289 
5290 handler::Table_flags
table_flags() const5291 ha_innobase::table_flags() const
5292 /*============================*/
5293 {
5294 	THD*			thd = ha_thd();
5295 	handler::Table_flags	flags = m_int_table_flags;
5296 
5297 	/* Need to use tx_isolation here since table flags is (also)
5298 	called before prebuilt is inited. */
5299 
5300 	if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
5301 		return(flags);
5302 	}
5303 
5304 	return(flags | HA_BINLOG_STMT_CAPABLE);
5305 }
5306 
5307 /****************************************************************//**
5308 Returns the table type (storage engine name).
5309 @return table type */
5310 
5311 const char*
table_type() const5312 ha_innobase::table_type() const
5313 /*===========================*/
5314 {
5315 	return(innobase_hton_name);
5316 }
5317 
5318 /****************************************************************//**
5319 Returns the index type.
5320 @return index type */
5321 
5322 const char*
index_type(uint keynr)5323 ha_innobase::index_type(
5324 /*====================*/
5325 	uint	keynr)		/*!< : index number */
5326 {
5327 	dict_index_t*	index = innobase_get_index(keynr);
5328 
5329 	if (!index) {
5330 		return "Corrupted";
5331 	}
5332 
5333 	if (index->type & DICT_FTS) {
5334 		return("FULLTEXT");
5335 	}
5336 
5337 	if (dict_index_is_spatial(index)) {
5338 		return("SPATIAL");
5339 	}
5340 
5341 	return("BTREE");
5342 }
5343 
5344 /****************************************************************//**
5345 Returns the table file name extension.
5346 @return file extension string */
5347 
5348 const char**
bas_ext() const5349 ha_innobase::bas_ext() const
5350 /*========================*/
5351 {
5352 	return(ha_innobase_exts);
5353 }
5354 
5355 /****************************************************************//**
5356 Returns the operations supported for indexes.
5357 @return flags of supported operations */
5358 
5359 ulong
index_flags(uint key,uint,bool) const5360 ha_innobase::index_flags(
5361 /*=====================*/
5362 	uint	key,
5363 	uint,
5364 	bool) const
5365 {
5366 	if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
5367 		return(0);
5368 	}
5369 
5370 	ulong extra_flag= 0;
5371 
5372 	if (table && key == table->s->primary_key) {
5373 		extra_flag= HA_CLUSTERED_INDEX;
5374 	}
5375 
5376 	ulong flags = HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
5377 		      | HA_READ_RANGE | HA_KEYREAD_ONLY
5378 		      | extra_flag
5379 		      | HA_DO_INDEX_COND_PUSHDOWN;
5380 
5381 	/* For spatial index, we don't support descending scan
5382 	and ICP so far. */
5383 	if (table_share->key_info[key].flags & HA_SPATIAL) {
5384 		flags = HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE
5385 			| HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
5386 	}
5387 
5388 	return(flags);
5389 }
5390 
5391 /****************************************************************//**
5392 Returns the maximum number of keys.
5393 @return MAX_KEY */
5394 
5395 uint
max_supported_keys() const5396 ha_innobase::max_supported_keys() const
5397 /*===================================*/
5398 {
5399 	return(MAX_KEY);
5400 }
5401 
5402 /****************************************************************//**
5403 Returns the maximum key length.
5404 @return maximum supported key length, in bytes */
5405 
5406 uint
max_supported_key_length() const5407 ha_innobase::max_supported_key_length() const
5408 /*=========================================*/
5409 {
5410 	/* An InnoDB page must store >= 2 keys; a secondary key record
5411 	must also contain the primary key value.  Therefore, if both
5412 	the primary key and the secondary key are at this maximum length,
5413 	it must be less than 1/4th of the free space on a page including
5414 	record overhead.
5415 
5416 	MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072.
5417 
5418 	For page sizes = 16k, InnoDB historically reported 3500 bytes here,
5419 	But the MySQL limit of 3072 was always used through the handler
5420 	interface.
5421 
5422 	Note: Handle 16k and 32k pages the same here since the limits
5423 	are higher than imposed by MySQL. */
5424 
5425 	switch (srv_page_size) {
5426 	case 4096:
5427 		/* Hack: allow mysql.innodb_index_stats to be created. */
5428 		/* FIXME: rewrite this API, and in sql_table.cc consider
5429 		that in index-organized tables (such as InnoDB), secondary
5430 		index records will be padded with the PRIMARY KEY, instead
5431 		of some short ROWID or record heap address. */
5432 		return(1173);
5433 	case 8192:
5434 		return(1536);
5435 	default:
5436 		return(3500);
5437 	}
5438 }
5439 
5440 /****************************************************************//**
5441 Returns the key map of keys that are usable for scanning.
5442 @return key_map_full */
5443 
5444 const key_map*
keys_to_use_for_scanning()5445 ha_innobase::keys_to_use_for_scanning()
5446 /*===================================*/
5447 {
5448 	return(&key_map_full);
5449 }
5450 
5451 /****************************************************************//**
5452 Ensures that if there's a concurrent inplace ADD INDEX, being-indexed virtual
5453 columns are computed. They are not marked as indexed in the old table, so the
5454 server won't add them to the vcol_set automatically */
5455 void
column_bitmaps_signal()5456 ha_innobase::column_bitmaps_signal()
5457 /*================================*/
5458 {
5459 	if (!table->vfield || table->current_lock != F_WRLCK) {
5460 		return;
5461 	}
5462 
5463 	dict_index_t*	clust_index = dict_table_get_first_index(m_prebuilt->table);
5464 	uint	num_v = 0;
5465 	for (uint j = 0; j < table->s->virtual_fields; j++) {
5466 		if (table->vfield[j]->stored_in_db()) {
5467 			continue;
5468 		}
5469 
5470 		dict_col_t*	col = &m_prebuilt->table->v_cols[num_v].m_col;
5471 		if (col->ord_part ||
5472 		    (dict_index_is_online_ddl(clust_index) &&
5473 		     row_log_col_is_indexed(clust_index, num_v))) {
5474 			table->mark_virtual_col(table->vfield[j]);
5475 		}
5476 		num_v++;
5477 	}
5478 }
5479 
5480 
5481 /****************************************************************//**
5482 Determines if table caching is supported.
5483 @return HA_CACHE_TBL_ASKTRANSACT */
5484 
5485 uint8
table_cache_type()5486 ha_innobase::table_cache_type()
5487 /*===========================*/
5488 {
5489 	return(HA_CACHE_TBL_ASKTRANSACT);
5490 }
5491 
5492 /****************************************************************//**
5493 Determines if the primary key is clustered index.
5494 @return true */
5495 
5496 bool
primary_key_is_clustered()5497 ha_innobase::primary_key_is_clustered()
5498 /*===================================*/
5499 {
5500 	return(true);
5501 }
5502 
5503 /** Normalizes a table name string.
5504 A normalized name consists of the database name catenated to '/'
5505 and table name. For example: test/mytable.
5506 On Windows, normalization puts both the database name and the
5507 table name always to lower case if "set_lower_case" is set to TRUE.
5508 @param[out]	norm_name	Normalized name, null-terminated.
5509 @param[in]	name		Name to normalize.
5510 @param[in]	set_lower_case	True if we also should fold to lower case. */
5511 void
normalize_table_name_c_low(char * norm_name,const char * name,ibool set_lower_case)5512 normalize_table_name_c_low(
5513 /*=======================*/
5514 	char*           norm_name,      /* out: normalized name as a
5515 					null-terminated string */
5516 	const char*     name,           /* in: table name string */
5517 	ibool           set_lower_case) /* in: TRUE if we want to set
5518 					 name to lower case */
5519 {
5520 	char*	name_ptr;
5521 	ulint	name_len;
5522 	char*	db_ptr;
5523 	ulint	db_len;
5524 	char*	ptr;
5525 	ulint	norm_len;
5526 
5527 	/* Scan name from the end */
5528 
5529 	ptr = strend(name) - 1;
5530 
5531 	/* seek to the last path separator */
5532 	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5533 		ptr--;
5534 	}
5535 
5536 	name_ptr = ptr + 1;
5537 	name_len = strlen(name_ptr);
5538 
5539 	/* skip any number of path separators */
5540 	while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
5541 		ptr--;
5542 	}
5543 
5544 	DBUG_ASSERT(ptr >= name);
5545 
5546 	/* seek to the last but one path separator or one char before
5547 	the beginning of name */
5548 	db_len = 0;
5549 	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5550 		ptr--;
5551 		db_len++;
5552 	}
5553 
5554 	db_ptr = ptr + 1;
5555 
5556 	norm_len = db_len + name_len + sizeof "/";
5557 	ut_a(norm_len < FN_REFLEN - 1);
5558 
5559 	memcpy(norm_name, db_ptr, db_len);
5560 
5561 	norm_name[db_len] = '/';
5562 
5563 	/* Copy the name and null-byte. */
5564 	memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
5565 
5566 	if (set_lower_case) {
5567 		innobase_casedn_str(norm_name);
5568 	}
5569 }
5570 
/** Constructor: store the creation context passed in by the caller.
The arguments are only stored in the corresponding members; the
constructor body is empty. In addition, m_default_row_format is
initialized from the current value of innodb_default_row_format,
m_table starts out as NULL and m_drop_before_rollback as false.
@param thd		stored in m_thd
@param form		stored in m_form
@param create_info	stored in m_create_info
@param table_name	stored in m_table_name
@param remote_path	stored in m_remote_path
@param file_per_table	stored in m_innodb_file_per_table
@param trx		stored in m_trx */
create_table_info_t::create_table_info_t(
	THD*		thd,
	const TABLE*	form,
	HA_CREATE_INFO*	create_info,
	char*		table_name,
	char*		remote_path,
	bool		file_per_table,
	trx_t*		trx)
	: m_thd(thd),
	  m_trx(trx),
	  m_form(form),
	  m_default_row_format(innodb_default_row_format),
	  m_create_info(create_info),
	  m_table_name(table_name), m_table(NULL),
	  m_drop_before_rollback(false),
	  m_remote_path(remote_path),
	  m_innodb_file_per_table(file_per_table)
{
}
5590 
/** Normalizes a table name string.
A normalized name consists of the database name catenated to '/'
and table name. For example: test/mytable.
On Windows, normalization puts both the database name and the
table name always to lower case if "set_lower_case" is set to TRUE.
This member function only forwards its arguments, unchanged, to
normalize_table_name_c_low().
@param[out]	norm_name	Normalized name, null-terminated.
@param[in]	name		Name to normalize.
@param[in]	set_lower_case	True if we also should fold to lower case. */
void
create_table_info_t::normalize_table_name_low(
	char*		norm_name,
	const char*	name,
	ibool		set_lower_case)
{
	normalize_table_name_c_low(norm_name, name, set_lower_case);
}
5607 
5608 #if !defined(DBUG_OFF)
5609 /*********************************************************************
5610 Test normalize_table_name_low(). */
5611 static
5612 void
test_normalize_table_name_low()5613 test_normalize_table_name_low()
5614 /*===========================*/
5615 {
5616 	char		norm_name[FN_REFLEN];
5617 	const char*	test_data[][2] = {
5618 		/* input, expected result */
5619 		{"./mysqltest/t1", "mysqltest/t1"},
5620 		{"./test/#sql-842b_2", "test/#sql-842b_2"},
5621 		{"./test/#sql-85a3_10", "test/#sql-85a3_10"},
5622 		{"./test/#sql2-842b-2", "test/#sql2-842b-2"},
5623 		{"./test/bug29807", "test/bug29807"},
5624 		{"./test/foo", "test/foo"},
5625 		{"./test/innodb_bug52663", "test/innodb_bug52663"},
5626 		{"./test/t", "test/t"},
5627 		{"./test/t1", "test/t1"},
5628 		{"./test/t10", "test/t10"},
5629 		{"/a/b/db/table", "db/table"},
5630 		{"/a/b/db///////table", "db/table"},
5631 		{"/a/b////db///////table", "db/table"},
5632 		{"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5633 		{"db/table", "db/table"},
5634 		{"ddd/t", "ddd/t"},
5635 		{"d/ttt", "d/ttt"},
5636 		{"d/t", "d/t"},
5637 		{".\\mysqltest\\t1", "mysqltest/t1"},
5638 		{".\\test\\#sql-842b_2", "test/#sql-842b_2"},
5639 		{".\\test\\#sql-85a3_10", "test/#sql-85a3_10"},
5640 		{".\\test\\#sql2-842b-2", "test/#sql2-842b-2"},
5641 		{".\\test\\bug29807", "test/bug29807"},
5642 		{".\\test\\foo", "test/foo"},
5643 		{".\\test\\innodb_bug52663", "test/innodb_bug52663"},
5644 		{".\\test\\t", "test/t"},
5645 		{".\\test\\t1", "test/t1"},
5646 		{".\\test\\t10", "test/t10"},
5647 		{"C:\\a\\b\\db\\table", "db/table"},
5648 		{"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"},
5649 		{"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"},
5650 		{"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5651 		{"db\\table", "db/table"},
5652 		{"ddd\\t", "ddd/t"},
5653 		{"d\\ttt", "d/ttt"},
5654 		{"d\\t", "d/t"},
5655 	};
5656 
5657 	for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5658 		printf("test_normalize_table_name_low():"
5659 		       " testing \"%s\", expected \"%s\"... ",
5660 		       test_data[i][0], test_data[i][1]);
5661 
5662 		create_table_info_t::normalize_table_name_low(
5663 			norm_name, test_data[i][0], FALSE);
5664 
5665 		if (strcmp(norm_name, test_data[i][1]) == 0) {
5666 			printf("ok\n");
5667 		} else {
5668 			printf("got \"%s\"\n", norm_name);
5669 			ut_error;
5670 		}
5671 	}
5672 }
5673 
5674 /*********************************************************************
5675 Test ut_format_name(). */
5676 static
5677 void
test_ut_format_name()5678 test_ut_format_name()
5679 /*=================*/
5680 {
5681 	char		buf[NAME_LEN * 3];
5682 
5683 	struct {
5684 		const char*	name;
5685 		ulint		buf_size;
5686 		const char*	expected;
5687 	} test_data[] = {
5688 		{"test/t1",	sizeof(buf),	"`test`.`t1`"},
5689 		{"test/t1",	12,		"`test`.`t1`"},
5690 		{"test/t1",	11,		"`test`.`t1"},
5691 		{"test/t1",	10,		"`test`.`t"},
5692 		{"test/t1",	9,		"`test`.`"},
5693 		{"test/t1",	8,		"`test`."},
5694 		{"test/t1",	7,		"`test`"},
5695 		{"test/t1",	6,		"`test"},
5696 		{"test/t1",	5,		"`tes"},
5697 		{"test/t1",	4,		"`te"},
5698 		{"test/t1",	3,		"`t"},
5699 		{"test/t1",	2,		"`"},
5700 		{"test/t1",	1,		""},
5701 		{"test/t1",	0,		"BUF_NOT_CHANGED"},
5702 		{"table",	sizeof(buf),	"`table`"},
5703 		{"ta'le",	sizeof(buf),	"`ta'le`"},
5704 		{"ta\"le",	sizeof(buf),	"`ta\"le`"},
5705 		{"ta`le",	sizeof(buf),	"`ta``le`"},
5706 	};
5707 
5708 	for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5709 
5710 		memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
5711 
5712 		char*	ret;
5713 
5714 		ret = ut_format_name(test_data[i].name,
5715 				     buf,
5716 				     test_data[i].buf_size);
5717 
5718 		ut_a(ret == buf);
5719 
5720 		if (strcmp(buf, test_data[i].expected) == 0) {
5721 			ib::info() << "ut_format_name(" << test_data[i].name
5722 				<< ", buf, " << test_data[i].buf_size << "),"
5723 				" expected " << test_data[i].expected
5724 				<< ", OK";
5725 		} else {
5726 			ib::error() << "ut_format_name(" << test_data[i].name
5727 				<< ", buf, " << test_data[i].buf_size << "),"
5728 				" expected " << test_data[i].expected
5729 				<< ", ERROR: got " << buf;
5730 			ut_error;
5731 		}
5732 	}
5733 }
5734 #endif /* !DBUG_OFF */
5735 
5736 /** Match index columns between MySQL and InnoDB.
5737 This function checks whether the index column information
5738 is consistent between KEY info from mysql and that from innodb index.
5739 @param[in]	key_info	Index info from mysql
5740 @param[in]	index_info	Index info from InnoDB
5741 @return true if all column types match. */
5742 static
5743 bool
innobase_match_index_columns(const KEY * key_info,const dict_index_t * index_info)5744 innobase_match_index_columns(
5745 	const KEY*		key_info,
5746 	const dict_index_t*	index_info)
5747 {
5748 	const KEY_PART_INFO*	key_part;
5749 	const KEY_PART_INFO*	key_end;
5750 	const dict_field_t*	innodb_idx_fld;
5751 	const dict_field_t*	innodb_idx_fld_end;
5752 
5753 	DBUG_ENTER("innobase_match_index_columns");
5754 
5755 	/* Check whether user defined index column count matches */
5756 	if (key_info->user_defined_key_parts !=
5757 		index_info->n_user_defined_cols) {
5758 		DBUG_RETURN(FALSE);
5759 	}
5760 
5761 	key_part = key_info->key_part;
5762 	key_end = key_part + key_info->user_defined_key_parts;
5763 	innodb_idx_fld = index_info->fields;
5764 	innodb_idx_fld_end = index_info->fields + index_info->n_fields;
5765 
5766 	/* Check each index column's datatype. We do not check
5767 	column name because there exists case that index
5768 	column name got modified in mysql but such change does not
5769 	propagate to InnoDB.
5770 	One hidden assumption here is that the index column sequences
5771 	are matched up between those in mysql and InnoDB. */
5772 	for (; key_part != key_end; ++key_part) {
5773 		ulint	col_type;
5774 		ibool	is_unsigned;
5775 		ulint	mtype = innodb_idx_fld->col->mtype;
5776 
5777 		/* Need to translate to InnoDB column type before
5778 		comparison. */
5779 		col_type = get_innobase_type_from_mysql_type(
5780 			&is_unsigned, key_part->field);
5781 
5782 		/* Ignore InnoDB specific system columns. */
5783 		while (mtype == DATA_SYS) {
5784 			innodb_idx_fld++;
5785 
5786 			if (innodb_idx_fld >= innodb_idx_fld_end) {
5787 				DBUG_RETURN(FALSE);
5788 			}
5789 		}
5790 
5791 		/* MariaDB-5.5 compatibility */
5792 		if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
5793 		     key_part->field->real_type() == MYSQL_TYPE_SET) &&
5794 		    mtype == DATA_FIXBINARY) {
5795 			col_type= DATA_FIXBINARY;
5796 		}
5797 
5798 		if (col_type != mtype) {
5799 			/* If the col_type we get from mysql type is a geometry
5800 			data type, we should check if mtype is a legacy type
5801 			from 5.6, either upgraded to DATA_GEOMETRY or not.
5802 			This is indeed not an accurate check, but should be
5803 			safe, since DATA_BLOB would be upgraded once we create
5804 			spatial index on it and we intend to use DATA_GEOMETRY
5805 			for legacy GIS data types which are of var-length. */
5806 			switch (col_type) {
5807 			case DATA_GEOMETRY:
5808 				if (mtype == DATA_BLOB) {
5809 					break;
5810 				}
5811 				/* Fall through */
5812 			default:
5813 				/* Column type mismatches */
5814 				DBUG_RETURN(false);
5815 			}
5816 		}
5817 
5818 		innodb_idx_fld++;
5819 	}
5820 
5821 	DBUG_RETURN(TRUE);
5822 }
5823 
5824 /** Build a template for a base column for a virtual column
5825 @param[in]	table		MySQL TABLE
5826 @param[in]	clust_index	InnoDB clustered index
5827 @param[in]	field		field in MySQL table
5828 @param[in]	col		InnoDB column
5829 @param[in,out]	templ		template to fill
5830 @param[in]	col_no		field index for virtual col
5831 */
5832 static
5833 void
innobase_vcol_build_templ(const TABLE * table,dict_index_t * clust_index,Field * field,const dict_col_t * col,mysql_row_templ_t * templ,ulint col_no)5834 innobase_vcol_build_templ(
5835 	const TABLE*		table,
5836 	dict_index_t*		clust_index,
5837 	Field*			field,
5838 	const dict_col_t*	col,
5839 	mysql_row_templ_t*	templ,
5840 	ulint			col_no)
5841 {
5842 	templ->col_no = col_no;
5843 	templ->is_virtual = col->is_virtual();
5844 
5845 	if (templ->is_virtual) {
5846 		templ->clust_rec_field_no = ULINT_UNDEFINED;
5847 		templ->rec_field_no = col->ind;
5848 	} else {
5849 		templ->clust_rec_field_no = dict_col_get_clust_pos(
5850 						col, clust_index);
5851 		ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
5852 
5853 		templ->rec_field_no = templ->clust_rec_field_no;
5854 	}
5855 
5856 	if (field->real_maybe_null()) {
5857                 templ->mysql_null_byte_offset =
5858                         field->null_offset();
5859 
5860                 templ->mysql_null_bit_mask = (ulint) field->null_bit;
5861         } else {
5862                 templ->mysql_null_bit_mask = 0;
5863         }
5864 
5865         templ->mysql_col_offset = static_cast<ulint>(
5866 					get_field_offset(table, field));
5867 	templ->mysql_col_len = static_cast<ulint>(field->pack_length());
5868         templ->type = col->mtype;
5869         templ->mysql_type = static_cast<ulint>(field->type());
5870 
5871 	if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
5872 		templ->mysql_length_bytes = static_cast<ulint>(
5873 			((Field_varstring*) field)->length_bytes);
5874 	}
5875 
5876         templ->charset = dtype_get_charset_coll(col->prtype);
5877         templ->mbminlen = dict_col_get_mbminlen(col);
5878         templ->mbmaxlen = dict_col_get_mbmaxlen(col);
5879         templ->is_unsigned = col->prtype & DATA_UNSIGNED;
5880 }
5881 
/** Build template for the virtual columns and their base columns. This
is done when the table first opened.

The template array s_templ->vtempl has one slot per non-system InnoDB
column followed by one slot per virtual column. Slots [0, n_col) are
filled only for columns that serve as a base column of some virtual
column (the others stay NULL, as the array is zero-allocated); slots
[n_col, n_col + n_v_col) hold the virtual columns. If s_templ->vtempl
is already set, the function returns without changing anything.

@param[in]	table		MySQL TABLE
@param[in]	ib_table	InnoDB dict_table_t
@param[in,out]	s_templ		InnoDB template structure
@param[in]	add_v		new virtual columns added along with
				add index call
@param[in]	locked		true if dict_sys mutex is held */
void
innobase_build_v_templ(
	const TABLE*		table,
	const dict_table_t*	ib_table,
	dict_vcol_templ_t*	s_templ,
	const dict_add_v_col_t*	add_v,
	bool			locked)
{
	/* Number of InnoDB columns excluding the system columns */
	ulint	ncol = unsigned(ib_table->n_cols) - DATA_N_SYS_COLS;
	ulint	n_v_col = ib_table->n_v_cols;
	/* marker[i] is set if column i is a base column of some
	virtual column; only the first ncol entries are used */
	bool	marker[REC_MAX_N_FIELDS];

	DBUG_ENTER("innobase_build_v_templ");
	ut_ad(ncol < REC_MAX_N_FIELDS);

	/* Also count the virtual columns that are about to be added */
	if (add_v != NULL) {
		n_v_col += add_v->n_v_col;
	}

	ut_ad(n_v_col > 0);

	/* Acquire dict_sys->mutex unless the caller already holds it */
	if (!locked) {
		mutex_enter(&dict_sys->mutex);
	}

	/* The template was already built: nothing to do */
	if (s_templ->vtempl) {
		if (!locked) {
			mutex_exit(&dict_sys->mutex);
		}
		DBUG_VOID_RETURN;
	}

	memset(marker, 0, sizeof(bool) * ncol);

	/* Zero-filled, so that slots of unused columns remain NULL */
	s_templ->vtempl = static_cast<mysql_row_templ_t**>(
		ut_zalloc_nokey((ncol + n_v_col)
				* sizeof *s_templ->vtempl));
	s_templ->n_col = ncol;
	s_templ->n_v_col = n_v_col;
	s_templ->rec_len = table->s->reclength;
	/* Keep a private copy of the default row of the table */
	s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len);
	memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len);

	/* Mark those columns could be base columns */
	for (ulint i = 0; i < ib_table->n_v_cols; i++) {
		const dict_v_col_t*	vcol = dict_table_get_nth_v_col(
							ib_table, i);

		for (ulint j = 0; j < vcol->num_base; j++) {
			ulint	col_no = vcol->base_col[j]->ind;
			marker[col_no] = true;
		}
	}

	/* Likewise for the base columns of the columns being added */
	if (add_v) {
		for (ulint i = 0; i < add_v->n_v_col; i++) {
			const dict_v_col_t*	vcol = &add_v->v_col[i];

			for (ulint j = 0; j < vcol->num_base; j++) {
				ulint	col_no = vcol->base_col[j]->ind;
				marker[col_no] = true;
			}
		}
	}

	/* j counts the stored (non-virtual) fields seen so far,
	z counts the virtual fields seen so far */
	ulint	j = 0;
	ulint	z = 0;

	dict_index_t*	clust_index = dict_table_get_first_index(ib_table);

	for (ulint i = 0; i < table->s->fields; i++) {
		Field*  field = table->field[i];

		/* Build template for virtual columns */
		if (!field->stored_in_db()) {
#ifdef UNIV_DEBUG
			/* Check that the MySQL field and the InnoDB
			virtual column at position z have the same name */
			const char*	name;

			if (z >= ib_table->n_v_def) {
				name = add_v->v_col_name[z - ib_table->n_v_def];
			} else {
				name = dict_table_get_v_col_name(ib_table, z);
			}

			ut_ad(!my_strcasecmp(system_charset_info, name,
					     field->field_name.str));
#endif
			const dict_v_col_t*	vcol;

			/* Positions at or beyond n_v_def are looked up
			in add_v, which is dereferenced unconditionally
			here */
			if (z >= ib_table->n_v_def) {
				vcol = &add_v->v_col[z - ib_table->n_v_def];
			} else {
				vcol = dict_table_get_nth_v_col(ib_table, z);
			}

			/* Virtual columns are stored after the n_col
			slots of the ordinary columns */
			s_templ->vtempl[z + s_templ->n_col]
				= static_cast<mysql_row_templ_t*>(
					ut_malloc_nokey(
					sizeof *s_templ->vtempl[j]));

			innobase_vcol_build_templ(
				table, clust_index, field,
				&vcol->m_col,
				s_templ->vtempl[z + s_templ->n_col],
				z);
			z++;
			continue;
                }

		ut_ad(j < ncol);

		/* Build template for base columns */
		if (marker[j]) {
			dict_col_t*   col = dict_table_get_nth_col(
						ib_table, j);

			ut_ad(!my_strcasecmp(system_charset_info,
					     dict_table_get_col_name(
						     ib_table, j),
					     field->field_name.str));

			s_templ->vtempl[j] = static_cast<
				mysql_row_templ_t*>(
					ut_malloc_nokey(
					sizeof *s_templ->vtempl[j]));

			innobase_vcol_build_templ(
				table, clust_index, field, col,
				s_templ->vtempl[j], j);
		}

		j++;
	}

	/* Release the mutex only if it was acquired in this function */
	if (!locked) {
		mutex_exit(&dict_sys->mutex);
	}

	/* These point into the TABLE_SHARE strings; nothing is copied */
	s_templ->db_name = table->s->db.str;
	s_templ->tb_name = table->s->table_name.str;
	DBUG_VOID_RETURN;
}
6032 
6033 /** Check consistency between .frm indexes and InnoDB indexes.
6034 @param[in]	table	table object formed from .frm
6035 @param[in]	ib_table	InnoDB table definition
6036 @retval	true if not errors were found */
6037 static bool
check_index_consistency(const TABLE * table,const dict_table_t * ib_table)6038 check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
6039 {
6040 	ulint mysql_num_index = table->s->keys;
6041 	ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
6042 	bool ret = true;
6043 
6044 	/* If there exists inconsistency between MySQL and InnoDB dictionary
6045 	(metadata) information, the number of index defined in MySQL
6046 	could exceed that in InnoDB, return error */
6047 	if (ib_num_index < mysql_num_index) {
6048 		ret = false;
6049 		goto func_exit;
6050 	}
6051 
6052 	/* For each index in the mysql key_info array, fetch its
6053 	corresponding InnoDB index pointer into index_mapping
6054 	array. */
6055 	for (ulint count = 0; count < mysql_num_index; count++) {
6056 		const dict_index_t* index = dict_table_get_index_on_name(
6057 			ib_table, table->key_info[count].name.str);
6058 
6059 		if (index == NULL) {
6060 			sql_print_error("Cannot find index %s in InnoDB"
6061 					" index dictionary.",
6062 					table->key_info[count].name.str);
6063 			ret = false;
6064 			goto func_exit;
6065 		}
6066 
6067 		/* Double check fetched index has the same
6068 		column info as those in mysql key_info. */
6069 		if (!innobase_match_index_columns(&table->key_info[count],
6070 						  index)) {
6071 			sql_print_error("Found index %s whose column info"
6072 					" does not match that of MariaDB.",
6073 					table->key_info[count].name.str);
6074 			ret = false;
6075 			goto func_exit;
6076 		}
6077 	}
6078 
6079 func_exit:
6080 	return ret;
6081 }
6082 
6083 /********************************************************************//**
6084 Get the upper limit of the MySQL integral and floating-point type.
6085 @return maximum allowed value for the field */
6086 UNIV_INTERN
6087 ulonglong
innobase_get_int_col_max_value(const Field * field)6088 innobase_get_int_col_max_value(
6089 /*===========================*/
6090 	const Field*	field)	/*!< in: MySQL field */
6091 {
6092 	ulonglong	max_value = 0;
6093 
6094 	switch (field->key_type()) {
6095 	/* TINY */
6096 	case HA_KEYTYPE_BINARY:
6097 		max_value = 0xFFULL;
6098 		break;
6099 	case HA_KEYTYPE_INT8:
6100 		max_value = 0x7FULL;
6101 		break;
6102 	/* SHORT */
6103 	case HA_KEYTYPE_USHORT_INT:
6104 		max_value = 0xFFFFULL;
6105 		break;
6106 	case HA_KEYTYPE_SHORT_INT:
6107 		max_value = 0x7FFFULL;
6108 		break;
6109 	/* MEDIUM */
6110 	case HA_KEYTYPE_UINT24:
6111 		max_value = 0xFFFFFFULL;
6112 		break;
6113 	case HA_KEYTYPE_INT24:
6114 		max_value = 0x7FFFFFULL;
6115 		break;
6116 	/* LONG */
6117 	case HA_KEYTYPE_ULONG_INT:
6118 		max_value = 0xFFFFFFFFULL;
6119 		break;
6120 	case HA_KEYTYPE_LONG_INT:
6121 		max_value = 0x7FFFFFFFULL;
6122 		break;
6123 	/* BIG */
6124 	case HA_KEYTYPE_ULONGLONG:
6125 		max_value = 0xFFFFFFFFFFFFFFFFULL;
6126 		break;
6127 	case HA_KEYTYPE_LONGLONG:
6128 		max_value = 0x7FFFFFFFFFFFFFFFULL;
6129 		break;
6130 	case HA_KEYTYPE_FLOAT:
6131 		/* We use the maximum as per IEEE754-2008 standard, 2^24 */
6132 		max_value = 0x1000000ULL;
6133 		break;
6134 	case HA_KEYTYPE_DOUBLE:
6135 		/* We use the maximum as per IEEE754-2008 standard, 2^53 */
6136 		max_value = 0x20000000000000ULL;
6137 		break;
6138 	default:
6139 		ut_error;
6140 	}
6141 
6142 	return(max_value);
6143 }
6144 
6145 /** Initialize the AUTO_INCREMENT column metadata.
6146 
6147 Since a partial table definition for a persistent table can already be
6148 present in the InnoDB dict_sys cache before it is accessed from SQL,
6149 we have to initialize the AUTO_INCREMENT counter on the first
6150 ha_innobase::open().
6151 
6152 @param[in,out]	table	persistent table
6153 @param[in]	field	the AUTO_INCREMENT column */
6154 static
6155 void
initialize_auto_increment(dict_table_t * table,const Field * field)6156 initialize_auto_increment(dict_table_t* table, const Field* field)
6157 {
6158 	ut_ad(!table->is_temporary());
6159 
6160 	const unsigned	col_no = innodb_col_no(field);
6161 
6162 	dict_table_autoinc_lock(table);
6163 
6164 	table->persistent_autoinc = 1
6165 		+ dict_table_get_nth_col_pos(table, col_no, NULL);
6166 
6167 	if (table->autoinc) {
6168 		/* Already initialized. Our caller checked
6169 		table->persistent_autoinc without
6170 		dict_table_autoinc_lock(), and there might be multiple
6171 		ha_innobase::open() executing concurrently. */
6172 	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
6173 		/* If the recovery level is set so high that writes
6174 		are disabled we force the AUTOINC counter to 0
6175 		value effectively disabling writes to the table.
6176 		Secondly, we avoid reading the table in case the read
6177 		results in failure due to a corrupted table/index.
6178 
6179 		We will not return an error to the client, so that the
6180 		tables can be dumped with minimal hassle.  If an error
6181 		were returned in this case, the first attempt to read
6182 		the table would fail and subsequent SELECTs would succeed. */
6183 	} else if (table->persistent_autoinc) {
6184 		table->autoinc = innobase_next_autoinc(
6185 			btr_read_autoinc_with_fallback(table, col_no),
6186 			1 /* need */,
6187 			1 /* auto_increment_increment */,
6188 			0 /* auto_increment_offset */,
6189 			innobase_get_int_col_max_value(field));
6190 	}
6191 
6192 	dict_table_autoinc_unlock(table);
6193 }
6194 
6195 /** Open an InnoDB table
6196 @param[in]	name	table name
6197 @return	error code
6198 @retval	0	on success */
6199 int
open(const char * name,int,uint)6200 ha_innobase::open(const char* name, int, uint)
6201 {
6202 	char			norm_name[FN_REFLEN];
6203 
6204 	DBUG_ENTER("ha_innobase::open");
6205 
6206 	normalize_table_name(norm_name, name);
6207 
6208 	m_user_thd = NULL;
6209 
6210 	/* Will be allocated if it is needed in ::update_row() */
6211 	m_upd_buf = NULL;
6212 	m_upd_buf_size = 0;
6213 
6214 	char*	is_part = is_partition(norm_name);
6215 	THD*	thd = ha_thd();
6216 	dict_table_t* ib_table = open_dict_table(name, norm_name, is_part,
6217 						 DICT_ERR_IGNORE_FK_NOKEY);
6218 
6219 	DEBUG_SYNC(thd, "ib_open_after_dict_open");
6220 
6221 	if (NULL == ib_table) {
6222 
6223 		if (is_part) {
6224 			sql_print_error("Failed to open table %s.\n",
6225 					norm_name);
6226 		}
6227 no_such_table:
6228 		set_my_errno(ENOENT);
6229 
6230 		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
6231 	}
6232 
6233 	size_t n_fields = omits_virtual_cols(*table_share)
6234 		? table_share->stored_fields : table_share->fields;
6235 	size_t n_cols = dict_table_get_n_user_cols(ib_table)
6236 		+ dict_table_get_n_v_cols(ib_table)
6237 		- !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
6238 
6239 	if (UNIV_UNLIKELY(n_cols != n_fields)) {
6240 		ib::warn() << "Table " << norm_name << " contains "
6241 			<< n_cols << " user"
6242 			" defined columns in InnoDB, but " << n_fields
6243 			<< " columns in MariaDB. Please check"
6244 			" INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and"
6245 			" https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
6246 			" for how to resolve the issue.";
6247 
6248 		/* Mark this table as corrupted, so the drop table
6249 		or force recovery can still use it, but not others. */
6250 		ib_table->file_unreadable = true;
6251 		ib_table->corrupted = true;
6252 		dict_table_close(ib_table, FALSE, FALSE);
6253 		goto no_such_table;
6254 	}
6255 
6256 	innobase_copy_frm_flags_from_table_share(ib_table, table->s);
6257 
6258 	MONITOR_INC(MONITOR_TABLE_OPEN);
6259 
6260 	if ((ib_table->flags2 & DICT_TF2_DISCARDED)) {
6261 
6262 		ib_senderrf(thd,
6263 			IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
6264 			table->s->table_name.str);
6265 
6266 		/* Allow an open because a proper DISCARD should have set
6267 		all the flags and index root page numbers to FIL_NULL that
6268 		should prevent any DML from running but it should allow DDL
6269 		operations. */
6270 	} else if (!ib_table->is_readable()) {
6271 		const fil_space_t* space = ib_table->space;
6272 		if (!space) {
6273 			ib_senderrf(
6274 				thd, IB_LOG_LEVEL_WARN,
6275 				ER_TABLESPACE_MISSING, norm_name);
6276 		}
6277 
6278 		if (!thd_tablespace_op(thd)) {
6279 			set_my_errno(ENOENT);
6280 			int ret_err = HA_ERR_TABLESPACE_MISSING;
6281 
6282 			if (space && space->crypt_data
6283 			    && space->crypt_data->is_encrypted()) {
6284 				push_warning_printf(
6285 					thd,
6286 					Sql_condition::WARN_LEVEL_WARN,
6287 					HA_ERR_DECRYPTION_FAILED,
6288 					"Table %s in file %s is encrypted"
6289 					" but encryption service or"
6290 					" used key_id %u is not available. "
6291 					" Can't continue reading table.",
6292 					table_share->table_name.str,
6293 					space->chain.start->name,
6294 					space->crypt_data->key_id);
6295 				ret_err = HA_ERR_DECRYPTION_FAILED;
6296 			}
6297 
6298 			dict_table_close(ib_table, FALSE, FALSE);
6299 			DBUG_RETURN(ret_err);
6300 		}
6301 	}
6302 
6303 	m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
6304 
6305 	m_prebuilt->default_rec = table->s->default_values;
6306 	ut_ad(m_prebuilt->default_rec);
6307 
6308 	m_prebuilt->m_mysql_table = table;
6309 
6310 	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
6311 	m_primary_key = table->s->primary_key;
6312 
6313 	key_used_on_scan = m_primary_key;
6314 
6315 	if (ib_table->n_v_cols) {
6316 		mutex_enter(&dict_sys->mutex);
6317 		if (ib_table->vc_templ == NULL) {
6318 			ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
6319 			innobase_build_v_templ(
6320 				table, ib_table, ib_table->vc_templ, NULL,
6321 				true);
6322 		}
6323 
6324 		mutex_exit(&dict_sys->mutex);
6325 	}
6326 
6327 	if (!check_index_consistency(table, ib_table)) {
6328 		sql_print_error("InnoDB indexes are inconsistent with what "
6329 				"defined in .frm for table %s",
6330 				name);
6331 	}
6332 
6333 	/* Allocate a buffer for a 'row reference'. A row reference is
6334 	a string of bytes of length ref_length which uniquely specifies
6335 	a row in our table. Note that MySQL may also compare two row
6336 	references for equality by doing a simple memcmp on the strings
6337 	of length ref_length! */
6338 	if (!(m_prebuilt->clust_index_was_generated
6339 	      = dict_index_is_auto_gen_clust(ib_table->indexes.start))) {
6340 		if (m_primary_key >= MAX_KEY) {
6341 			ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;
6342 
6343 			/* This mismatch could cause further problems
6344 			if not attended, bring this to the user's attention
6345 			by printing a warning in addition to log a message
6346 			in the errorlog */
6347 
6348 			ib_push_frm_error(thd, ib_table, table, 0, true);
6349 
6350 			/* If m_primary_key >= MAX_KEY, its (m_primary_key)
6351 			value could be out of bound if continue to index
6352 			into key_info[] array. Find InnoDB primary index,
6353 			and assign its key_length to ref_length.
6354 			In addition, since MySQL indexes are sorted starting
6355 			with primary index, unique index etc., initialize
6356 			ref_length to the first index key length in
6357 			case we fail to find InnoDB cluster index.
6358 
6359 			Please note, this will not resolve the primary
6360 			index mismatch problem, other side effects are
6361 			possible if users continue to use the table.
6362 			However, we allow this table to be opened so
6363 			that user can adopt necessary measures for the
			mismatch while still being able to access the table
			data. */
6366 			if (!table->key_info) {
6367 				ut_ad(!table->s->keys);
6368 				ref_length = 0;
6369 			} else {
6370 				ref_length = table->key_info[0].key_length;
6371 			}
6372 
6373 			/* Find corresponding cluster index
6374 			key length in MySQL's key_info[] array */
6375 			for (uint i = 0; i < table->s->keys; i++) {
6376 				dict_index_t*	index;
6377 				index = innobase_get_index(i);
6378 				if (dict_index_is_clust(index)) {
6379 					ref_length =
6380 						 table->key_info[i].key_length;
6381 				}
6382 			}
6383 		} else {
6384 			/* MySQL allocates the buffer for ref.
6385 			key_info->key_length includes space for all key
6386 			columns + one byte for each column that may be
6387 			NULL. ref_length must be as exact as possible to
6388 			save space, because all row reference buffers are
6389 			allocated based on ref_length. */
6390 
6391 			ref_length = table->key_info[m_primary_key].key_length;
6392 		}
6393 	} else {
6394 		if (m_primary_key != MAX_KEY) {
6395 
6396 			ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;
6397 
6398 			/* This mismatch could cause further problems
6399 			if not attended, bring this to the user attention
6400 			by printing a warning in addition to log a message
6401 			in the errorlog */
6402 			ib_push_frm_error(thd, ib_table, table, 0, true);
6403 		}
6404 
6405 		ref_length = DATA_ROW_ID_LEN;
6406 
6407 		/* If we automatically created the clustered index, then
6408 		MySQL does not know about it, and MySQL must NOT be aware
6409 		of the index used on scan, to make it avoid checking if we
6410 		update the column of the index. That is why we assert below
6411 		that key_used_on_scan is the undefined value MAX_KEY.
6412 		The column is the row id in the automatical generation case,
6413 		and it will never be updated anyway. */
6414 
6415 		if (key_used_on_scan != MAX_KEY) {
6416 			sql_print_warning(
6417 				"Table %s key_used_on_scan is %u even "
6418 				"though there is no primary key inside "
6419 				"InnoDB.", name, key_used_on_scan);
6420 		}
6421 	}
6422 
6423 	/* Index block size in InnoDB: used by MySQL in query optimization */
6424 	stats.block_size = srv_page_size;
6425 
6426 	const my_bool for_vc_purge = THDVAR(thd, background_thread);
6427 
6428 	if (for_vc_purge || !m_prebuilt->table
6429 	    || m_prebuilt->table->is_temporary()
6430 	    || m_prebuilt->table->persistent_autoinc
6431 	    || !m_prebuilt->table->is_readable()) {
6432 	} else if (const Field* ai = table->found_next_number_field) {
6433 		initialize_auto_increment(m_prebuilt->table, ai);
6434 	}
6435 
6436 	/* Set plugin parser for fulltext index */
6437 	for (uint i = 0; i < table->s->keys; i++) {
6438 		if (table->key_info[i].flags & HA_USES_PARSER) {
6439 			dict_index_t*	index = innobase_get_index(i);
6440 			plugin_ref	parser = table->key_info[i].parser;
6441 
6442 			ut_ad(index->type & DICT_FTS);
6443 			index->parser =
6444 				static_cast<st_mysql_ftparser *>(
6445 					plugin_decl(parser)->info);
6446 
6447 			DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
6448 				index->parser = &fts_default_parser;);
6449 		}
6450 	}
6451 
6452 	ut_ad(!m_prebuilt->table
6453 	      || table->versioned() == m_prebuilt->table->versioned());
6454 
6455 	if (!for_vc_purge) {
6456 		info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST
6457 		     | HA_STATUS_OPEN);
6458 	}
6459 
6460 	DBUG_RETURN(0);
6461 }
6462 
6463 /** Convert MySQL column number to dict_table_t::cols[] offset.
6464 @param[in]	field	non-virtual column
6465 @return	column number relative to dict_table_t::cols[] */
6466 unsigned
innodb_col_no(const Field * field)6467 innodb_col_no(const Field* field)
6468 {
6469 	ut_ad(!innobase_is_s_fld(field));
6470 	const TABLE*	table	= field->table;
6471 	unsigned	col_no	= 0;
6472 	ut_ad(field == table->field[field->field_index]);
6473 	for (unsigned i = 0; i < field->field_index; i++) {
6474 		if (table->field[i]->stored_in_db()) {
6475 			col_no++;
6476 		}
6477 	}
6478 	return(col_no);
6479 }
6480 
6481 /** Opens dictionary table object using table name. For partition, we need to
6482 try alternative lower/upper case names to support moving data files across
6483 platforms.
6484 @param[in]	table_name	name of the table/partition
6485 @param[in]	norm_name	normalized name of the table/partition
6486 @param[in]	is_partition	if this is a partition of a table
6487 @param[in]	ignore_err	error to ignore for loading dictionary object
6488 @return dictionary table object or NULL if not found */
6489 dict_table_t*
open_dict_table(const char * table_name,const char * norm_name,bool is_partition,dict_err_ignore_t ignore_err)6490 ha_innobase::open_dict_table(
6491 	const char*
6492 #ifdef _WIN32
6493 	table_name
6494 #endif
6495 	,
6496 	const char*		norm_name,
6497 	bool			is_partition,
6498 	dict_err_ignore_t	ignore_err)
6499 {
6500 	DBUG_ENTER("ha_innobase::open_dict_table");
6501 	dict_table_t*	ib_table = dict_table_open_on_name(norm_name, FALSE,
6502 							   TRUE, ignore_err);
6503 
6504 	if (NULL == ib_table && is_partition) {
6505 		/* MySQL partition engine hard codes the file name
6506 		separator as "#P#". The text case is fixed even if
6507 		lower_case_table_names is set to 1 or 2. This is true
6508 		for sub-partition names as well. InnoDB always
6509 		normalises file names to lower case on Windows, this
6510 		can potentially cause problems when copying/moving
6511 		tables between platforms.
6512 
6513 		1) If boot against an installation from Windows
6514 		platform, then its partition table name could
6515 		be in lower case in system tables. So we will
6516 		need to check lower case name when load table.
6517 
6518 		2) If we boot an installation from other case
6519 		sensitive platform in Windows, we might need to
6520 		check the existence of table name without lower
6521 		case in the system table. */
6522 		if (innobase_get_lower_case_table_names() == 1) {
6523 			char	par_case_name[FN_REFLEN];
6524 
6525 #ifndef _WIN32
6526 			/* Check for the table using lower
6527 			case name, including the partition
6528 			separator "P" */
6529 			strcpy(par_case_name, norm_name);
6530 			innobase_casedn_str(par_case_name);
6531 #else
6532 			/* On Windows platfrom, check
6533 			whether there exists table name in
6534 			system table whose name is
6535 			not being normalized to lower case */
6536 			create_table_info_t::
6537 				normalize_table_name_low(
6538 					par_case_name,
6539 					table_name, FALSE);
6540 #endif
6541 			ib_table = dict_table_open_on_name(
6542 				par_case_name, FALSE, TRUE,
6543 				ignore_err);
6544 		}
6545 
6546 		if (ib_table != NULL) {
6547 #ifndef _WIN32
6548 			sql_print_warning("Partition table %s opened"
6549 					  " after converting to lower"
6550 					  " case. The table may have"
6551 					  " been moved from a case"
6552 					  " in-sensitive file system."
6553 					  " Please recreate table in"
6554 					  " the current file system\n",
6555 					  norm_name);
6556 #else
6557 			sql_print_warning("Partition table %s opened"
6558 					  " after skipping the step to"
6559 					  " lower case the table name."
6560 					  " The table may have been"
6561 					  " moved from a case sensitive"
6562 					  " file system. Please"
6563 					  " recreate table in the"
6564 					  " current file system\n",
6565 					  norm_name);
6566 #endif
6567 		}
6568 	}
6569 
6570 	DBUG_RETURN(ib_table);
6571 }
6572 
6573 handler*
clone(const char * name,MEM_ROOT * mem_root)6574 ha_innobase::clone(
6575 /*===============*/
6576 	const char*	name,		/*!< in: table name */
6577 	MEM_ROOT*	mem_root)	/*!< in: memory context */
6578 {
6579 	DBUG_ENTER("ha_innobase::clone");
6580 
6581 	ha_innobase*	new_handler = static_cast<ha_innobase*>(
6582 		handler::clone(m_prebuilt->table->name.m_name, mem_root));
6583 
6584 	if (new_handler != NULL) {
6585 		DBUG_ASSERT(new_handler->m_prebuilt != NULL);
6586 
6587 		new_handler->m_prebuilt->select_lock_type
6588 			= m_prebuilt->select_lock_type;
6589 	}
6590 
6591 	DBUG_RETURN(new_handler);
6592 }
6593 
6594 
6595 uint
max_supported_key_part_length() const6596 ha_innobase::max_supported_key_part_length() const
6597 /*==============================================*/
6598 {
6599 	/* A table format specific index column length check will be performed
6600 	at ha_innobase::add_index() and row_create_index_for_mysql() */
6601 	return(REC_VERSION_56_MAX_INDEX_COL_LEN);
6602 }
6603 
6604 /******************************************************************//**
6605 Closes a handle to an InnoDB table.
6606 @return 0 */
6607 
6608 int
close()6609 ha_innobase::close()
6610 /*================*/
6611 {
6612 	DBUG_ENTER("ha_innobase::close");
6613 
6614 	row_prebuilt_free(m_prebuilt, FALSE);
6615 
6616 	if (m_upd_buf != NULL) {
6617 		ut_ad(m_upd_buf_size != 0);
6618 		my_free(m_upd_buf);
6619 		m_upd_buf = NULL;
6620 		m_upd_buf_size = 0;
6621 	}
6622 
6623 	MONITOR_INC(MONITOR_TABLE_CLOSE);
6624 
6625 	/* Tell InnoDB server that there might be work for
6626 	utility threads: */
6627 
6628 	srv_active_wake_master_thread();
6629 
6630 	DBUG_RETURN(0);
6631 }
6632 
6633 /* The following accessor functions should really be inside MySQL code! */
6634 
6635 #ifdef WITH_WSREP
/******************************************************************//**
Replaces a column value with its collation sort string, in place.
For BIT, string and BLOB types the charset is looked up by number, the
input is copied to a local buffer and the collation's strnxfrm() is
called with str as the destination. All other types are left untouched.
The function aborts (ut_a) if str_length exceeds
REC_VERSION_56_MAX_INDEX_COL_LEN or if the charset cannot be found.
NOTE(review): the strnxfrm() argument order is presumably
(cs, dst, dst_len, nweights, src, src_len, flags) -- confirm against the
collation handler declaration.
@return str_length, except for string-like types when
wsrep_protocol_version >= 3, where the length returned by the last
strnxfrm() call is returned */
UNIV_INTERN
ulint
wsrep_innobase_mysql_sort(
/*======================*/
					/* out: str contains sort string */
	int		mysql_type,	/* in: MySQL type */
	uint		charset_number,	/* in: number of the charset */
	unsigned char*	str,		/* in/out: data field; overwritten
					with the sort string */
	unsigned int	str_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned int	buf_length)	/* in: total str buffer length */

{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	/* Default result: the input length, used for every type that is
	not transformed and for protocol versions below 3. */
	ulint			ret_length =	str_length;

	DBUG_ASSERT(str_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
	{
		/* Zero-filled scratch copy of the input; it serves as the
		strnxfrm() source because str itself is the destination. */
		uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;

		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
			  sql_print_error("InnoDB needs charset %lu for doing "
					  "a comparison, but MariaDB cannot "
					  "find that charset.",
					  (ulong) charset_number);
				ut_a(0);
			}
		}

		/* The input must fit into the scratch buffer. */
		ut_a(str_length <= tmp_length);
		memcpy(tmp_str, str, str_length);

		/* First transformation, performed for every protocol
		version. Note that the source length passed here is the
		full scratch buffer size, and that tmp_length is
		overwritten with the returned length. */
		tmp_length = charset->coll->strnxfrm(charset, str, str_length,
						     str_length, tmp_str,
						     tmp_length, 0);
		DBUG_ASSERT(tmp_length <= str_length);
		if (wsrep_protocol_version < 3) {
			/* Second transformation, limited to str_length
			destination bytes; the source length is the length
			returned by the first call. ret_length is left at
			str_length. */
			tmp_length = charset->coll->strnxfrm(
				charset, str, str_length,
				str_length, tmp_str, tmp_length, 0);
			DBUG_ASSERT(tmp_length <= str_length);
		} else {
			/* strnxfrm will expand the destination string,
			   protocols < 3 truncated the sorted string
			   protocols >= 3 gets full sorted string
			*/
			tmp_length = charset->coll->strnxfrm(
				charset, str, buf_length,
				str_length, tmp_str, str_length, 0);
			DBUG_ASSERT(tmp_length <= buf_length);
			ret_length = tmp_length;
		}

		break;
	}
	/* The types below are not transformed: str is left as is and
	str_length is returned. */
	case MYSQL_TYPE_DECIMAL :
	case MYSQL_TYPE_TINY :
	case MYSQL_TYPE_SHORT :
	case MYSQL_TYPE_LONG :
	case MYSQL_TYPE_FLOAT :
	case MYSQL_TYPE_DOUBLE :
	case MYSQL_TYPE_NULL :
	case MYSQL_TYPE_TIMESTAMP :
	case MYSQL_TYPE_LONGLONG :
	case MYSQL_TYPE_INT24 :
	case MYSQL_TYPE_DATE :
	case MYSQL_TYPE_TIME :
	case MYSQL_TYPE_DATETIME :
	case MYSQL_TYPE_YEAR :
	case MYSQL_TYPE_NEWDATE :
	case MYSQL_TYPE_NEWDECIMAL :
	case MYSQL_TYPE_ENUM :
	case MYSQL_TYPE_SET :
	case MYSQL_TYPE_GEOMETRY :
		break;
	default:
		break;
	}

	return ret_length;
}
6744 #endif /* WITH_WSREP */
6745 
6746 /******************************************************************//**
6747 compare two character string according to their charset. */
6748 int
innobase_fts_text_cmp(const void * cs,const void * p1,const void * p2)6749 innobase_fts_text_cmp(
6750 /*==================*/
6751 	const void*	cs,		/*!< in: Character set */
6752 	const void*     p1,		/*!< in: key */
6753 	const void*     p2)		/*!< in: node */
6754 {
6755 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6756 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6757 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6758 
6759 	return(ha_compare_text(
6760 		charset, s1->f_str, static_cast<uint>(s1->f_len),
6761 		s2->f_str, static_cast<uint>(s2->f_len), 0));
6762 }
6763 
6764 /******************************************************************//**
6765 compare two character string case insensitively according to their charset. */
6766 int
innobase_fts_text_case_cmp(const void * cs,const void * p1,const void * p2)6767 innobase_fts_text_case_cmp(
6768 /*=======================*/
6769 	const void*	cs,		/*!< in: Character set */
6770 	const void*     p1,		/*!< in: key */
6771 	const void*     p2)		/*!< in: node */
6772 {
6773 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6774 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6775 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6776 	ulint			newlen;
6777 
6778 	my_casedn_str(charset, (char*) s2->f_str);
6779 
6780 	newlen = strlen((const char*) s2->f_str);
6781 
6782 	return(ha_compare_text(
6783 		charset, s1->f_str, static_cast<uint>(s1->f_len),
6784 		s2->f_str, static_cast<uint>(newlen), 0));
6785 }
6786 
6787 /******************************************************************//**
6788 Get the first character's code position for FTS index partition. */
6789 ulint
innobase_strnxfrm(const CHARSET_INFO * cs,const uchar * str,const ulint len)6790 innobase_strnxfrm(
6791 /*==============*/
6792 	const CHARSET_INFO*
6793 			cs,		/*!< in: Character set */
6794 	const uchar*	str,		/*!< in: string */
6795 	const ulint	len)		/*!< in: string length */
6796 {
6797 	uchar		mystr[2];
6798 	ulint		value;
6799 
6800 	if (!str || len == 0) {
6801 		return(0);
6802 	}
6803 
6804 	my_strnxfrm(cs, (uchar*) mystr, 2, str, len);
6805 
6806 	value = mach_read_from_2(mystr);
6807 
6808 	if (value > 255) {
6809 		value = value / 256;
6810 	}
6811 
6812 	return(value);
6813 }
6814 
6815 /******************************************************************//**
6816 compare two character string according to their charset. */
6817 int
innobase_fts_text_cmp_prefix(const void * cs,const void * p1,const void * p2)6818 innobase_fts_text_cmp_prefix(
6819 /*=========================*/
6820 	const void*	cs,		/*!< in: Character set */
6821 	const void*	p1,		/*!< in: prefix key */
6822 	const void*	p2)		/*!< in: value to compare */
6823 {
6824 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6825 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6826 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6827 	int			result;
6828 
6829 	result = ha_compare_text(
6830 		charset, s2->f_str, static_cast<uint>(s2->f_len),
6831 		s1->f_str, static_cast<uint>(s1->f_len), 1);
6832 
6833 	/* We switched s1, s2 position in ha_compare_text. So we need
6834 	to negate the result */
6835 	return(-result);
6836 }
6837 
6838 /******************************************************************//**
6839 Makes all characters in a string lower case. */
6840 size_t
innobase_fts_casedn_str(CHARSET_INFO * cs,char * src,size_t src_len,char * dst,size_t dst_len)6841 innobase_fts_casedn_str(
6842 /*====================*/
6843 	CHARSET_INFO*	cs,	/*!< in: Character set */
6844 	char*		src,	/*!< in: string to put in lower case */
6845 	size_t		src_len,/*!< in: input string length */
6846 	char*		dst,	/*!< in: buffer for result string */
6847 	size_t		dst_len)/*!< in: buffer size */
6848 {
6849 	if (cs->casedn_multiply == 1) {
6850 		memcpy(dst, src, src_len);
6851 		dst[src_len] = 0;
6852 		my_casedn_str(cs, dst);
6853 
6854 		return(strlen(dst));
6855 	} else {
6856 		return(cs->cset->casedn(cs, src, src_len, dst, dst_len));
6857 	}
6858 }
6859 
/* Nonzero when the character type bits c contain any of _MY_U, _MY_L or
_MY_NMR (presumably upper case, lower case and numeric -- confirm against
the ctype definitions), or when the byte ch is an underscore. */
#define true_word_char(c, ch)	\
	(((c) & (_MY_U | _MY_L | _MY_NMR)) || ((ch) == '_'))

/* Always 0: no character is treated as a "misc" word character. */
#define misc_word_char(X)	(0)
6863 
6864 /*************************************************************//**
6865 Get the next token from the given string and store it in *token.
6866 It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
6867 @return length of string processed */
6868 ulint
innobase_mysql_fts_get_token(CHARSET_INFO * cs,const byte * start,const byte * end,fts_string_t * token)6869 innobase_mysql_fts_get_token(
6870 /*=========================*/
6871 	CHARSET_INFO*	cs,		/*!< in: Character set */
6872 	const byte*	start,		/*!< in: start of text */
6873 	const byte*	end,		/*!< in: one character past end of
6874 					text */
6875 	fts_string_t*	token)		/*!< out: token's text */
6876 {
6877 	int		mbl;
6878 	const uchar*	doc = start;
6879 
6880 	ut_a(cs);
6881 
6882 	token->f_n_char = token->f_len = 0;
6883 	token->f_str = NULL;
6884 
6885 	for (;;) {
6886 
6887 		if (doc >= end) {
6888 			return ulint(doc - start);
6889 		}
6890 
6891 		int	ctype;
6892 
6893 		mbl = cs->cset->ctype(
6894 			cs, &ctype, doc, (const uchar*) end);
6895 
6896 		if (true_word_char(ctype, *doc)) {
6897 			break;
6898 		}
6899 
6900 		doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6901 	}
6902 
6903 	ulint	mwc = 0;
6904 	ulint	length = 0;
6905 
6906 	token->f_str = const_cast<byte*>(doc);
6907 
6908 	while (doc < end) {
6909 
6910 		int	ctype;
6911 
6912 		mbl = cs->cset->ctype(
6913 			cs, &ctype, (uchar*) doc, (uchar*) end);
6914 		if (true_word_char(ctype, *doc)) {
6915 			mwc = 0;
6916 		} else if (!misc_word_char(*doc) || mwc) {
6917 			break;
6918 		} else {
6919 			++mwc;
6920 		}
6921 
6922 		++length;
6923 
6924 		doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
6925 	}
6926 
6927 	token->f_len = (uint) (doc - token->f_str) - mwc;
6928 	token->f_n_char = length;
6929 
6930 	return ulint(doc - start);
6931 }
6932 
6933 /** Converts a MySQL type to an InnoDB type. Note that this function returns
6934 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
6935 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
6936 @param[out]	unsigned_flag	DATA_UNSIGNED if an 'unsigned type'; at least
6937 ENUM and SET, and unsigned integer types are 'unsigned types'
6938 @param[in]	f		MySQL Field
6939 @return DATA_BINARY, DATA_VARCHAR, ... */
6940 ulint
get_innobase_type_from_mysql_type(ulint * unsigned_flag,const void * f)6941 get_innobase_type_from_mysql_type(
6942 	ulint*			unsigned_flag,
6943 	const void*		f)
6944 {
6945 	const class Field* field = reinterpret_cast<const class Field*>(f);
6946 
6947 	/* The following asserts try to check that the MySQL type code fits in
6948 	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
6949 	the type */
6950 
6951 	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
6952 	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
6953 	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
6954 	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
6955 	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
6956 
6957 	if (field->flags & UNSIGNED_FLAG) {
6958 
6959 		*unsigned_flag = DATA_UNSIGNED;
6960 	} else {
6961 		*unsigned_flag = 0;
6962 	}
6963 
6964 	if (field->real_type() == MYSQL_TYPE_ENUM
6965 		|| field->real_type() == MYSQL_TYPE_SET) {
6966 
6967 		/* MySQL has field->type() a string type for these, but the
6968 		data is actually internally stored as an unsigned integer
6969 		code! */
6970 
6971 		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
6972 						flag set to zero, even though
6973 						internally this is an unsigned
6974 						integer type */
6975 		return(DATA_INT);
6976 	}
6977 
6978 	switch (field->type()) {
6979 		/* NOTE that we only allow string types in DATA_MYSQL and
6980 		DATA_VARMYSQL */
6981 	case MYSQL_TYPE_VAR_STRING:	/* old <= 4.1 VARCHAR */
6982 	case MYSQL_TYPE_VARCHAR:	/* new >= 5.0.3 true VARCHAR */
6983 		if (field->binary()) {
6984 			return(DATA_BINARY);
6985 		} else if (field->charset() == &my_charset_latin1) {
6986 			return(DATA_VARCHAR);
6987 		} else {
6988 			return(DATA_VARMYSQL);
6989 		}
6990 	case MYSQL_TYPE_BIT:
6991 	case MYSQL_TYPE_STRING:
6992 		if (field->binary()) {
6993 			return(DATA_FIXBINARY);
6994 		} else if (field->charset() == &my_charset_latin1) {
6995 			return(DATA_CHAR);
6996 		} else {
6997 			return(DATA_MYSQL);
6998 		}
6999 	case MYSQL_TYPE_NEWDECIMAL:
7000 		return(DATA_FIXBINARY);
7001 	case MYSQL_TYPE_LONG:
7002 	case MYSQL_TYPE_LONGLONG:
7003 	case MYSQL_TYPE_TINY:
7004 	case MYSQL_TYPE_SHORT:
7005 	case MYSQL_TYPE_INT24:
7006 	case MYSQL_TYPE_DATE:
7007 	case MYSQL_TYPE_YEAR:
7008 	case MYSQL_TYPE_NEWDATE:
7009 		return(DATA_INT);
7010 	case MYSQL_TYPE_TIME:
7011 	case MYSQL_TYPE_DATETIME:
7012 	case MYSQL_TYPE_TIMESTAMP:
7013 		if (field->key_type() == HA_KEYTYPE_BINARY) {
7014 			return(DATA_FIXBINARY);
7015 		} else {
7016 			return(DATA_INT);
7017 		}
7018 	case MYSQL_TYPE_FLOAT:
7019 		return(DATA_FLOAT);
7020 	case MYSQL_TYPE_DOUBLE:
7021 		return(DATA_DOUBLE);
7022 	case MYSQL_TYPE_DECIMAL:
7023 		return(DATA_DECIMAL);
7024 	case MYSQL_TYPE_GEOMETRY:
7025 		return(DATA_GEOMETRY);
7026 	case MYSQL_TYPE_TINY_BLOB:
7027 	case MYSQL_TYPE_MEDIUM_BLOB:
7028 	case MYSQL_TYPE_BLOB:
7029 	case MYSQL_TYPE_LONG_BLOB:
7030 		return(DATA_BLOB);
7031 	case MYSQL_TYPE_NULL:
7032 		/* MySQL currently accepts "NULL" datatype, but will
7033 		reject such datatype in the next release. We will cope
7034 		with it and not trigger assertion failure in 5.1 */
7035 		break;
7036 	default:
7037 		ut_error;
7038 	}
7039 
7040 	return(0);
7041 }
7042 
7043 /*******************************************************************//**
7044 Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
7045 storage format.
7046 @return value */
7047 static inline
7048 uint
innobase_read_from_2_little_endian(const uchar * buf)7049 innobase_read_from_2_little_endian(
7050 /*===============================*/
7051 	const uchar*	buf)	/*!< in: from where to read */
7052 {
7053 	return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
7054 }
7055 
7056 #ifdef WITH_WSREP
7057 /*******************************************************************//**
7058 Stores a key value for a row to a buffer.
7059 @return	key value length as stored in buff */
7060 UNIV_INTERN
7061 uint
wsrep_store_key_val_for_row(THD * thd,TABLE * table,uint keynr,char * buff,uint buff_len,const uchar * record,ibool * key_is_null)7062 wsrep_store_key_val_for_row(
7063 /*=========================*/
7064 	THD* 		thd,
7065 	TABLE*		table,
7066 	uint		keynr,	/*!< in: key number */
7067 	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
7068 				format) */
7069 	uint		buff_len,/*!< in: buffer length */
7070 	const uchar*	record,
7071 	ibool*          key_is_null)/*!< out: full key was null */
7072 {
7073 	KEY*		key_info	= table->key_info + keynr;
7074 	KEY_PART_INFO*	key_part	= key_info->key_part;
7075 	KEY_PART_INFO*	end		= key_part + key_info->user_defined_key_parts;
7076 	char*		buff_start	= buff;
7077 	enum_field_types mysql_type;
7078 	Field*		field;
7079 	uint buff_space = buff_len;
7080 
7081 	DBUG_ENTER("wsrep_store_key_val_for_row");
7082 
7083 	memset(buff, 0, buff_len);
7084 	*key_is_null = TRUE;
7085 
7086 	for (; key_part != end; key_part++) {
7087 
7088 		uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
7089 		ibool part_is_null = FALSE;
7090 
7091 		if (key_part->null_bit) {
7092 			if (buff_space > 0) {
7093 				if (record[key_part->null_offset]
7094 				    & key_part->null_bit) {
7095 					*buff = 1;
7096 					part_is_null = TRUE;
7097 				} else {
7098 					*buff = 0;
7099 				}
7100 				buff++;
7101 				buff_space--;
7102 			} else {
7103 				fprintf (stderr, "WSREP: key truncated: %s\n",
7104 					 wsrep_thd_query(thd));
7105 			}
7106 		}
7107 		if (!part_is_null)  *key_is_null = FALSE;
7108 
7109 		field = key_part->field;
7110 		mysql_type = field->type();
7111 
7112 		if (mysql_type == MYSQL_TYPE_VARCHAR) {
7113 						/* >= 5.0.3 true VARCHAR */
7114 			ulint		lenlen;
7115 			ulint		len;
7116 			const byte*	data;
7117 			ulint		key_len;
7118 			ulint		true_len;
7119 			const CHARSET_INFO* cs;
7120 			int		error=0;
7121 
7122 			key_len = key_part->length;
7123 
7124 			if (part_is_null) {
7125 				true_len = key_len + 2;
7126 				if (true_len > buff_space) {
7127 					fprintf (stderr,
7128 						 "WSREP: key truncated: %s\n",
7129 						 wsrep_thd_query(thd));
7130 					true_len = buff_space;
7131 				}
7132 				buff       += true_len;
7133 				buff_space -= true_len;
7134 				continue;
7135 			}
7136 			cs = field->charset();
7137 
7138 			lenlen = (ulint)
7139 				(((Field_varstring*)field)->length_bytes);
7140 
7141 			data = row_mysql_read_true_varchar(&len,
7142 				(byte*) (record
7143 				+ (ulint)get_field_offset(table, field)),
7144 				lenlen);
7145 
7146 			true_len = len;
7147 
7148 			/* For multi byte character sets we need to calculate
7149 			the true length of the key */
7150 
7151 			if (len > 0 && cs->mbmaxlen > 1) {
7152 				true_len = (ulint) my_well_formed_length(cs,
7153 						(const char *) data,
7154 						(const char *) data + len,
7155 						(uint) (key_len /
7156 						cs->mbmaxlen),
7157 						&error);
7158 			}
7159 
7160 			/* In a column prefix index, we may need to truncate
7161 			the stored value: */
7162 			if (true_len > key_len) {
7163 				true_len = key_len;
7164 			}
			/* The value cannot exceed the maximum index column
			length either, so we may need to truncate the stored
			value: */
7167 			if (true_len > sizeof(sorted)) {
7168 			  true_len = sizeof(sorted);
7169 			}
7170 
7171 			memcpy(sorted, data, true_len);
7172 			true_len = wsrep_innobase_mysql_sort(
7173 				mysql_type, cs->number, sorted, true_len,
7174 				REC_VERSION_56_MAX_INDEX_COL_LEN);
7175 			if (wsrep_protocol_version > 1) {
7176 				/* Note that we always reserve the maximum possible
7177 				length of the true VARCHAR in the key value, though
7178 				only len first bytes after the 2 length bytes contain
7179 				actual data. The rest of the space was reset to zero
7180 				in the bzero() call above. */
7181 				if (true_len > buff_space) {
7182 					WSREP_DEBUG (
7183 						 "write set key truncated for: %s\n",
7184 						 wsrep_thd_query(thd));
7185 					true_len = buff_space;
7186 				}
7187  				memcpy(buff, sorted, true_len);
7188 				buff += true_len;
7189 				buff_space -= true_len;
7190 			} else {
7191 				buff += key_len;
7192 			}
7193 		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
7194 			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
7195 			|| mysql_type == MYSQL_TYPE_BLOB
7196 			|| mysql_type == MYSQL_TYPE_LONG_BLOB
7197 			/* MYSQL_TYPE_GEOMETRY data is treated
7198 			as BLOB data in innodb. */
7199 			|| mysql_type == MYSQL_TYPE_GEOMETRY) {
7200 
7201 			const CHARSET_INFO* cs;
7202 			ulint		key_len;
7203 			ulint		true_len;
7204 			int		error=0;
7205 			ulint		blob_len;
7206 			const byte*	blob_data;
7207 
7208 			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
7209 
7210 			key_len = key_part->length;
7211 
7212 			if (part_is_null) {
7213 				true_len = key_len + 2;
7214 				if (true_len > buff_space) {
7215 					fprintf (stderr,
7216 						 "WSREP: key truncated: %s\n",
7217 						 wsrep_thd_query(thd));
7218 					true_len = buff_space;
7219 				}
7220 				buff       += true_len;
7221 				buff_space -= true_len;
7222 
7223 				continue;
7224 			}
7225 
7226 			cs = field->charset();
7227 
7228 			blob_data = row_mysql_read_blob_ref(&blob_len,
7229 				(byte*) (record
7230 				+ (ulint)get_field_offset(table, field)),
7231 					(ulint) field->pack_length());
7232 
7233 			true_len = blob_len;
7234 
7235 			ut_a(get_field_offset(table, field)
7236 				== key_part->offset);
7237 
7238 			/* For multi byte character sets we need to calculate
7239 			the true length of the key */
7240 
7241 			if (blob_len > 0 && cs->mbmaxlen > 1) {
7242 				true_len = (ulint) my_well_formed_length(cs,
7243 						(const char *) blob_data,
7244 						(const char *) blob_data
7245 							+ blob_len,
7246 						(uint) (key_len /
7247 							cs->mbmaxlen),
7248 						&error);
7249 			}
7250 
7251 			/* All indexes on BLOB and TEXT are column prefix
7252 			indexes, and we may need to truncate the data to be
7253 			stored in the key value: */
7254 
7255 			if (true_len > key_len) {
7256 				true_len = key_len;
7257 			}
7258 
7259 			memcpy(sorted, blob_data, true_len);
7260 			true_len = wsrep_innobase_mysql_sort(
7261 				mysql_type, cs->number, sorted, true_len,
7262 				REC_VERSION_56_MAX_INDEX_COL_LEN);
7263 
7264 
7265 			/* Note that we always reserve the maximum possible
7266 			length of the BLOB prefix in the key value. */
7267 			if (wsrep_protocol_version > 1) {
7268 				if (true_len > buff_space) {
7269 					fprintf (stderr,
7270 						 "WSREP: key truncated: %s\n",
7271 						 wsrep_thd_query(thd));
7272 					true_len = buff_space;
7273 				}
7274 				buff       += true_len;
7275 				buff_space -= true_len;
7276 			} else {
7277 				buff += key_len;
7278 			}
7279 			memcpy(buff, sorted, true_len);
7280 		} else {
7281 			/* Here we handle all other data types except the
7282 			true VARCHAR, BLOB and TEXT. Note that the column
7283 			value we store may be also in a column prefix
7284 			index. */
7285 
7286 			const CHARSET_INFO*	cs = NULL;
7287 			ulint			true_len;
7288 			ulint			key_len;
7289 			const uchar*		src_start;
7290 			int			error=0;
7291 			enum_field_types	real_type;
7292 
7293 			key_len = key_part->length;
7294 
7295 			if (part_is_null) {
7296 				true_len = key_len;
7297 				if (true_len > buff_space) {
7298 					fprintf (stderr,
7299 						 "WSREP: key truncated: %s\n",
7300 						 wsrep_thd_query(thd));
7301 					true_len = buff_space;
7302 				}
7303 				buff       += true_len;
7304 				buff_space -= true_len;
7305 
7306 				continue;
7307 			}
7308 
7309 			src_start = record + key_part->offset;
7310 			real_type = field->real_type();
7311 			true_len = key_len;
7312 
7313 			/* Character set for the field is defined only
7314 			to fields whose type is string and real field
7315 			type is not enum or set. For these fields check
7316 			if character set is multi byte. */
7317 
7318 			if (real_type != MYSQL_TYPE_ENUM
7319 				&& real_type != MYSQL_TYPE_SET
7320 				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
7321 					|| mysql_type == MYSQL_TYPE_STRING)) {
7322 
7323 				cs = field->charset();
7324 
7325 				/* For multi byte character sets we need to
7326 				calculate the true length of the key */
7327 
7328 				if (key_len > 0 && cs->mbmaxlen > 1) {
7329 
7330 					true_len = (ulint)
7331 						my_well_formed_length(cs,
7332 							(const char *)src_start,
7333 							(const char *)src_start
7334 								+ key_len,
7335 							(uint) (key_len /
7336 								cs->mbmaxlen),
7337 							&error);
7338 				}
7339 				memcpy(sorted, src_start, true_len);
7340 				true_len = wsrep_innobase_mysql_sort(
7341 					mysql_type, cs->number, sorted, true_len,
7342 					REC_VERSION_56_MAX_INDEX_COL_LEN);
7343 
7344 				if (true_len > buff_space) {
7345 					fprintf (stderr,
7346 						 "WSREP: key truncated: %s\n",
7347 						 wsrep_thd_query(thd));
7348 					true_len   = buff_space;
7349 				}
7350 				memcpy(buff, sorted, true_len);
7351 			} else {
7352 				memcpy(buff, src_start, true_len);
7353 			}
7354 			buff       += true_len;
7355 			buff_space -= true_len;
7356 		}
7357 	}
7358 
7359 	ut_a(buff <= buff_start + buff_len);
7360 
7361 	DBUG_RETURN((uint)(buff - buff_start));
7362 }
7363 #endif /* WITH_WSREP */
7364 /**************************************************************//**
7365 Determines if a field is needed in a m_prebuilt struct 'template'.
7366 @return field to use, or NULL if the field is not needed */
7367 static
7368 const Field*
build_template_needs_field(ibool index_contains,ibool read_just_key,ibool fetch_all_in_key,ibool fetch_primary_key_cols,dict_index_t * index,const TABLE * table,ulint i,ulint num_v)7369 build_template_needs_field(
7370 /*=======================*/
7371 	ibool		index_contains,	/*!< in:
7372 					dict_index_contains_col_or_prefix(
7373 					index, i) */
7374 	ibool		read_just_key,	/*!< in: TRUE when MySQL calls
7375 					ha_innobase::extra with the
7376 					argument HA_EXTRA_KEYREAD; it is enough
7377 					to read just columns defined in
7378 					the index (i.e., no read of the
7379 					clustered index record necessary) */
7380 	ibool		fetch_all_in_key,
7381 					/*!< in: true=fetch all fields in
7382 					the index */
7383 	ibool		fetch_primary_key_cols,
7384 					/*!< in: true=fetch the
7385 					primary key columns */
7386 	dict_index_t*	index,		/*!< in: InnoDB index to use */
7387 	const TABLE*	table,		/*!< in: MySQL table object */
7388 	ulint		i,		/*!< in: field index in InnoDB table */
7389 	ulint		num_v)		/*!< in: num virtual column so far */
7390 {
7391 	const Field*	field	= table->field[i];
7392 
7393 	if (!field->stored_in_db()
7394 	    && ha_innobase::omits_virtual_cols(*table->s)) {
7395 		return NULL;
7396 	}
7397 
7398 	if (!index_contains) {
7399 		if (read_just_key) {
7400 			/* If this is a 'key read', we do not need
7401 			columns that are not in the key */
7402 
7403 			return(NULL);
7404 		}
7405 	} else if (fetch_all_in_key) {
7406 		/* This field is needed in the query */
7407 
7408 		return(field);
7409 	}
7410 
7411 	if (bitmap_is_set(table->read_set, static_cast<uint>(i))
7412 	    || bitmap_is_set(table->write_set, static_cast<uint>(i))) {
7413 		/* This field is needed in the query */
7414 
7415 		return(field);
7416 	}
7417 
7418 	ut_ad(i >= num_v);
7419 	if (fetch_primary_key_cols
7420 	    && dict_table_col_in_clustered_key(index->table, i - num_v)) {
7421 		/* This field is needed in the query */
7422 		return(field);
7423 	}
7424 
7425 	/* This field is not needed in the query, skip it */
7426 
7427 	return(NULL);
7428 }
7429 
7430 /**************************************************************//**
7431 Determines if a field is needed in a m_prebuilt struct 'template'.
7432 @return whether the field is needed for index condition pushdown */
7433 inline
7434 bool
build_template_needs_field_in_icp(const dict_index_t * index,const row_prebuilt_t * prebuilt,bool contains,ulint i,bool is_virtual)7435 build_template_needs_field_in_icp(
7436 /*==============================*/
7437 	const dict_index_t*	index,	/*!< in: InnoDB index */
7438 	const row_prebuilt_t*	prebuilt,/*!< in: row fetch template */
7439 	bool			contains,/*!< in: whether the index contains
7440 					column i */
7441 	ulint			i,	/*!< in: column number */
7442 	bool			is_virtual)
7443 					/*!< in: a virtual column or not */
7444 {
7445 	ut_ad(contains == dict_index_contains_col_or_prefix(index, i, is_virtual));
7446 
7447 	return(index == prebuilt->index
7448 	       ? contains
7449 	       : dict_index_contains_col_or_prefix(prebuilt->index, i, is_virtual));
7450 }
7451 
7452 /**************************************************************//**
7453 Adds a field to a m_prebuilt struct 'template'.
7454 @return the field template */
7455 static
7456 mysql_row_templ_t*
build_template_field(row_prebuilt_t * prebuilt,dict_index_t * clust_index,dict_index_t * index,TABLE * table,const Field * field,ulint i,ulint v_no)7457 build_template_field(
7458 /*=================*/
7459 	row_prebuilt_t*	prebuilt,	/*!< in/out: template */
7460 	dict_index_t*	clust_index,	/*!< in: InnoDB clustered index */
7461 	dict_index_t*	index,		/*!< in: InnoDB index to use */
7462 	TABLE*		table,		/*!< in: MySQL table object */
7463 	const Field*	field,		/*!< in: field in MySQL table */
7464 	ulint		i,		/*!< in: field index in InnoDB table */
7465 	ulint		v_no)		/*!< in: field index for virtual col */
7466 {
7467 	mysql_row_templ_t*	templ;
7468 	const dict_col_t*	col;
7469 
7470 	ut_ad(clust_index->table == index->table);
7471 
7472 	templ = prebuilt->mysql_template + prebuilt->n_template++;
7473 	MEM_UNDEFINED(templ, sizeof *templ);
7474 	templ->rec_field_is_prefix = FALSE;
7475 	templ->rec_prefix_field_no = ULINT_UNDEFINED;
7476 	templ->is_virtual = !field->stored_in_db();
7477 
7478 	if (!templ->is_virtual) {
7479 		templ->col_no = i;
7480 		col = dict_table_get_nth_col(index->table, i);
7481 		templ->clust_rec_field_no = dict_col_get_clust_pos(
7482 						col, clust_index);
7483 		/* If clustered index record field is not found, lets print out
7484 		field names and all the rest to understand why field is not found. */
7485 		if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
7486 			const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
7487 			dict_field_t* field=NULL;
7488 			size_t size = 0;
7489 
7490 			for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7491 				dict_field_t* ifield = &(clust_index->fields[j]);
7492 				if (ifield && !memcmp(tb_col_name, ifield->name,
7493 						strlen(tb_col_name))) {
7494 					field = ifield;
7495 					break;
7496 				}
7497 			}
7498 
7499 			ib::info() << "Looking for field " << i << " name "
7500 				<< (tb_col_name ? tb_col_name : "NULL")
7501 				<< " from table " << clust_index->table->name;
7502 
7503 
7504 			for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7505 				dict_field_t* ifield = &(clust_index->fields[j]);
7506 				ib::info() << "InnoDB Table "
7507 					<< clust_index->table->name
7508 					<< "field " << j << " name "
7509 					<< (ifield ? ifield->name() : "NULL");
7510 			}
7511 
7512 			for(ulint j=0; j < table->s->stored_fields; j++) {
7513 				ib::info() << "MySQL table "
7514 					<< table->s->table_name.str
7515 					<< " field " << j << " name "
7516 					<< table->field[j]->field_name.str;
7517 			}
7518 
7519 			ib::fatal() << "Clustered record field for column " << i
7520 				<< " not found table n_user_defined "
7521 				<< clust_index->n_user_defined_cols
7522 				<< " index n_user_defined "
7523 				<< clust_index->table->n_cols - DATA_N_SYS_COLS
7524 				<< " InnoDB table "
7525 				<< clust_index->table->name
7526 				<< " field name "
7527 				<< (field ? field->name() : "NULL")
7528 				<< " MySQL table "
7529 				<< table->s->table_name.str
7530 				<< " field name "
7531 				<< (tb_col_name ? tb_col_name : "NULL")
7532 				<< " n_fields "
7533 				<< table->s->stored_fields
7534 				<< " query "
7535 				<< innobase_get_stmt_unsafe(current_thd, &size);
7536 		}
7537 
7538 		if (dict_index_is_clust(index)) {
7539 			templ->rec_field_no = templ->clust_rec_field_no;
7540 		} else {
7541 			/* If we're in a secondary index, keep track
7542 			* of the original index position even if this
7543 			* is just a prefix index; we will use this
7544 			* later to avoid a cluster index lookup in
7545 			* some cases.*/
7546 
7547 			templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
7548 						&templ->rec_prefix_field_no);
7549 		}
7550 	} else {
7551 		DBUG_ASSERT(!ha_innobase::omits_virtual_cols(*table->s));
7552 		col = &dict_table_get_nth_v_col(index->table, v_no)->m_col;
7553 		templ->clust_rec_field_no = v_no;
7554 
7555 		if (dict_index_is_clust(index)) {
7556 			templ->rec_field_no = templ->clust_rec_field_no;
7557 		} else {
7558 			templ->rec_field_no
7559 				= dict_index_get_nth_col_or_prefix_pos(
7560 					index, v_no, FALSE, true,
7561 					&templ->rec_prefix_field_no);
7562 		}
7563 		templ->icp_rec_field_no = ULINT_UNDEFINED;
7564 	}
7565 
7566 	if (field->real_maybe_null()) {
7567 		templ->mysql_null_byte_offset =
7568 			field->null_offset();
7569 
7570 		templ->mysql_null_bit_mask = (ulint) field->null_bit;
7571 	} else {
7572 		templ->mysql_null_bit_mask = 0;
7573 	}
7574 
7575 
7576 	templ->mysql_col_offset = (ulint) get_field_offset(table, field);
7577 	templ->mysql_col_len = (ulint) field->pack_length();
7578 	templ->type = col->mtype;
7579 	templ->mysql_type = (ulint) field->type();
7580 
7581 	if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
7582 		templ->mysql_length_bytes = (ulint)
7583 			(((Field_varstring*) field)->length_bytes);
7584 	} else {
7585 		templ->mysql_length_bytes = 0;
7586 	}
7587 
7588 	templ->charset = dtype_get_charset_coll(col->prtype);
7589 	templ->mbminlen = dict_col_get_mbminlen(col);
7590 	templ->mbmaxlen = dict_col_get_mbmaxlen(col);
7591 	templ->is_unsigned = col->prtype & DATA_UNSIGNED;
7592 
7593 	if (!dict_index_is_clust(index)
7594 	    && templ->rec_field_no == ULINT_UNDEFINED) {
7595 		prebuilt->need_to_access_clustered = TRUE;
7596 
7597 		if (templ->rec_prefix_field_no != ULINT_UNDEFINED) {
7598 			dict_field_t* field = dict_index_get_nth_field(
7599 						index,
7600 						templ->rec_prefix_field_no);
7601 			templ->rec_field_is_prefix = (field->prefix_len != 0);
7602 		}
7603 	}
7604 
7605 	/* For spatial index, we need to access cluster index. */
7606 	if (dict_index_is_spatial(index)) {
7607 		prebuilt->need_to_access_clustered = TRUE;
7608 	}
7609 
7610 	if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
7611 	    + templ->mysql_col_len) {
7612 		prebuilt->mysql_prefix_len = templ->mysql_col_offset
7613 			+ templ->mysql_col_len;
7614 	}
7615 
7616 	if (DATA_LARGE_MTYPE(templ->type)) {
7617 		prebuilt->templ_contains_blob = TRUE;
7618 	}
7619 
7620 	return(templ);
7621 }
7622 
7623 /**************************************************************//**
7624 Builds a 'template' to the m_prebuilt struct. The template is used in fast
7625 retrieval of just those column values MySQL needs in its processing. */
7626 
7627 void
build_template(bool whole_row)7628 ha_innobase::build_template(
7629 /*========================*/
7630 	bool		whole_row)	/*!< in: true=ROW_MYSQL_WHOLE_ROW,
7631 					false=ROW_MYSQL_REC_FIELDS */
7632 {
7633 	dict_index_t*	index;
7634 	dict_index_t*	clust_index;
7635 	ibool		fetch_all_in_key	= FALSE;
7636 	ibool		fetch_primary_key_cols	= FALSE;
7637 
7638 	if (m_prebuilt->select_lock_type == LOCK_X || m_prebuilt->table->no_rollback()) {
7639 		/* We always retrieve the whole clustered index record if we
7640 		use exclusive row level locks, for example, if the read is
7641 		done in an UPDATE statement or if we are using a no rollback
7642                 table */
7643 
7644 		whole_row = true;
7645 	} else if (!whole_row) {
7646 		if (m_prebuilt->hint_need_to_fetch_extra_cols
7647 			== ROW_RETRIEVE_ALL_COLS) {
7648 
7649 			/* We know we must at least fetch all columns in the
7650 			key, or all columns in the table */
7651 
7652 			if (m_prebuilt->read_just_key) {
7653 				/* MySQL has instructed us that it is enough
7654 				to fetch the columns in the key; looks like
7655 				MySQL can set this flag also when there is
7656 				only a prefix of the column in the key: in
7657 				that case we retrieve the whole column from
7658 				the clustered index */
7659 
7660 				fetch_all_in_key = TRUE;
7661 			} else {
7662 				whole_row = true;
7663 			}
7664 		} else if (m_prebuilt->hint_need_to_fetch_extra_cols
7665 			== ROW_RETRIEVE_PRIMARY_KEY) {
7666 			/* We must at least fetch all primary key cols. Note
7667 			that if the clustered index was internally generated
7668 			by InnoDB on the row id (no primary key was
7669 			defined), then row_search_for_mysql() will always
7670 			retrieve the row id to a special buffer in the
7671 			m_prebuilt struct. */
7672 
7673 			fetch_primary_key_cols = TRUE;
7674 		}
7675 	}
7676 
7677 	clust_index = dict_table_get_first_index(m_prebuilt->table);
7678 
7679 	index = whole_row ? clust_index : m_prebuilt->index;
7680 
7681 	m_prebuilt->versioned_write = table->versioned_write(VERS_TRX_ID);
7682 	m_prebuilt->need_to_access_clustered = (index == clust_index);
7683 
7684 	/* Either m_prebuilt->index should be a secondary index, or it
7685 	should be the clustered index. */
7686 	ut_ad(dict_index_is_clust(index) == (index == clust_index));
7687 
7688 	/* Below we check column by column if we need to access
7689 	the clustered index. */
7690 
7691 	const bool skip_virtual = omits_virtual_cols(*table_share);
7692 	const ulint n_fields = table_share->fields;
7693 
7694 	if (!m_prebuilt->mysql_template) {
7695 		m_prebuilt->mysql_template = (mysql_row_templ_t*)
7696 			ut_malloc_nokey(n_fields * sizeof(mysql_row_templ_t));
7697 	}
7698 
7699 	m_prebuilt->template_type = whole_row
7700 		? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
7701 	m_prebuilt->null_bitmap_len = table->s->null_bytes;
7702 
7703 	/* Prepare to build m_prebuilt->mysql_template[]. */
7704 	m_prebuilt->templ_contains_blob = FALSE;
7705 	m_prebuilt->mysql_prefix_len = 0;
7706 	m_prebuilt->n_template = 0;
7707 	m_prebuilt->idx_cond_n_cols = 0;
7708 
7709 	/* Note that in InnoDB, i is the column number in the table.
7710 	MySQL calls columns 'fields'. */
7711 
7712 	ulint num_v = 0;
7713 
7714 	if (active_index != MAX_KEY
7715 	    && active_index == pushed_idx_cond_keyno) {
7716 		/* Push down an index condition or an end_range check. */
7717 		for (ulint i = 0; i < n_fields; i++) {
7718 			const Field* field = table->field[i];
7719 			const bool is_v = !field->stored_in_db();
7720 			if (is_v && skip_virtual) {
7721 				num_v++;
7722 				continue;
7723 			}
7724 			ibool index_contains
7725 				= dict_index_contains_col_or_prefix(
7726 					index, is_v ? num_v : i - num_v, is_v);
7727 			if (is_v && index_contains) {
7728 				m_prebuilt->n_template = 0;
7729 				num_v = 0;
7730 				goto no_icp;
7731 			}
7732 
7733 			/* Test if an end_range or an index condition
7734 			refers to the field. Note that "index" and
7735 			"index_contains" may refer to the clustered index.
7736 			Index condition pushdown is relative to
7737 			m_prebuilt->index (the index that is being
7738 			looked up first). */
7739 
7740 			/* When join_read_always_key() invokes this
7741 			code via handler::ha_index_init() and
7742 			ha_innobase::index_init(), end_range is not
7743 			yet initialized. Because of that, we must
7744 			always check for index_contains, instead of
7745 			the subset
7746 			field->part_of_key.is_set(active_index)
7747 			which would be acceptable if end_range==NULL. */
7748 			if (build_template_needs_field_in_icp(
7749 				    index, m_prebuilt, index_contains,
7750 				    is_v ? num_v : i - num_v, is_v)) {
7751 				if (!whole_row) {
7752 					field = build_template_needs_field(
7753 						index_contains,
7754 						m_prebuilt->read_just_key,
7755 						fetch_all_in_key,
7756 						fetch_primary_key_cols,
7757 						index, table, i, num_v);
7758 					if (!field) {
7759 						if (is_v) {
7760 							num_v++;
7761 						}
7762 						continue;
7763 					}
7764 				}
7765 
7766 				ut_ad(!is_v);
7767 
7768 				mysql_row_templ_t* templ= build_template_field(
7769 					m_prebuilt, clust_index, index,
7770 					table, field, i - num_v, 0);
7771 
7772 				ut_ad(!templ->is_virtual);
7773 
7774 				m_prebuilt->idx_cond_n_cols++;
7775 				ut_ad(m_prebuilt->idx_cond_n_cols
7776 				      == m_prebuilt->n_template);
7777 
7778 				if (index == m_prebuilt->index) {
7779 					templ->icp_rec_field_no
7780 						= templ->rec_field_no;
7781 				} else {
7782 					templ->icp_rec_field_no
7783 						= dict_index_get_nth_col_pos(
7784 							m_prebuilt->index,
7785 							i - num_v,
7786 							&templ->rec_prefix_field_no);
7787 				}
7788 
7789 				if (dict_index_is_clust(m_prebuilt->index)) {
7790 					ut_ad(templ->icp_rec_field_no
7791 					      != ULINT_UNDEFINED);
7792 					/* If the primary key includes
7793 					a column prefix, use it in
7794 					index condition pushdown,
7795 					because the condition is
7796 					evaluated before fetching any
7797 					off-page (externally stored)
7798 					columns. */
7799 					if (templ->icp_rec_field_no
7800 					    < m_prebuilt->index->n_uniq) {
7801 						/* This is a key column;
7802 						all set. */
7803 						continue;
7804 					}
7805 				} else if (templ->icp_rec_field_no
7806 					   != ULINT_UNDEFINED) {
7807 					continue;
7808 				}
7809 
7810 				/* This is a column prefix index.
7811 				The column prefix can be used in
7812 				an end_range comparison. */
7813 
7814 				templ->icp_rec_field_no
7815 					= dict_index_get_nth_col_or_prefix_pos(
7816 						m_prebuilt->index, i - num_v,
7817 						true, false,
7818 						&templ->rec_prefix_field_no);
7819 				ut_ad(templ->icp_rec_field_no
7820 				      != ULINT_UNDEFINED);
7821 
7822 				/* Index condition pushdown can be used on
7823 				all columns of a secondary index, and on
7824 				the PRIMARY KEY columns. On the clustered
7825 				index, it must never be used on other than
7826 				PRIMARY KEY columns, because those columns
7827 				may be stored off-page, and we will not
7828 				fetch externally stored columns before
7829 				checking the index condition. */
7830 				/* TODO: test the above with an assertion
7831 				like this. Note that index conditions are
7832 				currently pushed down as part of the
7833 				"optimizer phase" while end_range is done
7834 				as part of the execution phase. Therefore,
7835 				we were unable to use an accurate condition
7836 				for end_range in the "if" condition above,
7837 				and the following assertion would fail.
7838 				ut_ad(!dict_index_is_clust(m_prebuilt->index)
7839 				      || templ->rec_field_no
7840 				      < m_prebuilt->index->n_uniq);
7841 				*/
7842 			}
7843 
7844 			if (is_v) {
7845 				num_v++;
7846 			}
7847 		}
7848 
7849 		ut_ad(m_prebuilt->idx_cond_n_cols > 0);
7850 		ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template);
7851 
7852 		num_v = 0;
7853 
7854 		/* Include the fields that are not needed in index condition
7855 		pushdown. */
7856 		for (ulint i = 0; i < n_fields; i++) {
7857 			const Field*		field = table->field[i];
7858 			const bool is_v = !field->stored_in_db();
7859 			if (is_v && skip_virtual) {
7860 				num_v++;
7861 				continue;
7862 			}
7863 
7864 			ibool index_contains
7865 				= dict_index_contains_col_or_prefix(
7866 					index, is_v ? num_v : i - num_v, is_v);
7867 
7868 			if (!build_template_needs_field_in_icp(
7869 				    index, m_prebuilt, index_contains,
7870 				    is_v ? num_v : i - num_v, is_v)) {
7871 				/* Not needed in ICP */
7872 				if (!whole_row) {
7873 					field = build_template_needs_field(
7874 						index_contains,
7875 						m_prebuilt->read_just_key,
7876 						fetch_all_in_key,
7877 						fetch_primary_key_cols,
7878 						index, table, i, num_v);
7879 					if (!field) {
7880 						if (is_v) {
7881 							num_v++;
7882 						}
7883 						continue;
7884 					}
7885 				}
7886 
7887 				ut_d(mysql_row_templ_t*	templ =)
7888 				build_template_field(
7889 					m_prebuilt, clust_index, index,
7890 					table, field, i - num_v, num_v);
7891 				ut_ad(templ->is_virtual == (ulint)is_v);
7892 
7893 				if (is_v) {
7894 					num_v++;
7895 				}
7896 			}
7897 		}
7898 
7899 		m_prebuilt->idx_cond = this;
7900 	} else {
7901 no_icp:
7902 		/* No index condition pushdown */
7903 		m_prebuilt->idx_cond = NULL;
7904 		ut_ad(num_v == 0);
7905 
7906 		for (ulint i = 0; i < n_fields; i++) {
7907 			const Field*	field = table->field[i];
7908 			const bool is_v = !field->stored_in_db();
7909 
7910 			if (whole_row) {
7911 				if (is_v && skip_virtual) {
7912 					num_v++;
7913 					continue;
7914 				}
7915 				/* Even this is whole_row, if the seach is
7916 				on a virtual column, and read_just_key is
7917 				set, and field is not in this index, we
7918 				will not try to fill the value since they
7919 				are not stored in such index nor in the
7920 				cluster index. */
7921 				if (is_v
7922 				    && m_prebuilt->read_just_key
7923 				    && !dict_index_contains_col_or_prefix(
7924 					m_prebuilt->index, num_v, true))
7925 				{
7926 					/* Turn off ROW_MYSQL_WHOLE_ROW */
7927 					m_prebuilt->template_type =
7928 						 ROW_MYSQL_REC_FIELDS;
7929 					num_v++;
7930 					continue;
7931 				}
7932 			} else {
7933 				ibool	contain;
7934 
7935 				if (!is_v) {
7936 					contain = dict_index_contains_col_or_prefix(
7937 						index, i - num_v,
7938 						false);
7939 				} else if (skip_virtual
7940 					   || dict_index_is_clust(index)) {
7941 					num_v++;
7942 					continue;
7943 				} else {
7944 					contain = dict_index_contains_col_or_prefix(
7945 						index, num_v, true);
7946 				}
7947 
7948 				field = build_template_needs_field(
7949 					contain,
7950 					m_prebuilt->read_just_key,
7951 					fetch_all_in_key,
7952 					fetch_primary_key_cols,
7953 					index, table, i, num_v);
7954 				if (!field) {
7955 					if (is_v) {
7956 						num_v++;
7957 					}
7958 					continue;
7959 				}
7960 			}
7961 
7962 			ut_d(mysql_row_templ_t* templ =)
7963 			build_template_field(
7964 				m_prebuilt, clust_index, index,
7965 				table, field, i - num_v, num_v);
7966 			ut_ad(templ->is_virtual == (ulint)is_v);
7967 			if (is_v) {
7968 				num_v++;
7969 			}
7970 		}
7971 	}
7972 
7973 	if (index != clust_index && m_prebuilt->need_to_access_clustered) {
7974 		/* Change rec_field_no's to correspond to the clustered index
7975 		record */
7976 		for (ulint i = 0; i < m_prebuilt->n_template; i++) {
7977 			mysql_row_templ_t*	templ
7978 				= &m_prebuilt->mysql_template[i];
7979 
7980 			templ->rec_field_no = templ->clust_rec_field_no;
7981 		}
7982 	}
7983 }
7984 
7985 /********************************************************************//**
7986 This special handling is really to overcome the limitations of MySQL's
7987 binlogging. We need to eliminate the non-determinism that will arise in
7988 INSERT ... SELECT type of statements, since MySQL binlog only stores the
7989 min value of the autoinc interval. Once that is fixed we can get rid of
7990 the special lock handling.
7991 @return DB_SUCCESS if all OK else error code */
7992 
7993 dberr_t
innobase_lock_autoinc(void)7994 ha_innobase::innobase_lock_autoinc(void)
7995 /*====================================*/
7996 {
7997 	DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
7998 	dberr_t		error = DB_SUCCESS;
7999 
8000 	ut_ad(!srv_read_only_mode);
8001 
8002 	switch (innobase_autoinc_lock_mode) {
8003 	case AUTOINC_NO_LOCKING:
8004 		/* Acquire only the AUTOINC mutex. */
8005 		dict_table_autoinc_lock(m_prebuilt->table);
8006 		break;
8007 
8008 	case AUTOINC_NEW_STYLE_LOCKING:
8009 		/* For simple (single/multi) row INSERTs/REPLACEs and RBR
8010 		events, we fallback to the old style only if another
8011 		transaction has already acquired the AUTOINC lock on
8012 		behalf of a LOAD FILE or INSERT ... SELECT etc. type of
8013 		statement. */
8014 		if (thd_sql_command(m_user_thd) == SQLCOM_INSERT
8015 		    || thd_sql_command(m_user_thd) == SQLCOM_REPLACE
8016 		    || thd_sql_command(m_user_thd) == SQLCOM_END // RBR event
8017 		) {
8018 
8019 			/* Acquire the AUTOINC mutex. */
8020 			dict_table_autoinc_lock(m_prebuilt->table);
8021 
8022 			/* We need to check that another transaction isn't
8023 			already holding the AUTOINC lock on the table. */
8024 			if (m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
8025 				/* Release the mutex to avoid deadlocks and
8026 				fall back to old style locking. */
8027 				dict_table_autoinc_unlock(m_prebuilt->table);
8028 			} else {
8029 				/* Do not fall back to old style locking. */
8030 				break;
8031 			}
8032 		}
8033 		/* Use old style locking. */
8034 		/* fall through */
8035 	case AUTOINC_OLD_STYLE_LOCKING:
8036 		DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
8037 				ut_ad(0););
8038 		error = row_lock_table_autoinc_for_mysql(m_prebuilt);
8039 
8040 		if (error == DB_SUCCESS) {
8041 
8042 			/* Acquire the AUTOINC mutex. */
8043 			dict_table_autoinc_lock(m_prebuilt->table);
8044 		}
8045 		break;
8046 
8047 	default:
8048 		ut_error;
8049 	}
8050 
8051 	DBUG_RETURN(error);
8052 }
8053 
8054 /********************************************************************//**
8055 Store the autoinc value in the table. The autoinc value is only set if
8056 it's greater than the existing autoinc value in the table.
8057 @return DB_SUCCESS if all went well else error code */
8058 
8059 dberr_t
innobase_set_max_autoinc(ulonglong auto_inc)8060 ha_innobase::innobase_set_max_autoinc(
8061 /*==================================*/
8062 	ulonglong	auto_inc)	/*!< in: value to store */
8063 {
8064 	dberr_t		error;
8065 
8066 	error = innobase_lock_autoinc();
8067 
8068 	if (error == DB_SUCCESS) {
8069 
8070 		dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
8071 
8072 		dict_table_autoinc_unlock(m_prebuilt->table);
8073 	}
8074 
8075 	return(error);
8076 }
8077 
8078 /********************************************************************//**
8079 Stores a row in an InnoDB database, to the table specified in this
8080 handle.
8081 @return error code */
8082 
8083 int
write_row(uchar * record)8084 ha_innobase::write_row(
8085 /*===================*/
8086 	uchar*	record)	/*!< in: a row in MySQL format */
8087 {
8088 	dberr_t		error;
8089 #ifdef WITH_WSREP
8090 	bool		wsrep_auto_inc_inserted= false;
8091 #endif
8092 	int		error_result = 0;
8093 	bool		auto_inc_used = false;
8094 
8095 	DBUG_ENTER("ha_innobase::write_row");
8096 
8097 	trx_t*		trx = thd_to_trx(m_user_thd);
8098 
8099 	/* Validation checks before we commence write_row operation. */
8100 	if (high_level_read_only) {
8101 		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8102 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
8103 	}
8104 
8105 	ut_a(m_prebuilt->trx == trx);
8106 
8107 	if (!trx_is_started(trx)) {
8108 		trx->will_lock = true;
8109 	}
8110 
8111 #ifdef WITH_WSREP
8112 	if (trx->is_wsrep() && wsrep_is_load_multi_commit(m_user_thd))
8113 	{
8114 		/* Note that this transaction is still active. */
8115 		trx_register_for_2pc(m_prebuilt->trx);
8116 		/* We will need an IX lock on the destination table. */
8117 		m_prebuilt->sql_stat_start = TRUE;
8118 	}
8119 #endif /* WITH_WSREP */
8120 
8121 	ins_mode_t	vers_set_fields;
8122 	/* Handling of Auto-Increment Columns. */
8123 	if (table->next_number_field && record == table->record[0]) {
8124 
8125 		/* Reset the error code before calling
8126 		innobase_get_auto_increment(). */
8127 		m_prebuilt->autoinc_error = DB_SUCCESS;
8128 
8129 #ifdef WITH_WSREP
8130 		wsrep_auto_inc_inserted = trx->is_wsrep()
8131 			&& wsrep_drupal_282555_workaround
8132 			&& table->next_number_field->val_int() == 0;
8133 #endif
8134 
8135 		if ((error_result = update_auto_increment())) {
8136 			/* We don't want to mask autoinc overflow errors. */
8137 
8138 			/* Handle the case where the AUTOINC sub-system
8139 			failed during initialization. */
8140 			if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
8141 				error_result = ER_AUTOINC_READ_FAILED;
8142 				/* Set the error message to report too. */
8143 				my_error(ER_AUTOINC_READ_FAILED, MYF(0));
8144 				goto func_exit;
8145 			} else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
8146 				error = m_prebuilt->autoinc_error;
8147 				goto report_error;
8148 			}
8149 
8150 			/* MySQL errors are passed straight back. */
8151 			goto func_exit;
8152 		}
8153 
8154 		auto_inc_used = true;
8155 	}
8156 
8157 	/* Prepare INSERT graph that will be executed for actual INSERT
8158 	(This is a one time operation) */
8159 	if (m_prebuilt->mysql_template == NULL
8160 	    || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
8161 
8162 		/* Build the template used in converting quickly between
8163 		the two database formats */
8164 
8165 		build_template(true);
8166 	}
8167 
8168 	innobase_srv_conc_enter_innodb(m_prebuilt);
8169 
8170 	vers_set_fields = table->versioned_write(VERS_TRX_ID) ?
8171 		ROW_INS_VERSIONED : ROW_INS_NORMAL;
8172 
8173 	/* Execute insert graph that will result in actual insert. */
8174 	error = row_insert_for_mysql((byte*) record, m_prebuilt, vers_set_fields);
8175 
8176 	DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
8177 
8178 	/* Handling of errors related to auto-increment. */
8179 	if (auto_inc_used) {
8180 		ulonglong	auto_inc;
8181 
8182 		/* Note the number of rows processed for this statement, used
8183 		by get_auto_increment() to determine the number of AUTO-INC
8184 		values to reserve. This is only useful for a mult-value INSERT
8185 		and is a statement level counter. */
8186 		if (trx->n_autoinc_rows > 0) {
8187 			--trx->n_autoinc_rows;
8188 		}
8189 
8190 		/* Get the value that MySQL attempted to store in the table.*/
8191 		auto_inc = table->next_number_field->val_uint();
8192 
8193 		switch (error) {
8194 		case DB_DUPLICATE_KEY:
8195 
8196 			/* A REPLACE command and LOAD DATA INFILE REPLACE
8197 			handle a duplicate key error themselves, but we
8198 			must update the autoinc counter if we are performing
8199 			those statements. */
8200 
8201 			switch (thd_sql_command(m_user_thd)) {
8202 			case SQLCOM_LOAD:
8203 				if (!trx->duplicates) {
8204 					break;
8205 				}
8206 
8207 			case SQLCOM_REPLACE:
8208 			case SQLCOM_INSERT_SELECT:
8209 			case SQLCOM_REPLACE_SELECT:
8210 				goto set_max_autoinc;
8211 
8212 #ifdef WITH_WSREP
8213 			/* workaround for LP bug #355000, retrying the insert */
8214 			case SQLCOM_INSERT:
8215 
8216 				WSREP_DEBUG("DUPKEY error for autoinc\n"
8217 				      "THD %ld, value %llu, off %llu inc %llu",
8218 				      thd_get_thread_id(m_user_thd),
8219 				      auto_inc,
8220 				      m_prebuilt->autoinc_offset,
8221 				      m_prebuilt->autoinc_increment);
8222 
8223                                if (wsrep_auto_inc_inserted &&
8224                                    wsrep_thd_retry_counter(m_user_thd) == 0  &&
8225 				    !thd_test_options(m_user_thd,
8226 						      OPTION_NOT_AUTOCOMMIT |
8227 						      OPTION_BEGIN)) {
8228 					WSREP_DEBUG(
8229 					    "retrying insert: %s",
8230 					    wsrep_thd_query(m_user_thd));
8231 					error= DB_SUCCESS;
8232 					wsrep_thd_set_conflict_state(
8233 						m_user_thd, MUST_ABORT);
8234                                         innobase_srv_conc_exit_innodb(m_prebuilt);
8235                                         /* jump straight to func exit over
8236                                          * later wsrep hooks */
8237                                         goto func_exit;
8238 				}
8239                                 break;
8240 #endif /* WITH_WSREP */
8241 
8242 			default:
8243 				break;
8244 			}
8245 
8246 			break;
8247 
8248 		case DB_SUCCESS:
8249 			/* If the actual value inserted is greater than
8250 			the upper limit of the interval, then we try and
8251 			update the table upper limit. Note: last_value
8252 			will be 0 if get_auto_increment() was not called. */
8253 
8254 			if (auto_inc >= m_prebuilt->autoinc_last_value) {
8255 set_max_autoinc:
8256 				/* We need the upper limit of the col type to check for
8257 				whether we update the table autoinc counter or not. */
8258 				ulonglong	col_max_value =
8259 					table->next_number_field->get_max_int_value();
8260 
8261 				/* This should filter out the negative
8262 				values set explicitly by the user. */
8263 				if (auto_inc <= col_max_value) {
8264 					ut_ad(m_prebuilt->autoinc_increment > 0);
8265 
8266 					ulonglong	offset;
8267 					ulonglong	increment;
8268 					dberr_t		err;
8269 
8270 					offset = m_prebuilt->autoinc_offset;
8271 					increment = m_prebuilt->autoinc_increment;
8272 
8273 					auto_inc = innobase_next_autoinc(
8274 						auto_inc, 1, increment, offset,
8275 						col_max_value);
8276 
8277 					err = innobase_set_max_autoinc(
8278 						auto_inc);
8279 
8280 					if (err != DB_SUCCESS) {
8281 						error = err;
8282 					}
8283 				}
8284 			}
8285 			break;
8286 		default:
8287 			break;
8288 		}
8289 	}
8290 
8291 	innobase_srv_conc_exit_innodb(m_prebuilt);
8292 
8293 report_error:
8294 	/* Cleanup and exit. */
8295 	if (error == DB_TABLESPACE_DELETED) {
8296 		ib_senderrf(
8297 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
8298 			ER_TABLESPACE_DISCARDED,
8299 			table->s->table_name.str);
8300 	}
8301 
8302 	error_result = convert_error_code_to_mysql(
8303 		error, m_prebuilt->table->flags, m_user_thd);
8304 
8305 #ifdef WITH_WSREP
8306 	if (!error_result && trx->is_wsrep()
8307 	    && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
8308 	    && !wsrep_consistency_check(m_user_thd)
8309 	    && !wsrep_thd_ignore_table(m_user_thd)) {
8310 		if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
8311 				      NULL)) {
8312 			DBUG_PRINT("wsrep", ("row key failed"));
8313 			error_result = HA_ERR_INTERNAL_ERROR;
8314 			goto func_exit;
8315 		}
8316 	}
8317 #endif /* WITH_WSREP */
8318 
8319 	if (error_result == HA_FTS_INVALID_DOCID) {
8320 		my_error(HA_FTS_INVALID_DOCID, MYF(0));
8321 	}
8322 
8323 func_exit:
8324 	innobase_active_small();
8325 
8326 	DBUG_RETURN(error_result);
8327 }
8328 
8329 /** Fill the update vector's "old_vrow" field for those non-updated,
8330 but indexed columns. Such columns could stil present in the virtual
8331 index rec fields even if they are not updated (some other fields updated),
8332 so needs to be logged.
8333 @param[in]	prebuilt		InnoDB prebuilt struct
8334 @param[in,out]	vfield			field to filled
8335 @param[in]	o_len			actual column length
8336 @param[in,out]	col			column to be filled
8337 @param[in]	old_mysql_row_col	MySQL old field ptr
8338 @param[in]	col_pack_len		MySQL field col length
8339 @param[in,out]	buf			buffer for a converted integer value
8340 @return used buffer ptr from row_mysql_store_col_in_innobase_format() */
8341 static
8342 byte*
innodb_fill_old_vcol_val(row_prebuilt_t * prebuilt,dfield_t * vfield,ulint o_len,dict_col_t * col,const byte * old_mysql_row_col,ulint col_pack_len,byte * buf)8343 innodb_fill_old_vcol_val(
8344 	row_prebuilt_t*	prebuilt,
8345 	dfield_t*	vfield,
8346 	ulint		o_len,
8347 	dict_col_t*	col,
8348 	const byte*	old_mysql_row_col,
8349 	ulint		col_pack_len,
8350 	byte*		buf)
8351 {
8352 	dict_col_copy_type(
8353 		col, dfield_get_type(vfield));
8354 	if (o_len != UNIV_SQL_NULL) {
8355 
8356 		buf = row_mysql_store_col_in_innobase_format(
8357 			vfield,
8358 			buf,
8359 			TRUE,
8360 			old_mysql_row_col,
8361 			col_pack_len,
8362 			dict_table_is_comp(prebuilt->table));
8363 	} else {
8364 		dfield_set_null(vfield);
8365 	}
8366 
8367 	return(buf);
8368 }
8369 
/** Calculate an update vector corresponding to the changes
between old_row and new_row.
Every field of the table is compared in its MySQL format. Each changed
field is converted to the InnoDB format (using upd_buff as scratch space)
and appended to uvect. For virtual columns that are indexed (or are being
indexed by an online table rebuild) the old value is additionally stored
in uvect->old_vrow. If the table has FTS data and an FTS indexed column or
the Doc ID column changed, one more update field is appended by
fts_update_doc_id().
@param[out]	uvect		update vector
@param[in]	old_row		current row in MySQL format
@param[in]	new_row		intended updated row in MySQL format
@param[in]	table		MySQL table handle
@param[in,out]	upd_buff	buffer to use for converted values
@param[in]	buff_len	length of upd_buff
@param[in,out]	prebuilt	InnoDB execution context
@param[out]	auto_inc	updated AUTO_INCREMENT value, or 0 if none
@return DB_SUCCESS or error code
@retval DB_FTS_INVALID_DOCID	if the new FTS Doc ID is 0, if a
				user-managed Doc ID was not changed together
				with an FTS indexed column, or if it is
				smaller than the cached next Doc ID
@retval DB_CANT_CREATE_GEOMETRY_OBJECT	if a non-empty geometry value
				would be replaced by an empty one */
static
dberr_t
calc_row_difference(
	upd_t*		uvect,
	const uchar*	old_row,
	const uchar*	new_row,
	TABLE*		table,
	uchar*		upd_buff,
	ulint		buff_len,
	row_prebuilt_t*	prebuilt,
	ib_uint64_t&	auto_inc)
{
	uchar*		original_upd_buff = upd_buff;
	Field*		field;
	enum_field_types field_mysql_type;
	ulint		o_len;
	ulint		n_len;
	ulint		col_pack_len;
	const byte*	new_mysql_row_col;
	const byte*	old_mysql_row_col;
	const byte*	o_ptr;
	const byte*	n_ptr;
	byte*		buf;
	upd_field_t*	ufield;
	ulint		col_type;
	ulint		n_changed = 0;
	dfield_t	dfield;
	dict_index_t*	clust_index;
	ibool		changes_fts_column = FALSE;
	ibool		changes_fts_doc_col = FALSE;
	trx_t* const	trx = prebuilt->trx;
	doc_id_t	doc_id = FTS_NULL_DOC_ID;
	ulint		num_v = 0;
	const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s);

	ut_ad(!srv_read_only_mode);

	clust_index = dict_table_get_first_index(prebuilt->table);
	/* Set the output before any early return, so that the caller
	never reads an unset value. */
	auto_inc = 0;

	/* We use upd_buff to convert changed fields */
	buf = (byte*) upd_buff;

	for (uint i = 0; i < table->s->fields; i++) {
		field = table->field[i];
		const bool is_virtual = !field->stored_in_db();
		/* num_v counts the virtual columns encountered so far:
		it is the index into v_cols[], and i - num_v is the index
		of a stored column in cols[]. Every path that leaves a
		virtual column must therefore increment num_v. */
		if (is_virtual && skip_virtual) {
			num_v++;
			continue;
		}
		dict_col_t* col = is_virtual
			? &prebuilt->table->v_cols[num_v].m_col
			: &prebuilt->table->cols[i - num_v];

		o_ptr = (const byte*) old_row + get_field_offset(table, field);
		n_ptr = (const byte*) new_row + get_field_offset(table, field);

		/* Save the field start pointers and the packed length
		for the format conversion below; o_ptr, n_ptr, o_len and
		n_len may be adjusted to the actual payload. */

		new_mysql_row_col = n_ptr;
		old_mysql_row_col = o_ptr;
		col_pack_len = field->pack_length();

		o_len = col_pack_len;
		n_len = col_pack_len;

		/* We use o_ptr and n_ptr to dig up the actual data for
		comparison. */

		field_mysql_type = field->type();

		col_type = col->mtype;

		switch (col_type) {

		case DATA_BLOB:
		case DATA_GEOMETRY:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);

			break;

		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */

				o_ptr = row_mysql_read_true_varchar(
					&o_len, o_ptr,
					(ulint)
					(((Field_varstring*) field)->length_bytes));

				n_ptr = row_mysql_read_true_varchar(
					&n_len, n_ptr,
					(ulint)
					(((Field_varstring*) field)->length_bytes));
			}

			break;
		default:
			;
		}

		/* Remember the new value of the FTS Doc ID column of a
		table that has FTS data; a Doc ID of 0 is rejected. */
		if (field_mysql_type == MYSQL_TYPE_LONGLONG
		    && prebuilt->table->fts
		    && innobase_strcasecmp(
			field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
			doc_id = mach_read_uint64_little_endian(n_ptr);
			if (doc_id == 0) {
				return(DB_FTS_INVALID_DOCID);
			}
		}

		/* SQL NULL is represented by the length UNIV_SQL_NULL. */
		if (field->real_maybe_null()) {
			if (field->is_null_in_record(old_row)) {
				o_len = UNIV_SQL_NULL;
			}

			if (field->is_null_in_record(new_row)) {
				n_len = UNIV_SQL_NULL;
			}
		}

#ifdef UNIV_DEBUG
		bool	online_ord_part = false;
#endif

		if (is_virtual) {
			/* If the virtual column is not indexed,
			we shall ignore it for update */
			if (!col->ord_part) {
				/* Check whether there is a table-rebuilding
				online ALTER TABLE in progress, and this
				virtual column could be newly indexed, thus
				it will be materialized. Then we will have
				to log its update.
				Note, we do not support online dropping virtual
				column while adding new index, nor with
				online alter column order while adding index,
				so the virtual column sequence must not change
				if it is online operation */
				if (dict_index_is_online_ddl(clust_index)
				    && row_log_col_is_indexed(clust_index,
							      num_v)) {
#ifdef UNIV_DEBUG
					online_ord_part = true;
#endif
				} else {
					num_v++;
					continue;
				}
			}

			/* Create the tuple of old virtual column values
			on first use. */
			if (!uvect->old_vrow) {
				uvect->old_vrow = dtuple_create_with_vcol(
					uvect->heap, 0, prebuilt->table->n_v_cols);
			}

			ulint   max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(
						prebuilt->table);

			/* for virtual columns, we only materialize
			its index, and index field length would not
			exceed max_field_len. So continue if the
			first max_field_len bytes are matched up */
			if (o_len != UNIV_SQL_NULL
			   && n_len != UNIV_SQL_NULL
			   && o_len >= max_field_len
			   && n_len >= max_field_len
			   && memcmp(o_ptr, n_ptr, max_field_len) == 0) {
				dfield_t*	vfield = dtuple_get_nth_v_field(
					uvect->old_vrow, num_v);
				buf = innodb_fill_old_vcol_val(
					prebuilt, vfield, o_len,
					col, old_mysql_row_col,
					col_pack_len, buf);
			       num_v++;
			       continue;
			}
		}

		/* The field has changed if the lengths (which also encode
		NULL-ness) differ, or if the non-empty, non-NULL contents
		differ. */
		if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
				       && 0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
			MEM_UNDEFINED(ufield, sizeof *ufield);

			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */


			/* If the length of new geometry object is 0, means
			this object is invalid geometry object, we need
			to block it. */
			if (DATA_GEOMETRY_MTYPE(col_type)
			    && o_len != 0 && n_len == 0) {
				return(DB_CANT_CREATE_GEOMETRY_OBJECT);
			}

			if (n_len != UNIV_SQL_NULL) {
				dict_col_copy_type(
					col, dfield_get_type(&dfield));

				buf = row_mysql_store_col_in_innobase_format(
					&dfield,
					(byte*) buf,
					TRUE,
					new_mysql_row_col,
					col_pack_len,
					dict_table_is_comp(prebuilt->table));
				dfield_copy(&ufield->new_val, &dfield);
			} else {
				dict_col_copy_type(
					col, dfield_get_type(&ufield->new_val));
				dfield_set_null(&ufield->new_val);
			}

			ufield->exp = NULL;
			ufield->orig_len = 0;
			if (is_virtual) {
				/* For a virtual column, the old value is
				recorded both in the update field and in
				the old_vrow tuple. */
				dfield_t*	vfield = dtuple_get_nth_v_field(
					uvect->old_vrow, num_v);
				upd_fld_set_virtual_col(ufield);
				ufield->field_no = num_v;

				ut_ad(col->ord_part || online_ord_part);
				ufield->old_v_val = static_cast<dfield_t*>(
					mem_heap_alloc(
						uvect->heap,
						sizeof *ufield->old_v_val));

				if (!field->is_null_in_record(old_row)) {
					/* If the new value is NULL, the
					type of the dummy dfield was not
					set above; set it now. */
					if (n_len == UNIV_SQL_NULL) {
						dict_col_copy_type(
							col, dfield_get_type(
								&dfield));
					}

					buf = row_mysql_store_col_in_innobase_format(
						&dfield,
						(byte*) buf,
						TRUE,
						old_mysql_row_col,
						col_pack_len,
						dict_table_is_comp(
						prebuilt->table));
					dfield_copy(ufield->old_v_val,
						    &dfield);
					dfield_copy(vfield, &dfield);
				} else {
					dict_col_copy_type(
						col, dfield_get_type(
						ufield->old_v_val));
					dfield_set_null(ufield->old_v_val);
					dfield_set_null(vfield);
				}
				num_v++;
				ut_ad(field != table->found_next_number_field);
			} else {
				ufield->field_no = dict_col_get_clust_pos(
					&prebuilt->table->cols[i - num_v],
					clust_index);
				ufield->old_v_val = NULL;
				/* Report the new AUTO_INCREMENT value only
				if this changed field is the auto-increment
				field and its new value is not NULL. */
				if (field != table->found_next_number_field
				    || dfield_is_null(&ufield->new_val)) {
				} else {
					auto_inc = field->val_uint();
				}
			}
			n_changed++;

			/* If an FTS indexed column was changed by this
			UPDATE then we need to inform the FTS sub-system.

			NOTE: Currently we re-index all FTS indexed columns
			even if only a subset of the FTS indexed columns
			have been updated. That is the reason we are
			checking only once here. Later we will need to
			note which columns have been updated and do
			selective processing. */
			if (prebuilt->table->fts != NULL && !is_virtual) {
				ulint		offset;
				dict_table_t*   innodb_table;

				innodb_table = prebuilt->table;

				if (!changes_fts_column) {
					offset = row_upd_changes_fts_column(
						innodb_table, ufield);

					if (offset != ULINT_UNDEFINED) {
						changes_fts_column = TRUE;
					}
				}

				if (!changes_fts_doc_col) {
					changes_fts_doc_col =
					row_upd_changes_doc_id(
						innodb_table, ufield);
				}
			}
		} else if (is_virtual) {
			/* The virtual column did not change, but its old
			value is still recorded in old_vrow. */
			dfield_t*	vfield = dtuple_get_nth_v_field(
				uvect->old_vrow, num_v);
			buf = innodb_fill_old_vcol_val(
				prebuilt, vfield, o_len,
				col, old_mysql_row_col,
				col_pack_len, buf);
			ut_ad(col->ord_part || online_ord_part);
			num_v++;
		}
	}

	/* If the update changes a column with an FTS index on it, we
	then add an update column node with a new document id to the
	other changes. We piggy back our changes on the normal UPDATE
	to reduce processing and IO overhead. */
	if (!prebuilt->table->fts) {
		trx->fts_next_doc_id = 0;
	} else if (changes_fts_column || changes_fts_doc_col) {
		dict_table_t*   innodb_table = prebuilt->table;

		/* The Doc ID update is appended after the fields that were
		collected in the loop above. */
		ufield = uvect->fields + n_changed;

		if (!DICT_TF2_FLAG_IS_SET(
			innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) {

			/* If Doc ID is managed by user, and if any
			FTS indexed column has been updated, its corresponding
			Doc ID must also be updated. Otherwise, return
			error */
			if (changes_fts_column && !changes_fts_doc_col) {
				ib::warn() << "A new Doc ID must be supplied"
					" while updating FTS indexed columns.";
				return(DB_FTS_INVALID_DOCID);
			}

			/* Doc ID must monotonically increase */
			ut_ad(innodb_table->fts->cache);
			if (doc_id < prebuilt->table->fts->cache->next_doc_id) {

				ib::warn() << "FTS Doc ID must be larger than "
					<< innodb_table->fts->cache->next_doc_id
					- 1  << " for table "
					<< innodb_table->name;

				return(DB_FTS_INVALID_DOCID);
			}


			trx->fts_next_doc_id = doc_id;
		} else {
			/* If the Doc ID is a hidden column, it can't be
			changed by user */
			ut_ad(!changes_fts_doc_col);

			/* Doc ID column is hidden, a new Doc ID will be
			generated by following fts_update_doc_id() call */
			trx->fts_next_doc_id = 0;
		}

		fts_update_doc_id(
			innodb_table, ufield, &trx->fts_next_doc_id);

		++n_changed;
	} else {
		/* We have a Doc ID column, but none of FTS indexed
		columns are touched, nor the Doc ID column, so set
		fts_next_doc_id to UINT64_UNDEFINED, which means do not
		update the Doc ID column */
		trx->fts_next_doc_id = UINT64_UNDEFINED;
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

	/* The converted values must not have overrun upd_buff. */
	ut_a(buf <= (byte*) original_upd_buff + buff_len);

	ut_ad(uvect->validate());
	return(DB_SUCCESS);
}
8766 
8767 #ifdef WITH_WSREP
8768 static
8769 int
wsrep_calc_row_hash(byte * digest,const uchar * row,TABLE * table,row_prebuilt_t * prebuilt)8770 wsrep_calc_row_hash(
8771 /*================*/
8772 	byte*		digest,		/*!< in/out: md5 sum */
8773 	const uchar*	row,		/*!< in: row in MySQL format */
8774 	TABLE*		table,		/*!< in: table in MySQL data
8775 					dictionary */
8776 	row_prebuilt_t*	prebuilt)	/*!< in: InnoDB prebuilt struct */
8777 {
8778 	ulint		len;
8779 	const byte*	ptr;
8780 
8781 	void *ctx = alloca(my_md5_context_size());
8782 	my_md5_init(ctx);
8783 
8784 	for (uint i = 0; i < table->s->fields; i++) {
8785 		byte null_byte=0;
8786 		byte true_byte=1;
8787 		ulint col_type;
8788 		ulint is_unsigned;
8789 
8790 		const Field* field = table->field[i];
8791 		if (!field->stored_in_db()) {
8792 			continue;
8793 		}
8794 
8795 		ptr = (const byte*) row + get_field_offset(table, field);
8796 		len = field->pack_length();
8797 		col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
8798 
8799 		switch (col_type) {
8800 
8801 		case DATA_BLOB:
8802 			ptr = row_mysql_read_blob_ref(&len, ptr, len);
8803 
8804 			break;
8805 
8806 		case DATA_VARCHAR:
8807 		case DATA_BINARY:
8808 		case DATA_VARMYSQL:
8809 			if (field->type() == MYSQL_TYPE_VARCHAR) {
8810 				/* This is a >= 5.0.3 type true VARCHAR where
8811 				the real payload data length is stored in
8812 				1 or 2 bytes */
8813 
8814 				ptr = row_mysql_read_true_varchar(
8815 					&len, ptr,
8816 					(ulint)
8817 					(((Field_varstring*)field)->length_bytes));
8818 
8819 			}
8820 
8821 			break;
8822 		default:
8823 			;
8824 		}
8825 		/*
8826 		if (field->null_ptr &&
8827 		    field_in_record_is_null(table, field, (char*) row)) {
8828 		*/
8829 
8830 		if (field->is_null_in_record(row)) {
8831 			my_md5_input(ctx, &null_byte, 1);
8832 		} else {
8833 			my_md5_input(ctx, &true_byte, 1);
8834 			my_md5_input(ctx, ptr, len);
8835 		}
8836 	}
8837 
8838 	my_md5_result(ctx, digest);
8839 
8840 	return(0);
8841 }
8842 #endif /* WITH_WSREP */
8843 
/**
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
TODO: currently InnoDB does not prevent the 'Halloween problem':
in a searched update a single row can get updated several times
if its index columns are updated!
@param[in] old_row	Old row contents in MySQL format
@param[in] new_row	Updated row contents in MySQL format
@return error number or 0
@retval HA_ERR_TABLE_READONLY		if high_level_read_only is set
@retval HA_ERR_OUT_OF_MEM		if the update buffer cannot be allocated
@retval HA_ERR_RECORD_IS_THE_SAME	if no field differs between the rows
@retval HA_ERR_INTERNAL_ERROR		if appending the wsrep keys fails */

int
ha_innobase::update_row(
	const uchar*	old_row,
	const uchar*	new_row)
{
	int		err;

	dberr_t		error;
	trx_t*		trx = thd_to_trx(m_user_thd);

	DBUG_ENTER("ha_innobase::update_row");

	ut_a(m_prebuilt->trx == trx);

	if (high_level_read_only) {
		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	} else if (!trx_is_started(trx)) {
		trx->will_lock = true;
	}

	/* The conversion buffer is allocated on first use and kept in
	the handle for later calls. */
	if (m_upd_buf == NULL) {
		ut_ad(m_upd_buf_size == 0);

		/* Create a buffer for packing the fields of a record. Why
		table->reclength did not work here? Obviously, because char
		fields when packed actually became 1 byte longer, when we also
		stored the string length as the first byte. */

		m_upd_buf_size = table->s->reclength + table->s->max_key_length
			+ MAX_REF_PARTS * 3;

		m_upd_buf = reinterpret_cast<uchar*>(
			my_malloc(//PSI_INSTRUMENT_ME,
                                  m_upd_buf_size,
				MYF(MY_WME)));

		if (m_upd_buf == NULL) {
			m_upd_buf_size = 0;
			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
		}
	}

	upd_t*		uvect = row_get_prebuilt_update_vector(m_prebuilt);
	/* Set by calc_row_difference(): the new AUTO_INCREMENT value,
	or 0 if none was specified. */
	ib_uint64_t	autoinc;

	/* Build an update vector from the modified fields in the rows
	(uses m_upd_buf of the handle) */

	error = calc_row_difference(
		uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size,
		m_prebuilt, autoinc);

	if (error != DB_SUCCESS) {
		goto func_exit;
	}

	if (!uvect->n_fields) {
		/* This is the same as success, but instructs
		MySQL that the row is not really updated and it
		should not increase the count of updated rows.
		This is fix for http://bugs.mysql.com/29157 */
		if (m_prebuilt->versioned_write
		    && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
		    /* Multiple UPDATE of same rows in single transaction create
		       historical rows only once. */
		    && trx->id != table->vers_start_id()) {
			/* Even though no field changed, a historical copy
			of the old row is inserted for a versioned write. */
			error = row_insert_for_mysql((byte*) old_row,
						     m_prebuilt,
						     ROW_INS_HISTORICAL);
			if (error != DB_SUCCESS) {
				goto func_exit;
			}
			innobase_srv_conc_exit_innodb(m_prebuilt);
			innobase_active_small();
		}
		DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
	} else {
		const bool vers_set_fields = m_prebuilt->versioned_write
			&& m_prebuilt->upd_node->update->affects_versioned();
		const bool vers_ins_row = vers_set_fields
			&& thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE;

		/* Normally this is not a delete (NO_DELETE). It is marked
		as VERSIONED_DELETE when versioned fields are affected
		but no historical row will be inserted (that is, during
		ALTER TABLE), or when the statement is a DELETE on a
		table that is versioned by timestamp. */
		m_prebuilt->upd_node->is_delete =
			(vers_set_fields && !vers_ins_row) ||
			(thd_sql_command(m_user_thd) == SQLCOM_DELETE &&
				table->versioned(VERS_TIMESTAMP))
			? VERSIONED_DELETE
			: NO_DELETE;

		innobase_srv_conc_enter_innodb(m_prebuilt);

		error = row_update_for_mysql(m_prebuilt);

		if (error == DB_SUCCESS && vers_ins_row
		    /* Multiple UPDATE of same rows in single transaction create
		       historical rows only once. */
		    && trx->id != table->vers_start_id()) {
			error = row_insert_for_mysql((byte*) old_row,
						     m_prebuilt,
						     ROW_INS_HISTORICAL);
		}
	}

	if (error == DB_SUCCESS && autoinc) {
		/* A value for an AUTO_INCREMENT column
		was specified in the UPDATE statement. */

		/* We need the upper limit of the col type to check for
		whether we update the table autoinc counter or not. */
		ulonglong	col_max_value =
			table->found_next_number_field->get_max_int_value();

		/* This should filter out the negative
		values set explicitly by the user. */
		if (autoinc <= col_max_value) {
			ulonglong	offset;
			ulonglong	increment;

			offset = m_prebuilt->autoinc_offset;
			increment = m_prebuilt->autoinc_increment;

			autoinc = innobase_next_autoinc(
				autoinc, 1, increment, offset,
				col_max_value);

			error = innobase_set_max_autoinc(autoinc);

			if (m_prebuilt->table->persistent_autoinc) {
				/* Update the PAGE_ROOT_AUTO_INC. Yes, we do
				this even if dict_table_t::autoinc already was
				greater than autoinc, because we cannot know
				if any INSERT actually used (and wrote to
				PAGE_ROOT_AUTO_INC) a value bigger than our
				autoinc. */
				btr_write_autoinc(dict_table_get_first_index(
							  m_prebuilt->table),
						  autoinc);
			}
		}
	}

	innobase_srv_conc_exit_innodb(m_prebuilt);

func_exit:
	/* Failures that jump here happen before
	innobase_srv_conc_enter_innodb() is called in this function,
	which is why the exit call above is skipped for them. */
	if (error == DB_FTS_INVALID_DOCID) {
		err = HA_FTS_INVALID_DOCID;
		my_error(HA_FTS_INVALID_DOCID, MYF(0));
	} else {
		err = convert_error_code_to_mysql(
			error, m_prebuilt->table->flags, m_user_thd);
	}

	/* Tell InnoDB server that there might be work for
	utility threads: */

	innobase_active_small();

#ifdef WITH_WSREP
	/* After a successful local update in a wsrep transaction, append
	the keys of the old and new row; a failure there overrides the
	result with HA_ERR_INTERNAL_ERROR. */
	if (error == DB_SUCCESS && trx->is_wsrep() &&
	    wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE &&
	    !wsrep_thd_ignore_table(m_user_thd)) {
		DBUG_PRINT("wsrep", ("update row key"));

		if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, old_row,
				      new_row)) {
			WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
			DBUG_PRINT("wsrep", ("row key failed"));
			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
		}
	}
#endif /* WITH_WSREP */

	DBUG_RETURN(err);
}
9031 
9032 /**********************************************************************//**
9033 Deletes a row given as the parameter.
9034 @return error number or 0 */
9035 
9036 int
delete_row(const uchar * record)9037 ha_innobase::delete_row(
9038 /*====================*/
9039 	const uchar*	record)	/*!< in: a row in MySQL format */
9040 {
9041 	dberr_t		error;
9042 	trx_t*		trx = thd_to_trx(m_user_thd);
9043 
9044 	DBUG_ENTER("ha_innobase::delete_row");
9045 
9046 	ut_a(m_prebuilt->trx == trx);
9047 
9048 	if (high_level_read_only) {
9049 		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
9050 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
9051 	} else if (!trx_is_started(trx)) {
9052 		trx->will_lock = true;
9053 	}
9054 
9055 	if (!m_prebuilt->upd_node) {
9056 		row_get_prebuilt_update_vector(m_prebuilt);
9057 	}
9058 
9059 	/* This is a delete */
9060 	m_prebuilt->upd_node->is_delete = table->versioned_write(VERS_TRX_ID)
9061 		&& table->vers_end_field()->is_max()
9062 		&& trx->id != table->vers_start_id()
9063 		? VERSIONED_DELETE
9064 		: PLAIN_DELETE;
9065 
9066 	innobase_srv_conc_enter_innodb(m_prebuilt);
9067 
9068 	error = row_update_for_mysql(m_prebuilt);
9069 
9070 	innobase_srv_conc_exit_innodb(m_prebuilt);
9071 
9072 	/* Tell the InnoDB server that there might be work for
9073 	utility threads: */
9074 
9075 	innobase_active_small();
9076 
9077 #ifdef WITH_WSREP
9078 	if (error == DB_SUCCESS && trx->is_wsrep()
9079 	    && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE
9080 	    && !wsrep_thd_ignore_table(m_user_thd)) {
9081 		if (wsrep_append_keys(m_user_thd, WSREP_KEY_EXCLUSIVE, record,
9082 			              NULL)) {
9083 			DBUG_PRINT("wsrep", ("delete fail"));
9084 			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9085 		}
9086 	}
9087 #endif /* WITH_WSREP */
9088 	DBUG_RETURN(convert_error_code_to_mysql(
9089 			    error, m_prebuilt->table->flags, m_user_thd));
9090 }
9091 
/** Delete all rows from the table.
This implementation does not delete anything: it unconditionally
reports HA_ERR_WRONG_COMMAND to the caller.
@return HA_ERR_WRONG_COMMAND */

int
ha_innobase::delete_all_rows()
{
	DBUG_ENTER("ha_innobase::delete_all_rows");
	DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
9101 
9102 /**********************************************************************//**
9103 Removes a new lock set on a row, if it was not read optimistically. This can
9104 be called after a row has been read in the processing of an UPDATE or a DELETE
9105 query, if the option innodb_locks_unsafe_for_binlog is set. */
9106 
9107 void
unlock_row(void)9108 ha_innobase::unlock_row(void)
9109 /*=========================*/
9110 {
9111 	DBUG_ENTER("ha_innobase::unlock_row");
9112 
9113 	if (m_prebuilt->select_lock_type == LOCK_NONE) {
9114 		DBUG_VOID_RETURN;
9115 	}
9116 
9117 	ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE, true));
9118 
9119 	switch (m_prebuilt->row_read_type) {
9120 	case ROW_READ_WITH_LOCKS:
9121 		if (!srv_locks_unsafe_for_binlog
9122 		    && m_prebuilt->trx->isolation_level
9123 		    > TRX_ISO_READ_COMMITTED) {
9124 			break;
9125 		}
9126 		/* fall through */
9127 	case ROW_READ_TRY_SEMI_CONSISTENT:
9128 		row_unlock_for_mysql(m_prebuilt, FALSE);
9129 		break;
9130 	case ROW_READ_DID_SEMI_CONSISTENT:
9131 		m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9132 		break;
9133 	}
9134 
9135 	DBUG_VOID_RETURN;
9136 }
9137 
9138 /* See handler.h and row0mysql.h for docs on this function. */
9139 
9140 bool
was_semi_consistent_read(void)9141 ha_innobase::was_semi_consistent_read(void)
9142 /*=======================================*/
9143 {
9144 	return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
9145 }
9146 
9147 /* See handler.h and row0mysql.h for docs on this function. */
9148 
9149 void
try_semi_consistent_read(bool yes)9150 ha_innobase::try_semi_consistent_read(bool yes)
9151 /*===========================================*/
9152 {
9153 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9154 
9155 	/* Row read type is set to semi consistent read if this was
9156 	requested by the MySQL and either innodb_locks_unsafe_for_binlog
9157 	option is used or this session is using READ COMMITTED isolation
9158 	level. */
9159 
9160 	if (yes
9161 	    && (srv_locks_unsafe_for_binlog
9162 		|| m_prebuilt->trx->isolation_level
9163 		<= TRX_ISO_READ_COMMITTED)) {
9164 
9165 		m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9166 
9167 	} else {
9168 		m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
9169 	}
9170 }
9171 
9172 /******************************************************************//**
9173 Initializes a handle to use an index.
9174 @return 0 or error number */
9175 
9176 int
index_init(uint keynr,bool)9177 ha_innobase::index_init(
9178 /*====================*/
9179 	uint		keynr,	/*!< in: key (index) number */
9180 	bool)
9181 {
9182 	DBUG_ENTER("index_init");
9183 
9184 	DBUG_RETURN(change_active_index(keynr));
9185 }
9186 
9187 /******************************************************************//**
9188 Currently does nothing.
9189 @return 0 */
9190 
9191 int
index_end(void)9192 ha_innobase::index_end(void)
9193 /*========================*/
9194 {
9195 	DBUG_ENTER("index_end");
9196 
9197 	active_index = MAX_KEY;
9198 
9199 	in_range_check_pushed_down = FALSE;
9200 
9201 	m_ds_mrr.dsmrr_close();
9202 
9203 	DBUG_RETURN(0);
9204 }
9205 
9206 /*********************************************************************//**
9207 Converts a search mode flag understood by MySQL to a flag understood
9208 by InnoDB. */
9209 page_cur_mode_t
convert_search_mode_to_innobase(ha_rkey_function find_flag)9210 convert_search_mode_to_innobase(
9211 /*============================*/
9212 	ha_rkey_function	find_flag)
9213 {
9214 	switch (find_flag) {
9215 	case HA_READ_KEY_EXACT:
9216 		/* this does not require the index to be UNIQUE */
9217 	case HA_READ_KEY_OR_NEXT:
9218 		return(PAGE_CUR_GE);
9219 	case HA_READ_AFTER_KEY:
9220 		return(PAGE_CUR_G);
9221 	case HA_READ_BEFORE_KEY:
9222 		return(PAGE_CUR_L);
9223 	case HA_READ_KEY_OR_PREV:
9224 	case HA_READ_PREFIX_LAST:
9225 	case HA_READ_PREFIX_LAST_OR_PREV:
9226 		return(PAGE_CUR_LE);
9227 	case HA_READ_MBR_CONTAIN:
9228 		return(PAGE_CUR_CONTAIN);
9229 	case HA_READ_MBR_INTERSECT:
9230 		return(PAGE_CUR_INTERSECT);
9231 	case HA_READ_MBR_WITHIN:
9232 		return(PAGE_CUR_WITHIN);
9233 	case HA_READ_MBR_DISJOINT:
9234 		return(PAGE_CUR_DISJOINT);
9235 	case HA_READ_MBR_EQUAL:
9236 		return(PAGE_CUR_MBR_EQUAL);
9237 	case HA_READ_PREFIX:
9238 		return(PAGE_CUR_UNSUPP);
9239 	/* do not use "default:" in order to produce a gcc warning:
9240 	enumeration value '...' not handled in switch
9241 	(if -Wswitch or -Wall is used) */
9242 	}
9243 
9244 	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
9245 
9246 	return(PAGE_CUR_UNSUPP);
9247 }
9248 
9249 /*
9250    BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
9251    ---------------------------------------------------
9252 The following does not cover all the details, but explains how we determine
9253 the start of a new SQL statement, and what is associated with it.
9254 
9255 For each table in the database the MySQL interpreter may have several
9256 table handle instances in use, also in a single SQL query. For each table
9257 handle instance there is an InnoDB  'm_prebuilt' struct which contains most
9258 of the InnoDB data associated with this table handle instance.
9259 
9260   A) if the user has not explicitly set any MySQL table level locks:
9261 
9262   1) MySQL calls ::external_lock to set an 'intention' table level lock on
9263 the table of the handle instance. There we set
9264 m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
9265 true if we are taking this table handle instance to use in a new SQL
9266 statement issued by the user. We also increment trx->n_mysql_tables_in_use.
9267 
9268   2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
9269 instructions to m_prebuilt->template of the table handle instance in
9270 ::index_read. The template is used to save CPU time in large joins.
9271 
9272   3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we
9273 allocate a new consistent read view for the trx if it does not yet have one,
9274 or in the case of a locking read, set an InnoDB 'intention' table level
9275 lock on the table.
9276 
9277   4) We do the SELECT. MySQL may repeatedly call ::index_read for the
9278 same table handle instance, if it is a join.
9279 
9280   5) When the SELECT ends, MySQL removes its intention table level locks
9281 in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
9282  (a) we execute a COMMIT there if the autocommit is on,
9283  (b) we also release possible 'SQL statement level resources' InnoDB may
9284 have for this SQL statement. The MySQL interpreter does NOT execute
9285 autocommit for pure read transactions, though it should. That is why the
9286 table handler in that case has to execute the COMMIT in ::external_lock.
9287 
  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */
9296 
9297 
9298 /**********************************************************************//**
9299 Positions an index cursor to the index specified in the handle. Fetches the
9300 row if any.
9301 @return 0, HA_ERR_KEY_NOT_FOUND, or error number */
9302 
9303 int
index_read(uchar * buf,const uchar * key_ptr,uint key_len,enum ha_rkey_function find_flag)9304 ha_innobase::index_read(
9305 /*====================*/
9306 	uchar*		buf,		/*!< in/out: buffer for the returned
9307 					row */
9308 	const uchar*	key_ptr,	/*!< in: key value; if this is NULL
9309 					we position the cursor at the
9310 					start or end of index; this can
9311 					also contain an InnoDB row id, in
9312 					which case key_len is the InnoDB
9313 					row id length; the key value can
9314 					also be a prefix of a full key value,
9315 					and the last column can be a prefix
9316 					of a full column */
9317 	uint			key_len,/*!< in: key value length */
9318 	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
9319 {
9320 	DBUG_ENTER("index_read");
9321 	DEBUG_SYNC_C("ha_innobase_index_read_begin");
9322 
9323 	ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9324 	ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
9325 
9326 	dict_index_t*	index = m_prebuilt->index;
9327 
9328 	if (index == NULL || index->is_corrupted()) {
9329 		m_prebuilt->index_usable = FALSE;
9330 		DBUG_RETURN(HA_ERR_CRASHED);
9331 	}
9332 
9333 	if (!m_prebuilt->index_usable) {
9334 		DBUG_RETURN(index->is_corrupted()
9335 			    ? HA_ERR_INDEX_CORRUPT
9336 			    : HA_ERR_TABLE_DEF_CHANGED);
9337 	}
9338 
9339 	if (index->type & DICT_FTS) {
9340 		DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
9341 	}
9342 
9343 	/* For R-Tree index, we will always place the page lock to
9344 	pages being searched */
9345 	if (index->is_spatial() && !m_prebuilt->trx->will_lock) {
9346 		if (trx_is_started(m_prebuilt->trx)) {
9347 			DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION);
9348 		} else {
9349 			m_prebuilt->trx->will_lock = true;
9350 		}
9351 	}
9352 
9353 	/* Note that if the index for which the search template is built is not
9354 	necessarily m_prebuilt->index, but can also be the clustered index */
9355 
9356 	if (m_prebuilt->sql_stat_start) {
9357 		build_template(false);
9358 	}
9359 
9360 	if (key_ptr != NULL) {
9361 		/* Convert the search key value to InnoDB format into
9362 		m_prebuilt->search_tuple */
9363 
9364 		row_sel_convert_mysql_key_to_innobase(
9365 			m_prebuilt->search_tuple,
9366 			m_prebuilt->srch_key_val1,
9367 			m_prebuilt->srch_key_val_len,
9368 			index,
9369 			(byte*) key_ptr,
9370 			(ulint) key_len);
9371 
9372 		DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
9373 	} else {
9374 		/* We position the cursor to the last or the first entry
9375 		in the index */
9376 
9377 		dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
9378 	}
9379 
9380 	page_cur_mode_t	mode = convert_search_mode_to_innobase(find_flag);
9381 
9382 	ulint	match_mode = 0;
9383 
9384 	if (find_flag == HA_READ_KEY_EXACT) {
9385 
9386 		match_mode = ROW_SEL_EXACT;
9387 
9388 	} else if (find_flag == HA_READ_PREFIX_LAST) {
9389 
9390 		match_mode = ROW_SEL_EXACT_PREFIX;
9391 	}
9392 
9393 	m_last_match_mode = (uint) match_mode;
9394 
9395 	dberr_t		ret;
9396 
9397 	if (mode != PAGE_CUR_UNSUPP) {
9398 
9399 		innobase_srv_conc_enter_innodb(m_prebuilt);
9400 
9401 		ret = row_search_mvcc(
9402 			buf, mode, m_prebuilt, match_mode, 0);
9403 
9404 		innobase_srv_conc_exit_innodb(m_prebuilt);
9405 	} else {
9406 
9407 		ret = DB_UNSUPPORTED;
9408 	}
9409 
9410 	DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;);
9411 
9412 	int	error;
9413 
9414 	switch (ret) {
9415 	case DB_SUCCESS:
9416 		error = 0;
9417 		table->status = 0;
9418 		if (m_prebuilt->table->is_system_db) {
9419 			srv_stats.n_system_rows_read.add(
9420 				thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9421 		} else {
9422 			srv_stats.n_rows_read.add(
9423 				thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
9424 		}
9425 		break;
9426 
9427 	case DB_RECORD_NOT_FOUND:
9428 		error = HA_ERR_KEY_NOT_FOUND;
9429 		table->status = STATUS_NOT_FOUND;
9430 		break;
9431 
9432 	case DB_END_OF_INDEX:
9433 		error = HA_ERR_KEY_NOT_FOUND;
9434 		table->status = STATUS_NOT_FOUND;
9435 		break;
9436 
9437 	case DB_TABLESPACE_DELETED:
9438 		ib_senderrf(
9439 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9440 			ER_TABLESPACE_DISCARDED,
9441 			table->s->table_name.str);
9442 
9443 		table->status = STATUS_NOT_FOUND;
9444 		error = HA_ERR_TABLESPACE_MISSING;
9445 		break;
9446 
9447 	case DB_TABLESPACE_NOT_FOUND:
9448 
9449 		ib_senderrf(
9450 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9451 			ER_TABLESPACE_MISSING,
9452 			table->s->table_name.str);
9453 
9454 		table->status = STATUS_NOT_FOUND;
9455 		error = HA_ERR_TABLESPACE_MISSING;
9456 		break;
9457 
9458 	default:
9459 		error = convert_error_code_to_mysql(
9460 			ret, m_prebuilt->table->flags, m_user_thd);
9461 
9462 		table->status = STATUS_NOT_FOUND;
9463 		break;
9464 	}
9465 
9466 	DBUG_RETURN(error);
9467 }
9468 
9469 /*******************************************************************//**
9470 The following functions works like index_read, but it find the last
9471 row with the current key value or prefix.
9472 @return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
9473 
9474 int
index_read_last(uchar * buf,const uchar * key_ptr,uint key_len)9475 ha_innobase::index_read_last(
9476 /*=========================*/
9477 	uchar*		buf,	/*!< out: fetched row */
9478 	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
9479 				key value */
9480 	uint		key_len)/*!< in: length of the key val or prefix
9481 				in bytes */
9482 {
9483 	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
9484 }
9485 
9486 /********************************************************************//**
9487 Get the index for a handle. Does not change active index.
9488 @return NULL or index instance. */
9489 
9490 dict_index_t*
innobase_get_index(uint keynr)9491 ha_innobase::innobase_get_index(
9492 /*============================*/
9493 	uint		keynr)	/*!< in: use this index; MAX_KEY means always
9494 				clustered index, even if it was internally
9495 				generated by InnoDB */
9496 {
9497 	KEY*		key = NULL;
9498 	dict_table_t*	ib_table = m_prebuilt->table;
9499 	dict_index_t*	index;
9500 
9501 	DBUG_ENTER("innobase_get_index");
9502 
9503 	if (keynr != MAX_KEY && table->s->keys > 0) {
9504 		key = &table->key_info[keynr];
9505 		index = dict_table_get_index_on_name(ib_table, key->name.str);
9506 	} else {
9507 		index = dict_table_get_first_index(ib_table);
9508 	}
9509 
9510 	if (index == NULL) {
9511 		sql_print_error(
9512 			"InnoDB could not find key no %u with name %s"
9513 			" from dict cache for table %s",
9514 			keynr, key ? key->name.str : "NULL",
9515 			ib_table->name.m_name);
9516 	}
9517 
9518 	DBUG_RETURN(index);
9519 }
9520 
9521 /********************************************************************//**
9522 Changes the active index of a handle.
9523 @return 0 or error code */
9524 
9525 int
change_active_index(uint keynr)9526 ha_innobase::change_active_index(
9527 /*=============================*/
9528 	uint	keynr)	/*!< in: use this index; MAX_KEY means always clustered
9529 			index, even if it was internally generated by
9530 			InnoDB */
9531 {
9532 	DBUG_ENTER("change_active_index");
9533 
9534 	ut_ad(m_user_thd == ha_thd());
9535 	ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
9536 
9537 	active_index = keynr;
9538 
9539 	m_prebuilt->index = innobase_get_index(keynr);
9540 
9541 	if (m_prebuilt->index == NULL) {
9542 		sql_print_warning("InnoDB: change_active_index(%u) failed",
9543 				  keynr);
9544 		m_prebuilt->index_usable = FALSE;
9545 		DBUG_RETURN(1);
9546 	}
9547 
9548 	m_prebuilt->index_usable = row_merge_is_index_usable(
9549 		m_prebuilt->trx, m_prebuilt->index);
9550 
9551 	if (!m_prebuilt->index_usable) {
9552 		if (m_prebuilt->index->is_corrupted()) {
9553 			char	table_name[MAX_FULL_NAME_LEN + 1];
9554 
9555 			innobase_format_name(
9556 				table_name, sizeof table_name,
9557 				m_prebuilt->index->table->name.m_name);
9558 
9559 			if (m_prebuilt->index->is_primary()) {
9560 				ut_ad(m_prebuilt->index->table->corrupted);
9561 				push_warning_printf(
9562 					m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9563 					ER_TABLE_CORRUPT,
9564 					"InnoDB: Table %s is corrupted.",
9565 					table_name);
9566 				DBUG_RETURN(ER_TABLE_CORRUPT);
9567 			} else {
9568 				push_warning_printf(
9569 					m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9570 					HA_ERR_INDEX_CORRUPT,
9571 					"InnoDB: Index %s for table %s is"
9572 					" marked as corrupted",
9573 					m_prebuilt->index->name(),
9574 					table_name);
9575 				DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
9576 			}
9577 		} else {
9578 			push_warning_printf(
9579 				m_user_thd, Sql_condition::WARN_LEVEL_WARN,
9580 				HA_ERR_TABLE_DEF_CHANGED,
9581 				"InnoDB: insufficient history for index %u",
9582 				keynr);
9583 		}
9584 
9585 		/* The caller seems to ignore this.  Thus, we must check
9586 		this again in row_search_for_mysql(). */
9587 		DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY,
9588 				0, NULL));
9589 	}
9590 
9591 	ut_a(m_prebuilt->search_tuple != 0);
9592 
9593 	/* Initialization of search_tuple is not needed for FT index
9594 	since FT search returns rank only. In addition engine should
9595 	be able to retrieve FTS_DOC_ID column value if necessary. */
9596 	if (m_prebuilt->index->type & DICT_FTS) {
9597 		for (uint i = 0; i < table->s->fields; i++) {
9598 			if (m_prebuilt->read_just_key
9599 			    && bitmap_is_set(table->read_set, i)
9600 			    && !strcmp(table->s->field[i]->field_name.str,
9601 				       FTS_DOC_ID_COL_NAME)) {
9602 				m_prebuilt->fts_doc_id_in_read_set = true;
9603 				break;
9604 			}
9605 		}
9606 	} else {
9607 		dtuple_set_n_fields(m_prebuilt->search_tuple,
9608 				    m_prebuilt->index->n_fields);
9609 
9610 		dict_index_copy_types(
9611 			m_prebuilt->search_tuple, m_prebuilt->index,
9612 			m_prebuilt->index->n_fields);
9613 
9614 		/* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is
9615 		always added to read_set. */
9616 		m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query
9617 			&& m_prebuilt->read_just_key
9618 			&& dict_index_contains_col_or_prefix(
9619 					m_prebuilt->index,
9620 					m_prebuilt->table->fts->doc_col,
9621 					false);
9622 	}
9623 
9624 	/* MySQL changes the active index for a handle also during some
9625 	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
9626 	and then calculates the sum. Previously we played safe and used
9627 	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
9628 	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
9629 
9630 	build_template(false);
9631 
9632 	DBUG_RETURN(0);
9633 }
9634 
9635 /***********************************************************************//**
9636 Reads the next or previous row from a cursor, which must have previously been
9637 positioned using index_read.
9638 @return 0, HA_ERR_END_OF_FILE, or error number */
9639 
9640 int
general_fetch(uchar * buf,uint direction,uint match_mode)9641 ha_innobase::general_fetch(
9642 /*=======================*/
9643 	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
9644 				format */
9645 	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
9646 	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
9647 				ROW_SEL_EXACT_PREFIX */
9648 {
9649 	DBUG_ENTER("general_fetch");
9650 
9651 	const trx_t*	trx = m_prebuilt->trx;
9652 
9653 	ut_ad(trx == thd_to_trx(m_user_thd));
9654 
9655 	if (m_prebuilt->table->is_readable()) {
9656 	} else if (m_prebuilt->table->corrupted) {
9657 		DBUG_RETURN(HA_ERR_CRASHED);
9658 	} else {
9659 		DBUG_RETURN(m_prebuilt->table->space
9660 			    ? HA_ERR_DECRYPTION_FAILED
9661 			    : HA_ERR_NO_SUCH_TABLE);
9662 	}
9663 
9664 	innobase_srv_conc_enter_innodb(m_prebuilt);
9665 
9666 	dberr_t	ret = row_search_mvcc(
9667 		buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, direction);
9668 
9669 	innobase_srv_conc_exit_innodb(m_prebuilt);
9670 
9671 	int	error;
9672 
9673 	switch (ret) {
9674 	case DB_SUCCESS:
9675 		error = 0;
9676 		table->status = 0;
9677 		if (m_prebuilt->table->is_system_db) {
9678 			srv_stats.n_system_rows_read.add(
9679 				thd_get_thread_id(trx->mysql_thd), 1);
9680 		} else {
9681 			srv_stats.n_rows_read.add(
9682 				thd_get_thread_id(trx->mysql_thd), 1);
9683 		}
9684 		break;
9685 	case DB_RECORD_NOT_FOUND:
9686 		error = HA_ERR_END_OF_FILE;
9687 		table->status = STATUS_NOT_FOUND;
9688 		break;
9689 	case DB_END_OF_INDEX:
9690 		error = HA_ERR_END_OF_FILE;
9691 		table->status = STATUS_NOT_FOUND;
9692 		break;
9693 	case DB_TABLESPACE_DELETED:
9694 		ib_senderrf(
9695 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9696 			ER_TABLESPACE_DISCARDED,
9697 			table->s->table_name.str);
9698 
9699 		table->status = STATUS_NOT_FOUND;
9700 		error = HA_ERR_TABLESPACE_MISSING;
9701 		break;
9702 	case DB_TABLESPACE_NOT_FOUND:
9703 
9704 		ib_senderrf(
9705 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9706 			ER_TABLESPACE_MISSING,
9707 			table->s->table_name.str);
9708 
9709 		table->status = STATUS_NOT_FOUND;
9710 		error = HA_ERR_TABLESPACE_MISSING;
9711 		break;
9712 	default:
9713 		error = convert_error_code_to_mysql(
9714 			ret, m_prebuilt->table->flags, m_user_thd);
9715 
9716 		table->status = STATUS_NOT_FOUND;
9717 		break;
9718 	}
9719 
9720 	DBUG_RETURN(error);
9721 }
9722 
9723 /***********************************************************************//**
9724 Reads the next row from a cursor, which must have previously been
9725 positioned using index_read.
9726 @return 0, HA_ERR_END_OF_FILE, or error number */
9727 
9728 int
index_next(uchar * buf)9729 ha_innobase::index_next(
9730 /*====================*/
9731 	uchar*		buf)	/*!< in/out: buffer for next row in MySQL
9732 				format */
9733 {
9734 	return(general_fetch(buf, ROW_SEL_NEXT, 0));
9735 }
9736 
9737 /*******************************************************************//**
9738 Reads the next row matching to the key value given as the parameter.
9739 @return 0, HA_ERR_END_OF_FILE, or error number */
9740 
9741 int
index_next_same(uchar * buf,const uchar *,uint)9742 ha_innobase::index_next_same(
9743 /*=========================*/
9744 	uchar*		buf,	/*!< in/out: buffer for the row */
9745 	const uchar*, uint)
9746 {
9747 	return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
9748 }
9749 
9750 /***********************************************************************//**
9751 Reads the previous row from a cursor, which must have previously been
9752 positioned using index_read.
9753 @return 0, HA_ERR_END_OF_FILE, or error number */
9754 
9755 int
index_prev(uchar * buf)9756 ha_innobase::index_prev(
9757 /*====================*/
9758 	uchar*	buf)	/*!< in/out: buffer for previous row in MySQL format */
9759 {
9760 	return(general_fetch(buf, ROW_SEL_PREV, 0));
9761 }
9762 
9763 /********************************************************************//**
9764 Positions a cursor on the first record in an index and reads the
9765 corresponding row to buf.
9766 @return 0, HA_ERR_END_OF_FILE, or error code */
9767 
9768 int
index_first(uchar * buf)9769 ha_innobase::index_first(
9770 /*=====================*/
9771 	uchar*	buf)	/*!< in/out: buffer for the row */
9772 {
9773 	DBUG_ENTER("index_first");
9774 
9775 	int	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
9776 
9777 	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9778 
9779 	if (error == HA_ERR_KEY_NOT_FOUND) {
9780 		error = HA_ERR_END_OF_FILE;
9781 	}
9782 
9783 	DBUG_RETURN(error);
9784 }
9785 
9786 /********************************************************************//**
9787 Positions a cursor on the last record in an index and reads the
9788 corresponding row to buf.
9789 @return 0, HA_ERR_END_OF_FILE, or error code */
9790 
9791 int
index_last(uchar * buf)9792 ha_innobase::index_last(
9793 /*====================*/
9794 	uchar*	buf)	/*!< in/out: buffer for the row */
9795 {
9796 	DBUG_ENTER("index_last");
9797 
9798 	int	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
9799 
9800 	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9801 
9802 	if (error == HA_ERR_KEY_NOT_FOUND) {
9803 		error = HA_ERR_END_OF_FILE;
9804 	}
9805 
9806 	DBUG_RETURN(error);
9807 }
9808 
9809 /****************************************************************//**
9810 Initialize a table scan.
9811 @return 0 or error number */
9812 
9813 int
rnd_init(bool scan)9814 ha_innobase::rnd_init(
9815 /*==================*/
9816 	bool	scan)	/*!< in: true if table/index scan FALSE otherwise */
9817 {
9818 	int		err;
9819 
9820 	/* Store the active index value so that we can restore the original
9821 	value after a scan */
9822 
9823 	if (m_prebuilt->clust_index_was_generated) {
9824 		err = change_active_index(MAX_KEY);
9825 	} else {
9826 		err = change_active_index(m_primary_key);
9827 	}
9828 
9829 	/* Don't use semi-consistent read in random row reads (by position).
9830 	This means we must disable semi_consistent_read if scan is false */
9831 
9832 	if (!scan) {
9833 		try_semi_consistent_read(0);
9834 	}
9835 
9836 	m_start_of_scan = true;
9837 
9838 	return(err);
9839 }
9840 
9841 /*****************************************************************//**
9842 Ends a table scan.
9843 @return 0 or error number */
9844 
9845 int
rnd_end(void)9846 ha_innobase::rnd_end(void)
9847 /*======================*/
9848 {
9849 	return(index_end());
9850 }
9851 
9852 /*****************************************************************//**
9853 Reads the next row in a table scan (also used to read the FIRST row
9854 in a table scan).
9855 @return 0, HA_ERR_END_OF_FILE, or error number */
9856 
9857 int
rnd_next(uchar * buf)9858 ha_innobase::rnd_next(
9859 /*==================*/
9860 	uchar*	buf)	/*!< in/out: returns the row in this buffer,
9861 			in MySQL format */
9862 {
9863 	int	error;
9864 
9865 	DBUG_ENTER("rnd_next");
9866 
9867 	if (m_start_of_scan) {
9868 		error = index_first(buf);
9869 
9870 		if (error == HA_ERR_KEY_NOT_FOUND) {
9871 			error = HA_ERR_END_OF_FILE;
9872 		}
9873 
9874 		m_start_of_scan = false;
9875 	} else {
9876 		error = general_fetch(buf, ROW_SEL_NEXT, 0);
9877 	}
9878 
9879 	DBUG_RETURN(error);
9880 }
9881 
9882 /**********************************************************************//**
9883 Fetches a row from the table based on a row reference.
9884 @return 0, HA_ERR_KEY_NOT_FOUND, or error code */
9885 
9886 int
rnd_pos(uchar * buf,uchar * pos)9887 ha_innobase::rnd_pos(
9888 /*=================*/
9889 	uchar*	buf,	/*!< in/out: buffer for the row */
9890 	uchar*	pos)	/*!< in: primary key value of the row in the
9891 			MySQL format, or the row id if the clustered
9892 			index was internally generated by InnoDB; the
9893 			length of data in pos has to be ref_length */
9894 {
9895 	DBUG_ENTER("rnd_pos");
9896 	DBUG_DUMP("key", pos, ref_length);
9897 
9898 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9899 
9900 	/* Note that we assume the length of the row reference is fixed
9901 	for the table, and it is == ref_length */
9902 
9903 	int	error = index_read(buf, pos, (uint)ref_length, HA_READ_KEY_EXACT);
9904 
9905 	if (error != 0) {
9906 		DBUG_PRINT("error", ("Got error: %d", error));
9907 	}
9908 
9909 	DBUG_RETURN(error);
9910 }
9911 
9912 /**********************************************************************//**
9913 Initialize FT index scan
9914 @return 0 or error number */
9915 
9916 int
ft_init()9917 ha_innobase::ft_init()
9918 /*==================*/
9919 {
9920 	DBUG_ENTER("ft_init");
9921 
9922 	trx_t*	trx = check_trx_exists(ha_thd());
9923 
9924 	/* FTS queries are not treated as autocommit non-locking selects.
9925 	This is because the FTS implementation can acquire locks behind
9926 	the scenes. This has not been verified but it is safer to treat
9927 	them as regular read only transactions for now. */
9928 
9929 	if (!trx_is_started(trx)) {
9930 		trx->will_lock = true;
9931 	}
9932 
9933 	DBUG_RETURN(rnd_init(false));
9934 }
9935 
9936 /**********************************************************************//**
9937 Initialize FT index scan
9938 @return FT_INFO structure if successful or NULL */
9939 
9940 FT_INFO*
ft_init_ext(uint flags,uint keynr,String * key)9941 ha_innobase::ft_init_ext(
9942 /*=====================*/
9943 	uint			flags,	/* in: */
9944 	uint			keynr,	/* in: */
9945 	String*			key)	/* in: */
9946 {
9947 	NEW_FT_INFO*		fts_hdl = NULL;
9948 	dict_index_t*		index;
9949 	fts_result_t*		result;
9950 	char			buf_tmp[8192];
9951 	ulint			buf_tmp_used;
9952 	uint			num_errors;
9953 	ulint			query_len = key->length();
9954 	const CHARSET_INFO*	char_set = key->charset();
9955 	const char*		query = key->ptr();
9956 
9957 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
9958 		{
9959 			ib::info	out;
9960 			out << "keynr=" << keynr << ", '";
9961 			out.write(key->ptr(), key->length());
9962 		}
9963 
9964 		if (flags & FT_BOOL) {
9965 			ib::info() << "BOOL search";
9966 		} else {
9967 			ib::info() << "NL search";
9968 		}
9969 	}
9970 
9971 	/* FIXME: utf32 and utf16 are not compatible with some
9972 	string function used. So to convert them to uft8 before
9973 	we proceed. */
9974 	if (strcmp(char_set->csname, "utf32") == 0
9975 	    || strcmp(char_set->csname, "utf16") == 0) {
9976 
9977 		buf_tmp_used = innobase_convert_string(
9978 			buf_tmp, sizeof(buf_tmp) - 1,
9979 			&my_charset_utf8_general_ci,
9980 			query, query_len, (CHARSET_INFO*) char_set,
9981 			&num_errors);
9982 
9983 		buf_tmp[buf_tmp_used] = 0;
9984 		query = buf_tmp;
9985 		query_len = buf_tmp_used;
9986 	}
9987 
9988 	trx_t*	trx = m_prebuilt->trx;
9989 
9990 	/* FTS queries are not treated as autocommit non-locking selects.
9991 	This is because the FTS implementation can acquire locks behind
9992 	the scenes. This has not been verified but it is safer to treat
9993 	them as regular read only transactions for now. */
9994 
9995 	if (!trx_is_started(trx)) {
9996 		trx->will_lock = true;
9997 	}
9998 
9999 	dict_table_t*	ft_table = m_prebuilt->table;
10000 
10001 	/* Table does not have an FTS index */
10002 	if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
10003 		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10004 		return(NULL);
10005 	}
10006 
10007 	/* If tablespace is discarded, we should return here */
10008 	if (!ft_table->space) {
10009 		my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str,
10010 			 table->s->table_name.str);
10011 		return(NULL);
10012 	}
10013 
10014 	if (keynr == NO_SUCH_KEY) {
10015 		/* FIXME: Investigate the NO_SUCH_KEY usage */
10016 		index = reinterpret_cast<dict_index_t*>
10017 			(ib_vector_getp(ft_table->fts->indexes, 0));
10018 	} else {
10019 		index = innobase_get_index(keynr);
10020 	}
10021 
10022 	if (index == NULL || index->type != DICT_FTS) {
10023 		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10024 		return(NULL);
10025 	}
10026 
10027 	if (!(ft_table->fts->added_synced)) {
10028 		fts_init_index(ft_table, FALSE);
10029 
10030 		ft_table->fts->added_synced = true;
10031 	}
10032 
10033 	const byte*	q = reinterpret_cast<const byte*>(
10034 		const_cast<char*>(query));
10035 
10036 	// FIXME: support ft_init_ext_with_hints(), pass LIMIT
10037 	dberr_t	error = fts_query(trx, index, flags, q, query_len, &result);
10038 
10039 	if (error != DB_SUCCESS) {
10040 		my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
10041 		return(NULL);
10042 	}
10043 
10044 	/* Allocate FTS handler, and instantiate it before return */
10045 	fts_hdl = reinterpret_cast<NEW_FT_INFO*>(
10046 		my_malloc(/*PSI_INSTRUMENT_ME,*/ sizeof(NEW_FT_INFO), MYF(0)));
10047 
10048 	fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
10049 	fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
10050 	fts_hdl->ft_prebuilt = m_prebuilt;
10051 	fts_hdl->ft_result = result;
10052 
10053 	/* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */
10054 	m_prebuilt->in_fts_query = true;
10055 
10056 	return(reinterpret_cast<FT_INFO*>(fts_hdl));
10057 }
10058 
10059 /*****************************************************************//**
10060 Set up search tuple for a query through FTS_DOC_ID_INDEX on
10061 supplied Doc ID. This is used by MySQL to retrieve the documents
10062 once the search result (Doc IDs) is available */
10063 static
10064 void
innobase_fts_create_doc_id_key(dtuple_t * tuple,const dict_index_t * index,doc_id_t * doc_id)10065 innobase_fts_create_doc_id_key(
10066 /*===========================*/
10067 	dtuple_t*	tuple,		/* in/out: m_prebuilt->search_tuple */
10068 	const dict_index_t*
10069 			index,		/* in: index (FTS_DOC_ID_INDEX) */
10070 	doc_id_t*	doc_id)		/* in/out: doc id to search, value
10071 					could be changed to storage format
10072 					used for search. */
10073 {
10074 	doc_id_t	temp_doc_id;
10075 	dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
10076 
10077 	ut_a(dict_index_get_n_unique(index) == 1);
10078 
10079 	dtuple_set_n_fields(tuple, index->n_fields);
10080 	dict_index_copy_types(tuple, index, index->n_fields);
10081 
10082 #ifdef UNIV_DEBUG
10083 	/* The unique Doc ID field should be an eight-bytes integer */
10084 	dict_field_t*	field = dict_index_get_nth_field(index, 0);
10085         ut_a(field->col->mtype == DATA_INT);
10086 	ut_ad(sizeof(*doc_id) == field->fixed_len);
10087 	ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME));
10088 #endif /* UNIV_DEBUG */
10089 
10090 	/* Convert to storage byte order */
10091 	mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
10092 	*doc_id = temp_doc_id;
10093 	dfield_set_data(dfield, doc_id, sizeof(*doc_id));
10094 
10095         dtuple_set_n_fields_cmp(tuple, 1);
10096 
10097 	for (ulint i = 1; i < index->n_fields; i++) {
10098 		dfield = dtuple_get_nth_field(tuple, i);
10099 		dfield_set_null(dfield);
10100 	}
10101 }
10102 
10103 /**********************************************************************//**
10104 Fetch next result from the FT result set
10105 @return error code */
10106 
10107 int
ft_read(uchar * buf)10108 ha_innobase::ft_read(
10109 /*=================*/
10110 	uchar*		buf)		/*!< in/out: buf contain result row */
10111 {
10112 	row_prebuilt_t*	ft_prebuilt;
10113 
10114 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_prebuilt;
10115 
10116 	ut_a(ft_prebuilt == m_prebuilt);
10117 
10118 	fts_result_t*	result;
10119 
10120 	result = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_result;
10121 
10122 	if (result->current == NULL) {
10123 		/* This is the case where the FTS query did not
10124 		contain and matching documents. */
10125 		if (result->rankings_by_id != NULL) {
10126 			/* Now that we have the complete result, we
10127 			need to sort the document ids on their rank
10128 			calculation. */
10129 
10130 			fts_query_sort_result_on_rank(result);
10131 
10132 			result->current = const_cast<ib_rbt_node_t*>(
10133 				rbt_first(result->rankings_by_rank));
10134 		} else {
10135 			ut_a(result->current == NULL);
10136 		}
10137 	} else {
10138 		result->current = const_cast<ib_rbt_node_t*>(
10139 			rbt_next(result->rankings_by_rank, result->current));
10140 	}
10141 
10142 next_record:
10143 
10144 	if (result->current != NULL) {
10145 		doc_id_t	search_doc_id;
10146 		dtuple_t*	tuple = m_prebuilt->search_tuple;
10147 
10148 		/* If we only need information from result we can return
10149 		   without fetching the table row */
10150 		if (ft_prebuilt->read_just_key) {
10151 #ifdef MYSQL_STORE_FTS_DOC_ID
10152 			if (m_prebuilt->fts_doc_id_in_read_set) {
10153 				fts_ranking_t* ranking;
10154 				ranking = rbt_value(fts_ranking_t,
10155 						    result->current);
10156 				innobase_fts_store_docid(
10157 					table, ranking->doc_id);
10158 			}
10159 #endif
10160 			table->status= 0;
10161 			return(0);
10162 		}
10163 
10164 		dict_index_t*	index;
10165 
10166 		index = m_prebuilt->table->fts_doc_id_index;
10167 
10168 		/* Must find the index */
10169 		ut_a(index != NULL);
10170 
10171 		/* Switch to the FTS doc id index */
10172 		m_prebuilt->index = index;
10173 
10174 		fts_ranking_t*	ranking = rbt_value(
10175 			fts_ranking_t, result->current);
10176 
10177 		search_doc_id = ranking->doc_id;
10178 
10179 		/* We pass a pointer of search_doc_id because it will be
10180 		converted to storage byte order used in the search
10181 		tuple. */
10182 		innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
10183 
10184 		innobase_srv_conc_enter_innodb(m_prebuilt);
10185 
10186 		dberr_t ret = row_search_for_mysql(
10187 			(byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0);
10188 
10189 		innobase_srv_conc_exit_innodb(m_prebuilt);
10190 
10191 		int	error;
10192 
10193 		switch (ret) {
10194 		case DB_SUCCESS:
10195 			error = 0;
10196 			table->status = 0;
10197 			break;
10198 		case DB_RECORD_NOT_FOUND:
10199 			result->current = const_cast<ib_rbt_node_t*>(
10200 				rbt_next(result->rankings_by_rank,
10201 					 result->current));
10202 
10203 			if (!result->current) {
10204 				/* exhaust the result set, should return
10205 				HA_ERR_END_OF_FILE just like
10206 				ha_innobase::general_fetch() and/or
10207 				ha_innobase::index_first() etc. */
10208 				error = HA_ERR_END_OF_FILE;
10209 				table->status = STATUS_NOT_FOUND;
10210 			} else {
10211 				goto next_record;
10212 			}
10213 			break;
10214 		case DB_END_OF_INDEX:
10215 			error = HA_ERR_END_OF_FILE;
10216 			table->status = STATUS_NOT_FOUND;
10217 			break;
10218 		case DB_TABLESPACE_DELETED:
10219 
10220 			ib_senderrf(
10221 				m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10222 				ER_TABLESPACE_DISCARDED,
10223 				table->s->table_name.str);
10224 
10225 			table->status = STATUS_NOT_FOUND;
10226 			error = HA_ERR_TABLESPACE_MISSING;
10227 			break;
10228 		case DB_TABLESPACE_NOT_FOUND:
10229 
10230 			ib_senderrf(
10231 				m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10232 				ER_TABLESPACE_MISSING,
10233 				table->s->table_name.str);
10234 
10235 			table->status = STATUS_NOT_FOUND;
10236 			error = HA_ERR_TABLESPACE_MISSING;
10237 			break;
10238 		default:
10239 			error = convert_error_code_to_mysql(
10240 				ret, 0, m_user_thd);
10241 
10242 			table->status = STATUS_NOT_FOUND;
10243 			break;
10244 		}
10245 
10246 		return(error);
10247 	}
10248 
10249 	return(HA_ERR_END_OF_FILE);
10250 }
10251 
10252 #ifdef WITH_WSREP
10253 inline
10254 const char*
wsrep_key_type_to_str(wsrep_key_type type)10255 wsrep_key_type_to_str(wsrep_key_type type)
10256 {
10257 	switch (type) {
10258 	case WSREP_KEY_SHARED:
10259 		return "shared";
10260 	case WSREP_KEY_SEMI:
10261 		return "semi";
10262 	case WSREP_KEY_EXCLUSIVE:
10263 		return "exclusive";
10264 	};
10265 	return "unknown";
10266 }
10267 
10268 ulint
wsrep_append_foreign_key(trx_t * trx,dict_foreign_t * foreign,const rec_t * rec,dict_index_t * index,ibool referenced,wsrep_key_type key_type)10269 wsrep_append_foreign_key(
10270 /*===========================*/
10271 	trx_t*		trx,		/*!< in: trx */
10272 	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
10273 	const rec_t*	rec,		/*!<in: clustered index record */
10274 	dict_index_t*	index,		/*!<in: clustered index */
10275 	ibool		referenced,	/*!<in: is check for referenced table */
10276 	wsrep_key_type	key_type)	/*!< in: access type of this key
10277 					(shared, exclusive, semi...) */
10278 {
10279 	THD*  thd = trx->mysql_thd;
10280 
10281 	if (!trx->is_wsrep() || wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
10282 		return DB_SUCCESS;
10283 	}
10284 
10285 	if (!foreign ||
10286 	    (!foreign->referenced_table && !foreign->foreign_table)) {
10287 		WSREP_INFO("FK: %s missing in: %s",
10288 			   (!foreign ? "constraint" :
10289 			    (!foreign->referenced_table ?
10290 			     "referenced table" : "foreign table")),
10291 			   wsrep_thd_query(thd));
10292 		return DB_ERROR;
10293 	}
10294 
10295 	ulint rcode = DB_SUCCESS;
10296 	char  cache_key[513] = {'\0'};
10297 	int   cache_key_len=0;
10298 	bool const copy = true;
10299 
10300 	if ( !((referenced) ?
10301 		foreign->referenced_table : foreign->foreign_table)) {
10302 		WSREP_DEBUG("pulling %s table into cache",
10303 			    (referenced) ? "referenced" : "foreign");
10304 		mutex_enter(&(dict_sys->mutex));
10305 
10306 		if (referenced) {
10307 			foreign->referenced_table =
10308 				dict_table_get_low(
10309 					foreign->referenced_table_name_lookup);
10310 			if (foreign->referenced_table) {
10311 				foreign->referenced_index =
10312 					dict_foreign_find_index(
10313 						foreign->referenced_table, NULL,
10314 						foreign->referenced_col_names,
10315 						foreign->n_fields,
10316 						foreign->foreign_index,
10317 						TRUE, FALSE);
10318 			}
10319 		} else {
10320 	  		foreign->foreign_table =
10321 				dict_table_get_low(
10322 					foreign->foreign_table_name_lookup);
10323 
10324 			if (foreign->foreign_table) {
10325 				foreign->foreign_index =
10326 					dict_foreign_find_index(
10327 						foreign->foreign_table, NULL,
10328 						foreign->foreign_col_names,
10329 						foreign->n_fields,
10330 						foreign->referenced_index,
10331 						TRUE, FALSE);
10332 			}
10333 		}
10334 		mutex_exit(&(dict_sys->mutex));
10335 	}
10336 
10337 	if ( !((referenced) ?
10338 		foreign->referenced_table : foreign->foreign_table)) {
10339 		WSREP_WARN("FK: %s missing in query: %s",
10340 			   (!foreign->referenced_table) ?
10341 			   "referenced table" : "foreign table",
10342 			   (wsrep_thd_query(thd)) ?
10343 			   wsrep_thd_query(thd) : "void");
10344 		return DB_ERROR;
10345 	}
10346 
10347 	byte  key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10348 	ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
10349 
10350 	dict_index_t *idx_target = (referenced) ?
10351 		foreign->referenced_index : index;
10352 	dict_index_t *idx = (referenced) ?
10353 		UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
10354 		UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
10355 	int i = 0;
10356 
10357 	while (idx != NULL && idx != idx_target) {
10358 		if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
10359 			i++;
10360 		}
10361 		idx = UT_LIST_GET_NEXT(indexes, idx);
10362 	}
10363 
10364 	ut_a(idx);
10365 	key[0] = byte(i);
10366 
10367 	rcode = wsrep_rec_get_foreign_key(
10368 		&key[1], &len, rec, index, idx,
10369 		wsrep_protocol_version > 1);
10370 
10371 	if (rcode != DB_SUCCESS) {
10372 		WSREP_ERROR(
10373 			"FK key set failed: " ULINTPF
10374 			" (" ULINTPF " %s), index: %s %s, %s",
10375 			rcode, referenced, wsrep_key_type_to_str(key_type),
10376 			index ? index->name() : "void index",
10377 			(index && index->table) ? index->table->name.m_name :
10378 			"void table",
10379 			wsrep_thd_query(thd));
10380 		return DB_ERROR;
10381 	}
10382 
10383 	strncpy(cache_key,
10384 		(wsrep_protocol_version > 1) ?
10385 		((referenced) ?
10386 			foreign->referenced_table->name.m_name :
10387 			foreign->foreign_table->name.m_name) :
10388 		foreign->foreign_table->name.m_name, sizeof(cache_key) - 1);
10389 	cache_key_len = strlen(cache_key);
10390 
10391 #ifdef WSREP_DEBUG_PRINT
10392 	ulint j;
10393 	fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
10394 		cache_key, (shared) ? "shared" : "exclusive", len+1);
10395 	for (j=0; j<len+1; j++) {
10396 		fprintf(stderr, " %hhX, ", key[j]);
10397 	}
10398 	fprintf(stderr, "\n");
10399 #endif
10400 	char *p = strchr(cache_key, '/');
10401 
10402 	if (p) {
10403 		*p = '\0';
10404 	} else {
10405 		WSREP_WARN("unexpected foreign key table %s %s",
10406 			   foreign->referenced_table->name.m_name,
10407 			   foreign->foreign_table->name.m_name);
10408 	}
10409 
10410 	wsrep_buf_t wkey_part[3];
10411         wsrep_key_t wkey = {wkey_part, 3};
10412 
10413 	if (!wsrep_prepare_key(
10414 		(const uchar*)cache_key,
10415 		cache_key_len +  1,
10416 		(const uchar*)key, len+1,
10417 		wkey_part,
10418 		(size_t*)&wkey.key_parts_num)) {
10419 		WSREP_WARN("key prepare failed for cascaded FK: %s",
10420 			   (wsrep_thd_query(thd)) ?
10421 			    wsrep_thd_query(thd) : "void");
10422 		return DB_ERROR;
10423 	}
10424 
10425 	wsrep_t *wsrep= get_wsrep();
10426 
10427 	rcode = (int)wsrep->append_key(
10428 		wsrep,
10429 		wsrep_ws_handle(thd, trx),
10430 		&wkey,
10431 		1,
10432 		key_type,
10433                 copy);
10434 
10435 	if (rcode) {
10436 		DBUG_PRINT("wsrep", ("row key failed: " ULINTPF, rcode));
10437 		WSREP_ERROR("Appending cascaded fk row key failed: %s, "
10438 			    ULINTPF,
10439 			    (wsrep_thd_query(thd)) ?
10440 			    wsrep_thd_query(thd) : "void", rcode);
10441 		return DB_ERROR;
10442 	}
10443 
10444 	return DB_SUCCESS;
10445 }
10446 
10447 static int
wsrep_append_key(THD * thd,trx_t * trx,TABLE_SHARE * table_share,const char * key,uint16_t key_len,wsrep_key_type key_type)10448 wsrep_append_key(
10449 /*=============*/
10450 	THD		*thd,
10451 	trx_t 		*trx,
10452 	TABLE_SHARE 	*table_share,
10453 	const char*	key,
10454 	uint16_t        key_len,
10455 	wsrep_key_type	key_type	/*!< in: access type of this key
10456 					(shared, exclusive, semi...) */
10457 )
10458 {
10459 	DBUG_ENTER("wsrep_append_key");
10460 	bool const copy = true;
10461 #ifdef WSREP_DEBUG_PRINT
10462 	fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s\n Query: %s ",
10463 		wsrep_key_type_to_str(key_type),
10464 		wsrep_thd_thread_id(thd), trx->id, key_len,
10465 		table_share->table_name.str, wsrep_thd_query(thd));
10466 	for (int i=0; i<key_len; i++) {
10467 		fprintf(stderr, "%hhX, ", key[i]);
10468 	}
10469 	fprintf(stderr, "\n");
10470 #endif
10471 	wsrep_buf_t wkey_part[3];
10472         wsrep_key_t wkey = {wkey_part, 3};
10473 
10474 	if (!wsrep_prepare_key(
10475 			(const uchar*)table_share->table_cache_key.str,
10476 			table_share->table_cache_key.length,
10477 			(const uchar*)key, key_len,
10478 			wkey_part,
10479 			(size_t*)&wkey.key_parts_num)) {
10480 		WSREP_WARN("key prepare failed for: %s",
10481 			   (wsrep_thd_query(thd)) ?
10482 			   wsrep_thd_query(thd) : "void");
10483 		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10484 	}
10485 
10486 	wsrep_t *wsrep= get_wsrep();
10487 
10488 	int rcode = (int)wsrep->append_key(
10489 			       wsrep,
10490 			       wsrep_ws_handle(thd, trx),
10491 			       &wkey,
10492 			       1,
10493 			       key_type,
10494                                copy);
10495 	if (rcode) {
10496 		DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
10497 		WSREP_WARN("Appending row key failed: %s, %d",
10498 			   (wsrep_thd_query(thd)) ?
10499 			   wsrep_thd_query(thd) : "void", rcode);
10500 		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10501 	}
10502 
10503 	DBUG_RETURN(0);
10504 }
10505 
10506 static bool
referenced_by_foreign_key2(dict_table_t * table,dict_index_t * index)10507 referenced_by_foreign_key2(
10508 /*=======================*/
10509 	dict_table_t* table,
10510 	dict_index_t* index)
10511 {
10512 	ut_ad(table != NULL);
10513 	ut_ad(index != NULL);
10514 
10515 	const dict_foreign_set* fks = &table->referenced_set;
10516 
10517 	for (dict_foreign_set::const_iterator it = fks->begin();
10518              it != fks->end();
10519              ++it) {
10520                 dict_foreign_t* foreign = *it;
10521 
10522                 if (foreign->referenced_index != index) {
10523                         continue;
10524                 }
10525                 ut_ad(table == foreign->referenced_table);
10526                 return true;
10527         }
10528         return false;
10529 }
10530 
10531 int
wsrep_append_keys(THD * thd,wsrep_key_type key_type,const uchar * record0,const uchar * record1)10532 ha_innobase::wsrep_append_keys(
10533 /*===========================*/
10534 	THD 		*thd,
10535 	wsrep_key_type	key_type,	/*!< in: access type of this key
10536 					(shared, exclusive, semi...) */
10537 	const uchar*	record0,	/* in: row in MySQL format */
10538 	const uchar*	record1)	/* in: row in MySQL format */
10539 {
10540 	int rcode;
10541 	DBUG_ENTER("wsrep_append_keys");
10542 
10543 	bool key_appended = false;
10544 	trx_t *trx = thd_to_trx(thd);
10545 
10546 	if (table_share && table_share->tmp_table  != NO_TMP_TABLE) {
10547 		WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
10548 			    thd_get_thread_id(thd),
10549 			    table_share->tmp_table,
10550 			    (wsrep_thd_query(thd)) ?
10551 			    wsrep_thd_query(thd) : "void");
10552 		DBUG_RETURN(0);
10553 	}
10554 
10555 	if (wsrep_protocol_version == 0) {
10556 		uint	len;
10557 		char 	keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10558 		char 	*key 		= &keyval[0];
10559 		ibool    is_null;
10560 
10561 		len = wsrep_store_key_val_for_row(
10562 			thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
10563 			record0, &is_null);
10564 
10565 		if (!is_null) {
10566 			rcode = wsrep_append_key(
10567 				thd, trx, table_share, keyval,
10568 				len, key_type);
10569 
10570 			if (rcode) DBUG_RETURN(rcode);
10571 		} else {
10572 			WSREP_DEBUG("NULL key skipped (proto 0): %s",
10573 				    wsrep_thd_query(thd));
10574 		}
10575 	} else {
10576 		ut_a(table->s->keys <= 256);
10577 		uint i;
10578                 bool hasPK= false;
10579 
10580 		for (i=0; i<table->s->keys; ++i) {
10581 			KEY*  key_info	= table->key_info + i;
10582 			if (key_info->flags & HA_NOSAME) {
10583 				hasPK = true;
10584 			}
10585 		}
10586 
10587 		for (i=0; i<table->s->keys; ++i) {
10588 			uint  len;
10589 			char  keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10590 			char  keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10591 			char* key0 		= &keyval0[1];
10592 			char* key1 		= &keyval1[1];
10593 			KEY*  key_info	= table->key_info + i;
10594 			ibool is_null;
10595 
10596 			dict_index_t* idx  = innobase_get_index(i);
10597 			dict_table_t* tab  = (idx) ? idx->table : NULL;
10598 
10599 			keyval0[0] = (char)i;
10600 			keyval1[0] = (char)i;
10601 
10602 			if (!tab) {
10603 				WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
10604 					   table->s->table_name.str,
10605 					   key_info->name.str);
10606 			}
10607 			/* !hasPK == table with no PK, must append all non-unique keys */
10608 			if (!hasPK || key_info->flags & HA_NOSAME ||
10609 			    ((tab &&
10610 			      referenced_by_foreign_key2(tab, idx)) ||
10611 			     (!tab && referenced_by_foreign_key()))) {
10612 
10613 				len = wsrep_store_key_val_for_row(
10614 					thd, table, i, key0,
10615 					WSREP_MAX_SUPPORTED_KEY_LENGTH,
10616 					record0, &is_null);
10617 				if (!is_null) {
10618 					rcode = wsrep_append_key(
10619 						thd, trx, table_share,
10620 						keyval0, len+1, key_type);
10621 					if (rcode) DBUG_RETURN(rcode);
10622 
10623 					if (key_info->flags & HA_NOSAME ||
10624 					    key_type == WSREP_KEY_SHARED)
10625 			  		key_appended = true;
10626 				} else {
10627 					WSREP_DEBUG("NULL key skipped: %s",
10628 						    wsrep_thd_query(thd));
10629 				}
10630 
10631 				if (record1) {
10632 					len = wsrep_store_key_val_for_row(
10633 						thd, table, i, key1,
10634 						WSREP_MAX_SUPPORTED_KEY_LENGTH,
10635 						record1, &is_null);
10636 
10637 					if (!is_null
10638 					    && memcmp(key0, key1, len)) {
10639 						rcode = wsrep_append_key(
10640 							thd, trx, table_share,
10641 							keyval1, len+1,
10642                                                         key_type);
10643 						if (rcode) DBUG_RETURN(rcode);
10644 					}
10645 				}
10646 			}
10647 		}
10648 	}
10649 
10650 	/* if no PK, calculate hash of full row, to be the key value */
10651 	if (!key_appended && wsrep_certify_nonPK) {
10652 		uchar digest[16];
10653 
10654 		wsrep_calc_row_hash(digest, record0, table, m_prebuilt);
10655 
10656 		if (int rcode = wsrep_append_key(thd, trx, table_share,
10657 						 reinterpret_cast<char*>
10658 						 (digest), 16, key_type)) {
10659 			DBUG_RETURN(rcode);
10660 		}
10661 
10662 		if (record1) {
10663 			wsrep_calc_row_hash(
10664 				digest, record1, table, m_prebuilt);
10665 			if (int rcode = wsrep_append_key(
10666 				    thd, trx, table_share,
10667 				    reinterpret_cast<char*>(digest), 16,
10668 				    key_type)) {
10669 				DBUG_RETURN(rcode);
10670 			}
10671 		}
10672 		DBUG_RETURN(0);
10673 	}
10674 
10675 	DBUG_RETURN(0);
10676 }
10677 #endif /* WITH_WSREP */
10678 
10679 /*********************************************************************//**
10680 Stores a reference to the current row to 'ref' field of the handle. Note
10681 that in the case where we have generated the clustered index for the
10682 table, the function parameter is illogical: we MUST ASSUME that 'record'
10683 is the current 'position' of the handle, because if row ref is actually
10684 the row id internally generated in InnoDB, then 'record' does not contain
10685 it. We just guess that the row id must be for the record where the handle
10686 was positioned the last time. */
10687 
10688 void
position(const uchar * record)10689 ha_innobase::position(
10690 /*==================*/
10691 	const uchar*	record)	/*!< in: row in MySQL format */
10692 {
10693 	uint		len;
10694 
10695 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
10696 
10697 	if (m_prebuilt->clust_index_was_generated) {
10698 		/* No primary key was defined for the table and we
10699 		generated the clustered index from row id: the
10700 		row reference will be the row id, not any key value
10701 		that MySQL knows of */
10702 
10703 		len = DATA_ROW_ID_LEN;
10704 
10705 		memcpy(ref, m_prebuilt->row_id, len);
10706 	} else {
10707 
10708 		/* Copy primary key as the row reference */
10709 		KEY*	key_info = table->key_info + m_primary_key;
10710 		key_copy(ref, (uchar*)record, key_info, key_info->key_length);
10711 		len = key_info->key_length;
10712 	}
10713 
10714 	ut_ad(len == ref_length);
10715 }
10716 
10717 /*****************************************************************//**
10718 Check whether there exist a column named as "FTS_DOC_ID", which is
10719 reserved for InnoDB FTS Doc ID
10720 @return true if there exist a "FTS_DOC_ID" column */
10721 static
10722 bool
create_table_check_doc_id_col(trx_t * trx,const TABLE * form,ulint * doc_id_col)10723 create_table_check_doc_id_col(
10724 /*==========================*/
10725 	trx_t*		trx,		/*!< in: InnoDB transaction handle */
10726 	const TABLE*	form,		/*!< in: information on table
10727 					columns and indexes */
10728 	ulint*		doc_id_col)	/*!< out: Doc ID column number if
10729 					there exist a FTS_DOC_ID column,
10730 					ULINT_UNDEFINED if column is of the
10731 					wrong type/name/size */
10732 {
10733 	for (ulint i = 0; i < form->s->fields; i++) {
10734 		const Field*	field;
10735 		ulint		col_type;
10736 		ulint		col_len;
10737 		ulint		unsigned_type;
10738 
10739 		field = form->field[i];
10740 		if (!field->stored_in_db()) {
10741 			continue;
10742 		}
10743 
10744 		col_type = get_innobase_type_from_mysql_type(
10745 			&unsigned_type, field);
10746 
10747 		col_len = field->pack_length();
10748 
10749 		if (innobase_strcasecmp(field->field_name.str,
10750 					FTS_DOC_ID_COL_NAME) == 0) {
10751 
10752 			/* Note the name is case sensitive due to
10753 			our internal query parser */
10754 			if (col_type == DATA_INT
10755 			    && !field->real_maybe_null()
10756 			    && col_len == sizeof(doc_id_t)
10757 			    && (strcmp(field->field_name.str,
10758 				      FTS_DOC_ID_COL_NAME) == 0)) {
10759 				*doc_id_col = i;
10760 			} else {
10761 				push_warning_printf(
10762 					trx->mysql_thd,
10763 					Sql_condition::WARN_LEVEL_WARN,
10764 					ER_ILLEGAL_HA_CREATE_OPTION,
10765 					"InnoDB: FTS_DOC_ID column must be"
10766 					" of BIGINT NOT NULL type, and named"
10767 					" in all capitalized characters");
10768 				my_error(ER_WRONG_COLUMN_NAME, MYF(0),
10769 					 field->field_name.str);
10770 				*doc_id_col = ULINT_UNDEFINED;
10771 			}
10772 
10773 			return(true);
10774 		}
10775 	}
10776 
10777 	return(false);
10778 }
10779 
10780 
10781 /** Finds all base columns needed to compute a given generated column.
10782 This is returned as a bitmap, in field->table->tmp_set.
10783 Works for both dict_v_col_t and dict_s_col_t columns.
10784 @param[in]	table		InnoDB table
10785 @param[in]	field		MySQL field
10786 @param[in,out]	col		virtual or stored column */
10787 template <typename T>
10788 void
prepare_vcol_for_base_setup(const dict_table_t * table,const Field * field,T * col)10789 prepare_vcol_for_base_setup(
10790 /*========================*/
10791 	const dict_table_t*	table,
10792 	const Field*	field,
10793 	T*		col)
10794 {
10795 	ut_ad(col->num_base == 0);
10796 	ut_ad(col->base_col == NULL);
10797 
10798 	MY_BITMAP *old_read_set = field->table->read_set;
10799 	MY_BITMAP *old_vcol_set = field->table->vcol_set;
10800 
10801 	field->table->read_set = field->table->vcol_set = &field->table->tmp_set;
10802 
10803 	bitmap_clear_all(&field->table->tmp_set);
10804 	field->vcol_info->expr->walk(
10805 		&Item::register_field_in_read_map, 1, field->table);
10806 	col->num_base= bitmap_bits_set(&field->table->tmp_set);
10807 	if (col->num_base != 0) {
10808 		col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
10809 					table->heap, col->num_base * sizeof(
10810 						* col->base_col)));
10811 	}
10812 	field->table->read_set= old_read_set;
10813 	field->table->vcol_set= old_vcol_set;
10814 }
10815 
10816 
10817 /** Set up base columns for virtual column
10818 @param[in]	table		InnoDB table
10819 @param[in]	field		MySQL field
10820 @param[in,out]	v_col		virtual column */
10821 void
innodb_base_col_setup(dict_table_t * table,const Field * field,dict_v_col_t * v_col)10822 innodb_base_col_setup(
10823 	dict_table_t*	table,
10824 	const Field*	field,
10825 	dict_v_col_t*	v_col)
10826 {
10827 	ulint n = 0;
10828 
10829 	prepare_vcol_for_base_setup(table, field, v_col);
10830 
10831 	for (uint i= 0; i < field->table->s->fields; ++i) {
10832 		const Field* base_field = field->table->field[i];
10833 		if (base_field->stored_in_db()
10834 			&& bitmap_is_set(&field->table->tmp_set, i)) {
10835 			ulint   z;
10836 
10837 			for (z = 0; z < table->n_cols; z++) {
10838 				const char* name = dict_table_get_col_name(table, z);
10839 				if (!innobase_strcasecmp(name,
10840 						base_field->field_name.str)) {
10841 					break;
10842 				}
10843 			}
10844 
10845 			ut_ad(z != table->n_cols);
10846 
10847 			v_col->base_col[n] = dict_table_get_nth_col(table, z);
10848 			ut_ad(v_col->base_col[n]->ind == z);
10849 			n++;
10850 		}
10851 	}
10852 	v_col->num_base= n;
10853 }
10854 
10855 /** Set up base columns for stored column
10856 @param[in]	table	InnoDB table
10857 @param[in]	field	MySQL field
10858 @param[in,out]	s_col	stored column */
10859 void
innodb_base_col_setup_for_stored(const dict_table_t * table,const Field * field,dict_s_col_t * s_col)10860 innodb_base_col_setup_for_stored(
10861 	const dict_table_t*	table,
10862 	const Field*		field,
10863 	dict_s_col_t*		s_col)
10864 {
10865 	ulint	n = 0;
10866 
10867 	prepare_vcol_for_base_setup(table, field, s_col);
10868 
10869 	for (uint i= 0; i < field->table->s->fields; ++i) {
10870 		const Field* base_field = field->table->field[i];
10871 
10872 		if (base_field->stored_in_db()
10873 		    && bitmap_is_set(&field->table->tmp_set, i)) {
10874 			ulint	z;
10875 			for (z = 0; z < table->n_cols; z++) {
10876 				const char* name = dict_table_get_col_name(
10877 						table, z);
10878 				if (!innobase_strcasecmp(
10879 					name, base_field->field_name.str)) {
10880 					break;
10881 				}
10882 			}
10883 
10884 			ut_ad(z != table->n_cols);
10885 
10886 			s_col->base_col[n] = dict_table_get_nth_col(table, z);
10887 			n++;
10888 
10889 			if (n == s_col->num_base) {
10890 				break;
10891 			}
10892 		}
10893 	}
10894 	s_col->num_base= n;
10895 }
10896 
10897 /** Create a table definition to an InnoDB database.
10898 @return ER_* level error */
10899 inline MY_ATTRIBUTE((warn_unused_result))
10900 int
create_table_def()10901 create_table_info_t::create_table_def()
10902 {
10903 	dict_table_t*	table;
10904 	ulint		col_type;
10905 	ulint		col_len;
10906 	ulint		nulls_allowed;
10907 	ulint		unsigned_type;
10908 	ulint		binary_type;
10909 	ulint		long_true_varchar;
10910 	ulint		charset_no;
10911 	ulint		doc_id_col = 0;
10912 	ibool		has_doc_id_col = FALSE;
10913 	mem_heap_t*	heap;
10914 	ha_table_option_struct *options= m_form->s->option_struct;
10915 	dberr_t		err = DB_SUCCESS;
10916 
10917 	DBUG_ENTER("create_table_def");
10918 	DBUG_PRINT("enter", ("table_name: %s", m_table_name));
10919 
10920 	DBUG_ASSERT(m_trx->mysql_thd == m_thd);
10921 	DBUG_ASSERT(!m_drop_before_rollback);
10922 
10923 	/* MySQL does the name length check. But we do additional check
10924 	on the name length here */
10925 	const size_t	table_name_len = strlen(m_table_name);
10926 	if (table_name_len > MAX_FULL_NAME_LEN) {
10927 		push_warning_printf(
10928 			m_thd, Sql_condition::WARN_LEVEL_WARN,
10929 			ER_TABLE_NAME,
10930 			"InnoDB: Table Name or Database Name is too long");
10931 
10932 		DBUG_RETURN(ER_TABLE_NAME);
10933 	}
10934 
10935 	if (m_table_name[table_name_len - 1] == '/') {
10936 		push_warning_printf(
10937 			m_thd, Sql_condition::WARN_LEVEL_WARN,
10938 			ER_TABLE_NAME,
10939 			"InnoDB: Table name is empty");
10940 
10941 		DBUG_RETURN(ER_WRONG_TABLE_NAME);
10942 	}
10943 
10944 	/* Find out the number of virtual columns. */
10945 	ulint num_v = 0;
10946 	const bool omit_virtual = ha_innobase::omits_virtual_cols(*m_form->s);
10947 	const ulint n_cols = omit_virtual
10948 		? m_form->s->stored_fields : m_form->s->fields;
10949 
10950 	if (!omit_virtual) {
10951 		for (ulint i = 0; i < n_cols; i++) {
10952 			num_v += !m_form->field[i]->stored_in_db();
10953 		}
10954 	}
10955 
10956 	/* Check whether there already exists a FTS_DOC_ID column */
10957 	if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){
10958 
10959 		/* Raise error if the Doc ID column is of wrong type or name */
10960 		if (doc_id_col == ULINT_UNDEFINED) {
10961 			DBUG_RETURN(HA_ERR_GENERIC);
10962 		} else {
10963 			has_doc_id_col = TRUE;
10964 		}
10965 	}
10966 
10967 	/* Adjust the number of columns for the FTS hidden field */
10968 	const ulint actual_n_cols = n_cols
10969 		+ (m_flags2 & DICT_TF2_FTS && !has_doc_id_col);
10970 
10971 	table = dict_mem_table_create(m_table_name, NULL,
10972 				      actual_n_cols, num_v, m_flags, m_flags2);
10973 
10974 	/* Set the hidden doc_id column. */
10975 	if (m_flags2 & DICT_TF2_FTS) {
10976 		table->fts->doc_col = has_doc_id_col
10977 				      ? doc_id_col : n_cols - num_v;
10978 	}
10979 
10980 	if (DICT_TF_HAS_DATA_DIR(m_flags)) {
10981 		ut_a(strlen(m_remote_path));
10982 
10983 		table->data_dir_path = mem_heap_strdup(
10984 			table->heap, m_remote_path);
10985 
10986 	} else {
10987 		table->data_dir_path = NULL;
10988 	}
10989 
10990 	heap = mem_heap_create(1000);
10991 
10992 	ut_d(bool have_vers_start = false);
10993 	ut_d(bool have_vers_end = false);
10994 
10995 	for (ulint i = 0, j = 0; j < n_cols; i++) {
10996 		Field*	field = m_form->field[i];
10997 		ulint vers_row = 0;
10998 
10999 		if (m_form->versioned()) {
11000 			if (i == m_form->s->row_start_field) {
11001 				vers_row = DATA_VERS_START;
11002 				ut_d(have_vers_start = true);
11003 			} else if (i == m_form->s->row_end_field) {
11004 				vers_row = DATA_VERS_END;
11005 				ut_d(have_vers_end = true);
11006 			} else if (!(field->flags
11007 				     & VERS_UPDATE_UNVERSIONED_FLAG)) {
11008 				vers_row = DATA_VERSIONED;
11009 			}
11010 		}
11011 
11012 		col_type = get_innobase_type_from_mysql_type(
11013 			&unsigned_type, field);
11014 
11015 		if (!col_type) {
11016 			push_warning_printf(
11017 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11018 				ER_CANT_CREATE_TABLE,
11019 				"Error creating table '%s' with"
11020 				" column '%s'. Please check its"
11021 				" column type and try to re-create"
11022 				" the table with an appropriate"
11023 				" column type.",
11024 				table->name.m_name, field->field_name.str);
11025 			goto err_col;
11026 		}
11027 
11028 		nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
11029 		binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
11030 
11031 		charset_no = 0;
11032 
11033 		if (dtype_is_string_type(col_type)) {
11034 
11035 			charset_no = (ulint) field->charset()->number;
11036 
11037 			DBUG_EXECUTE_IF("simulate_max_char_col",
11038 					charset_no = MAX_CHAR_COLL_NUM + 1;
11039 					);
11040 
11041 			if (charset_no > MAX_CHAR_COLL_NUM) {
11042 				/* in data0type.h we assume that the
11043 				number fits in one byte in prtype */
11044 				push_warning_printf(
11045 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11046 					ER_CANT_CREATE_TABLE,
11047 					"In InnoDB, charset-collation codes"
11048 					" must be below 256."
11049 					" Unsupported code " ULINTPF ".",
11050 					charset_no);
11051 				mem_heap_free(heap);
11052 				dict_mem_table_free(table);
11053 
11054 				DBUG_RETURN(ER_CANT_CREATE_TABLE);
11055 			}
11056 		}
11057 
11058 		col_len = field->pack_length();
11059 
11060 		/* The MySQL pack length contains 1 or 2 bytes length field
11061 		for a true VARCHAR. Let us subtract that, so that the InnoDB
11062 		column length in the InnoDB data dictionary is the real
11063 		maximum byte length of the actual data. */
11064 
11065 		long_true_varchar = 0;
11066 
11067 		if (field->type() == MYSQL_TYPE_VARCHAR) {
11068 			col_len -= ((Field_varstring*) field)->length_bytes;
11069 
11070 			if (((Field_varstring*) field)->length_bytes == 2) {
11071 				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
11072 			}
11073 		}
11074 
11075 		/* First check whether the column to be added has a
11076 		system reserved name. */
11077 		if (dict_col_name_is_reserved(field->field_name.str)){
11078 			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
11079 				 field->field_name.str);
11080 err_col:
11081 			dict_mem_table_free(table);
11082 			mem_heap_free(heap);
11083 			ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED));
11084 			DBUG_RETURN(HA_ERR_GENERIC);
11085 		}
11086 
11087 		ulint is_virtual = !field->stored_in_db() ? DATA_VIRTUAL : 0;
11088 
11089 		if (!is_virtual) {
11090 			dict_mem_table_add_col(table, heap,
11091 				field->field_name.str, col_type,
11092 				dtype_form_prtype(
11093 					(ulint) field->type()
11094 					| nulls_allowed | unsigned_type
11095 					| binary_type | long_true_varchar
11096 					| vers_row,
11097 					charset_no),
11098 				col_len);
11099 		} else if (!omit_virtual) {
11100 			dict_mem_table_add_v_col(table, heap,
11101 				field->field_name.str, col_type,
11102 				dtype_form_prtype(
11103 					(ulint) field->type()
11104 					| nulls_allowed | unsigned_type
11105 					| binary_type | long_true_varchar
11106 					| vers_row
11107 					| is_virtual,
11108 					charset_no),
11109 				col_len, i, 0);
11110 		}
11111 
11112 		if (innobase_is_s_fld(field)) {
11113 			ut_ad(!is_virtual);
11114 			/* Added stored column in m_s_cols list. */
11115 			dict_mem_table_add_s_col(
11116 				table, 0);
11117 		}
11118 
11119 		if (is_virtual && omit_virtual) {
11120 			continue;
11121 		}
11122 
11123 		j++;
11124 	}
11125 
11126 	ut_ad(have_vers_start == have_vers_end);
11127 	ut_ad(table->versioned() == have_vers_start);
11128 	ut_ad(!table->versioned() || table->vers_start != table->vers_end);
11129 
11130 	if (num_v) {
11131 		for (ulint i = 0, j = 0; i < n_cols; i++) {
11132 			dict_v_col_t*	v_col;
11133 
11134 			const Field* field = m_form->field[i];
11135 
11136 			if (field->stored_in_db()) {
11137 				continue;
11138 			}
11139 
11140 			v_col = dict_table_get_nth_v_col(table, j);
11141 
11142 			j++;
11143 
11144 			innodb_base_col_setup(table, field, v_col);
11145 		}
11146 	}
11147 
11148 	/** Fill base columns for the stored column present in the list. */
11149 	if (table->s_cols && table->s_cols->size()) {
11150 		for (ulint i = 0; i < n_cols; i++) {
11151 			Field*  field = m_form->field[i];
11152 
11153 			if (!innobase_is_s_fld(field)) {
11154 				continue;
11155 			}
11156 
11157 			dict_s_col_list::iterator       it;
11158 			for (it = table->s_cols->begin();
11159 			     it != table->s_cols->end(); ++it) {
11160 				dict_s_col_t	s_col = *it;
11161 
11162 				if (s_col.s_pos == i) {
11163 					innodb_base_col_setup_for_stored(
11164 						table, field, &s_col);
11165 					break;
11166 				}
11167 			}
11168 		}
11169 	}
11170 
11171 	/* Add the FTS doc_id hidden column. */
11172 	if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
11173 		fts_add_doc_id_column(table, heap);
11174 	}
11175 
11176 	dict_table_add_system_columns(table, heap);
11177 
11178 	if (table->is_temporary()) {
11179 		if ((options->encryption == 1
11180 		     && !innodb_encrypt_temporary_tables)
11181 		    || (options->encryption == 2
11182 			&& innodb_encrypt_temporary_tables)) {
11183 			push_warning_printf(m_thd,
11184 					    Sql_condition::WARN_LEVEL_WARN,
11185 					    ER_ILLEGAL_HA_CREATE_OPTION,
11186 					    "Ignoring encryption parameter during "
11187 					    "temporary table creation.");
11188 		}
11189 
11190 		/* Get a new table ID. FIXME: Make this a private
11191 		sequence, not shared with persistent tables! */
11192 		dict_table_assign_new_id(table, m_trx);
11193 		ut_ad(dict_tf_get_rec_format(table->flags)
11194 		      != REC_FORMAT_COMPRESSED);
11195 		table->space_id = SRV_TMP_SPACE_ID;
11196 		table->space = fil_system.temp_space;
11197 		table->add_to_cache();
11198 	} else {
11199 		if (err == DB_SUCCESS) {
11200 			err = row_create_table_for_mysql(
11201 				table, m_trx,
11202 				fil_encryption_t(options->encryption),
11203 				uint32_t(options->encryption_key_id));
11204 			m_drop_before_rollback = (err == DB_SUCCESS);
11205 		}
11206 
11207 		DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption",
11208 				DBUG_SUICIDE(););
11209 	}
11210 
11211 	mem_heap_free(heap);
11212 
11213 	DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
11214 			err = DB_TABLESPACE_EXISTS;);
11215 
11216 	switch (err) {
11217 	case DB_SUCCESS:
11218 		ut_ad(table);
11219 		m_table = table;
11220 		DBUG_RETURN(0);
11221 	default:
11222 		break;
11223 	case DB_DUPLICATE_KEY:
11224 	case DB_TABLESPACE_EXISTS:
11225 		char display_name[FN_REFLEN];
11226 		char* buf_end = innobase_convert_identifier(
11227 			display_name, sizeof(display_name) - 1,
11228 			m_table_name, strlen(m_table_name),
11229 			m_thd);
11230 
11231 		*buf_end = '\0';
11232 
11233 		my_error(err == DB_DUPLICATE_KEY
11234 			 ? ER_TABLE_EXISTS_ERROR
11235 			 : ER_TABLESPACE_EXISTS, MYF(0), display_name);
11236 	}
11237 
11238 	DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
11239 }
11240 
11241 /*****************************************************************//**
11242 Creates an index in an InnoDB database. */
11243 inline
11244 int
create_index(trx_t * trx,const TABLE * form,dict_table_t * table,uint key_num)11245 create_index(
11246 /*=========*/
11247 	trx_t*		trx,		/*!< in: InnoDB transaction handle */
11248 	const TABLE*	form,		/*!< in: information on table
11249 					columns and indexes */
11250 	dict_table_t*	table,		/*!< in,out: table */
11251 	uint		key_num)	/*!< in: index number */
11252 {
11253 	dict_index_t*	index;
11254 	int		error;
11255 	const KEY*	key;
11256 	ulint*		field_lengths;
11257 
11258 	DBUG_ENTER("create_index");
11259 
11260 	key = form->key_info + key_num;
11261 
11262 	/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
11263 	ut_a(innobase_strcasecmp(key->name.str, innobase_index_reserve_name) != 0);
11264 
11265 	if (key->flags & (HA_SPATIAL | HA_FULLTEXT)) {
11266 		/* Only one of these can be specified at a time. */
11267 		ut_ad(~key->flags & (HA_SPATIAL | HA_FULLTEXT));
11268 		ut_ad(!(key->flags & HA_NOSAME));
11269 		index = dict_mem_index_create(table, key->name.str,
11270 					      (key->flags & HA_SPATIAL)
11271 					      ? DICT_SPATIAL : DICT_FTS,
11272 					      key->user_defined_key_parts);
11273 
11274 		for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11275 			const Field* field = key->key_part[i].field;
11276 
11277 			/* We do not support special (Fulltext or Spatial)
11278 			index on virtual columns */
11279 			if (!field->stored_in_db()) {
11280 				ut_ad(0);
11281 				DBUG_RETURN(HA_ERR_UNSUPPORTED);
11282 			}
11283 
11284 			dict_mem_index_add_field(index, field->field_name.str,
11285 						 0);
11286 		}
11287 
11288 		DBUG_RETURN(convert_error_code_to_mysql(
11289 				    row_create_index_for_mysql(
11290 					    index, trx, NULL),
11291 				    table->flags, NULL));
11292 	}
11293 
11294 	ulint ind_type = 0;
11295 
11296 	if (key_num == form->s->primary_key) {
11297 		ind_type |= DICT_CLUSTERED;
11298 	}
11299 
11300 	if (key->flags & HA_NOSAME) {
11301 		ind_type |= DICT_UNIQUE;
11302 	}
11303 
11304 	field_lengths = (ulint*) my_malloc(//PSI_INSTRUMENT_ME,
11305 		key->user_defined_key_parts * sizeof *
11306 				field_lengths, MYF(MY_FAE));
11307 
11308 	/* We pass 0 as the space id, and determine at a lower level the space
11309 	id where to store the table */
11310 
11311 	index = dict_mem_index_create(table, key->name.str,
11312 				      ind_type, key->user_defined_key_parts);
11313 
11314 	for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11315 		KEY_PART_INFO*	key_part = key->key_part + i;
11316 		ulint		prefix_len;
11317 		ulint		col_type;
11318 		ulint		is_unsigned;
11319 
11320 
11321 		/* (The flag HA_PART_KEY_SEG denotes in MySQL a
11322 		column prefix field in an index: we only store a
11323 		specified number of first bytes of the column to
11324 		the index field.) The flag does not seem to be
11325 		properly set by MySQL. Let us fall back on testing
11326 		the length of the key part versus the column.
11327 		We first reach to the table's column; if the index is on a
11328 		prefix, key_part->field is not the table's column (it's a
11329 		"fake" field forged in open_table_from_share() with length
11330 		equal to the length of the prefix); so we have to go to
11331 		form->fied. */
11332 		Field*	field= form->field[key_part->field->field_index];
11333 		if (field == NULL)
11334 		  ut_error;
11335 
11336 		const char*	field_name = key_part->field->field_name.str;
11337 
11338 		col_type = get_innobase_type_from_mysql_type(
11339 			&is_unsigned, key_part->field);
11340 
11341 		if (DATA_LARGE_MTYPE(col_type)
11342 		    || (key_part->length < field->pack_length()
11343 			&& field->type() != MYSQL_TYPE_VARCHAR)
11344 		    || (field->type() == MYSQL_TYPE_VARCHAR
11345 			&& key_part->length < field->pack_length()
11346 			- ((Field_varstring*) field)->length_bytes)) {
11347 
11348 			switch (col_type) {
11349 			default:
11350 				prefix_len = key_part->length;
11351 				break;
11352 			case DATA_INT:
11353 			case DATA_FLOAT:
11354 			case DATA_DOUBLE:
11355 			case DATA_DECIMAL:
11356 				sql_print_error(
11357 					"MariaDB is trying to create a column"
11358 					" prefix index field, on an"
11359 					" inappropriate data type. Table"
11360 					" name %s, column name %s.",
11361 					form->s->table_name.str,
11362 					key_part->field->field_name.str);
11363 
11364 				prefix_len = 0;
11365 			}
11366 		} else {
11367 			prefix_len = 0;
11368 		}
11369 
11370 		field_lengths[i] = key_part->length;
11371 
11372 		if (!key_part->field->stored_in_db()) {
11373 			index->type |= DICT_VIRTUAL;
11374 		}
11375 
11376 		dict_mem_index_add_field(index, field_name, prefix_len);
11377 	}
11378 
11379 	ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
11380 
11381 	/* Even though we've defined max_supported_key_part_length, we
11382 	still do our own checking using field_lengths to be absolutely
11383 	sure we don't create too long indexes. */
11384 	ulint flags = table->flags;
11385 
11386 	error = convert_error_code_to_mysql(
11387 		row_create_index_for_mysql(index, trx, field_lengths),
11388 		flags, NULL);
11389 
11390 	my_free(field_lengths);
11391 
11392 	DBUG_RETURN(error);
11393 }
11394 
11395 /** Return a display name for the row format
11396 @param[in]	row_format	Row Format
11397 @return row format name */
11398 static
11399 const char*
get_row_format_name(enum row_type row_format)11400 get_row_format_name(
11401 	enum row_type	row_format)
11402 {
11403 	switch (row_format) {
11404 	case ROW_TYPE_COMPACT:
11405 		return("COMPACT");
11406 	case ROW_TYPE_COMPRESSED:
11407 		return("COMPRESSED");
11408 	case ROW_TYPE_DYNAMIC:
11409 		return("DYNAMIC");
11410 	case ROW_TYPE_REDUNDANT:
11411 		return("REDUNDANT");
11412 	case ROW_TYPE_DEFAULT:
11413 		return("DEFAULT");
11414 	case ROW_TYPE_FIXED:
11415 		return("FIXED");
11416 	case ROW_TYPE_PAGE:
11417 	case ROW_TYPE_NOT_USED:
11418 		break;
11419 	}
11420 	return("NOT USED");
11421 }
11422 
11423 /** Validate DATA DIRECTORY option.
11424 @return true if valid, false if not. */
11425 bool
create_option_data_directory_is_valid()11426 create_table_info_t::create_option_data_directory_is_valid()
11427 {
11428 	bool		is_valid = true;
11429 
11430 	ut_ad(m_create_info->data_file_name
11431 	      && m_create_info->data_file_name[0] != '\0');
11432 
11433 	/* Use DATA DIRECTORY only with file-per-table. */
11434 	if (!m_allow_file_per_table) {
11435 		push_warning(
11436 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11437 			ER_ILLEGAL_HA_CREATE_OPTION,
11438 			"InnoDB: DATA DIRECTORY requires"
11439 			" innodb_file_per_table.");
11440 		is_valid = false;
11441 	}
11442 
11443 	/* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
11444 	if (m_create_info->options & HA_LEX_CREATE_TMP_TABLE) {
11445 		push_warning(
11446 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11447 			ER_ILLEGAL_HA_CREATE_OPTION,
11448 			"InnoDB: DATA DIRECTORY cannot be used"
11449 			" for TEMPORARY tables.");
11450 		is_valid = false;
11451 	}
11452 
11453 	/* We check for a DATA DIRECTORY mixed with TABLESPACE in
11454 	create_option_tablespace_is_valid(), no need to here. */
11455 
11456 	return(is_valid);
11457 }
11458 
11459 /** Validate the create options. Check that the options KEY_BLOCK_SIZE,
11460 ROW_FORMAT, DATA DIRECTORY, TEMPORARY are compatible with
11461 each other and other settings.  These CREATE OPTIONS are not validated
11462 here unless innodb_strict_mode is on. With strict mode, this function
11463 will report each problem it finds using a custom message with error
11464 code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
11465 @return NULL if valid, string name of bad option if not. */
11466 const char*
create_options_are_invalid()11467 create_table_info_t::create_options_are_invalid()
11468 {
11469 	bool	has_key_block_size = (m_create_info->key_block_size != 0);
11470 
11471 	const char*	ret = NULL;
11472 	enum row_type	row_format	= m_create_info->row_type;
11473 	const bool	is_temp
11474 		= m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
11475 
11476 	ut_ad(m_thd != NULL);
11477 
11478 	/* If innodb_strict_mode is not set don't do any more validation. */
11479 	if (!THDVAR(m_thd, strict_mode)) {
11480 		return(NULL);
11481 	}
11482 
11483 	/* Check if a non-zero KEY_BLOCK_SIZE was specified. */
11484 	if (has_key_block_size) {
11485 		if (is_temp) {
11486 			my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11487 				 MYF(0));
11488 			return("KEY_BLOCK_SIZE");
11489 		}
11490 
11491 		switch (m_create_info->key_block_size) {
11492 			ulint	kbs_max;
11493 		case 1:
11494 		case 2:
11495 		case 4:
11496 		case 8:
11497 		case 16:
11498 			/* The maximum KEY_BLOCK_SIZE (KBS) is
11499 			UNIV_PAGE_SIZE_MAX. But if srv_page_size is
11500 			smaller than UNIV_PAGE_SIZE_MAX, the maximum
11501 			KBS is also smaller. */
11502 			kbs_max = ut_min(
11503 				1U << (UNIV_PAGE_SSIZE_MAX - 1),
11504 				1U << (PAGE_ZIP_SSIZE_MAX - 1));
11505 			if (m_create_info->key_block_size > kbs_max) {
11506 				push_warning_printf(
11507 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11508 					ER_ILLEGAL_HA_CREATE_OPTION,
11509 					"InnoDB: KEY_BLOCK_SIZE=%ld"
11510 					" cannot be larger than %ld.",
11511 					m_create_info->key_block_size,
11512 					kbs_max);
11513 				ret = "KEY_BLOCK_SIZE";
11514 			}
11515 
11516 			/* Valid KEY_BLOCK_SIZE, check its dependencies. */
11517 			if (!m_allow_file_per_table) {
11518 				push_warning(
11519 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11520 					ER_ILLEGAL_HA_CREATE_OPTION,
11521 					"InnoDB: KEY_BLOCK_SIZE requires"
11522 					" innodb_file_per_table.");
11523 				ret = "KEY_BLOCK_SIZE";
11524 			}
11525 			break;
11526 		default:
11527 			push_warning_printf(
11528 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11529 				ER_ILLEGAL_HA_CREATE_OPTION,
11530 				"InnoDB: invalid KEY_BLOCK_SIZE = %u."
11531 				" Valid values are [1, 2, 4, 8, 16]",
11532 				(uint) m_create_info->key_block_size);
11533 			ret = "KEY_BLOCK_SIZE";
11534 			break;
11535 		}
11536 	}
11537 
11538 	/* Check for a valid InnoDB ROW_FORMAT specifier and
11539 	other incompatibilities. */
11540 	switch (row_format) {
11541 	case ROW_TYPE_COMPRESSED:
11542 		if (is_temp) {
11543 			my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11544 				 MYF(0));
11545 			return("ROW_FORMAT");
11546 		}
11547 		if (!m_allow_file_per_table) {
11548 			push_warning_printf(
11549 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11550 				ER_ILLEGAL_HA_CREATE_OPTION,
11551 				"InnoDB: ROW_FORMAT=%s requires"
11552 				" innodb_file_per_table.",
11553 				get_row_format_name(row_format));
11554 			ret = "ROW_FORMAT";
11555 		}
11556 		break;
11557 	case ROW_TYPE_DYNAMIC:
11558 	case ROW_TYPE_COMPACT:
11559 	case ROW_TYPE_REDUNDANT:
11560 		if (has_key_block_size) {
11561 			push_warning_printf(
11562 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11563 				ER_ILLEGAL_HA_CREATE_OPTION,
11564 				"InnoDB: cannot specify ROW_FORMAT = %s"
11565 				" with KEY_BLOCK_SIZE.",
11566 				get_row_format_name(row_format));
11567 			ret = "KEY_BLOCK_SIZE";
11568 		}
11569 		break;
11570 	case ROW_TYPE_DEFAULT:
11571 		break;
11572 	case ROW_TYPE_FIXED:
11573 	case ROW_TYPE_PAGE:
11574 	case ROW_TYPE_NOT_USED:
11575 		push_warning(
11576 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11577 			ER_ILLEGAL_HA_CREATE_OPTION,
11578 			"InnoDB: invalid ROW_FORMAT specifier.");
11579 		ret = "ROW_TYPE";
11580 		break;
11581 	}
11582 
11583 	if (!m_create_info->data_file_name
11584 	    || !m_create_info->data_file_name[0]) {
11585 	} else if (!my_use_symdir) {
11586 		my_error(WARN_OPTION_IGNORED, MYF(ME_JUST_WARNING),
11587 			 "DATA DIRECTORY");
11588 	} else if (!create_option_data_directory_is_valid()) {
11589 		ret = "DATA DIRECTORY";
11590 	}
11591 
11592 	/* Do not allow INDEX_DIRECTORY */
11593 	if (m_create_info->index_file_name) {
11594 		push_warning_printf(
11595 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11596 			ER_ILLEGAL_HA_CREATE_OPTION,
11597 			"InnoDB: INDEX DIRECTORY is not supported");
11598 		ret = "INDEX DIRECTORY";
11599 	}
11600 
11601 	/* Don't support compressed table when page size > 16k. */
11602 	if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
11603 	    && srv_page_size > UNIV_PAGE_SIZE_DEF) {
11604 		push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
11605 			     ER_ILLEGAL_HA_CREATE_OPTION,
11606 			     "InnoDB: Cannot create a COMPRESSED table"
11607 			     " when innodb_page_size > 16k.");
11608 
11609 		if (has_key_block_size) {
11610 			ret = "KEY_BLOCK_SIZE";
11611 		} else {
11612 			ret = "ROW_TYPE";
11613 		}
11614 	}
11615 
11616 	return(ret);
11617 }
11618 
11619 /*****************************************************************//**
11620 Check engine specific table options not handled by SQL-parser.
11621 @return	NULL if valid, string if not */
11622 const char*
check_table_options()11623 create_table_info_t::check_table_options()
11624 {
11625 	enum row_type row_format = m_create_info->row_type;
11626 	const ha_table_option_struct *options= m_form->s->option_struct;
11627 
11628 	switch (options->encryption) {
11629 	case FIL_ENCRYPTION_OFF:
11630 		if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
11631 			push_warning(
11632 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11633 				HA_WRONG_CREATE_OPTION,
11634 				"InnoDB: ENCRYPTED=NO implies"
11635 				" ENCRYPTION_KEY_ID=1");
11636 			compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
11637 		}
11638 		if (srv_encrypt_tables != 2) {
11639 			break;
11640 		}
11641 		push_warning(
11642 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11643 			HA_WRONG_CREATE_OPTION,
11644 			"InnoDB: ENCRYPTED=NO cannot be used with"
11645 			" innodb_encrypt_tables=FORCE");
11646 		return "ENCRYPTED";
11647 	case FIL_ENCRYPTION_DEFAULT:
11648 		if (!srv_encrypt_tables) {
11649 			break;
11650 		}
11651 		/* fall through */
11652 	case FIL_ENCRYPTION_ON:
11653 		const uint32_t key_id = uint32_t(options->encryption_key_id);
11654 		if (!encryption_key_id_exists(key_id)) {
11655 			push_warning_printf(
11656 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11657 				HA_WRONG_CREATE_OPTION,
11658 				"InnoDB: ENCRYPTION_KEY_ID %u not available",
11659 				key_id);
11660 			return "ENCRYPTION_KEY_ID";
11661 		}
11662 
11663 		/* Currently we do not support encryption for spatial indexes.
11664 		Do not allow ENCRYPTED=YES if any SPATIAL INDEX exists. */
11665 		if (options->encryption != FIL_ENCRYPTION_ON) {
11666 			break;
11667 		}
11668 		for (ulint i = 0; i < m_form->s->keys; i++) {
11669 			if (m_form->key_info[i].flags & HA_SPATIAL) {
11670 				push_warning(m_thd,
11671 					     Sql_condition::WARN_LEVEL_WARN,
11672 					     HA_ERR_UNSUPPORTED,
11673 					     "InnoDB: ENCRYPTED=YES is not"
11674 					     " supported for SPATIAL INDEX");
11675 				return "ENCRYPTED";
11676 			}
11677 		}
11678 	}
11679 
11680 	if (!m_allow_file_per_table
11681 	    && options->encryption != FIL_ENCRYPTION_DEFAULT) {
11682 		push_warning(
11683 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11684 			HA_WRONG_CREATE_OPTION,
11685 			"InnoDB: ENCRYPTED requires innodb_file_per_table");
11686 		return "ENCRYPTED";
11687  	}
11688 
11689 	/* Check page compression requirements */
11690 	if (options->page_compressed) {
11691 
11692 		if (row_format == ROW_TYPE_COMPRESSED) {
11693 			push_warning(
11694 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11695 				HA_WRONG_CREATE_OPTION,
11696 				"InnoDB: PAGE_COMPRESSED table can't have"
11697 				" ROW_TYPE=COMPRESSED");
11698 			return "PAGE_COMPRESSED";
11699 		}
11700 
11701 		switch (row_format) {
11702 		default:
11703 			break;
11704 		case ROW_TYPE_DEFAULT:
11705 			if (m_default_row_format
11706 			    != DEFAULT_ROW_FORMAT_REDUNDANT) {
11707 				break;
11708 			}
11709 			/* fall through */
11710 		case ROW_TYPE_REDUNDANT:
11711 			push_warning(
11712 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11713 				HA_WRONG_CREATE_OPTION,
11714 				"InnoDB: PAGE_COMPRESSED table can't have"
11715 				" ROW_TYPE=REDUNDANT");
11716 			return "PAGE_COMPRESSED";
11717 		}
11718 
11719 		if (!m_allow_file_per_table) {
11720 			push_warning(
11721 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11722 				HA_WRONG_CREATE_OPTION,
11723 				"InnoDB: PAGE_COMPRESSED requires"
11724 				" innodb_file_per_table.");
11725 			return "PAGE_COMPRESSED";
11726 		}
11727 
11728 		if (m_create_info->key_block_size) {
11729 			push_warning(
11730 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11731 				HA_WRONG_CREATE_OPTION,
11732 				"InnoDB: PAGE_COMPRESSED table can't have"
11733 				" key_block_size");
11734 			return "PAGE_COMPRESSED";
11735 		}
11736 	}
11737 
11738 	/* Check page compression level requirements, some of them are
11739 	already checked above */
11740 	if (options->page_compression_level != 0) {
11741 		if (options->page_compressed == false) {
11742 			push_warning(
11743 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11744 				HA_WRONG_CREATE_OPTION,
11745 				"InnoDB: PAGE_COMPRESSION_LEVEL requires"
11746 				" PAGE_COMPRESSED");
11747 			return "PAGE_COMPRESSION_LEVEL";
11748 		}
11749 
11750 		if (options->page_compression_level < 1 || options->page_compression_level > 9) {
11751 			push_warning_printf(
11752 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11753 				HA_WRONG_CREATE_OPTION,
11754 				"InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
11755 				" Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
11756 				options->page_compression_level);
11757 			return "PAGE_COMPRESSION_LEVEL";
11758 		}
11759 	}
11760 
11761 	return NULL;
11762 }
11763 
11764 /*****************************************************************//**
11765 Update create_info.  Used in SHOW CREATE TABLE et al. */
11766 
11767 void
update_create_info(HA_CREATE_INFO * create_info)11768 ha_innobase::update_create_info(
11769 /*============================*/
11770 	HA_CREATE_INFO*	create_info)	/*!< in/out: create info */
11771 {
11772 	if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
11773 		info(HA_STATUS_AUTO);
11774 		create_info->auto_increment_value = stats.auto_increment_value;
11775 	}
11776 
11777 	if (m_prebuilt->table->is_temporary()) {
11778 		return;
11779 	}
11780 
11781 	/* Update the DATA DIRECTORY name from SYS_DATAFILES. */
11782 	dict_get_and_save_data_dir_path(m_prebuilt->table, false);
11783 
11784 	if (m_prebuilt->table->data_dir_path) {
11785 		create_info->data_file_name = m_prebuilt->table->data_dir_path;
11786 	}
11787 }
11788 
11789 /*****************************************************************//**
11790 Initialize the table FTS stopword list
11791 @return TRUE if success */
11792 ibool
innobase_fts_load_stopword(dict_table_t * table,trx_t * trx,THD * thd)11793 innobase_fts_load_stopword(
11794 /*=======================*/
11795 	dict_table_t*	table,	/*!< in: Table has the FTS */
11796 	trx_t*		trx,	/*!< in: transaction */
11797 	THD*		thd)	/*!< in: current thread */
11798 {
11799   const char *stopword_table= THDVAR(thd, ft_user_stopword_table);
11800   if (!stopword_table)
11801   {
11802     mysql_mutex_lock(&LOCK_global_system_variables);
11803     if (innobase_server_stopword_table)
11804       stopword_table= thd_strdup(thd, innobase_server_stopword_table);
11805     mysql_mutex_unlock(&LOCK_global_system_variables);
11806   }
11807 
11808   return fts_load_stopword(table, trx, stopword_table,
11809                            THDVAR(thd, ft_enable_stopword), false);
11810 }
11811 
11812 /** Parse the table name into normal name and remote path if needed.
11813 @param[in]	name	Table name (db/table or full path).
11814 @return 0 if successful, otherwise, error number */
11815 int
parse_table_name(const char * name)11816 create_table_info_t::parse_table_name(
11817 	const char*
11818 #ifdef _WIN32
11819 	name
11820 #endif
11821 				      )
11822 {
11823 	DBUG_ENTER("parse_table_name");
11824 
11825 #ifdef _WIN32
11826 	/* Names passed in from server are in two formats:
11827 	1. <database_name>/<table_name>: for normal table creation
11828 	2. full path: for temp table creation, or DATA DIRECTORY.
11829 
11830 	When srv_file_per_table is on and mysqld_embedded is off,
11831 	check for full path pattern, i.e.
11832 	X:\dir\...,		X is a driver letter, or
11833 	\\dir1\dir2\...,	UNC path
11834 	returns error if it is in full path format, but not creating a temp.
11835 	table. Currently InnoDB does not support symbolic link on Windows. */
11836 
11837 	if (m_innodb_file_per_table
11838 	    && !mysqld_embedded
11839 	    && !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
11840 
11841 		if ((name[1] == ':')
11842 		    || (name[0] == '\\' && name[1] == '\\')) {
11843 			sql_print_error("Cannot create table %s\n", name);
11844 			DBUG_RETURN(HA_ERR_GENERIC);
11845 		}
11846 	}
11847 #endif
11848 
11849 	m_remote_path[0] = '\0';
11850 
11851 	/* Make sure DATA DIRECTORY is compatible with other options
11852 	and set the remote path.  In the case of either;
11853 	  CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
11854 	  CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ;
11855 	we ignore the DATA DIRECTORY. */
11856 	if (m_create_info->data_file_name
11857 	    && m_create_info->data_file_name[0]
11858 	    && my_use_symdir) {
11859 		if (!create_option_data_directory_is_valid()) {
11860 			push_warning_printf(
11861 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11862 				WARN_OPTION_IGNORED,
11863 				ER_DEFAULT(WARN_OPTION_IGNORED),
11864 				"DATA DIRECTORY");
11865 
11866 			m_flags &= ~DICT_TF_MASK_DATA_DIR;
11867 		} else {
11868 			strncpy(m_remote_path,
11869 				m_create_info->data_file_name,
11870 				FN_REFLEN - 1);
11871 		}
11872 	}
11873 
11874 	if (m_create_info->index_file_name) {
11875 		my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
11876 			"INDEX DIRECTORY");
11877 	}
11878 
11879 	DBUG_RETURN(0);
11880 }
11881 
11882 /** Determine InnoDB table flags.
11883 If strict_mode=OFF, this will adjust the flags to what should be assumed.
11884 @retval true on success
11885 @retval false on error */
innobase_table_flags()11886 bool create_table_info_t::innobase_table_flags()
11887 {
11888 	DBUG_ENTER("innobase_table_flags");
11889 
11890 	const char*	fts_doc_id_index_bad = NULL;
11891 	ulint		zip_ssize = 0;
11892 	enum row_type	row_type;
11893 	rec_format_t	innodb_row_format =
11894 		get_row_format(m_default_row_format);
11895 	const bool	is_temp
11896 		= m_create_info->options & HA_LEX_CREATE_TMP_TABLE;
11897 	bool		zip_allowed
11898 		= !is_temp;
11899 
11900 	const ulint	zip_ssize_max =
11901 		ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
11902 		       static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
11903 
11904 	ha_table_option_struct *options= m_form->s->option_struct;
11905 
11906 	m_flags = 0;
11907 	m_flags2 = 0;
11908 
11909 	/* Check if there are any FTS indexes defined on this table. */
11910 	for (uint i = 0; i < m_form->s->keys; i++) {
11911 		const KEY*	key = &m_form->key_info[i];
11912 
11913 		if (key->flags & HA_FULLTEXT) {
11914 			m_flags2 |= DICT_TF2_FTS;
11915 
11916 			/* We don't support FTS indexes in temporary
11917 			tables. */
11918 			if (is_temp) {
11919 				my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
11920 				DBUG_RETURN(false);
11921 			}
11922 
11923 			if (fts_doc_id_index_bad) {
11924 				goto index_bad;
11925 			}
11926 		}
11927 
11928 		if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
11929 			continue;
11930 		}
11931 
11932 		/* Do a pre-check on FTS DOC ID index */
11933 		if (!(key->flags & HA_NOSAME)
11934 		    || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
11935 		    || strcmp(key->key_part[0].field->field_name.str,
11936 			      FTS_DOC_ID_COL_NAME)) {
11937 			fts_doc_id_index_bad = key->name.str;
11938 		}
11939 
11940 		if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
11941 index_bad:
11942 			my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
11943 				 fts_doc_id_index_bad);
11944 			DBUG_RETURN(false);
11945 		}
11946 	}
11947 
11948 	if (m_create_info->key_block_size > 0) {
11949 		/* The requested compressed page size (key_block_size)
11950 		is given in kilobytes. If it is a valid number, store
11951 		that value as the number of log2 shifts from 512 in
11952 		zip_ssize. Zero means it is not compressed. */
11953 		ulint	zssize;		/* Zip Shift Size */
11954 		ulint	kbsize;		/* Key Block Size */
11955 		for (zssize = kbsize = 1;
11956 		     zssize <= zip_ssize_max;
11957 		     zssize++, kbsize <<= 1) {
11958 			if (kbsize == m_create_info->key_block_size) {
11959 				zip_ssize = zssize;
11960 				break;
11961 			}
11962 		}
11963 
11964 		/* Make sure compressed row format is allowed. */
11965 		if (is_temp) {
11966 			push_warning(
11967 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11968 				ER_ILLEGAL_HA_CREATE_OPTION,
11969 				"InnoDB: KEY_BLOCK_SIZE is ignored"
11970 				" for TEMPORARY TABLE.");
11971 			zip_allowed = false;
11972 		} else if (!m_allow_file_per_table) {
11973 			push_warning(
11974 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11975 				ER_ILLEGAL_HA_CREATE_OPTION,
11976 				"InnoDB: KEY_BLOCK_SIZE requires"
11977 				" innodb_file_per_table.");
11978 			zip_allowed = false;
11979 		}
11980 
11981 		if (!zip_allowed
11982 		    || zssize > zip_ssize_max) {
11983 			push_warning_printf(
11984 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11985 				ER_ILLEGAL_HA_CREATE_OPTION,
11986 				"InnoDB: ignoring KEY_BLOCK_SIZE=%u.",
11987 				(uint) m_create_info->key_block_size);
11988 		}
11989 	}
11990 
11991 	row_type = m_create_info->row_type;
11992 
11993 	if (zip_ssize && zip_allowed) {
11994 		/* if ROW_FORMAT is set to default,
11995 		automatically change it to COMPRESSED. */
11996 		if (row_type == ROW_TYPE_DEFAULT) {
11997 			row_type = ROW_TYPE_COMPRESSED;
11998 		} else if (row_type != ROW_TYPE_COMPRESSED) {
11999 			/* ROW_FORMAT other than COMPRESSED
12000 			ignores KEY_BLOCK_SIZE.  It does not
12001 			make sense to reject conflicting
12002 			KEY_BLOCK_SIZE and ROW_FORMAT, because
12003 			such combinations can be obtained
12004 			with ALTER TABLE anyway. */
12005 			push_warning_printf(
12006 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12007 				ER_ILLEGAL_HA_CREATE_OPTION,
12008 				"InnoDB: ignoring KEY_BLOCK_SIZE=%u"
12009 				" unless ROW_FORMAT=COMPRESSED.",
12010 				(uint) m_create_info->key_block_size);
12011 			zip_allowed = false;
12012 		}
12013 	} else {
12014 		/* zip_ssize == 0 means no KEY_BLOCK_SIZE. */
12015 		if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
12016 			/* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
12017 			implies half the maximum KEY_BLOCK_SIZE(*1k) or
12018 			srv_page_size, whichever is less. */
12019 			zip_ssize = zip_ssize_max - 1;
12020 		}
12021 	}
12022 
12023 	/* Validate the row format.  Correct it if necessary */
12024 
12025 	switch (row_type) {
12026 	case ROW_TYPE_REDUNDANT:
12027 		innodb_row_format = REC_FORMAT_REDUNDANT;
12028 		break;
12029 	case ROW_TYPE_COMPACT:
12030 		innodb_row_format = REC_FORMAT_COMPACT;
12031 		break;
12032 	case ROW_TYPE_COMPRESSED:
12033 		if (is_temp) {
12034 			push_warning_printf(
12035 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12036 				ER_ILLEGAL_HA_CREATE_OPTION,
12037 				"InnoDB: ROW_FORMAT=%s is ignored for"
12038 				" TEMPORARY TABLE.",
12039 				get_row_format_name(row_type));
12040 		} else if (!m_allow_file_per_table) {
12041 			push_warning_printf(
12042 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12043 				ER_ILLEGAL_HA_CREATE_OPTION,
12044 				"InnoDB: ROW_FORMAT=COMPRESSED requires"
12045 				" innodb_file_per_table.");
12046 		} else {
12047 			innodb_row_format = REC_FORMAT_COMPRESSED;
12048 			break;
12049 		}
12050 		zip_allowed = false;
12051 		/* Set ROW_FORMAT = COMPACT */
12052 		/* fall through */
12053 	case ROW_TYPE_NOT_USED:
12054 	case ROW_TYPE_FIXED:
12055 	case ROW_TYPE_PAGE:
12056 		push_warning(
12057 			m_thd, Sql_condition::WARN_LEVEL_WARN,
12058 			ER_ILLEGAL_HA_CREATE_OPTION,
12059 			"InnoDB: assuming ROW_FORMAT=DYNAMIC.");
12060 		/* fall through */
12061 	case ROW_TYPE_DYNAMIC:
12062 		innodb_row_format = REC_FORMAT_DYNAMIC;
12063 		break;
12064 	case ROW_TYPE_DEFAULT:
12065 		;
12066 	}
12067 
12068 	/* Don't support compressed table when page size > 16k. */
12069 	if (zip_allowed && zip_ssize && srv_page_size > UNIV_PAGE_SIZE_DEF) {
12070 		push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
12071 			     ER_ILLEGAL_HA_CREATE_OPTION,
12072 			     "InnoDB: Cannot create a COMPRESSED table"
12073 			     " when innodb_page_size > 16k."
12074 			     " Assuming ROW_FORMAT=DYNAMIC.");
12075 		zip_allowed = false;
12076 	}
12077 
12078 	ut_ad(!is_temp || !zip_allowed);
12079 	ut_ad(!is_temp || innodb_row_format != REC_FORMAT_COMPRESSED);
12080 
12081 	/* Set the table flags */
12082 	if (!zip_allowed) {
12083 		zip_ssize = 0;
12084 	}
12085 
12086 	if (is_temp) {
12087 		m_flags2 |= DICT_TF2_TEMPORARY;
12088 	} else if (m_use_file_per_table) {
12089 		m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE;
12090 	}
12091 
12092 	ulint level = ulint(options->page_compression_level);
12093 	if (!level) {
12094 		level = page_zip_level;
12095 		if (!level && options->page_compressed) {
12096 			push_warning_printf(
12097 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12098 				ER_ILLEGAL_HA_CREATE_OPTION,
12099 				"InnoDB: PAGE_COMPRESSED requires"
12100 				" PAGE_COMPRESSION_LEVEL or"
12101 				" innodb_compression_level > 0");
12102 			DBUG_RETURN(false);
12103 		}
12104 	}
12105 
12106 	/* Set the table flags */
12107 	dict_tf_set(&m_flags, innodb_row_format, zip_ssize,
12108 		    m_use_data_dir, options->page_compressed, level);
12109 
12110 	if (m_form->s->table_type == TABLE_TYPE_SEQUENCE) {
12111 		m_flags |= DICT_TF_MASK_NO_ROLLBACK;
12112 	}
12113 
12114 	/* Set the flags2 when create table or alter tables */
12115 	m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
12116 	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
12117 			m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
12118 
12119 	DBUG_RETURN(true);
12120 }
12121 
12122 /** Parse MERGE_THRESHOLD value from the string.
12123 @param[in]	thd	connection
12124 @param[in]	str	string which might include 'MERGE_THRESHOLD='
12125 @return	value parsed. 0 means not found or invalid value. */
12126 static
12127 ulint
innobase_parse_merge_threshold(THD * thd,const char * str)12128 innobase_parse_merge_threshold(
12129 	THD*		thd,
12130 	const char*	str)
12131 {
12132 	static const char*	label = "MERGE_THRESHOLD=";
12133 	static const size_t	label_len = strlen(label);
12134 	const char*		pos = str;
12135 
12136 	pos = strstr(str, label);
12137 
12138 	if (pos == NULL) {
12139 		return(0);
12140 	}
12141 
12142 	pos += label_len;
12143 
12144 	lint	ret = atoi(pos);
12145 
12146 	if (ret > 0 && ret <= 50) {
12147 		return(static_cast<ulint>(ret));
12148 	}
12149 
12150 	push_warning_printf(
12151 		thd, Sql_condition::WARN_LEVEL_WARN,
12152 		ER_ILLEGAL_HA_CREATE_OPTION,
12153 		"InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE"
12154 		" statement. The value is ignored.");
12155 
12156 	return(0);
12157 }
12158 
12159 /** Parse hint for table and its indexes, and update the information
12160 in dictionary.
12161 @param[in]	thd		connection
12162 @param[in,out]	table		target table
12163 @param[in]	table_share	table definition */
12164 void
innobase_parse_hint_from_comment(THD * thd,dict_table_t * table,const TABLE_SHARE * table_share)12165 innobase_parse_hint_from_comment(
12166 	THD*			thd,
12167 	dict_table_t*		table,
12168 	const TABLE_SHARE*	table_share)
12169 {
12170 	ulint	merge_threshold_table;
12171 	ulint	merge_threshold_index[MAX_KEY];
12172 	bool	is_found[MAX_KEY];
12173 
12174 	if (table_share->comment.str != NULL) {
12175 		merge_threshold_table
12176 			= innobase_parse_merge_threshold(
12177 				thd, table_share->comment.str);
12178 	} else {
12179 		merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12180 	}
12181 
12182 	if (merge_threshold_table == 0) {
12183 		merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12184 	}
12185 
12186 	for (uint i = 0; i < table_share->keys; i++) {
12187 		KEY*	key_info = &table_share->key_info[i];
12188 
12189 		ut_ad(i < sizeof(merge_threshold_index)
12190 			  / sizeof(merge_threshold_index[0]));
12191 
12192 		if (key_info->flags & HA_USES_COMMENT
12193 		    && key_info->comment.str != NULL) {
12194 			merge_threshold_index[i]
12195 				= innobase_parse_merge_threshold(
12196 					thd, key_info->comment.str);
12197 		} else {
12198 			merge_threshold_index[i] = merge_threshold_table;
12199 		}
12200 
12201 		if (merge_threshold_index[i] == 0) {
12202 			merge_threshold_index[i] = merge_threshold_table;
12203 		}
12204 	}
12205 
12206 	/* update SYS_INDEX table */
12207 	if (!table->is_temporary()) {
12208 		for (uint i = 0; i < table_share->keys; i++) {
12209 			is_found[i] = false;
12210 		}
12211 
12212 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12213 		     index != NULL;
12214 		     index = UT_LIST_GET_NEXT(indexes, index)) {
12215 
12216 			if (dict_index_is_auto_gen_clust(index)) {
12217 
12218 				/* GEN_CLUST_INDEX should use
12219 				merge_threshold_table */
12220 				dict_index_set_merge_threshold(
12221 					index, merge_threshold_table);
12222 				continue;
12223 			}
12224 
12225 			for (uint i = 0; i < table_share->keys; i++) {
12226 				if (is_found[i]) {
12227 					continue;
12228 				}
12229 
12230 				KEY*	key_info = &table_share->key_info[i];
12231 
12232 				if (innobase_strcasecmp(
12233 					index->name, key_info->name.str) == 0) {
12234 
12235 					dict_index_set_merge_threshold(
12236 						index,
12237 						merge_threshold_index[i]);
12238 					is_found[i] = true;
12239 					break;
12240 				}
12241 			}
12242 		}
12243 	}
12244 
12245 	for (uint i = 0; i < table_share->keys; i++) {
12246 		is_found[i] = false;
12247 	}
12248 
12249 	/* update in memory */
12250 	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12251 	     index != NULL;
12252 	     index = UT_LIST_GET_NEXT(indexes, index)) {
12253 
12254 		if (dict_index_is_auto_gen_clust(index)) {
12255 
12256 			/* GEN_CLUST_INDEX should use merge_threshold_table */
12257 
12258 			/* x-lock index is needed to exclude concurrent
12259 			pessimistic tree operations */
12260 			rw_lock_x_lock(dict_index_get_lock(index));
12261 			index->merge_threshold = merge_threshold_table;
12262 			rw_lock_x_unlock(dict_index_get_lock(index));
12263 
12264 			continue;
12265 		}
12266 
12267 		for (uint i = 0; i < table_share->keys; i++) {
12268 			if (is_found[i]) {
12269 				continue;
12270 			}
12271 
12272 			KEY*	key_info = &table_share->key_info[i];
12273 
12274 			if (innobase_strcasecmp(
12275 				index->name, key_info->name.str) == 0) {
12276 
12277 				/* x-lock index is needed to exclude concurrent
12278 				pessimistic tree operations */
12279 				rw_lock_x_lock(dict_index_get_lock(index));
12280 				index->merge_threshold
12281 					= merge_threshold_index[i];
12282 				rw_lock_x_unlock(dict_index_get_lock(index));
12283 				is_found[i] = true;
12284 
12285 				break;
12286 			}
12287 		}
12288 	}
12289 }
12290 
12291 /** Set m_use_* flags. */
12292 void
set_tablespace_type(bool table_being_altered_is_file_per_table)12293 create_table_info_t::set_tablespace_type(
12294 	bool	table_being_altered_is_file_per_table)
12295 {
12296 	/** Allow file_per_table for this table either because:
12297 	1) the setting innodb_file_per_table=on,
12298 	2) the table being altered is currently file_per_table */
12299 	m_allow_file_per_table =
12300 		m_innodb_file_per_table
12301 		|| table_being_altered_is_file_per_table;
12302 
12303 	/* Ignore the current innodb-file-per-table setting if we are
12304 	creating a temporary table. */
12305 	m_use_file_per_table =
12306 		m_allow_file_per_table
12307 		&& !(m_create_info->options & HA_LEX_CREATE_TMP_TABLE);
12308 
12309 	/* DATA DIRECTORY must have m_use_file_per_table but cannot be
12310 	used with TEMPORARY tables. */
12311 	m_use_data_dir =
12312 		m_use_file_per_table
12313 		&& m_create_info->data_file_name
12314 		&& m_create_info->data_file_name[0]
12315 		&& my_use_symdir;
12316 }
12317 
12318 /** Initialize the create_table_info_t object.
12319 @return error number */
12320 int
initialize()12321 create_table_info_t::initialize()
12322 {
12323 	DBUG_ENTER("create_table_info_t::initialize");
12324 
12325 	ut_ad(m_thd != NULL);
12326 	ut_ad(m_create_info != NULL);
12327 
12328 	if (m_form->s->fields > REC_MAX_N_USER_FIELDS) {
12329 		DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
12330 	}
12331 
12332 	/* Check for name conflicts (with reserved name) for
12333 	any user indices to be created. */
12334 	if (innobase_index_name_is_reserved(m_thd, m_form->key_info,
12335 					    m_form->s->keys)) {
12336 		DBUG_RETURN(HA_ERR_WRONG_INDEX);
12337 	}
12338 
12339 	/* Get the transaction associated with the current thd, or create one
12340 	if not yet created */
12341 
12342 	check_trx_exists(m_thd);
12343 
12344 	DBUG_RETURN(0);
12345 }
12346 
12347 
12348 /** Check if a virtual column is part of a fulltext or spatial index. */
12349 bool
gcols_in_fulltext_or_spatial()12350 create_table_info_t::gcols_in_fulltext_or_spatial()
12351 {
12352 	for (ulint i = 0; i < m_form->s->keys; i++) {
12353 		const KEY*	key = m_form->key_info + i;
12354 		if (!(key->flags & (HA_SPATIAL | HA_FULLTEXT))) {
12355 			continue;
12356 		}
12357 		for (ulint j = 0; j < key->user_defined_key_parts; j++) {
12358 			/* We do not support special (Fulltext or
12359 			Spatial) index on virtual columns */
12360 			if (!key->key_part[j].field->stored_in_db()) {
12361 				my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0));
12362 				return true;
12363 			}
12364 		}
12365 	}
12366 	return false;
12367 }
12368 
12369 
12370 /** Prepare to create a new table to an InnoDB database.
12371 @param[in]	name	Table name
12372 @return error number */
prepare_create_table(const char * name,bool strict)12373 int create_table_info_t::prepare_create_table(const char* name, bool strict)
12374 {
12375 	DBUG_ENTER("prepare_create_table");
12376 
12377 	ut_ad(m_thd != NULL);
12378 	ut_ad(m_create_info != NULL);
12379 
12380 	set_tablespace_type(false);
12381 
12382 	normalize_table_name(m_table_name, name);
12383 
12384 	/* Validate table options not handled by the SQL-parser */
12385 	if (check_table_options()) {
12386 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12387 	}
12388 
12389 	/* Validate the create options if innodb_strict_mode is set.
12390 	Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION
12391 	because InnoDB might actually support the option, but not under
12392 	the current conditions.  The messages revealing the specific
12393 	problems are reported inside this function. */
12394 	if (strict && create_options_are_invalid()) {
12395 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12396 	}
12397 
12398 	/* Create the table flags and flags2 */
12399 	if (!innobase_table_flags()) {
12400 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12401 	}
12402 
12403 	if (high_level_read_only) {
12404 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
12405 	}
12406 
12407 	if (gcols_in_fulltext_or_spatial()) {
12408 		DBUG_RETURN(HA_ERR_UNSUPPORTED);
12409 	}
12410 
12411 	for (uint i = 0; i < m_form->s->keys; i++) {
12412 		const size_t max_field_len
12413 		    = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags);
12414 		const KEY& key = m_form->key_info[i];
12415 
12416 		if (key.algorithm == HA_KEY_ALG_FULLTEXT) {
12417 			continue;
12418 		}
12419 
12420 		if (too_big_key_part_length(max_field_len, key)) {
12421 			DBUG_RETURN(convert_error_code_to_mysql(
12422 			    DB_TOO_BIG_INDEX_COL, m_flags, NULL));
12423 		}
12424 	}
12425 
12426 	DBUG_RETURN(parse_table_name(name));
12427 }
12428 
/** Create the internal innodb table.
Creates the table definition, then the clustered index (generated or
PRIMARY KEY), the common FTS tables when DICT_TF2_FTS is set, the
remaining indexes, the FOREIGN KEY constraints, and finally checks the
maximum row size.
@param create_fk	whether to add FOREIGN KEY constraints
@return 0 on success, otherwise an error number (-1 when the index named
FTS_DOC_ID_INDEX_NAME is incorrectly defined) */
int create_table_info_t::create_table(bool create_fk)
{
	int		error;
	int		primary_key_no;
	uint		i;

	DBUG_ENTER("create_table");

	/* Look for a primary key */
	primary_key_no = (m_form->s->primary_key != MAX_KEY ?
			  (int) m_form->s->primary_key : -1);

	/* Our function innobase_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */
	ut_a(primary_key_no == -1 || primary_key_no == 0);

	error = create_table_def();

	if (error) {
		DBUG_RETURN(error);
	}

	DBUG_ASSERT(m_drop_before_rollback
		    == !(m_flags2 & DICT_TF2_TEMPORARY));

	/* Create the keys */

	if (m_form->s->keys == 0 || primary_key_no == -1) {
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
		by InnoDB */
		ulint flags = m_table->flags;
		dict_index_t* index = dict_mem_index_create(
			m_table, innobase_index_reserve_name,
			DICT_CLUSTERED, 0);
		error = convert_error_code_to_mysql(
			row_create_index_for_mysql(index, m_trx, NULL),
			flags, m_thd);
		if (error) {
			DBUG_RETURN(error);
		}
	}

	if (primary_key_no != -1) {
		/* In InnoDB the clustered index must always be created
		first */
		if ((error = create_index(m_trx, m_form, m_table,
					  (uint) primary_key_no))) {
			DBUG_RETURN(error);
		}
	}

	/* Create the ancillary tables that are common to all FTS indexes on
	this table. */
	if (m_flags2 & DICT_TF2_FTS) {
		fts_doc_id_index_enum	ret;

		/* Check whether there already exists FTS_DOC_ID_INDEX */
		ret = innobase_fts_check_doc_id_index_in_def(
			m_form->s->keys, m_form->key_info);

		switch (ret) {
		case FTS_INCORRECT_DOC_ID_INDEX:
			/* The reserved index name is used by an index of
			the wrong type: warn, release the FTS structure
			and fail. */
			push_warning_printf(m_thd,
					    Sql_condition::WARN_LEVEL_WARN,
					    ER_WRONG_NAME_FOR_INDEX,
					    " InnoDB: Index name %s is reserved"
					    " for the unique index on"
					    " FTS_DOC_ID column for FTS"
					    " Document ID indexing"
					    " on table %s. Please check"
					    " the index definition to"
					    " make sure it is of correct"
					    " type\n",
					    FTS_DOC_ID_INDEX_NAME,
					    m_table->name.m_name);

			if (m_table->fts) {
				fts_free(m_table);
			}

			my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
				 FTS_DOC_ID_INDEX_NAME);
			DBUG_RETURN(-1);
		case FTS_EXIST_DOC_ID_INDEX:
		case FTS_NOT_EXIST_DOC_ID_INDEX:
			break;
		}

		/* The last argument tells whether the table definition
		already contains FTS_DOC_ID_INDEX. */
		dberr_t	err = fts_create_common_tables(
			m_trx, m_table,
			(ret == FTS_EXIST_DOC_ID_INDEX));

		error = convert_error_code_to_mysql(err, 0, NULL);

		if (error) {
			DBUG_RETURN(error);
		}
	}

	/* Create the remaining indexes; the primary key, if any, was
	already created above and is skipped here. */
	for (i = 0; i < m_form->s->keys; i++) {
		if (i != uint(primary_key_no)
		    && (error = create_index(m_trx, m_form, m_table, i))) {
			DBUG_RETURN(error);
		}
	}

	/* Cache all the FTS indexes on this table in the FTS specific
	structure. They are used for FTS indexed column update handling. */
	if (m_flags2 & DICT_TF2_FTS) {
		fts_t*          fts = m_table->fts;

		ut_a(fts != NULL);

		dict_table_get_all_fts_indexes(m_table, fts->indexes);
	}

	size_t stmt_len;
	if (const char* stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len)) {
		/* FOREIGN KEY constraints are only created from the
		statement text when the caller asked for it. */
		dberr_t err = create_fk
			? dict_create_foreign_constraints(
				m_trx, stmt, stmt_len, m_table_name,
				m_flags2 & DICT_TF2_TEMPORARY)
			: DB_SUCCESS;
		if (err == DB_SUCCESS) {
			/* Check that also referencing constraints are ok */
			dict_names_t	fk_tables;
			err = dict_load_foreigns(m_table_name, NULL,
						 false, true,
						 DICT_ERR_IGNORE_NONE,
						 fk_tables);
			/* Load every table that dict_load_foreigns()
			collected in fk_tables. */
			while (err == DB_SUCCESS && !fk_tables.empty()) {
				dict_load_table(fk_tables.front(),
						DICT_ERR_IGNORE_NONE);
				fk_tables.pop_front();
			}
		}

		/* Push a more specific warning for some of the errors;
		the error itself is returned below. */
		switch (err) {
		case DB_PARENT_NO_INDEX:
			push_warning_printf(
				m_thd, Sql_condition::WARN_LEVEL_WARN,
				HA_ERR_CANNOT_ADD_FOREIGN,
				"Create table '%s' with foreign key constraint"
				" failed. There is no index in the referenced"
				" table where the referenced columns appear"
				" as the first columns.\n", m_table_name);
			break;

		case DB_CHILD_NO_INDEX:
			push_warning_printf(
				m_thd, Sql_condition::WARN_LEVEL_WARN,
				HA_ERR_CANNOT_ADD_FOREIGN,
				"Create table '%s' with foreign key constraint"
				" failed. There is no index in the referencing"
				" table where referencing columns appear"
				" as the first columns.\n", m_table_name);
			break;
		case DB_NO_FK_ON_S_BASE_COL:
			push_warning_printf(
				m_thd, Sql_condition::WARN_LEVEL_WARN,
				HA_ERR_CANNOT_ADD_FOREIGN,
				"Create table '%s' with foreign key constraint"
				" failed. Cannot add foreign key constraint"
				" placed on the base column of stored"
				" column. \n",
				m_table_name);
			/* There is no break here; control continues into
			the default label, which only breaks. */
		default:
			break;
		}

		if (err != DB_SUCCESS) {
			DBUG_RETURN(convert_error_code_to_mysql(
					    err, m_flags, NULL));
		}
	}

	/* In TRUNCATE TABLE, we will merely warn about the maximum
	row size being too large. */
	if (!row_size_is_acceptable(*m_table, create_fk)) {
		DBUG_RETURN(convert_error_code_to_mysql(
			    DB_TOO_BIG_RECORD, m_flags, NULL));
	}

	DBUG_RETURN(0);
}
12617 
row_size_is_acceptable(const dict_table_t & table,bool strict) const12618 bool create_table_info_t::row_size_is_acceptable(
12619   const dict_table_t &table, bool strict) const
12620 {
12621   for (dict_index_t *index= dict_table_get_first_index(&table); index;
12622        index= dict_table_get_next_index(index))
12623     if (!row_size_is_acceptable(*index, strict))
12624       return false;
12625   return true;
12626 }
12627 
/** Estimate the maximum record size of this index and compare it with
the size limits of leaf pages and of node pointer (non-leaf) pages.
The running size is accumulated in record_size_info_t::shortest_size,
field by field; each field at which a limit is reached is registered
with record_size_info_t::set_too_big().
FIXME: row size check has some flaws and should be improved
@return the computed limits and the accumulated size */
dict_index_t::record_size_info_t dict_index_t::record_size_info() const
{
  ut_ad(!(type & DICT_FTS));

  /* maximum allowed size of a node pointer record */
  ulint page_ptr_max;
  const bool comp= dict_table_is_comp(table);
  /* table->space == NULL after DISCARD TABLESPACE */
  const page_size_t page_size(dict_tf_get_page_size(table->flags));
  record_size_info_t result;

  if (page_size.is_compressed() &&
      page_size.physical() < univ_page_size.physical())
  {
    /* On a ROW_FORMAT=COMPRESSED page, two records must fit in the
    uncompressed page modification log. On compressed pages
    with size.physical() == univ_page_size.physical(),
    this limit will never be reached. */
    ut_ad(comp);
    /* The maximum allowed record size is the size of
    an empty page, minus a byte for recording the heap
    number in the page modification log.  The maximum
    allowed node pointer size is half that. */
    result.max_leaf_size= page_zip_empty_size(n_fields, page_size.physical());
    if (result.max_leaf_size)
    {
      result.max_leaf_size--;
    }
    page_ptr_max= result.max_leaf_size / 2;
    /* On a compressed page, there is a two-byte entry in
    the dense page directory for every record.  But there
    is no record header. */
    result.shortest_size= 2;
  }
  else
  {
    /* The maximum allowed record size is half a B-tree
    page(16k for 64k page size).  No additional sparse
    page directory entry will be generated for the first
    few user records. */
    result.max_leaf_size= (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
                              ? page_get_free_space_of_empty(comp) / 2
                              : REDUNDANT_REC_MAX_DATA_SIZE;

    page_ptr_max= result.max_leaf_size;
    /* Each record has a header. */
    result.shortest_size= comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;
  }

  if (comp)
  {
    /* Include the "null" flags in the
    maximum possible record size. */
    result.shortest_size+= UT_BITS_IN_BYTES(n_nullable);
  }
  else
  {
    /* For each column, include a 2-byte offset and a
    "null" flag.  The 1-byte format is only used in short
    records that do not contain externally stored columns.
    Such records could never exceed the page limit, even
    when using the 2-byte format. */
    result.shortest_size+= 2 * n_fields;
  }

  /* Size assumed for a big column of the clustered index that may be
  moved to external storage; see its use in the loop below. */
  const ulint max_local_len= table->get_overflow_field_local_len();

  /* Compute the maximum possible record size. */
  for (unsigned i= 0; i < n_fields; i++)
  {
    const dict_field_t &f= fields[i];
    const dict_col_t &col= *f.col;

    /* In dtuple_convert_big_rec(), variable-length columns
    that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE
    may be chosen for external storage.

    Fixed-length columns, and all columns of secondary
    index records are always stored inline. */

    /* Determine the maximum length of the index field.
    The field_ext_max_size should be computed as the worst
    case in rec_get_converted_size_comp() for
    REC_STATUS_ORDINARY records. */

    size_t field_max_size= dict_col_get_fixed_size(&col, comp);
    if (field_max_size && f.fixed_len != 0)
    {
      /* dict_index_add_col() should guarantee this */
      ut_ad(!f.prefix_len || f.fixed_len == f.prefix_len);
      /* Fixed lengths are not encoded
      in ROW_FORMAT=COMPACT. */
      goto add_field_size;
    }

    field_max_size= dict_col_get_max_size(&col);

    if (f.prefix_len)
    {
      /* A column prefix index stores at most prefix_len bytes. */
      if (f.prefix_len < field_max_size)
      {
        field_max_size= f.prefix_len;
      }

      /* those conditions were copied from dtuple_convert_big_rec()*/
    }
    else if (field_max_size > max_local_len &&
             field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
             DATA_BIG_COL(&col) && dict_index_is_clust(this))
    {

      /* In the worst case, we have a locally stored
      column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes.
      The length can be stored in one byte.  If the
      column were stored externally, the lengths in
      the clustered index page would be
      BTR_EXTERN_FIELD_REF_SIZE and 2. */
      field_max_size= max_local_len;
    }

    if (comp)
    {
      /* Add the extra size for ROW_FORMAT=COMPACT.
      For ROW_FORMAT=REDUNDANT, these bytes were
      added to result.shortest_size before this loop. */
      result.shortest_size+= field_max_size < 256 ? 1 : 2;
    }
  add_field_size:
    /* Fixed-length fields jump here, skipping the length bytes. */
    result.shortest_size+= field_max_size;

    /* Check the size limit on leaf pages. */
    if (result.shortest_size >= result.max_leaf_size)
    {
      result.set_too_big(i);
    }

    /* Check the size limit on non-leaf pages.  Records
    stored in non-leaf B-tree pages consist of the unique
    columns of the record (the key columns of the B-tree)
    and a node pointer field.  When we have processed the
    unique columns, result.shortest_size equals the size of the
    node pointer record minus the node pointer column. */
    if (i + 1 == dict_index_get_n_unique_in_tree(this) &&
        result.shortest_size + REC_NODE_PTR_SIZE >= page_ptr_max)
    {
      result.set_too_big(i);
    }
  }

  return result;
}
12780 
12781 /** Issue a warning that the row is too big. */
ib_warn_row_too_big(THD * thd,const dict_table_t * table)12782 static void ib_warn_row_too_big(THD *thd, const dict_table_t *table)
12783 {
12784   /* FIXME: this row size check should be improved */
12785   /* If prefix is true then a 768-byte prefix is stored
12786   locally for BLOB fields. Refer to dict_table_get_format() */
12787   const bool prefix= !dict_table_has_atomic_blobs(table);
12788 
12789   const ulint free_space=
12790       page_get_free_space_of_empty(table->flags & DICT_TF_COMPACT) / 2;
12791 
12792   push_warning_printf(
12793       thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
12794       "Row size too large (> " ULINTPF "). Changing some columns to TEXT"
12795       " or BLOB %smay help. In current row format, BLOB prefix of"
12796       " %d bytes is stored inline.",
12797       free_space,
12798       prefix ? "or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED " : "",
12799       prefix ? DICT_MAX_FIXED_COL_LEN : 0);
12800 }
12801 
row_size_is_acceptable(const dict_index_t & index,bool strict) const12802 bool create_table_info_t::row_size_is_acceptable(
12803     const dict_index_t &index, bool strict) const
12804 {
12805   if ((index.type & DICT_FTS) || index.table->is_system_db)
12806   {
12807     /* Ignore system tables check because innodb_table_stats
12808     maximum row size can not fit on 4k page. */
12809     return true;
12810   }
12811 
12812   const bool innodb_strict_mode= THDVAR(m_thd, strict_mode);
12813   dict_index_t::record_size_info_t info= index.record_size_info();
12814 
12815   if (info.row_is_too_big())
12816   {
12817     ut_ad(info.get_overrun_size() != 0);
12818     ut_ad(info.max_leaf_size != 0);
12819 
12820     const size_t idx= info.get_first_overrun_field_index();
12821     const dict_field_t *field= dict_index_get_nth_field(&index, idx);
12822 
12823     if (innodb_strict_mode || global_system_variables.log_warnings > 2)
12824     {
12825       ib::error_or_warn(strict && innodb_strict_mode)
12826           << "Cannot add field " << field->name << " in table "
12827           << index.table->name << " because after adding it, the row size is "
12828           << info.get_overrun_size()
12829           << " which is greater than maximum allowed size ("
12830           << info.max_leaf_size << " bytes) for a record on index leaf page.";
12831     }
12832 
12833     if (strict && innodb_strict_mode)
12834       return false;
12835 
12836     ib_warn_row_too_big(m_thd, index.table);
12837   }
12838 
12839   return true;
12840 }
12841 
/** Update a new table in an InnoDB database.
Opens the table by m_table_name, sets up fts_doc_id_index, copies the
.frm flags, initializes the statistics, the FTS stopword list and the
AUTO_INCREMENT counter, applies the MERGE_THRESHOLD hints, and closes
the table again.
@return error number (0 on success, -1 if the FTS stopword list
could not be loaded) */
int
create_table_info_t::create_table_update_dict()
{
	dict_table_t*	innobase_table;

	DBUG_ENTER("create_table_update_dict");

	innobase_table = dict_table_open_on_name(
		m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);

	DBUG_ASSERT(innobase_table != 0);
	if (innobase_table->fts != NULL) {
		/* Look up FTS_DOC_ID_INDEX by name if it is not set yet;
		otherwise assert that it is the index of that name. */
		if (innobase_table->fts_doc_id_index == NULL) {
			innobase_table->fts_doc_id_index
				= dict_table_get_index_on_name(
					innobase_table, FTS_DOC_ID_INDEX_NAME);
			DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL);
		} else {
			DBUG_ASSERT(innobase_table->fts_doc_id_index
				    == dict_table_get_index_on_name(
						innobase_table,
						FTS_DOC_ID_INDEX_NAME));
		}
	}

	/* fts and fts_doc_id_index must be both set or both unset. */
	DBUG_ASSERT((innobase_table->fts == NULL)
		    == (innobase_table->fts_doc_id_index == NULL));

	innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info);

	dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);

	/* Load server stopword into FTS cache */
	if (m_flags2 & DICT_TF2_FTS) {
		if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
			/* Release the table handle before failing. */
			dict_table_close(innobase_table, FALSE, FALSE);
			srv_active_wake_master_thread();
			DBUG_RETURN(-1);
		}

		/* fts_optimize_add_table() is called while holding
		dict_sys->mutex. */
		mutex_enter(&dict_sys->mutex);
		fts_optimize_add_table(innobase_table);
		mutex_exit(&dict_sys->mutex);
	}

	if (const Field* ai = m_form->found_next_number_field) {
		ut_ad(ai->stored_in_db());

		ib_uint64_t	autoinc = m_create_info->auto_increment_value;

		/* An unspecified (0) AUTO_INCREMENT value starts from 1. */
		if (autoinc == 0) {
			autoinc = 1;
		}

		dict_table_autoinc_lock(innobase_table);
		dict_table_autoinc_initialize(innobase_table, autoinc);

		if (innobase_table->is_temporary()) {
			/* AUTO_INCREMENT is not persistent for
			TEMPORARY TABLE. Temporary tables are never
			evicted. Keep the counter in memory only. */
		} else {
			const unsigned	col_no = innodb_col_no(ai);

			/* persistent_autoinc holds 1 + the position
			returned by dict_table_get_nth_col_pos(). */
			innobase_table->persistent_autoinc = 1
				+ dict_table_get_nth_col_pos(
					innobase_table, col_no, NULL);

			/* Persist the "last used" value, which
			typically is AUTO_INCREMENT - 1.
			In btr_create(), the value 0 was already written. */
			if (--autoinc) {
				btr_write_autoinc(
					dict_table_get_first_index(
						innobase_table),
					autoinc);
			}
		}

		dict_table_autoinc_unlock(innobase_table);
	}

	/* Apply the MERGE_THRESHOLD hints from the table and index
	comments. */
	innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);

	dict_table_close(innobase_table, FALSE, FALSE);
	DBUG_RETURN(0);
}
12931 
12932 /** Allocate a new trx. */
12933 void
allocate_trx()12934 create_table_info_t::allocate_trx()
12935 {
12936 	m_trx = innobase_trx_allocate(m_thd);
12937 
12938 	m_trx->will_lock = true;
12939 	m_trx->ddl = true;
12940 }
12941 
12942 /** Create a new table to an InnoDB database.
12943 @param[in]	name		Table name, format: "db/table_name".
12944 @param[in]	form		Table format; columns and index information.
12945 @param[in]	create_info	Create info (including create statement string).
12946 @param[in]	file_per_table	whether to create .ibd file
12947 @param[in,out]	trx		dictionary transaction, or NULL to create new
12948 @return	0 if success else error number. */
12949 inline int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info,bool file_per_table,trx_t * trx)12950 ha_innobase::create(
12951 	const char*	name,
12952 	TABLE*		form,
12953 	HA_CREATE_INFO*	create_info,
12954 	bool		file_per_table,
12955 	trx_t*		trx)
12956 {
12957 	int		error;
12958 	char		norm_name[FN_REFLEN];	/* {database}/{tablename} */
12959 	char		remote_path[FN_REFLEN];	/* Absolute path of table */
12960 
12961 	DBUG_ENTER("ha_innobase::create");
12962 
12963 	DBUG_ASSERT(form->s == table_share);
12964 	DBUG_ASSERT(table_share->table_type == TABLE_TYPE_SEQUENCE
12965 		    || table_share->table_type == TABLE_TYPE_NORMAL);
12966 
12967 	create_table_info_t	info(ha_thd(),
12968 				     form,
12969 				     create_info,
12970 				     norm_name,
12971 				     remote_path,
12972 				     file_per_table, trx);
12973 
12974 	if ((error = info.initialize())
12975 	    || (error = info.prepare_create_table(name, !trx))) {
12976 		if (trx) {
12977 			trx_rollback_for_mysql(trx);
12978 			row_mysql_unlock_data_dictionary(trx);
12979 		}
12980 		DBUG_RETURN(error);
12981 	}
12982 
12983 	const bool own_trx = !trx;
12984 
12985 	if (own_trx) {
12986 		info.allocate_trx();
12987 		trx = info.trx();
12988 		/* Latch the InnoDB data dictionary exclusively so that no deadlocks
12989 		or lock waits can happen in it during a table create operation.
12990 		Drop table etc. do this latching in row0mysql.cc. */
12991 		row_mysql_lock_data_dictionary(trx);
12992 		DBUG_ASSERT(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
12993 	}
12994 
12995 	if ((error = info.create_table(own_trx))) {
12996 		/* Drop the being-created table before rollback,
12997 		so that rollback can possibly rename back a table
12998 		that could have been renamed before the failed creation. */
12999 		if (info.drop_before_rollback()) {
13000 			trx->error_state = DB_SUCCESS;
13001 			row_drop_table_for_mysql(info.table_name(),
13002 						 trx, SQLCOM_TRUNCATE, true,
13003 						 false);
13004 		}
13005 		trx_rollback_for_mysql(trx);
13006 		row_mysql_unlock_data_dictionary(trx);
13007 		goto func_exit;
13008 	}
13009 
13010 	innobase_commit_low(trx);
13011 	row_mysql_unlock_data_dictionary(trx);
13012 
13013 	/* Flush the log to reduce probability that the .frm files and
13014 	the InnoDB data dictionary get out-of-sync if the user runs
13015 	with innodb_flush_log_at_trx_commit = 0 */
13016 	log_buffer_flush_to_disk();
13017 
13018 	ut_ad(!srv_read_only_mode);
13019 
13020 	error = info.create_table_update_dict();
13021 
13022 func_exit:
13023 	if (own_trx) {
13024 		trx->free();
13025 	}
13026 
13027 	/* Tell the InnoDB server that there might be work for
13028 	utility threads: */
13029 
13030 	srv_active_wake_master_thread();
13031 
13032 	DBUG_RETURN(error);
13033 }
13034 
13035 /** Create a new table to an InnoDB database.
13036 @param[in]	name		Table name, format: "db/table_name".
13037 @param[in]	form		Table format; columns and index information.
13038 @param[in]	create_info	Create info (including create statement string).
13039 @return	0 if success else error number. */
13040 int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)13041 ha_innobase::create(
13042 	const char*	name,
13043 	TABLE*		form,
13044 	HA_CREATE_INFO*	create_info)
13045 {
13046 	return create(name, form, create_info, srv_file_per_table);
13047 }
13048 
13049 /*****************************************************************//**
13050 Discards or imports an InnoDB tablespace.
13051 @return 0 == success, -1 == error */
13052 
13053 int
discard_or_import_tablespace(my_bool discard)13054 ha_innobase::discard_or_import_tablespace(
13055 /*======================================*/
13056 	my_bool		discard)	/*!< in: TRUE if discard, else import */
13057 {
13058 
13059 	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
13060 
13061 	ut_a(m_prebuilt->trx != NULL);
13062 	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
13063 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13064 
13065 	if (high_level_read_only) {
13066 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13067 	}
13068 
13069 	if (m_prebuilt->table->is_temporary()) {
13070 		ib_senderrf(
13071 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13072 			ER_CANNOT_DISCARD_TEMPORARY_TABLE);
13073 
13074 		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13075 	}
13076 
13077 	if (m_prebuilt->table->space == fil_system.sys_space) {
13078 		ib_senderrf(
13079 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13080 			ER_TABLE_IN_SYSTEM_TABLESPACE,
13081 			m_prebuilt->table->name.m_name);
13082 
13083 		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13084 	}
13085 
13086 	trx_start_if_not_started(m_prebuilt->trx, true);
13087 
13088 	/* Obtain an exclusive lock on the table. */
13089 	dberr_t	err = row_mysql_lock_table(
13090 		m_prebuilt->trx, m_prebuilt->table, LOCK_X,
13091 		discard ? "setting table lock for DISCARD TABLESPACE"
13092 			: "setting table lock for IMPORT TABLESPACE");
13093 
13094 	if (err != DB_SUCCESS) {
13095 		/* unable to lock the table: do nothing */
13096 	} else if (discard) {
13097 
13098 		/* Discarding an already discarded tablespace should be an
13099 		idempotent operation. Also, if the .ibd file is missing the
13100 		user may want to set the DISCARD flag in order to IMPORT
13101 		a new tablespace. */
13102 
13103 		if (!m_prebuilt->table->is_readable()) {
13104 			ib_senderrf(
13105 				m_prebuilt->trx->mysql_thd,
13106 				IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
13107 				m_prebuilt->table->name.m_name);
13108 		}
13109 
13110 		err = row_discard_tablespace_for_mysql(
13111 			m_prebuilt->table->name.m_name, m_prebuilt->trx);
13112 
13113 	} else if (m_prebuilt->table->is_readable()) {
13114 		/* Commit the transaction in order to
13115 		release the table lock. */
13116 		trx_commit_for_mysql(m_prebuilt->trx);
13117 
13118 		ib::error() << "Unable to import tablespace "
13119 			<< m_prebuilt->table->name << " because it already"
13120 			" exists.  Please DISCARD the tablespace"
13121 			" before IMPORT.";
13122 		ib_senderrf(
13123 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13124 			ER_TABLESPACE_EXISTS, m_prebuilt->table->name.m_name);
13125 
13126 		DBUG_RETURN(HA_ERR_TABLE_EXIST);
13127 	} else {
13128 		err = row_import_for_mysql(m_prebuilt->table, m_prebuilt);
13129 
13130 		if (err == DB_SUCCESS) {
13131 
13132 			info(HA_STATUS_TIME
13133 			     | HA_STATUS_CONST
13134 			     | HA_STATUS_VARIABLE
13135 			     | HA_STATUS_AUTO);
13136 
13137 			fil_crypt_set_encrypt_tables(srv_encrypt_tables);
13138 		}
13139 	}
13140 
13141 	/* Commit the transaction in order to release the table lock. */
13142 	trx_commit_for_mysql(m_prebuilt->trx);
13143 
13144 	if (discard || err != DB_SUCCESS) {
13145 		DBUG_RETURN(convert_error_code_to_mysql(
13146 				    err, m_prebuilt->table->flags, NULL));
13147 	}
13148 
13149 	/* Evict and reload the table definition in order to invoke
13150 	btr_cur_instant_init(). */
13151 	table_id_t id = m_prebuilt->table->id;
13152 	ut_ad(id);
13153 	mutex_enter(&dict_sys->mutex);
13154 	dict_table_close(m_prebuilt->table, TRUE, FALSE);
13155 	dict_table_remove_from_cache(m_prebuilt->table);
13156 	m_prebuilt->table = dict_table_open_on_id(id, TRUE,
13157 						  DICT_TABLE_OP_NORMAL);
13158 	mutex_exit(&dict_sys->mutex);
13159 	if (!m_prebuilt->table) {
13160 		err = DB_TABLE_NOT_FOUND;
13161 	} else {
13162 		if (const Field* ai = table->found_next_number_field) {
13163 			initialize_auto_increment(m_prebuilt->table, ai);
13164 		}
13165 		dict_stats_init(m_prebuilt->table);
13166 	}
13167 
13168 	if (dict_stats_is_persistent_enabled(m_prebuilt->table)) {
13169 		dberr_t		ret;
13170 
13171 		/* Adjust the persistent statistics. */
13172 		ret = dict_stats_update(m_prebuilt->table,
13173 					DICT_STATS_RECALC_PERSISTENT);
13174 
13175 		if (ret != DB_SUCCESS) {
13176 			push_warning_printf(
13177 				ha_thd(),
13178 				Sql_condition::WARN_LEVEL_WARN,
13179 				ER_ALTER_INFO,
13180 				"Error updating stats for table '%s'"
13181 				" after table rebuild: %s",
13182 				m_prebuilt->table->name.m_name,
13183 				ut_strerr(ret));
13184 		}
13185 	}
13186 
13187 	DBUG_RETURN(0);
13188 }
13189 
13190 /**
13191 Drops a table from an InnoDB database. Before calling this function,
13192 MySQL calls innobase_commit to commit the transaction of the current user.
13193 Then the current user cannot have locks set on the table. Drop table
13194 operation inside InnoDB will remove all locks any user has on the table
13195 inside InnoDB.
13196 @param[in]	name	table name
13197 @param[in]	sqlcom	SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ...
13198 @return error number */
delete_table(const char * name,enum_sql_command sqlcom)13199 inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
13200 {
13201 	dberr_t	err;
13202 	THD*	thd = ha_thd();
13203 	char	norm_name[FN_REFLEN];
13204 
13205 	DBUG_ENTER("ha_innobase::delete_table");
13206 
13207 	DBUG_EXECUTE_IF(
13208 		"test_normalize_table_name_low",
13209 		test_normalize_table_name_low();
13210 	);
13211 	DBUG_EXECUTE_IF(
13212 		"test_ut_format_name",
13213 		test_ut_format_name();
13214 	);
13215 
13216 	/* Strangely, MySQL passes the table name without the '.frm'
13217 	extension, in contrast to ::create */
13218 	normalize_table_name(norm_name, name);
13219 
13220 	if (high_level_read_only) {
13221 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13222 	}
13223 
13224 	trx_t*	parent_trx = check_trx_exists(thd);
13225 
13226 	/* Remove the to-be-dropped table from the list of modified tables
13227 	by parent_trx. Otherwise we may end up with an orphaned pointer to
13228 	the table object from parent_trx::mod_tables. This could happen in:
13229 	SET AUTOCOMMIT=0;
13230 	CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION
13231 	ALL SELECT 1 AS a; */
13232 	trx_mod_tables_t::const_iterator	iter;
13233 
13234 	for (iter = parent_trx->mod_tables.begin();
13235 	     iter != parent_trx->mod_tables.end();
13236 	     ++iter) {
13237 
13238 		dict_table_t*	table_to_drop = iter->first;
13239 
13240 		if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
13241 			parent_trx->mod_tables.erase(table_to_drop);
13242 			break;
13243 		}
13244 	}
13245 
13246 	trx_t*	trx = innobase_trx_allocate(thd);
13247 
13248 	ulint	name_len = strlen(name);
13249 
13250 	ut_a(name_len < 1000);
13251 
13252 	trx->will_lock = true;
13253 
13254 	/* Drop the table in InnoDB */
13255 
13256 	err = row_drop_table_for_mysql(norm_name, trx, sqlcom);
13257 
13258 	if (err == DB_TABLE_NOT_FOUND
13259 	    && innobase_get_lower_case_table_names() == 1) {
13260 		char*	is_part = is_partition(norm_name);
13261 
13262 		if (is_part) {
13263 			char	par_case_name[FN_REFLEN];
13264 
13265 #ifndef __WIN__
13266 			/* Check for the table using lower
13267 			case name, including the partition
13268 			separator "P" */
13269 			strcpy(par_case_name, norm_name);
13270 			innobase_casedn_str(par_case_name);
13271 #else
13272 			/* On Windows platfrom, check
13273 			whether there exists table name in
13274 			system table whose name is
13275 			not being normalized to lower case */
13276 			normalize_table_name_c_low(
13277 				par_case_name, name, FALSE);
13278 #endif
13279 			err = row_drop_table_for_mysql(
13280 				par_case_name, trx, sqlcom);
13281 		}
13282 	}
13283 
13284 	if (err == DB_TABLE_NOT_FOUND) {
13285 		/* Test to drop all tables which matches db/tablename + '#'.
13286 		Only partitions can have '#' as non-first character in
13287 		the table name!
13288 
13289 		Temporary table names always start with '#', partitions are
13290 		the only 'tables' that can have '#' after the first character
13291 		and table name must have length > 0. User tables cannot have
13292 		'#' since it would be translated to @0023. Therefor this should
13293 		only match partitions. */
13294 		uint	len = (uint) strlen(norm_name);
13295 		ulint	num_partitions;
13296 		ut_a(len < FN_REFLEN);
13297 		norm_name[len] = '#';
13298 		norm_name[len + 1] = 0;
13299 		err = row_drop_database_for_mysql(norm_name, trx,
13300 			&num_partitions);
13301 		norm_name[len] = 0;
13302 		table_name_t tbl_name(norm_name);
13303 		if (num_partitions == 0 && !tbl_name.is_temporary()) {
13304 			ib::error() << "Table " << tbl_name <<
13305 				" does not exist in the InnoDB"
13306 				" internal data dictionary though MariaDB is"
13307 				" trying to drop it. Have you copied the .frm"
13308 				" file of the table to the MariaDB database"
13309 				" directory from another database? "
13310 				<< TROUBLESHOOTING_MSG;
13311 		}
13312 		if (num_partitions == 0) {
13313 			err = DB_TABLE_NOT_FOUND;
13314 		}
13315 	}
13316 
13317 	if (err == DB_TABLE_NOT_FOUND
13318 	    && innobase_get_lower_case_table_names() == 1) {
13319 		char*	is_part = is_partition(norm_name);
13320 
13321 		if (is_part != NULL) {
13322 			char	par_case_name[FN_REFLEN];
13323 
13324 #ifndef _WIN32
13325 			/* Check for the table using lower
13326 			case name, including the partition
13327 			separator "P" */
13328 			strcpy(par_case_name, norm_name);
13329 			innobase_casedn_str(par_case_name);
13330 #else
13331 			/* On Windows platfrom, check
13332 			whether there exists table name in
13333 			system table whose name is
13334 			not being normalized to lower case */
13335 			create_table_info_t::normalize_table_name_low(
13336 				par_case_name, name, FALSE);
13337 #endif /* _WIN32 */
13338 			err = row_drop_table_for_mysql(
13339 				par_case_name, trx, sqlcom, true);
13340 		}
13341 	}
13342 
13343 	ut_ad(!srv_read_only_mode);
13344 	/* Flush the log to reduce probability that the .frm files and
13345 	the InnoDB data dictionary get out-of-sync if the user runs
13346 	with innodb_flush_log_at_trx_commit = 0 */
13347 
13348 	log_buffer_flush_to_disk();
13349 
13350 	innobase_commit_low(trx);
13351 
13352 	trx->free();
13353 
13354 	DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
13355 }
13356 
13357 /** Drop an InnoDB table.
13358 @param[in]	name	table name
13359 @return error number */
delete_table(const char * name)13360 int ha_innobase::delete_table(const char* name)
13361 {
13362 	enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd()));
13363 	/* SQLCOM_TRUNCATE should be passed via ha_innobase::truncate() only.
13364 
13365 	On client disconnect, when dropping temporary tables, the
13366 	previous sqlcom would not be overwritten.  In such a case, we
13367 	will have thd_kill_level() != NOT_KILLED, !m_prebuilt can
13368 	hold, and sqlcom could be anything, including TRUNCATE.
13369 
13370 	The sqlcom only matters for persistent tables; no persistent
13371 	metadata or FOREIGN KEY metadata is kept for temporary
13372 	tables. Therefore, we relax the assertion. If there is a bug
13373 	that slips through this assertion due to !m_prebuilt, the
13374 	worst impact should be that on DROP TABLE of a persistent
13375 	table, FOREIGN KEY constraints will be ignored and their
13376 	metadata will not be removed. */
13377 	DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE
13378 		    || (thd_kill_level(ha_thd()) != THD_IS_NOT_KILLED
13379 			&& (!m_prebuilt
13380 			    || m_prebuilt->table->is_temporary())));
13381 	return delete_table(name, sqlcom);
13382 }
13383 
13384 /** Remove all tables in the named database inside InnoDB.
13385 @param[in]	hton	handlerton from InnoDB
13386 @param[in]	path	Database path; Inside InnoDB the name of the last
13387 directory in the path is used as the database name.
13388 For example, in 'mysql/data/test' the database name is 'test'. */
13389 
13390 static
13391 void
innobase_drop_database(handlerton * hton,char * path)13392 innobase_drop_database(
13393 	handlerton*	hton,
13394 	char*		path)
13395 {
13396 	char*	namebuf;
13397 
13398 	/* Get the transaction associated with the current thd, or create one
13399 	if not yet created */
13400 
13401 	DBUG_ASSERT(hton == innodb_hton_ptr);
13402 
13403 	if (high_level_read_only) {
13404 		return;
13405 	}
13406 
13407 	THD*	thd = current_thd;
13408 
13409 	ulint	len = 0;
13410 	char*	ptr = strend(path) - 2;
13411 
13412 	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
13413 		ptr--;
13414 		len++;
13415 	}
13416 
13417 	ptr++;
13418 	namebuf = (char*) my_malloc(/*PSI_INSTRUMENT_ME,*/ (uint) len + 2, MYF(0));
13419 
13420 	memcpy(namebuf, ptr, len);
13421 	namebuf[len] = '/';
13422 	namebuf[len + 1] = '\0';
13423 
13424 #ifdef	_WIN32
13425 	innobase_casedn_str(namebuf);
13426 #endif /* _WIN32 */
13427 
13428 	trx_t*	trx = innobase_trx_allocate(thd);
13429 	trx->will_lock = true;
13430 
13431 	ulint	dummy;
13432 
13433 	row_drop_database_for_mysql(namebuf, trx, &dummy);
13434 
13435 	my_free(namebuf);
13436 
13437 	/* Flush the log to reduce probability that the .frm files and
13438 	the InnoDB data dictionary get out-of-sync if the user runs
13439 	with innodb_flush_log_at_trx_commit = 0 */
13440 
13441 	log_buffer_flush_to_disk();
13442 
13443 	innobase_commit_low(trx);
13444 
13445 	trx->free();
13446 }
13447 
13448 /** Rename an InnoDB table.
13449 @param[in,out]	trx	InnoDB data dictionary transaction
13450 @param[in]	from	old table name
13451 @param[in]	to	new table name
13452 @param[in]	commit	whether to commit trx (and to enforce FOREIGN KEY)
13453 @return DB_SUCCESS or error code */
innobase_rename_table(trx_t * trx,const char * from,const char * to,bool commit)13454 inline dberr_t innobase_rename_table(trx_t *trx, const char *from,
13455                                      const char *to, bool commit)
13456 {
13457 	dberr_t	error;
13458 	char	norm_to[FN_REFLEN];
13459 	char	norm_from[FN_REFLEN];
13460 
13461 	DBUG_ENTER("innobase_rename_table");
13462 	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX
13463 		    || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
13464 
13465 	ut_ad(!srv_read_only_mode);
13466 
13467 	normalize_table_name(norm_to, to);
13468 	normalize_table_name(norm_from, from);
13469 
13470 	DEBUG_SYNC_C("innodb_rename_table_ready");
13471 
13472 	trx_start_if_not_started(trx, true);
13473 	ut_ad(trx->will_lock);
13474 
13475 	if (commit) {
13476 		/* Serialize data dictionary operations with dictionary mutex:
13477 		no deadlocks can occur then in these operations. */
13478 		row_mysql_lock_data_dictionary(trx);
13479 	}
13480 
13481 	dict_table_t*   table = dict_table_open_on_name(
13482 		norm_from, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
13483 
13484 	/* Since DICT_BG_YIELD has sleep for 250 milliseconds,
13485 	Convert lock_wait_timeout unit from second to 250 milliseconds */
13486 	long int lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd) * 4;
13487 	if (table != NULL) {
13488 		if (commit) {
13489 			dict_stats_wait_bg_to_stop_using_table(table, trx);
13490 		}
13491 		for (dict_index_t* index = dict_table_get_first_index(table);
13492 		     index != NULL;
13493 		     index = dict_table_get_next_index(index)) {
13494 
13495 			if (index->type & DICT_FTS) {
13496 				/* Found */
13497 				while (index->index_fts_syncing
13498 					&& !trx_is_interrupted(trx)
13499 					&& (lock_wait_timeout--) > 0) {
13500 					DICT_BG_YIELD(trx);
13501 				}
13502 			}
13503 		}
13504 		if (!commit) {
13505 			dict_table_close(table, TRUE, FALSE);
13506 		}
13507 	}
13508 
13509 	/* FTS sync is in progress. We shall timeout this operation */
13510 	if (lock_wait_timeout < 0) {
13511 		error = DB_LOCK_WAIT_TIMEOUT;
13512 		goto func_exit;
13513 	}
13514 
13515 	error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit,
13516 					   commit);
13517 
13518 	if (error != DB_SUCCESS) {
13519 		if (error == DB_TABLE_NOT_FOUND
13520 		    && innobase_get_lower_case_table_names() == 1) {
13521 			char*	is_part = is_partition(norm_from);
13522 
13523 			if (is_part) {
13524 				char	par_case_name[FN_REFLEN];
13525 #ifndef _WIN32
13526 				/* Check for the table using lower
13527 				case name, including the partition
13528 				separator "P" */
13529 				strcpy(par_case_name, norm_from);
13530 				innobase_casedn_str(par_case_name);
13531 #else
13532 				/* On Windows platfrom, check
13533 				whether there exists table name in
13534 				system table whose name is
13535 				not being normalized to lower case */
13536 				create_table_info_t::normalize_table_name_low(
13537 					par_case_name, from, FALSE);
13538 #endif /* _WIN32 */
13539 				trx_start_if_not_started(trx, true);
13540 				error = row_rename_table_for_mysql(
13541 					par_case_name, norm_to, trx,
13542 					true, false);
13543 			}
13544 		}
13545 
13546 		if (error == DB_SUCCESS) {
13547 #ifndef _WIN32
13548 			sql_print_warning("Rename partition table %s"
13549 					  " succeeds after converting to lower"
13550 					  " case. The table may have"
13551 					  " been moved from a case"
13552 					  " in-sensitive file system.\n",
13553 					  norm_from);
13554 #else
13555 			sql_print_warning("Rename partition table %s"
13556 					  " succeeds after skipping the step to"
13557 					  " lower case the table name."
13558 					  " The table may have been"
13559 					  " moved from a case sensitive"
13560 					  " file system.\n",
13561 					  norm_from);
13562 #endif /* _WIN32 */
13563 		}
13564 	}
13565 
13566 func_exit:
13567 	if (commit) {
13568 		if (table) {
13569 			table->stats_bg_flag &= ~BG_STAT_SHOULD_QUIT;
13570 			dict_table_close(table, TRUE, FALSE);
13571 		}
13572 		row_mysql_unlock_data_dictionary(trx);
13573 	}
13574 
13575 	/* Flush the log to reduce probability that the .frm
13576 	files and the InnoDB data dictionary get out-of-sync
13577 	if the user runs with innodb_flush_log_at_trx_commit = 0 */
13578 
13579 	log_buffer_flush_to_disk();
13580 
13581 	DBUG_RETURN(error);
13582 }
13583 
13584 /** TRUNCATE TABLE
13585 @return	error code
13586 @retval	0	on success */
truncate()13587 int ha_innobase::truncate()
13588 {
13589 	DBUG_ENTER("ha_innobase::truncate");
13590 
13591 	if (high_level_read_only) {
13592 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13593 	}
13594 
13595 	update_thd();
13596 
13597 	HA_CREATE_INFO	info;
13598 	mem_heap_t*	heap = mem_heap_create(1000);
13599 	dict_table_t*	ib_table = m_prebuilt->table;
13600 	const time_t	update_time = ib_table->update_time;
13601 	const ulint	stored_lock = m_prebuilt->stored_select_lock_type;
13602 	info.init();
13603 	update_create_info_from_table(&info, table);
13604 
13605 	if (ib_table->is_temporary()) {
13606 		info.options|= HA_LEX_CREATE_TMP_TABLE;
13607 	} else {
13608 		dict_get_and_save_data_dir_path(ib_table, false);
13609 	}
13610 
13611 	char* data_file_name = ib_table->data_dir_path;
13612 
13613 	if (data_file_name) {
13614 		info.data_file_name = data_file_name
13615 			= mem_heap_strdup(heap, data_file_name);
13616 	}
13617 
13618 	const char* temp_name = dict_mem_create_temporary_tablename(
13619 		heap, ib_table->name.m_name, ib_table->id);
13620 	const char* name = mem_heap_strdup(heap, ib_table->name.m_name);
13621 	trx_t*	trx = innobase_trx_allocate(m_user_thd);
13622 	trx->will_lock = true;
13623 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
13624 	row_mysql_lock_data_dictionary(trx);
13625 	dict_stats_wait_bg_to_stop_using_table(ib_table, trx);
13626 
13627 	int err = convert_error_code_to_mysql(
13628 		innobase_rename_table(trx, ib_table->name.m_name, temp_name,
13629 				      false),
13630 		ib_table->flags, m_user_thd);
13631 	if (err) {
13632 		trx_rollback_for_mysql(trx);
13633 		row_mysql_unlock_data_dictionary(trx);
13634 	} else {
13635 		switch (dict_tf_get_rec_format(ib_table->flags)) {
13636 		case REC_FORMAT_REDUNDANT:
13637 			info.row_type = ROW_TYPE_REDUNDANT;
13638 			break;
13639 		case REC_FORMAT_COMPACT:
13640 			info.row_type = ROW_TYPE_COMPACT;
13641 			break;
13642 		case REC_FORMAT_COMPRESSED:
13643 			info.row_type = ROW_TYPE_COMPRESSED;
13644 			break;
13645 		case REC_FORMAT_DYNAMIC:
13646 			info.row_type = ROW_TYPE_DYNAMIC;
13647 			break;
13648 		}
13649 
13650 		err = create(name, table, &info,
13651 			     ib_table->is_temporary()
13652 			     || dict_table_is_file_per_table(ib_table), trx);
13653 	}
13654 
13655 	trx->free();
13656 
13657 	if (!err) {
13658 		/* Reopen the newly created table, and drop the
13659 		original table that was renamed to temp_name. */
13660 
13661 		row_prebuilt_t* prebuilt = m_prebuilt;
13662 		uchar* upd_buf = m_upd_buf;
13663 		ulint upd_buf_size = m_upd_buf_size;
13664 		/* Mimic ha_innobase::close(). */
13665 		m_prebuilt = NULL;
13666 		m_upd_buf = NULL;
13667 		m_upd_buf_size = 0;
13668 		err = open(name, 0, 0);
13669 		if (!err) {
13670 			m_prebuilt->stored_select_lock_type = stored_lock;
13671 			m_prebuilt->table->update_time = update_time;
13672 			row_prebuilt_free(prebuilt, FALSE);
13673 			delete_table(temp_name, SQLCOM_TRUNCATE);
13674 			my_free(upd_buf);
13675 		} else {
13676 			/* Revert to the old table before truncation. */
13677 			m_prebuilt = prebuilt;
13678 			m_upd_buf = upd_buf;
13679 			m_upd_buf_size = upd_buf_size;
13680 		}
13681 	}
13682 
13683 	mem_heap_free(heap);
13684 	DBUG_RETURN(err);
13685 }
13686 
13687 /*********************************************************************//**
13688 Renames an InnoDB table.
13689 @return 0 or error code */
13690 
13691 int
rename_table(const char * from,const char * to)13692 ha_innobase::rename_table(
13693 /*======================*/
13694 	const char*	from,	/*!< in: old name of the table */
13695 	const char*	to)	/*!< in: new name of the table */
13696 {
13697 	THD*	thd = ha_thd();
13698 
13699 	DBUG_ENTER("ha_innobase::rename_table");
13700 
13701 	if (high_level_read_only) {
13702 		ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
13703 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13704 	}
13705 
13706 	trx_t*	trx = innobase_trx_allocate(thd);
13707 	trx->will_lock = true;
13708 	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
13709 
13710 	dberr_t	error = innobase_rename_table(trx, from, to, true);
13711 
13712 	DEBUG_SYNC(thd, "after_innobase_rename_table");
13713 
13714 	innobase_commit_low(trx);
13715 
13716 	trx->free();
13717 
13718 	if (error == DB_SUCCESS) {
13719 		char	norm_from[MAX_FULL_NAME_LEN];
13720 		char	norm_to[MAX_FULL_NAME_LEN];
13721 		char	errstr[512];
13722 		dberr_t	ret;
13723 
13724 		normalize_table_name(norm_from, from);
13725 		normalize_table_name(norm_to, to);
13726 
13727 		ret = dict_stats_rename_table(norm_from, norm_to,
13728 					      errstr, sizeof(errstr));
13729 
13730 		if (ret != DB_SUCCESS) {
13731 			ib::error() << errstr;
13732 
13733 			push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
13734 				     ER_LOCK_WAIT_TIMEOUT, errstr);
13735 		}
13736 	}
13737 
13738 	/* Add a special case to handle the Duplicated Key error
13739 	and return DB_ERROR instead.
13740 	This is to avoid a possible SIGSEGV error from mysql error
13741 	handling code. Currently, mysql handles the Duplicated Key
13742 	error by re-entering the storage layer and getting dup key
13743 	info by calling get_dup_key(). This operation requires a valid
13744 	table handle ('row_prebuilt_t' structure) which could no
13745 	longer be available in the error handling stage. The suggested
13746 	solution is to report a 'table exists' error message (since
13747 	the dup key error here is due to an existing table whose name
13748 	is the one we are trying to rename to) and return the generic
13749 	error code. */
13750 	if (error == DB_DUPLICATE_KEY) {
13751 		my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
13752 
13753 		error = DB_ERROR;
13754 	} else if (error == DB_LOCK_WAIT_TIMEOUT) {
13755 		my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0), to);
13756 
13757 		error = DB_LOCK_WAIT;
13758 	}
13759 
13760 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
13761 }
13762 
13763 /*********************************************************************//**
13764 Estimates the number of index records in a range.
13765 @return estimated number of rows */
13766 
13767 ha_rows
records_in_range(uint keynr,key_range * min_key,key_range * max_key)13768 ha_innobase::records_in_range(
13769 /*==========================*/
13770 	uint			keynr,		/*!< in: index number */
13771 	key_range		*min_key,	/*!< in: start key value of the
13772 						range, may also be 0 */
13773 	key_range		*max_key)	/*!< in: range end key val, may
13774 						also be 0 */
13775 {
13776 	KEY*		key;
13777 	dict_index_t*	index;
13778 	dtuple_t*	range_start;
13779 	dtuple_t*	range_end;
13780 	ha_rows		n_rows;
13781 	page_cur_mode_t	mode1;
13782 	page_cur_mode_t	mode2;
13783 	mem_heap_t*	heap;
13784 
13785 	DBUG_ENTER("records_in_range");
13786 
13787 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13788 
13789 	m_prebuilt->trx->op_info = "estimating records in index range";
13790 
13791 	active_index = keynr;
13792 
13793 	key = table->key_info + active_index;
13794 
13795 	index = innobase_get_index(keynr);
13796 
13797 	/* There exists possibility of not being able to find requested
13798 	index due to inconsistency between MySQL and InoDB dictionary info.
13799 	Necessary message should have been printed in innobase_get_index() */
13800 	if (!m_prebuilt->table->space) {
13801 		n_rows = HA_POS_ERROR;
13802 		goto func_exit;
13803 	}
13804 	if (!index) {
13805 		n_rows = HA_POS_ERROR;
13806 		goto func_exit;
13807 	}
13808 	if (index->is_corrupted()) {
13809 		n_rows = HA_ERR_INDEX_CORRUPT;
13810 		goto func_exit;
13811 	}
13812 	if (!row_merge_is_index_usable(m_prebuilt->trx, index)) {
13813 		n_rows = HA_ERR_TABLE_DEF_CHANGED;
13814 		goto func_exit;
13815 	}
13816 
13817 	heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t)
13818 				    + sizeof(dtuple_t)));
13819 
13820 	range_start = dtuple_create(heap, key->ext_key_parts);
13821 	dict_index_copy_types(range_start, index, key->ext_key_parts);
13822 
13823 	range_end = dtuple_create(heap, key->ext_key_parts);
13824 	dict_index_copy_types(range_end, index, key->ext_key_parts);
13825 
13826 	row_sel_convert_mysql_key_to_innobase(
13827 		range_start,
13828 		m_prebuilt->srch_key_val1,
13829 		m_prebuilt->srch_key_val_len,
13830 		index,
13831 		(byte*) (min_key ? min_key->key : (const uchar*) 0),
13832 		(ulint) (min_key ? min_key->length : 0));
13833 
13834 	DBUG_ASSERT(min_key
13835 		    ? range_start->n_fields > 0
13836 		    : range_start->n_fields == 0);
13837 
13838 	row_sel_convert_mysql_key_to_innobase(
13839 		range_end,
13840 		m_prebuilt->srch_key_val2,
13841 		m_prebuilt->srch_key_val_len,
13842 		index,
13843 		(byte*) (max_key ? max_key->key : (const uchar*) 0),
13844 		(ulint) (max_key ? max_key->length : 0));
13845 
13846 	DBUG_ASSERT(max_key
13847 		    ? range_end->n_fields > 0
13848 		    : range_end->n_fields == 0);
13849 
13850 	mode1 = convert_search_mode_to_innobase(
13851 		min_key ? min_key->flag : HA_READ_KEY_EXACT);
13852 
13853 	mode2 = convert_search_mode_to_innobase(
13854 		max_key ? max_key->flag : HA_READ_KEY_EXACT);
13855 
13856 	if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
13857 
13858 		if (dict_index_is_spatial(index)) {
13859 			/*Only min_key used in spatial index. */
13860 			n_rows = rtr_estimate_n_rows_in_range(
13861 				index, range_start, mode1);
13862 		} else {
13863 			n_rows = btr_estimate_n_rows_in_range(
13864 				index, range_start, mode1, range_end, mode2);
13865 		}
13866 	} else {
13867 
13868 		n_rows = HA_POS_ERROR;
13869 	}
13870 
13871 	mem_heap_free(heap);
13872 
13873 	DBUG_EXECUTE_IF(
13874 		"print_btr_estimate_n_rows_in_range_return_value",
13875 		push_warning_printf(
13876 			ha_thd(), Sql_condition::WARN_LEVEL_WARN,
13877 			ER_NO_DEFAULT,
13878 			"btr_estimate_n_rows_in_range(): %lld",
13879                         (longlong) n_rows);
13880 	);
13881 
13882 func_exit:
13883 
13884 	m_prebuilt->trx->op_info = (char*)"";
13885 
13886 	/* The MySQL optimizer seems to believe an estimate of 0 rows is
13887 	always accurate and may return the result 'Empty set' based on that.
13888 	The accuracy is not guaranteed, and even if it were, for a locking
13889 	read we should anyway perform the search to set the next-key lock.
13890 	Add 1 to the value to make sure MySQL does not make the assumption! */
13891 
13892 	if (n_rows == 0) {
13893 		n_rows = 1;
13894 	}
13895 
13896 	DBUG_RETURN((ha_rows) n_rows);
13897 }
13898 
13899 /*********************************************************************//**
13900 Gives an UPPER BOUND to the number of rows in a table. This is used in
13901 filesort.cc.
13902 @return upper bound of rows */
13903 
13904 ha_rows
estimate_rows_upper_bound()13905 ha_innobase::estimate_rows_upper_bound()
13906 /*====================================*/
13907 {
13908 	const dict_index_t*	index;
13909 	ulonglong		estimate;
13910 	ulonglong		local_data_file_length;
13911 
13912 	DBUG_ENTER("estimate_rows_upper_bound");
13913 
13914 	/* We do not know if MySQL can call this function before calling
13915 	external_lock(). To be safe, update the thd of the current table
13916 	handle. */
13917 
13918 	update_thd(ha_thd());
13919 
13920 	m_prebuilt->trx->op_info = "calculating upper bound for table rows";
13921 
13922 	index = dict_table_get_first_index(m_prebuilt->table);
13923 
13924 	ulint	stat_n_leaf_pages = index->stat_n_leaf_pages;
13925 
13926 	ut_a(stat_n_leaf_pages > 0);
13927 
13928 	local_data_file_length = ulonglong(stat_n_leaf_pages)
13929 		<< srv_page_size_shift;
13930 
13931 	/* Calculate a minimum length for a clustered index record and from
13932 	that an upper bound for the number of rows. Since we only calculate
13933 	new statistics in row0mysql.cc when a table has grown by a threshold
13934 	factor, we must add a safety factor 2 in front of the formula below. */
13935 
13936 	estimate = 2 * local_data_file_length
13937 		/ dict_index_calc_min_rec_len(index);
13938 
13939 	m_prebuilt->trx->op_info = "";
13940 
13941         /* Set num_rows less than MERGEBUFF to simulate the case where we do
13942         not have enough space to merge the externally sorted file blocks. */
13943         DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF",
13944                         estimate = 2;
13945                         DBUG_SET("-d,set_num_rows_lt_MERGEBUFF");
13946                        );
13947 
13948 	DBUG_RETURN((ha_rows) estimate);
13949 }
13950 
13951 /*********************************************************************//**
13952 How many seeks it will take to read through the table. This is to be
13953 comparable to the number returned by records_in_range so that we can
13954 decide if we should scan the table or use keys.
13955 @return estimated time measured in disk seeks */
13956 
13957 double
scan_time()13958 ha_innobase::scan_time()
13959 /*====================*/
13960 {
13961 	/* Since MySQL seems to favor table scans too much over index
13962 	searches, we pretend that a sequential read takes the same time
13963 	as a random disk read, that is, we do not divide the following
13964 	by 10, which would be physically realistic. */
13965 
13966 	/* The locking below is disabled for performance reasons. Without
13967 	it we could end up returning uninitialized value to the caller,
13968 	which in the worst case could make some query plan go bogus or
13969 	issue a Valgrind warning. */
13970 	if (m_prebuilt == NULL) {
13971 		/* In case of derived table, Optimizer will try to fetch stat
13972 		for table even before table is create or open. In such
13973 		cases return default value of 1.
13974 		TODO: This will be further improved to return some approximate
13975 		estimate but that would also needs pre-population of stats
13976 		structure. As of now approach is in sync with MyISAM. */
13977 		return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
13978 	}
13979 
13980 	ulint	stat_clustered_index_size;
13981 
13982 	ut_a(m_prebuilt->table->stat_initialized);
13983 
13984 	stat_clustered_index_size =
13985 		m_prebuilt->table->stat_clustered_index_size;
13986 
13987 	return((double) stat_clustered_index_size);
13988 }
13989 
13990 /******************************************************************//**
13991 Calculate the time it takes to read a set of ranges through an index
13992 This enables us to optimise reads for clustered indexes.
13993 @return estimated time measured in disk seeks */
13994 
13995 double
read_time(uint index,uint ranges,ha_rows rows)13996 ha_innobase::read_time(
13997 /*===================*/
13998 	uint	index,	/*!< in: key number */
13999 	uint	ranges,	/*!< in: how many ranges */
14000 	ha_rows rows)	/*!< in: estimated number of rows in the ranges */
14001 {
14002 	ha_rows total_rows;
14003 
14004 	if (index != table->s->primary_key) {
14005 		/* Not clustered */
14006 		return(handler::read_time(index, ranges, rows));
14007 	}
14008 
14009 	/* Assume that the read time is proportional to the scan time for all
14010 	rows + at most one seek per range. */
14011 
14012 	double	time_for_scan = scan_time();
14013 
14014 	if ((total_rows = estimate_rows_upper_bound()) < rows) {
14015 
14016 		return(time_for_scan);
14017 	}
14018 
14019 	return(ranges + (double) rows / (double) total_rows * time_for_scan);
14020 }
14021 
14022 /** Update the system variable with the given value of the InnoDB
14023 buffer pool size.
14024 @param[in]	buf_pool_size	given value of buffer pool size.*/
14025 void
innodb_set_buf_pool_size(ulonglong buf_pool_size)14026 innodb_set_buf_pool_size(ulonglong buf_pool_size)
14027 {
14028 	innobase_buffer_pool_size = buf_pool_size;
14029 }
14030 
14031 /*********************************************************************//**
14032 Calculates the key number used inside MySQL for an Innobase index.
14033 @return the key number used inside MySQL */
14034 static
14035 unsigned
innobase_get_mysql_key_number_for_index(const TABLE * table,dict_table_t * ib_table,const dict_index_t * index)14036 innobase_get_mysql_key_number_for_index(
14037 /*====================================*/
14038 	const TABLE*		table,	/*!< in: table in MySQL data
14039 					dictionary */
14040 	dict_table_t*		ib_table,/*!< in: table in InnoDB data
14041 					dictionary */
14042 	const dict_index_t*	index)	/*!< in: index */
14043 {
14044 	const dict_index_t*	ind;
14045 	unsigned int		i;
14046 
14047 	/* If index does not belong to the table object of share structure
14048 	(ib_table comes from the share structure) search the index->table
14049 	object instead */
14050 	if (index->table != ib_table) {
14051 		i = 0;
14052 		ind = dict_table_get_first_index(index->table);
14053 
14054 		while (index != ind) {
14055 			ind = dict_table_get_next_index(ind);
14056 			i++;
14057 		}
14058 
14059 		if (dict_index_is_auto_gen_clust(index)) {
14060 			ut_a(i > 0);
14061 			i--;
14062 		}
14063 
14064 		return(i);
14065 	}
14066 
14067 	/* Directly find matching index with information from mysql TABLE
14068 	structure and InnoDB dict_index_t list */
14069 	for (i = 0; i < table->s->keys; i++) {
14070 		ind = dict_table_get_index_on_name(
14071 			ib_table, table->key_info[i].name.str);
14072 
14073 		if (index == ind) {
14074 			return(i);
14075 		}
14076 	}
14077 
14078 	/* Loop through each index of the table and lock them */
14079 	for (ind = dict_table_get_first_index(ib_table);
14080 	     ind != NULL;
14081 	     ind = dict_table_get_next_index(ind)) {
14082 		if (index == ind) {
14083 			/* Temp index is internal to InnoDB, that is
14084 			not present in the MySQL index list, so no
14085 			need to print such mismatch warning. */
14086 			if (index->is_committed()) {
14087 				sql_print_warning(
14088 					"Found index %s in InnoDB index list"
14089 					" but not its MariaDB index number."
14090 					" It could be an InnoDB internal"
14091 					" index.",
14092 					index->name());
14093 			}
14094 			return(~0U);
14095 		}
14096 	}
14097 
14098 	ut_error;
14099 
14100 	return(~0U);
14101 }
14102 
14103 /*********************************************************************//**
14104 Calculate Record Per Key value. Need to exclude the NULL value if
14105 innodb_stats_method is set to "nulls_ignored"
14106 @return estimated record per key value */
14107 rec_per_key_t
innodb_rec_per_key(dict_index_t * index,ulint i,ha_rows records)14108 innodb_rec_per_key(
14109 /*===============*/
14110 	dict_index_t*	index,		/*!< in: dict_index_t structure */
14111 	ulint		i,		/*!< in: the column we are
14112 					calculating rec per key */
14113 	ha_rows		records)	/*!< in: estimated total records */
14114 {
14115 	rec_per_key_t	rec_per_key;
14116 	ib_uint64_t	n_diff;
14117 
14118 	ut_a(index->table->stat_initialized);
14119 
14120 	ut_ad(i < dict_index_get_n_unique(index));
14121 	ut_ad(!dict_index_is_spatial(index));
14122 
14123 	if (records == 0) {
14124 		/* "Records per key" is meaningless for empty tables.
14125 		Return 1.0 because that is most convenient to the Optimizer. */
14126 		return(1.0);
14127 	}
14128 
14129 	n_diff = index->stat_n_diff_key_vals[i];
14130 
14131 	if (n_diff == 0) {
14132 
14133 		rec_per_key = static_cast<rec_per_key_t>(records);
14134 	} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
14135 		ib_uint64_t	n_null;
14136 		ib_uint64_t	n_non_null;
14137 
14138 		n_non_null = index->stat_n_non_null_key_vals[i];
14139 
14140 		/* In theory, index->stat_n_non_null_key_vals[i]
14141 		should always be less than the number of records.
14142 		Since this is statistics value, the value could
14143 		have slight discrepancy. But we will make sure
14144 		the number of null values is not a negative number. */
14145 		if (records < n_non_null) {
14146 			n_null = 0;
14147 		} else {
14148 			n_null = records - n_non_null;
14149 		}
14150 
14151 		/* If the number of NULL values is the same as or
14152 		larger than that of the distinct values, we could
14153 		consider that the table consists mostly of NULL value.
14154 		Set rec_per_key to 1. */
14155 		if (n_diff <= n_null) {
14156 			rec_per_key = 1.0;
14157 		} else {
14158 			/* Need to exclude rows with NULL values from
14159 			rec_per_key calculation */
14160 			rec_per_key
14161 				= static_cast<rec_per_key_t>(records - n_null)
14162 				/ (n_diff - n_null);
14163 		}
14164 	} else {
14165 		DEBUG_SYNC_C("after_checking_for_0");
14166 		rec_per_key = static_cast<rec_per_key_t>(records) / n_diff;
14167 	}
14168 
14169 	if (rec_per_key < 1.0) {
14170 		/* Values below 1.0 are meaningless and must be due to the
14171 		stats being imprecise. */
14172 		rec_per_key = 1.0;
14173 	}
14174 
14175 	return(rec_per_key);
14176 }
14177 
14178 /** Calculate how many KiB of new data we will be able to insert to the
14179 tablespace without running out of space. Start with a space object that has
14180 been acquired by the caller who holds it for the calculation,
14181 @param[in]	space		tablespace object from fil_space_acquire()
14182 @return available space in KiB */
14183 static uintmax_t
fsp_get_available_space_in_free_extents(const fil_space_t & space)14184 fsp_get_available_space_in_free_extents(const fil_space_t& space)
14185 {
14186 	ulint	size_in_header = space.size_in_header;
14187 	if (size_in_header < FSP_EXTENT_SIZE) {
14188 		return 0;		/* TODO: count free frag pages and
14189 					return a value based on that */
14190 	}
14191 
14192 	/* Below we play safe when counting free extents above the free limit:
14193 	some of them will contain extent descriptor pages, and therefore
14194 	will not be free extents */
14195 	ut_ad(size_in_header >= space.free_limit);
14196 	ulint	n_free_up =
14197 		(size_in_header - space.free_limit) / FSP_EXTENT_SIZE;
14198 
14199 	const ulint size = page_size_t(space.flags).physical();
14200 	if (n_free_up > 0) {
14201 		n_free_up--;
14202 		n_free_up -= n_free_up / (size / FSP_EXTENT_SIZE);
14203 	}
14204 
14205 	/* We reserve 1 extent + 0.5 % of the space size to undo logs
14206 	and 1 extent + 0.5 % to cleaning operations; NOTE: this source
14207 	code is duplicated in the function above! */
14208 
14209 	ulint	reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
14210 	ulint	n_free = space.free_len + n_free_up;
14211 
14212 	if (reserve > n_free) {
14213 		return(0);
14214 	}
14215 
14216 	return(static_cast<uintmax_t>(n_free - reserve)
14217 	       * FSP_EXTENT_SIZE * (size / 1024));
14218 }
14219 
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in is
selected by the bits of flag:
  HA_STATUS_TIME      optionally refresh the InnoDB statistics (only when
                      is_analyze or innobase_stats_on_metadata is set) and
                      set stats.update_time
  HA_STATUS_VARIABLE  stats.records, the data/index/delete lengths,
                      stats.mean_rec_length and related fields
  HA_STATUS_CONST     cross-check the number of indexes against the SQL
                      layer, set stats.create_time and key->rec_per_key[]
  HA_STATUS_ERRKEY    errkey, from the error info of the transaction
  HA_STATUS_AUTO      stats.auto_increment_value
@return HA_ERR_* error code or 0; HA_ERR_GENERIC is returned when
dict_stats_update() fails */

int
ha_innobase::info_low(
/*==================*/
	uint	flag,	/*!< in: what information is requested */
	bool	is_analyze)	/*!< in: true when invoked by analyze(),
				false when invoked by info() */
{
	dict_table_t*	ib_table;
	ib_uint64_t	n_rows;
	char		path[FN_REFLEN];
	os_file_stat_t	stat_info;

	DBUG_ENTER("info");

	DEBUG_SYNC_C("ha_innobase_info_low");

	/* dict_sys->mutex is acquired further below, so the caller must
	not already hold it. */
	ut_ad(!mutex_own(&dict_sys->mutex));

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted.
	NOTE(review): the only srv_force_recovery test visible in this
	function is near the end, where it skips the ERRKEY and AUTO
	handling. */

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "returning various info to MariaDB";

	ib_table = m_prebuilt->table;
	DBUG_ASSERT(ib_table->get_ref_count() > 0);

	/* An unreadable table is flagged as having initialized
	statistics, which satisfies the ut_a(stat_initialized) checks
	below. */
	if (!ib_table->is_readable()) {
		ib_table->stat_initialized = true;
	}

	if (flag & HA_STATUS_TIME) {
		if (is_analyze || innobase_stats_on_metadata) {

			dict_stats_upd_option_t	opt;
			dberr_t			ret;

			m_prebuilt->trx->op_info = "updating table statistics";

			/* Choose how the statistics are to be refreshed. */
			if (dict_stats_is_persistent_enabled(ib_table)) {

				if (is_analyze) {
					/* Under the data dictionary lock,
					presumably take the table out of the
					background recalculation pool and
					wait for the background statistics
					code to stop using it - confirm
					against the dict_stats_* helpers. */
					row_mysql_lock_data_dictionary(
						m_prebuilt->trx);
					dict_stats_recalc_pool_del(ib_table);
					dict_stats_wait_bg_to_stop_using_table(
						ib_table, m_prebuilt->trx);
					row_mysql_unlock_data_dictionary(
						m_prebuilt->trx);
					opt = DICT_STATS_RECALC_PERSISTENT;
				} else {
					/* This is e.g. 'SHOW INDEXES', fetch
					the persistent stats from disk. */
					opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
				}
			} else {
				opt = DICT_STATS_RECALC_TRANSIENT;
			}

			ret = dict_stats_update(ib_table, opt);

			/* After a persistent recalculation, clear the
			BG_STAT_SHOULD_QUIT bit again; this is done under
			dict_sys->mutex and regardless of ret. */
			if (opt == DICT_STATS_RECALC_PERSISTENT) {
				mutex_enter(&dict_sys->mutex);
				ib_table->stats_bg_flag
					&= byte(~BG_STAT_SHOULD_QUIT);
				mutex_exit(&dict_sys->mutex);
			}

			if (ret != DB_SUCCESS) {
				m_prebuilt->trx->op_info = "";
				DBUG_RETURN(HA_ERR_GENERIC);
			}

			m_prebuilt->trx->op_info =
				"returning various info to MariaDB";
		}


		stats.update_time = (ulong) ib_table->update_time;
	}

	/* Debug injection: leave before dict_sys->mutex would be taken. */
	DBUG_EXECUTE_IF("dict_sys_mutex_avoid", goto func_exit;);

	dict_stats_init(ib_table);

	if (flag & HA_STATUS_VARIABLE) {

		ulint	stat_clustered_index_size;
		ulint	stat_sum_of_other_index_sizes;

		/* Copy the three statistics values while holding
		dict_sys->mutex. */
		mutex_enter(&dict_sys->mutex);

		ut_a(ib_table->stat_initialized);

		n_rows = ib_table->stat_n_rows;

		stat_clustered_index_size
			= ib_table->stat_clustered_index_size;

		stat_sum_of_other_index_sizes
			= ib_table->stat_sum_of_other_index_sizes;

		mutex_exit(&dict_sys->mutex);

		/*
		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) {
			n_rows++;
		}

		/* Fix bug#40386: Not flushing query cache after truncate.
		n_rows can not be 0 unless the table is empty, set to 1
		instead. The original problem of bug#29507 is actually
		fixed in the server code. */
		if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 1;

			/* We need to reset the m_prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			m_prebuilt->autoinc_last_value = 0;
		}

		stats.records = (ha_rows) n_rows;
		stats.deleted = 0;
		/* The length fields are only set when the table has a
		tablespace object. The index sizes are multiplied by the
		physical page size to obtain byte lengths. */
		if (fil_space_t* space = ib_table->space) {
			const ulint size = page_size_t(space->flags)
				.physical();
			stats.data_file_length
				= ulonglong(stat_clustered_index_size)
				* size;
			stats.index_file_length
				= ulonglong(stat_sum_of_other_index_sizes)
				* size;
			/* The helper reports KiB; convert to bytes. */
			stats.delete_length = 1024
				* fsp_get_available_space_in_free_extents(
					*space);
		}
		stats.check_time = 0;
		stats.mrr_length_per_rec= (uint)ref_length +  8; // 8 = max(sizeof(void *));

		if (stats.records == 0) {
			stats.mean_rec_length = 0;
		} else {
			stats.mean_rec_length = (ulong)
				(stats.data_file_length / stats.records);
		}
	}

	if (flag & HA_STATUS_CONST) {
		ulong	i;
		/* Verify the number of index in InnoDB and MySQL
		matches up. If m_prebuilt->clust_index_was_generated
		holds, InnoDB defines GEN_CLUST_INDEX internally */
		ulint	num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
			- m_prebuilt->clust_index_was_generated;
		if (table->s->keys < num_innodb_index) {
			/* If there are too many indexes defined
			inside InnoDB, ignore those that are being
			created, because MySQL will only consider
			the fully built indexes here. */

			for (const dict_index_t* index
				     = UT_LIST_GET_FIRST(ib_table->indexes);
			     index != NULL;
			     index = UT_LIST_GET_NEXT(indexes, index)) {

				/* First, online index creation is
				completed inside InnoDB, and then
				MySQL attempts to upgrade the
				meta-data lock so that it can rebuild
				the .frm file. If we get here in that
				time frame, dict_index_is_online_ddl()
				would not hold and the index would
				still not be included in TABLE_SHARE. */
				if (!index->is_committed()) {
					num_innodb_index--;
				}
			}

			/* If InnoDB still has more indexes and the FTS
			doc id index exists, do not count that one
			either. */
			if (table->s->keys < num_innodb_index
			    && innobase_fts_check_doc_id_index(
				    ib_table, NULL, NULL)
			    == FTS_EXIST_DOC_ID_INDEX) {
				num_innodb_index--;
			}
		}

		/* A remaining difference is recorded on the table and
		reported through ib_push_frm_error(). */
		if (table->s->keys != num_innodb_index) {
			ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
			ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
		}


		/* Build the path of the table definition file
		(data home + normalized table path + reg_ext); its ctime is
		reported as the creation time. The snprintf() result is not
		checked for truncation. */
		snprintf(path, sizeof(path), "%s/%s%s",
			 mysql_data_home, table->s->normalized_path.str,
			 reg_ext);

		unpack_filename(path,path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(
			    path, &stat_info, false,
			    srv_read_only_mode) == DB_SUCCESS) {
			stats.create_time = (ulong) stat_info.ctime;
		}

		/* Scope guard: holds dict_sys->mutex from here until the
		end of the enclosing HA_STATUS_CONST block, including the
		early exits from the loops below. */
		struct Locking {
			Locking() { mutex_enter(&dict_sys->mutex); }
			~Locking() { mutex_exit(&dict_sys->mutex); }
		} locking;

		ut_a(ib_table->stat_initialized);

		/* Fill in rec_per_key[] for every key of the SQL layer. */
		for (i = 0; i < table->s->keys; i++) {
			ulong	j;

			dict_index_t* index = innobase_get_index(i);

			if (index == NULL) {
				/* No InnoDB index for this key: report
				the mismatch and stop processing keys. */
				ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
				ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
				break;
			}

			KEY*	key = &table->key_info[i];

			for (j = 0; j < key->ext_key_parts; j++) {

				if ((key->flags & HA_FULLTEXT)
				    || (key->flags & HA_SPATIAL)) {

					/* The record per key does not apply to
					FTS or Spatial indexes. */
				/*
					key->rec_per_key[j] = 1;
					key->set_records_per_key(j, 1.0);
				*/
					continue;
				}

				/* The SQL layer asks about more key parts
				than InnoDB treats as unique for this index:
				log an error and skip the remaining parts of
				this key. */
				if (j + 1 > index->n_uniq) {
					sql_print_error(
						"Index %s of %s has %u columns"
					        " unique inside InnoDB, but "
						"MySQL is asking statistics for"
					        " %lu columns. Have you mixed "
						"up .frm files from different "
						" installations? %s",
						index->name(),
						ib_table->name.m_name,
						index->n_uniq, j + 1,
						TROUBLESHOOTING_MSG);
					break;
				}

				/* innodb_rec_per_key() will use
				index->stat_n_diff_key_vals[] and the value we
				pass index->table->stat_n_rows. Both are
				calculated by ANALYZE and by the background
				stats gathering thread (which kicks in when too
				much of the table has been changed). In
				addition table->stat_n_rows is adjusted with
				each DML (e.g. ++ on row insert). Those
				adjustments are not MVCC'ed and not even
				reversed on rollback. So,
				index->stat_n_diff_key_vals[] and
				index->table->stat_n_rows could have been
				calculated at different time. This is
				acceptable. */

				ulong	rec_per_key_int = static_cast<ulong>(
					innodb_rec_per_key(index, j,
							   stats.records));

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key_int = rec_per_key_int / 2;

				/* Never report fewer than one record per
				key. */
				if (rec_per_key_int == 0) {
					rec_per_key_int = 1;
				}

				key->rec_per_key[j] = rec_per_key_int;
			}
		}
	}

	/* At this level of forced recovery, the error-key and
	auto-increment handling below is skipped. */
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		goto func_exit;

	} else if (flag & HA_STATUS_ERRKEY) {
		const dict_index_t*	err_index;

		ut_a(m_prebuilt->trx);
		ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);

		err_index = trx_get_error_info(m_prebuilt->trx);

		/* Prefer the index recorded with the error; otherwise
		fall back to the key number stored in the transaction,
		mapping ULINT_UNDEFINED to ~0U. */
		if (err_index) {
			errkey = innobase_get_mysql_key_number_for_index(
					table, ib_table, err_index);
		} else {
			errkey = (unsigned int) (
				(m_prebuilt->trx->error_key_num
				 == ULINT_UNDEFINED)
					? ~0U
					: m_prebuilt->trx->error_key_num);
		}
	}

	if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
		stats.auto_increment_value = innobase_peek_autoinc();
	}

func_exit:
	m_prebuilt->trx->op_info = (char*)"";

	DBUG_RETURN(0);
}
14568 
14569 /*********************************************************************//**
14570 Returns statistics information of the table to the MySQL interpreter,
14571 in various fields of the handle object.
14572 @return HA_ERR_* error code or 0 */
14573 
14574 int
info(uint flag)14575 ha_innobase::info(
14576 /*==============*/
14577 	uint	flag)	/*!< in: what information is requested */
14578 {
14579 	return(info_low(flag, false /* not ANALYZE */));
14580 }
14581 
14582 /*
14583 Updates index cardinalities of the table, based on random dives into
14584 each index tree. This does NOT calculate exact statistics on the table.
14585 @return HA_ADMIN_* error code or HA_ADMIN_OK */
14586 
14587 int
analyze(THD *,HA_CHECK_OPT *)14588 ha_innobase::analyze(THD*, HA_CHECK_OPT*)
14589 {
14590 	/* Simply call info_low() with all the flags
14591 	and request recalculation of the statistics */
14592 	int	ret = info_low(
14593 		HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
14594 		true /* this is ANALYZE */);
14595 
14596 	if (ret != 0) {
14597 		return(HA_ADMIN_FAILED);
14598 	}
14599 
14600 	return(HA_ADMIN_OK);
14601 }
14602 
14603 /*****************************************************************//**
14604 Defragment table.
14605 @return	error number */
defragment_table(const char * name)14606 inline int ha_innobase::defragment_table(const char *name)
14607 {
14608 	char		norm_name[FN_REFLEN];
14609 	dict_table_t*	table = NULL;
14610 	dict_index_t*	index = NULL;
14611 	int		ret = 0;
14612 	dberr_t		err = DB_SUCCESS;
14613 
14614 	normalize_table_name(norm_name, name);
14615 
14616 	table = dict_table_open_on_name(norm_name, FALSE,
14617 		FALSE, DICT_ERR_IGNORE_FK_NOKEY);
14618 
14619 	for (index = dict_table_get_first_index(table); index;
14620 	     index = dict_table_get_next_index(index)) {
14621 
14622 		if (index->is_corrupted()) {
14623 			continue;
14624 		}
14625 
14626 		if (dict_index_is_spatial(index)) {
14627 			/* Do not try to defragment spatial indexes,
14628 			because doing it properly would require
14629 			appropriate logic around the SSN (split
14630 			sequence number). */
14631 			continue;
14632 		}
14633 
14634 		if (index->page == FIL_NULL) {
14635 			/* Do not defragment auxiliary tables related
14636 			to FULLTEXT INDEX. */
14637 			ut_ad(index->type & DICT_FTS);
14638 			continue;
14639 		}
14640 
14641 		if (btr_defragment_find_index(index)) {
14642 			// We borrow this error code. When the same index is
14643 			// already in the defragmentation queue, issue another
14644 			// defragmentation only introduces overhead. We return
14645 			// an error here to let the user know this is not
14646 			// necessary. Note that this will fail a query that's
14647 			// trying to defragment a full table if one of the
14648 			// indicies in that table is already in defragmentation.
14649 			// We choose this behavior so user is aware of this
14650 			// rather than silently defragment other indicies of
14651 			// that table.
14652 			ret = ER_SP_ALREADY_EXISTS;
14653 			break;
14654 		}
14655 
14656 		os_event_t event = btr_defragment_add_index(index, &err);
14657 
14658 		if (err != DB_SUCCESS) {
14659 			push_warning_printf(
14660 				current_thd,
14661 				Sql_condition::WARN_LEVEL_WARN,
14662 				ER_NO_SUCH_TABLE,
14663 				"Table %s is encrypted but encryption service or"
14664 				" used key_id is not available. "
14665 				" Can't continue checking table.",
14666 				index->table->name.m_name);
14667 
14668 			ret = convert_error_code_to_mysql(err, 0, current_thd);
14669 			break;
14670 		}
14671 
14672 		if (event) {
14673 			while(os_event_wait_time(event, 1000000)) {
14674 				if (thd_killed(current_thd)) {
14675 					btr_defragment_remove_index(index);
14676 					ret = ER_QUERY_INTERRUPTED;
14677 					break;
14678 				}
14679 			}
14680 			os_event_destroy(event);
14681 		}
14682 
14683 		if (ret) {
14684 			break;
14685 		}
14686 	}
14687 
14688 	dict_table_close(table, FALSE, FALSE);
14689 	return ret;
14690 }
14691 
14692 /**********************************************************************//**
14693 This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
14694 the table in MySQL. */
14695 
14696 int
optimize(THD * thd,HA_CHECK_OPT *)14697 ha_innobase::optimize(
14698 /*==================*/
14699 	THD*		thd,		/*!< in: connection thread handle */
14700 	HA_CHECK_OPT*)
14701 {
14702 
14703 	/* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
14704 	we have to hijack some existing command in order to be able to test
14705 	the new admin commands added in InnoDB's FTS support. For now, we
14706 	use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in
14707 	InnoDB (so it recreates the table anew), and map it to OPTIMIZE.
14708 
14709 	This works OK otherwise, but MySQL locks the entire table during
14710 	calls to OPTIMIZE, which is undesirable. */
14711 	bool try_alter = true;
14712 
14713 	if (!m_prebuilt->table->is_temporary() && srv_defragment) {
14714 		int err = defragment_table(m_prebuilt->table->name.m_name);
14715 
14716 		if (err == 0) {
14717 			try_alter = false;
14718 		} else {
14719 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
14720 					    uint(err),
14721 				"InnoDB: Cannot defragment table %s: returned error code %d\n",
14722 				m_prebuilt->table->name.m_name, err);
14723 
14724 			if(err == ER_SP_ALREADY_EXISTS) {
14725 				try_alter = false;
14726 			}
14727 		}
14728 	}
14729 
14730 	if (innodb_optimize_fulltext_only) {
14731 		if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
14732 		    && m_prebuilt->table->space) {
14733 			fts_sync_table(m_prebuilt->table);
14734 			fts_optimize_table(m_prebuilt->table);
14735 		}
14736 		try_alter = false;
14737 	}
14738 
14739 	return try_alter ? HA_ADMIN_TRY_ALTER : HA_ADMIN_OK;
14740 }
14741 
14742 /*******************************************************************//**
14743 Tries to check that an InnoDB table is not corrupted. If corruption is
14744 noticed, prints to stderr information about it. In case of corruption
14745 may also assert a failure and crash the server.
14746 @return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
14747 
14748 int
check(THD * thd,HA_CHECK_OPT * check_opt)14749 ha_innobase::check(
14750 /*===============*/
14751 	THD*		thd,		/*!< in: user thread handle */
14752 	HA_CHECK_OPT*	check_opt)	/*!< in: check options */
14753 {
14754 	dict_index_t*	index;
14755 	ulint		n_rows;
14756 	ulint		n_rows_in_table	= ULINT_UNDEFINED;
14757 	bool		is_ok		= true;
14758 	ulint		old_isolation_level;
14759 	dberr_t		ret;
14760 
14761 	DBUG_ENTER("ha_innobase::check");
14762 	DBUG_ASSERT(thd == ha_thd());
14763 	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
14764 	ut_a(m_prebuilt->trx == thd_to_trx(thd));
14765 
14766 	if (m_prebuilt->mysql_template == NULL) {
14767 		/* Build the template; we will use a dummy template
14768 		in index scans done in checking */
14769 
14770 		build_template(true);
14771 	}
14772 
14773 	if (!m_prebuilt->table->space) {
14774 
14775 		ib_senderrf(
14776 			thd,
14777 			IB_LOG_LEVEL_ERROR,
14778 			ER_TABLESPACE_DISCARDED,
14779 			table->s->table_name.str);
14780 
14781 		DBUG_RETURN(HA_ADMIN_CORRUPT);
14782 
14783 	} else if (!m_prebuilt->table->is_readable() &&
14784 		   !m_prebuilt->table->space) {
14785 
14786 		ib_senderrf(
14787 			thd, IB_LOG_LEVEL_ERROR,
14788 			ER_TABLESPACE_MISSING,
14789 			table->s->table_name.str);
14790 
14791 		DBUG_RETURN(HA_ADMIN_CORRUPT);
14792 	}
14793 
14794 	m_prebuilt->trx->op_info = "checking table";
14795 
14796 	if (m_prebuilt->table->corrupted) {
14797 		/* If some previous operation has marked the table as
14798 		corrupted in memory, and has not propagated such to
14799 		clustered index, we will do so here */
14800 		index = dict_table_get_first_index(m_prebuilt->table);
14801 
14802 		if (!index->is_corrupted()) {
14803 			dict_set_corrupted(
14804 				index, m_prebuilt->trx, "CHECK TABLE");
14805 		}
14806 
14807 		push_warning_printf(m_user_thd,
14808 				    Sql_condition::WARN_LEVEL_WARN,
14809 				    HA_ERR_INDEX_CORRUPT,
14810 				    "InnoDB: Index %s is marked as"
14811 				    " corrupted",
14812 				    index->name());
14813 
14814 		/* Now that the table is already marked as corrupted,
14815 		there is no need to check any index of this table */
14816 		m_prebuilt->trx->op_info = "";
14817 
14818 		DBUG_RETURN(HA_ADMIN_CORRUPT);
14819 	}
14820 
14821 	old_isolation_level = m_prebuilt->trx->isolation_level;
14822 
14823 	/* We must run the index record counts at an isolation level
14824 	>= READ COMMITTED, because a dirty read can see a wrong number
14825 	of records in some index; to play safe, we normally use
14826 	REPEATABLE READ here */
14827 	m_prebuilt->trx->isolation_level = high_level_read_only
14828 		? TRX_ISO_READ_UNCOMMITTED
14829 		: TRX_ISO_REPEATABLE_READ;
14830 
14831 	ut_ad(!m_prebuilt->table->corrupted);
14832 
14833 	for (index = dict_table_get_first_index(m_prebuilt->table);
14834 	     index != NULL;
14835 	     index = dict_table_get_next_index(index)) {
14836 		/* If this is an index being created or dropped, skip */
14837 		if (!index->is_committed()) {
14838 			continue;
14839 		}
14840 
14841 		if (!(check_opt->flags & T_QUICK)
14842 		    && !index->is_corrupted()) {
14843 			/* Enlarge the fatal lock wait timeout during
14844 			CHECK TABLE. */
14845 			my_atomic_addlong(
14846 				&srv_fatal_semaphore_wait_threshold,
14847 				SRV_SEMAPHORE_WAIT_EXTENSION);
14848 
14849 			dberr_t err = btr_validate_index(
14850 					index, m_prebuilt->trx, false);
14851 
14852 			/* Restore the fatal lock wait timeout after
14853 			CHECK TABLE. */
14854 			my_atomic_addlong(
14855 				&srv_fatal_semaphore_wait_threshold,
14856 				-SRV_SEMAPHORE_WAIT_EXTENSION);
14857 
14858 			if (err != DB_SUCCESS) {
14859 				is_ok = false;
14860 
14861 				if (err == DB_DECRYPTION_FAILED) {
14862 					push_warning_printf(
14863 						thd,
14864 						Sql_condition::WARN_LEVEL_WARN,
14865 						ER_NO_SUCH_TABLE,
14866 						"Table %s is encrypted but encryption service or"
14867 						" used key_id is not available. "
14868 						" Can't continue checking table.",
14869 						index->table->name.m_name);
14870 				} else {
14871 					push_warning_printf(
14872 						thd,
14873 						Sql_condition::WARN_LEVEL_WARN,
14874 						ER_NOT_KEYFILE,
14875 						"InnoDB: The B-tree of"
14876 						" index %s is corrupted.",
14877 						index->name());
14878 				}
14879 
14880 				continue;
14881 			}
14882 		}
14883 
14884 		/* Instead of invoking change_active_index(), set up
14885 		a dummy template for non-locking reads, disabling
14886 		access to the clustered index. */
14887 		m_prebuilt->index = index;
14888 
14889 		m_prebuilt->index_usable = row_merge_is_index_usable(
14890 			m_prebuilt->trx, m_prebuilt->index);
14891 
14892 		DBUG_EXECUTE_IF(
14893 			"dict_set_index_corrupted",
14894 			if (!index->is_primary()) {
14895 				m_prebuilt->index_usable = FALSE;
14896 				// row_mysql_lock_data_dictionary(m_prebuilt->trx);
14897 				dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");
14898 				// row_mysql_unlock_data_dictionary(m_prebuilt->trx);
14899 			});
14900 
14901 		if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
14902 			if (index->is_corrupted()) {
14903 				push_warning_printf(
14904 					m_user_thd,
14905 					Sql_condition::WARN_LEVEL_WARN,
14906 					HA_ERR_INDEX_CORRUPT,
14907 					"InnoDB: Index %s is marked as"
14908 					" corrupted",
14909 					index->name());
14910 				is_ok = false;
14911 			} else {
14912 				push_warning_printf(
14913 					m_user_thd,
14914 					Sql_condition::WARN_LEVEL_WARN,
14915 					HA_ERR_TABLE_DEF_CHANGED,
14916 					"InnoDB: Insufficient history for"
14917 					" index %s",
14918 					index->name());
14919 			}
14920 			continue;
14921 		}
14922 
14923 		m_prebuilt->sql_stat_start = TRUE;
14924 		m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
14925 		m_prebuilt->n_template = 0;
14926 		m_prebuilt->need_to_access_clustered = FALSE;
14927 
14928 		dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
14929 
14930 		m_prebuilt->select_lock_type = LOCK_NONE;
14931 
14932 		/* Scan this index. */
14933 		if (dict_index_is_spatial(index)) {
14934 			ret = row_count_rtree_recs(m_prebuilt, &n_rows);
14935 		} else {
14936 			ret = row_scan_index_for_mysql(
14937 				m_prebuilt, index, &n_rows);
14938 		}
14939 
14940 		DBUG_EXECUTE_IF(
14941 			"dict_set_index_corrupted",
14942 			if (!index->is_primary()) {
14943 				ret = DB_CORRUPTION;
14944 			});
14945 
14946 		if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) {
14947 			/* Do not report error since this could happen
14948 			during shutdown */
14949 			break;
14950 		}
14951 		if (ret != DB_SUCCESS) {
14952 			/* Assume some kind of corruption. */
14953 			push_warning_printf(
14954 				thd, Sql_condition::WARN_LEVEL_WARN,
14955 				ER_NOT_KEYFILE,
14956 				"InnoDB: The B-tree of"
14957 				" index %s is corrupted.",
14958 				index->name());
14959 			is_ok = false;
14960 			dict_set_corrupted(
14961 				index, m_prebuilt->trx, "CHECK TABLE-check index");
14962 		}
14963 
14964 
14965 		if (index == dict_table_get_first_index(m_prebuilt->table)) {
14966 			n_rows_in_table = n_rows;
14967 		} else if (!(index->type & DICT_FTS)
14968 			   && (n_rows != n_rows_in_table)) {
14969 			push_warning_printf(
14970 				thd, Sql_condition::WARN_LEVEL_WARN,
14971 				ER_NOT_KEYFILE,
14972 				"InnoDB: Index '%-.200s' contains " ULINTPF
14973 				" entries, should be " ULINTPF ".",
14974 				index->name(), n_rows, n_rows_in_table);
14975 			is_ok = false;
14976 			dict_set_corrupted(
14977 				index, m_prebuilt->trx,
14978 				"CHECK TABLE; Wrong count");
14979 		}
14980 	}
14981 
14982 	/* Restore the original isolation level */
14983 	m_prebuilt->trx->isolation_level = old_isolation_level;
14984 #ifdef BTR_CUR_HASH_ADAPT
14985 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
14986 	/* We validate the whole adaptive hash index for all tables
14987 	at every CHECK TABLE only when QUICK flag is not present. */
14988 
14989 	if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
14990 		push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
14991 			     ER_NOT_KEYFILE,
14992 			     "InnoDB: The adaptive hash index is corrupted.");
14993 		is_ok = false;
14994 	}
14995 # endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
14996 #endif /* BTR_CUR_HASH_ADAPT */
14997 	m_prebuilt->trx->op_info = "";
14998 
14999 	DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
15000 }
15001 
15002 /*******************************************************************//**
15003 Gets the foreign key create info for a table stored in InnoDB.
15004 @return own: character string in the form which can be inserted to the
15005 CREATE TABLE statement, MUST be freed with
15006 ha_innobase::free_foreign_key_create_info */
15007 
15008 char*
get_foreign_key_create_info(void)15009 ha_innobase::get_foreign_key_create_info(void)
15010 /*==========================================*/
15011 {
15012 	ut_a(m_prebuilt != NULL);
15013 
15014 	/* We do not know if MySQL can call this function before calling
15015 	external_lock(). To be safe, update the thd of the current table
15016 	handle. */
15017 
15018 	update_thd(ha_thd());
15019 
15020 	m_prebuilt->trx->op_info = "getting info on foreign keys";
15021 
15022 	/* Output the data to a temporary string */
15023 	std::string str = dict_print_info_on_foreign_keys(
15024 		TRUE, m_prebuilt->trx,
15025 		m_prebuilt->table);
15026 
15027 	m_prebuilt->trx->op_info = "";
15028 
15029 	/* Allocate buffer for the string */
15030 	char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
15031 
15032 	/* JAN: TODO: MySQL 5.7
15033 	fk_str = reinterpret_cast<char*>(
15034 			my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0)));
15035 	*/
15036 
15037 
15038 
15039 	if (fk_str) {
15040 		memcpy(fk_str, str.c_str(), str.length());
15041 		fk_str[str.length()]='\0';
15042 	}
15043 
15044 	return(fk_str);
15045 }
15046 
15047 
15048 /***********************************************************************//**
15049 Maps a InnoDB foreign key constraint to a equivalent MySQL foreign key info.
15050 @return pointer to foreign key info */
15051 static
15052 FOREIGN_KEY_INFO*
get_foreign_key_info(THD * thd,dict_foreign_t * foreign)15053 get_foreign_key_info(
15054 /*=================*/
15055 	THD*			thd,	/*!< in: user thread handle */
15056 	dict_foreign_t*		foreign)/*!< in: foreign key constraint */
15057 {
15058 	FOREIGN_KEY_INFO	f_key_info;
15059 	FOREIGN_KEY_INFO*	pf_key_info;
15060 	uint			i = 0;
15061 	size_t			len;
15062 	char			tmp_buff[NAME_LEN+1];
15063 	char			name_buff[NAME_LEN+1];
15064 	const char*		ptr;
15065 	LEX_CSTRING*		referenced_key_name;
15066 	LEX_CSTRING*		name = NULL;
15067 
15068 	if (dict_table_t::is_temporary_name(foreign->foreign_table_name)) {
15069 		return NULL;
15070 	}
15071 
15072 	ptr = dict_remove_db_name(foreign->id);
15073 	f_key_info.foreign_id = thd_make_lex_string(
15074 		thd, 0, ptr, strlen(ptr), 1);
15075 
15076 	/* Name format: database name, '/', table name, '\0' */
15077 
15078 	/* Referenced (parent) database name */
15079 	len = dict_get_db_name_len(foreign->referenced_table_name);
15080 	ut_a(len < sizeof(tmp_buff));
15081 	ut_memcpy(tmp_buff, foreign->referenced_table_name, len);
15082 	tmp_buff[len] = 0;
15083 
15084 	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15085 	f_key_info.referenced_db = thd_make_lex_string(
15086 		thd, 0, name_buff, len, 1);
15087 
15088 	/* Referenced (parent) table name */
15089 	ptr = dict_remove_db_name(foreign->referenced_table_name);
15090 	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15091 	f_key_info.referenced_table = thd_make_lex_string(
15092 		thd, 0, name_buff, len, 1);
15093 
15094 	/* Dependent (child) database name */
15095 	len = dict_get_db_name_len(foreign->foreign_table_name);
15096 	ut_a(len < sizeof(tmp_buff));
15097 	ut_memcpy(tmp_buff, foreign->foreign_table_name, len);
15098 	tmp_buff[len] = 0;
15099 
15100 	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15101 	f_key_info.foreign_db = thd_make_lex_string(
15102 		thd, 0, name_buff, len, 1);
15103 
15104 	/* Dependent (child) table name */
15105 	ptr = dict_remove_db_name(foreign->foreign_table_name);
15106 	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15107 	f_key_info.foreign_table = thd_make_lex_string(
15108 		thd, 0, name_buff, len, 1);
15109 
15110 	do {
15111 		ptr = foreign->foreign_col_names[i];
15112 		name = thd_make_lex_string(thd, name, ptr,
15113 					   strlen(ptr), 1);
15114 		f_key_info.foreign_fields.push_back(name);
15115 		ptr = foreign->referenced_col_names[i];
15116 		name = thd_make_lex_string(thd, name, ptr,
15117 					   strlen(ptr), 1);
15118 		f_key_info.referenced_fields.push_back(name);
15119 	} while (++i < foreign->n_fields);
15120 
15121 	if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
15122 		f_key_info.delete_method = FK_OPTION_CASCADE;
15123 	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
15124 		f_key_info.delete_method = FK_OPTION_SET_NULL;
15125 	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
15126 		f_key_info.delete_method = FK_OPTION_NO_ACTION;
15127 	} else {
15128 		f_key_info.delete_method = FK_OPTION_RESTRICT;
15129 	}
15130 
15131 
15132 	if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
15133 		f_key_info.update_method = FK_OPTION_CASCADE;
15134 	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
15135 		f_key_info.update_method = FK_OPTION_SET_NULL;
15136 	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
15137 		f_key_info.update_method = FK_OPTION_NO_ACTION;
15138 	} else {
15139 		f_key_info.update_method = FK_OPTION_RESTRICT;
15140 	}
15141 
15142 	/* Load referenced table to update FK referenced key name. */
15143 	if (foreign->referenced_table == NULL) {
15144 
15145 		dict_table_t*	ref_table;
15146 
15147 		ut_ad(mutex_own(&dict_sys->mutex));
15148 		ref_table = dict_table_open_on_name(
15149 			foreign->referenced_table_name_lookup,
15150 			TRUE, FALSE, DICT_ERR_IGNORE_NONE);
15151 
15152 		if (ref_table == NULL) {
15153 
15154 			if (!thd_test_options(
15155 				thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
15156 				ib::info()
15157 					<< "Foreign Key referenced table "
15158 					<< foreign->referenced_table_name
15159 					<< " not found for foreign table "
15160 					<< foreign->foreign_table_name;
15161 			}
15162 		} else {
15163 
15164 			dict_table_close(ref_table, TRUE, FALSE);
15165 		}
15166 	}
15167 
15168 	if (foreign->referenced_index
15169 	    && foreign->referenced_index->name != NULL) {
15170 		referenced_key_name = thd_make_lex_string(
15171 			thd,
15172 			f_key_info.referenced_key_name,
15173 			foreign->referenced_index->name,
15174 			strlen(foreign->referenced_index->name),
15175 			1);
15176 	} else {
15177 		referenced_key_name = NULL;
15178 	}
15179 
15180 	f_key_info.referenced_key_name = referenced_key_name;
15181 
15182 	pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info,
15183 						      sizeof(FOREIGN_KEY_INFO));
15184 
15185 	return(pf_key_info);
15186 }
15187 
15188 /*******************************************************************//**
15189 Gets the list of foreign keys in this table.
15190 @return always 0, that is, always succeeds */
15191 
15192 int
get_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15193 ha_innobase::get_foreign_key_list(
15194 /*==============================*/
15195 	THD*			thd,		/*!< in: user thread handle */
15196 	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
15197 {
15198 	update_thd(ha_thd());
15199 
15200 	m_prebuilt->trx->op_info = "getting list of foreign keys";
15201 
15202 	mutex_enter(&dict_sys->mutex);
15203 
15204 	for (dict_foreign_set::iterator it
15205 		= m_prebuilt->table->foreign_set.begin();
15206 	     it != m_prebuilt->table->foreign_set.end();
15207 	     ++it) {
15208 
15209 		FOREIGN_KEY_INFO*	pf_key_info;
15210 		dict_foreign_t*		foreign = *it;
15211 
15212 		pf_key_info = get_foreign_key_info(thd, foreign);
15213 
15214 		if (pf_key_info != NULL) {
15215 			f_key_list->push_back(pf_key_info);
15216 		}
15217 	}
15218 
15219 	mutex_exit(&dict_sys->mutex);
15220 
15221 	m_prebuilt->trx->op_info = "";
15222 
15223 	return(0);
15224 }
15225 
15226 /*******************************************************************//**
15227 Gets the set of foreign keys where this table is the referenced table.
15228 @return always 0, that is, always succeeds */
15229 
15230 int
get_parent_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15231 ha_innobase::get_parent_foreign_key_list(
15232 /*=====================================*/
15233 	THD*			thd,		/*!< in: user thread handle */
15234 	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
15235 {
15236 	update_thd(ha_thd());
15237 
15238 	m_prebuilt->trx->op_info = "getting list of referencing foreign keys";
15239 
15240 	mutex_enter(&dict_sys->mutex);
15241 
15242 	for (dict_foreign_set::iterator it
15243 		= m_prebuilt->table->referenced_set.begin();
15244 	     it != m_prebuilt->table->referenced_set.end();
15245 	     ++it) {
15246 
15247 		FOREIGN_KEY_INFO*	pf_key_info;
15248 		dict_foreign_t*		foreign = *it;
15249 
15250 		pf_key_info = get_foreign_key_info(thd, foreign);
15251 
15252 		if (pf_key_info != NULL) {
15253 			f_key_list->push_back(pf_key_info);
15254 		}
15255 	}
15256 
15257 	mutex_exit(&dict_sys->mutex);
15258 
15259 	m_prebuilt->trx->op_info = "";
15260 
15261 	return(0);
15262 }
15263 
15264 /** Table list item structure is used to store only the table
15265 and name. It is used by get_cascade_foreign_key_table_list to store
15266 the intermediate result for fetching the table set. */
15267 struct table_list_item {
15268 	/** InnoDB table object */
15269 	const dict_table_t*	table;
15270 	/** Table name */
15271 	const char*		name;
15272 };
15273 
15274 /** Structure to compare two st_tablename objects using their
15275 db and tablename. It is used in the ordering of cascade_fk_set.
15276 It returns true if the first argument precedes the second argument
15277 and false otherwise. */
15278 struct tablename_compare {
15279 
operator ()tablename_compare15280 	bool operator()(const st_handler_tablename lhs,
15281 			const st_handler_tablename rhs) const
15282 	{
15283 		int cmp = strcmp(lhs.db, rhs.db);
15284 		if (cmp == 0) {
15285 			cmp = strcmp(lhs.tablename, rhs.tablename);
15286 		}
15287 
15288 		return(cmp < 0);
15289 	}
15290 };
15291 
15292 /** Get the table name and database name for the given table.
15293 @param[in,out]	thd		user thread handle
15294 @param[out]	f_key_info	pointer to table_name_info object
15295 @param[in]	foreign		foreign key constraint. */
15296 static
15297 void
get_table_name_info(THD * thd,st_handler_tablename * f_key_info,const dict_foreign_t * foreign)15298 get_table_name_info(
15299 	THD*			thd,
15300 	st_handler_tablename*	f_key_info,
15301 	const dict_foreign_t*	foreign)
15302 {
15303 #define FILENAME_CHARSET_MBMAXLEN 5
15304 	char	tmp_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
15305 	char	name_buff[NAME_CHAR_LEN * FILENAME_CHARSET_MBMAXLEN + 1];
15306 	const char*	ptr;
15307 
15308 	size_t  len = dict_get_db_name_len(
15309 		foreign->referenced_table_name_lookup);
15310 	ut_memcpy(tmp_buff, foreign->referenced_table_name_lookup, len);
15311 	tmp_buff[len] = 0;
15312 
15313 	ut_ad(len < sizeof(tmp_buff));
15314 
15315 	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15316 	f_key_info->db = thd_strmake(thd, name_buff, len);
15317 
15318 	ptr = dict_remove_db_name(foreign->referenced_table_name_lookup);
15319 	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff));
15320 	f_key_info->tablename = thd_strmake(thd, name_buff, len);
15321 }
15322 
15323 /** Get the list of tables ordered by the dependency on the other tables using
15324 the 'CASCADE' foreign key constraint.
15325 @param[in,out]	thd		user thread handle
15326 @param[out]	fk_table_list	set of tables name info for the
15327 				dependent table
15328 @retval 0 for success. */
15329 int
get_cascade_foreign_key_table_list(THD * thd,List<st_handler_tablename> * fk_table_list)15330 ha_innobase::get_cascade_foreign_key_table_list(
15331 	THD*				thd,
15332 	List<st_handler_tablename>*	fk_table_list)
15333 {
15334 	m_prebuilt->trx->op_info = "getting cascading foreign keys";
15335 
15336 	std::list<table_list_item, ut_allocator<table_list_item> > table_list;
15337 
15338 	typedef std::set<st_handler_tablename, tablename_compare,
15339 			 ut_allocator<st_handler_tablename> >	cascade_fk_set;
15340 
15341 	cascade_fk_set	fk_set;
15342 
15343 	mutex_enter(&dict_sys->mutex);
15344 
15345 	/* Initialize the table_list with prebuilt->table name. */
15346 	struct table_list_item	item = {m_prebuilt->table,
15347 					m_prebuilt->table->name.m_name};
15348 
15349 	table_list.push_back(item);
15350 
15351 	/* Get the parent table, grand parent table info from the
15352 	table list by depth-first traversal. */
15353 	do {
15354 		const dict_table_t*			parent_table;
15355 		dict_table_t*				parent = NULL;
15356 		std::pair<cascade_fk_set::iterator,bool>	ret;
15357 
15358 		item = table_list.back();
15359 		table_list.pop_back();
15360 		parent_table = item.table;
15361 
15362 		if (parent_table == NULL) {
15363 
15364 			ut_ad(item.name != NULL);
15365 
15366 			parent_table = parent = dict_table_open_on_name(
15367 					item.name, TRUE, FALSE,
15368 					DICT_ERR_IGNORE_NONE);
15369 
15370 			if (parent_table == NULL) {
15371 				/* foreign_key_checks is or was probably
15372 				disabled; ignore the constraint */
15373 				continue;
15374 			}
15375 		}
15376 
15377 		for (dict_foreign_set::const_iterator it =
15378 		     parent_table->foreign_set.begin();
15379 		     it != parent_table->foreign_set.end(); ++it) {
15380 
15381 			const dict_foreign_t*	foreign = *it;
15382 			st_handler_tablename	f1;
15383 
15384 			/* Skip the table if there is no
15385 			cascading operation. */
15386 			if (0 == (foreign->type
15387 				  & ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
15388 				      | DICT_FOREIGN_ON_UPDATE_NO_ACTION))) {
15389 				continue;
15390 			}
15391 
15392 			if (foreign->referenced_table_name_lookup != NULL) {
15393 				get_table_name_info(thd, &f1, foreign);
15394 				ret = fk_set.insert(f1);
15395 
15396 				/* Ignore the table if it is already
15397 				in the set. */
15398 				if (!ret.second) {
15399 					continue;
15400 				}
15401 
15402 				struct table_list_item	item1 = {
15403 					foreign->referenced_table,
15404 					foreign->referenced_table_name_lookup};
15405 
15406 				table_list.push_back(item1);
15407 
15408 				st_handler_tablename*	fk_table =
15409 					(st_handler_tablename*) thd_memdup(
15410 						thd, &f1, sizeof(*fk_table));
15411 
15412 				fk_table_list->push_back(fk_table);
15413 			}
15414 		}
15415 
15416 		if (parent != NULL) {
15417 			dict_table_close(parent, true, false);
15418 		}
15419 
15420 	} while(!table_list.empty());
15421 
15422 	mutex_exit(&dict_sys->mutex);
15423 
15424 	m_prebuilt->trx->op_info = "";
15425 
15426 	return(0);
15427 }
15428 
15429 /*****************************************************************//**
15430 Checks if ALTER TABLE may change the storage engine of the table.
15431 Changing storage engines is not allowed for tables for which there
15432 are foreign key constraints (parent or child tables).
15433 @return TRUE if can switch engines */
15434 
15435 bool
can_switch_engines(void)15436 ha_innobase::can_switch_engines(void)
15437 /*=================================*/
15438 {
15439 	DBUG_ENTER("ha_innobase::can_switch_engines");
15440 
15441 	update_thd();
15442 
15443 	m_prebuilt->trx->op_info =
15444 			"determining if there are foreign key constraints";
15445 
15446 	row_mysql_freeze_data_dictionary(m_prebuilt->trx);
15447 
15448 	bool	can_switch = m_prebuilt->table->referenced_set.empty()
15449 		&& m_prebuilt->table->foreign_set.empty();
15450 
15451 	row_mysql_unfreeze_data_dictionary(m_prebuilt->trx);
15452 	m_prebuilt->trx->op_info = "";
15453 
15454 	DBUG_RETURN(can_switch);
15455 }
15456 
15457 /*******************************************************************//**
15458 Checks if a table is referenced by a foreign key. The MySQL manual states that
15459 a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
15460 delete is then allowed internally to resolve a duplicate key conflict in
15461 REPLACE, not an update.
15462 @return > 0 if referenced by a FOREIGN KEY */
15463 
15464 uint
referenced_by_foreign_key(void)15465 ha_innobase::referenced_by_foreign_key(void)
15466 /*========================================*/
15467 {
15468 	if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
15469 
15470 		return(1);
15471 	}
15472 
15473 	return(0);
15474 }
15475 
15476 /*******************************************************************//**
15477 Frees the foreign key create info for a table stored in InnoDB, if it is
15478 non-NULL. */
15479 
15480 void
free_foreign_key_create_info(char * str)15481 ha_innobase::free_foreign_key_create_info(
15482 /*======================================*/
15483 	char*	str)	/*!< in, own: create info string to free */
15484 {
15485 	if (str != NULL) {
15486 		my_free(str);
15487 	}
15488 }
15489 
15490 /*******************************************************************//**
15491 Tells something additional to the handler about how to do things.
15492 @return 0 or error number */
15493 
15494 int
extra(enum ha_extra_function operation)15495 ha_innobase::extra(
15496 /*===============*/
15497 	enum ha_extra_function operation)
15498 			   /*!< in: HA_EXTRA_FLUSH or some other flag */
15499 {
15500 	check_trx_exists(ha_thd());
15501 
15502 	/* Warning: since it is not sure that MySQL calls external_lock
15503 	before calling this function, the trx field in m_prebuilt can be
15504 	obsolete! */
15505 
15506 	switch (operation) {
15507 	case HA_EXTRA_FLUSH:
15508 		if (m_prebuilt->blob_heap) {
15509 			row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15510 		}
15511 		break;
15512 	case HA_EXTRA_RESET_STATE:
15513 		reset_template();
15514 		thd_to_trx(ha_thd())->duplicates = 0;
15515 		break;
15516 	case HA_EXTRA_NO_KEYREAD:
15517 		m_prebuilt->read_just_key = 0;
15518 		break;
15519 	case HA_EXTRA_KEYREAD:
15520 		m_prebuilt->read_just_key = 1;
15521 		break;
15522 	case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
15523 		m_prebuilt->keep_other_fields_on_keyread = 1;
15524 		break;
15525 
15526 		/* IMPORTANT: m_prebuilt->trx can be obsolete in
15527 		this method, because it is not sure that MySQL
15528 		calls external_lock before this method with the
15529 		parameters below.  We must not invoke update_thd()
15530 		either, because the calling threads may change.
15531 		CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
15532 	case HA_EXTRA_INSERT_WITH_UPDATE:
15533 		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
15534 		break;
15535 	case HA_EXTRA_NO_IGNORE_DUP_KEY:
15536 		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
15537 		break;
15538 	case HA_EXTRA_WRITE_CAN_REPLACE:
15539 		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
15540 		break;
15541 	case HA_EXTRA_WRITE_CANNOT_REPLACE:
15542 		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
15543 		break;
15544 	case HA_EXTRA_BEGIN_ALTER_COPY:
15545 		m_prebuilt->table->skip_alter_undo = 1;
15546 		if (m_prebuilt->table->is_temporary()
15547 		    || !m_prebuilt->table->versioned_by_id()) {
15548 			break;
15549 		}
15550 		trx_start_if_not_started(m_prebuilt->trx, true);
15551 		m_prebuilt->trx->mod_tables.insert(
15552 			trx_mod_tables_t::value_type(
15553 				const_cast<dict_table_t*>(m_prebuilt->table),
15554 				0))
15555 			.first->second.set_versioned(0);
15556 		break;
15557 	case HA_EXTRA_END_ALTER_COPY:
15558 		m_prebuilt->table->skip_alter_undo = 0;
15559 		break;
15560 	case HA_EXTRA_FAKE_START_STMT:
15561 		trx_register_for_2pc(m_prebuilt->trx);
15562 		m_prebuilt->sql_stat_start = true;
15563 		break;
15564 	default:/* Do nothing */
15565 		;
15566 	}
15567 
15568 	return(0);
15569 }
15570 
15571 /**
15572 MySQL calls this method at the end of each statement. This method
15573 exists for readability only. ha_innobase::reset() doesn't give any
15574 clue about the method. */
15575 
15576 int
end_stmt()15577 ha_innobase::end_stmt()
15578 {
15579 	if (m_prebuilt->blob_heap) {
15580 		row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15581 	}
15582 
15583 	reset_template();
15584 
15585 	m_ds_mrr.dsmrr_close();
15586 
15587 	/* TODO: This should really be reset in reset_template() but for now
15588 	it's safer to do it explicitly here. */
15589 
15590 	/* This is a statement level counter. */
15591 	m_prebuilt->autoinc_last_value = 0;
15592 
15593 	return(0);
15594 }
15595 
15596 /**
15597 MySQL calls this method at the end of each statement */
15598 
15599 int
reset()15600 ha_innobase::reset()
15601 {
15602 	return(end_stmt());
15603 }
15604 
15605 /******************************************************************//**
15606 MySQL calls this function at the start of each SQL statement inside LOCK
15607 TABLES. Inside LOCK TABLES the ::external_lock method does not work to
15608 mark SQL statement borders. Note also a special case: if a temporary table
15609 is created inside LOCK TABLES, MySQL has not called external_lock() at all
15610 on that table.
15611 MySQL-5.0 also calls this before each statement in an execution of a stored
15612 procedure. To make the execution more deterministic for binlogging, MySQL-5.0
15613 locks all tables involved in a stored procedure with full explicit table
15614 locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
15615 procedure.
15616 @return 0 or error code */
15617 
15618 int
start_stmt(THD * thd,thr_lock_type lock_type)15619 ha_innobase::start_stmt(
15620 /*====================*/
15621 	THD*		thd,	/*!< in: handle to the user thread */
15622 	thr_lock_type	lock_type)
15623 {
15624 	trx_t*		trx = m_prebuilt->trx;
15625 
15626 	DBUG_ENTER("ha_innobase::start_stmt");
15627 
15628 	update_thd(thd);
15629 
15630 	ut_ad(m_prebuilt->table != NULL);
15631 
15632 	trx = m_prebuilt->trx;
15633 
15634 	innobase_srv_conc_force_exit_innodb(trx);
15635 
15636 	/* Reset the AUTOINC statement level counter for multi-row INSERTs. */
15637 	trx->n_autoinc_rows = 0;
15638 
15639 	m_prebuilt->sql_stat_start = TRUE;
15640 	m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15641 	reset_template();
15642 
15643 	if (m_prebuilt->table->is_temporary()
15644 	    && m_mysql_has_locked
15645 	    && m_prebuilt->select_lock_type == LOCK_NONE) {
15646 		dberr_t error;
15647 
15648 		switch (thd_sql_command(thd)) {
15649 		case SQLCOM_INSERT:
15650 		case SQLCOM_UPDATE:
15651 		case SQLCOM_DELETE:
15652 		case SQLCOM_REPLACE:
15653 			init_table_handle_for_HANDLER();
15654 			m_prebuilt->select_lock_type = LOCK_X;
15655 			m_prebuilt->stored_select_lock_type = LOCK_X;
15656 			error = row_lock_table(m_prebuilt);
15657 
15658 			if (error != DB_SUCCESS) {
15659 				int	st = convert_error_code_to_mysql(
15660 					error, 0, thd);
15661 				DBUG_RETURN(st);
15662 			}
15663 			break;
15664 		}
15665 	}
15666 
15667 	if (!m_mysql_has_locked) {
15668 		/* This handle is for a temporary table created inside
15669 		this same LOCK TABLES; since MySQL does NOT call external_lock
15670 		in this case, we must use x-row locks inside InnoDB to be
15671 		prepared for an update of a row */
15672 
15673 		m_prebuilt->select_lock_type = LOCK_X;
15674 
15675 	} else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
15676 		   && thd_sql_command(thd) == SQLCOM_SELECT
15677 		   && lock_type == TL_READ) {
15678 
15679 		/* For other than temporary tables, we obtain
15680 		no lock for consistent read (plain SELECT). */
15681 
15682 		m_prebuilt->select_lock_type = LOCK_NONE;
15683 	} else {
15684 		/* Not a consistent read: restore the
15685 		select_lock_type value. The value of
15686 		stored_select_lock_type was decided in:
15687 		1) ::store_lock(),
15688 		2) ::external_lock(),
15689 		3) ::init_table_handle_for_HANDLER(). */
15690 
15691 		ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
15692 
15693 		m_prebuilt->select_lock_type =
15694 			m_prebuilt->stored_select_lock_type;
15695 	}
15696 
15697 	*trx->detailed_error = 0;
15698 
15699 	innobase_register_trx(ht, thd, trx);
15700 
15701 	if (!trx_is_started(trx)) {
15702 		trx->will_lock = true;
15703 	}
15704 
15705 	DBUG_RETURN(0);
15706 }
15707 
15708 /******************************************************************//**
15709 Maps a MySQL trx isolation level code to the InnoDB isolation level code
15710 @return InnoDB isolation level */
15711 static inline
15712 ulint
innobase_map_isolation_level(enum_tx_isolation iso)15713 innobase_map_isolation_level(
15714 /*=========================*/
15715 	enum_tx_isolation	iso)	/*!< in: MySQL isolation level code */
15716 {
15717 	if (UNIV_UNLIKELY(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN)
15718 	    || UNIV_UNLIKELY(srv_read_only_mode)) {
15719 		return TRX_ISO_READ_UNCOMMITTED;
15720 	}
15721 	switch (iso) {
15722 	case ISO_REPEATABLE_READ:	return(TRX_ISO_REPEATABLE_READ);
15723 	case ISO_READ_COMMITTED:	return(TRX_ISO_READ_COMMITTED);
15724 	case ISO_SERIALIZABLE:		return(TRX_ISO_SERIALIZABLE);
15725 	case ISO_READ_UNCOMMITTED:	return(TRX_ISO_READ_UNCOMMITTED);
15726 	}
15727 
15728 	ut_error;
15729 
15730 	return(0);
15731 }
15732 
15733 /******************************************************************//**
15734 As MySQL will execute an external lock for every new table it uses when it
15735 starts to process an SQL statement (an exception is when MySQL calls
15736 start_stmt for the handle) we can use this function to store the pointer to
15737 the THD in the handle. We will also use this function to communicate
15738 to InnoDB that a new SQL statement has started and that we must store a
15739 savepoint to our transaction handle, so that we are able to roll back
15740 the SQL statement in case of an error.
15741 @return 0 */
15742 
15743 int
external_lock(THD * thd,int lock_type)15744 ha_innobase::external_lock(
15745 /*=======================*/
15746 	THD*	thd,		/*!< in: handle to the user thread */
15747 	int	lock_type)	/*!< in: lock type */
15748 {
15749 	DBUG_ENTER("ha_innobase::external_lock");
15750 	DBUG_PRINT("enter",("lock_type: %d", lock_type));
15751 
15752 	update_thd(thd);
15753 	trx_t* trx = m_prebuilt->trx;
15754 	ut_ad(m_prebuilt->table);
15755 
15756 	/* Statement based binlogging does not work in isolation level
15757 	READ UNCOMMITTED and READ COMMITTED since the necessary
15758 	locks cannot be taken. In this case, we print an
15759 	informative error message and return with an error.
15760 	Note: decide_logging_format would give the same error message,
15761 	except it cannot give the extra details. */
15762 
15763 	if (lock_type == F_WRLCK
15764 	    && !(table_flags() & HA_BINLOG_STMT_CAPABLE)
15765 	    && thd_binlog_format(thd) == BINLOG_FORMAT_STMT
15766 	    && thd_binlog_filter_ok(thd)
15767 	    && thd_sqlcom_can_generate_row_events(thd)) {
15768 		bool skip = false;
15769 #ifdef WITH_WSREP
15770 		skip = trx->is_wsrep()
15771 			&& wsrep_thd_exec_mode(thd) != LOCAL_STATE;
15772 #endif /* WITH_WSREP */
15773 		/* used by test case */
15774 		DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
15775 
15776 		if (!skip) {
15777 			my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
15778 			         " InnoDB is limited to row-logging when"
15779 			         " transaction isolation level is"
15780 			         " READ COMMITTED or READ UNCOMMITTED.");
15781 
15782 			DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
15783 		}
15784 	}
15785 
15786 	/* Check for UPDATEs in read-only mode. */
15787 	if (srv_read_only_mode) {
15788 		switch (thd_sql_command(thd)) {
15789 		case SQLCOM_CREATE_TABLE:
15790 			if (lock_type != F_WRLCK) {
15791 				break;
15792 			}
15793 			/* fall through */
15794 		case SQLCOM_UPDATE:
15795 		case SQLCOM_INSERT:
15796 		case SQLCOM_REPLACE:
15797 		case SQLCOM_DROP_TABLE:
15798 		case SQLCOM_ALTER_TABLE:
15799 		case SQLCOM_OPTIMIZE:
15800 		case SQLCOM_CREATE_INDEX:
15801 		case SQLCOM_DROP_INDEX:
15802 		case SQLCOM_CREATE_SEQUENCE:
15803 		case SQLCOM_DROP_SEQUENCE:
15804 		case SQLCOM_DELETE:
15805 			ib_senderrf(thd, IB_LOG_LEVEL_WARN,
15806 				    ER_READ_ONLY_MODE);
15807 			DBUG_RETURN(HA_ERR_TABLE_READONLY);
15808 		}
15809 	}
15810 
15811 	m_prebuilt->sql_stat_start = TRUE;
15812 	m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15813 
15814 	reset_template();
15815 
15816 	switch (m_prebuilt->table->quiesce) {
15817 	case QUIESCE_START:
15818 		/* Check for FLUSH TABLE t WITH READ LOCK; */
15819 		if (!srv_read_only_mode
15820 		    && thd_sql_command(thd) == SQLCOM_FLUSH
15821 		    && lock_type == F_RDLCK) {
15822 
15823 			if (!m_prebuilt->table->space) {
15824 				ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
15825 					    ER_TABLESPACE_DISCARDED,
15826 					    table->s->table_name.str);
15827 
15828 				DBUG_RETURN(HA_ERR_TABLESPACE_MISSING);
15829 			}
15830 
15831 			row_quiesce_table_start(m_prebuilt->table, trx);
15832 
15833 			/* Use the transaction instance to track UNLOCK
15834 			TABLES. It can be done via START TRANSACTION; too
15835 			implicitly. */
15836 
15837 			++trx->flush_tables;
15838 		}
15839 		break;
15840 
15841 	case QUIESCE_COMPLETE:
15842 		/* Check for UNLOCK TABLES; implicit or explicit
15843 		or trx interruption. */
15844 		if (trx->flush_tables > 0
15845 		    && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
15846 
15847 			row_quiesce_table_complete(m_prebuilt->table, trx);
15848 
15849 			ut_a(trx->flush_tables > 0);
15850 			--trx->flush_tables;
15851 		}
15852 
15853 		break;
15854 
15855 	case QUIESCE_NONE:
15856 		break;
15857 	}
15858 
15859 	if (lock_type == F_WRLCK) {
15860 
15861 		/* If this is a SELECT, then it is in UPDATE TABLE ...
15862 		or SELECT ... FOR UPDATE */
15863 		m_prebuilt->select_lock_type = LOCK_X;
15864 		m_prebuilt->stored_select_lock_type = LOCK_X;
15865 	}
15866 
15867 	if (lock_type != F_UNLCK) {
15868 		/* MySQL is setting a new table lock */
15869 
15870 		*trx->detailed_error = 0;
15871 
15872 		innobase_register_trx(ht, thd, trx);
15873 
15874 		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
15875 		    && m_prebuilt->select_lock_type == LOCK_NONE
15876 		    && thd_test_options(
15877 			    thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15878 
15879 			/* To get serializable execution, we let InnoDB
15880 			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
15881 			which otherwise would have been consistent reads. An
15882 			exception is consistent reads in the AUTOCOMMIT=1 mode:
15883 			we know that they are read-only transactions, and they
15884 			can be serialized also if performed as consistent
15885 			reads. */
15886 
15887 			m_prebuilt->select_lock_type = LOCK_S;
15888 			m_prebuilt->stored_select_lock_type = LOCK_S;
15889 		}
15890 
15891 		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
15892 		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
15893 		an InnoDB table lock if it is released immediately at the end
15894 		of LOCK TABLES, and InnoDB's table locks in that case cause
15895 		VERY easily deadlocks.
15896 
15897 		We do not set InnoDB table locks if user has not explicitly
15898 		requested a table lock. Note that thd_in_lock_tables(thd)
15899 		can hold in some cases, e.g., at the start of a stored
15900 		procedure call (SQLCOM_CALL). */
15901 
15902 		if (m_prebuilt->select_lock_type != LOCK_NONE) {
15903 
15904 			if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
15905 			    && THDVAR(thd, table_locks)
15906 			    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
15907 			    && thd_in_lock_tables(thd)) {
15908 
15909 				dberr_t	error = row_lock_table(m_prebuilt);
15910 
15911 				if (error != DB_SUCCESS) {
15912 
15913 					DBUG_RETURN(
15914 						convert_error_code_to_mysql(
15915 							error, 0, thd));
15916 				}
15917 			}
15918 
15919 			trx->mysql_n_tables_locked++;
15920 		}
15921 
15922 		trx->n_mysql_tables_in_use++;
15923 		m_mysql_has_locked = true;
15924 
15925 		if (!trx_is_started(trx)
15926 		    && (m_prebuilt->select_lock_type != LOCK_NONE
15927 			|| m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15928 
15929 			trx->will_lock = true;
15930 		}
15931 
15932 		DBUG_RETURN(0);
15933 	} else {
15934 		DEBUG_SYNC_C("ha_innobase_end_statement");
15935 	}
15936 
15937 	/* MySQL is releasing a table lock */
15938 
15939 	trx->n_mysql_tables_in_use--;
15940 	m_mysql_has_locked = false;
15941 
15942 	innobase_srv_conc_force_exit_innodb(trx);
15943 
15944 	/* If the MySQL lock count drops to zero we know that the current SQL
15945 	statement has ended */
15946 
15947 	if (trx->n_mysql_tables_in_use == 0) {
15948 
15949 		trx->mysql_n_tables_locked = 0;
15950 		m_prebuilt->used_in_HANDLER = FALSE;
15951 
15952 		if (!thd_test_options(
15953 				thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15954 
15955 			if (trx_is_started(trx)) {
15956 
15957 				innobase_commit(ht, thd, TRUE);
15958 			}
15959 
15960 		} else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
15961 			trx->read_view.close();
15962 		}
15963 	}
15964 
15965 	if (!trx_is_started(trx)
15966 	    && lock_type != F_UNLCK
15967 	    && (m_prebuilt->select_lock_type != LOCK_NONE
15968 		|| m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15969 
15970 		trx->will_lock = true;
15971 	}
15972 
15973 	DBUG_RETURN(0);
15974 }
15975 
15976 /************************************************************************//**
15977 Here we export InnoDB status variables to MySQL. */
15978 static
15979 void
innodb_export_status()15980 innodb_export_status()
15981 /*==================*/
15982 {
15983 	if (srv_was_started) {
15984 		srv_export_innodb_status();
15985 	}
15986 }
15987 
15988 /************************************************************************//**
15989 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
15990 InnoDB Monitor to the client.
15991 @return 0 on success */
15992 static
15993 int
innodb_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)15994 innodb_show_status(
15995 /*===============*/
15996 	handlerton*	hton,	/*!< in: the innodb handlerton */
15997 	THD*		thd,	/*!< in: the MySQL query thread of the caller */
15998 	stat_print_fn*	stat_print)
15999 {
16000 	static const char	truncated_msg[] = "... truncated...\n";
16001 	const long		MAX_STATUS_SIZE = 1048576;
16002 	ulint			trx_list_start = ULINT_UNDEFINED;
16003 	ulint			trx_list_end = ULINT_UNDEFINED;
16004 	bool			ret_val;
16005 
16006 	DBUG_ENTER("innodb_show_status");
16007 	DBUG_ASSERT(hton == innodb_hton_ptr);
16008 
16009 	/* We don't create the temp files or associated
16010 	mutexes in read-only-mode */
16011 
16012 	if (srv_read_only_mode) {
16013 		DBUG_RETURN(0);
16014 	}
16015 
16016 	srv_wake_purge_thread_if_not_active();
16017 
16018 	trx_t*	trx = check_trx_exists(thd);
16019 
16020 	innobase_srv_conc_force_exit_innodb(trx);
16021 
16022 	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
16023 	bytes of text. */
16024 
16025 	char*	str;
16026 	size_t	flen;
16027 
16028 	mutex_enter(&srv_monitor_file_mutex);
16029 	rewind(srv_monitor_file);
16030 
16031 	srv_printf_innodb_monitor(srv_monitor_file, FALSE,
16032 				  &trx_list_start, &trx_list_end);
16033 
16034 	os_file_set_eof(srv_monitor_file);
16035 
16036 	flen = size_t(ftell(srv_monitor_file));
16037 	if (ssize_t(flen) < 0) {
16038 		flen = 0;
16039 	}
16040 
16041 	size_t	usable_len;
16042 
16043 	if (flen > MAX_STATUS_SIZE) {
16044 		usable_len = MAX_STATUS_SIZE;
16045 		srv_truncated_status_writes++;
16046 	} else {
16047 		usable_len = flen;
16048 	}
16049 
16050 	/* allocate buffer for the string, and
16051 	read the contents of the temporary file */
16052 
16053 	if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
16054 		      usable_len + 1, MYF(0)))) {
16055 		mutex_exit(&srv_monitor_file_mutex);
16056 		DBUG_RETURN(1);
16057 	}
16058 
16059 	rewind(srv_monitor_file);
16060 
16061 	if (flen < MAX_STATUS_SIZE) {
16062 		/* Display the entire output. */
16063 		flen = fread(str, 1, flen, srv_monitor_file);
16064 	} else if (trx_list_end < flen
16065 		   && trx_list_start < trx_list_end
16066 		   && trx_list_start + flen - trx_list_end
16067 		   < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
16068 
16069 		/* Omit the beginning of the list of active transactions. */
16070 		size_t	len = fread(str, 1, trx_list_start, srv_monitor_file);
16071 
16072 		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
16073 		len += sizeof truncated_msg - 1;
16074 		usable_len = (MAX_STATUS_SIZE - 1) - len;
16075 		fseek(srv_monitor_file, long(flen - usable_len), SEEK_SET);
16076 		len += fread(str + len, 1, usable_len, srv_monitor_file);
16077 		flen = len;
16078 	} else {
16079 		/* Omit the end of the output. */
16080 		flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
16081 	}
16082 
16083 	mutex_exit(&srv_monitor_file_mutex);
16084 
16085 	ret_val= stat_print(
16086 		thd, innobase_hton_name,
16087 		static_cast<uint>(strlen(innobase_hton_name)),
16088 		STRING_WITH_LEN(""), str, static_cast<uint>(flen));
16089 
16090 	my_free(str);
16091 
16092 	DBUG_RETURN(ret_val);
16093 }
16094 
16095 /** Callback for collecting mutex statistics */
16096 struct ShowStatus {
16097 
16098 	/** For tracking the mutex metrics */
16099 	struct Value {
16100 
16101 		/** Constructor
16102 		@param[in]	name		Name of the mutex
16103 		@param[in]	spins		Number of spins
16104 		@param[in]	os_waits	OS waits so far
16105 		@param[in]	calls		Number of calls to enter() */
ValueShowStatus::Value16106 		Value(const char*	name,
16107 		      ulint		spins,
16108 		      uint64_t		waits,
16109 		      uint64_t		calls)
16110 			:
16111 			m_name(name),
16112 			m_spins(spins),
16113 			m_waits(waits),
16114 			m_calls(calls)
16115 		{
16116 			/* No op */
16117 		}
16118 
16119 		/** Mutex name */
16120 		std::string		m_name;
16121 
16122 		/** Spins so far */
16123 		ulint			m_spins;
16124 
16125 		/** Waits so far */
16126 		uint64_t		m_waits;
16127 
16128 		/** Number of calls so far */
16129 		uint64_t		m_calls;
16130 	};
16131 
16132 	/** Order by m_waits, in descending order. */
16133 	struct OrderByWaits: public std::binary_function<Value, Value, bool>
16134 	{
16135 		/** @return true if rhs < lhs */
operator ()ShowStatus::OrderByWaits16136 		bool operator()(
16137 			const Value& lhs,
16138 			const Value& rhs) const
16139 			UNIV_NOTHROW
16140 		{
16141 			return(rhs.m_waits < lhs.m_waits);
16142 		}
16143 	};
16144 
16145 	typedef std::vector<Value, ut_allocator<Value> > Values;
16146 
16147 	/** Collect the individual latch counts */
16148 	struct GetCount {
16149 		typedef latch_meta_t::CounterType::Count Count;
16150 
16151 		/** Constructor
16152 		@param[in]	name		Latch name
16153 		@param[in,out]	values		Put the values here */
GetCountShowStatus::GetCount16154 		GetCount(
16155 			const char*	name,
16156 			Values*		values)
16157 			UNIV_NOTHROW
16158 			:
16159 			m_name(name),
16160 			m_values(values)
16161 		{
16162 			/* No op */
16163 		}
16164 
16165 		/** Collect the latch metrics. Ignore entries where the
16166 		spins and waits are zero.
16167 		@param[in]	count		The latch metrics */
operator ()ShowStatus::GetCount16168 		void operator()(Count* count) const UNIV_NOTHROW
16169 		{
16170 			if (count->m_spins > 0 || count->m_waits > 0) {
16171 
16172 				m_values->push_back(Value(
16173 					m_name,
16174 					count->m_spins,
16175 					count->m_waits,
16176 					count->m_calls));
16177 			}
16178 		}
16179 
16180 		/** The latch name */
16181 		const char*	m_name;
16182 
16183 		/** For collecting the active mutex stats. */
16184 		Values*		m_values;
16185 	};
16186 
16187 	/** Constructor */
ShowStatusShowStatus16188 	ShowStatus() { }
16189 
16190 	/** Callback for collecting the stats
16191 	@param[in]	latch_meta		Latch meta data
16192 	@return always returns true */
operator ()ShowStatus16193 	bool operator()(latch_meta_t& latch_meta)
16194 		UNIV_NOTHROW
16195 	{
16196 		latch_meta.get_counter()->iterate(
16197 			GetCount(latch_meta.get_name(), &m_values));
16198 
16199 		return(true);
16200 	}
16201 
16202 	/** Implements the SHOW MUTEX STATUS command, for mutexes.
16203 	The table structure is like so: Engine | Mutex Name | Status
16204 	We store the metrics  in the "Status" column as:
16205 
16206 		spins=N,waits=N,calls=N"
16207 
16208 	The user has to parse the dataunfortunately
16209 	@param[in,out]	thd		the MySQL query thread of the caller
16210 	@param[in,out]	stat_print	function for printing statistics
16211 	@return true on success. */
16212 	bool to_string(
16213 		THD*		thd,
16214 		stat_print_fn*	stat_print)
16215 		UNIV_NOTHROW;
16216 
16217 	/** For collecting the active mutex stats. */
16218 	Values		m_values;
16219 };
16220 
16221 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16222 The table structure is like so: Engine | Mutex Name | Status
16223 We store the metrics  in the "Status" column as:
16224 
16225 	spins=N,waits=N,calls=N"
16226 
16227 The user has to parse the dataunfortunately
16228 @param[in,out]	thd		the MySQL query thread of the caller
16229 @param[in,out]	stat_print	function for printing statistics
16230 @return true on success. */
16231 bool
to_string(THD * thd,stat_print_fn * stat_print)16232 ShowStatus::to_string(
16233 	THD*		thd,
16234 	stat_print_fn*	stat_print)
16235 	UNIV_NOTHROW
16236 {
16237 	uint		hton_name_len = (uint) strlen(innobase_hton_name);
16238 
16239 	std::sort(m_values.begin(), m_values.end(), OrderByWaits());
16240 
16241 	Values::iterator	end = m_values.end();
16242 
16243 	for (Values::iterator it = m_values.begin(); it != end; ++it) {
16244 
16245 		int	name_len;
16246 		char	name_buf[IO_SIZE];
16247 
16248 		name_len = snprintf(
16249 			name_buf, sizeof(name_buf), "%s", it->m_name.c_str());
16250 
16251 		int	status_len;
16252 		char	status_buf[IO_SIZE];
16253 
16254 		status_len = snprintf(
16255 			status_buf, sizeof(status_buf),
16256 			"spins=%lu,waits=%lu,calls=%llu",
16257 			static_cast<ulong>(it->m_spins),
16258 			static_cast<long>(it->m_waits),
16259 			(ulonglong) it->m_calls);
16260 
16261 		if (stat_print(thd, innobase_hton_name,
16262 			       hton_name_len,
16263 			       name_buf, static_cast<uint>(name_len),
16264 			       status_buf, static_cast<uint>(status_len))) {
16265 
16266 			return(false);
16267 		}
16268 	}
16269 
16270 	return(true);
16271 }
16272 
16273 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16274 @param[in,out]	hton		the innodb handlerton
16275 @param[in,out]	thd		the MySQL query thread of the caller
16276 @param[in,out]	stat_print	function for printing statistics
16277 @return 0 on success. */
16278 static
16279 int
innodb_show_mutex_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16280 innodb_show_mutex_status(
16281 	handlerton*
16282 #ifdef DBUG_ASSERT_EXISTS
16283 	hton
16284 #endif
16285 	,
16286 	THD*		thd,
16287 	stat_print_fn*	stat_print)
16288 {
16289 	DBUG_ENTER("innodb_show_mutex_status");
16290 
16291 	ShowStatus	collector;
16292 
16293 	DBUG_ASSERT(hton == innodb_hton_ptr);
16294 
16295 	mutex_monitor.iterate(collector);
16296 
16297 	if (!collector.to_string(thd, stat_print)) {
16298 		DBUG_RETURN(1);
16299 	}
16300 
16301 	DBUG_RETURN(0);
16302 }
16303 
16304 /** Implements the SHOW MUTEX STATUS command.
16305 @param[in,out]	hton		the innodb handlerton
16306 @param[in,out]	thd		the MySQL query thread of the caller
16307 @param[in,out]	stat_print	function for printing statistics
16308 @return 0 on success. */
16309 static
16310 int
innodb_show_rwlock_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16311 innodb_show_rwlock_status(
16312 	handlerton*
16313 #ifdef DBUG_ASSERT_EXISTS
16314 	hton
16315 #endif
16316 	,
16317 	THD*		thd,
16318 	stat_print_fn*	stat_print)
16319 {
16320 	DBUG_ENTER("innodb_show_rwlock_status");
16321 
16322 	rw_lock_t*	block_rwlock = NULL;
16323 	ulint		block_rwlock_oswait_count = 0;
16324 	uint		hton_name_len = (uint) strlen(innobase_hton_name);
16325 
16326 	DBUG_ASSERT(hton == innodb_hton_ptr);
16327 
16328 	mutex_enter(&rw_lock_list_mutex);
16329 
16330 	for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
16331 	     rw_lock != NULL;
16332 	     rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
16333 
16334 		if (rw_lock->count_os_wait == 0) {
16335 			continue;
16336 		}
16337 
16338 		int		buf1len;
16339 		char		buf1[IO_SIZE];
16340 
16341 		if (rw_lock->is_block_lock) {
16342 
16343 			block_rwlock = rw_lock;
16344 			block_rwlock_oswait_count += rw_lock->count_os_wait;
16345 
16346 			continue;
16347 		}
16348 
16349 		buf1len = snprintf(
16350 			buf1, sizeof buf1, "rwlock: %s:%u",
16351 			innobase_basename(rw_lock->cfile_name),
16352 			rw_lock->cline);
16353 
16354 		int		buf2len;
16355 		char		buf2[IO_SIZE];
16356 
16357 		buf2len = snprintf(
16358 			buf2, sizeof buf2, "waits=%u",
16359 			rw_lock->count_os_wait);
16360 
16361 		if (stat_print(thd, innobase_hton_name,
16362 			       hton_name_len,
16363 			       buf1, static_cast<uint>(buf1len),
16364 			       buf2, static_cast<uint>(buf2len))) {
16365 
16366 			mutex_exit(&rw_lock_list_mutex);
16367 
16368 			DBUG_RETURN(1);
16369 		}
16370 	}
16371 
16372 	if (block_rwlock != NULL) {
16373 
16374 		int		buf1len;
16375 		char		buf1[IO_SIZE];
16376 
16377 		buf1len = snprintf(
16378 			buf1, sizeof buf1, "sum rwlock: %s:%u",
16379 			innobase_basename(block_rwlock->cfile_name),
16380 			block_rwlock->cline);
16381 
16382 		int		buf2len;
16383 		char		buf2[IO_SIZE];
16384 
16385 		buf2len = snprintf(
16386 			buf2, sizeof buf2, "waits=" ULINTPF,
16387 			block_rwlock_oswait_count);
16388 
16389 		if (stat_print(thd, innobase_hton_name,
16390 			       hton_name_len,
16391 			       buf1, static_cast<uint>(buf1len),
16392 			       buf2, static_cast<uint>(buf2len))) {
16393 
16394 			mutex_exit(&rw_lock_list_mutex);
16395 
16396 			DBUG_RETURN(1);
16397 		}
16398 	}
16399 
16400 	mutex_exit(&rw_lock_list_mutex);
16401 
16402 	DBUG_RETURN(0);
16403 }
16404 
16405 /** Implements the SHOW MUTEX STATUS command.
16406 @param[in,out]	hton		the innodb handlerton
16407 @param[in,out]	thd		the MySQL query thread of the caller
16408 @param[in,out]	stat_print	function for printing statistics
16409 @return 0 on success. */
16410 static
16411 int
innodb_show_latch_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16412 innodb_show_latch_status(
16413 	handlerton*	hton,
16414 	THD*		thd,
16415 	stat_print_fn*	stat_print)
16416 {
16417 	int	ret = innodb_show_mutex_status(hton, thd, stat_print);
16418 
16419 	if (ret != 0) {
16420 		return(ret);
16421 	}
16422 
16423 	return(innodb_show_rwlock_status(hton, thd, stat_print));
16424 }
16425 
16426 /************************************************************************//**
16427 Return 0 on success and non-zero on failure. Note: the bool return type
16428 seems to be abused here, should be an int. */
16429 static
16430 bool
innobase_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)16431 innobase_show_status(
16432 /*=================*/
16433 	handlerton*		hton,	/*!< in: the innodb handlerton */
16434 	THD*			thd,	/*!< in: the MySQL query thread
16435 					of the caller */
16436 	stat_print_fn*		stat_print,
16437 	enum ha_stat_type	stat_type)
16438 {
16439 	DBUG_ASSERT(hton == innodb_hton_ptr);
16440 
16441 	switch (stat_type) {
16442 	case HA_ENGINE_STATUS:
16443 		/* Non-zero return value means there was an error. */
16444 		return(innodb_show_status(hton, thd, stat_print) != 0);
16445 
16446 	case HA_ENGINE_MUTEX:
16447 		return(innodb_show_latch_status(hton, thd, stat_print) != 0);
16448 
16449 	case HA_ENGINE_LOGS:
16450 		/* Not handled */
16451 		break;
16452 	}
16453 
16454 	/* Success */
16455 	return(false);
16456 }
16457 /*********************************************************************//**
16458 Returns number of THR_LOCK locks used for one instance of InnoDB table.
16459 InnoDB no longer relies on THR_LOCK locks so 0 value is returned.
16460 Instead of THR_LOCK locks InnoDB relies on combination of metadata locks
16461 (e.g. for LOCK TABLES and DDL) and its own locking subsystem.
16462 Note that even though this method returns 0, SQL-layer still calls
16463 ::store_lock(), ::start_stmt() and ::external_lock() methods for InnoDB
16464 tables. */
16465 
16466 uint
lock_count(void) const16467 ha_innobase::lock_count(void) const
16468 /*===============================*/
16469 {
16470 	return 0;
16471 }
16472 
16473 /*****************************************************************//**
16474 Supposed to convert a MySQL table lock stored in the 'lock' field of the
16475 handle to a proper type before storing pointer to the lock into an array
16476 of pointers.
16477 In practice, since InnoDB no longer relies on THR_LOCK locks and its
16478 lock_count() method returns 0 it just informs storage engine about type
16479 of THR_LOCK which SQL-layer would have acquired for this specific statement
16480 on this specific table.
16481 MySQL also calls this if it wants to reset some table locks to a not-locked
16482 state during the processing of an SQL query. An example is that during a
16483 SELECT the read lock is released early on the 'const' tables where we only
16484 fetch one row. MySQL does not call this when it releases all locks at the
16485 end of an SQL statement.
16486 @return pointer to the current element in the 'to' array. */
16487 
16488 THR_LOCK_DATA**
store_lock(THD * thd,THR_LOCK_DATA ** to,thr_lock_type lock_type)16489 ha_innobase::store_lock(
16490 /*====================*/
16491 	THD*			thd,		/*!< in: user thread handle */
16492 	THR_LOCK_DATA**		to,		/*!< in: pointer to the current
16493 						element in an array of pointers
16494 						to lock structs;
16495 						only used as return value */
16496 	thr_lock_type		lock_type)	/*!< in: lock type to store in
16497 						'lock'; this may also be
16498 						TL_IGNORE */
16499 {
16500 	/* Note that trx in this function is NOT necessarily m_prebuilt->trx
16501 	because we call update_thd() later, in ::external_lock()! Failure to
16502 	understand this caused a serious memory corruption bug in 5.1.11. */
16503 
16504 	trx_t*	trx = check_trx_exists(thd);
16505 
16506 	/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
16507 	Be careful to ignore TL_IGNORE if we are going to do something with
16508 	only 'real' locks! */
16509 
16510 	/* If no MySQL table is in use, we need to set the isolation level
16511 	of the transaction. */
16512 
16513 	if (lock_type != TL_IGNORE
16514 	    && trx->n_mysql_tables_in_use == 0) {
16515 		trx->isolation_level = innobase_map_isolation_level(
16516 			(enum_tx_isolation) thd_tx_isolation(thd));
16517 
16518 		if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
16519 
16520 			/* At low transaction isolation levels we let
16521 			each consistent read set its own snapshot */
16522 			trx->read_view.close();
16523 		}
16524 	}
16525 
16526 	DBUG_ASSERT(EQ_CURRENT_THD(thd));
16527 	const bool in_lock_tables = thd_in_lock_tables(thd);
16528 	const int sql_command = thd_sql_command(thd);
16529 
16530 	if (srv_read_only_mode
16531 	    && (sql_command == SQLCOM_UPDATE
16532 		|| sql_command == SQLCOM_INSERT
16533 		|| sql_command == SQLCOM_REPLACE
16534 		|| sql_command == SQLCOM_DROP_TABLE
16535 		|| sql_command == SQLCOM_ALTER_TABLE
16536 		|| sql_command == SQLCOM_OPTIMIZE
16537 		|| (sql_command == SQLCOM_CREATE_TABLE
16538 		    && (lock_type >= TL_WRITE_CONCURRENT_INSERT
16539 			 && lock_type <= TL_WRITE))
16540 		|| sql_command == SQLCOM_CREATE_INDEX
16541 		|| sql_command == SQLCOM_DROP_INDEX
16542 		|| sql_command == SQLCOM_CREATE_SEQUENCE
16543 		|| sql_command == SQLCOM_DROP_SEQUENCE
16544 		|| sql_command == SQLCOM_DELETE)) {
16545 
16546 		ib_senderrf(trx->mysql_thd,
16547 			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
16548 
16549 	} else if (sql_command == SQLCOM_FLUSH
16550 		   && lock_type == TL_READ_NO_INSERT) {
16551 
16552 		/* Check for FLUSH TABLES ... WITH READ LOCK */
16553 
16554 		/* Note: This call can fail, but there is no way to return
16555 		the error to the caller. We simply ignore it for now here
16556 		and push the error code to the caller where the error is
16557 		detected in the function. */
16558 
16559 		dberr_t	err = row_quiesce_set_state(
16560 			m_prebuilt->table, QUIESCE_START, trx);
16561 
16562 		ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
16563 
16564 		if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
16565 			m_prebuilt->select_lock_type = LOCK_S;
16566 			m_prebuilt->stored_select_lock_type = LOCK_S;
16567 		} else {
16568 			m_prebuilt->select_lock_type = LOCK_NONE;
16569 			m_prebuilt->stored_select_lock_type = LOCK_NONE;
16570 		}
16571 
16572 	/* Check for DROP TABLE */
16573 	} else if (sql_command == SQLCOM_DROP_TABLE ||
16574                    sql_command == SQLCOM_DROP_SEQUENCE) {
16575 
16576 		/* MySQL calls this function in DROP TABLE though this table
16577 		handle may belong to another thd that is running a query. Let
16578 		us in that case skip any changes to the m_prebuilt struct. */
16579 
16580 	/* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
16581 	} else if ((lock_type == TL_READ && in_lock_tables)
16582 		   || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
16583 		   || lock_type == TL_READ_WITH_SHARED_LOCKS
16584 		   || lock_type == TL_READ_NO_INSERT
16585 		   || (lock_type != TL_IGNORE
16586 		       && sql_command != SQLCOM_SELECT)) {
16587 
16588 		/* The OR cases above are in this order:
16589 		1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
16590 		are processing a stored procedure or function, or
16591 		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
16592 		3) this is a SELECT ... IN SHARE MODE, or
16593 		4) we are doing a complex SQL statement like
16594 		INSERT INTO ... SELECT ... and the logical logging (MySQL
16595 		binlog) requires the use of a locking read, or
16596 		MySQL is doing LOCK TABLES ... READ.
16597 		5) we let InnoDB do locking reads for all SQL statements that
16598 		are not simple SELECTs; note that select_lock_type in this
16599 		case may get strengthened in ::external_lock() to LOCK_X.
16600 		Note that we MUST use a locking read in all data modifying
16601 		SQL statements, because otherwise the execution would not be
16602 		serializable, and also the results from the update could be
16603 		unexpected if an obsolete consistent read view would be
16604 		used. */
16605 
16606 		/* Use consistent read for checksum table */
16607 
16608 		if (sql_command == SQLCOM_CHECKSUM
16609 		    || sql_command == SQLCOM_CREATE_SEQUENCE
16610 		    || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
16611 		    || ((srv_locks_unsafe_for_binlog
16612 			|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
16613 			&& trx->isolation_level != TRX_ISO_SERIALIZABLE
16614 			&& (lock_type == TL_READ
16615 			    || lock_type == TL_READ_NO_INSERT)
16616 			&& (sql_command == SQLCOM_INSERT_SELECT
16617 			    || sql_command == SQLCOM_REPLACE_SELECT
16618 			    || sql_command == SQLCOM_UPDATE
16619 			    || sql_command == SQLCOM_CREATE_SEQUENCE
16620 			    || sql_command == SQLCOM_CREATE_TABLE))) {
16621 
16622 			/* If we either have innobase_locks_unsafe_for_binlog
16623 			option set or this session is using READ COMMITTED
16624 			isolation level and isolation level of the transaction
16625 			is not set to serializable and MySQL is doing
16626 			INSERT INTO...SELECT or REPLACE INTO...SELECT
16627 			or UPDATE ... = (SELECT ...) or CREATE  ...
16628 			SELECT... without FOR UPDATE or IN SHARE
16629 			MODE in select, then we use consistent read
16630 			for select. */
16631 
16632 			m_prebuilt->select_lock_type = LOCK_NONE;
16633 			m_prebuilt->stored_select_lock_type = LOCK_NONE;
16634 		} else {
16635 			m_prebuilt->select_lock_type = LOCK_S;
16636 			m_prebuilt->stored_select_lock_type = LOCK_S;
16637 		}
16638 
16639 	} else if (lock_type != TL_IGNORE) {
16640 
16641 		/* We set possible LOCK_X value in external_lock, not yet
16642 		here even if this would be SELECT ... FOR UPDATE */
16643 
16644 		m_prebuilt->select_lock_type = LOCK_NONE;
16645 		m_prebuilt->stored_select_lock_type = LOCK_NONE;
16646 	}
16647 
16648 	if (!trx_is_started(trx)
16649 	    && (m_prebuilt->select_lock_type != LOCK_NONE
16650 	        || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
16651 
16652 		trx->will_lock = true;
16653 	}
16654 
16655 	return(to);
16656 }
16657 
16658 /*********************************************************************//**
16659 Read the next autoinc value. Acquire the relevant locks before reading
16660 the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
16661 on return and all relevant locks acquired.
16662 @return DB_SUCCESS or error code */
16663 
16664 dberr_t
innobase_get_autoinc(ulonglong * value)16665 ha_innobase::innobase_get_autoinc(
16666 /*==============================*/
16667 	ulonglong*	value)		/*!< out: autoinc value */
16668 {
16669 	*value = 0;
16670 
16671 	m_prebuilt->autoinc_error = innobase_lock_autoinc();
16672 
16673 	if (m_prebuilt->autoinc_error == DB_SUCCESS) {
16674 
16675 		/* Determine the first value of the interval */
16676 		*value = dict_table_autoinc_read(m_prebuilt->table);
16677 
16678 		/* It should have been initialized during open. */
16679 		if (*value == 0) {
16680 			m_prebuilt->autoinc_error = DB_UNSUPPORTED;
16681 			dict_table_autoinc_unlock(m_prebuilt->table);
16682 		}
16683 	}
16684 
16685 	return(m_prebuilt->autoinc_error);
16686 }
16687 
16688 /*******************************************************************//**
16689 This function reads the global auto-inc counter. It doesn't use the
16690 AUTOINC lock even if the lock mode is set to TRADITIONAL.
16691 @return the autoinc value */
16692 
16693 ulonglong
innobase_peek_autoinc(void)16694 ha_innobase::innobase_peek_autoinc(void)
16695 /*====================================*/
16696 {
16697 	ulonglong	auto_inc;
16698 	dict_table_t*	innodb_table;
16699 
16700 	ut_a(m_prebuilt != NULL);
16701 	ut_a(m_prebuilt->table != NULL);
16702 
16703 	innodb_table = m_prebuilt->table;
16704 
16705 	dict_table_autoinc_lock(innodb_table);
16706 
16707 	auto_inc = dict_table_autoinc_read(innodb_table);
16708 
16709 	if (auto_inc == 0) {
16710 		ib::info() << "AUTOINC next value generation is disabled for"
16711 			" '" << innodb_table->name << "'";
16712 	}
16713 
16714 	dict_table_autoinc_unlock(innodb_table);
16715 
16716 	return(auto_inc);
16717 }
16718 
16719 /*********************************************************************//**
16720 Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
16721 
16722 void
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)16723 ha_innobase::get_auto_increment(
16724 /*============================*/
16725 	ulonglong	offset,			/*!< in: table autoinc offset */
16726 	ulonglong	increment,		/*!< in: table autoinc
16727 						increment */
16728 	ulonglong	nb_desired_values,	/*!< in: number of values
16729 						reqd */
16730 	ulonglong*	first_value,		/*!< out: the autoinc value */
16731 	ulonglong*	nb_reserved_values)	/*!< out: count of reserved
16732 						values */
16733 {
16734 	trx_t*		trx;
16735 	dberr_t		error;
16736 	ulonglong	autoinc = 0;
16737 
16738 	/* Prepare m_prebuilt->trx in the table handle */
16739 	update_thd(ha_thd());
16740 
16741 	error = innobase_get_autoinc(&autoinc);
16742 
16743 	if (error != DB_SUCCESS) {
16744 		*first_value = (~(ulonglong) 0);
16745 		return;
16746 	}
16747 
16748 	/* This is a hack, since nb_desired_values seems to be accurate only
16749 	for the first call to get_auto_increment() for multi-row INSERT and
16750 	meaningless for other statements e.g, LOAD etc. Subsequent calls to
16751 	this method for the same statement results in different values which
16752 	don't make sense. Therefore we store the value the first time we are
16753 	called and count down from that as rows are written (see write_row()).
16754 	*/
16755 
16756 	trx = m_prebuilt->trx;
16757 
16758 	/* Note: We can't rely on *first_value since some MySQL engines,
16759 	in particular the partition engine, don't initialize it to 0 when
16760 	invoking this method. So we are not sure if it's guaranteed to
16761 	be 0 or not. */
16762 
16763 	/* We need the upper limit of the col type to check for
16764 	whether we update the table autoinc counter or not. */
16765 	ulonglong col_max_value =
16766 			table->next_number_field->get_max_int_value();
16767 
16768 	/** The following logic is needed to avoid duplicate key error
16769 	for autoincrement column.
16770 
16771 	(1) InnoDB gives the current autoincrement value with respect
16772 	to increment and offset value.
16773 
16774 	(2) Basically it does compute_next_insert_id() logic inside InnoDB
16775 	to avoid the current auto increment value changed by handler layer.
16776 
16777 	(3) It is restricted only for insert operations. */
16778 
16779 	if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
16780 	    && autoinc < col_max_value) {
16781 
16782 		ulonglong prev_auto_inc = autoinc;
16783 
16784 		autoinc = ((autoinc - 1) + increment - offset)/ increment;
16785 
16786 		autoinc = autoinc * increment + offset;
16787 
16788 		/* If autoinc exceeds the col_max_value then reset
16789 		to old autoinc value. Because in case of non-strict
16790 		sql mode, boundary value is not considered as error. */
16791 
16792 		if (autoinc >= col_max_value) {
16793 			autoinc = prev_auto_inc;
16794 		}
16795 
16796 		ut_ad(autoinc > 0);
16797 	}
16798 
16799 	/* Called for the first time ? */
16800 	if (trx->n_autoinc_rows == 0) {
16801 
16802 		trx->n_autoinc_rows = (ulint) nb_desired_values;
16803 
16804 		/* It's possible for nb_desired_values to be 0:
16805 		e.g., INSERT INTO T1(C) SELECT C FROM T2; */
16806 		if (nb_desired_values == 0) {
16807 
16808 			trx->n_autoinc_rows = 1;
16809 		}
16810 
16811 		set_if_bigger(*first_value, autoinc);
16812 	/* Not in the middle of a mult-row INSERT. */
16813 	} else if (m_prebuilt->autoinc_last_value == 0) {
16814 		set_if_bigger(*first_value, autoinc);
16815 	}
16816 
16817 	if (*first_value > col_max_value) {
16818 		/* Out of range number. Let handler::update_auto_increment()
16819 		take care of this */
16820 		m_prebuilt->autoinc_last_value = 0;
16821 		dict_table_autoinc_unlock(m_prebuilt->table);
16822 		*nb_reserved_values= 0;
16823 		return;
16824 	}
16825 
16826 	*nb_reserved_values = trx->n_autoinc_rows;
16827 
16828 	/* With old style AUTOINC locking we only update the table's
16829 	AUTOINC counter after attempting to insert the row. */
16830 	if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
16831 		ulonglong	current;
16832 		ulonglong	next_value;
16833 
16834 		current = *first_value;
16835 
16836 		/* Compute the last value in the interval */
16837 		next_value = innobase_next_autoinc(
16838 			current, *nb_reserved_values, increment, offset,
16839 			col_max_value);
16840 
16841 		m_prebuilt->autoinc_last_value = next_value;
16842 
16843 		if (m_prebuilt->autoinc_last_value < *first_value) {
16844 			*first_value = (~(ulonglong) 0);
16845 		} else {
16846 			/* Update the table autoinc variable */
16847 			dict_table_autoinc_update_if_greater(
16848 				m_prebuilt->table,
16849 				m_prebuilt->autoinc_last_value);
16850 		}
16851 	} else {
16852 		/* This will force write_row() into attempting an update
16853 		of the table's AUTOINC counter. */
16854 		m_prebuilt->autoinc_last_value = 0;
16855 	}
16856 
16857 	/* The increment to be used to increase the AUTOINC value, we use
16858 	this in write_row() and update_row() to increase the autoinc counter
16859 	for columns that are filled by the user. We need the offset and
16860 	the increment. */
16861 	m_prebuilt->autoinc_offset = offset;
16862 	m_prebuilt->autoinc_increment = increment;
16863 
16864 	dict_table_autoinc_unlock(m_prebuilt->table);
16865 }
16866 
16867 /*******************************************************************//**
16868 See comment in handler.cc */
16869 
16870 bool
get_error_message(int error,String * buf)16871 ha_innobase::get_error_message(
16872 /*===========================*/
16873 	int	error,
16874 	String*	buf)
16875 {
16876 	trx_t*	trx = check_trx_exists(ha_thd());
16877 
16878 	if (error == HA_ERR_DECRYPTION_FAILED) {
16879 		const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.";
16880 		buf->copy(msg, (uint)strlen(msg), system_charset_info);
16881 	} else {
16882 		buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
16883 			system_charset_info);
16884 	}
16885 
16886 	return(FALSE);
16887 }
16888 
16889 /** Retrieves the names of the table and the key for which there was a
16890 duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
16891 
16892 If any of the names is not available, then this method will return
16893 false and will not change any of child_table_name or child_key_name.
16894 
16895 @param[out] child_table_name Table name
16896 @param[in] child_table_name_len Table name buffer size
16897 @param[out] child_key_name Key name
16898 @param[in] child_key_name_len Key name buffer size
16899 
16900 @retval true table and key names were available and were written into the
16901 corresponding out parameters.
16902 @retval false table and key names were not available, the out parameters
16903 were not touched. */
16904 bool
get_foreign_dup_key(char * child_table_name,uint child_table_name_len,char * child_key_name,uint child_key_name_len)16905 ha_innobase::get_foreign_dup_key(
16906 /*=============================*/
16907 	char*	child_table_name,
16908 	uint	child_table_name_len,
16909 	char*	child_key_name,
16910 	uint	child_key_name_len)
16911 {
16912 	const dict_index_t*	err_index;
16913 
16914 	ut_a(m_prebuilt->trx != NULL);
16915 	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
16916 
16917 	err_index = trx_get_error_info(m_prebuilt->trx);
16918 
16919 	if (err_index == NULL) {
16920 		return(false);
16921 	}
16922 	/* else */
16923 
16924 	/* copy table name (and convert from filename-safe encoding to
16925 	system_charset_info) */
16926 	char*	p = strchr(err_index->table->name.m_name, '/');
16927 
16928 	/* strip ".../" prefix if any */
16929 	if (p != NULL) {
16930 		p++;
16931 	} else {
16932 		p = err_index->table->name.m_name;
16933 	}
16934 
16935 	size_t	len;
16936 
16937 	len = filename_to_tablename(p, child_table_name, child_table_name_len);
16938 
16939 	child_table_name[len] = '\0';
16940 
16941 	/* copy index name */
16942 	snprintf(child_key_name, child_key_name_len, "%s",
16943 		    err_index->name());
16944 
16945 	return(true);
16946 }
16947 
16948 /*******************************************************************//**
16949 Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
16950 If there is no explicitly declared non-null unique key or a primary key, then
16951 InnoDB internally uses the row id as the primary key.
16952 @return < 0 if ref1 < ref2, 0 if equal, else > 0 */
16953 
16954 int
cmp_ref(const uchar * ref1,const uchar * ref2)16955 ha_innobase::cmp_ref(
16956 /*=================*/
16957 	const uchar*	ref1,	/*!< in: an (internal) primary key value in the
16958 				MySQL key value format */
16959 	const uchar*	ref2)	/*!< in: an (internal) primary key value in the
16960 				MySQL key value format */
16961 {
16962 	enum_field_types mysql_type;
16963 	Field*		field;
16964 	KEY_PART_INFO*	key_part;
16965 	KEY_PART_INFO*	key_part_end;
16966 	uint		len1;
16967 	uint		len2;
16968 	int		result;
16969 
16970 	if (m_prebuilt->clust_index_was_generated) {
16971 		/* The 'ref' is an InnoDB row id */
16972 
16973 		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
16974 	}
16975 
16976 	/* Do a type-aware comparison of primary key fields. PK fields
16977 	are always NOT NULL, so no checks for NULL are performed. */
16978 
16979 	key_part = table->key_info[table->s->primary_key].key_part;
16980 
16981 	key_part_end = key_part
16982 		+ table->key_info[table->s->primary_key].user_defined_key_parts;
16983 
16984 	for (; key_part != key_part_end; ++key_part) {
16985 		field = key_part->field;
16986 		mysql_type = field->type();
16987 
16988 		if (mysql_type == MYSQL_TYPE_TINY_BLOB
16989 			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
16990 			|| mysql_type == MYSQL_TYPE_BLOB
16991 			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
16992 
16993 			/* In the MySQL key value format, a column prefix of
16994 			a BLOB is preceded by a 2-byte length field */
16995 
16996 			len1 = innobase_read_from_2_little_endian(ref1);
16997 			len2 = innobase_read_from_2_little_endian(ref2);
16998 
16999 			result = ((Field_blob*) field)->cmp(
17000 				ref1 + 2, len1, ref2 + 2, len2);
17001 		} else {
17002 			result = field->key_cmp(ref1, ref2);
17003 		}
17004 
17005 		if (result) {
17006 
17007 			return(result);
17008 		}
17009 
17010 		ref1 += key_part->store_length;
17011 		ref2 += key_part->store_length;
17012 	}
17013 
17014 	return(0);
17015 }
17016 
17017 /*******************************************************************//**
17018 Ask InnoDB if a query to a table can be cached.
17019 @return TRUE if query caching of the table is permitted */
17020 
17021 my_bool
register_query_cache_table(THD * thd,const char * table_key,uint key_length,qc_engine_callback * call_back,ulonglong * engine_data)17022 ha_innobase::register_query_cache_table(
17023 /*====================================*/
17024 	THD*		thd,		/*!< in: user thread handle */
17025 	const char*	table_key,	/*!< in: normalized path to the
17026 					table */
17027 	uint		key_length,	/*!< in: length of the normalized
17028 					path to the table */
17029 	qc_engine_callback*
17030 			call_back,	/*!< out: pointer to function for
17031 					checking if query caching
17032 					is permitted */
17033 	ulonglong	*engine_data)	/*!< in/out: data to call_back */
17034 {
17035 	*engine_data = 0;
17036 	*call_back = innobase_query_caching_of_table_permitted;
17037 
17038 	return(innobase_query_caching_of_table_permitted(
17039 			thd, table_key,
17040 			static_cast<uint>(key_length),
17041 			engine_data));
17042 }
17043 
17044 /******************************************************************//**
17045 This function is used to find the storage length in bytes of the first n
17046 characters for prefix indexes using a multibyte character set. The function
17047 finds charset information and returns length of prefix_len characters in the
17048 index field in bytes.
17049 @return number of bytes occupied by the first n characters */
17050 ulint
innobase_get_at_most_n_mbchars(ulint charset_id,ulint prefix_len,ulint data_len,const char * str)17051 innobase_get_at_most_n_mbchars(
17052 /*===========================*/
17053 	ulint charset_id,	/*!< in: character set id */
17054 	ulint prefix_len,	/*!< in: prefix length in bytes of the index
17055 				(this has to be divided by mbmaxlen to get the
17056 				number of CHARACTERS n in the prefix) */
17057 	ulint data_len,		/*!< in: length of the string in bytes */
17058 	const char* str)	/*!< in: character string */
17059 {
17060 	ulint char_length;	/*!< character length in bytes */
17061 	ulint n_chars;		/*!< number of characters in prefix */
17062 	CHARSET_INFO* charset;	/*!< charset used in the field */
17063 
17064 	charset = get_charset((uint) charset_id, MYF(MY_WME));
17065 
17066 	ut_ad(charset);
17067 	ut_ad(charset->mbmaxlen);
17068 
17069 	/* Calculate how many characters at most the prefix index contains */
17070 
17071 	n_chars = prefix_len / charset->mbmaxlen;
17072 
17073 	/* If the charset is multi-byte, then we must find the length of the
17074 	first at most n chars in the string. If the string contains less
17075 	characters than n, then we return the length to the end of the last
17076 	character. */
17077 
17078 	if (charset->mbmaxlen > 1) {
17079 		/* my_charpos() returns the byte length of the first n_chars
17080 		characters, or a value bigger than the length of str, if
17081 		there were not enough full characters in str.
17082 
17083 		Why does the code below work:
17084 		Suppose that we are looking for n UTF-8 characters.
17085 
17086 		1) If the string is long enough, then the prefix contains at
17087 		least n complete UTF-8 characters + maybe some extra
17088 		characters + an incomplete UTF-8 character. No problem in
17089 		this case. The function returns the pointer to the
17090 		end of the nth character.
17091 
17092 		2) If the string is not long enough, then the string contains
17093 		the complete value of a column, that is, only complete UTF-8
17094 		characters, and we can store in the column prefix index the
17095 		whole string. */
17096 
17097 		char_length= my_charpos(charset, str, str + data_len, n_chars);
17098 		if (char_length > data_len) {
17099 			char_length = data_len;
17100 		}
17101 	} else if (data_len < prefix_len) {
17102 
17103 		char_length = data_len;
17104 
17105 	} else {
17106 
17107 		char_length = prefix_len;
17108 	}
17109 
17110 	return(char_length);
17111 }
17112 
17113 /*******************************************************************//**
17114 This function is used to prepare an X/Open XA distributed transaction.
17115 @return 0 or error number */
17116 static
17117 int
innobase_xa_prepare(handlerton * hton,THD * thd,bool prepare_trx)17118 innobase_xa_prepare(
17119 /*================*/
17120 	handlerton*	hton,		/*!< in: InnoDB handlerton */
17121 	THD*		thd,		/*!< in: handle to the MySQL thread of
17122 					the user whose XA transaction should
17123 					be prepared */
17124 	bool		prepare_trx)	/*!< in: true - prepare transaction
17125 					false - the current SQL statement
17126 					ended */
17127 {
17128 	trx_t*		trx = check_trx_exists(thd);
17129 
17130 	DBUG_ASSERT(hton == innodb_hton_ptr);
17131 
17132 	thd_get_xid(thd, (MYSQL_XID*) trx->xid);
17133 
17134 	innobase_srv_conc_force_exit_innodb(trx);
17135 
17136 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
17137 
17138 		sql_print_error("Transaction not registered for MariaDB 2PC,"
17139 				" but transaction is active");
17140 	}
17141 
17142 	if (prepare_trx
17143 	    || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17144 
17145 		/* We were instructed to prepare the whole transaction, or
17146 		this is an SQL statement end and autocommit is on */
17147 
17148 		ut_ad(trx_is_registered_for_2pc(trx));
17149 
17150 		trx_prepare_for_mysql(trx);
17151 	} else {
17152 		/* We just mark the SQL statement ended and do not do a
17153 		transaction prepare */
17154 
17155 		/* If we had reserved the auto-inc lock for some
17156 		table in this SQL statement we release it now */
17157 
17158 		lock_unlock_table_autoinc(trx);
17159 
17160 		/* Store the current undo_no of the transaction so that we
17161 		know where to roll back if we have to roll back the next
17162 		SQL statement */
17163 
17164 		trx_mark_sql_stat_end(trx);
17165 	}
17166 
17167 	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
17168 	    && (prepare_trx
17169 		|| !thd_test_options(
17170 			thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17171 
17172 		/* For mysqlbackup to work the order of transactions in binlog
17173 		and InnoDB must be the same. Consider the situation
17174 
17175 		  thread1> prepare; write to binlog; ...
17176 			  <context switch>
17177 		  thread2> prepare; write to binlog; commit
17178 		  thread1>			     ... commit
17179 
17180 		The server guarantees that writes to the binary log
17181 		and commits are in the same order, so we do not have
17182 		to handle this case. */
17183 	}
17184 
17185 	return(0);
17186 }
17187 
17188 /*******************************************************************//**
17189 This function is used to recover X/Open XA distributed transactions.
17190 @return number of prepared transactions stored in xid_list */
17191 static
17192 int
innobase_xa_recover(handlerton * hton,XID * xid_list,uint len)17193 innobase_xa_recover(
17194 /*================*/
17195 	handlerton*	hton,	/*!< in: InnoDB handlerton */
17196 	XID*		xid_list,/*!< in/out: prepared transactions */
17197 	uint		len)	/*!< in: number of slots in xid_list */
17198 {
17199 	DBUG_ASSERT(hton == innodb_hton_ptr);
17200 
17201 	if (len == 0 || xid_list == NULL) {
17202 
17203 		return(0);
17204 	}
17205 
17206 	return(trx_recover_for_mysql(xid_list, len));
17207 }
17208 
17209 /*******************************************************************//**
17210 This function is used to commit one X/Open XA distributed transaction
17211 which is in the prepared state
17212 @return 0 or error number */
17213 static
17214 int
innobase_commit_by_xid(handlerton * hton,XID * xid)17215 innobase_commit_by_xid(
17216 /*===================*/
17217 	handlerton*	hton,
17218 	XID*		xid)	/*!< in: X/Open XA transaction identification */
17219 {
17220 	DBUG_ASSERT(hton == innodb_hton_ptr);
17221 
17222 	if (high_level_read_only) {
17223 		return(XAER_RMFAIL);
17224 	}
17225 
17226 	if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17227 		/* use cases are: disconnected xa, slave xa, recovery */
17228 		innobase_commit_low(trx);
17229 		ut_ad(trx->mysql_thd == NULL);
17230 		trx_deregister_from_2pc(trx);
17231 		ut_ad(!trx->will_lock);    /* trx cache requirement */
17232 		trx->free();
17233 
17234 		return(XA_OK);
17235 	} else {
17236 		return(XAER_NOTA);
17237 	}
17238 }
17239 
17240 /** This function is used to rollback one X/Open XA distributed transaction
17241 which is in the prepared state
17242 
17243 @param[in] hton InnoDB handlerton
17244 @param[in] xid X/Open XA transaction identification
17245 
17246 @return 0 or error number */
innobase_rollback_by_xid(handlerton * hton,XID * xid)17247 int innobase_rollback_by_xid(handlerton* hton, XID* xid)
17248 {
17249 	DBUG_ASSERT(hton == innodb_hton_ptr);
17250 
17251 	if (high_level_read_only) {
17252 		return(XAER_RMFAIL);
17253 	}
17254 
17255 	if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17256 #ifdef WITH_WSREP
17257 		/* If a wsrep transaction is being rolled back during
17258 		the recovery, we must clear the xid in order to avoid
17259 		writing serialisation history for rolled back transaction. */
17260 		if (wsrep_is_wsrep_xid(trx->xid)) {
17261 			trx->xid->null();
17262 		}
17263 #endif /* WITH_WSREP */
17264 		int ret = innobase_rollback_trx(trx);
17265 		trx_deregister_from_2pc(trx);
17266 		ut_ad(!trx->will_lock);
17267 		trx->free();
17268 
17269 		return(ret);
17270 	} else {
17271 		return(XAER_NOTA);
17272 	}
17273 }
17274 
17275 bool
check_if_incompatible_data(HA_CREATE_INFO * info,uint table_changes)17276 ha_innobase::check_if_incompatible_data(
17277 /*====================================*/
17278 	HA_CREATE_INFO*	info,
17279 	uint		table_changes)
17280 {
17281 	ha_table_option_struct *param_old, *param_new;
17282 
17283 	/* Cache engine specific options */
17284 	param_new = info->option_struct;
17285 	param_old = table->s->option_struct;
17286 
17287 	innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info);
17288 
17289 	if (table_changes != IS_EQUAL_YES) {
17290 
17291 		return(COMPATIBLE_DATA_NO);
17292 	}
17293 
17294 	/* Check that auto_increment value was not changed */
17295 	if ((info->used_fields & HA_CREATE_USED_AUTO)
17296 	    && info->auto_increment_value != 0) {
17297 
17298 		return(COMPATIBLE_DATA_NO);
17299 	}
17300 
17301 	/* Check that row format didn't change */
17302 	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
17303 	    && info->row_type != get_row_type()) {
17304 
17305 		return(COMPATIBLE_DATA_NO);
17306 	}
17307 
17308 	/* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
17309 	if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
17310 		return(COMPATIBLE_DATA_NO);
17311 	}
17312 
17313 	/* Changes on engine specific table options requests a rebuild of the table. */
17314 	if (param_new->page_compressed != param_old->page_compressed ||
17315 	    param_new->page_compression_level != param_old->page_compression_level)
17316         {
17317 		return(COMPATIBLE_DATA_NO);
17318 	}
17319 
17320 	return(COMPATIBLE_DATA_YES);
17321 }
17322 
17323 /****************************************************************//**
17324 Update the system variable innodb_io_capacity_max using the "saved"
17325 value. This function is registered as a callback with MySQL. */
17326 static
17327 void
innodb_io_capacity_max_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17328 innodb_io_capacity_max_update(
17329 /*===========================*/
17330 	THD*				thd,	/*!< in: thread handle */
17331 	st_mysql_sys_var*, void*,
17332 	const void*			save)	/*!< in: immediate result
17333 						from check function */
17334 {
17335 	ulong	in_val = *static_cast<const ulong*>(save);
17336 
17337 	if (in_val < srv_io_capacity) {
17338 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17339 				    ER_WRONG_ARGUMENTS,
17340 				    "Setting innodb_io_capacity_max %lu"
17341 			" lower than innodb_io_capacity %lu.",
17342 			in_val, srv_io_capacity);
17343 
17344 		srv_io_capacity = in_val;
17345 
17346 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17347 					    ER_WRONG_ARGUMENTS,
17348 				    "Setting innodb_io_capacity to %lu",
17349 				    srv_io_capacity);
17350 	}
17351 
17352 	srv_max_io_capacity = in_val;
17353 }
17354 
17355 /****************************************************************//**
17356 Update the system variable innodb_io_capacity using the "saved"
17357 value. This function is registered as a callback with MySQL. */
17358 static
17359 void
innodb_io_capacity_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17360 innodb_io_capacity_update(
17361 /*======================*/
17362 	THD*				thd,	/*!< in: thread handle */
17363 	st_mysql_sys_var*, void*,
17364 	const void*			save)	/*!< in: immediate result
17365 						from check function */
17366 {
17367 	ulong	in_val = *static_cast<const ulong*>(save);
17368 
17369 	if (in_val > srv_max_io_capacity) {
17370 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17371 				    ER_WRONG_ARGUMENTS,
17372 				    "Setting innodb_io_capacity to %lu"
17373 				    " higher than innodb_io_capacity_max %lu",
17374 				    in_val, srv_max_io_capacity);
17375 
17376 		srv_max_io_capacity = (in_val & ~(~0UL >> 1))
17377 			? in_val : in_val * 2;
17378 
17379 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17380 				    ER_WRONG_ARGUMENTS,
17381 				    "Setting innodb_max_io_capacity to %lu",
17382 				    srv_max_io_capacity);
17383 	}
17384 
17385 	srv_io_capacity = in_val;
17386 }
17387 
17388 /****************************************************************//**
17389 Update the system variable innodb_max_dirty_pages_pct using the "saved"
17390 value. This function is registered as a callback with MySQL. */
17391 static
17392 void
innodb_max_dirty_pages_pct_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17393 innodb_max_dirty_pages_pct_update(
17394 /*==============================*/
17395 	THD*				thd,	/*!< in: thread handle */
17396 	st_mysql_sys_var*, void*,
17397 	const void*			save)	/*!< in: immediate result
17398 						from check function */
17399 {
17400 	double	in_val = *static_cast<const double*>(save);
17401 	if (in_val < srv_max_dirty_pages_pct_lwm) {
17402 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17403 				    ER_WRONG_ARGUMENTS,
17404 				    "innodb_max_dirty_pages_pct cannot be"
17405 				    " set lower than"
17406 				    " innodb_max_dirty_pages_pct_lwm.");
17407 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17408 				    ER_WRONG_ARGUMENTS,
17409 				    "Lowering"
17410 				    " innodb_max_dirty_page_pct_lwm to %lf",
17411 				    in_val);
17412 
17413 		srv_max_dirty_pages_pct_lwm = in_val;
17414 	}
17415 
17416 	srv_max_buf_pool_modified_pct = in_val;
17417 }
17418 
17419 /****************************************************************//**
17420 Update the system variable innodb_max_dirty_pages_pct_lwm using the
17421 "saved" value. This function is registered as a callback with MySQL. */
17422 static
17423 void
innodb_max_dirty_pages_pct_lwm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17424 innodb_max_dirty_pages_pct_lwm_update(
17425 /*==================================*/
17426 	THD*				thd,	/*!< in: thread handle */
17427 	st_mysql_sys_var*, void*,
17428 	const void*			save)	/*!< in: immediate result
17429 						from check function */
17430 {
17431 	double	in_val = *static_cast<const double*>(save);
17432 	if (in_val > srv_max_buf_pool_modified_pct) {
17433 		in_val = srv_max_buf_pool_modified_pct;
17434 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17435 				    ER_WRONG_ARGUMENTS,
17436 				    "innodb_max_dirty_pages_pct_lwm"
17437 				    " cannot be set higher than"
17438 				    " innodb_max_dirty_pages_pct.");
17439 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17440 				    ER_WRONG_ARGUMENTS,
17441 				    "Setting innodb_max_dirty_page_pct_lwm"
17442 				    " to %lf",
17443 				    in_val);
17444 	}
17445 
17446 	srv_max_dirty_pages_pct_lwm = in_val;
17447 }
17448 
17449 /*************************************************************//**
17450 Don't allow to set innodb_fast_shutdown=0 if purge threads are
17451 already down.
17452 @return 0 if innodb_fast_shutdown can be set */
17453 static
17454 int
fast_shutdown_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)17455 fast_shutdown_validate(
17456 /*=============================*/
17457 	THD*				thd,	/*!< in: thread handle */
17458 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
17459 						variable */
17460 	void*				save,	/*!< out: immediate result
17461 						for update function */
17462 	struct st_mysql_value*		value)	/*!< in: incoming string */
17463 {
17464 	if (check_sysvar_int(thd, var, save, value)) {
17465 		return(1);
17466 	}
17467 
17468 	uint new_val = *reinterpret_cast<uint*>(save);
17469 
17470 	if (srv_fast_shutdown && !new_val
17471 	    && !my_atomic_loadptr_explicit(reinterpret_cast<void**>
17472 					   (&srv_running),
17473 					   MY_MEMORY_ORDER_RELAXED)) {
17474 		return(1);
17475 	}
17476 
17477 	return(0);
17478 }
17479 
17480 /*************************************************************//**
17481 Check whether valid argument given to innobase_*_stopword_table.
17482 This function is registered as a callback with MySQL.
17483 @return 0 for valid stopword table */
17484 static
17485 int
innodb_stopword_table_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17486 innodb_stopword_table_validate(
17487 /*===========================*/
17488 	THD*				thd,	/*!< in: thread handle */
17489 	st_mysql_sys_var*,
17490 	void*				save,	/*!< out: immediate result
17491 						for update function */
17492 	struct st_mysql_value*		value)	/*!< in: incoming string */
17493 {
17494 	const char*	stopword_table_name;
17495 	char		buff[STRING_BUFFER_USUAL_SIZE];
17496 	int		len = sizeof(buff);
17497 	trx_t*		trx;
17498 
17499 	ut_a(save != NULL);
17500 	ut_a(value != NULL);
17501 
17502 	stopword_table_name = value->val_str(value, buff, &len);
17503 
17504 	trx = check_trx_exists(thd);
17505 
17506 	row_mysql_lock_data_dictionary(trx);
17507 
17508 	/* Validate the stopword table's (if supplied) existence and
17509 	of the right format */
17510 	int ret = stopword_table_name && !fts_valid_stopword_table(
17511 		stopword_table_name);
17512 
17513 	row_mysql_unlock_data_dictionary(trx);
17514 
17515 	if (!ret) {
17516 		if (stopword_table_name == buff) {
17517 			ut_ad(static_cast<size_t>(len) < sizeof buff);
17518 			stopword_table_name = thd_strmake(thd,
17519 							  stopword_table_name,
17520 							  len);
17521 		}
17522 
17523 		*static_cast<const char**>(save) = stopword_table_name;
17524 	}
17525 
17526 	return(ret);
17527 }
17528 
17529 /** Update the system variable innodb_buffer_pool_size using the "saved"
17530 value. This function is registered as a callback with MySQL.
17531 @param[in]	save	immediate result from check function */
17532 static
17533 void
innodb_buffer_pool_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17534 innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
17535 {
17536         longlong	in_val = *static_cast<const longlong*>(save);
17537 
17538 	snprintf(export_vars.innodb_buffer_pool_resize_status,
17539 	        sizeof(export_vars.innodb_buffer_pool_resize_status),
17540 		"Requested to resize buffer pool.");
17541 
17542 	os_event_set(srv_buf_resize_event);
17543 
17544 	ib::info() << export_vars.innodb_buffer_pool_resize_status
17545 		<< " (new size: " << in_val << " bytes)";
17546 }
17547 
/** The latest assigned innodb_ft_aux_table name.
NOTE(review): presumably this is the storage bound to the
innodb_ft_aux_table system variable; confirm against the variable's
definition. */
static char* innodb_ft_aux_table;
17550 
17551 /** Update innodb_ft_aux_table_id on SET GLOBAL innodb_ft_aux_table.
17552 @param[in,out]	thd	connection
17553 @param[out]	save	new value of innodb_ft_aux_table
17554 @param[in]	value	user-specified value */
innodb_ft_aux_table_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17555 static int innodb_ft_aux_table_validate(THD *thd, st_mysql_sys_var*,
17556 					void* save, st_mysql_value* value)
17557 {
17558 	char buf[STRING_BUFFER_USUAL_SIZE];
17559 	int len = sizeof buf;
17560 
17561 	if (const char* table_name = value->val_str(value, buf, &len)) {
17562 		if (dict_table_t* table = dict_table_open_on_name(
17563 			    table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE)) {
17564 			const table_id_t id = dict_table_has_fts_index(table)
17565 				? table->id : 0;
17566 			dict_table_close(table, FALSE, FALSE);
17567 			if (id) {
17568 				innodb_ft_aux_table_id = id;
17569 				if (table_name == buf) {
17570 					ut_ad(static_cast<size_t>(len)
17571 					      < sizeof buf);
17572 					table_name = thd_strmake(thd,
17573 								 table_name,
17574 								 len);
17575 				}
17576 
17577 
17578 				*static_cast<const char**>(save) = table_name;
17579 				return 0;
17580 			}
17581 		}
17582 
17583 		return 1;
17584 	} else {
17585 		*static_cast<char**>(save) = NULL;
17586 		innodb_ft_aux_table_id = 0;
17587 		return 0;
17588 	}
17589 }
17590 
17591 #ifdef BTR_CUR_HASH_ADAPT
17592 /****************************************************************//**
17593 Update the system variable innodb_adaptive_hash_index using the "saved"
17594 value. This function is registered as a callback with MySQL. */
17595 static
17596 void
innodb_adaptive_hash_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17597 innodb_adaptive_hash_index_update(THD*, st_mysql_sys_var*, void*,
17598 				  const void* save)
17599 {
17600 	mysql_mutex_unlock(&LOCK_global_system_variables);
17601 	if (*(my_bool*) save) {
17602 		btr_search_enable();
17603 	} else {
17604 		btr_search_disable();
17605 	}
17606 	mysql_mutex_lock(&LOCK_global_system_variables);
17607 }
17608 #endif /* BTR_CUR_HASH_ADAPT */
17609 
17610 /****************************************************************//**
17611 Update the system variable innodb_cmp_per_index using the "saved"
17612 value. This function is registered as a callback with MySQL. */
17613 static
17614 void
innodb_cmp_per_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17615 innodb_cmp_per_index_update(THD*, st_mysql_sys_var*, void*, const void* save)
17616 {
17617 	/* Reset the stats whenever we enable the table
17618 	INFORMATION_SCHEMA.innodb_cmp_per_index. */
17619 	if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
17620 		mysql_mutex_unlock(&LOCK_global_system_variables);
17621 		page_zip_reset_stat_per_index();
17622 		mysql_mutex_lock(&LOCK_global_system_variables);
17623 	}
17624 
17625 	srv_cmp_per_index_enabled = !!(*(my_bool*) save);
17626 }
17627 
17628 /****************************************************************//**
17629 Update the system variable innodb_old_blocks_pct using the "saved"
17630 value. This function is registered as a callback with MySQL. */
17631 static
17632 void
innodb_old_blocks_pct_update(THD *,st_mysql_sys_var *,void *,const void * save)17633 innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
17634 {
17635 	mysql_mutex_unlock(&LOCK_global_system_variables);
17636 	uint ratio = buf_LRU_old_ratio_update(*static_cast<const uint*>(save),
17637 					      true);
17638 	mysql_mutex_lock(&LOCK_global_system_variables);
17639 	innobase_old_blocks_pct = ratio;
17640 }
17641 
17642 /****************************************************************//**
17643 Update the system variable innodb_old_blocks_pct using the "saved"
17644 value. This function is registered as a callback with MySQL. */
17645 static
17646 void
innodb_change_buffer_max_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17647 innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
17648 				     const void* save)
17649 {
17650 	srv_change_buffer_max_size = *static_cast<const uint*>(save);
17651 	mysql_mutex_unlock(&LOCK_global_system_variables);
17652 	ibuf_max_size_update(srv_change_buffer_max_size);
17653 	mysql_mutex_lock(&LOCK_global_system_variables);
17654 }
17655 
17656 #ifdef UNIV_DEBUG
/** NOTE(review): not referenced in this part of the file; presumably the
storage of the debug variable whose update callback is
innodb_make_page_dirty() -- confirm against the variable declaration. */
static ulong srv_fil_make_page_dirty_debug = 0;
/** Page number stored by innodb_save_page_no() and later used by
innodb_make_page_dirty() to select the page to dirty. */
static ulong srv_saved_page_number_debug = 0;
17659 
17660 /****************************************************************//**
17661 Save an InnoDB page number. */
17662 static
17663 void
innodb_save_page_no(THD *,st_mysql_sys_var *,void *,const void * save)17664 innodb_save_page_no(THD*, st_mysql_sys_var*, void*, const void* save)
17665 {
17666 	srv_saved_page_number_debug = *static_cast<const ulong*>(save);
17667 
17668 	ib::info() << "Saving InnoDB page number: "
17669 		<< srv_saved_page_number_debug;
17670 }
17671 
/****************************************************************//**
Make a page of the given tablespace dirty. The page number is taken from
srv_saved_page_number_debug (see innodb_save_page_no()); the tablespace id
is the saved value. Nothing is done if the tablespace cannot be acquired
or if the page number is not below the tablespace size.
LOCK_global_system_variables is released for the duration of the work and
reacquired on every exit path.
@param[in]	save	immediate result from check function: the
			tablespace id (ulong) */
static
void
innodb_make_page_dirty(THD*, st_mysql_sys_var*, void*, const void* save)
{
	mtr_t		mtr;
	ulong		space_id = *static_cast<const ulong*>(save);
	mysql_mutex_unlock(&LOCK_global_system_variables);
	fil_space_t*	space = fil_space_acquire_silent(space_id);

	if (space == NULL) {
		/* Common exit: also reached by goto from func_exit below,
		after the tablespace reference has been released. */
func_exit_no_space:
		mysql_mutex_lock(&LOCK_global_system_variables);
		return;
	}

	if (srv_saved_page_number_debug >= space->size) {
		/* Exit for every path that holds a tablespace reference:
		the out-of-range case and, via goto at the end of the
		function, the normal case. */
func_exit:
		space->release();
		goto func_exit_no_space;
	}

	mtr.start();
	mtr.set_named_space(space);

	buf_block_t*	block = buf_page_get(
		page_id_t(space_id, srv_saved_page_number_debug),
		page_size_t(space->flags), RW_X_LATCH, &mtr);

	if (block != NULL) {
		byte*	page = block->frame;

		ib::info() << "Dirtying page: " << page_id_t(
			page_get_space_id(page), page_get_page_no(page));

		/* Write the page type that is already stored in the page
		back to FIL_PAGE_TYPE within the mini-transaction, so the
		page contents stay the same.
		NOTE(review): presumably this is what generates the redo
		record and marks the page dirty -- confirm in
		mlog_write_ulint(). */
		mlog_write_ulint(page + FIL_PAGE_TYPE,
				 fil_page_get_type(page),
				 MLOG_2BYTES, &mtr);
	}
	mtr.commit();
	goto func_exit;
}
17715 #endif // UNIV_DEBUG
17716 /*************************************************************//**
17717 Just emit a warning that the usage of the variable is deprecated.
17718 @return 0 */
17719 static
17720 void
innodb_stats_sample_pages_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17721 innodb_stats_sample_pages_update(
17722 /*=============================*/
17723 	THD*				thd,	/*!< in: thread handle */
17724 	st_mysql_sys_var*, void*,
17725 	const void*			save)	/*!< in: immediate result
17726 						from check function */
17727 {
17728 
17729 	const char*	STATS_SAMPLE_PAGES_DEPRECATED_MSG =
17730 		"Using innodb_stats_sample_pages is deprecated and"
17731 		" the variable may be removed in future releases."
17732 		" Please use innodb_stats_transient_sample_pages instead.";
17733 
17734 	push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
17735 		     HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
17736 
17737 	ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG;
17738 
17739 	srv_stats_transient_sample_pages =
17740 		*static_cast<const unsigned long long*>(save);
17741 }
17742 
17743 /****************************************************************//**
17744 Update the monitor counter according to the "set_option",  turn
17745 on/off or reset specified monitor counter. */
17746 static
17747 void
innodb_monitor_set_option(const monitor_info_t * monitor_info,mon_option_t set_option)17748 innodb_monitor_set_option(
17749 /*======================*/
17750 	const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor
17751 					to set */
17752 	mon_option_t	set_option)	/*!< in: Turn on/off reset the
17753 					counter */
17754 {
17755 	monitor_id_t	monitor_id = monitor_info->monitor_id;
17756 
17757 	/* If module type is MONITOR_GROUP_MODULE, it cannot be
17758 	turned on/off individually. It should never use this
17759 	function to set options */
17760 	ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE));
17761 
17762 	switch (set_option) {
17763 	case MONITOR_TURN_ON:
17764 		MONITOR_ON(monitor_id);
17765 		MONITOR_INIT(monitor_id);
17766 		MONITOR_SET_START(monitor_id);
17767 
17768 		/* If the monitor to be turned on uses
17769 		exisitng monitor counter (status variable),
17770 		make special processing to remember existing
17771 		counter value. */
17772 		if (monitor_info->monitor_type & MONITOR_EXISTING) {
17773 			srv_mon_process_existing_counter(
17774 				monitor_id, MONITOR_TURN_ON);
17775 		}
17776 
17777 		if (MONITOR_IS_ON(MONITOR_LATCHES)) {
17778 
17779 			mutex_monitor.enable();
17780 		}
17781 		break;
17782 
17783 	case MONITOR_TURN_OFF:
17784 		if (monitor_info->monitor_type & MONITOR_EXISTING) {
17785 			srv_mon_process_existing_counter(
17786 				monitor_id, MONITOR_TURN_OFF);
17787 		}
17788 
17789 		MONITOR_OFF(monitor_id);
17790 		MONITOR_SET_OFF(monitor_id);
17791 
17792 		if (!MONITOR_IS_ON(MONITOR_LATCHES)) {
17793 
17794 			mutex_monitor.disable();
17795 		}
17796 		break;
17797 
17798 	case MONITOR_RESET_VALUE:
17799 		srv_mon_reset(monitor_id);
17800 
17801 		if (monitor_id == (MONITOR_LATCHES)) {
17802 
17803 			mutex_monitor.reset();
17804 		}
17805 		break;
17806 
17807 	case MONITOR_RESET_ALL_VALUE:
17808 		srv_mon_reset_all(monitor_id);
17809 		mutex_monitor.reset();
17810 		break;
17811 
17812 	default:
17813 		ut_error;
17814 	}
17815 }
17816 
17817 /****************************************************************//**
17818 Find matching InnoDB monitor counters and update their status
17819 according to the "set_option",  turn on/off or reset specified
17820 monitor counter. */
17821 static
17822 void
innodb_monitor_update_wildcard(const char * name,mon_option_t set_option)17823 innodb_monitor_update_wildcard(
17824 /*===========================*/
17825 	const char*	name,		/*!< in: monitor name to match */
17826 	mon_option_t	set_option)	/*!< in: the set option, whether
17827 					to turn on/off or reset the counter */
17828 {
17829 	ut_a(name);
17830 
17831 	for (ulint use = 0; use < NUM_MONITOR; use++) {
17832 		ulint		type;
17833 		monitor_id_t	monitor_id = static_cast<monitor_id_t>(use);
17834 		monitor_info_t*	monitor_info;
17835 
17836 		if (!innobase_wildcasecmp(
17837 			srv_mon_get_name(monitor_id), name)) {
17838 			monitor_info = srv_mon_get_info(monitor_id);
17839 
17840 			type = monitor_info->monitor_type;
17841 
17842 			/* If the monitor counter is of MONITOR_MODULE
17843 			type, skip it. Except for those also marked with
17844 			MONITOR_GROUP_MODULE flag, which can be turned
17845 			on only as a module. */
17846 			if (!(type & MONITOR_MODULE)
17847 			     && !(type & MONITOR_GROUP_MODULE)) {
17848 				innodb_monitor_set_option(monitor_info,
17849 							  set_option);
17850 			}
17851 
17852 			/* Need to special handle counters marked with
17853 			MONITOR_GROUP_MODULE, turn on the whole module if
17854 			any one of it comes here. Currently, only
17855 			"module_buf_page" is marked with MONITOR_GROUP_MODULE */
17856 			if (type & MONITOR_GROUP_MODULE) {
17857 				if ((monitor_id >= MONITOR_MODULE_BUF_PAGE)
17858 				     && (monitor_id < MONITOR_MODULE_OS)) {
17859 					if (set_option == MONITOR_TURN_ON
17860 					    && MONITOR_IS_ON(
17861 						MONITOR_MODULE_BUF_PAGE)) {
17862 						continue;
17863 					}
17864 
17865 					srv_mon_set_module_control(
17866 						MONITOR_MODULE_BUF_PAGE,
17867 						set_option);
17868 				} else {
17869 					/* If new monitor is added with
17870 					MONITOR_GROUP_MODULE, it needs
17871 					to be added here. */
17872 					ut_ad(0);
17873 				}
17874 			}
17875 		}
17876 	}
17877 }
17878 
17879 /*************************************************************//**
17880 Given a configuration variable name, find corresponding monitor counter
17881 and return its monitor ID if found.
17882 @return monitor ID if found, MONITOR_NO_MATCH if there is no match */
17883 static
17884 ulint
innodb_monitor_id_by_name_get(const char * name)17885 innodb_monitor_id_by_name_get(
17886 /*==========================*/
17887 	const char*	name)	/*!< in: monitor counter namer */
17888 {
17889 	ut_a(name);
17890 
17891 	/* Search for wild character '%' in the name, if
17892 	found, we treat it as a wildcard match. We do not search for
17893 	single character wildcard '_' since our monitor names already contain
17894 	such character. To avoid confusion, we request user must include
17895 	at least one '%' character to activate the wildcard search. */
17896 	if (strchr(name, '%')) {
17897 		return(MONITOR_WILDCARD_MATCH);
17898 	}
17899 
17900 	/* Not wildcard match, check for an exact match */
17901 	for (ulint i = 0; i < NUM_MONITOR; i++) {
17902 		if (!innobase_strcasecmp(
17903 			name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) {
17904 			return(i);
17905 		}
17906 	}
17907 
17908 	return(MONITOR_NO_MATCH);
17909 }
17910 /*************************************************************//**
17911 Validate that the passed in monitor name matches at least one
17912 monitor counter name with wildcard compare.
17913 @return TRUE if at least one monitor name matches */
17914 static
17915 ibool
innodb_monitor_validate_wildcard_name(const char * name)17916 innodb_monitor_validate_wildcard_name(
17917 /*==================================*/
17918 	const char*	name)	/*!< in: monitor counter namer */
17919 {
17920 	for (ulint i = 0; i < NUM_MONITOR; i++) {
17921 		if (!innobase_wildcasecmp(
17922 			srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) {
17923 			return(TRUE);
17924 		}
17925 	}
17926 
17927 	return(FALSE);
17928 }
17929 /*************************************************************//**
17930 Validate the passed in monitor name, find and save the
17931 corresponding monitor name in the function parameter "save".
17932 @return 0 if monitor name is valid */
17933 static
17934 int
innodb_monitor_valid_byname(void * save,const char * name)17935 innodb_monitor_valid_byname(
17936 /*========================*/
17937 	void*			save,	/*!< out: immediate result
17938 					for update function */
17939 	const char*		name)	/*!< in: incoming monitor name */
17940 {
17941 	ulint		use;
17942 	monitor_info_t*	monitor_info;
17943 
17944 	if (!name) {
17945 		return(1);
17946 	}
17947 
17948 	use = innodb_monitor_id_by_name_get(name);
17949 
17950 	/* No monitor name matches, nor it is wildcard match */
17951 	if (use == MONITOR_NO_MATCH) {
17952 		return(1);
17953 	}
17954 
17955 	if (use < NUM_MONITOR) {
17956 		monitor_info = srv_mon_get_info((monitor_id_t) use);
17957 
17958 		/* If the monitor counter is marked with
17959 		MONITOR_GROUP_MODULE flag, then this counter
17960 		cannot be turned on/off individually, instead
17961 		it shall be turned on/off as a group using
17962 		its module name */
17963 		if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE)
17964 		    && (!(monitor_info->monitor_type & MONITOR_MODULE))) {
17965 			sql_print_warning(
17966 				"Monitor counter '%s' cannot"
17967 				" be turned on/off individually."
17968 				" Please use its module name"
17969 				" to turn on/off the counters"
17970 				" in the module as a group.\n",
17971 				name);
17972 
17973 			return(1);
17974 		}
17975 
17976 	} else {
17977 		ut_a(use == MONITOR_WILDCARD_MATCH);
17978 
17979 		/* For wildcard match, if there is not a single monitor
17980 		counter name that matches, treat it as an invalid
17981 		value for the system configuration variables */
17982 		if (!innodb_monitor_validate_wildcard_name(name)) {
17983 			return(1);
17984 		}
17985 	}
17986 
17987 	/* Save the configure name for innodb_monitor_update() */
17988 	*static_cast<const char**>(save) = name;
17989 
17990 	return(0);
17991 }
17992 /*************************************************************//**
17993 Validate passed-in "value" is a valid monitor counter name.
17994 This function is registered as a callback with MySQL.
17995 @return 0 for valid name */
17996 static
17997 int
innodb_monitor_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17998 innodb_monitor_validate(
17999 /*====================*/
18000 	THD*, st_mysql_sys_var*,
18001 	void*				save,	/*!< out: immediate result
18002 						for update function */
18003 	struct st_mysql_value*		value)	/*!< in: incoming string */
18004 {
18005 	const char*	name;
18006 	char*		monitor_name;
18007 	char		buff[STRING_BUFFER_USUAL_SIZE];
18008 	int		len = sizeof(buff);
18009 	int		ret;
18010 
18011 	ut_a(save != NULL);
18012 	ut_a(value != NULL);
18013 
18014 	name = value->val_str(value, buff, &len);
18015 
18016 	/* monitor_name could point to memory from MySQL
18017 	or buff[]. Always dup the name to memory allocated
18018 	by InnoDB, so we can access it in another callback
18019 	function innodb_monitor_update() and free it appropriately */
18020 	if (name) {
18021 		monitor_name = my_strdup(//PSI_INSTRUMENT_ME,
18022                                          name, MYF(0));
18023 	} else {
18024 		return(1);
18025 	}
18026 
18027 	ret = innodb_monitor_valid_byname(save, monitor_name);
18028 
18029 	if (ret) {
18030 		/* Validation failed */
18031 		my_free(monitor_name);
18032 	} else {
18033 		/* monitor_name will be freed in separate callback function
18034 		innodb_monitor_update(). Assert "save" point to
18035 		the "monitor_name" variable */
18036 		ut_ad(*static_cast<char**>(save) == monitor_name);
18037 	}
18038 
18039 	return(ret);
18040 }
18041 
18042 /****************************************************************//**
18043 Update the system variable innodb_enable(disable/reset/reset_all)_monitor
18044 according to the "set_option" and turn on/off or reset specified monitor
18045 counter. */
18046 static
18047 void
innodb_monitor_update(THD * thd,void * var_ptr,const void * save,mon_option_t set_option,ibool free_mem)18048 innodb_monitor_update(
18049 /*==================*/
18050 	THD*			thd,		/*!< in: thread handle */
18051 	void*			var_ptr,	/*!< out: where the
18052 						formal string goes */
18053 	const void*		save,		/*!< in: immediate result
18054 						from check function */
18055 	mon_option_t		set_option,	/*!< in: the set option,
18056 						whether to turn on/off or
18057 						reset the counter */
18058 	ibool			free_mem)	/*!< in: whether we will
18059 						need to free the memory */
18060 {
18061 	monitor_info_t*	monitor_info;
18062 	ulint		monitor_id;
18063 	ulint		err_monitor = 0;
18064 	const char*	name;
18065 
18066 	ut_a(save != NULL);
18067 
18068 	name = *static_cast<const char*const*>(save);
18069 
18070 	if (!name) {
18071 		monitor_id = MONITOR_DEFAULT_START;
18072 	} else {
18073 		monitor_id = innodb_monitor_id_by_name_get(name);
18074 
18075 		/* Double check we have a valid monitor ID */
18076 		if (monitor_id == MONITOR_NO_MATCH) {
18077 			return;
18078 		}
18079 	}
18080 
18081 	if (monitor_id == MONITOR_DEFAULT_START) {
18082 		/* If user set the variable to "default", we will
18083 		print a message and make this set operation a "noop".
18084 		The check is being made here is because "set default"
18085 		does not go through validation function */
18086 		if (thd) {
18087 			push_warning_printf(
18088 				thd, Sql_condition::WARN_LEVEL_WARN,
18089 				ER_NO_DEFAULT,
18090 				"Default value is not defined for"
18091 				" this set option. Please specify"
18092 				" correct counter or module name.");
18093 		} else {
18094 			sql_print_error(
18095 				"Default value is not defined for"
18096 				" this set option. Please specify"
18097 				" correct counter or module name.\n");
18098 		}
18099 
18100 		if (var_ptr) {
18101 			*(const char**) var_ptr = NULL;
18102 		}
18103 	} else if (monitor_id == MONITOR_WILDCARD_MATCH) {
18104 		innodb_monitor_update_wildcard(name, set_option);
18105 	} else {
18106 		monitor_info = srv_mon_get_info(
18107 			static_cast<monitor_id_t>(monitor_id));
18108 
18109 		ut_a(monitor_info);
18110 
18111 		/* If monitor is already truned on, someone could already
18112 		collect monitor data, exit and ask user to turn off the
18113 		monitor before turn it on again. */
18114 		if (set_option == MONITOR_TURN_ON
18115 		    && MONITOR_IS_ON(monitor_id)) {
18116 			err_monitor = monitor_id;
18117 			goto exit;
18118 		}
18119 
18120 		if (var_ptr) {
18121 			*(const char**) var_ptr = monitor_info->monitor_name;
18122 		}
18123 
18124 		/* Depending on the monitor name is for a module or
18125 		a counter, process counters in the whole module or
18126 		individual counter. */
18127 		if (monitor_info->monitor_type & MONITOR_MODULE) {
18128 			srv_mon_set_module_control(
18129 				static_cast<monitor_id_t>(monitor_id),
18130 				set_option);
18131 		} else {
18132 			innodb_monitor_set_option(monitor_info, set_option);
18133 		}
18134 	}
18135 exit:
18136 	/* Only if we are trying to turn on a monitor that already
18137 	been turned on, we will set err_monitor. Print related
18138 	information */
18139 	if (err_monitor) {
18140 		sql_print_warning("InnoDB: Monitor %s is already enabled.",
18141 				  srv_mon_get_name((monitor_id_t) err_monitor));
18142 	}
18143 
18144 	if (free_mem && name) {
18145 		my_free((void*) name);
18146 	}
18147 
18148 	return;
18149 }
18150 
18151 /** Validate SET GLOBAL innodb_buffer_pool_filename.
18152 On Windows, file names with colon (:) are not allowed.
18153 @param thd   connection
18154 @param save  &srv_buf_dump_filename
18155 @param value new value to be validated
18156 @return	0 for valid name */
innodb_srv_buf_dump_filename_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)18157 static int innodb_srv_buf_dump_filename_validate(THD *thd, st_mysql_sys_var*,
18158 						 void *save,
18159 						 st_mysql_value *value)
18160 {
18161   char buff[OS_FILE_MAX_PATH];
18162   int len= sizeof buff;
18163 
18164   if (const char *buf_name= value->val_str(value, buff, &len))
18165   {
18166 #ifdef _WIN32
18167     if (!is_filename_allowed(buf_name, len, FALSE))
18168     {
18169       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18170 			  ER_WRONG_ARGUMENTS,
18171 			  "InnoDB: innodb_buffer_pool_filename "
18172 			  "cannot have colon (:) in the file name.");
18173       return 1;
18174     }
18175 #endif /* _WIN32 */
18176     if (buf_name == buff)
18177     {
18178       ut_ad(static_cast<size_t>(len) < sizeof buff);
18179       buf_name= thd_strmake(thd, buf_name, len);
18180     }
18181 
18182     *static_cast<const char**>(save)= buf_name;
18183     return 0;
18184   }
18185 
18186   return 1;
18187 }
18188 
18189 #ifdef UNIV_DEBUG
/** NOTE(review): not referenced in this part of the file; presumably the
storage of the debug variable innodb_buffer_pool_evict, whose update
callback is innodb_buffer_pool_evict_update() -- confirm against the
variable declaration. */
static char* srv_buffer_pool_evict;
18191 
18192 /****************************************************************//**
18193 Evict all uncompressed pages of compressed tables from the buffer pool.
18194 Keep the compressed pages in the buffer pool.
18195 @return whether all uncompressed pages were evicted */
innodb_buffer_pool_evict_uncompressed()18196 static bool innodb_buffer_pool_evict_uncompressed()
18197 {
18198 	bool	all_evicted = true;
18199 
18200 	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
18201 		buf_pool_t*	buf_pool = &buf_pool_ptr[i];
18202 
18203 		buf_pool_mutex_enter(buf_pool);
18204 
18205 		for (buf_block_t* block = UT_LIST_GET_LAST(
18206 			     buf_pool->unzip_LRU);
18207 		     block != NULL; ) {
18208 			buf_block_t*	prev_block = UT_LIST_GET_PREV(
18209 				unzip_LRU, block);
18210 			ut_ad(buf_block_get_state(block)
18211 			      == BUF_BLOCK_FILE_PAGE);
18212 			ut_ad(block->in_unzip_LRU_list);
18213 			ut_ad(block->page.in_LRU_list);
18214 
18215 			if (!buf_LRU_free_page(&block->page, false)) {
18216 				all_evicted = false;
18217 				block = prev_block;
18218 			} else {
18219 				/* Because buf_LRU_free_page() may release
18220 				and reacquire buf_pool_t::mutex, prev_block
18221 				may be invalid. */
18222 				block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
18223 			}
18224 		}
18225 
18226 		buf_pool_mutex_exit(buf_pool);
18227 	}
18228 
18229 	return(all_evicted);
18230 }
18231 
18232 /****************************************************************//**
18233 Called on SET GLOBAL innodb_buffer_pool_evict=...
18234 Handles some values specially, to evict pages from the buffer pool.
18235 SET GLOBAL innodb_buffer_pool_evict='uncompressed'
18236 evicts all uncompressed page frames of compressed tablespaces. */
18237 static
18238 void
innodb_buffer_pool_evict_update(THD *,st_mysql_sys_var *,void *,const void * save)18239 innodb_buffer_pool_evict_update(THD*, st_mysql_sys_var*, void*,
18240 				const void* save)
18241 {
18242 	if (const char* op = *static_cast<const char*const*>(save)) {
18243 		if (!strcmp(op, "uncompressed")) {
18244 			mysql_mutex_unlock(&LOCK_global_system_variables);
18245 			for (uint tries = 0; tries < 10000; tries++) {
18246 				if (innodb_buffer_pool_evict_uncompressed()) {
18247 					mysql_mutex_lock(
18248 						&LOCK_global_system_variables);
18249 					return;
18250 				}
18251 
18252 				os_thread_sleep(10000);
18253 			}
18254 
18255 			/* We failed to evict all uncompressed pages. */
18256 			ut_ad(0);
18257 		}
18258 	}
18259 }
18260 #endif /* UNIV_DEBUG */
18261 
18262 /****************************************************************//**
18263 Update the system variable innodb_monitor_enable and enable
18264 specified monitor counter.
18265 This function is registered as a callback with MySQL. */
18266 static
18267 void
innodb_enable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18268 innodb_enable_monitor_update(
18269 /*=========================*/
18270 	THD*				thd,	/*!< in: thread handle */
18271 	st_mysql_sys_var*,
18272 	void*				var_ptr,/*!< out: where the
18273 						formal string goes */
18274 	const void*			save)	/*!< in: immediate result
18275 						from check function */
18276 {
18277 	innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE);
18278 }
18279 
18280 /****************************************************************//**
18281 Update the system variable innodb_monitor_disable and turn
18282 off specified monitor counter. */
18283 static
18284 void
innodb_disable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18285 innodb_disable_monitor_update(
18286 /*==========================*/
18287 	THD*				thd,	/*!< in: thread handle */
18288 	st_mysql_sys_var*,
18289 	void*				var_ptr,/*!< out: where the
18290 						formal string goes */
18291 	const void*			save)	/*!< in: immediate result
18292 						from check function */
18293 {
18294 	innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE);
18295 }
18296 
18297 /****************************************************************//**
18298 Update the system variable innodb_monitor_reset and reset
18299 specified monitor counter(s).
18300 This function is registered as a callback with MySQL. */
18301 static
18302 void
innodb_reset_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18303 innodb_reset_monitor_update(
18304 /*========================*/
18305 	THD*				thd,	/*!< in: thread handle */
18306 	st_mysql_sys_var*,
18307 	void*				var_ptr,/*!< out: where the
18308 						formal string goes */
18309 	const void*			save)	/*!< in: immediate result
18310 						from check function */
18311 {
18312 	innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE);
18313 }
18314 
18315 /****************************************************************//**
18316 Update the system variable innodb_monitor_reset_all and reset
18317 all value related monitor counter.
18318 This function is registered as a callback with MySQL. */
18319 static
18320 void
innodb_reset_all_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18321 innodb_reset_all_monitor_update(
18322 /*============================*/
18323 	THD*				thd,	/*!< in: thread handle */
18324 	st_mysql_sys_var*,
18325 	void*				var_ptr,/*!< out: where the
18326 						formal string goes */
18327 	const void*			save)	/*!< in: immediate result
18328 						from check function */
18329 {
18330 	innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE,
18331 			      TRUE);
18332 }
18333 
18334 static
18335 void
innodb_defragment_frequency_update(THD *,st_mysql_sys_var *,void *,const void * save)18336 innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*,
18337 				   const void* save)
18338 {
18339 	srv_defragment_frequency = (*static_cast<const uint*>(save));
18340 	srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
18341 }
18342 
/** Portable re-entrant tokenizer: strtok_s() on Windows, strtok_r()
elsewhere.
@param[in,out]	str	string to tokenize on the first call, NULL on
			subsequent calls; it is modified in place
@param[in]	delim	set of delimiter characters
@param[in,out]	saveptr	tokenizer state kept between calls
@return next token, or NULL if there are no more tokens */
static inline char *my_strtok_r(char *str, const char *delim, char **saveptr)
{
#ifndef _WIN32
	char *token = strtok_r(str, delim, saveptr);
#else
	char *token = strtok_s(str, delim, saveptr);
#endif
	return token;
}
18351 
18352 /****************************************************************//**
18353 Parse and enable InnoDB monitor counters during server startup.
18354 User can list the monitor counters/groups to be enable by specifying
18355 "loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
18356 in server configuration file or at the command line. The string
18357 separate could be ";", "," or empty space. */
18358 static
18359 void
innodb_enable_monitor_at_startup(char * str)18360 innodb_enable_monitor_at_startup(
18361 /*=============================*/
18362 	char*	str)	/*!< in/out: monitor counter enable list */
18363 {
18364 	static const char*	sep = " ;,";
18365 	char*			last;
18366 
18367 	ut_a(str);
18368 
18369 	/* Walk through the string, and separate each monitor counter
18370 	and/or counter group name, and calling innodb_monitor_update()
18371 	if successfully updated. Please note that the "str" would be
18372 	changed by strtok_r() as it walks through it. */
18373 	for (char* option = my_strtok_r(str, sep, &last);
18374 	     option;
18375 	     option = my_strtok_r(NULL, sep, &last)) {
18376 		char*	option_name;
18377 		if (!innodb_monitor_valid_byname(&option_name, option)) {
18378 			innodb_monitor_update(NULL, NULL, &option,
18379 					      MONITOR_TURN_ON, FALSE);
18380 		} else {
18381 			sql_print_warning("Invalid monitor counter"
18382 					  " name: '%s'", option);
18383 		}
18384 	}
18385 }
18386 
18387 /****************************************************************//**
18388 Callback function for accessing the InnoDB variables from MySQL:
18389 SHOW VARIABLES. */
show_innodb_vars(THD *,SHOW_VAR * var,char *)18390 static int show_innodb_vars(THD*, SHOW_VAR* var, char*)
18391 {
18392 	innodb_export_status();
18393 	var->type = SHOW_ARRAY;
18394 	var->value = (char*) &innodb_status_variables;
18395 	//var->scope = SHOW_SCOPE_GLOBAL;
18396 
18397 	return(0);
18398 }
18399 
18400 /****************************************************************//**
18401 This function checks each index name for a table against reserved
18402 system default primary index name 'GEN_CLUST_INDEX'. If a name
18403 matches, this function pushes an warning message to the client,
18404 and returns true.
18405 @return true if the index name matches the reserved name */
18406 bool
innobase_index_name_is_reserved(THD * thd,const KEY * key_info,ulint num_of_keys)18407 innobase_index_name_is_reserved(
18408 /*============================*/
18409 	THD*		thd,		/*!< in/out: MySQL connection */
18410 	const KEY*	key_info,	/*!< in: Indexes to be created */
18411 	ulint		num_of_keys)	/*!< in: Number of indexes to
18412 					be created. */
18413 {
18414 	const KEY*	key;
18415 	uint		key_num;	/* index number */
18416 
18417 	for (key_num = 0; key_num < num_of_keys; key_num++) {
18418 		key = &key_info[key_num];
18419 
18420 		if (innobase_strcasecmp(key->name.str,
18421 					innobase_index_reserve_name) == 0) {
18422 			/* Push warning to mysql */
18423 			push_warning_printf(thd,
18424 					    Sql_condition::WARN_LEVEL_WARN,
18425 					    ER_WRONG_NAME_FOR_INDEX,
18426 					    "Cannot Create Index with name"
18427 					    " '%s'. The name is reserved"
18428 					    " for the system default primary"
18429 					    " index.",
18430 					    innobase_index_reserve_name);
18431 
18432 			my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
18433 				 innobase_index_reserve_name);
18434 
18435 			return(true);
18436 		}
18437 	}
18438 
18439 	return(false);
18440 }
18441 
18442 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
18443 of m_prebuilt->fts_doc_id
18444 @param[in,out]	fts_hdl	FTS handler
18445 @return the relevance ranking value */
18446 static
18447 float
innobase_fts_retrieve_ranking(FT_INFO * fts_hdl)18448 innobase_fts_retrieve_ranking(
18449 	FT_INFO*	fts_hdl)
18450 {
18451 	fts_result_t*	result;
18452 	row_prebuilt_t*	ft_prebuilt;
18453 
18454 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18455 
18456 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18457 
18458 	fts_ranking_t*  ranking = rbt_value(fts_ranking_t, result->current);
18459 	ft_prebuilt->fts_doc_id= ranking->doc_id;
18460 
18461 	return(ranking->rank);
18462 }
18463 
18464 /** Free the memory for the FTS handler
18465 @param[in,out]	fts_hdl	FTS handler */
18466 static
18467 void
innobase_fts_close_ranking(FT_INFO * fts_hdl)18468 innobase_fts_close_ranking(
18469 	FT_INFO*	fts_hdl)
18470 {
18471 	fts_result_t*	result;
18472 
18473 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18474 
18475 	fts_query_free_result(result);
18476 
18477 	my_free((uchar*) fts_hdl);
18478 }
18479 
18480 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
18481 of m_prebuilt->fts_doc_id
18482 @param[in,out]	fts_hdl	FTS handler
18483 @return the relevance ranking value */
18484 static
18485 float
innobase_fts_find_ranking(FT_INFO * fts_hdl,uchar *,uint)18486 innobase_fts_find_ranking(FT_INFO* fts_hdl, uchar*, uint)
18487 {
18488 	fts_result_t*	result;
18489 	row_prebuilt_t*	ft_prebuilt;
18490 
18491 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18492 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18493 
18494 	/* Retrieve the ranking value for doc_id with value of
18495 	m_prebuilt->fts_doc_id */
18496 	return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
18497 }
18498 
18499 #ifdef UNIV_DEBUG
/* Storage for debug-only settings. The three flags are bound to system
variables by the MYSQL_SYSVAR_BOOL(background_drop_list_empty, ...),
MYSQL_SYSVAR_BOOL(log_checkpoint_now, ...) and
MYSQL_SYSVAR_BOOL(buf_flush_list_now, ...) definitions in this file;
innodb_merge_threshold_set_all_debug is assigned by
innodb_merge_threshold_set_all_debug_update(). */
static my_bool	innodb_background_drop_list_empty = TRUE;
static my_bool	innodb_log_checkpoint_now = TRUE;
static my_bool	innodb_buf_flush_list_now = TRUE;
static uint	innodb_merge_threshold_set_all_debug
	= DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
18505 
/** Wait for the background drop list to become empty.
Registered as the update callback of the debug variable
background_drop_list_empty. All four arguments, including the value being
assigned, are ignored, so the wait is performed whatever value is set.
NOTE(review): unlike the neighbouring update callbacks, this one does not
release LOCK_global_system_variables around the wait; confirm that this
is intended. */
static
void
wait_background_drop_list_empty(THD*, st_mysql_sys_var*, void*, const void*)
{
	row_wait_for_background_drop_list_empty();
}
18513 
18514 /****************************************************************//**
18515 Force innodb to checkpoint. */
18516 static
18517 void
checkpoint_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18518 checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18519 {
18520 	if (*(my_bool*) save) {
18521 		mysql_mutex_unlock(&LOCK_global_system_variables);
18522 
18523 		while (log_sys.last_checkpoint_lsn
18524 		       + SIZE_OF_MLOG_CHECKPOINT
18525 		       + (log_sys.append_on_checkpoint != NULL
18526 			  ? log_sys.append_on_checkpoint->size() : 0)
18527 		       < log_sys.lsn) {
18528 			log_make_checkpoint();
18529 			fil_flush_file_spaces(FIL_TYPE_LOG);
18530 		}
18531 
18532 		dberr_t err = fil_write_flushed_lsn(log_sys.lsn);
18533 
18534 		if (err != DB_SUCCESS) {
18535 			ib::warn() << "Checkpoint set failed " << err;
18536 		}
18537 
18538 		mysql_mutex_lock(&LOCK_global_system_variables);
18539 	}
18540 }
18541 
18542 /****************************************************************//**
18543 Force a dirty pages flush now. */
18544 static
18545 void
buf_flush_list_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18546 buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18547 {
18548 	if (*(my_bool*) save) {
18549 		mysql_mutex_unlock(&LOCK_global_system_variables);
18550 		buf_flush_sync_all_buf_pools();
18551 		mysql_mutex_lock(&LOCK_global_system_variables);
18552 	}
18553 }
18554 
18555 /** Override current MERGE_THRESHOLD setting for all indexes at dictionary
18556 now.
18557 @param[in]	save	immediate result from check function */
18558 static
18559 void
innodb_merge_threshold_set_all_debug_update(THD *,st_mysql_sys_var *,void *,const void * save)18560 innodb_merge_threshold_set_all_debug_update(THD*, st_mysql_sys_var*, void*,
18561 					    const void* save)
18562 {
18563 	innodb_merge_threshold_set_all_debug
18564 		= (*static_cast<const uint*>(save));
18565 	dict_set_merge_threshold_all_debug(
18566 		innodb_merge_threshold_set_all_debug);
18567 }
18568 #endif /* UNIV_DEBUG */
18569 
18570 /** Find and Retrieve the FTS doc_id for the current result row
18571 @param[in,out]	fts_hdl	FTS handler
18572 @return the document ID */
18573 static
18574 ulonglong
innobase_fts_retrieve_docid(FT_INFO_EXT * fts_hdl)18575 innobase_fts_retrieve_docid(
18576 	FT_INFO_EXT*	fts_hdl)
18577 {
18578 	fts_result_t*	result;
18579 	row_prebuilt_t* ft_prebuilt;
18580 
18581 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_prebuilt;
18582 	result = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_result;
18583 
18584 	if (ft_prebuilt->read_just_key) {
18585 
18586 		fts_ranking_t* ranking =
18587 			rbt_value(fts_ranking_t, result->current);
18588 
18589 		return(ranking->doc_id);
18590 	}
18591 
18592 	return(ft_prebuilt->fts_doc_id);
18593 }
18594 
/* These variables are never read or changed by InnoDB. They are dummies
that the MySQL infrastructure needs so that the user can invoke
buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort()
with:
  SET GLOBAL innodb_buffer_pool_dump_now=ON;
  SET GLOBAL innodb_buffer_pool_load_now=ON;
  SET GLOBAL innodb_buffer_pool_load_abort=ON;
MySQL reads their values and displays them to the user when the variables
are queried, e.g.:
  SELECT @@innodb_buffer_pool_dump_now;
  SELECT @@innodb_buffer_pool_load_now;
  SELECT @@innodb_buffer_pool_load_abort; */
static my_bool	innodb_buffer_pool_dump_now = FALSE;
static my_bool	innodb_buffer_pool_load_now = FALSE;
static my_bool	innodb_buffer_pool_load_abort = FALSE;
18610 
18611 /****************************************************************//**
18612 Trigger a dump of the buffer pool if innodb_buffer_pool_dump_now is set
18613 to ON. This function is registered as a callback with MySQL. */
18614 static
18615 void
buffer_pool_dump_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18616 buffer_pool_dump_now(
18617 /*=================*/
18618 	THD*				thd	/*!< in: thread handle */
18619 					MY_ATTRIBUTE((unused)),
18620 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18621 						variable */
18622 					MY_ATTRIBUTE((unused)),
18623 	void*				var_ptr	/*!< out: where the formal
18624 						string goes */
18625 					MY_ATTRIBUTE((unused)),
18626 	const void*			save)	/*!< in: immediate result from
18627 						check function */
18628 {
18629 	if (*(my_bool*) save && !srv_read_only_mode) {
18630 		mysql_mutex_unlock(&LOCK_global_system_variables);
18631 		buf_dump_start();
18632 		mysql_mutex_lock(&LOCK_global_system_variables);
18633 	}
18634 }
18635 
18636 /****************************************************************//**
18637 Trigger a load of the buffer pool if innodb_buffer_pool_load_now is set
18638 to ON. This function is registered as a callback with MySQL. */
18639 static
18640 void
buffer_pool_load_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18641 buffer_pool_load_now(
18642 /*=================*/
18643 	THD*				thd	/*!< in: thread handle */
18644 					MY_ATTRIBUTE((unused)),
18645 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18646 						variable */
18647 					MY_ATTRIBUTE((unused)),
18648 	void*				var_ptr	/*!< out: where the formal
18649 						string goes */
18650 					MY_ATTRIBUTE((unused)),
18651 	const void*			save)	/*!< in: immediate result from
18652 						check function */
18653 {
18654 	if (*(my_bool*) save && !srv_read_only_mode) {
18655 		mysql_mutex_unlock(&LOCK_global_system_variables);
18656 		buf_load_start();
18657 		mysql_mutex_lock(&LOCK_global_system_variables);
18658 	}
18659 }
18660 
18661 /****************************************************************//**
18662 Abort a load of the buffer pool if innodb_buffer_pool_load_abort
18663 is set to ON. This function is registered as a callback with MySQL. */
18664 static
18665 void
buffer_pool_load_abort(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18666 buffer_pool_load_abort(
18667 /*===================*/
18668 	THD*				thd	/*!< in: thread handle */
18669 					MY_ATTRIBUTE((unused)),
18670 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18671 						variable */
18672 					MY_ATTRIBUTE((unused)),
18673 	void*				var_ptr	/*!< out: where the formal
18674 						string goes */
18675 					MY_ATTRIBUTE((unused)),
18676 	const void*			save)	/*!< in: immediate result from
18677 						check function */
18678 {
18679 	if (*(my_bool*) save && !srv_read_only_mode) {
18680 		mysql_mutex_unlock(&LOCK_global_system_variables);
18681 		buf_load_abort();
18682 		mysql_mutex_lock(&LOCK_global_system_variables);
18683 	}
18684 }
18685 
18686 /****************************************************************//**
18687 Update the system variable innodb_log_write_ahead_size using the "saved"
18688 value. This function is registered as a callback with MySQL. */
18689 static
18690 void
innodb_log_write_ahead_size_update(THD * thd,st_mysql_sys_var *,void *,const void * save)18691 innodb_log_write_ahead_size_update(
18692 /*===============================*/
18693 	THD*				thd,	/*!< in: thread handle */
18694 	st_mysql_sys_var*, void*,
18695 	const void*			save)	/*!< in: immediate result
18696 						from check function */
18697 {
18698 	ulong	val = OS_FILE_LOG_BLOCK_SIZE;
18699 	ulong	in_val = *static_cast<const ulong*>(save);
18700 
18701 	while (val < in_val) {
18702 		val = val * 2;
18703 	}
18704 
18705 	if (val > srv_page_size) {
18706 		val = srv_page_size;
18707 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18708 				    ER_WRONG_ARGUMENTS,
18709 				    "innodb_log_write_ahead_size cannot"
18710 				    " be set higher than innodb_page_size.");
18711 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18712 				    ER_WRONG_ARGUMENTS,
18713 				    "Setting innodb_log_write_ahead_size"
18714 				    " to %lu",
18715 				    srv_page_size);
18716 	} else if (val != in_val) {
18717 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18718 				    ER_WRONG_ARGUMENTS,
18719 				    "innodb_log_write_ahead_size should be"
18720 				    " set 2^n value and larger than 512.");
18721 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18722 				    ER_WRONG_ARGUMENTS,
18723 				    "Setting innodb_log_write_ahead_size"
18724 				    " to %lu",
18725 				    val);
18726 	}
18727 
18728 	srv_log_write_ahead_size = val;
18729 }
18730 
18731 /** Update innodb_status_output or innodb_status_output_locks,
18732 which control InnoDB "status monitor" output to the error log.
18733 @param[out]	var	current value
18734 @param[in]	save	to-be-assigned value */
18735 static
18736 void
innodb_status_output_update(THD *,st_mysql_sys_var *,void * var,const void * save)18737 innodb_status_output_update(THD*,st_mysql_sys_var*,void*var,const void*save)
18738 {
18739   *static_cast<my_bool*>(var)= *static_cast<const my_bool*>(save);
18740   if (srv_monitor_event)
18741   {
18742     mysql_mutex_unlock(&LOCK_global_system_variables);
18743     /* Wakeup server monitor thread. */
18744     os_event_set(srv_monitor_event);
18745     mysql_mutex_lock(&LOCK_global_system_variables);
18746   }
18747 }
18748 
18749 /** Update the system variable innodb_encryption_threads.
18750 @param[in]	save	to-be-assigned value */
18751 static
18752 void
innodb_encryption_threads_update(THD *,st_mysql_sys_var *,void *,const void * save)18753 innodb_encryption_threads_update(THD*,st_mysql_sys_var*,void*,const void*save)
18754 {
18755 	mysql_mutex_unlock(&LOCK_global_system_variables);
18756 	fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
18757 	mysql_mutex_lock(&LOCK_global_system_variables);
18758 }
18759 
18760 /** Update the system variable innodb_encryption_rotate_key_age.
18761 @param[in]	save	to-be-assigned value */
18762 static
18763 void
innodb_encryption_rotate_key_age_update(THD *,st_mysql_sys_var *,void *,const void * save)18764 innodb_encryption_rotate_key_age_update(THD*, st_mysql_sys_var*, void*,
18765 					const void* save)
18766 {
18767 	mysql_mutex_unlock(&LOCK_global_system_variables);
18768 	fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
18769 	mysql_mutex_lock(&LOCK_global_system_variables);
18770 }
18771 
18772 /** Update the system variable innodb_encryption_rotation_iops.
18773 @param[in]	save	to-be-assigned value */
18774 static
18775 void
innodb_encryption_rotation_iops_update(THD *,st_mysql_sys_var *,void *,const void * save)18776 innodb_encryption_rotation_iops_update(THD*, st_mysql_sys_var*, void*,
18777 				       const void* save)
18778 {
18779 	mysql_mutex_unlock(&LOCK_global_system_variables);
18780 	fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
18781 	mysql_mutex_lock(&LOCK_global_system_variables);
18782 }
18783 
18784 /** Update the system variable innodb_encrypt_tables.
18785 @param[in]	save	to-be-assigned value */
18786 static
18787 void
innodb_encrypt_tables_update(THD *,st_mysql_sys_var *,void *,const void * save)18788 innodb_encrypt_tables_update(THD*, st_mysql_sys_var*, void*, const void* save)
18789 {
18790 	mysql_mutex_unlock(&LOCK_global_system_variables);
18791 	fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
18792 	mysql_mutex_lock(&LOCK_global_system_variables);
18793 }
18794 
18795 /** Update the innodb_log_checksums parameter.
18796 @param[in,out]	thd	client connection
18797 @param[out]	var_ptr	current value
18798 @param[in]	save	immediate result from check function */
18799 static
18800 void
innodb_log_checksums_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18801 innodb_log_checksums_update(THD* thd, st_mysql_sys_var*, void* var_ptr,
18802 			    const void* save)
18803 {
18804 	*static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
18805 		thd, *static_cast<const my_bool*>(save));
18806 }
18807 
18808 #ifdef UNIV_DEBUG
18809 static
18810 void
innobase_debug_sync_callback(srv_slot_t * slot,const void * value)18811 innobase_debug_sync_callback(srv_slot_t *slot, const void *value)
18812 {
18813 	const char *value_str = *static_cast<const char* const*>(value);
18814 	size_t len = strlen(value_str) + 1;
18815 
18816 
18817 	// One allocation for list node object and value.
18818 	void *buf = ut_malloc_nokey(sizeof(srv_slot_t::debug_sync_t) + len-1);
18819 	srv_slot_t::debug_sync_t *sync = new(buf) srv_slot_t::debug_sync_t();
18820 	strcpy(sync->str, value_str);
18821 
18822 	rw_lock_x_lock(&slot->debug_sync_lock);
18823 	UT_LIST_ADD_LAST(slot->debug_sync, sync);
18824 	rw_lock_x_unlock(&slot->debug_sync_lock);
18825 }
/** Update callback that passes the assigned string on to
innobase_debug_sync_callback() through srv_for_each_thread(), first for
SRV_WORKER and then for SRV_PURGE. The thd argument is not used.
@param[in]	value	pointer to the (const char*) string being assigned */
static
void
innobase_debug_sync_set(THD *thd, st_mysql_sys_var*, void *, const void *value)
{
	srv_for_each_thread(SRV_WORKER, innobase_debug_sync_callback, value);
	srv_for_each_thread(SRV_PURGE, innobase_debug_sync_callback, value);
}
18833 #endif
18834 
/* Status variable table with a single "Innodb" entry of type SHOW_FUNC
that refers to show_innodb_vars, followed by the terminating entry whose
name is NullS. */
static SHOW_VAR innodb_status_variables_export[]= {
	{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
	{NullS, NullS, SHOW_LONG}
};
18839 
/* Storage engine descriptor, initialized with the handlerton interface
version this file was compiled against. */
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
18842 
18843 #ifdef WITH_WSREP
18844 static
18845 void
wsrep_abort_slave_trx(THD * bf_thd,THD * victim_thd)18846 wsrep_abort_slave_trx(
18847 	THD* bf_thd,
18848 	THD* victim_thd)
18849 {
18850   wsrep_seqno_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);
18851   wsrep_seqno_t victim_seqno= wsrep_thd_trx_seqno(victim_thd);
18852 
18853   WSREP_ERROR("wsrep_abort_slave_trx: BF Aborter %s thread: %ld "
18854 	      "seqno: %lld query_state: %s conflict_state: %s "
18855 	      "exec mode %s query: %s",
18856 	      wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
18857 	      thd_get_thread_id(bf_thd),
18858 	      bf_seqno,
18859 	      wsrep_thd_query_state_str(bf_thd),
18860 	      wsrep_thd_conflict_state_str(bf_thd),
18861 	      wsrep_thd_exec_mode_str(bf_thd),
18862 	      wsrep_thd_query(bf_thd));
18863 
18864   WSREP_ERROR("wsrep_abort_slave_trx: Victim %s thread: %ld "
18865 	      "seqno: %lld query_state: %s  conflict_state: %s "
18866 	      "exec mode %s query: %s",
18867 	      wsrep_thd_is_BF(victim_thd, false) ? "BF" : "normal",
18868 	      thd_get_thread_id(victim_thd),
18869 	      wsrep_thd_trx_seqno(victim_thd),
18870 	      wsrep_thd_query_state_str(victim_thd),
18871 	      wsrep_thd_conflict_state_str(victim_thd),
18872 	      wsrep_thd_exec_mode_str(victim_thd),
18873 	      wsrep_thd_query(victim_thd));
18874 
18875   WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
18876 	      "caused by:\n\t"
18877 	      "1) unsupported configuration options combination, please check documentation.\n\t"
18878 	      "2) a bug in the code.\n\t"
18879 	      "3) a database corruption.\n Node consistency compromized, "
18880 	      "need to abort. Restart the node to resync with cluster.",
18881 	      (long long)bf_seqno, (long long)victim_seqno);
18882   abort();
18883 }
18884 
/** Kill one transaction on behalf of a brute-force (BF) aborter.
The caller must hold the lock mutex and the mutex of victim_trx (both are
asserted below). The victim connection is expected to be locked on entry:
wsrep_innobase_kill_one_trx() calls wsrep_thd_LOCK() before calling this
function. Every path that returns from here calls wsrep_thd_UNLOCK(thd)
first; the remaining paths end in abort() or ut_error.
@param[in]	bf_thd		connection of the BF aborter
@param[in]	bf_trx		transaction of the BF aborter (not used in
				this function)
@param[in,out]	thd		connection of the victim
@param[in,out]	victim_trx	transaction to be killed
@param[in]	signal		passed on to wsrep_thd_awake() */
static
void
wsrep_kill_victim(
	MYSQL_THD const bf_thd,
	const trx_t* const bf_trx,
	MYSQL_THD thd,
	trx_t* victim_trx,
	my_bool signal)
{
	ut_ad(bf_thd);
	ut_ad(thd);
	ut_ad(victim_trx);
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(victim_trx));

	DBUG_ENTER("wsrep_kill_victim");

	const int64_t bf_seqno= wsrep_thd_trx_seqno(bf_thd);

	/* A victim whose query is already exiting is left alone. */
	if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
		WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
			    victim_trx->id);
		wsrep_thd_UNLOCK(thd);
		DBUG_VOID_RETURN;
	}

	/* Only logged; processing continues for any execution mode. */
	if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
		WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT
			    ", state: %s exec %s",
			    victim_trx->id,
			    wsrep_thd_conflict_state_str(thd),
			    wsrep_thd_exec_mode_str(thd));
	}

	/* Step 1: mark the victim. Only a victim in NO_CONFLICT state
	proceeds to step 2; all other states return from here. */
	switch (wsrep_thd_get_conflict_state(thd)) {
	case NO_CONFLICT:
		/* This will cause any call to innobase_kill_query()
		for this thd to bail out. */
		wsrep_thd_set_conflict_state(thd, MUST_ABORT);
		break;
        case MUST_ABORT:
		WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
			    victim_trx->id);
		wsrep_thd_awake(thd, signal);
		wsrep_thd_UNLOCK(thd);
		DBUG_VOID_RETURN;
		break;
	case ABORTED:
	case ABORTING: // fall through
	default:
		WSREP_DEBUG("victim " TRX_ID_FMT " in state %s",
			    victim_trx->id,
			    wsrep_thd_conflict_state_str(thd));
		wsrep_thd_UNLOCK(thd);
		DBUG_VOID_RETURN;
		break;
	}

	/* Step 2: interrupt the victim according to what its query is
	currently doing. */
	switch (wsrep_thd_query_state(thd)) {
	case QUERY_COMMITTING:
	{
		enum wsrep_status rcode;

		WSREP_DEBUG("kill query for: %ld",
			    thd_get_thread_id(thd));
		WSREP_DEBUG("kill trx QUERY_COMMITTING for " TRX_ID_FMT,
			    victim_trx->id);

		if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
			/* Does not return: wsrep_abort_slave_trx()
			ends in abort(). */
			wsrep_abort_slave_trx(bf_thd, thd);
		} else {
			wsrep_t *wsrep= get_wsrep();
			rcode = wsrep->abort_pre_commit(
				wsrep, bf_seqno,
				(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
			);

			switch (rcode) {
			case WSREP_WARNING:
				WSREP_DEBUG("cancel commit warning: "
					    TRX_ID_FMT,
					    victim_trx->id);
				wsrep_thd_awake(thd, signal);
				wsrep_thd_UNLOCK(thd);
				DBUG_VOID_RETURN;
				break;
			case WSREP_OK:
				break;
			default:
				WSREP_ERROR(
					"cancel commit bad exit: %d "
					TRX_ID_FMT,
					rcode, victim_trx->id);
				/* unable to interrupt, must abort */
				/* note: kill_mysql() will block, if we cannot.
				 * kill the lock holder first.
				 */
				abort();
				break;
			}
		}
		wsrep_thd_awake(thd, signal);
		wsrep_thd_UNLOCK(thd);
		break;
	}
	case QUERY_EXEC:
	{
		/* it is possible that victim trx is itself waiting for some
		 * other lock. We need to cancel this waiting
		 */
		WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT,
			    victim_trx->id);

		if (victim_trx->lock.wait_lock) {
			WSREP_DEBUG("victim has wait flag: %ld",
				    thd_get_thread_id(thd));
			lock_t*  wait_lock = victim_trx->lock.wait_lock;

			/* NOTE(review): this repeats the test of the
			enclosing if on the same pointer. */
			if (wait_lock) {
				WSREP_DEBUG("canceling wait lock");
				victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
				lock_cancel_waiting_and_release(wait_lock);
			}

			wsrep_thd_awake(thd, signal);
			wsrep_thd_UNLOCK(thd);
		} else {
			/* abort currently executing query */
			WSREP_DEBUG("kill query for: %ld",
				    thd_get_thread_id(thd));

			/* for BF thd, we need to prevent him from committing */
			if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
				wsrep_abort_slave_trx(bf_thd, thd);
			}

			/* Note that innobase_kill_query will take lock_mutex
			and trx_mutex */
			wsrep_thd_awake(thd, signal);
			wsrep_thd_UNLOCK(thd);
		}
		break;
	}
	case QUERY_IDLE:
	{
		WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);

		if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
			WSREP_DEBUG("kill BF IDLE, seqno: %lld",
				    wsrep_thd_trx_seqno(thd));
			wsrep_abort_slave_trx(bf_thd, thd);
		}

		/* This will lock thd from proceeding after net_read() and
		will cause any call to innobase_kill_query() for this
		thd to bail out. */
		wsrep_thd_set_conflict_state(thd, ABORTING);
		wsrep_lock_rollback();

		/* Queue the victim for abort unless it is queued already. */
		if (wsrep_aborting_thd_contains(thd)) {
			WSREP_WARN("duplicate thd aborter %lu",
			           thd_get_thread_id(thd));
		} else {
			wsrep_aborting_thd_enqueue(thd);
			WSREP_DEBUG("enqueuing trx abort for (%lu)",
				    thd_get_thread_id(thd));
		}

		WSREP_DEBUG("signaling aborter");
		wsrep_unlock_rollback();
		wsrep_thd_UNLOCK(thd);
		break;
	}
	default:
		WSREP_WARN("bad wsrep query state: %d",
			  wsrep_thd_query_state(thd));
		ut_error;
	}
	DBUG_VOID_RETURN;
}
19066 
19067 /*******************************************************************
19068 This function is used to kill one transaction in BF. */
19069 void
wsrep_innobase_kill_one_trx(MYSQL_THD const bf_thd,const trx_t * const bf_trx,trx_t * victim_trx,my_bool signal)19070 wsrep_innobase_kill_one_trx(
19071 	MYSQL_THD const bf_thd,
19072 	const trx_t * const bf_trx,
19073 	trx_t *victim_trx,
19074 	my_bool signal)
19075 {
19076   ut_ad(bf_thd);
19077   ut_ad(victim_trx);
19078   ut_ad(lock_mutex_own());
19079   ut_ad(trx_mutex_own(victim_trx));
19080 
19081   DBUG_ENTER("wsrep_innobase_kill_one_trx");
19082   THD *thd= (THD *) victim_trx->mysql_thd;
19083 
19084   /* Here we need to lock THD::LOCK_thd_data to protect from
19085   concurrent usage or disconnect or delete. */
19086   DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
19087   wsrep_thd_LOCK(thd);
19088   DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
19089 
19090   WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
19091 
19092   WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
19093 	      "trx_id: " TRX_ID_FMT " thread: %ld "
19094 	      "seqno: %lld query_state: %s conflict_state: %s "
19095 	      "exec mode %s query: %s",
19096 	      wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
19097 	      bf_trx ? bf_trx->id : TRX_ID_MAX,
19098 	      thd_get_thread_id(bf_thd),
19099 	      wsrep_thd_trx_seqno(bf_thd),
19100 	      wsrep_thd_query_state_str(bf_thd),
19101 	      wsrep_thd_conflict_state_str(bf_thd),
19102 	      wsrep_thd_exec_mode_str(bf_thd),
19103 	      wsrep_thd_query(bf_thd));
19104 
19105   WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
19106 	      "trx_id: " TRX_ID_FMT " thread: %ld "
19107 	      "seqno: %lld query_state: %s  conflict_state: %s "
19108 	      "exec mode %s query: %s",
19109 	      wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
19110 	      victim_trx->id,
19111 	      thd_get_thread_id(thd),
19112 	      wsrep_thd_trx_seqno(thd),
19113 	      wsrep_thd_query_state_str(thd),
19114 	      wsrep_thd_conflict_state_str(thd),
19115 	      wsrep_thd_exec_mode_str(thd),
19116 	      wsrep_thd_query(thd));
19117 
19118   wsrep_kill_victim(bf_thd, bf_trx, thd, victim_trx, signal);
19119   DBUG_VOID_RETURN;
19120 }
19121 
19122 static
19123 void
wsrep_abort_transaction(handlerton *,THD * bf_thd,THD * victim_thd,my_bool signal)19124 wsrep_abort_transaction(
19125 	handlerton*,
19126 	THD *bf_thd,
19127 	THD *victim_thd,
19128 	my_bool signal)
19129 {
19130   DBUG_ENTER("wsrep_abort_transaction");
19131   /* Note that victim thd is protected with
19132   THD::LOCK_thd_data here. */
19133   trx_t* victim_trx= thd_to_trx(victim_thd);
19134   trx_t* bf_trx= thd_to_trx(bf_thd);
19135 
19136   WSREP_DEBUG("wsrep_abort_transaction: BF:"
19137 	      " thread %ld query_state %s conflict_state %s"
19138 	      " exec %s query %s trx " TRX_ID_FMT,
19139 	      thd_get_thread_id(bf_thd),
19140 	      wsrep_thd_query_state_str(bf_thd),
19141 	      wsrep_thd_conflict_state_str(bf_thd),
19142 	      wsrep_thd_exec_mode_str(bf_thd),
19143 	      wsrep_thd_query(bf_thd),
19144 	      bf_trx ? bf_trx->id : 0);
19145 
19146   WSREP_DEBUG("wsrep_abort_transaction: victim:"
19147 	      " thread %ld query_state %s conflict_state %s"
19148 	      " exec %s query %s trx " TRX_ID_FMT,
19149 	      thd_get_thread_id(victim_thd),
19150 	      wsrep_thd_query_state_str(victim_thd),
19151 	      wsrep_thd_conflict_state_str(victim_thd),
19152 	      wsrep_thd_exec_mode_str(victim_thd),
19153 	      wsrep_thd_query(victim_thd),
19154 	      victim_trx ? victim_trx->id : 0);
19155 
19156   if (victim_trx) {
19157     lock_mutex_enter();
19158     trx_mutex_enter(victim_trx);
19159     wsrep_kill_victim(bf_thd, bf_trx, victim_thd, victim_trx, signal);
19160     lock_mutex_exit();
19161     trx_mutex_exit(victim_trx);
19162     wsrep_srv_conc_cancel_wait(victim_trx);
19163   } else {
19164     wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
19165     wsrep_thd_awake(victim_thd, signal);
19166     wsrep_thd_UNLOCK(victim_thd);
19167   }
19168 
19169   DBUG_VOID_RETURN;
19170 }
19171 
19172 static
19173 int
innobase_wsrep_set_checkpoint(handlerton * hton,const XID * xid)19174 innobase_wsrep_set_checkpoint(
19175 /*==========================*/
19176 	handlerton* hton,
19177 	const XID* xid)
19178 {
19179 	DBUG_ASSERT(hton == innodb_hton_ptr);
19180 
19181 	if (wsrep_is_wsrep_xid(xid)) {
19182 
19183 		trx_rseg_update_wsrep_checkpoint(xid);
19184 		innobase_flush_logs(hton, false);
19185 		return 0;
19186 	} else {
19187 		return 1;
19188 	}
19189 }
19190 
/** Read the wsrep checkpoint XID into *xid by calling
trx_rseg_read_wsrep_checkpoint().
@param[in]	hton	InnoDB handlerton (asserted to be innodb_hton_ptr)
@param[out]	xid	receives the checkpoint XID
@return always 0; the result of the read is not inspected */
static
int
innobase_wsrep_get_checkpoint(
/*==========================*/
	handlerton* hton,
	XID* xid)
{
	DBUG_ASSERT(hton == innodb_hton_ptr);
        trx_rseg_read_wsrep_checkpoint(*xid);
        return 0;
}
19202 
wsrep_fake_trx_id(handlerton *,THD * thd)19203 static void wsrep_fake_trx_id(handlerton *, THD *thd)
19204 {
19205 	trx_id_t trx_id = trx_sys.get_new_trx_id();
19206 	WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s",
19207 		    trx_id, wsrep_thd_query(thd));
19208 	wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), trx_id);
19209 }
19210 
19211 #endif /* WITH_WSREP */
19212 
innodb_idle_flush_pct_update(THD * thd,st_mysql_sys_var * var,void *,const void * save)19213 static void innodb_idle_flush_pct_update(THD *thd, st_mysql_sys_var *var,
19214                                          void*, const void *save)
19215 {
19216   innodb_idle_flush_pct = *static_cast<const ulong*>(save);
19217   push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
19218                HA_ERR_WRONG_COMMAND, deprecated_idle_flush_pct);
19219 }
19220 
19221 /* plugin options */
19222 
19223 static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
19224   PLUGIN_VAR_RQCMDARG,
19225   "The algorithm InnoDB uses for page checksumming. Possible values are"
19226   " CRC32 (hardware accelerated if the CPU supports it)"
19227     " write crc32, allow any of the other checksums to match when reading;"
19228   " STRICT_CRC32"
19229     " write crc32, do not allow other algorithms to match when reading;"
19230   " INNODB"
19231     " write a software calculated checksum, allow any other checksums"
19232     " to match when reading;"
19233   " STRICT_INNODB"
19234     " write a software calculated checksum, do not allow other algorithms"
19235     " to match when reading;"
19236   " NONE"
19237     " write a constant magic number, do not do any checksum verification"
19238     " when reading (same as innodb_checksums=OFF);"
19239   " STRICT_NONE"
19240     " write a constant magic number, do not allow values other than that"
19241     " magic number when reading;"
19242   " Files updated when this option is set to crc32 or strict_crc32 will"
19243   " not be readable by MariaDB versions older than 10.0.4",
19244   NULL, innodb_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_CRC32,
19245   &innodb_checksum_algorithm_typelib);
19246 
19247 static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums,
19248   PLUGIN_VAR_RQCMDARG,
19249   "DEPRECATED. Whether to require checksums for InnoDB redo log blocks.",
19250   NULL, innodb_log_checksums_update, TRUE);
19251 
19252 static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
19253   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
19254   "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting"
19255   " this to OFF."
19256   " Enable InnoDB checksums validation (enabled by default)."
19257   " Disable with --skip-innodb-checksums.",
19258   NULL, NULL, TRUE);
19259 
/* innodb_data_home_dir: read-only string bound to innobase_data_home_dir,
no callbacks, default NULL. */
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

/* innodb_doublewrite: read-only boolean bound to srv_use_doublewrite_buf,
default TRUE. */
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default)."
  " Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

/* innodb_use_atomic_writes: read-only boolean bound to
srv_use_atomic_writes, default TRUE. */
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable atomic writes, instead of using the doublewrite buffer, for files "
  "on devices that supports atomic writes. "
  "This option only works on Linux with either FusionIO cards using "
  "the directFS filesystem or with Shannon cards using any file system.",
  NULL, NULL, TRUE);
19278 
/* innodb_stats_include_delete_marked: boolean bound to
srv_stats_include_delete_marked, no callbacks, default FALSE. */
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
  srv_stats_include_delete_marked,
  PLUGIN_VAR_OPCMDARG,
  "Include delete marked records when calculating persistent statistics",
  NULL, NULL, FALSE);

/* innodb_instant_alter_column_allowed: enum whose names come from
innodb_instant_alter_column_allowed_typelib. The default is index 1, which
the inline comment labels add_last. */
static MYSQL_SYSVAR_ENUM(instant_alter_column_allowed,
			 innodb_instant_alter_column_allowed,
  PLUGIN_VAR_RQCMDARG,
  "File format constraint for ALTER TABLE", NULL, NULL, 1/*add_last*/,
  &innodb_instant_alter_column_allowed_typelib);
19290 
/* innodb_io_capacity: ULONG bound to srv_io_capacity; default 200,
minimum 100, maximum ~0UL; update callback innodb_io_capacity_update(). */
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Number of IOPs the server can do. Tunes the background IO rate",
  NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);

/* innodb_io_capacity_max: ULONG bound to srv_max_io_capacity; default
SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, minimum 100, maximum
SRV_MAX_IO_CAPACITY_LIMIT; update callback innodb_io_capacity_max_update(). */
static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Limit to which innodb_io_capacity can be inflated.",
  NULL, innodb_io_capacity_max_update,
  SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
  SRV_MAX_IO_CAPACITY_LIMIT, 0);

/* innodb_idle_flush_pct: deprecated ULONG that, per its help text, has no
effect; default 100, range 0..100; update callback
innodb_idle_flush_pct_update(). */
static MYSQL_SYSVAR_ULONG(idle_flush_pct, innodb_idle_flush_pct,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. This setting has no effect.",
  NULL, innodb_idle_flush_pct_update, 100, 0, 100, 0);
19307 
/* Variables that exist only in UNIV_DEBUG builds. */
#ifdef UNIV_DEBUG
/* innodb_background_drop_list_empty: boolean, default FALSE; update
callback wait_background_drop_list_empty(). */
static MYSQL_SYSVAR_BOOL(background_drop_list_empty,
  innodb_background_drop_list_empty,
  PLUGIN_VAR_OPCMDARG,
  "Wait for the background drop list to become empty",
  NULL, wait_background_drop_list_empty, FALSE);

/* innodb_log_checkpoint_now: boolean, default FALSE; update callback
checkpoint_now_set(). */
static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
  PLUGIN_VAR_OPCMDARG,
  "Force checkpoint now",
  NULL, checkpoint_now_set, FALSE);

/* innodb_buf_flush_list_now: boolean, default FALSE; update callback
buf_flush_list_now_set(). */
static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
  PLUGIN_VAR_OPCMDARG,
  "Force dirty page flush now",
  NULL, buf_flush_list_now_set, FALSE);

/* innodb_merge_threshold_set_all_debug: UINT, default
DICT_INDEX_MERGE_THRESHOLD_DEFAULT, range 1..50; update callback
innodb_merge_threshold_set_all_debug_update(). */
static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug,
  innodb_merge_threshold_set_all_debug,
  PLUGIN_VAR_RQCMDARG,
  "Override current MERGE_THRESHOLD setting for all indexes at dictionary"
  " cache by the specified value dynamically, at the time.",
  NULL, innodb_merge_threshold_set_all_debug_update,
  DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0);
#endif /* UNIV_DEBUG */
19333 
/* innodb_purge_batch_size: ULONG bound to srv_purge_batch_size, no
callbacks; the limits are annotated inline. */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
  PLUGIN_VAR_OPCMDARG,
  "Number of UNDO log pages to purge in one batch from the history list.",
  NULL, NULL,
  300,			/* Default setting */
  1,			/* Minimum value */
  5000, 0);		/* Maximum value */

/* innodb_purge_threads: read-only ULONG bound to srv_n_purge_threads.
NOTE(review): the help text hard-codes "1 to 32" while the enforced maximum
is srv_max_purge_threads; confirm that the two agree. */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Purge threads can be from 1 to 32. Default is 4.",
  NULL, NULL,
  4,			/* Default setting */
  1,			/* Minimum value */
  srv_max_purge_threads,/* Maximum value */
  0);

/* innodb_sync_array_size: read-only ULONG bound to srv_sync_array_size, no
callbacks; the limits are annotated inline. */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Size of the mutex/lock wait array.",
  NULL, NULL,
  1,			/* Default setting */
  1,			/* Minimum value */
  1024, 0);		/* Maximum value */
19358 
/* innodb_fast_shutdown: UINT bound to srv_fast_shutdown; default 1, range
0..3; check callback fast_shutdown_validate(). */
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible"
  " values are 0, 1 (faster), 2 (crash-like), 3 (fastest clean).",
  fast_shutdown_validate, NULL, 1, 0, 3, 0);

/* innodb_file_per_table: boolean bound to srv_file_per_table, no callbacks,
default TRUE. */
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
  PLUGIN_VAR_NOCMDARG,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, TRUE);

/* innodb_ft_server_stopword_table: string (PLUGIN_VAR_MEMALLOC) bound to
innobase_server_stopword_table; check callback
innodb_stopword_table_validate(), no update callback, default NULL. */
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
  "The user supplied stopword table name.",
  innodb_stopword_table_validate,
  NULL,
  NULL);
19376 
/* innodb_flush_log_at_timeout: UINT bound to srv_flush_log_at_timeout;
default 1, range 0..2700, no callbacks. */
static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
  PLUGIN_VAR_OPCMDARG,
  "Write and flush logs every (n) second.",
  NULL, NULL, 1, 0, 2700, 0);

/* innodb_flush_log_at_trx_commit: ULONG bound to
srv_flush_log_at_trx_commit; default 1, range 0..3, no callbacks. The
meaning of each value is spelled out in the help text. */
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
  "Controls the durability/speed trade-off for commits."
  " Set to 0 (write and flush redo log to disk only once per second),"
  " 1 (flush to disk at each commit),"
  " 2 (write to log at commit but flush to disk only once per second)"
  " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
  " 1 and 3 guarantees that after a crash, committed transactions will"
  " not be lost and will be consistent with the binlog and other transactional"
  " engines. 2 can get inconsistent and lose transactions if there is a"
  " power failure or kernel crash but not if mysqld crashes. 0 has no"
  " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
  NULL, NULL, 1, 0, 3, 0);

/* innodb_flush_method: read-only enum whose names come from
innodb_flush_method_typelib. The default is chosen by IF_WIN(); presumably
SRV_ALL_O_DIRECT_FSYNC on Windows and SRV_FSYNC elsewhere -- TODO confirm
against the IF_WIN definition. */
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.",
  NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
  &innodb_flush_method_typelib);
19401 
/* innodb_file_format: read-only string kept only as a deprecated no-op
(per its help text); default NULL. */
static MYSQL_SYSVAR_STR(file_format, innodb_file_format,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_large_prefix: read-only string kept only as a deprecated no-op
(per its help text); default NULL. */
static MYSQL_SYSVAR_STR(large_prefix, innodb_large_prefix,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_force_load_corrupted: read-only boolean bound to
srv_load_corrupted, default FALSE. */
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to load metadata of corrupted table.",
  NULL, NULL, FALSE);

/* innodb_locks_unsafe_for_binlog: deprecated read-only boolean bound to
innobase_locks_unsafe_for_binlog, default FALSE. */
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. This option may be removed in future releases."
  " Please use READ COMMITTED transaction isolation level instead."
  " Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

/* innodb_log_group_home_dir: read-only string bound to
srv_log_group_home_dir, default NULL. */
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);
19425 
19426 /** Update innodb_page_cleaners.
19427 @param[in]	save	the new value of innodb_page_cleaners */
19428 static
19429 void
innodb_page_cleaners_threads_update(THD *,struct st_mysql_sys_var *,void *,const void * save)19430 innodb_page_cleaners_threads_update(THD*, struct st_mysql_sys_var*, void*, const void *save)
19431 {
19432 	buf_flush_set_page_cleaner_thread_cnt(*static_cast<const ulong*>(save));
19433 }
19434 
/* innodb_page_cleaners: ULONG bound to srv_n_page_cleaners; default 4,
range 1..64; update callback innodb_page_cleaners_threads_update(). */
static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
  PLUGIN_VAR_RQCMDARG,
  "Page cleaner threads can be from 1 to 64. Default is 4.",
  NULL,
  innodb_page_cleaners_threads_update, 4, 1, 64, 0);

/* innodb_max_dirty_pages_pct: DOUBLE bound to
srv_max_buf_pool_modified_pct; default 75.0, range 0..99.999; update
callback innodb_max_dirty_pages_pct_update(). */
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages allowed in bufferpool.",
  NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0);

/* innodb_max_dirty_pages_pct_lwm: DOUBLE bound to
srv_max_dirty_pages_pct_lwm; default 0, range 0..99.999; update callback
innodb_max_dirty_pages_pct_lwm_update(). */
static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
  srv_max_dirty_pages_pct_lwm,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages at which flushing kicks in.",
  NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);

/* innodb_adaptive_flushing_lwm: DOUBLE bound to srv_adaptive_flushing_lwm;
default 10.0, range 0.0..70.0, no callbacks. */
static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
  srv_adaptive_flushing_lwm,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of log capacity below which no adaptive flushing happens.",
  NULL, NULL, 10.0, 0.0, 70.0, 0);

/* innodb_adaptive_flushing: boolean bound to srv_adaptive_flushing,
default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
  PLUGIN_VAR_NOCMDARG,
  "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
  NULL, NULL, TRUE);

/* innodb_flush_sync: boolean bound to srv_flush_sync, default TRUE, no
callbacks. */
static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync,
  PLUGIN_VAR_NOCMDARG,
  "Allow IO bursts at the checkpoints ignoring io_capacity setting.",
  NULL, NULL, TRUE);

/* innodb_flushing_avg_loops: ULONG bound to srv_flushing_avg_loops;
default 30, range 1..1000, no callbacks. */
static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
  srv_flushing_avg_loops,
  PLUGIN_VAR_RQCMDARG,
  "Number of iterations over which the background flushing is averaged.",
  NULL, NULL, 30, 1, 1000, 0);
19473 
/* innodb_max_purge_lag: ULONG bound to srv_max_purge_lag; default 0,
range 0..~0UL, no callbacks. */
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
  PLUGIN_VAR_RQCMDARG,
  "Desired maximum length of the purge queue (0 = no limit)",
  NULL, NULL, 0, 0, ~0UL, 0);

/* innodb_max_purge_lag_delay: ULONG bound to srv_max_purge_lag_delay, no
callbacks; the limits are annotated inline. */
static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
   PLUGIN_VAR_RQCMDARG,
   "Maximum delay of user threads in micro-seconds",
   NULL, NULL,
   0L,			/* Default setting */
   0L,			/* Minimum value */
   10000000UL, 0);	/* Maximum value */

/* innodb_max_purge_lag_wait: UINT bound to innodb_max_purge_lag_wait;
default UINT_MAX, range 0..UINT_MAX; update callback
innodb_max_purge_lag_wait_update(). */
static MYSQL_SYSVAR_UINT(max_purge_lag_wait, innodb_max_purge_lag_wait,
  PLUGIN_VAR_RQCMDARG,
  "Wait until History list length is below the specified limit",
  NULL, innodb_max_purge_lag_wait_update, UINT_MAX, 0, UINT_MAX, 0);
19491 
/* innodb_rollback_on_timeout: read-only boolean bound to
innobase_rollback_on_timeout, default FALSE. */
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
  NULL, NULL, FALSE);

/* innodb_status_file: boolean bound to innobase_create_status_file,
default FALSE. Flagged PLUGIN_VAR_NOSYSVAR -- presumably a command-line
option only, not exposed as a system variable; confirm against plugin.h. */
static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file",
  NULL, NULL, FALSE);

/* innodb_stats_on_metadata: boolean bound to innobase_stats_on_metadata,
default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
  PLUGIN_VAR_OPCMDARG,
  "Enable statistics gathering for metadata commands such as"
  " SHOW TABLE STATUS for tables that use transient statistics (off by default)",
  NULL, NULL, FALSE);
19507 
/* innodb_stats_sample_pages: deprecated name that binds to the same
srv_stats_transient_sample_pages storage as
innodb_stats_transient_sample_pages below; default 8, range 1..~0ULL;
update callback innodb_stats_sample_pages_update(). */
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "Deprecated, use innodb_stats_transient_sample_pages instead",
  NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0);

/* innodb_stats_transient_sample_pages: ULONGLONG bound to
srv_stats_transient_sample_pages; default 8, range 1..~0ULL, no callbacks. */
static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
  srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating transient"
  " statistics (if persistent statistics are not used, default 8)",
  NULL, NULL, 8, 1, ~0ULL, 0);

/* innodb_stats_persistent: boolean bound to srv_stats_persistent, default
TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB persistent statistics enabled for all tables unless overridden"
  " at table level",
  NULL, NULL, TRUE);

/* innodb_stats_auto_recalc: boolean bound to srv_stats_auto_recalc,
default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB automatic recalculation of persistent statistics enabled for all"
  " tables unless overridden at table level (automatic recalculation is only"
  " done when InnoDB decides that the table has changed too much and needs a"
  " new statistics)",
  NULL, NULL, TRUE);

/* innodb_stats_persistent_sample_pages: ULONGLONG bound to
srv_stats_persistent_sample_pages; default 20, range 1..~0ULL, no
callbacks. */
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
  srv_stats_persistent_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating persistent"
  " statistics (by ANALYZE, default 20)",
  NULL, NULL, 20, 1, ~0ULL, 0);

/* innodb_stats_modified_counter: ULONGLONG bound to
srv_stats_modified_counter; default 0, range 0..~0ULL, no callbacks. */
static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
  PLUGIN_VAR_RQCMDARG,
  "The number of rows modified before we calculate new statistics (default 0 = current limits)",
  NULL, NULL, 0, 0, ~0ULL, 0);

/* innodb_stats_traditional: boolean bound to srv_stats_sample_traditional,
default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
  PLUGIN_VAR_RQCMDARG,
  "Enable traditional statistic calculation based on number of configured pages (default true)",
  NULL, NULL, TRUE);
19550 
#ifdef BTR_CUR_HASH_ADAPT
/* innodb_adaptive_hash_index: boolean bound to btr_search_enabled, default
true; update callback innodb_adaptive_hash_index_update().
The help text is built from adjacent string literals, so only one of the two
pieces carries the separating space (same layout as innodb_doublewrite). */
static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB adaptive hash index (enabled by default)."
  " Disable with --skip-innodb-adaptive-hash-index.",
  NULL, innodb_adaptive_hash_index_update, true);

/** Number of distinct partitions of AHI.
Each partition is protected by its own latch and so we have parts number
of latches protecting complete search system.
Read-only ULONG bound to btr_ahi_parts; default 8, range 1..512. */
static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of InnoDB Adaptive Hash Index Partitions (default 8)",
  NULL, NULL, 8, 1, 512, 0);
#endif /* BTR_CUR_HASH_ADAPT */
19566 
/* innodb_replication_delay: ULONG bound to srv_replication_delay; default
0, range 0..~0UL, no callbacks. */
static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
  PLUGIN_VAR_RQCMDARG,
  "Replication thread delay (ms) on the slave server if"
  " innodb_thread_concurrency is reached (0 by default)",
  NULL, NULL, 0, 0, ~0UL, 0);

/* innodb_compression_level: UINT bound to page_zip_level; default
DEFAULT_COMPRESSION_LEVEL, range 0..9, no callbacks. */
static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
  PLUGIN_VAR_RQCMDARG,
  "Compression level used for zlib compression.  0 is no compression"
  ", 1 is fastest, 9 is best compression and default is 6.",
  NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);

/* innodb_log_compressed_pages: boolean bound to page_zip_log_pages,
default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
       PLUGIN_VAR_OPCMDARG,
  "Enables/disables the logging of entire compressed page images."
  " InnoDB logs the compressed pages to prevent corruption if"
  " the zlib compression algorithm changes."
  " When turned OFF, InnoDB will assume that the zlib"
  " compression algorithm doesn't change.",
  NULL, NULL, TRUE);

/* innodb_log_optimize_ddl: deprecated boolean bound to
innodb_log_optimize_ddl, default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(log_optimize_ddl, innodb_log_optimize_ddl,
  PLUGIN_VAR_OPCMDARG,
  "DEPRECATED. Ignored in MariaDB 10.5."
  " Reduce redo logging when natively creating indexes or rebuilding tables."
  " Enabling this may slow down backup and cause delay due to page flushing.",
  NULL, NULL, FALSE);
19594 
/* innodb_autoextend_increment: ULONG bound to
sys_tablespace_auto_extend_increment; default 64, range 1..1000 (megabytes
per the help text), no callbacks. */
static MYSQL_SYSVAR_ULONG(autoextend_increment,
  sys_tablespace_auto_extend_increment,
  PLUGIN_VAR_RQCMDARG,
  "Data file autoextend increment in megabytes",
  NULL, NULL, 64L, 1L, 1000L, 0);

/* innodb_buffer_pool_chunk_size: read-only ULONG bound to
srv_buf_pool_chunk_unit; default 128 * 1024 * 1024, minimum 1024 * 1024,
maximum LONG_MAX, final argument 1024 * 1024.
NOTE(review): the help text says 0 disables resizing, but the declared
minimum is 1024 * 1024; confirm whether 0 can ever take effect. */
static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of a single memory chunk within each buffer pool instance"
  " for resizing buffer pool. Online buffer pool resizing happens"
  " at this granularity. 0 means disable resizing buffer pool.",
  NULL, NULL,
  128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
19608 
/* Variables that exist only in UNIV_DEBUG or UNIV_PERF_DEBUG builds. */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
/* innodb_page_hash_locks: read-only ULONG bound to srv_n_page_hash_locks;
default 16, range 1..MAX_PAGE_HASH_LOCKS. */
static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
  NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0);

/* innodb_doublewrite_batch_size: read-only ULONG bound to
srv_doublewrite_batch_size; default 120, range 1..127. */
static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of pages reserved in doublewrite buffer for batch flushing",
  NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
19620 
/* innodb_lock_schedule_algorithm: read-only enum whose names come from
innodb_lock_schedule_algorithm_typelib; default
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS. */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The algorithm Innodb uses for deciding which locks to grant next when"
  " a lock is released. Possible values are"
  " FCFS"
  " grant the locks in First-Come-First-Served order;"
  " VATS"
  " use the Variance-Aware-Transaction-Scheduling algorithm, which"
  " uses an Eldest-Transaction-First heuristic.",
  NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
  &innodb_lock_schedule_algorithm_typelib);

/* innodb_buffer_pool_instances: read-only ULONG bound to
srv_buf_pool_instances; default srv_buf_pool_instances_default, range
0..MAX_BUFFER_POOLS. */
static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
  NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0);
19637 
/* innodb_buffer_pool_filename: string (PLUGIN_VAR_MEMALLOC) bound to
srv_buf_dump_filename; check callback
innodb_srv_buf_dump_filename_validate(); default
SRV_BUF_DUMP_FILENAME_DEFAULT. */
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "Filename to/from which to dump/load the InnoDB buffer pool",
  innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);

/* innodb_buffer_pool_dump_now: boolean trigger, default FALSE; update
callback buffer_pool_dump_now(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_dump_now, FALSE);

/* innodb_buffer_pool_dump_at_shutdown: boolean bound to
srv_buffer_pool_dump_at_shutdown, default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
  PLUGIN_VAR_RQCMDARG,
  "Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);

/* innodb_buffer_pool_dump_pct: ULONG bound to srv_buf_pool_dump_pct;
default 25, range 1..100, no callbacks. */
static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
  PLUGIN_VAR_RQCMDARG,
  "Dump only the hottest N% of each buffer pool, defaults to 25",
  NULL, NULL, 25, 1, 100, 0);
19657 
/* Buffer pool test hooks that exist only in UNIV_DEBUG builds. */
#ifdef UNIV_DEBUG
/* Added to test the innodb_buffer_pool_load_incomplete status variable.
ULONG bound to srv_buf_pool_load_pages_abort; default LONG_MAX, range
1..LONG_MAX, no callbacks. */
static MYSQL_SYSVAR_ULONG(buffer_pool_load_pages_abort, srv_buf_pool_load_pages_abort,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages during a buffer pool load to process before signaling innodb_buffer_pool_load_abort=1",
  NULL, NULL, LONG_MAX, 1, LONG_MAX, 0);

/* innodb_buffer_pool_evict: string bound to srv_buffer_pool_evict, default
""; update callback innodb_buffer_pool_evict_update(). */
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
  PLUGIN_VAR_RQCMDARG,
  "Evict pages from the buffer pool",
  NULL, innodb_buffer_pool_evict_update, "");
#endif /* UNIV_DEBUG */
19670 
/* innodb_buffer_pool_load_now: boolean trigger, default FALSE; update
callback buffer_pool_load_now(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_load_now, FALSE);

/* innodb_buffer_pool_load_abort: boolean trigger, default FALSE; update
callback buffer_pool_load_abort(). */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
  PLUGIN_VAR_RQCMDARG,
  "Abort a currently running load of the buffer pool",
  NULL, buffer_pool_load_abort, FALSE);

/* there is no point in changing this during runtime, thus readonly.
Boolean bound to srv_buffer_pool_load_at_startup, default TRUE. */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Load the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);
19686 
19687 static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
19688   PLUGIN_VAR_RQCMDARG,
19689   "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
19690   "defragmentation will be paused. And new defragmentation command will fail."
19691   "Paused defragmentation commands will resume when this variable is set to "
19692   "true again.",
19693   NULL, NULL, FALSE);
19694 
/* innodb_defragment_n_pages: UINT bound to srv_defragment_n_pages; default
7, range 2..32, no callbacks. */
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages considered at once when merging multiple pages to "
  "defragment",
  NULL, NULL, 7, 2, 32, 0);

/* innodb_defragment_stats_accuracy: UINT bound to
srv_defragment_stats_accuracy; default 0, range 0..~0U, no callbacks. */
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
  srv_defragment_stats_accuracy,
  PLUGIN_VAR_RQCMDARG,
  "How many defragment stats changes there are before the stats "
  "are written to persistent storage. Set to 0 meaning disable "
  "defragment stats tracking.",
  NULL, NULL, 0, 0, ~0U, 0);

/* innodb_defragment_fill_factor_n_recs: UINT bound to
srv_defragment_fill_factor_n_recs; default 20, range 1..100, no callbacks. */
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
  srv_defragment_fill_factor_n_recs,
  PLUGIN_VAR_RQCMDARG,
  "How many records of space defragmentation should leave on the page. "
  "This variable, together with innodb_defragment_fill_factor, is introduced "
  "so defragmentation won't pack the page too full and cause page split on "
  "the next insert on every page. The variable indicating more defragmentation"
  " gain is the one effective.",
  NULL, NULL, 20, 1, 100, 0);
19718 
19719 static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
19720   PLUGIN_VAR_RQCMDARG,
19721   "A number between [0.7, 1] that tells defragmentation how full it should "
19722   "fill a page. Default is 0.9. Number below 0.7 won't make much sense."
19723   "This variable, together with innodb_defragment_fill_factor_n_recs, is "
19724   "introduced so defragmentation won't pack the page too full and cause "
19725   "page split on the next insert on every page. The variable indicating more "
19726   "defragmentation gain is the one effective.",
19727   NULL, NULL, 0.9, 0.7, 1, 0);
19728 
19729 static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
19730   PLUGIN_VAR_RQCMDARG,
19731   "Do not defragment a single index more than this number of time per second."
19732   "This controls the number of time defragmentation thread can request X_LOCK "
19733   "on an index. Defragmentation thread will check whether "
19734   "1/defragment_frequency (s) has passed since it worked on this index last "
19735   "time, and put the index back to the queue if not enough time has passed. "
19736   "The actual frequency can only be lower than this given number.",
19737   NULL, innodb_defragment_frequency_update,
19738   SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
19739 
19740 
/* innodb_lru_scan_depth: ULONG bound to srv_LRU_scan_depth; default 1024,
range 100..~0UL, no callbacks. */
static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
  PLUGIN_VAR_RQCMDARG,
  "How deep to scan LRU to keep it clean",
  NULL, NULL, 1024, 100, ~0UL, 0);

/* innodb_flush_neighbors: ULONG bound to srv_flush_neighbors; default 1,
range 0..2, no callbacks. */
static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
  PLUGIN_VAR_OPCMDARG,
  "Set to 0 (don't flush neighbors from buffer pool),"
  " 1 (flush contiguous neighbors from buffer pool)"
  " or 2 (flush neighbors from buffer pool),"
  " when flushing a block",
  NULL, NULL, 1, 0, 2, 0);

/* innodb_commit_concurrency: ULONG bound to innobase_commit_concurrency;
default 0, range 0..1000; check callback
innobase_commit_concurrency_validate(). */
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
  innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);

/* innodb_concurrency_tickets: ULONG bound to srv_n_free_tickets_to_enter;
default 5000, range 1..~0UL, no callbacks. */
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
  PLUGIN_VAR_RQCMDARG,
  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
  NULL, NULL, 5000L, 1L, ~0UL, 0);
19763 
/* innodb_deadlock_detect: boolean bound to innobase_deadlock_detect,
default TRUE, no callbacks. */
static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
  PLUGIN_VAR_NOCMDARG,
  "Enable/disable InnoDB deadlock detector (default ON)."
  " if set to OFF, deadlock detection is skipped,"
  " and we rely on innodb_lock_wait_timeout in case of deadlock.",
  NULL, NULL, TRUE);

/* innodb_fill_factor: UINT bound to innobase_fill_factor; default 100,
range 10..100, no callbacks. */
static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of B-tree page filled during bulk insert",
  NULL, NULL, 100, 10, 100, 0);

/* innodb_ft_enable_diag_print: boolean bound to fts_enable_diag_print,
default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
  PLUGIN_VAR_OPCMDARG,
  "Whether to enable additional FTS diagnostic printout ",
  NULL, NULL, FALSE);

/* innodb_disable_sort_file_cache: boolean bound to
srv_disable_sort_file_cache, default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
  PLUGIN_VAR_OPCMDARG,
  "Whether to disable OS system file cache for sort I/O",
  NULL, NULL, FALSE);

/* innodb_ft_aux_table: string (PLUGIN_VAR_MEMALLOC) bound to
innodb_ft_aux_table; check callback innodb_ft_aux_table_validate(), default
NULL. */
static MYSQL_SYSVAR_STR(ft_aux_table, innodb_ft_aux_table,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "FTS internal auxiliary table to be checked",
  innodb_ft_aux_table_validate, NULL, NULL);
19790 
/* innodb_ft_cache_size: read-only ULONG bound to fts_max_cache_size;
default 8000000, range 1600000..80000000 (bytes per the help text). */
static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search cache size in bytes",
  NULL, NULL, 8000000, 1600000, 80000000, 0);

/* innodb_ft_total_cache_size: read-only ULONG bound to
fts_max_total_cache_size; default 640000000, range 32000000..1600000000. */
static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Total memory allocated for InnoDB Fulltext Search cache",
  NULL, NULL, 640000000, 32000000, 1600000000, 0);

/* innodb_ft_result_cache_limit: SIZE_T bound to fts_result_cache_limit;
default 2000000000, range 1000000..SIZE_T_MAX, no callbacks. */
static MYSQL_SYSVAR_SIZE_T(ft_result_cache_limit, fts_result_cache_limit,
  PLUGIN_VAR_RQCMDARG,
  "InnoDB Fulltext search query result cache limit in bytes",
  NULL, NULL, 2000000000L, 1000000L, SIZE_T_MAX, 0);

/* innodb_ft_min_token_size: read-only ULONG bound to fts_min_token_size;
default 3, range 0..16. */
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search minimum token size in characters",
  NULL, NULL, 3, 0, 16, 0);

/* innodb_ft_max_token_size: read-only ULONG bound to fts_max_token_size;
default and maximum are both FTS_MAX_WORD_LEN_IN_CHAR, minimum 10. */
static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search maximum token size in characters",
  NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);

/* innodb_ft_num_word_optimize: ULONG bound to fts_num_word_optimize;
default 2000, range 1000..10000, no callbacks. */
static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB Fulltext search number of words to optimize for each optimize table call ",
  NULL, NULL, 2000, 1000, 10000, 0);

/* innodb_ft_sort_pll_degree: read-only ULONG bound to fts_sort_pll_degree;
default 2, range 1..16. */
static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number",
  NULL, NULL, 2, 1, 16, 0);
19825 
/* innodb_sort_buffer_size: read-only ULONG bound to srv_sort_buf_size;
default 1048576, range 65536..64<<20. */
static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Memory buffer size for index creation",
  NULL, NULL, 1048576, 65536, 64<<20, 0);

/* innodb_online_alter_log_max_size: ULONGLONG bound to
srv_online_max_size; default 128<<20, range 65536..~0ULL, no callbacks. */
static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
  PLUGIN_VAR_RQCMDARG,
  "Maximum modification log file size for online index creation",
  NULL, NULL, 128<<20, 65536, ~0ULL, 0);

/* innodb_optimize_fulltext_only: boolean bound to
innodb_optimize_fulltext_only, default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
  PLUGIN_VAR_NOCMDARG,
  "Only optimize the Fulltext index of the table",
  NULL, NULL, FALSE);
19840 
/* innodb_read_io_threads: read-only ULONG bound to srv_n_read_io_threads;
default 4, range 1..64. */
static MYSQL_SYSVAR_ULONG(read_io_threads, srv_n_read_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background read I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/* innodb_write_io_threads: read-only ULONG bound to
srv_n_write_io_threads; default 4, range 1..64. */
static MYSQL_SYSVAR_ULONG(write_io_threads, srv_n_write_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background write I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/* innodb_force_recovery: read-only ULONG bound to srv_force_recovery;
default 0, range 0..6. */
static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Helps to save your data in case the disk image of the database becomes corrupt.",
  NULL, NULL, 0, 0, 6, 0);

/* innodb_page_size: read-only ULONG bound to srv_page_size; default
UNIV_PAGE_SIZE_DEF, range UNIV_PAGE_SIZE_MIN..UNIV_PAGE_SIZE_MAX. */
static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Page size to use for all InnoDB tablespaces.",
  NULL, NULL, UNIV_PAGE_SIZE_DEF,
  UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0);
19861 
/* innodb_log_buffer_size: read-only ULONG bound to srv_log_buffer_size;
default 16L << 20, minimum 256L << 10, maximum LONG_MAX, final argument
1024. */
static MYSQL_SYSVAR_ULONG(log_buffer_size, srv_log_buffer_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the buffer which InnoDB uses to write log to the log files on disk.",
  NULL, NULL, 16L << 20, 256L << 10, LONG_MAX, 1024);

/* innodb_log_file_size: read-only ULONGLONG bound to srv_log_file_size;
default 48 << 20, minimum 1 << 20, maximum log_group_max_size, final
argument UNIV_PAGE_SIZE_MAX (explained by the comment that follows). */
static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of each log file in a log group.",
  NULL, NULL, 48 << 20, 1 << 20, log_group_max_size, UNIV_PAGE_SIZE_MAX);
/* OS_FILE_LOG_BLOCK_SIZE would be more appropriate than UNIV_PAGE_SIZE_MAX,
but fil_space_t is being used for the redo log, and it uses data pages. */

/* innodb_log_files_in_group: read-only ULONG bound to srv_n_log_files;
default 2, range 1..SRV_N_LOG_FILES_MAX. */
static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
  NULL, NULL, 2, 1, SRV_N_LOG_FILES_MAX, 0);

/* innodb_log_write_ahead_size: ULONG bound to srv_log_write_ahead_size;
default 8*1024, minimum OS_FILE_LOG_BLOCK_SIZE, maximum UNIV_PAGE_SIZE_DEF,
final argument OS_FILE_LOG_BLOCK_SIZE; update callback
innodb_log_write_ahead_size_update(). */
static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size,
  PLUGIN_VAR_RQCMDARG,
  "Redo log write ahead unit size to avoid read-on-write,"
  " it should match the OS cache block IO size",
  NULL, innodb_log_write_ahead_size_update,
  8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE);
19885 
/* innodb_old_blocks_pct: UINT bound to innobase_old_blocks_pct; default
100 * 3 / 8 (37 with integer division), range 5..95; update callback
innodb_old_blocks_pct_update(). */
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of the buffer pool to reserve for 'old' blocks.",
  NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);

/* innodb_old_blocks_time: UINT bound to buf_LRU_old_threshold_ms; default
1000, range 0..UINT_MAX32, no callbacks. */
static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
  PLUGIN_VAR_RQCMDARG,
  "Move blocks to the 'new' end of the buffer pool if the first access"
  " was at least this many milliseconds ago."
  " The timeout is disabled if 0.",
  NULL, NULL, 1000, 0, UINT_MAX32, 0);

/* innodb_open_files: read-only ULONG bound to innobase_open_files; default
0, range 0..LONG_MAX. */
static MYSQL_SYSVAR_ULONG(open_files, innobase_open_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "How many files at the maximum InnoDB keeps open at the same time.",
  NULL, NULL, 0, 0, LONG_MAX, 0);

/* innodb_sync_spin_loops: ULONG bound to srv_n_spin_wait_rounds; default
30, range 0..~0UL, no callbacks. */
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
  PLUGIN_VAR_RQCMDARG,
  "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
  NULL, NULL, 30L, 0L, ~0UL, 0);

/* innodb_spin_wait_delay: UINT bound to srv_spin_wait_delay; default 4,
range 0..6000, no callbacks. */
static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
  PLUGIN_VAR_OPCMDARG,
  "Maximum delay between polling for a spin lock (4 by default)",
  NULL, NULL, 4, 0, 6000, 0);
19912 
/* innodb_thread_concurrency: ULONG bound to srv_thread_concurrency;
default 0, range 0..1000, no callbacks. */
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
  NULL, NULL, 0, 0, 1000, 0);

/* innodb_adaptive_max_sleep_delay: ULONG bound to
srv_adaptive_max_sleep_delay, no callbacks; the limits are annotated
inline. */
static MYSQL_SYSVAR_ULONG(
  adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "The upper limit of the sleep delay in usec. Value of 0 disables it.",
  NULL, NULL,
  150000,			/* Default setting */
  0,				/* Minimum value */
  1000000, 0);			/* Maximum value */

/* innodb_prefix_index_cluster_optimization: boolean bound to
srv_prefix_index_cluster_optimization, default FALSE, no callbacks. */
static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
  srv_prefix_index_cluster_optimization,
  PLUGIN_VAR_OPCMDARG,
  "Enable prefix optimization to sometimes avoid cluster index lookups.",
  NULL, NULL, FALSE);

/* innodb_thread_sleep_delay: ULONG bound to srv_thread_sleep_delay;
default 10000, range 0..1000000 (usec per the help text), no callbacks. */
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "Time of innodb thread sleeping before joining InnoDB queue (usec)."
  " Value 0 disable a sleep",
  NULL, NULL,
  10000L,
  0L,
  1000000L, 0);
19941 
/* innodb_data_file_path: read-only string bound to
innobase_data_file_path; default "ibdata1:12M:autoextend". */
static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to individual files and their sizes.",
  NULL, NULL, "ibdata1:12M:autoextend");

/* innodb_temp_data_file_path: read-only string bound to
innobase_temp_data_file_path; default "ibtmp1:12M:autoextend". */
static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to files and their sizes making temp-tablespace.",
  NULL, NULL, "ibtmp1:12M:autoextend");

/* innodb_undo_directory: read-only string bound to srv_undo_dir; default
NULL. */
static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Directory where undo tablespace files live, this path can be absolute.",
  NULL, NULL, NULL);
19956 
19957 static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces,
19958   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19959   "Number of undo tablespaces to use.",
19960   NULL, NULL,
19961   0L,			/* Default seting */
19962   0L,			/* Minimum value */
19963   TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */
19964 
19965 static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
19966   PLUGIN_VAR_OPCMDARG,
19967   "Number of undo logs to use.",
19968   NULL, NULL,
19969   TRX_SYS_N_RSEGS,	/* Default setting */
19970   1,			/* Minimum value */
19971   TRX_SYS_N_RSEGS, 0);	/* Maximum value */
19972 
19973 static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
19974   PLUGIN_VAR_OPCMDARG,
19975   "Desired maximum UNDO tablespace size in bytes",
19976   NULL, NULL,
19977   10 << 20, 10 << 20,
19978   1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
19979 
19980 static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
19981   srv_purge_rseg_truncate_frequency,
19982   PLUGIN_VAR_OPCMDARG,
19983   "Dictates rate at which UNDO records are purged. Value N means"
19984   " purge rollback segment(s) on every Nth iteration of purge invocation",
19985   NULL, NULL, 128, 1, 128, 0);
19986 
19987 static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate,
19988   PLUGIN_VAR_OPCMDARG,
19989   "Enable or Disable Truncate of UNDO tablespace.",
19990   NULL, NULL, FALSE);
19991 
19992 /* Alias for innodb_undo_logs, this config variable is deprecated. */
19993 static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
19994   PLUGIN_VAR_OPCMDARG,
19995   "Number of undo logs to use (deprecated).",
19996   NULL, NULL,
19997   TRX_SYS_N_RSEGS,	/* Default setting */
19998   1,			/* Minimum value */
19999   TRX_SYS_N_RSEGS, 0);	/* Maximum value */
20000 
20001 static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
20002   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
20003   "The AUTOINC lock modes supported by InnoDB:"
20004   " 0 => Old style AUTOINC locking (for backward compatibility);"
20005   " 1 => New style AUTOINC locking;"
20006   " 2 => No AUTOINC locking (unsafe for SBR)",
20007   NULL, NULL,
20008   AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
20009   AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
20010   AUTOINC_NO_LOCKING, 0);	/* Maximum value */
20011 
20012 static MYSQL_SYSVAR_STR(version, innodb_version_str,
20013   PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
20014   "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
20015 
20016 static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
20017   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
20018   "Use native AIO if supported on this platform.",
20019   NULL, NULL, TRUE);
20020 
20021 #ifdef HAVE_LIBNUMA
20022 static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
20023   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
20024   "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
20025   NULL, NULL, FALSE);
20026 #endif /* HAVE_LIBNUMA */
20027 
20028 static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
20029   PLUGIN_VAR_RQCMDARG,
20030   "Buffer changes to secondary indexes.",
20031   NULL, NULL, IBUF_USE_ALL, &innodb_change_buffering_typelib);
20032 
20033 static MYSQL_SYSVAR_UINT(change_buffer_max_size,
20034   srv_change_buffer_max_size,
20035   PLUGIN_VAR_RQCMDARG,
20036   "Maximum on-disk size of change buffer in terms of percentage"
20037   " of the buffer pool.",
20038   NULL, innodb_change_buffer_max_size_update,
20039   CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);
20040 
20041 static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
20042    PLUGIN_VAR_RQCMDARG,
20043   "Specifies how InnoDB index statistics collection code should"
20044   " treat NULLs. Possible values are NULLS_EQUAL (default),"
20045   " NULLS_UNEQUAL and NULLS_IGNORED",
20046    NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
20047 
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/* Change buffer debugging aids, only compiled into debug builds. */

static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Dump the change buffer at startup.",
  NULL, NULL,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
  PLUGIN_VAR_RQCMDARG,
  "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
  NULL, NULL,
  0,				/* Default setting */
  0,				/* Minimum value */
  1, 0);			/* Maximum value */

/* The flags used to read PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG, which
asks for "no argument" and "argument required" at once. PLUGIN_VAR_RQCMDARG
is defined as 0x0000 in the plugin API, so the effective value always was
PLUGIN_VAR_NOCMDARG; only that flag is spelled out now. */
static MYSQL_SYSVAR_BOOL(disable_background_merge,
  srv_ibuf_disable_background_merge,
  PLUGIN_VAR_NOCMDARG,
  "Disable change buffering merges by the master thread",
  NULL, NULL,
  FALSE);			/* Default setting */
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
20065 
20066 static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
20067   PLUGIN_VAR_RQCMDARG,
20068   "A number between [0, 100] that tells how oftern buffer pool dump status "
20069   "in percentages should be printed. E.g. 10 means that buffer pool dump "
20070   "status is printed when every 10% of number of buffer pool pages are "
20071   "dumped. Default is 0 (only start and end status is printed).",
20072   NULL, NULL, 0, 0, 100, 0);
20073 
#ifdef WITH_INNODB_DISALLOW_WRITES
/*******************************************************
 *    innobase_disallow_writes variable definition     *
 *******************************************************/

/* Backing storage of innodb_disallow_writes.
Must always init to FALSE. */
static my_bool	innobase_disallow_writes	= FALSE;

/** Update callback of innodb_disallow_writes.
Stores the new value and then resets srv_allow_writes_event when the
value is true, or sets it when the value is false.
@param[out]	var_ptr	where the new value is stored
@param[in]	save	the new value, read as my_bool */
static
void
innobase_disallow_writes_update(THD*, st_mysql_sys_var*,
				void* var_ptr, const void* save)
{
	const my_bool	disallow = *static_cast<const my_bool*>(save);

	*static_cast<my_bool*>(var_ptr) = disallow;
	ut_a(srv_allow_writes_event);

	/* LOCK_global_system_variables is released around the event
	operation and re-acquired before returning, so the caller is
	expected to hold it on entry. */
	mysql_mutex_unlock(&LOCK_global_system_variables);

	if (!disallow) {
		os_event_set(srv_allow_writes_event);
	} else {
		os_event_reset(srv_allow_writes_event);
	}

	mysql_mutex_lock(&LOCK_global_system_variables);
}

static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
  PLUGIN_VAR_NOCMDOPT,
  "Tell InnoDB to stop any writes to disk",
  NULL, innobase_disallow_writes_update,
  FALSE);			/* Default setting */
#endif /* WITH_INNODB_DISALLOW_WRITES */
20106 
20107 static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
20108   PLUGIN_VAR_NOCMDARG,
20109   "Whether to use read ahead for random access within an extent.",
20110   NULL, NULL, FALSE);
20111 
20112 static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
20113   PLUGIN_VAR_RQCMDARG,
20114   "Number of pages that must be accessed sequentially for InnoDB to"
20115   " trigger a readahead.",
20116   NULL, NULL, 56, 0, 64, 0);
20117 
20118 static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter,
20119   PLUGIN_VAR_RQCMDARG,
20120   "Turn on a monitor counter",
20121   innodb_monitor_validate,
20122   innodb_enable_monitor_update, NULL);
20123 
20124 static MYSQL_SYSVAR_STR(monitor_disable, innobase_disable_monitor_counter,
20125   PLUGIN_VAR_RQCMDARG,
20126   "Turn off a monitor counter",
20127   innodb_monitor_validate,
20128   innodb_disable_monitor_update, NULL);
20129 
20130 static MYSQL_SYSVAR_STR(monitor_reset, innobase_reset_monitor_counter,
20131   PLUGIN_VAR_RQCMDARG,
20132   "Reset a monitor counter",
20133   innodb_monitor_validate,
20134   innodb_reset_monitor_update, NULL);
20135 
20136 static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter,
20137   PLUGIN_VAR_RQCMDARG,
20138   "Reset all values for a monitor counter",
20139   innodb_monitor_validate,
20140   innodb_reset_all_monitor_update, NULL);
20141 
20142 static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor,
20143   PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.",
20144   NULL, innodb_status_output_update, FALSE);
20145 
20146 static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor,
20147   PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log."
20148   " Requires innodb_status_output=ON.",
20149   NULL, innodb_status_output_update, FALSE);
20150 
20151 static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
20152   PLUGIN_VAR_OPCMDARG,
20153   "Print all deadlocks to MariaDB error log (off by default)",
20154   NULL, NULL, FALSE);
20155 
20156 static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
20157   zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
20158   "If the compression failure rate of a table is greater than this number"
20159   " more padding is added to the pages to reduce the failures. A value of"
20160   " zero implies no padding",
20161   NULL, NULL, 5, 0, 100, 0);
20162 
20163 static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
20164   zip_pad_max, PLUGIN_VAR_OPCMDARG,
20165   "Percentage of empty space on a data page that can be reserved"
20166   " to make the page compressible.",
20167   NULL, NULL, 50, 0, 75, 0);
20168 
20169 static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
20170   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20171   "Start InnoDB in read only mode (off by default)",
20172   NULL, NULL, FALSE);
20173 
20174 static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
20175   PLUGIN_VAR_OPCMDARG,
20176   "Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
20177   " may have negative impact on performance (off by default)",
20178   NULL, innodb_cmp_per_index_update, FALSE);
20179 
20180 static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
20181   PLUGIN_VAR_RQCMDARG,
20182   "The default ROW FORMAT for all innodb tables created without explicit"
20183   " ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC."
20184   " The ROW_FORMAT value COMPRESSED is not allowed",
20185   NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC,
20186   &innodb_default_row_format_typelib);
20187 
#ifdef UNIV_DEBUG
/* Variables that exist only in UNIV_DEBUG builds. */

static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
  "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
  NULL, NULL,
  0,				/* Default setting */
  0,				/* Minimum value */
  1024, 0);			/* Maximum value */

static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
  btr_cur_limit_optimistic_insert_debug,
  PLUGIN_VAR_RQCMDARG,
  "Artificially limit the number of records per B-tree page (0=unlimited).",
  NULL, NULL,
  0,				/* Default setting */
  0,				/* Minimum value */
  UINT_MAX32, 0);		/* Maximum value */

static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
  srv_purge_view_update_only_debug,
  PLUGIN_VAR_NOCMDOPT,
  "Pause actual purging any delete-marked records, but merely update the purge view."
  " It is to create artificially the situation the purge view have been updated"
  " but the each purges were not done yet.",
  NULL, NULL,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_BOOL(evict_tables_on_commit_debug,
  innodb_evict_tables_on_commit_debug,
  PLUGIN_VAR_OPCMDARG,
  "On transaction commit, try to evict tables from the data dictionary cache.",
  NULL, NULL,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_UINT(data_file_size_debug,
  srv_sys_space_size_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB system tablespace size to be set in recovery.",
  NULL, NULL,
  0,				/* Default setting */
  0,				/* Minimum value */
  256U << 20, 0);		/* Maximum value */

static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
  srv_fil_make_page_dirty_debug,
  PLUGIN_VAR_OPCMDARG,
  "Make the first page of the given tablespace dirty.",
  NULL, innodb_make_page_dirty,
  0,				/* Default setting */
  0,				/* Minimum value */
  UINT_MAX32, 0);		/* Maximum value */

static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
  srv_saved_page_number_debug,
  PLUGIN_VAR_OPCMDARG,
  "An InnoDB page number.",
  NULL, innodb_save_page_no,
  0,				/* Default setting */
  0,				/* Minimum value */
  UINT_MAX32, 0);		/* Maximum value */

static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug,
  buf_disable_resize_buffer_pool_debug,
  PLUGIN_VAR_NOCMDARG,
  "Disable resizing buffer pool to make assertion code not expensive.",
  NULL, NULL,
  TRUE);			/* Default setting */

static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug,
  innodb_page_cleaner_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable page cleaner",
  NULL, buf_flush_page_cleaner_disabled_debug_update,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable the sync debug checks",
  NULL, NULL,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug,
  innodb_dict_stats_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable dict_stats thread",
  NULL, dict_stats_disabled_debug_update,
  FALSE);			/* Default setting */

static MYSQL_SYSVAR_BOOL(master_thread_disabled_debug,
  srv_master_thread_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable master thread",
  NULL, srv_master_thread_disabled_debug_update,
  FALSE);			/* Default setting */
#endif /* UNIV_DEBUG */
20255 
20256 static MYSQL_SYSVAR_BOOL(force_primary_key,
20257   srv_force_primary_key,
20258   PLUGIN_VAR_OPCMDARG,
20259   "Do not allow to create table without primary key (off by default)",
20260   NULL, NULL, FALSE);
20261 
20262 static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
20263 static TYPELIB page_compression_algorithms_typelib=
20264 {
20265   array_elements(page_compression_algorithms) - 1, 0,
20266   page_compression_algorithms, 0
20267 };
20268 static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
20269   PLUGIN_VAR_OPCMDARG,
20270   "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
20271   innodb_compression_algorithm_validate, NULL,
20272   /* We use here the largest number of supported compression method to
20273   enable all those methods that are available. Availability of compression
20274   method is verified on innodb_compression_algorithm_validate function. */
20275   PAGE_ZLIB_ALGORITHM,
20276   &page_compression_algorithms_typelib);
20277 
20278 static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
20279   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
20280   "Maximum number of seconds that semaphore times out in InnoDB.",
20281   NULL, NULL,
20282   DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, /* Default setting */
20283   1, /* Minimum setting */
20284   UINT_MAX32, /* Maximum setting */
20285   0);
20286 
20287 static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 };
20288 static TYPELIB srv_encrypt_tables_typelib = {
20289 	array_elements(srv_encrypt_tables_names)-1, 0, srv_encrypt_tables_names,
20290 	NULL
20291 };
20292 static MYSQL_SYSVAR_ENUM(encrypt_tables, srv_encrypt_tables,
20293 			 PLUGIN_VAR_OPCMDARG,
20294 			 "Enable encryption for tables. "
20295 			 "Don't forget to enable --innodb-encrypt-log too",
20296 			 innodb_encrypt_tables_validate,
20297 			 innodb_encrypt_tables_update,
20298 			 0,
20299 			 &srv_encrypt_tables_typelib);
20300 
20301 static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads,
20302 			 PLUGIN_VAR_RQCMDARG,
20303 			 "Number of threads performing background key rotation and "
20304 			 "scrubbing",
20305 			 NULL,
20306 			 innodb_encryption_threads_update,
20307 			 0, 0, 255, 0);
20308 
20309 static MYSQL_SYSVAR_UINT(encryption_rotate_key_age,
20310 			 srv_fil_crypt_rotate_key_age,
20311 			 PLUGIN_VAR_RQCMDARG,
20312 			 "Key rotation - re-encrypt in background "
20313                          "all pages that were encrypted with a key that "
20314                          "many (or more) versions behind. Value 0 indicates "
20315 			 "that key rotation is disabled.",
20316 			 NULL,
20317 			 innodb_encryption_rotate_key_age_update,
20318 			 1, 0, UINT_MAX32, 0);
20319 
20320 static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops,
20321 			 PLUGIN_VAR_RQCMDARG,
20322 			 "Use this many iops for background key rotation",
20323 			 NULL,
20324 			 innodb_encryption_rotation_iops_update,
20325 			 srv_n_fil_crypt_iops, 0, UINT_MAX32, 0);
20326 
20327 static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log,
20328   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20329   "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing",
20330   0, 0, 0);
20331 
20332 static MYSQL_SYSVAR_ULONGLONG(scrub_log_speed, innodb_scrub_log_speed,
20333   PLUGIN_VAR_OPCMDARG,
20334   "Background redo log scrubbing speed in bytes/sec",
20335   NULL, NULL,
20336   256,              /* 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */
20337   1,                /* min */
20338   50000, 0);        /* 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */
20339 
20340 static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log,
20341   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20342   "Enable redo log encryption",
20343   NULL, NULL, FALSE);
20344 
20345 static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed,
20346 			 srv_immediate_scrub_data_uncompressed,
20347 			 0,
20348 			 "Enable scrubbing of data",
20349 			 NULL, NULL, FALSE);
20350 
20351 static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed,
20352 			 srv_background_scrub_data_uncompressed,
20353 			 0,
20354 			 "Enable scrubbing of uncompressed data by "
20355 			 "background threads (same as encryption_threads)",
20356 			 NULL, NULL, FALSE);
20357 
20358 static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed,
20359 			 srv_background_scrub_data_compressed,
20360 			 0,
20361 			 "Enable scrubbing of compressed data by "
20362 			 "background threads (same as encryption_threads)",
20363 			 NULL, NULL, FALSE);
20364 
20365 static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval,
20366 			 srv_background_scrub_data_check_interval,
20367 			 0,
20368 			 "check if spaces needs scrubbing every "
20369 			 "innodb_background_scrub_data_check_interval "
20370 			 "seconds",
20371 			 NULL, NULL,
20372 			 srv_background_scrub_data_check_interval,
20373 			 1,
20374 			 UINT_MAX32, 0);
20375 
20376 static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
20377 			 srv_background_scrub_data_interval,
20378 			 0,
20379 			 "scrub spaces that were last scrubbed longer than "
20380 			 " innodb_background_scrub_data_interval seconds ago",
20381 			 NULL, NULL,
20382 			 srv_background_scrub_data_interval,
20383 			 1,
20384 			 UINT_MAX32, 0);
20385 
#ifdef UNIV_DEBUG
/* Scrubbing and purge debugging aids, only compiled into debug builds. */

static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
  srv_scrub_force_testing,
  0,
  "Perform extra scrubbing to increase test exposure",
  NULL, NULL,
  FALSE);			/* Default setting */

/* Backing storage of innodb_debug_sync (not static). */
char *innobase_debug_sync;

static MYSQL_SYSVAR_STR(debug_sync, innobase_debug_sync,
  PLUGIN_VAR_NOCMDARG,
  "debug_sync for innodb purge threads. "
  "Use it to set up sync points for all purge threads "
  "at once. The commands will be applied sequentially at"
  " the beginning of purging the next undo record.",
  NULL, innobase_debug_sync_set,
  NULL);			/* Default setting: none */
#endif /* UNIV_DEBUG */
20403 
20404 static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables,
20405   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20406   "Enrypt the temporary table data.",
20407   NULL, NULL, false);
20408 
20409 static struct st_mysql_sys_var* innobase_system_variables[]= {
20410   MYSQL_SYSVAR(autoextend_increment),
20411   MYSQL_SYSVAR(buffer_pool_size),
20412   MYSQL_SYSVAR(buffer_pool_chunk_size),
20413   MYSQL_SYSVAR(buffer_pool_instances),
20414   MYSQL_SYSVAR(buffer_pool_filename),
20415   MYSQL_SYSVAR(buffer_pool_dump_now),
20416   MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
20417   MYSQL_SYSVAR(buffer_pool_dump_pct),
20418 #ifdef UNIV_DEBUG
20419   MYSQL_SYSVAR(buffer_pool_evict),
20420 #endif /* UNIV_DEBUG */
20421   MYSQL_SYSVAR(buffer_pool_load_now),
20422   MYSQL_SYSVAR(buffer_pool_load_abort),
20423 #ifdef UNIV_DEBUG
20424   MYSQL_SYSVAR(buffer_pool_load_pages_abort),
20425 #endif /* UNIV_DEBUG */
20426   MYSQL_SYSVAR(buffer_pool_load_at_startup),
20427   MYSQL_SYSVAR(defragment),
20428   MYSQL_SYSVAR(defragment_n_pages),
20429   MYSQL_SYSVAR(defragment_stats_accuracy),
20430   MYSQL_SYSVAR(defragment_fill_factor),
20431   MYSQL_SYSVAR(defragment_fill_factor_n_recs),
20432   MYSQL_SYSVAR(defragment_frequency),
20433   MYSQL_SYSVAR(lru_scan_depth),
20434   MYSQL_SYSVAR(flush_neighbors),
20435   MYSQL_SYSVAR(checksum_algorithm),
20436   MYSQL_SYSVAR(log_checksums),
20437   MYSQL_SYSVAR(checksums),
20438   MYSQL_SYSVAR(commit_concurrency),
20439   MYSQL_SYSVAR(concurrency_tickets),
20440   MYSQL_SYSVAR(compression_level),
20441   MYSQL_SYSVAR(data_file_path),
20442   MYSQL_SYSVAR(temp_data_file_path),
20443   MYSQL_SYSVAR(data_home_dir),
20444   MYSQL_SYSVAR(doublewrite),
20445   MYSQL_SYSVAR(stats_include_delete_marked),
20446   MYSQL_SYSVAR(use_atomic_writes),
20447   MYSQL_SYSVAR(fast_shutdown),
20448   MYSQL_SYSVAR(read_io_threads),
20449   MYSQL_SYSVAR(write_io_threads),
20450   MYSQL_SYSVAR(file_per_table),
20451   MYSQL_SYSVAR(file_format), /* deprecated in MariaDB 10.2; no effect */
20452   MYSQL_SYSVAR(flush_log_at_timeout),
20453   MYSQL_SYSVAR(flush_log_at_trx_commit),
20454   MYSQL_SYSVAR(flush_method),
20455   MYSQL_SYSVAR(force_recovery),
20456   MYSQL_SYSVAR(fill_factor),
20457   MYSQL_SYSVAR(ft_cache_size),
20458   MYSQL_SYSVAR(ft_total_cache_size),
20459   MYSQL_SYSVAR(ft_result_cache_limit),
20460   MYSQL_SYSVAR(ft_enable_stopword),
20461   MYSQL_SYSVAR(ft_max_token_size),
20462   MYSQL_SYSVAR(ft_min_token_size),
20463   MYSQL_SYSVAR(ft_num_word_optimize),
20464   MYSQL_SYSVAR(ft_sort_pll_degree),
20465   MYSQL_SYSVAR(large_prefix), /* deprecated in MariaDB 10.2; no effect */
20466   MYSQL_SYSVAR(force_load_corrupted),
20467   MYSQL_SYSVAR(lock_schedule_algorithm),
20468   MYSQL_SYSVAR(locks_unsafe_for_binlog),
20469   MYSQL_SYSVAR(lock_wait_timeout),
20470   MYSQL_SYSVAR(deadlock_detect),
20471   MYSQL_SYSVAR(page_size),
20472   MYSQL_SYSVAR(log_buffer_size),
20473   MYSQL_SYSVAR(log_file_size),
20474   MYSQL_SYSVAR(log_files_in_group),
20475   MYSQL_SYSVAR(log_write_ahead_size),
20476   MYSQL_SYSVAR(log_group_home_dir),
20477   MYSQL_SYSVAR(log_compressed_pages),
20478   MYSQL_SYSVAR(log_optimize_ddl),
20479   MYSQL_SYSVAR(max_dirty_pages_pct),
20480   MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
20481   MYSQL_SYSVAR(adaptive_flushing_lwm),
20482   MYSQL_SYSVAR(adaptive_flushing),
20483   MYSQL_SYSVAR(flush_sync),
20484   MYSQL_SYSVAR(flushing_avg_loops),
20485   MYSQL_SYSVAR(max_purge_lag),
20486   MYSQL_SYSVAR(max_purge_lag_delay),
20487   MYSQL_SYSVAR(max_purge_lag_wait),
20488   MYSQL_SYSVAR(old_blocks_pct),
20489   MYSQL_SYSVAR(old_blocks_time),
20490   MYSQL_SYSVAR(open_files),
20491   MYSQL_SYSVAR(optimize_fulltext_only),
20492   MYSQL_SYSVAR(rollback_on_timeout),
20493   MYSQL_SYSVAR(ft_aux_table),
20494   MYSQL_SYSVAR(ft_enable_diag_print),
20495   MYSQL_SYSVAR(ft_server_stopword_table),
20496   MYSQL_SYSVAR(ft_user_stopword_table),
20497   MYSQL_SYSVAR(disable_sort_file_cache),
20498   MYSQL_SYSVAR(stats_on_metadata),
20499   MYSQL_SYSVAR(stats_sample_pages),
20500   MYSQL_SYSVAR(stats_transient_sample_pages),
20501   MYSQL_SYSVAR(stats_persistent),
20502   MYSQL_SYSVAR(stats_persistent_sample_pages),
20503   MYSQL_SYSVAR(stats_auto_recalc),
20504   MYSQL_SYSVAR(stats_modified_counter),
20505   MYSQL_SYSVAR(stats_traditional),
20506 #ifdef BTR_CUR_HASH_ADAPT
20507   MYSQL_SYSVAR(adaptive_hash_index),
20508   MYSQL_SYSVAR(adaptive_hash_index_parts),
20509 #endif /* BTR_CUR_HASH_ADAPT */
20510   MYSQL_SYSVAR(stats_method),
20511   MYSQL_SYSVAR(replication_delay),
20512   MYSQL_SYSVAR(status_file),
20513   MYSQL_SYSVAR(strict_mode),
20514   MYSQL_SYSVAR(sort_buffer_size),
20515   MYSQL_SYSVAR(online_alter_log_max_size),
20516   MYSQL_SYSVAR(sync_spin_loops),
20517   MYSQL_SYSVAR(spin_wait_delay),
20518   MYSQL_SYSVAR(table_locks),
20519   MYSQL_SYSVAR(thread_concurrency),
20520   MYSQL_SYSVAR(adaptive_max_sleep_delay),
20521   MYSQL_SYSVAR(prefix_index_cluster_optimization),
20522   MYSQL_SYSVAR(thread_sleep_delay),
20523   MYSQL_SYSVAR(tmpdir),
20524   MYSQL_SYSVAR(autoinc_lock_mode),
20525   MYSQL_SYSVAR(version),
20526   MYSQL_SYSVAR(use_native_aio),
20527 #ifdef HAVE_LIBNUMA
20528   MYSQL_SYSVAR(numa_interleave),
20529 #endif /* HAVE_LIBNUMA */
20530   MYSQL_SYSVAR(change_buffering),
20531   MYSQL_SYSVAR(change_buffer_max_size),
20532 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
20533   MYSQL_SYSVAR(change_buffer_dump),
20534   MYSQL_SYSVAR(change_buffering_debug),
20535   MYSQL_SYSVAR(disable_background_merge),
20536 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
20537 #ifdef WITH_INNODB_DISALLOW_WRITES
20538   MYSQL_SYSVAR(disallow_writes),
20539 #endif /* WITH_INNODB_DISALLOW_WRITES */
20540   MYSQL_SYSVAR(random_read_ahead),
20541   MYSQL_SYSVAR(read_ahead_threshold),
20542   MYSQL_SYSVAR(read_only),
20543   MYSQL_SYSVAR(instant_alter_column_allowed),
20544   MYSQL_SYSVAR(io_capacity),
20545   MYSQL_SYSVAR(io_capacity_max),
20546   MYSQL_SYSVAR(page_cleaners),
20547   MYSQL_SYSVAR(idle_flush_pct),
20548   MYSQL_SYSVAR(monitor_enable),
20549   MYSQL_SYSVAR(monitor_disable),
20550   MYSQL_SYSVAR(monitor_reset),
20551   MYSQL_SYSVAR(monitor_reset_all),
20552   MYSQL_SYSVAR(purge_threads),
20553   MYSQL_SYSVAR(purge_batch_size),
20554 #ifdef UNIV_DEBUG
20555   MYSQL_SYSVAR(background_drop_list_empty),
20556   MYSQL_SYSVAR(log_checkpoint_now),
20557   MYSQL_SYSVAR(buf_flush_list_now),
20558   MYSQL_SYSVAR(merge_threshold_set_all_debug),
20559 #endif /* UNIV_DEBUG */
20560 #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
20561   MYSQL_SYSVAR(page_hash_locks),
20562   MYSQL_SYSVAR(doublewrite_batch_size),
20563 #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
20564   MYSQL_SYSVAR(status_output),
20565   MYSQL_SYSVAR(status_output_locks),
20566   MYSQL_SYSVAR(print_all_deadlocks),
20567   MYSQL_SYSVAR(cmp_per_index_enabled),
20568   MYSQL_SYSVAR(undo_logs),
20569   MYSQL_SYSVAR(max_undo_log_size),
20570   MYSQL_SYSVAR(purge_rseg_truncate_frequency),
20571   MYSQL_SYSVAR(undo_log_truncate),
20572   MYSQL_SYSVAR(rollback_segments),
20573   MYSQL_SYSVAR(undo_directory),
20574   MYSQL_SYSVAR(undo_tablespaces),
20575   MYSQL_SYSVAR(sync_array_size),
20576   MYSQL_SYSVAR(compression_failure_threshold_pct),
20577   MYSQL_SYSVAR(compression_pad_pct_max),
20578   MYSQL_SYSVAR(default_row_format),
20579 #ifdef UNIV_DEBUG
20580   MYSQL_SYSVAR(trx_rseg_n_slots_debug),
20581   MYSQL_SYSVAR(limit_optimistic_insert_debug),
20582   MYSQL_SYSVAR(trx_purge_view_update_only_debug),
20583   MYSQL_SYSVAR(evict_tables_on_commit_debug),
20584   MYSQL_SYSVAR(data_file_size_debug),
20585   MYSQL_SYSVAR(fil_make_page_dirty_debug),
20586   MYSQL_SYSVAR(saved_page_number_debug),
20587   MYSQL_SYSVAR(disable_resize_buffer_pool_debug),
20588   MYSQL_SYSVAR(page_cleaner_disabled_debug),
20589   MYSQL_SYSVAR(dict_stats_disabled_debug),
20590   MYSQL_SYSVAR(master_thread_disabled_debug),
20591   MYSQL_SYSVAR(sync_debug),
20592 #endif /* UNIV_DEBUG */
20593   MYSQL_SYSVAR(force_primary_key),
20594   MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
20595   /* Table page compression feature */
20596   MYSQL_SYSVAR(compression_default),
20597   MYSQL_SYSVAR(compression_algorithm),
20598   /* Encryption feature */
20599   MYSQL_SYSVAR(encrypt_tables),
20600   MYSQL_SYSVAR(encryption_threads),
20601   MYSQL_SYSVAR(encryption_rotate_key_age),
20602   MYSQL_SYSVAR(encryption_rotation_iops),
20603   MYSQL_SYSVAR(scrub_log),
20604   MYSQL_SYSVAR(scrub_log_speed),
20605   MYSQL_SYSVAR(encrypt_log),
20606   MYSQL_SYSVAR(default_encryption_key_id),
20607   /* Scrubing feature */
20608   MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
20609   MYSQL_SYSVAR(background_scrub_data_uncompressed),
20610   MYSQL_SYSVAR(background_scrub_data_compressed),
20611   MYSQL_SYSVAR(background_scrub_data_interval),
20612   MYSQL_SYSVAR(background_scrub_data_check_interval),
20613 #ifdef UNIV_DEBUG
20614   MYSQL_SYSVAR(debug_force_scrubbing),
20615   MYSQL_SYSVAR(debug_sync),
20616 #endif
20617   MYSQL_SYSVAR(buf_dump_status_frequency),
20618   MYSQL_SYSVAR(background_thread),
20619   MYSQL_SYSVAR(encrypt_temporary_tables),
20620 
20621   NULL
20622 };
20623 
maria_declare_plugin(innobase)20624 maria_declare_plugin(innobase)
20625 {
20626   MYSQL_STORAGE_ENGINE_PLUGIN,
20627   &innobase_storage_engine,
20628   innobase_hton_name,
20629   plugin_author,
20630   "Supports transactions, row-level locking, foreign keys and encryption for tables",
20631   PLUGIN_LICENSE_GPL,
20632   innodb_init, /* Plugin Init */
20633   NULL, /* Plugin Deinit */
20634   INNODB_VERSION_SHORT,
20635   innodb_status_variables_export,/* status variables             */
20636   innobase_system_variables, /* system variables */
20637   INNODB_VERSION_STR,         /* string version */
20638   MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
20639 },
20640 i_s_innodb_trx,
20641 i_s_innodb_locks,
20642 i_s_innodb_lock_waits,
20643 i_s_innodb_cmp,
20644 i_s_innodb_cmp_reset,
20645 i_s_innodb_cmpmem,
20646 i_s_innodb_cmpmem_reset,
20647 i_s_innodb_cmp_per_index,
20648 i_s_innodb_cmp_per_index_reset,
20649 i_s_innodb_buffer_page,
20650 i_s_innodb_buffer_page_lru,
20651 i_s_innodb_buffer_stats,
20652 i_s_innodb_metrics,
20653 i_s_innodb_ft_default_stopword,
20654 i_s_innodb_ft_deleted,
20655 i_s_innodb_ft_being_deleted,
20656 i_s_innodb_ft_config,
20657 i_s_innodb_ft_index_cache,
20658 i_s_innodb_ft_index_table,
20659 i_s_innodb_sys_tables,
20660 i_s_innodb_sys_tablestats,
20661 i_s_innodb_sys_indexes,
20662 i_s_innodb_sys_columns,
20663 i_s_innodb_sys_fields,
20664 i_s_innodb_sys_foreign,
20665 i_s_innodb_sys_foreign_cols,
20666 i_s_innodb_sys_tablespaces,
20667 i_s_innodb_sys_datafiles,
20668 i_s_innodb_sys_virtual,
20669 i_s_innodb_mutexes,
20670 i_s_innodb_sys_semaphore_waits,
20671 i_s_innodb_tablespaces_encryption,
20672 i_s_innodb_tablespaces_scrubbing
20673 maria_declare_plugin_end;
20674 
20675 /** @brief Initialize the default value of innodb_commit_concurrency.
20676 
20677 Once InnoDB is running, the innodb_commit_concurrency must not change
20678 from zero to nonzero. (Bug #42101)
20679 
20680 The initial default value is 0, and without this extra initialization,
20681 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
20682 to 0, even if it was initially set to nonzero at the command line
20683 or configuration file. */
20684 static
20685 void
innobase_commit_concurrency_init_default()20686 innobase_commit_concurrency_init_default()
20687 /*======================================*/
20688 {
20689 	MYSQL_SYSVAR_NAME(commit_concurrency).def_val
20690 		= innobase_commit_concurrency;
20691 }
20692 
20693 /** @brief Adjust some InnoDB startup parameters based on file contents
20694 or innodb_page_size. */
20695 static
20696 void
innodb_params_adjust()20697 innodb_params_adjust()
20698 {
20699 	/* The default value and the max value of
20700 	innodb_undo_logs must be equal to the available undo logs. */
20701 	MYSQL_SYSVAR_NAME(undo_logs).max_val
20702 		= MYSQL_SYSVAR_NAME(undo_logs).def_val
20703 		= srv_available_undo_logs;
20704 	MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20705 		= 1ULL << (32U + srv_page_size_shift);
20706 	MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
20707 		= MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
20708 		= ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
20709 		<< srv_page_size_shift;
20710 	MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20711 		= 1ULL << (32U + srv_page_size_shift);
20712 }
20713 
20714 /****************************************************************************
20715  * DS-MRR implementation
20716  ***************************************************************************/
20717 
20718 /**
20719 Multi Range Read interface, DS-MRR calls */
20720 int
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)20721 ha_innobase::multi_range_read_init(
20722 	RANGE_SEQ_IF*	seq,
20723 	void*		seq_init_param,
20724 	uint		n_ranges,
20725 	uint		mode,
20726 	HANDLER_BUFFER*	buf)
20727 {
20728 	return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param,
20729 				 n_ranges, mode, buf));
20730 }
20731 
20732 int
multi_range_read_next(range_id_t * range_info)20733 ha_innobase::multi_range_read_next(
20734 	range_id_t*		range_info)
20735 {
20736 	return(m_ds_mrr.dsmrr_next(range_info));
20737 }
20738 
20739 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)20740 ha_innobase::multi_range_read_info_const(
20741 	uint		keyno,
20742 	RANGE_SEQ_IF*	seq,
20743 	void*		seq_init_param,
20744 	uint		n_ranges,
20745 	uint*		bufsz,
20746 	uint*		flags,
20747 	Cost_estimate*	cost)
20748 {
20749 	/* See comments in ha_myisam::multi_range_read_info_const */
20750 	m_ds_mrr.init(this, table);
20751 
20752 	if (m_prebuilt->select_lock_type != LOCK_NONE) {
20753 		*flags |= HA_MRR_USE_DEFAULT_IMPL;
20754 	}
20755 
20756 	ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
20757 			bufsz, flags, cost);
20758 	return res;
20759 }
20760 
20761 ha_rows
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)20762 ha_innobase::multi_range_read_info(
20763 	uint		keyno,
20764 	uint		n_ranges,
20765 	uint		keys,
20766 	uint		key_parts,
20767 	uint*		bufsz,
20768 	uint*		flags,
20769 	Cost_estimate*	cost)
20770 {
20771 	m_ds_mrr.init(this, table);
20772 	ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
20773 					flags, cost);
20774 	return res;
20775 }
20776 
20777 int
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)20778 ha_innobase::multi_range_read_explain_info(
20779 	uint mrr_mode,
20780 	char *str,
20781 	size_t size)
20782 {
20783 	return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
20784 }
20785 
20786 /**
20787 Index Condition Pushdown interface implementation */
20788 
20789 /*************************************************************//**
20790 InnoDB index push-down condition check
20791 @return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
20792 ICP_RESULT
innobase_index_cond(void * file)20793 innobase_index_cond(
20794 /*================*/
20795 	void*	file)	/*!< in/out: pointer to ha_innobase */
20796 {
20797 	return handler_index_cond_check(file);
20798 }
20799 
20800 /** Parse the table file name into table name and database name.
20801 @param[in]	tbl_name	InnoDB table name
20802 @param[out]	dbname		database name buffer (NAME_LEN + 1 bytes)
20803 @param[out]	tblname		table name buffer (NAME_LEN + 1 bytes)
20804 @param[out]	dbnamelen	database name length
20805 @param[out]	tblnamelen	table name length
20806 @return true if the table name is parsed properly. */
table_name_parse(const table_name_t & tbl_name,char * dbname,char * tblname,ulint & dbnamelen,ulint & tblnamelen)20807 static bool table_name_parse(
20808 	const table_name_t&	tbl_name,
20809 	char*			dbname,
20810 	char*			tblname,
20811 	ulint&			dbnamelen,
20812 	ulint&			tblnamelen)
20813 {
20814 	dbnamelen = dict_get_db_name_len(tbl_name.m_name);
20815 	char db_buf[MAX_DATABASE_NAME_LEN  + 1];
20816 	char tbl_buf[MAX_TABLE_NAME_LEN + 1];
20817 
20818 	ut_ad(dbnamelen > 0);
20819 	ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
20820 
20821 	memcpy(db_buf, tbl_name.m_name, dbnamelen);
20822 	db_buf[dbnamelen] = 0;
20823 
20824 	tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
20825 	memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
20826 	tbl_buf[tblnamelen] = 0;
20827 
20828 	dbnamelen = filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
20829 
20830 	if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
20831 	    && !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
20832 		return false;
20833 	}
20834 
20835 	if (char *is_part = strchr(tbl_buf, '#')) {
20836 		*is_part = '\0';
20837 		tblnamelen = is_part - tbl_buf;
20838 	}
20839 
20840 	tblnamelen = filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
20841 	return true;
20842 }
20843 
20844 
/** Acquire metadata lock and MariaDB table handle for an InnoDB table.

The reference held on the passed-in table is released by this function
on every path. The table is then re-opened by its id after each attempt
to open the MariaDB handle. If the parsed name changed in the meantime
(or no MariaDB handle could be opened), the attempt is repeated with
the current name. On a successful return, the reference obtained from
dict_table_open_on_id() has not been released by this function.

@param[in,out]	thd	thread handle
@param[in,out]	table	InnoDB table
@return MariaDB table handle
@retval NULL if the table does not exist, is unaccessible or corrupted. */
static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
{
	/* db_buf/tbl_buf hold the name used for opening the handle;
	db_buf1/tbl_buf1 hold the name parsed after re-opening by id. */
	char	db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
	char	tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
	ulint	db_buf_len, db_buf1_len;
	ulint	tbl_buf_len, tbl_buf1_len;

	if (!table_name_parse(table->name, db_buf, tbl_buf,
			      db_buf_len, tbl_buf_len)) {
		table->release();
		return NULL;
	}

	DEBUG_SYNC(thd, "ib_purge_virtual_latch_released");

	/* Remember the id: the table object is released below and is
	looked up again by id afterwards. */
	const table_id_t table_id = table->id;
retry_mdl:
	/* Sample the state before giving up the reference. */
	const bool unaccessible = !table->is_readable() || table->corrupted;
	table->release();

	if (unaccessible) {
		return NULL;
	}

	TABLE*	mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
						 tbl_buf, tbl_buf_len);
	/* A failed open is not reported; the error state of the
	thread is cleared and the checks below decide what to do. */
	if (!mariadb_table)
		thd_clear_error(thd);

	DEBUG_SYNC(thd, "ib_purge_virtual_got_no_such_table");

	table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);

	if (table == NULL) {
		/* Table is dropped. */
		goto fail;
	}

	if (!fil_table_accessible(table)) {
		/* Shared failure exits: release_fail also drops the
		re-opened table reference; fail only closes the MariaDB
		handle if one was opened. */
release_fail:
		table->release();
fail:
		if (mariadb_table) {
			close_thread_tables(thd);
		}

		return NULL;
	}

	if (!table_name_parse(table->name, db_buf1, tbl_buf1,
			      db_buf1_len, tbl_buf1_len)) {
		goto release_fail;
	}

	if (!mariadb_table) {
		/* No handle was opened: fall through and retry with
		the name that was just parsed. */
	} else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
		/* The name is unchanged, so the handle matches. */
		return mariadb_table;
	} else {
		/* Table is renamed. So release MDL for old name and try
		to acquire the MDL for new table name. */
		close_thread_tables(thd);
	}

	/* Adopt the current name and try again. */
	strcpy(tbl_buf, tbl_buf1);
	strcpy(db_buf, db_buf1);
	tbl_buf_len = tbl_buf1_len;
	db_buf_len = db_buf1_len;
	goto retry_mdl;
}
20919 
20920 /** Find or open a table handle for the virtual column template
20921 @param[in]	thd	thread handle
20922 @param[in,out]	table	InnoDB table whose virtual column template
20923 			is to be updated
20924 @return table handle
20925 @retval NULL if the table is dropped, unaccessible or corrupted
20926 for purge thread */
innodb_find_table_for_vc(THD * thd,dict_table_t * table)20927 static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
20928 {
20929 	DBUG_EXECUTE_IF(
20930 		"ib_purge_virtual_mdev_16222_1",
20931 		DBUG_ASSERT(!debug_sync_set_action(
20932 			    thd,
20933 			    STRING_WITH_LEN("ib_purge_virtual_latch_released "
20934 					    "SIGNAL latch_released "
20935 					    "WAIT_FOR drop_started"))););
20936 	DBUG_EXECUTE_IF(
20937 		"ib_purge_virtual_mdev_16222_2",
20938 		DBUG_ASSERT(!debug_sync_set_action(
20939 			    thd,
20940 			    STRING_WITH_LEN("ib_purge_virtual_got_no_such_table "
20941 					    "SIGNAL got_no_such_table"))););
20942 
20943 	if (THDVAR(thd, background_thread)) {
20944 		/* Purge thread acquires dict_operation_lock while
20945 		processing undo log record. Release the dict_operation_lock
20946 		before acquiring MDL on the table. */
20947 		rw_lock_s_unlock(&dict_operation_lock);
20948 		return innodb_acquire_mdl(thd, table);
20949 	} else {
20950 		if (table->vc_templ->mysql_table_query_id
20951 		    == thd_get_query_id(thd)) {
20952 			return table->vc_templ->mysql_table;
20953 		}
20954 	}
20955 
20956 	char	db_buf[NAME_LEN + 1];
20957 	char	tbl_buf[NAME_LEN + 1];
20958 	ulint	db_buf_len, tbl_buf_len;
20959 
20960 	if (!table_name_parse(table->name, db_buf, tbl_buf,
20961 			      db_buf_len, tbl_buf_len)) {
20962 		ut_ad(!"invalid table name");
20963 		return NULL;
20964 	}
20965 
20966 	TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
20967 						tbl_buf, tbl_buf_len);
20968 
20969 	table->vc_templ->mysql_table = mysql_table;
20970 	table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
20971 	return mysql_table;
20972 }
20973 
20974 /** Get the computed value by supplying the base column values.
20975 @param[in,out]	table		table whose virtual column
20976 				template to be built */
innobase_init_vc_templ(dict_table_t * table)20977 TABLE* innobase_init_vc_templ(dict_table_t* table)
20978 {
20979 	if (table->vc_templ != NULL) {
20980 		return NULL;
20981 	}
20982 	DBUG_ENTER("innobase_init_vc_templ");
20983 
20984 	table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
20985 
20986 	TABLE	*mysql_table= innodb_find_table_for_vc(current_thd, table);
20987 
20988 	ut_ad(mysql_table);
20989 	if (!mysql_table) {
20990 		DBUG_RETURN(NULL);
20991 	}
20992 
20993 	mutex_enter(&dict_sys->mutex);
20994 	innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true);
20995 	mutex_exit(&dict_sys->mutex);
20996 	DBUG_RETURN(mysql_table);
20997 }
20998 
20999 /** Change dbname and table name in table->vc_templ.
21000 @param[in,out]	table	the table whose virtual column template
21001 dbname and tbname to be renamed. */
21002 void
innobase_rename_vc_templ(dict_table_t * table)21003 innobase_rename_vc_templ(
21004 	dict_table_t*	table)
21005 {
21006 	char	dbname[MAX_DATABASE_NAME_LEN + 1];
21007 	char	tbname[MAX_DATABASE_NAME_LEN + 1];
21008 	char*	name = table->name.m_name;
21009 	ulint	dbnamelen = dict_get_db_name_len(name);
21010 	ulint	tbnamelen = strlen(name) - dbnamelen - 1;
21011 	char	t_dbname[MAX_DATABASE_NAME_LEN + 1];
21012 	char	t_tbname[MAX_TABLE_NAME_LEN + 1];
21013 
21014 	strncpy(dbname, name, dbnamelen);
21015 	dbname[dbnamelen] = 0;
21016 	strncpy(tbname, name + dbnamelen + 1, tbnamelen);
21017 	tbname[tbnamelen] =0;
21018 
21019 	/* For partition table, remove the partition name and use the
21020 	"main" table name to build the template */
21021 	char*	is_part = is_partition(tbname);
21022 
21023 	if (is_part != NULL) {
21024 		*is_part = '\0';
21025 		tbnamelen = ulint(is_part - tbname);
21026 	}
21027 
21028 	dbnamelen = filename_to_tablename(dbname, t_dbname,
21029 					  MAX_DATABASE_NAME_LEN + 1);
21030 	tbnamelen = filename_to_tablename(tbname, t_tbname,
21031 					  MAX_TABLE_NAME_LEN + 1);
21032 
21033 	table->vc_templ->db_name = t_dbname;
21034 	table->vc_templ->tb_name = t_tbname;
21035 }
21036 
21037 
21038 /**
21039    Allocate a heap and record for calculating virtual fields
21040    Used mainly for virtual fields in indexes
21041 
21042 @param[in]      thd             MariaDB THD
21043 @param[in]      index           Index in use
21044 @param[out]     heap            Heap that holds temporary row
21045 @param[in,out]  table           MariaDB table
21046 @param[out]     record	        Pointer to allocated MariaDB record
21047 @param[out]     storage	        Internal storage for blobs etc
21048 
21049 @retval		true on success
21050 @retval		false on malloc failure or failed to open the maria table
21051 		for purge thread.
21052 */
21053 
innobase_allocate_row_for_vcol(THD * thd,dict_index_t * index,mem_heap_t ** heap,TABLE ** table,VCOL_STORAGE * storage)21054 bool innobase_allocate_row_for_vcol(THD *thd, dict_index_t *index,
21055                                     mem_heap_t **heap, TABLE **table,
21056                                     VCOL_STORAGE *storage)
21057 {
21058   TABLE *maria_table;
21059   String *blob_value_storage;
21060   if (!*table)
21061     *table = innodb_find_table_for_vc(thd, index->table);
21062 
21063   /* For purge thread, there is a possiblity that table could have
21064      dropped, corrupted or unaccessible. */
21065   if (!*table)
21066     return false;
21067   maria_table = *table;
21068   if (!*heap && !(*heap = mem_heap_create(srv_page_size)))
21069     return false;
21070 
21071   uchar *record = static_cast<byte *>(mem_heap_alloc(*heap,
21072                                                     maria_table->s->reclength));
21073 
21074   size_t len = maria_table->s->virtual_not_stored_blob_fields * sizeof(String);
21075   blob_value_storage = static_cast<String *>(mem_heap_alloc(*heap, len));
21076 
21077   if (!record || !blob_value_storage)
21078     return false;
21079 
21080   storage->maria_table = maria_table;
21081   storage->innobase_record = record;
21082   storage->maria_record = maria_table->field[0]->record_ptr();
21083   storage->blob_value_storage = blob_value_storage;
21084 
21085   maria_table->move_fields(maria_table->field, record, storage->maria_record);
21086   maria_table->remember_blob_values(blob_value_storage);
21087 
21088   return true;
21089 }
21090 
21091 
21092 /** Free memory allocated by innobase_allocate_row_for_vcol() */
21093 
innobase_free_row_for_vcol(VCOL_STORAGE * storage)21094 void innobase_free_row_for_vcol(VCOL_STORAGE *storage)
21095 {
21096 	TABLE *maria_table= storage->maria_table;
21097 	maria_table->move_fields(maria_table->field, storage->maria_record,
21098                                  storage->innobase_record);
21099         maria_table->restore_blob_values(storage->blob_value_storage);
21100 }
21101 
21102 
innobase_report_computed_value_failed(dtuple_t * row)21103 void innobase_report_computed_value_failed(dtuple_t *row)
21104 {
21105   ib::error() << "Compute virtual column values failed for "
21106               << rec_printer(row).str();
21107 }
21108 
21109 
/** Get the computed value by supplying the base column values.

The base columns of the virtual column are first stored into mysql_rec
in MariaDB format (taking new values from the update vector where it
has them), then mysql_table->update_virtual_field() is invoked, and the
result is converted back into the virtual field of the row.

@param[in,out]	row		the data row
@param[in]	col		virtual column
@param[in]	index		index
@param[in,out]	local_heap	heap memory for processing large data etc.
@param[in,out]	heap		memory heap that copies the actual index row
@param[in]	ifield		index field
@param[in]	thd		MySQL thread handle
@param[in,out]	mysql_table	mysql table object
@param[in,out]	mysql_rec	MariaDB record buffer
@param[in]	old_table	during ALTER TABLE, this is the old table
				or NULL.
@param[in]	update		update vector for the row, if any
@return the field filled with computed value
@retval NULL if mysql_table->update_virtual_field() reported a failure */
dfield_t*
innobase_get_computed_value(
	dtuple_t*		row,
	const dict_v_col_t*	col,
	const dict_index_t*	index,
	mem_heap_t**		local_heap,
	mem_heap_t*		heap,
	const dict_field_t*	ifield,
	THD*			thd,
	TABLE*			mysql_table,
	byte*			mysql_rec,
	const dict_table_t*	old_table,
	const upd_t*		update)
{
	/* Stack buffer used for the converted value when a heap is
	given and the template record length is small enough. */
	byte		rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN];
	byte*		buf;
	dfield_t*	field;
	ulint		len;

	/* Page size used when copying externally stored base columns:
	taken from old_table if one is given, else from index->table. */
	const page_size_t page_size = (old_table == NULL)
		? dict_table_page_size(index->table)
		: dict_table_page_size(old_table);

	ulint		ret = 0;

	dict_index_t *clust_index= dict_table_get_first_index(index->table);

	ut_ad(index->table->vc_templ);
	ut_ad(thd != NULL);
	ut_ad(mysql_table);

	DBUG_ENTER("innobase_get_computed_value");
	/* Template entry of the virtual column itself: the virtual
	columns are indexed after the n_col entries in vtempl[]. */
	const mysql_row_templ_t*
			vctempl =  index->table->vc_templ->vtempl[
				index->table->vc_templ->n_col + col->v_pos];

	/* Choose the conversion buffer: allocate it from *local_heap
	(creating that heap on demand) when no heap was passed or the
	record may not fit into rec_buf2. */
	if (!heap || index->table->vc_templ->rec_len
		     >= REC_VERSION_56_MAX_INDEX_COL_LEN) {
		if (*local_heap == NULL) {
			*local_heap = mem_heap_create(srv_page_size);
		}

		buf = static_cast<byte*>(mem_heap_alloc(
				*local_heap, index->table->vc_templ->rec_len));
	} else {
		buf = rec_buf2;
	}

	/* Store every base column of the virtual column in mysql_rec. */
	for (ulint i = 0; i < col->num_base; i++) {
		dict_col_t*			base_col = col->base_col[i];
		const dfield_t*			row_field = NULL;
		ulint				col_no = base_col->ind;
		const mysql_row_templ_t*	templ
			= index->table->vc_templ->vtempl[col_no];
		const byte*			data;

		/* Prefer the new value from the update vector, if the
		base column is being updated. */
		if (update) {
			ulint clust_no = dict_col_get_clust_pos(base_col,
								clust_index);
			if (const upd_field_t *uf = upd_get_field_by_field_no(
				    update, clust_no, false)) {
				row_field = &uf->new_val;
			}
		}

		if (!row_field) {
			row_field = dtuple_get_nth_field(row, col_no);
		}

		data = static_cast<const byte*>(row_field->data);
		len = row_field->len;

		/* An externally stored value is copied in full into
		*local_heap before it is converted. */
		if (row_field->ext) {
			if (*local_heap == NULL) {
				*local_heap = mem_heap_create(srv_page_size);
			}

			data = btr_copy_externally_stored_field(
				&len, data, page_size,
				dfield_get_len(row_field), *local_heap);
		}

		if (len == UNIV_SQL_NULL) {
			/* SQL NULL: set the NULL bit and copy the
			column bytes from the template's default record. */
                        mysql_rec[templ->mysql_null_byte_offset]
                                |= (byte) templ->mysql_null_bit_mask;
                        memcpy(mysql_rec + templ->mysql_col_offset,
                               static_cast<const byte*>(
					index->table->vc_templ->default_rec
					+ templ->mysql_col_offset),
                               templ->mysql_col_len);
                } else {

			row_sel_field_store_in_mysql_format(
				mysql_rec + templ->mysql_col_offset,
				templ, index, templ->clust_rec_field_no,
				(const byte*)data, len);

			if (templ->mysql_null_bit_mask) {
				/* It is a nullable column with a
				non-NULL value */
				mysql_rec[templ->mysql_null_byte_offset]
					&= ~(byte) templ->mysql_null_bit_mask;
			}
		}
	}

	field = dtuple_get_nth_v_field(row, col->v_pos);

	/* Compute the virtual column. The column bitmaps are switched
	with dbug_tmp_use_all_columns() around the call and restored
	afterwards. */
	MY_BITMAP *old_write_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->write_set);
	MY_BITMAP *old_read_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->read_set);
	ret = mysql_table->update_virtual_field(mysql_table->field[col->m_col.ind]);
	dbug_tmp_restore_column_map(&mysql_table->read_set, old_read_set);
	dbug_tmp_restore_column_map(&mysql_table->write_set, old_write_set);

	if (ret != 0) {
		DBUG_RETURN(NULL);
	}

	/* The computed value is NULL: return a NULL virtual field. */
	if (vctempl->mysql_null_bit_mask
	    && (mysql_rec[vctempl->mysql_null_byte_offset]
	        & vctempl->mysql_null_bit_mask)) {
		dfield_set_null(field);
		field->type.prtype |= DATA_VIRTUAL;
		DBUG_RETURN(field);
	}

	/* Convert the computed value from mysql_rec into InnoDB format,
	using buf as the conversion buffer. */
	row_mysql_store_col_in_innobase_format(
		field, buf,
		TRUE, mysql_rec + vctempl->mysql_col_offset,
		vctempl->mysql_col_len, dict_table_is_comp(index->table));
	field->type.prtype |= DATA_VIRTUAL;

	ulint	max_prefix = col->m_col.max_prefix;

	/* If the column has a prefix limit and an index field is given
	whose prefix_len is 0 or exceeds that limit, use the index
	field's prefix_len instead. */
	if (max_prefix && ifield
	    && (ifield->prefix_len == 0
	        || ifield->prefix_len > col->m_col.max_prefix)) {
		max_prefix = ifield->prefix_len;
	}

	/* If this is a prefix index, we only need a portion of the field */
	if (max_prefix) {
		len = dtype_get_at_most_n_mbchars(
			col->m_col.prtype,
			col->m_col.mbminlen, col->m_col.mbmaxlen,
			max_prefix,
			field->len,
			static_cast<char*>(dfield_get_data(field)));
		dfield_set_len(field, len);
	}

	/* With a caller-supplied heap, duplicate the field data into
	that heap. */
	if (heap) {
		dfield_dup(field, heap);
	}

	DBUG_RETURN(field);
}
21283 
21284 
21285 /** Attempt to push down an index condition.
21286 @param[in] keyno MySQL key number
21287 @param[in] idx_cond Index condition to be checked
21288 @return Part of idx_cond which the handler will not evaluate */
21289 
21290 class Item*
idx_cond_push(uint keyno,class Item * idx_cond)21291 ha_innobase::idx_cond_push(
21292 	uint		keyno,
21293 	class Item*	idx_cond)
21294 {
21295 	DBUG_ENTER("ha_innobase::idx_cond_push");
21296 	DBUG_ASSERT(keyno != MAX_KEY);
21297 	DBUG_ASSERT(idx_cond != NULL);
21298 
21299 	/* We can only evaluate the condition if all columns are stored.*/
21300 	dict_index_t* idx  = innobase_get_index(keyno);
21301 	if (idx && dict_index_has_virtual(idx)) {
21302 		DBUG_RETURN(idx_cond);
21303 	}
21304 
21305 	pushed_idx_cond = idx_cond;
21306 	pushed_idx_cond_keyno = keyno;
21307 	in_range_check_pushed_down = TRUE;
21308 	/* We will evaluate the condition entirely */
21309 	DBUG_RETURN(NULL);
21310 }
21311 
21312 /******************************************************************//**
21313 Use this when the args are passed to the format string from
21314 errmsg-utf8.txt directly as is.
21315 
21316 Push a warning message to the client, it is a wrapper around:
21317 
21318 void push_warning_printf(
21319 	THD *thd, Sql_condition::enum_condition_level level,
21320 	uint code, const char *format, ...);
21321 */
21322 void
ib_senderrf(THD * thd,ib_log_level_t level,ib_uint32_t code,...)21323 ib_senderrf(
21324 /*========*/
21325 	THD*		thd,		/*!< in/out: session */
21326 	ib_log_level_t	level,		/*!< in: warning level */
21327 	ib_uint32_t	code,		/*!< MySQL error code */
21328 	...)				/*!< Args */
21329 {
21330 	va_list		args;
21331 	const char*	format = my_get_err_msg(code);
21332 
21333 	/* If the caller wants to push a message to the client then
21334 	the caller must pass a valid session handle. */
21335 
21336 	ut_a(thd != 0);
21337 
21338 	/* The error code must exist in the errmsg-utf8.txt file. */
21339 	ut_a(format != 0);
21340 
21341 	va_start(args, code);
21342 
21343 	myf l;
21344 
21345 	switch (level) {
21346 	case IB_LOG_LEVEL_INFO:
21347 		l = ME_JUST_INFO;
21348 		break;
21349 	case IB_LOG_LEVEL_WARN:
21350 		l = ME_JUST_WARNING;
21351 		break;
21352 	default:
21353 		l = 0;
21354 		break;
21355 	}
21356 
21357 	my_printv_error(code, format, MYF(l), args);
21358 
21359 	va_end(args);
21360 
21361 	if (level == IB_LOG_LEVEL_FATAL) {
21362 		ut_error;
21363 	}
21364 }
21365 
21366 /******************************************************************//**
21367 Use this when the args are first converted to a formatted string and then
21368 passed to the format string from errmsg-utf8.txt. The error message format
21369 must be: "Some string ... %s".
21370 
21371 Push a warning message to the client, it is a wrapper around:
21372 
21373 void push_warning_printf(
21374 	THD *thd, Sql_condition::enum_condition_level level,
21375 	uint code, const char *format, ...);
21376 */
21377 void
ib_errf(THD * thd,ib_log_level_t level,ib_uint32_t code,const char * format,...)21378 ib_errf(
21379 /*====*/
21380 	THD*		thd,		/*!< in/out: session */
21381 	ib_log_level_t	level,		/*!< in: warning level */
21382 	ib_uint32_t	code,		/*!< MySQL error code */
21383 	const char*	format,		/*!< printf format */
21384 	...)				/*!< Args */
21385 {
21386 	char*		str = NULL;
21387 	va_list         args;
21388 
21389 	/* If the caller wants to push a message to the client then
21390 	the caller must pass a valid session handle. */
21391 
21392 	ut_a(thd != 0);
21393 	ut_a(format != 0);
21394 
21395 	va_start(args, format);
21396 
21397 #ifdef _WIN32
21398 	int		size = _vscprintf(format, args) + 1;
21399 	if (size > 0) {
21400 		str = static_cast<char*>(malloc(size));
21401 	}
21402 	if (str == NULL) {
21403 		va_end(args);
21404 		return;	/* Watch for Out-Of-Memory */
21405 	}
21406 	str[size - 1] = 0x0;
21407 	vsnprintf(str, size, format, args);
21408 #elif HAVE_VASPRINTF
21409 	if (vasprintf(&str, format, args) == -1) {
21410 		/* In case of failure use a fixed length string */
21411 		str = static_cast<char*>(malloc(BUFSIZ));
21412 		vsnprintf(str, BUFSIZ, format, args);
21413 	}
21414 #else
21415 	/* Use a fixed length string. */
21416 	str = static_cast<char*>(malloc(BUFSIZ));
21417 	if (str == NULL) {
21418 		va_end(args);
21419 		return;	/* Watch for Out-Of-Memory */
21420 	}
21421 	vsnprintf(str, BUFSIZ, format, args);
21422 #endif /* _WIN32 */
21423 
21424 	ib_senderrf(thd, level, code, str);
21425 
21426 	va_end(args);
21427 	free(str);
21428 }
21429 
/* Help texts that point to the online documentation. */

/* Keep the first 16 characters as-is, since the url is sometimes used
as an offset from this.*/
const char*	TROUBLESHOOTING_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-troubleshooting/"
	" for how to resolve the issue.";

/* Pointer to the data dictionary troubleshooting page. */
const char*	TROUBLESHOOT_DATADICT_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
	" for how to resolve the issue.";

/* Where to submit bug reports. */
const char*	BUG_REPORT_MSG =
	"Submit a detailed bug report to https://jira.mariadb.org/";

/* Pointer to the description of the forced recovery modes. */
const char*	FORCE_RECOVERY_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/innodb-recovery-modes/"
	" for information about forcing recovery.";

/* Pointer to the list of operating system error numbers. */
const char*	OPERATING_SYSTEM_ERROR_MSG =
	"Some operating system error numbers are described at"
	" https://mariadb.com/kb/en/library/operating-system-error-codes/";

/* Pointer to the foreign key documentation. */
const char*	FOREIGN_KEY_CONSTRAINTS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/foreign-keys/"
	" for correct foreign key definition.";

/* Pointer to the SET TRANSACTION documentation. */
const char*	SET_TRANSACTION_MSG =
	"Please refer to https://mariadb.com/kb/en/library/set-transaction/";

/* Pointer to the InnoDB system variable documentation. */
const char*	INNODB_PARAMETERS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/innodb-system-variables/";
21461 
21462 /**********************************************************************
21463 Converts an identifier from my_charset_filename to UTF-8 charset.
21464 @return result string length, as returned by strconvert() */
21465 uint
innobase_convert_to_filename_charset(char * to,const char * from,ulint len)21466 innobase_convert_to_filename_charset(
21467 /*=================================*/
21468 	char*		to,	/* out: converted identifier */
21469 	const char*	from,	/* in: identifier to convert */
21470 	ulint		len)	/* in: length of 'to', in bytes */
21471 {
21472 	uint		errors;
21473 	CHARSET_INFO*	cs_to = &my_charset_filename;
21474 	CHARSET_INFO*	cs_from = system_charset_info;
21475 
21476 	return(static_cast<uint>(strconvert(
21477 				cs_from, from, uint(strlen(from)),
21478 				cs_to, to, static_cast<uint>(len), &errors)));
21479 }
21480 
21481 /**********************************************************************
21482 Converts an identifier from my_charset_filename to UTF-8 charset.
21483 @return result string length, as returned by strconvert() */
21484 uint
innobase_convert_to_system_charset(char * to,const char * from,ulint len,uint * errors)21485 innobase_convert_to_system_charset(
21486 /*===============================*/
21487 	char*		to,	/* out: converted identifier */
21488 	const char*	from,	/* in: identifier to convert */
21489 	ulint		len,	/* in: length of 'to', in bytes */
21490 	uint*		errors)	/* out: error return */
21491 {
21492 	CHARSET_INFO*	cs1 = &my_charset_filename;
21493 	CHARSET_INFO*	cs2 = system_charset_info;
21494 
21495 	return(static_cast<uint>(strconvert(
21496 				cs1, from, static_cast<uint>(strlen(from)),
21497 				cs2, to, static_cast<uint>(len), errors)));
21498 }
21499 
21500 /** Validate the requested buffer pool size.  Also, reserve the necessary
21501 memory needed for buffer pool resize.
21502 @param[in]	thd	thread handle
21503 @param[out]	save	immediate result for update function
21504 @param[in]	value	incoming string
21505 @return 0 on success, 1 on failure.
21506 */
21507 static
21508 int
innodb_buffer_pool_size_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)21509 innodb_buffer_pool_size_validate(
21510 	THD*				thd,
21511 	st_mysql_sys_var*,
21512 	void*				save,
21513 	struct st_mysql_value*		value)
21514 {
21515 	longlong	intbuf;
21516 
21517 	value->val_int(value, &intbuf);
21518 
21519 	if (static_cast<ulonglong>(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
21520 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21521 				    ER_WRONG_ARGUMENTS,
21522 				    "innodb_buffer_pool_size must be at least"
21523 				    " %lld for innodb_page_size=%lu",
21524 				    MYSQL_SYSVAR_NAME(buffer_pool_size).min_val,
21525 				    srv_page_size);
21526 		return(1);
21527 	}
21528 
21529 	if (!srv_was_started) {
21530 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21531 				    ER_WRONG_ARGUMENTS,
21532 				    "Cannot update innodb_buffer_pool_size,"
21533 				    " because InnoDB is not started.");
21534 		return(1);
21535 	}
21536 
21537 #ifdef UNIV_DEBUG
21538 	if (buf_disable_resize_buffer_pool_debug == TRUE) {
21539 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21540 			ER_WRONG_ARGUMENTS,
21541 			"Cannot update innodb_buffer_pool_size,"
21542 			" because innodb_disable_resize_buffer_pool_debug"
21543 			" is set.");
21544 		ib::warn() << "Cannot update innodb_buffer_pool_size,"
21545 			" because innodb_disable_resize_buffer_pool_debug"
21546 			" is set.";
21547 		return(1);
21548 	}
21549 #endif /* UNIV_DEBUG */
21550 
21551 
21552 	buf_pool_mutex_enter_all();
21553 
21554 	if (srv_buf_pool_old_size != srv_buf_pool_size) {
21555 		buf_pool_mutex_exit_all();
21556 		my_printf_error(ER_WRONG_ARGUMENTS,
21557 			"Another buffer pool resize is already in progress.", MYF(0));
21558 		return(1);
21559 	}
21560 
21561 	if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) {
21562 		buf_pool_mutex_exit_all();
21563 
21564 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21565 				    ER_WRONG_ARGUMENTS,
21566 				    "Cannot update innodb_buffer_pool_size"
21567 				    " to less than 1GB if"
21568 				    " innodb_buffer_pool_instances > 1.");
21569 		return(1);
21570 	}
21571 
21572 	ulint	requested_buf_pool_size = buf_pool_size_align(ulint(intbuf));
21573 
21574 	*static_cast<ulonglong*>(save) = requested_buf_pool_size;
21575 
21576 	if (srv_buf_pool_size == ulint(intbuf)) {
21577 		buf_pool_mutex_exit_all();
21578 		/* nothing to do */
21579 		return(0);
21580 	}
21581 
21582 	if (srv_buf_pool_size == requested_buf_pool_size) {
21583 		buf_pool_mutex_exit_all();
21584 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21585 				    ER_WRONG_ARGUMENTS,
21586 				    "innodb_buffer_pool_size must be at least"
21587 				    " innodb_buffer_pool_chunk_size=%lu",
21588 				    srv_buf_pool_chunk_unit);
21589 		/* nothing to do */
21590 		return(0);
21591 	}
21592 
21593 	srv_buf_pool_size = requested_buf_pool_size;
21594 	buf_pool_mutex_exit_all();
21595 
21596 	if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
21597 		char	buf[64];
21598 		int	len = 64;
21599 		value->val_str(value, buf, &len);
21600 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21601 				    ER_TRUNCATED_WRONG_VALUE,
21602 				    "Truncated incorrect %-.32s value: '%-.128s'",
21603 				    mysql_sysvar_buffer_pool_size.name,
21604 				    value->val_str(value, buf, &len));
21605 	}
21606 
21607 	return(0);
21608 }
21609 
21610 /*************************************************************//**
21611 Check for a valid value of innobase_compression_algorithm.
21612 @return	0 for valid innodb_compression_algorithm. */
21613 static
21614 int
innodb_compression_algorithm_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21615 innodb_compression_algorithm_validate(
21616 /*==================================*/
21617 	THD*				thd,	/*!< in: thread handle */
21618 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
21619 						variable */
21620 	void*				save,	/*!< out: immediate result
21621 						for update function */
21622 	struct st_mysql_value*		value)	/*!< in: incoming string */
21623 {
21624 	ulong		compression_algorithm;
21625 	DBUG_ENTER("innobase_compression_algorithm_validate");
21626 
21627 	if (check_sysvar_enum(thd, var, save, value)) {
21628 		DBUG_RETURN(1);
21629 	}
21630 
21631 	compression_algorithm = *reinterpret_cast<ulong*>(save);
21632 	(void)compression_algorithm;
21633 
21634 #ifndef HAVE_LZ4
21635 	if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
21636 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21637 				    HA_ERR_UNSUPPORTED,
21638 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21639 				    "InnoDB: liblz4 is not installed. \n",
21640 				    compression_algorithm);
21641 		DBUG_RETURN(1);
21642 	}
21643 #endif
21644 
21645 #ifndef HAVE_LZO
21646 	if (compression_algorithm == PAGE_LZO_ALGORITHM) {
21647 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21648 				    HA_ERR_UNSUPPORTED,
21649 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21650 				    "InnoDB: liblzo is not installed. \n",
21651 				    compression_algorithm);
21652 		DBUG_RETURN(1);
21653 	}
21654 #endif
21655 
21656 #ifndef HAVE_LZMA
21657 	if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
21658 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21659 				    HA_ERR_UNSUPPORTED,
21660 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21661 				    "InnoDB: liblzma is not installed. \n",
21662 				    compression_algorithm);
21663 		DBUG_RETURN(1);
21664 	}
21665 #endif
21666 
21667 #ifndef HAVE_BZIP2
21668 	if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
21669 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21670 				    HA_ERR_UNSUPPORTED,
21671 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21672 				    "InnoDB: libbz2 is not installed. \n",
21673 				    compression_algorithm);
21674 		DBUG_RETURN(1);
21675 	}
21676 #endif
21677 
21678 #ifndef HAVE_SNAPPY
21679 	if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
21680 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21681 				    HA_ERR_UNSUPPORTED,
21682 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21683 				    "InnoDB: libsnappy is not installed. \n",
21684 				    compression_algorithm);
21685 		DBUG_RETURN(1);
21686 	}
21687 #endif
21688 	DBUG_RETURN(0);
21689 }
21690 
21691 static
21692 int
innodb_encrypt_tables_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21693 innodb_encrypt_tables_validate(
21694 /*=================================*/
21695 	THD*				thd,	/*!< in: thread handle */
21696 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
21697 						variable */
21698 	void*				save,	/*!< out: immediate result
21699 						for update function */
21700 	struct st_mysql_value*		value)	/*!< in: incoming string */
21701 {
21702 	if (check_sysvar_enum(thd, var, save, value)) {
21703 		return 1;
21704 	}
21705 
21706 	ulong encrypt_tables = *(ulong*)save;
21707 
21708 	if (encrypt_tables
21709 	    && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
21710 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21711 				    HA_ERR_UNSUPPORTED,
21712 				    "InnoDB: cannot enable encryption, "
21713 		                    "encryption plugin is not available");
21714 		return 1;
21715 	}
21716 
21717 	return 0;
21718 }
21719 
innodb_remember_check_sysvar_funcs()21720 static void innodb_remember_check_sysvar_funcs()
21721 {
21722 	/* remember build-in sysvar check functions */
21723 	ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
21724 	check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
21725 
21726 	ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
21727 	check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
21728 }
21729 
21730 /********************************************************************//**
21731 Helper function to push warnings from InnoDB internals to SQL-layer. */
21732 UNIV_INTERN
21733 void
ib_push_warning(trx_t * trx,dberr_t error,const char * format,...)21734 ib_push_warning(
21735 	trx_t*		trx,	/*!< in: trx */
21736 	dberr_t		error,	/*!< in: error code to push as warning */
21737 	const char	*format,/*!< in: warning message */
21738 	...)
21739 {
21740 	if (trx && trx->mysql_thd) {
21741 		THD *thd = (THD *)trx->mysql_thd;
21742 		va_list args;
21743 		char *buf;
21744 #define MAX_BUF_SIZE 4*1024
21745 
21746 		va_start(args, format);
21747 		buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21748 		buf[MAX_BUF_SIZE - 1] = 0;
21749 		vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21750 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21751 				    uint(convert_error_code_to_mysql(error, 0,
21752 								     thd)),
21753 				    buf);
21754 		my_free(buf);
21755 		va_end(args);
21756 	}
21757 }
21758 
21759 /********************************************************************//**
21760 Helper function to push warnings from InnoDB internals to SQL-layer. */
21761 UNIV_INTERN
21762 void
ib_push_warning(void * ithd,dberr_t error,const char * format,...)21763 ib_push_warning(
21764 	void*		ithd,	/*!< in: thd */
21765 	dberr_t		error,	/*!< in: error code to push as warning */
21766 	const char	*format,/*!< in: warning message */
21767 	...)
21768 {
21769 	va_list args;
21770 	THD *thd = (THD *)ithd;
21771 	char *buf;
21772 #define MAX_BUF_SIZE 4*1024
21773 
21774 	if (ithd == NULL) {
21775 		thd = current_thd;
21776 	}
21777 
21778 	if (thd) {
21779 		va_start(args, format);
21780 		buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21781 		buf[MAX_BUF_SIZE - 1] = 0;
21782 		vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21783 
21784 		push_warning_printf(
21785 			thd, Sql_condition::WARN_LEVEL_WARN,
21786 			uint(convert_error_code_to_mysql(error, 0, thd)), buf);
21787 		my_free(buf);
21788 		va_end(args);
21789 	}
21790 }
21791 
21792 /********************************************************************//**
21793 Helper function to push frm mismatch error to error log and
21794 if needed to sql-layer. */
21795 UNIV_INTERN
21796 void
ib_push_frm_error(THD * thd,dict_table_t * ib_table,TABLE * table,ulint n_keys,bool push_warning)21797 ib_push_frm_error(
21798 /*==============*/
21799 	THD*		thd,		/*!< in: MySQL thd */
21800 	dict_table_t*	ib_table,	/*!< in: InnoDB table */
21801 	TABLE*		table,		/*!< in: MySQL table */
21802 	ulint		n_keys,		/*!< in: InnoDB #keys */
21803 	bool		push_warning)	/*!< in: print warning ? */
21804 {
21805 	switch (ib_table->dict_frm_mismatch) {
21806 	case DICT_FRM_NO_PK:
21807 		sql_print_error("Table %s has a primary key in "
21808 			"InnoDB data dictionary, but not "
21809 			"in MariaDB!"
21810 			" Have you mixed up "
21811 			".frm files from different "
21812 			"installations? See "
21813 			"https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21814 			ib_table->name.m_name);
21815 
21816 		if (push_warning) {
21817 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21818 				ER_NO_SUCH_INDEX,
21819 				"InnoDB: Table %s has a "
21820 				"primary key in InnoDB data "
21821 				"dictionary, but not in "
21822 				"MariaDB!", ib_table->name.m_name);
21823 		}
21824 		break;
21825 	case DICT_NO_PK_FRM_HAS:
21826 		sql_print_error(
21827 				"Table %s has no primary key in InnoDB data "
21828 				"dictionary, but has one in MariaDB! If you "
21829 				"created the table with a MariaDB version < "
21830 				"3.23.54 and did not define a primary key, "
21831 				"but defined a unique key with all non-NULL "
21832 				"columns, then MariaDB internally treats that "
21833 				"key as the primary key. You can fix this "
21834 				"error by dump + DROP + CREATE + reimport "
21835 				"of the table.", ib_table->name.m_name);
21836 
21837 		if (push_warning) {
21838 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21839 				ER_NO_SUCH_INDEX,
21840 				"InnoDB: Table %s has no "
21841 				"primary key in InnoDB data "
21842 				"dictionary, but has one in "
21843 				"MariaDB!",
21844 				ib_table->name.m_name);
21845 		}
21846 		break;
21847 
21848 	case DICT_FRM_INCONSISTENT_KEYS:
21849 		sql_print_error("InnoDB: Table %s contains " ULINTPF " "
21850 			"indexes inside InnoDB, which "
21851 			"is different from the number of "
21852 			"indexes %u defined in the MariaDB "
21853 			" Have you mixed up "
21854 			".frm files from different "
21855 			"installations? See "
21856 			"https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21857 			ib_table->name.m_name, n_keys,
21858 			table->s->keys);
21859 
21860 		if (push_warning) {
21861 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21862 				ER_NO_SUCH_INDEX,
21863 				"InnoDB: Table %s contains " ULINTPF " "
21864 				"indexes inside InnoDB, which "
21865 				"is different from the number of "
21866 				"indexes %u defined in the MariaDB ",
21867                                 ib_table->name.m_name, n_keys,
21868 				table->s->keys);
21869 		}
21870 		break;
21871 
21872 	case DICT_FRM_CONSISTENT:
21873 	default:
21874 		sql_print_error("InnoDB: Table %s is consistent "
21875 			"on InnoDB data dictionary and MariaDB "
21876 			" FRM file.",
21877 			ib_table->name.m_name);
21878 		ut_error;
21879 		break;
21880 	}
21881 }
21882 
21883 /** Writes 8 bytes to nth tuple field
21884 @param[in]	tuple	where to write
21885 @param[in]	nth	index in tuple
21886 @param[in]	data	what to write
21887 @param[in]	buf	field data buffer */
set_tuple_col_8(dtuple_t * tuple,int col,uint64_t data,byte * buf)21888 static void set_tuple_col_8(dtuple_t *tuple, int col, uint64_t data, byte *buf)
21889 {
21890   dfield_t *dfield= dtuple_get_nth_field(tuple, col);
21891   ut_ad(dfield->type.len == 8);
21892   if (dfield->len == UNIV_SQL_NULL)
21893   {
21894     dfield_set_data(dfield, buf, 8);
21895   }
21896   ut_ad(dfield->len == dfield->type.len && dfield->data);
21897   mach_write_to_8(dfield->data, data);
21898 }
21899 
vers_update_end(row_prebuilt_t * prebuilt,bool history_row)21900 void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row)
21901 {
21902   ut_ad(prebuilt->ins_node == this);
21903   trx_t *trx= prebuilt->trx;
21904 #ifndef DBUG_OFF
21905   ut_ad(table->vers_start != table->vers_end);
21906   const mysql_row_templ_t *t= prebuilt->get_template_by_col(table->vers_end);
21907   ut_ad(t);
21908   ut_ad(t->mysql_col_len == 8);
21909 #endif
21910 
21911   if (history_row)
21912   {
21913     set_tuple_col_8(row, table->vers_end, trx->id, vers_end_buf);
21914   }
21915   else /* ROW_INS_VERSIONED */
21916   {
21917     set_tuple_col_8(row, table->vers_end, TRX_ID_MAX, vers_end_buf);
21918 #ifndef DBUG_OFF
21919     t= prebuilt->get_template_by_col(table->vers_start);
21920     ut_ad(t);
21921     ut_ad(t->mysql_col_len == 8);
21922 #endif
21923     set_tuple_col_8(row, table->vers_start, trx->id, vers_start_buf);
21924   }
21925   dict_index_t *clust_index= dict_table_get_first_index(table);
21926   THD *thd= trx->mysql_thd;
21927   TABLE *mysql_table= prebuilt->m_mysql_table;
21928   mem_heap_t *local_heap= NULL;
21929   for (ulint col_no= 0; col_no < dict_table_get_n_v_cols(table); col_no++)
21930   {
21931     const dict_v_col_t *v_col= dict_table_get_nth_v_col(table, col_no);
21932     for (ulint i= 0; i < unsigned(v_col->num_base); i++)
21933       if (v_col->base_col[i]->ind == table->vers_end)
21934         innobase_get_computed_value(row, v_col, clust_index, &local_heap,
21935                                     table->heap, NULL, thd, mysql_table,
21936                                     mysql_table->record[0], NULL, NULL);
21937   }
21938   if (UNIV_LIKELY_NULL(local_heap))
21939     mem_heap_free(local_heap);
21940 }
21941 
21942 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
21943 if needed.
21944 @param[in]	size	size in bytes
21945 @return	aligned size */
21946 ulint
buf_pool_size_align(ulint size)21947 buf_pool_size_align(
21948 	ulint	size)
21949 {
21950   const ib_uint64_t	m = ((ib_uint64_t)srv_buf_pool_instances) * srv_buf_pool_chunk_unit;
21951   size = ut_max((size_t) size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val);
21952 
21953   if (size % m == 0) {
21954     return(size);
21955   } else {
21956     return (ulint)((size / m + 1) * m);
21957   }
21958 }
21959