1 /*****************************************************************************
2 
3 Copyright (c) 2000, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 Copyright (c) 2012, Facebook Inc.
7 Copyright (c) 2013, 2022, MariaDB Corporation.
8 
9 Portions of this file contain modifications contributed and copyrighted by
10 Google, Inc. Those modifications are gratefully acknowledged and are described
11 briefly in the InnoDB documentation. The contributions by Google are
12 incorporated with their permission, and subject to the conditions contained in
13 the file COPYING.Google.
14 
15 Portions of this file contain modifications contributed and copyrighted
16 by Percona Inc.. Those modifications are
17 gratefully acknowledged and are described briefly in the InnoDB
18 documentation. The contributions by Percona Inc. are incorporated with
19 their permission, and subject to the conditions contained in the file
20 COPYING.Percona.
21 
22 This program is free software; you can redistribute it and/or modify it under
23 the terms of the GNU General Public License as published by the Free Software
24 Foundation; version 2 of the License.
25 
26 This program is distributed in the hope that it will be useful, but WITHOUT
27 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
28 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29 
30 You should have received a copy of the GNU General Public License along with
31 this program; if not, write to the Free Software Foundation, Inc.,
32 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
33 
34 *****************************************************************************/
35 
36 /** @file ha_innodb.cc */
37 
38 #include "univ.i"
39 
40 /* Include necessary SQL headers */
41 #include "ha_prototypes.h"
42 #include <debug_sync.h>
43 #include <gstream.h>
44 #include <log.h>
45 #include <mysys_err.h>
46 #include <innodb_priv.h>
47 #include <strfunc.h>
48 #include <sql_acl.h>
49 #include <sql_class.h>
50 #include <sql_show.h>
51 #include <sql_table.h>
52 #include <table_cache.h>
53 #include <my_check_opt.h>
54 #include <my_bitmap.h>
55 #include <mysql/service_thd_alloc.h>
56 #include <mysql/service_thd_wait.h>
57 #include "field.h"
58 #include "scope.h"
59 
60 // MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
61 // MYSQL_PLUGIN_IMPORT extern char mysql_unpacked_real_data_home[];
62 
63 #include <my_service_manager.h>
64 #include <key.h>
65 
66 /* Include necessary InnoDB headers */
67 #include "btr0btr.h"
68 #include "btr0cur.h"
69 #include "btr0bulk.h"
70 #include "btr0sea.h"
71 #include "buf0dblwr.h"
72 #include "buf0dump.h"
73 #include "buf0flu.h"
74 #include "buf0lru.h"
75 #include "dict0boot.h"
76 #include "dict0load.h"
77 #include "btr0defragment.h"
78 #include "dict0crea.h"
79 #include "dict0dict.h"
80 #include "dict0stats.h"
81 #include "dict0stats_bg.h"
82 #include "fil0fil.h"
83 #include "fsp0fsp.h"
84 #include "fts0fts.h"
85 #include "fts0plugin.h"
86 #include "fts0priv.h"
87 #include "fts0types.h"
88 #include "ibuf0ibuf.h"
89 #include "lock0lock.h"
90 #include "log0crypt.h"
91 #include "mtr0mtr.h"
92 #include "os0file.h"
93 #include "page0zip.h"
94 #include "pars0pars.h"
95 #include "rem0types.h"
96 #include "row0import.h"
97 #include "row0ins.h"
98 #include "row0merge.h"
99 #include "row0mysql.h"
100 #include "row0quiesce.h"
101 #include "row0sel.h"
102 #include "row0upd.h"
103 #include "fil0crypt.h"
104 #include "srv0mon.h"
105 #include "srv0srv.h"
106 #include "srv0start.h"
107 #include "rem0rec.h"
108 #ifdef UNIV_DEBUG
109 #include "trx0purge.h"
110 #endif /* UNIV_DEBUG */
111 #include "trx0roll.h"
112 #include "trx0rseg.h"
113 #include "trx0trx.h"
114 #include "fil0pagecompress.h"
115 #include "ut0mem.h"
116 #include "row0ext.h"
117 
118 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
119 
120 extern "C" void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
121 unsigned long long thd_get_query_id(const MYSQL_THD thd);
122 void thd_clear_error(MYSQL_THD thd);
123 
124 TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len,
125 			  const char *table, size_t table_len);
126 MYSQL_THD create_thd();
127 void destroy_thd(MYSQL_THD thd);
128 void reset_thd(MYSQL_THD thd);
129 TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
130 			const char *tb, size_t tblen);
131 void close_thread_tables(THD* thd);
132 
133 #ifdef MYSQL_DYNAMIC_PLUGIN
134 #define tc_size  400
135 #define tdc_size 400
136 #endif
137 
138 #include <mysql/plugin.h>
139 #include <mysql/service_wsrep.h>
140 
141 #include "ha_innodb.h"
142 #include "i_s.h"
143 #include "sync0sync.h"
144 
145 #include <string>
146 #include <sstream>
147 
148 #ifdef WITH_WSREP
149 #include "dict0priv.h"
150 #include <mysql/service_md5.h>
151 #include "wsrep_sst.h"
152 #endif /* WITH_WSREP */
153 
154 /** to force correct commit order in binlog */
155 static ulong commit_threads = 0;
156 static mysql_cond_t commit_cond;
157 static mysql_mutex_t commit_cond_m;
158 static mysql_mutex_t pending_checkpoint_mutex;
159 
160 #define INSIDE_HA_INNOBASE_CC
161 
162 #define EQ_CURRENT_THD(thd) ((thd) == current_thd)
163 
164 struct handlerton* innodb_hton_ptr;
165 
166 static const long AUTOINC_OLD_STYLE_LOCKING = 0;
167 static const long AUTOINC_NEW_STYLE_LOCKING = 1;
168 static const long AUTOINC_NO_LOCKING = 2;
169 
170 static ulong innobase_open_files;
171 static long innobase_autoinc_lock_mode;
172 static ulong innobase_commit_concurrency;
173 
174 static ulonglong innobase_buffer_pool_size;
175 
176 /** Percentage of the buffer pool to reserve for 'old' blocks.
177 Connected to buf_LRU_old_ratio. */
178 static uint innobase_old_blocks_pct;
179 
180 static char*	innobase_data_file_path;
181 static char*	innobase_temp_data_file_path;
182 
183 /* The default values for the following char* start-up parameters
184 are determined in innodb_init_params(). */
185 
186 static char*	innobase_data_home_dir;
187 static char*	innobase_enable_monitor_counter;
188 static char*	innobase_disable_monitor_counter;
189 static char*	innobase_reset_monitor_counter;
190 static char*	innobase_reset_all_monitor_counter;
191 
192 static ulong	innodb_flush_method;
193 
194 /** Deprecated; no effect other than issuing a deprecation warning. */
195 static char* innodb_file_format;
196 /** Deprecated; no effect other than issuing a deprecation warning. */
197 static char* innodb_large_prefix;
198 
/* This variable can be set in the server configuration file to specify
the stopword table to be used */
201 static char*	innobase_server_stopword_table;
202 
203 static my_bool	innobase_use_checksums;
204 static my_bool	innobase_locks_unsafe_for_binlog;
205 static my_bool	innobase_rollback_on_timeout;
206 static my_bool	innobase_create_status_file;
207 my_bool	innobase_stats_on_metadata;
208 static my_bool	innodb_optimize_fulltext_only;
209 
210 static char*	innodb_version_str = (char*) INNODB_VERSION_STR;
211 
212 extern uint srv_fil_crypt_rotate_key_age;
213 extern uint srv_n_fil_crypt_iops;
214 
215 extern my_bool srv_immediate_scrub_data_uncompressed;
216 extern my_bool srv_background_scrub_data_uncompressed;
217 extern my_bool srv_background_scrub_data_compressed;
218 extern uint srv_background_scrub_data_interval;
219 extern uint srv_background_scrub_data_check_interval;
220 #ifdef UNIV_DEBUG
221 my_bool innodb_evict_tables_on_commit_debug;
222 extern my_bool srv_scrub_force_testing;
223 #endif
224 
225 /** File format constraint for ALTER TABLE */
226 ulong innodb_instant_alter_column_allowed;
227 
/** Values of the innodb_default_row_format option.
rec_format_enum cannot be reused here, because the COMPRESSED row format
is not allowed for innodb_default_row_format. */
enum default_row_format_enum {
	/** first value; the remaining enumerators follow consecutively */
	DEFAULT_ROW_FORMAT_REDUNDANT = 0,
	DEFAULT_ROW_FORMAT_COMPACT,
	DEFAULT_ROW_FORMAT_DYNAMIC
};
235 
236 /** A dummy variable */
237 static uint innodb_max_purge_lag_wait;
238 
239 /** Wait for trx_sys_t::rseg_history_len to be below a limit. */
innodb_max_purge_lag_wait_update(THD * thd,st_mysql_sys_var *,void *,const void * limit)240 static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *,
241                                              void *, const void *limit)
242 {
243   const uint l= *static_cast<const uint*>(limit);
244   if (trx_sys.rseg_history_len <= l)
245     return;
246   mysql_mutex_unlock(&LOCK_global_system_variables);
247   while (trx_sys.rseg_history_len > l)
248   {
249     if (thd_kill_level(thd))
250       break;
251     srv_wake_purge_thread_if_not_active();
252     os_thread_sleep(100000);
253   }
254   mysql_mutex_lock(&LOCK_global_system_variables);
255 }
256 
/** Store an error code in errno.
@param[in]	err	value to assign to errno */
static void set_my_errno(int err)
{
	errno = err;
}
262 
263 /** Checks whether the file name belongs to a partition of a table.
264 @param[in]	file_name	file name
265 @return pointer to the end of the table name part of the file name, or NULL */
266 static
267 char*
is_partition(char * file_name)268 is_partition(
269 /*=========*/
270 	char*		file_name)
271 {
272 	/* We look for pattern #P# to see if the table is partitioned
273 	MariaDB table. */
274 	return strstr(file_name, table_name_t::part_suffix);
275 }
276 
277 /** Signal to shut down InnoDB (NULL if shutdown was signaled, or if
278 running in innodb_read_only mode, srv_read_only_mode) */
279 std::atomic <st_my_thread_var *> srv_running;
/** Service thread that waits for the server shutdown and stops purge threads.
Purge workers have THDs that are needed to calculate virtual columns.
These THDs must be destroyed rather early in the server shutdown sequence.
This service thread creates a THD and idly waits for it to get a signal to
die. Then it notifies all purge workers to shut down.
*/
286 static pthread_t thd_destructor_thread;
287 
288 pthread_handler_t
thd_destructor_proxy(void *)289 thd_destructor_proxy(void *)
290 {
291 	mysql_mutex_t thd_destructor_mutex;
292 	mysql_cond_t thd_destructor_cond;
293 
294 	my_thread_init();
295 	mysql_mutex_init(PSI_NOT_INSTRUMENTED, &thd_destructor_mutex, 0);
296 	mysql_cond_init(PSI_NOT_INSTRUMENTED, &thd_destructor_cond, 0);
297 
298 	st_my_thread_var *myvar= _my_thread_var();
299 	myvar->current_mutex = &thd_destructor_mutex;
300 	myvar->current_cond = &thd_destructor_cond;
301 
302 	THD *thd= create_thd();
303 	thd_proc_info(thd, "InnoDB shutdown handler");
304 
305 
306 	mysql_mutex_lock(&thd_destructor_mutex);
307 	srv_running.store(myvar, std::memory_order_relaxed);
308 	/* wait until the server wakes the THD to abort and die */
309 	while (!myvar->abort)
310 		mysql_cond_wait(&thd_destructor_cond, &thd_destructor_mutex);
311 	mysql_mutex_unlock(&thd_destructor_mutex);
312 	srv_running.store(NULL, std::memory_order_relaxed);
313 
314 	while (srv_fast_shutdown == 0 &&
315 	       (trx_sys.any_active_transactions() ||
316 		THD_count::value() > srv_n_purge_threads + 1)) {
317 		thd_proc_info(thd, "InnoDB slow shutdown wait");
318 		os_thread_sleep(1000);
319 	}
320 
321 	/* Some background threads might generate undo pages that will
322 	need to be purged, so they have to be shut down before purge
323 	threads if slow shutdown is requested.  */
324 	srv_shutdown_bg_undo_sources();
325 	srv_purge_shutdown();
326 
327 	destroy_thd(thd);
328 	mysql_cond_destroy(&thd_destructor_cond);
329 	mysql_mutex_destroy(&thd_destructor_mutex);
330 	my_thread_end();
331 	return 0;
332 }
333 
334 /** Return the InnoDB ROW_FORMAT enum value
335 @param[in]	row_format	row_format from "innodb_default_row_format"
336 @return InnoDB ROW_FORMAT value from rec_format_t enum. */
337 static
338 rec_format_t
get_row_format(ulong row_format)339 get_row_format(
340 	ulong row_format)
341 {
342 	switch(row_format) {
343 	case DEFAULT_ROW_FORMAT_REDUNDANT:
344 		return(REC_FORMAT_REDUNDANT);
345 	case DEFAULT_ROW_FORMAT_COMPACT:
346 		return(REC_FORMAT_COMPACT);
347 	case DEFAULT_ROW_FORMAT_DYNAMIC:
348 		return(REC_FORMAT_DYNAMIC);
349 	default:
350 		ut_ad(0);
351 		return(REC_FORMAT_DYNAMIC);
352 	}
353 }
354 
355 static ulong	innodb_default_row_format = DEFAULT_ROW_FORMAT_DYNAMIC;
356 
357 /** Possible values for system variable "innodb_stats_method". The values
358 are defined the same as its corresponding MyISAM system variable
359 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
360 static const char* innodb_stats_method_names[] = {
361 	"nulls_equal",
362 	"nulls_unequal",
363 	"nulls_ignored",
364 	NullS
365 };
366 
367 /** Used to define an enumerate type of the system variable innodb_stats_method.
368 This is the same as "myisam_stats_method_typelib" */
369 static TYPELIB innodb_stats_method_typelib = {
370 	array_elements(innodb_stats_method_names) - 1,
371 	"innodb_stats_method_typelib",
372 	innodb_stats_method_names,
373 	NULL
374 };
375 
376 /** Possible values of the parameter innodb_checksum_algorithm */
377 const char* innodb_checksum_algorithm_names[] = {
378 	"crc32",
379 	"strict_crc32",
380 	"innodb",
381 	"strict_innodb",
382 	"none",
383 	"strict_none",
384 	"full_crc32",
385 	"strict_full_crc32",
386 	NullS
387 };
388 
389 /** Used to define an enumerate type of the system variable
390 innodb_checksum_algorithm. */
391 TYPELIB innodb_checksum_algorithm_typelib = {
392 	array_elements(innodb_checksum_algorithm_names) - 1,
393 	"innodb_checksum_algorithm_typelib",
394 	innodb_checksum_algorithm_names,
395 	NULL
396 };
397 
398 /** Possible values for system variable "innodb_default_row_format". */
399 static const char* innodb_default_row_format_names[] = {
400 	"redundant",
401 	"compact",
402 	"dynamic",
403 	NullS
404 };
405 
406 /** Used to define an enumerate type of the system variable
407 innodb_default_row_format. */
408 static TYPELIB innodb_default_row_format_typelib = {
409 	array_elements(innodb_default_row_format_names) - 1,
410 	"innodb_default_row_format_typelib",
411 	innodb_default_row_format_names,
412 	NULL
413 };
414 
415 /** Possible values of the parameter innodb_lock_schedule_algorithm */
416 static const char* innodb_lock_schedule_algorithm_names[] = {
417 	"fcfs",
418 	"vats",
419 	NullS
420 };
421 
422 /** Used to define an enumerate type of the system variable
423 innodb_lock_schedule_algorithm. */
424 static TYPELIB innodb_lock_schedule_algorithm_typelib = {
425 	array_elements(innodb_lock_schedule_algorithm_names) - 1,
426 	"innodb_lock_schedule_algorithm_typelib",
427 	innodb_lock_schedule_algorithm_names,
428 	NULL
429 };
430 
431 /** Names of allowed values of innodb_flush_method */
432 const char* innodb_flush_method_names[] = {
433 	"fsync",
434 	"O_DSYNC",
435 	"littlesync",
436 	"nosync",
437 	"O_DIRECT",
438 	"O_DIRECT_NO_FSYNC",
439 #ifdef _WIN32
440 	"unbuffered",
441 	"async_unbuffered" /* alias for "unbuffered" */,
442 	"normal" /* alias for "fsync" */,
443 #endif
444 	NullS
445 };
446 
447 /** Enumeration of innodb_flush_method */
448 TYPELIB innodb_flush_method_typelib = {
449 	array_elements(innodb_flush_method_names) - 1,
450 	"innodb_flush_method_typelib",
451 	innodb_flush_method_names,
452 	NULL
453 };
454 
455 /* The following counter is used to convey information to InnoDB
456 about server activity: in case of normal DML ops it is not
457 sensible to call srv_active_wake_master_thread after each
458 operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
459 
460 #define INNOBASE_WAKE_INTERVAL	32
461 static ulong	innobase_active_counter	= 0;
462 
463 /** Allowed values of innodb_change_buffering */
464 static const char* innodb_change_buffering_names[] = {
465 	"none",		/* IBUF_USE_NONE */
466 	"inserts",	/* IBUF_USE_INSERT */
467 	"deletes",	/* IBUF_USE_DELETE_MARK */
468 	"changes",	/* IBUF_USE_INSERT_DELETE_MARK */
469 	"purges",	/* IBUF_USE_DELETE */
470 	"all",		/* IBUF_USE_ALL */
471 	NullS
472 };
473 
474 /** Enumeration of innodb_change_buffering */
475 static TYPELIB innodb_change_buffering_typelib = {
476 	array_elements(innodb_change_buffering_names) - 1,
477 	"innodb_change_buffering_typelib",
478 	innodb_change_buffering_names,
479 	NULL
480 };
481 
482 /** Allowed values of innodb_instant_alter_column_allowed */
483 const char* innodb_instant_alter_column_allowed_names[] = {
484 	"never", /* compatible with MariaDB 5.5 to 10.2 */
485 	"add_last",/* allow instant ADD COLUMN ... LAST */
486 	"add_drop_reorder", /* allow instant ADD anywhere & DROP & reorder */
487 	NullS
488 };
489 
490 /** Enumeration of innodb_instant_alter_column_allowed */
491 static TYPELIB innodb_instant_alter_column_allowed_typelib = {
492 	array_elements(innodb_instant_alter_column_allowed_names) - 1,
493 	"innodb_instant_alter_column_allowed_typelib",
494 	innodb_instant_alter_column_allowed_names,
495 	NULL
496 };
497 
498 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
499 of m_prebuilt->fts_doc_id
500 @param[in,out]	fts_hdl	FTS handler
501 @return the relevance ranking value */
502 static
503 float
504 innobase_fts_retrieve_ranking(
505 	FT_INFO*	fts_hdl);
506 /** Free the memory for the FTS handler
507 @param[in,out]	fts_hdl	FTS handler */
508 static
509 void
510 innobase_fts_close_ranking(
511 	FT_INFO*	fts_hdl);
512 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
513 of m_prebuilt->fts_doc_id
514 @param[in,out]	fts_hdl	FTS handler
515 @return the relevance ranking value */
516 static
517 float
518 innobase_fts_find_ranking(
519 	FT_INFO*	fts_hdl,
520 	uchar*,
521 	uint);
522 
523 /* Call back function array defined by MySQL and used to
524 retrieve FTS results. */
525 const struct _ft_vft ft_vft_result = {NULL,
526 				      innobase_fts_find_ranking,
527 				      innobase_fts_close_ranking,
528 				      innobase_fts_retrieve_ranking,
529 				      NULL};
530 
531 /** @return version of the extended FTS API */
532 static
533 uint
innobase_fts_get_version()534 innobase_fts_get_version()
535 {
536 	/* Currently this doesn't make much sense as returning
537 	HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
538 	This supposed to ease future extensions.  */
539 	return(2);
540 }
541 
542 /** @return Which part of the extended FTS API is supported */
543 static
544 ulonglong
innobase_fts_flags()545 innobase_fts_flags()
546 {
547 	return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
548 }
549 
550 /** Find and Retrieve the FTS doc_id for the current result row
551 @param[in,out]	fts_hdl	FTS handler
552 @return the document ID */
553 static
554 ulonglong
555 innobase_fts_retrieve_docid(
556 	FT_INFO_EXT*	fts_hdl);
557 
558 /** Find and retrieve the size of the current result
559 @param[in,out]	fts_hdl	FTS handler
560 @return number of matching rows */
561 static
562 ulonglong
innobase_fts_count_matches(FT_INFO_EXT * fts_hdl)563 innobase_fts_count_matches(
564 	FT_INFO_EXT*	fts_hdl)	/*!< in: FTS handler */
565 {
566 	NEW_FT_INFO*	handle = reinterpret_cast<NEW_FT_INFO*>(fts_hdl);
567 
568 	if (handle->ft_result->rankings_by_id != NULL) {
569 		return(rbt_size(handle->ft_result->rankings_by_id));
570 	} else {
571 		return(0);
572 	}
573 }
574 
575 const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
576 					      innobase_fts_flags,
577 					      innobase_fts_retrieve_docid,
578 					      innobase_fts_count_matches};
579 
580 #ifdef HAVE_PSI_INTERFACE
581 # define PSI_KEY(n) {&n##_key, #n, 0}
582 /* All RWLOCK used in Innodb are SX-locks */
583 # define PSI_RWLOCK_KEY(n) {&n##_key, #n, PSI_RWLOCK_FLAG_SX}
584 
585 /* Keys to register pthread mutexes/cond in the current file with
586 performance schema */
587 static mysql_pfs_key_t	commit_cond_mutex_key;
588 static mysql_pfs_key_t	commit_cond_key;
589 static mysql_pfs_key_t	pending_checkpoint_mutex_key;
590 static mysql_pfs_key_t  thd_destructor_thread_key;
591 
592 static PSI_mutex_info	all_pthread_mutexes[] = {
593 	PSI_KEY(commit_cond_mutex),
594 	PSI_KEY(pending_checkpoint_mutex),
595 };
596 
597 static PSI_cond_info	all_innodb_conds[] = {
598 	PSI_KEY(commit_cond)
599 };
600 
601 # ifdef UNIV_PFS_MUTEX
602 /* all_innodb_mutexes array contains mutexes that are
603 performance schema instrumented if "UNIV_PFS_MUTEX"
604 is defined */
605 static PSI_mutex_info all_innodb_mutexes[] = {
606 #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
607 	PSI_KEY(buffer_block_mutex),
608 #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
609 	PSI_KEY(buf_pool_mutex),
610 	PSI_KEY(buf_pool_zip_mutex),
611 	PSI_KEY(dict_foreign_err_mutex),
612 	PSI_KEY(dict_sys_mutex),
613 	PSI_KEY(recalc_pool_mutex),
614 	PSI_KEY(fil_system_mutex),
615 	PSI_KEY(flush_list_mutex),
616 	PSI_KEY(fts_delete_mutex),
617 	PSI_KEY(fts_doc_id_mutex),
618 	PSI_KEY(log_flush_order_mutex),
619 	PSI_KEY(hash_table_mutex),
620 	PSI_KEY(ibuf_bitmap_mutex),
621 	PSI_KEY(ibuf_mutex),
622 	PSI_KEY(ibuf_pessimistic_insert_mutex),
623 	PSI_KEY(index_online_log),
624 	PSI_KEY(log_sys_mutex),
625 	PSI_KEY(log_sys_write_mutex),
626 	PSI_KEY(mutex_list_mutex),
627 	PSI_KEY(page_zip_stat_per_index_mutex),
628 	PSI_KEY(purge_sys_pq_mutex),
629 	PSI_KEY(recv_sys_mutex),
630 	PSI_KEY(recv_writer_mutex),
631 	PSI_KEY(redo_rseg_mutex),
632 	PSI_KEY(noredo_rseg_mutex),
633 #  ifdef UNIV_DEBUG
634 	PSI_KEY(rw_lock_debug_mutex),
635 #  endif /* UNIV_DEBUG */
636 	PSI_KEY(rw_lock_list_mutex),
637 	PSI_KEY(rw_lock_mutex),
638 	PSI_KEY(srv_innodb_monitor_mutex),
639 	PSI_KEY(srv_misc_tmpfile_mutex),
640 	PSI_KEY(srv_monitor_file_mutex),
641 	PSI_KEY(buf_dblwr_mutex),
642 	PSI_KEY(trx_pool_mutex),
643 	PSI_KEY(trx_pool_manager_mutex),
644 	PSI_KEY(srv_sys_mutex),
645 	PSI_KEY(lock_mutex),
646 	PSI_KEY(lock_wait_mutex),
647 	PSI_KEY(trx_mutex),
648 	PSI_KEY(srv_threads_mutex),
649 #  ifndef PFS_SKIP_EVENT_MUTEX
650 	PSI_KEY(event_mutex),
651 #  endif /* PFS_SKIP_EVENT_MUTEX */
652 	PSI_KEY(rtr_active_mutex),
653 	PSI_KEY(rtr_match_mutex),
654 	PSI_KEY(rtr_path_mutex),
655 	PSI_KEY(trx_sys_mutex),
656 };
657 # endif /* UNIV_PFS_MUTEX */
658 
659 # ifdef UNIV_PFS_RWLOCK
660 /* all_innodb_rwlocks array contains rwlocks that are
661 performance schema instrumented if "UNIV_PFS_RWLOCK"
662 is defined */
663 static PSI_rwlock_info all_innodb_rwlocks[] = {
664 	PSI_RWLOCK_KEY(btr_search_latch),
665 #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
666 	PSI_RWLOCK_KEY(buf_block_lock),
667 #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
668 #  ifdef UNIV_DEBUG
669 	PSI_RWLOCK_KEY(buf_block_debug_latch),
670 #  endif /* UNIV_DEBUG */
671 	PSI_RWLOCK_KEY(dict_operation_lock),
672 	PSI_RWLOCK_KEY(fil_space_latch),
673 	PSI_RWLOCK_KEY(checkpoint_lock),
674 	PSI_RWLOCK_KEY(fts_cache_rw_lock),
675 	PSI_RWLOCK_KEY(fts_cache_init_rw_lock),
676 	PSI_RWLOCK_KEY(trx_i_s_cache_lock),
677 	PSI_RWLOCK_KEY(trx_purge_latch),
678 	PSI_RWLOCK_KEY(index_tree_rw_lock),
679 	PSI_RWLOCK_KEY(hash_table_locks)
680 };
681 # endif /* UNIV_PFS_RWLOCK */
682 
683 # ifdef UNIV_PFS_THREAD
684 /* all_innodb_threads array contains threads that are
685 performance schema instrumented if "UNIV_PFS_THREAD"
686 is defined */
687 static PSI_thread_info	all_innodb_threads[] = {
688 	PSI_KEY(buf_dump_thread),
689 	PSI_KEY(dict_stats_thread),
690 	PSI_KEY(io_handler_thread),
691 	PSI_KEY(io_ibuf_thread),
692 	PSI_KEY(io_log_thread),
693 	PSI_KEY(io_read_thread),
694 	PSI_KEY(io_write_thread),
695 	PSI_KEY(page_cleaner_thread),
696 	PSI_KEY(recv_writer_thread),
697 	PSI_KEY(srv_error_monitor_thread),
698 	PSI_KEY(srv_lock_timeout_thread),
699 	PSI_KEY(srv_master_thread),
700 	PSI_KEY(srv_monitor_thread),
701 	PSI_KEY(srv_purge_thread),
702 	PSI_KEY(srv_worker_thread),
703 	PSI_KEY(trx_rollback_clean_thread),
704 	PSI_KEY(thd_destructor_thread),
705 };
706 # endif /* UNIV_PFS_THREAD */
707 
708 # ifdef UNIV_PFS_IO
709 /* all_innodb_files array contains the type of files that are
710 performance schema instrumented if "UNIV_PFS_IO" is defined */
711 static PSI_file_info	all_innodb_files[] = {
712 	PSI_KEY(innodb_data_file),
713 	PSI_KEY(innodb_log_file),
714 	PSI_KEY(innodb_temp_file)
715 };
716 # endif /* UNIV_PFS_IO */
717 #endif /* HAVE_PSI_INTERFACE */
718 
719 static void innodb_remember_check_sysvar_funcs();
720 mysql_var_check_func check_sysvar_enum;
721 mysql_var_check_func check_sysvar_int;
722 
723 // should page compression be used by default for new tables
724 static MYSQL_THDVAR_BOOL(compression_default, PLUGIN_VAR_OPCMDARG,
725   "Is compression the default for new tables",
726   NULL, NULL, FALSE);
727 
728 /** Update callback for SET [SESSION] innodb_default_encryption_key_id */
729 static void
innodb_default_encryption_key_id_update(THD * thd,st_mysql_sys_var * var,void * var_ptr,const void * save)730 innodb_default_encryption_key_id_update(THD* thd, st_mysql_sys_var* var,
731 					void* var_ptr, const void *save)
732 {
733 	uint key_id = *static_cast<const uint*>(save);
734 	if (key_id != FIL_DEFAULT_ENCRYPTION_KEY
735 	    && !encryption_key_id_exists(key_id)) {
736 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
737 				    ER_WRONG_ARGUMENTS,
738 				    "innodb_default_encryption_key=%u"
739 				    " is not available", key_id);
740 	}
741 	*static_cast<uint*>(var_ptr) = key_id;
742 }
743 
744 static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG,
745 			 "Default encryption key id used for table encryption.",
746 			 NULL, innodb_default_encryption_key_id_update,
747 			 FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0);
748 
749 /**
750   Structure for CREATE TABLE options (table options).
751   It needs to be called ha_table_option_struct.
752 
753   The option values can be specified in the CREATE TABLE at the end:
754   CREATE TABLE ( ... ) *here*
755 */
756 
757 ha_create_table_option innodb_table_option_list[]=
758 {
759   /* With this option user can enable page compression feature for the
760   table */
761   HA_TOPTION_SYSVAR("PAGE_COMPRESSED", page_compressed, compression_default),
762   /* With this option user can set zip compression level for page
763   compression for this table*/
764   HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
765   /* With this option the user can enable encryption for the table */
766   HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
767   /* With this option the user defines the key identifier using for the encryption */
768   HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id),
769 
770   HA_TOPTION_END
771 };
772 
773 /*************************************************************//**
774 Check whether valid argument given to innodb_ft_*_stopword_table.
775 This function is registered as a callback with MySQL.
776 @return 0 for valid stopword table */
777 static
778 int
779 innodb_stopword_table_validate(
780 /*===========================*/
781 	THD*				thd,	/*!< in: thread handle */
782 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
783 						variable */
784 	void*				save,	/*!< out: immediate result
785 						for update function */
786 	struct st_mysql_value*		value);	/*!< in: incoming string */
787 
788 static bool is_mysql_datadir_path(const char *path);
789 
790 /** Validate passed-in "value" is a valid directory name.
791 This function is registered as a callback with MySQL.
792 @param[in,out]	thd	thread handle
793 @param[in]	var	pointer to system variable
794 @param[out]	save	immediate result for update
795 @param[in]	value	incoming string
796 @return 0 for valid name */
797 static
798 int
innodb_tmpdir_validate(THD * thd,struct st_mysql_sys_var *,void * save,struct st_mysql_value * value)799 innodb_tmpdir_validate(
800 	THD*				thd,
801 	struct st_mysql_sys_var*,
802 	void*				save,
803 	struct st_mysql_value*		value)
804 {
805 
806 	char*	alter_tmp_dir;
807 	char*	innodb_tmp_dir;
808 	char	buff[OS_FILE_MAX_PATH];
809 	int	len = sizeof(buff);
810 	char	tmp_abs_path[FN_REFLEN + 2];
811 
812 	ut_ad(save != NULL);
813 	ut_ad(value != NULL);
814 
815 	if (check_global_access(thd, FILE_ACL)) {
816 		push_warning_printf(
817 			thd, Sql_condition::WARN_LEVEL_WARN,
818 			ER_WRONG_ARGUMENTS,
819 			"InnoDB: FILE Permissions required");
820 		*static_cast<const char**>(save) = NULL;
821 		return(1);
822 	}
823 
824 	alter_tmp_dir = (char*) value->val_str(value, buff, &len);
825 
826 	if (!alter_tmp_dir) {
827 		*static_cast<const char**>(save) = alter_tmp_dir;
828 		return(0);
829 	}
830 
831 	if (strlen(alter_tmp_dir) > FN_REFLEN) {
832 		push_warning_printf(
833 			thd, Sql_condition::WARN_LEVEL_WARN,
834 			ER_WRONG_ARGUMENTS,
835 			"Path length should not exceed %d bytes", FN_REFLEN);
836 		*static_cast<const char**>(save) = NULL;
837 		return(1);
838 	}
839 
840 	os_normalize_path(alter_tmp_dir);
841 	my_realpath(tmp_abs_path, alter_tmp_dir, 0);
842 	size_t	tmp_abs_len = strlen(tmp_abs_path);
843 
844 	if (my_access(tmp_abs_path, F_OK)) {
845 
846 		push_warning_printf(
847 			thd, Sql_condition::WARN_LEVEL_WARN,
848 			ER_WRONG_ARGUMENTS,
849 			"InnoDB: Path doesn't exist.");
850 		*static_cast<const char**>(save) = NULL;
851 		return(1);
852 	} else if (my_access(tmp_abs_path, R_OK | W_OK)) {
853 		push_warning_printf(
854 			thd, Sql_condition::WARN_LEVEL_WARN,
855 			ER_WRONG_ARGUMENTS,
856 			"InnoDB: Server doesn't have permission in "
857 			"the given location.");
858 		*static_cast<const char**>(save) = NULL;
859 		return(1);
860 	}
861 
862 	MY_STAT stat_info_dir;
863 
864 	if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) {
865 		if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) {
866 
867 			push_warning_printf(
868 				thd, Sql_condition::WARN_LEVEL_WARN,
869 				ER_WRONG_ARGUMENTS,
870 				"Given path is not a directory. ");
871 			*static_cast<const char**>(save) = NULL;
872 			return(1);
873 		}
874 	}
875 
876 	if (!is_mysql_datadir_path(tmp_abs_path)) {
877 
878 		push_warning_printf(
879 			thd, Sql_condition::WARN_LEVEL_WARN,
880 			ER_WRONG_ARGUMENTS,
881 			"InnoDB: Path Location should not be same as "
882 			"mysql data directory location.");
883 		*static_cast<const char**>(save) = NULL;
884 		return(1);
885 	}
886 
887 	innodb_tmp_dir = static_cast<char*>(
888 		thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1));
889 	*static_cast<const char**>(save) = innodb_tmp_dir;
890 	return(0);
891 }
892 
893 /******************************************************************//**
894 Maps a MySQL trx isolation level code to the InnoDB isolation level code
895 @return	InnoDB isolation level */
896 static inline
897 ulint
898 innobase_map_isolation_level(
899 /*=========================*/
900 	enum_tx_isolation	iso);	/*!< in: MySQL isolation level code */
901 
902 /** Gets field offset for a field in a table.
903 @param[in]	table	MySQL table object
904 @param[in]	field	MySQL field object (from table->field array)
905 @return offset */
906 static inline
907 uint
get_field_offset(const TABLE * table,const Field * field)908 get_field_offset(
909 	const TABLE*	table,
910 	const Field*	field)
911 {
912 	return field->offset(table->record[0]);
913 }
914 
915 
916 /*************************************************************//**
917 Check for a valid value of innobase_compression_algorithm.
918 @return	0 for valid innodb_compression_algorithm. */
919 static
920 int
921 innodb_compression_algorithm_validate(
922 /*==================================*/
923 	THD*				thd,	/*!< in: thread handle */
924 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
925 						variable */
926 	void*				save,	/*!< out: immediate result
927 						for update function */
928 	struct st_mysql_value*		value);	/*!< in: incoming string */
929 
930 static ibool innodb_have_lzo=IF_LZO(1, 0);
931 static ibool innodb_have_lz4=IF_LZ4(1, 0);
932 static ibool innodb_have_lzma=IF_LZMA(1, 0);
933 static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
934 static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
935 static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
936 
937 static
938 int
939 innodb_encrypt_tables_validate(
940 /*==================================*/
941 	THD*				thd,	/*!< in: thread handle */
942 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
943 						variable */
944 	void*				save,	/*!< out: immediate result
945 						for update function */
946 	struct st_mysql_value*		value);	/*!< in: incoming string */
947 
948 static const char innobase_hton_name[]= "InnoDB";
949 
950 static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
951   "Enable InnoDB locking in LOCK TABLES",
952   /* check_func */ NULL, /* update_func */ NULL,
953   /* default */ TRUE);
954 
955 static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
956   "Use strict mode when evaluating create options.",
957   NULL, NULL, TRUE);
958 
959 static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
960   "Create FTS index with stopword.",
961   NULL, NULL,
962   /* default */ TRUE);
963 
964 static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
965   "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
966   NULL, NULL, 50, 0, 1024 * 1024 * 1024, 0);
967 
968 static MYSQL_THDVAR_STR(ft_user_stopword_table,
969   PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
970   "User supplied stopword table name, effective in the session level.",
971   innodb_stopword_table_validate, NULL, NULL);
972 
973 static MYSQL_THDVAR_STR(tmpdir,
974   PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
975   "Directory for temporary non-tablespace files.",
976   innodb_tmpdir_validate, NULL, NULL);
977 
978 static SHOW_VAR innodb_status_variables[]= {
979   {"buffer_pool_dump_status",
980   (char*) &export_vars.innodb_buffer_pool_dump_status,	  SHOW_CHAR},
981   {"buffer_pool_load_status",
982   (char*) &export_vars.innodb_buffer_pool_load_status,	  SHOW_CHAR},
983   {"buffer_pool_resize_status",
984   (char*) &export_vars.innodb_buffer_pool_resize_status,  SHOW_CHAR},
985   {"buffer_pool_load_incomplete",
986   &export_vars.innodb_buffer_pool_load_incomplete,        SHOW_BOOL},
987   {"buffer_pool_pages_data",
988   (char*) &export_vars.innodb_buffer_pool_pages_data,	  SHOW_LONG},
989   {"buffer_pool_bytes_data",
990   (char*) &export_vars.innodb_buffer_pool_bytes_data,	  SHOW_LONG},
991   {"buffer_pool_pages_dirty",
992   (char*) &export_vars.innodb_buffer_pool_pages_dirty,	  SHOW_LONG},
993   {"buffer_pool_bytes_dirty",
994   (char*) &export_vars.innodb_buffer_pool_bytes_dirty,	  SHOW_LONG},
995   {"buffer_pool_pages_flushed",
996   (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
997   {"buffer_pool_pages_free",
998   (char*) &export_vars.innodb_buffer_pool_pages_free,	  SHOW_LONG},
999 #ifdef UNIV_DEBUG
1000   {"buffer_pool_pages_latched",
1001   (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
1002 #endif /* UNIV_DEBUG */
1003   {"buffer_pool_pages_misc",
1004   (char*) &export_vars.innodb_buffer_pool_pages_misc,	  SHOW_LONG},
1005   {"buffer_pool_pages_total",
1006   (char*) &export_vars.innodb_buffer_pool_pages_total,	  SHOW_LONG},
1007   {"buffer_pool_read_ahead_rnd",
1008   (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
1009   {"buffer_pool_read_ahead",
1010   (char*) &export_vars.innodb_buffer_pool_read_ahead,	  SHOW_LONG},
1011   {"buffer_pool_read_ahead_evicted",
1012   (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
1013   {"buffer_pool_read_requests",
1014   (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
1015   {"buffer_pool_reads",
1016   (char*) &export_vars.innodb_buffer_pool_reads,	  SHOW_LONG},
1017   {"buffer_pool_wait_free",
1018   (char*) &export_vars.innodb_buffer_pool_wait_free,	  SHOW_LONG},
1019   {"buffer_pool_write_requests",
1020   (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
1021   {"data_fsyncs",
1022   (char*) &export_vars.innodb_data_fsyncs,		  SHOW_LONG},
1023   {"data_pending_fsyncs",
1024   (char*) &export_vars.innodb_data_pending_fsyncs,	  SHOW_LONG},
1025   {"data_pending_reads",
1026   (char*) &export_vars.innodb_data_pending_reads,	  SHOW_LONG},
1027   {"data_pending_writes",
1028   (char*) &export_vars.innodb_data_pending_writes,	  SHOW_LONG},
1029   {"data_read",
1030   (char*) &export_vars.innodb_data_read,		  SHOW_LONG},
1031   {"data_reads",
1032   (char*) &export_vars.innodb_data_reads,		  SHOW_LONG},
1033   {"data_writes",
1034   (char*) &export_vars.innodb_data_writes,		  SHOW_LONG},
1035   {"data_written",
1036   (char*) &export_vars.innodb_data_written,		  SHOW_LONG},
1037   {"dblwr_pages_written",
1038   (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
1039   {"dblwr_writes",
1040   (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
1041   {"log_waits",
1042   (char*) &export_vars.innodb_log_waits,		  SHOW_LONG},
1043   {"log_write_requests",
1044   (char*) &export_vars.innodb_log_write_requests,	  SHOW_LONG},
1045   {"log_writes",
1046   (char*) &export_vars.innodb_log_writes,		  SHOW_LONG},
1047   {"os_log_fsyncs",
1048   (char*) &export_vars.innodb_os_log_fsyncs,		  SHOW_LONG},
1049   {"os_log_pending_fsyncs",
1050   (char*) &export_vars.innodb_os_log_pending_fsyncs,	  SHOW_LONG},
1051   {"os_log_pending_writes",
1052   (char*) &export_vars.innodb_os_log_pending_writes,	  SHOW_LONG},
1053   {"os_log_written",
1054   (char*) &export_vars.innodb_os_log_written,		  SHOW_LONGLONG},
1055   {"page_size",
1056   (char*) &export_vars.innodb_page_size,		  SHOW_LONG},
1057   {"pages_created",
1058   (char*) &export_vars.innodb_pages_created,		  SHOW_LONG},
1059   {"pages_read",
1060   (char*) &export_vars.innodb_pages_read,		  SHOW_LONG},
1061   {"pages_written",
1062   (char*) &export_vars.innodb_pages_written,		  SHOW_LONG},
1063   {"row_lock_current_waits",
1064   (char*) &export_vars.innodb_row_lock_current_waits,	  SHOW_LONG},
1065   {"row_lock_time",
1066   (char*) &export_vars.innodb_row_lock_time,		  SHOW_LONGLONG},
1067   {"row_lock_time_avg",
1068   (char*) &export_vars.innodb_row_lock_time_avg,	  SHOW_LONG},
1069   {"row_lock_time_max",
1070   (char*) &export_vars.innodb_row_lock_time_max,	  SHOW_LONG},
1071   {"row_lock_waits",
1072   (char*) &export_vars.innodb_row_lock_waits,		  SHOW_LONG},
1073   {"rows_deleted",
1074   (char*) &export_vars.innodb_rows_deleted,		  SHOW_LONG},
1075   {"rows_inserted",
1076   (char*) &export_vars.innodb_rows_inserted,		  SHOW_LONG},
1077   {"rows_read",
1078   (char*) &export_vars.innodb_rows_read,		  SHOW_LONG},
1079   {"rows_updated",
1080   (char*) &export_vars.innodb_rows_updated,		  SHOW_LONG},
1081   {"system_rows_deleted",
1082   (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG},
1083   {"system_rows_inserted",
1084   (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG},
1085   {"system_rows_read",
1086   (char*) &export_vars.innodb_system_rows_read, SHOW_LONG},
1087   {"system_rows_updated",
1088   (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG},
1089   {"num_open_files",
1090   (char*) &export_vars.innodb_num_open_files,		  SHOW_LONG},
1091   {"truncated_status_writes",
1092   (char*) &export_vars.innodb_truncated_status_writes,	  SHOW_LONG},
1093   {"available_undo_logs",
1094   (char*) &export_vars.innodb_available_undo_logs,        SHOW_LONG},
1095   {"undo_truncations",
1096   (char*) &export_vars.innodb_undo_truncations,           SHOW_LONG},
1097 
1098   /* Status variables for page compression */
1099   {"page_compression_saved",
1100    (char*) &export_vars.innodb_page_compression_saved,    SHOW_LONGLONG},
1101   {"num_index_pages_written",
1102    (char*) &export_vars.innodb_index_pages_written,       SHOW_LONGLONG},
1103   {"num_non_index_pages_written",
1104    (char*) &export_vars.innodb_non_index_pages_written,       SHOW_LONGLONG},
1105   {"num_pages_page_compressed",
1106    (char*) &export_vars.innodb_pages_page_compressed,     SHOW_LONGLONG},
1107   {"num_page_compressed_trim_op",
1108    (char*) &export_vars.innodb_page_compressed_trim_op,     SHOW_LONGLONG},
1109   {"num_pages_page_decompressed",
1110    (char*) &export_vars.innodb_pages_page_decompressed,   SHOW_LONGLONG},
1111   {"num_pages_page_compression_error",
1112    (char*) &export_vars.innodb_pages_page_compression_error,   SHOW_LONGLONG},
1113   {"num_pages_encrypted",
1114    (char*) &export_vars.innodb_pages_encrypted,   SHOW_LONGLONG},
1115   {"num_pages_decrypted",
1116    (char*) &export_vars.innodb_pages_decrypted,   SHOW_LONGLONG},
1117   {"have_lz4",
1118   (char*) &innodb_have_lz4,		  SHOW_BOOL},
1119   {"have_lzo",
1120   (char*) &innodb_have_lzo,		  SHOW_BOOL},
1121   {"have_lzma",
1122   (char*) &innodb_have_lzma,		  SHOW_BOOL},
1123   {"have_bzip2",
1124   (char*) &innodb_have_bzip2,		  SHOW_BOOL},
1125   {"have_snappy",
1126   (char*) &innodb_have_snappy,		  SHOW_BOOL},
1127   {"have_punch_hole",
1128   (char*) &innodb_have_punch_hole,	  SHOW_BOOL},
1129 
1130   /* Defragmentation */
1131   {"defragment_compression_failures",
1132   (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
1133   {"defragment_failures",
1134   (char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
1135   {"defragment_count",
1136   (char*) &export_vars.innodb_defragment_count, SHOW_LONG},
1137 
1138   {"instant_alter_column",
1139   (char*) &export_vars.innodb_instant_alter_column, SHOW_LONG},
1140 
1141   /* Online alter table status variables */
1142   {"onlineddl_rowlog_rows",
1143   (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
1144   {"onlineddl_rowlog_pct_used",
1145   (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG},
1146   {"onlineddl_pct_progress",
1147   (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG},
1148 
1149   /* Times secondary index lookup triggered cluster lookup and
1150   times prefix optimization avoided triggering cluster lookup */
1151   {"secondary_index_triggered_cluster_reads",
1152   (char*) &export_vars.innodb_sec_rec_cluster_reads,	  SHOW_LONG},
1153   {"secondary_index_triggered_cluster_reads_avoided",
1154   (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
1155 
1156   /* Encryption */
1157   {"encryption_rotation_pages_read_from_cache",
1158    (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
1159    SHOW_LONG},
1160   {"encryption_rotation_pages_read_from_disk",
1161   (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
1162    SHOW_LONG},
1163   {"encryption_rotation_pages_modified",
1164   (char*) &export_vars.innodb_encryption_rotation_pages_modified,
1165    SHOW_LONG},
1166   {"encryption_rotation_pages_flushed",
1167   (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
1168    SHOW_LONG},
1169   {"encryption_rotation_estimated_iops",
1170   (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
1171    SHOW_LONG},
1172   {"encryption_key_rotation_list_length",
1173   (char*)&export_vars.innodb_key_rotation_list_length,
1174    SHOW_LONGLONG},
1175   {"encryption_n_merge_blocks_encrypted",
1176   (char*)&export_vars.innodb_n_merge_blocks_encrypted,
1177    SHOW_LONGLONG},
1178   {"encryption_n_merge_blocks_decrypted",
1179   (char*)&export_vars.innodb_n_merge_blocks_decrypted,
1180    SHOW_LONGLONG},
1181   {"encryption_n_rowlog_blocks_encrypted",
1182   (char*)&export_vars.innodb_n_rowlog_blocks_encrypted,
1183    SHOW_LONGLONG},
1184   {"encryption_n_rowlog_blocks_decrypted",
1185   (char*)&export_vars.innodb_n_rowlog_blocks_decrypted,
1186    SHOW_LONGLONG},
1187   {"encryption_n_temp_blocks_encrypted",
1188   (char*)&export_vars.innodb_n_temp_blocks_encrypted,
1189    SHOW_LONGLONG},
1190   {"encryption_n_temp_blocks_decrypted",
1191   (char*)&export_vars.innodb_n_temp_blocks_decrypted,
1192    SHOW_LONGLONG},
1193 
  /* scrubbing */
1195   {"scrub_background_page_reorganizations",
1196    (char*) &export_vars.innodb_scrub_page_reorganizations,
1197    SHOW_LONG},
1198   {"scrub_background_page_splits",
1199    (char*) &export_vars.innodb_scrub_page_splits,
1200    SHOW_LONG},
1201   {"scrub_background_page_split_failures_underflow",
1202    (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
1203    SHOW_LONG},
1204   {"scrub_background_page_split_failures_out_of_filespace",
1205    (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
1206    SHOW_LONG},
1207   {"scrub_background_page_split_failures_missing_index",
1208    (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
1209    SHOW_LONG},
1210   {"scrub_background_page_split_failures_unknown",
1211    (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
1212    SHOW_LONG},
1213   {"scrub_log",
1214    (char*) &export_vars.innodb_scrub_log,
1215    SHOW_LONGLONG},
1216   {"encryption_num_key_requests",
1217    (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
1218 
1219   {NullS, NullS, SHOW_LONG}
1220 };
1221 
1222 /*****************************************************************//**
1223 Frees a possible InnoDB trx object associated with the current THD.
1224 @return 0 or error number */
1225 static
1226 int
1227 innobase_close_connection(
1228 /*======================*/
1229 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1230 	THD*		thd);		/*!< in: MySQL thread handle for
1231 					which to close the connection */
1232 
1233 /** Cancel any pending lock request associated with the current THD.
1234 @sa THD::awake() @sa ha_kill_query() */
1235 static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels);
1236 static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
1237 
1238 /*****************************************************************//**
1239 Commits a transaction in an InnoDB database or marks an SQL statement
1240 ended.
1241 @return 0 */
1242 static
1243 int
1244 innobase_commit(
1245 /*============*/
1246 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1247 	THD*		thd,		/*!< in: MySQL thread handle of the
1248 					user for whom the transaction should
1249 					be committed */
1250 	bool		commit_trx);	/*!< in: true - commit transaction
1251 					false - the current SQL statement
1252 					ended */
1253 
1254 /*****************************************************************//**
Rolls back a transaction or the current SQL statement.
@return 0 or error number */
1258 static
1259 int
1260 innobase_rollback(
1261 /*==============*/
1262 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1263 	THD*		thd,		/*!< in: handle to the MySQL thread
1264 					of the user whose transaction should
1265 					be rolled back */
1266 	bool		rollback_trx);	/*!< in: TRUE - rollback entire
1267 					transaction FALSE - rollback the current
1268 					statement only */
1269 
1270 /*****************************************************************//**
1271 Rolls back a transaction to a savepoint.
1272 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1273 given name */
1274 static
1275 int
1276 innobase_rollback_to_savepoint(
1277 /*===========================*/
1278 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1279 	THD*		thd,		/*!< in: handle to the MySQL thread of
1280 					the user whose XA transaction should
1281 					be rolled back to savepoint */
1282 	void*		savepoint);	/*!< in: savepoint data */
1283 
1284 /*****************************************************************//**
1285 Check whether innodb state allows to safely release MDL locks after
1286 rollback to savepoint.
@return true if it is safe, false if it is not safe. */
1288 static
1289 bool
1290 innobase_rollback_to_savepoint_can_release_mdl(
1291 /*===========================================*/
1292 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1293 	THD*		thd);		/*!< in: handle to the MySQL thread of
1294 					the user whose XA transaction should
1295 					be rolled back to savepoint */
1296 
1297 /*****************************************************************//**
1298 Sets a transaction savepoint.
1299 @return always 0, that is, always succeeds */
1300 static
1301 int
1302 innobase_savepoint(
1303 /*===============*/
1304 	handlerton*	hton,		/*!< in/out: InnoDB handlerton */
1305 	THD*		thd,		/*!< in: handle to the MySQL thread of
1306 					the user's XA transaction for which
1307 					we need to take a savepoint */
1308 	void*		savepoint);	/*!< in: savepoint data */
1309 
1310 /*****************************************************************//**
1311 Release transaction savepoint name.
1312 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
1313 given name */
1314 static
1315 int
1316 innobase_release_savepoint(
1317 /*=======================*/
1318 	handlerton*	hton,		/*!< in/out: handlerton for InnoDB */
1319 	THD*		thd,		/*!< in: handle to the MySQL thread
1320 					of the user whose transaction's
1321 					savepoint should be released */
1322 	void*		savepoint);	/*!< in: savepoint data */
1323 
1324 static void innobase_checkpoint_request(handlerton *hton, void *cookie);
1325 
1326 /** @brief Initialize the default value of innodb_commit_concurrency.
1327 
1328 Once InnoDB is running, the innodb_commit_concurrency must not change
1329 from zero to nonzero. (Bug #42101)
1330 
1331 The initial default value is 0, and without this extra initialization,
1332 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
1333 to 0, even if it was initially set to nonzero at the command line
1334 or configuration file. */
1335 static
1336 void
1337 innobase_commit_concurrency_init_default();
1338 /*=======================================*/
1339 
1340 /** @brief Adjust some InnoDB startup parameters based on file contents
1341 or innodb_page_size. */
1342 static
1343 void
1344 innodb_params_adjust();
1345 
1346 /*******************************************************************//**
1347 This function is used to prepare an X/Open XA distributed transaction.
1348 @return 0 or error number */
1349 static
1350 int
1351 innobase_xa_prepare(
1352 /*================*/
1353 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1354 	THD*		thd,		/*!< in: handle to the MySQL thread of
1355 					the user whose XA transaction should
1356 					be prepared */
1357 	bool		all);		/*!< in: true - prepare transaction
1358 					false - the current SQL statement
1359 					ended */
1360 /*******************************************************************//**
1361 This function is used to recover X/Open XA distributed transactions.
1362 @return number of prepared transactions stored in xid_list */
1363 static
1364 int
1365 innobase_xa_recover(
1366 /*================*/
1367 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1368 	XID*		xid_list,	/*!< in/out: prepared transactions */
1369 	uint		len);		/*!< in: number of slots in xid_list */
1370 /*******************************************************************//**
1371 This function is used to commit one X/Open XA distributed transaction
1372 which is in the prepared state
1373 @return 0 or error number */
1374 static
1375 int
1376 innobase_commit_by_xid(
1377 /*===================*/
1378 	handlerton*	hton,		/*!< in: InnoDB handlerton */
1379 	XID*		xid);		/*!< in: X/Open XA transaction
1380 					identification */
1381 /** Remove all tables in the named database inside InnoDB.
1382 @param[in]	hton	handlerton from InnoDB
1383 @param[in]	path	Database path; Inside InnoDB the name of the last
1384 directory in the path is used as the database name.
1385 For example, in 'mysql/data/test' the database name is 'test'. */
1386 static
1387 void
1388 innobase_drop_database(
1389 	handlerton*	hton,
1390 	char*		path);
1391 
1392 /** Shut down the InnoDB storage engine.
1393 @return	0 */
1394 static
1395 int
1396 innobase_end(handlerton*, ha_panic_function);
1397 
1398 /*****************************************************************//**
1399 Creates an InnoDB transaction struct for the thd if it does not yet have one.
1400 Starts a new InnoDB transaction if a transaction is not yet started. And
1401 assigns a new snapshot for a consistent read if the transaction does not yet
1402 have one.
1403 @return 0 */
1404 static
1405 int
1406 innobase_start_trx_and_assign_read_view(
1407 /*====================================*/
1408 	handlerton*	hton,		/* in: InnoDB handlerton */
1409 	THD*		thd);		/* in: MySQL thread handle of the
1410 					user for whom the transaction should
1411 					be committed */
1412 
1413 /** Flush InnoDB redo logs to the file system.
1414 @param[in]	hton			InnoDB handlerton
1415 @param[in]	binlog_group_flush	true if we got invoked by binlog
1416 group commit during flush stage, false in other cases.
1417 @return false */
1418 static
1419 bool
innobase_flush_logs(handlerton * hton,bool binlog_group_flush)1420 innobase_flush_logs(
1421 	handlerton*	hton,
1422 	bool		binlog_group_flush)
1423 {
1424 	DBUG_ENTER("innobase_flush_logs");
1425 	DBUG_ASSERT(hton == innodb_hton_ptr);
1426 
1427 	if (srv_read_only_mode) {
1428 		DBUG_RETURN(false);
1429 	}
1430 
1431 	/* If !binlog_group_flush, we got invoked by FLUSH LOGS or similar.
1432 	Else, we got invoked by binlog group commit during flush stage. */
1433 
1434 	if (binlog_group_flush && srv_flush_log_at_trx_commit == 0) {
1435 		/* innodb_flush_log_at_trx_commit=0
1436 		(write and sync once per second).
1437 		Do not flush the redo log during binlog group commit. */
1438 		DBUG_RETURN(false);
1439 	}
1440 
1441 	/* Flush the redo log buffer to the redo log file.
1442 	Sync it to disc if we are in FLUSH LOGS, or if
1443 	innodb_flush_log_at_trx_commit=1
1444 	(write and sync at each commit). */
1445 	log_buffer_flush_to_disk(!binlog_group_flush
1446 				 || srv_flush_log_at_trx_commit == 1);
1447 
1448 	DBUG_RETURN(false);
1449 }
1450 
1451 /** Flush InnoDB redo logs to the file system.
1452 @param[in]	hton			InnoDB handlerton
1453 @param[in]	binlog_group_flush	true if we got invoked by binlog
1454 group commit during flush stage, false in other cases.
1455 @return false */
1456 static
1457 bool
innobase_flush_logs(handlerton * hton)1458 innobase_flush_logs(
1459 	handlerton*	hton)
1460 {
1461 	return innobase_flush_logs(hton, true);
1462 }
1463 
1464 /************************************************************************//**
1465 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
1466 InnoDB Monitor to the client.
1467 @return 0 on success */
1468 static
1469 int
1470 innodb_show_status(
1471 /*===============*/
1472 	handlerton*	hton,		/*!< in: the innodb handlerton */
1473 	THD*		thd,		/*!< in: the MySQL query thread of
1474 					the caller */
1475 	stat_print_fn*	stat_print);
1476 /************************************************************************//**
1477 Return 0 on success and non-zero on failure. Note: the bool return type
1478 seems to be abused here, should be an int. */
1479 static
1480 bool
1481 innobase_show_status(
1482 /*=================*/
1483 	handlerton*		hton,	/*!< in: the innodb handlerton */
1484 	THD*			thd,	/*!< in: the MySQL query thread of
1485 					the caller */
1486 	stat_print_fn*		stat_print,
1487 	enum ha_stat_type	stat_type);
1488 
1489 /****************************************************************//**
1490 Parse and enable InnoDB monitor counters during server startup.
1491 User can enable monitor counters/groups by specifying
1492 "loose-innodb_monitor_enable = monitor_name1;monitor_name2..."
1493 in server configuration file or at the command line. */
1494 static
1495 void
1496 innodb_enable_monitor_at_startup(
1497 /*=============================*/
1498 	char*	str);	/*!< in: monitor counter enable list */
1499 
1500 #ifdef MYSQL_STORE_FTS_DOC_ID
1501 /** Store doc_id value into FTS_DOC_ID field
1502 @param[in,out]	tbl	table containing FULLTEXT index
1503 @param[in]	doc_id	FTS_DOC_ID value */
1504 static
1505 void
innobase_fts_store_docid(TABLE * tbl,ulonglong doc_id)1506 innobase_fts_store_docid(
1507 	TABLE*		tbl,
1508 	ulonglong	doc_id)
1509 {
1510 	my_bitmap_map*	old_map
1511 		= dbug_tmp_use_all_columns(tbl, tbl->write_set);
1512 
1513 	tbl->fts_doc_id_field->store(static_cast<longlong>(doc_id), true);
1514 
1515 	dbug_tmp_restore_column_map(tbl->write_set, old_map);
1516 }
1517 #endif
1518 
1519 /*************************************************************//**
1520 Check for a valid value of innobase_commit_concurrency.
1521 @return 0 for valid innodb_commit_concurrency */
1522 static
1523 int
innobase_commit_concurrency_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)1524 innobase_commit_concurrency_validate(
1525 /*=================================*/
1526 	THD*, st_mysql_sys_var*,
1527 	void*				save,	/*!< out: immediate result
1528 						for update function */
1529 	struct st_mysql_value*		value)	/*!< in: incoming string */
1530 {
1531 	long long	intbuf;
1532 	ulong		commit_concurrency;
1533 
1534 	DBUG_ENTER("innobase_commit_concurrency_validate");
1535 
1536 	if (value->val_int(value, &intbuf)) {
1537 		/* The value is NULL. That is invalid. */
1538 		DBUG_RETURN(1);
1539 	}
1540 
1541 	*reinterpret_cast<ulong*>(save) = commit_concurrency
1542 		= static_cast<ulong>(intbuf);
1543 
1544 	/* Allow the value to be updated, as long as it remains zero
1545 	or nonzero. */
1546 	DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency));
1547 }
1548 
1549 /*******************************************************************//**
1550 Function for constructing an InnoDB table handler instance. */
1551 static
1552 handler*
innobase_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)1553 innobase_create_handler(
1554 /*====================*/
1555 	handlerton*	hton,	/*!< in: InnoDB handlerton */
1556 	TABLE_SHARE*	table,
1557 	MEM_ROOT*	mem_root)
1558 {
1559 	return(new (mem_root) ha_innobase(hton, table));
1560 }
1561 
1562 /* General functions */
1563 
1564 /** Check that a page_size is correct for InnoDB.
1565 If correct, set the associated page_size_shift which is the power of 2
1566 for this page size.
1567 @param[in]	page_size	Page Size to evaluate
1568 @return an associated page_size_shift if valid, 0 if invalid. */
1569 inline
1570 ulong
innodb_page_size_validate(ulong page_size)1571 innodb_page_size_validate(
1572 	ulong	page_size)
1573 {
1574 	ulong		n;
1575 
1576 	DBUG_ENTER("innodb_page_size_validate");
1577 
1578 	for (n = UNIV_PAGE_SIZE_SHIFT_MIN;
1579 	     n <= UNIV_PAGE_SIZE_SHIFT_MAX;
1580 	     n++) {
1581 		if (page_size == static_cast<ulong>(1 << n)) {
1582 			DBUG_RETURN(n);
1583 		}
1584 	}
1585 
1586 	DBUG_RETURN(0);
1587 }
1588 
1589 /******************************************************************//**
1590 Returns true if the thread is the replication thread on the slave
1591 server. Used in srv_conc_enter_innodb() to determine if the thread
1592 should be allowed to enter InnoDB - the replication thread is treated
1593 differently than other threads. Also used in
1594 srv_conc_force_exit_innodb().
1595 @return true if thd is the replication thread */
1596 ibool
thd_is_replication_slave_thread(THD * thd)1597 thd_is_replication_slave_thread(
1598 /*============================*/
1599 	THD*	thd)	/*!< in: thread handle */
1600 {
1601 	return thd && ((ibool) thd_slave_thread(thd));
1602 }
1603 
1604 /******************************************************************//**
1605 Returns true if transaction should be flagged as read-only.
1606 @return true if the thd is marked as read-only */
1607 bool
thd_trx_is_read_only(THD * thd)1608 thd_trx_is_read_only(
1609 /*=================*/
1610 	THD*	thd)	/*!< in: thread handle */
1611 {
1612 	return(thd != 0 && thd_tx_is_read_only(thd));
1613 }
1614 
1615 static MYSQL_THDVAR_BOOL(background_thread,
1616 			 PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_NOSYSVAR,
1617 			 "Internal (not user visible) flag to mark "
1618 			 "background purge threads", NULL, NULL, 0);
1619 
1620 /** Create a MYSQL_THD for a background thread and mark it as such.
1621 @param name thread info for SHOW PROCESSLIST
1622 @return new MYSQL_THD */
1623 MYSQL_THD
innobase_create_background_thd(const char * name)1624 innobase_create_background_thd(const char* name)
1625 /*============================*/
1626 {
1627 	MYSQL_THD thd= create_thd();
1628 	thd_proc_info(thd, name);
1629 	THDVAR(thd, background_thread) = true;
1630 	return thd;
1631 }
1632 
1633 
1634 /** Destroy a background purge thread THD.
1635 @param[in]	thd	MYSQL_THD to destroy */
1636 void
innobase_destroy_background_thd(MYSQL_THD thd)1637 innobase_destroy_background_thd(
1638 /*============================*/
1639 	MYSQL_THD thd)
1640 {
1641 	/* need to close the connection explicitly, the server won't do it
1642 	if innodb is in the PLUGIN_IS_DYING state */
1643 	innobase_close_connection(innodb_hton_ptr, thd);
1644 	thd_set_ha_data(thd, innodb_hton_ptr, NULL);
1645 	destroy_thd(thd);
1646 }
1647 
1648 /** Close opened tables, free memory, delete items for a MYSQL_THD.
1649 @param[in]	thd	MYSQL_THD to reset */
1650 void
innobase_reset_background_thd(MYSQL_THD thd)1651 innobase_reset_background_thd(MYSQL_THD thd)
1652 {
1653 	if (!thd) {
1654 		thd = current_thd;
1655 	}
1656 
1657 	ut_ad(thd);
1658 	ut_ad(THDVAR(thd, background_thread));
1659 
1660 	/* background purge thread */
1661 	const char *proc_info= thd_proc_info(thd, "reset");
1662 	reset_thd(thd);
1663 	thd_proc_info(thd, proc_info);
1664 }
1665 
1666 
1667 /******************************************************************//**
1668 Check if the transaction is an auto-commit transaction. TRUE also
1669 implies that it is a SELECT (read-only) transaction.
1670 @return true if the transaction is an auto commit read-only transaction. */
1671 ibool
thd_trx_is_auto_commit(THD * thd)1672 thd_trx_is_auto_commit(
1673 /*===================*/
1674 	THD*	thd)	/*!< in: thread handle, can be NULL */
1675 {
1676 	return(thd != NULL
1677 	       && !thd_test_options(
1678 		       thd,
1679 		       OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
1680 	       && thd_is_select(thd));
1681 }
1682 
1683 /** Enter InnoDB engine after checking the max number of user threads
1684 allowed, else the thread is put into sleep.
1685 @param[in,out]	prebuilt	row prebuilt handler */
innobase_srv_conc_enter_innodb(row_prebuilt_t * prebuilt)1686 static inline void innobase_srv_conc_enter_innodb(row_prebuilt_t *prebuilt)
1687 {
1688 	trx_t* trx = prebuilt->trx;
1689 
1690 #ifdef WITH_WSREP
1691 	if (global_system_variables.wsrep_on &&
1692 	    (wsrep_thd_is_applying(trx->mysql_thd)
1693 	     || wsrep_thd_is_toi(trx->mysql_thd))) {
1694 		return;
1695 	}
1696 #endif /* WITH_WSREP */
1697 
1698 	if (srv_thread_concurrency) {
1699 		if (trx->n_tickets_to_enter_innodb > 0) {
1700 
1701 			/* If trx has 'free tickets' to enter the engine left,
1702 			then use one such ticket */
1703 
1704 			--trx->n_tickets_to_enter_innodb;
1705 
1706 		} else if (trx->mysql_thd != NULL
1707 			   && thd_is_replication_slave_thread(trx->mysql_thd)) {
1708 			const ulonglong end = my_interval_timer()
1709 				+ ulonglong(srv_replication_delay) * 1000000;
1710 			while ((srv_conc_get_active_threads()
1711 			        >= srv_thread_concurrency)
1712 			       && my_interval_timer() < end) {
1713 				os_thread_sleep(2000 /* 2 ms */);
1714 			}
1715 		} else {
1716 			srv_conc_enter_innodb(prebuilt);
1717 		}
1718 	}
1719 }
1720 
1721 /** Note that the thread wants to leave InnoDB only if it doesn't have
1722 any spare tickets.
1723 @param[in,out]	m_prebuilt	row prebuilt handler */
innobase_srv_conc_exit_innodb(row_prebuilt_t * prebuilt)1724 static inline void innobase_srv_conc_exit_innodb(row_prebuilt_t *prebuilt)
1725 {
1726 	ut_ad(!sync_check_iterate(sync_check()));
1727 
1728 	trx_t* trx = prebuilt->trx;
1729 
1730 #ifdef WITH_WSREP
1731 	if (global_system_variables.wsrep_on &&
1732 	    (wsrep_thd_is_applying(trx->mysql_thd)
1733 	     || wsrep_thd_is_toi(trx->mysql_thd))) {
1734 		return;
1735 	}
1736 #endif /* WITH_WSREP */
1737 
1738 	/* This is to avoid making an unnecessary function call. */
1739 	if (trx->declared_to_be_inside_innodb
1740 	    && trx->n_tickets_to_enter_innodb == 0) {
1741 
1742 		srv_conc_force_exit_innodb(trx);
1743 	}
1744 }
1745 
1746 /******************************************************************//**
1747 Force a thread to leave InnoDB even if it has spare tickets. */
1748 static inline
1749 void
innobase_srv_conc_force_exit_innodb(trx_t * trx)1750 innobase_srv_conc_force_exit_innodb(
1751 /*================================*/
1752 	trx_t*	trx)	/*!< in: transaction handle */
1753 {
1754 	ut_ad(!sync_check_iterate(sync_check()));
1755 
1756 	/* This is to avoid making an unnecessary function call. */
1757 	if (trx->declared_to_be_inside_innodb) {
1758 		srv_conc_force_exit_innodb(trx);
1759 	}
1760 }
1761 
1762 /******************************************************************//**
1763 Returns the NUL terminated value of glob_hostname.
1764 @return pointer to glob_hostname. */
1765 const char*
server_get_hostname()1766 server_get_hostname()
1767 /*=================*/
1768 {
1769 	return(glob_hostname);
1770 }
1771 
1772 /******************************************************************//**
1773 Returns true if the transaction this thread is processing has edited
1774 non-transactional tables. Used by the deadlock detector when deciding
1775 which transaction to rollback in case of a deadlock - we try to avoid
1776 rolling back transactions that have edited non-transactional tables.
1777 @return true if non-transactional tables have been edited */
1778 ibool
thd_has_edited_nontrans_tables(THD * thd)1779 thd_has_edited_nontrans_tables(
1780 /*===========================*/
1781 	THD*	thd)	/*!< in: thread handle */
1782 {
1783 	return((ibool) thd_non_transactional_update(thd));
1784 }
1785 
1786 /* Return high resolution timestamp for the start of the current query */
1787 UNIV_INTERN
1788 unsigned long long
thd_query_start_micro(const THD * thd)1789 thd_query_start_micro(
1790 	const THD*	thd)	/*!< in: thread handle */
1791 {
1792 	return thd_start_utime(thd);
1793 }
1794 
1795 /******************************************************************//**
1796 Returns true if the thread is executing a SELECT statement.
1797 @return true if thd is executing SELECT */
1798 ibool
thd_is_select(const THD * thd)1799 thd_is_select(
1800 /*==========*/
1801 	const THD*	thd)	/*!< in: thread handle */
1802 {
1803 	return(thd_sql_command(thd) == SQLCOM_SELECT);
1804 }
1805 
1806 /******************************************************************//**
1807 Returns the lock wait timeout for the current connection.
1808 @return the lock wait timeout, in seconds */
1809 ulong
thd_lock_wait_timeout(THD * thd)1810 thd_lock_wait_timeout(
1811 /*==================*/
1812 	THD*	thd)	/*!< in: thread handle, or NULL to query
1813 			the global innodb_lock_wait_timeout */
1814 {
1815 	/* According to <mysql/plugin.h>, passing thd == NULL
1816 	returns the global value of the session variable. */
1817 	return(THDVAR(thd, lock_wait_timeout));
1818 }
1819 
1820 /** Get the value of innodb_tmpdir.
1821 @param[in]	thd	thread handle, or NULL to query
1822 			the global innodb_tmpdir.
1823 @retval NULL if innodb_tmpdir="" */
1824 const char*
thd_innodb_tmpdir(THD * thd)1825 thd_innodb_tmpdir(
1826 	THD*	thd)
1827 {
1828 	ut_ad(!sync_check_iterate(sync_check()));
1829 
1830 	const char*	tmp_dir = THDVAR(thd, tmpdir);
1831 
1832 	if (tmp_dir != NULL && *tmp_dir == '\0') {
1833 		tmp_dir = NULL;
1834 	}
1835 
1836 	return(tmp_dir);
1837 }
1838 
1839 /** Obtain the InnoDB transaction of a MySQL thread.
1840 @param[in,out]	thd	thread handle
1841 @return reference to transaction pointer */
thd_to_trx(THD * thd)1842 static trx_t* thd_to_trx(THD* thd)
1843 {
1844 	return reinterpret_cast<trx_t*>(thd_get_ha_data(thd, innodb_hton_ptr));
1845 }
1846 
1847 #ifdef WITH_WSREP
1848 /********************************************************************//**
1849 Obtain the InnoDB transaction id of a MySQL thread.
1850 @return	transaction id */
1851 __attribute__((warn_unused_result, nonnull))
1852 ulonglong
thd_to_trx_id(THD * thd)1853 thd_to_trx_id(
1854 	THD*	thd)	/*!< in: MySQL thread */
1855 {
1856 	return(thd_to_trx(thd)->id);
1857 }
1858 
1859 static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool);
1860 static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
1861 static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
1862 #endif /* WITH_WSREP */
1863 /********************************************************************//**
1864 Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
1865 time calls srv_active_wake_master_thread. This function should be used
1866 when a single database operation may introduce a small need for
1867 server utility activity, like checkpointing. */
1868 inline
1869 void
innobase_active_small(void)1870 innobase_active_small(void)
1871 /*=======================*/
1872 {
1873 	innobase_active_counter++;
1874 
1875 	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
1876 		srv_active_wake_master_thread();
1877 	}
1878 }
1879 
/********************************************************************//**
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.

Besides the plain mapping, some cases have side effects:
- DB_DEADLOCK and DB_LOCK_TABLE_FULL mark the whole transaction for
  rollback (when thd is not NULL);
- DB_LOCK_WAIT_TIMEOUT marks a rollback whose scope is controlled by
  row_rollback_on_timeout (when thd is not NULL);
- DB_FOREIGN_EXCEED_MAX_CASCADE pushes a warning to thd;
- DB_CANT_CREATE_GEOMETRY_OBJECT, DB_TEMP_FILE_WRITE_FAIL,
  DB_TOO_BIG_RECORD and DB_TOO_BIG_INDEX_COL report an error message.

Any error code without an explicit case maps to HA_ERR_GENERIC.
@return MySQL error code */
static int
convert_error_code_to_mysql(
/*========================*/
	dberr_t	error,	/*!< in: InnoDB error code */
	ulint	flags,  /*!< in: InnoDB table flags, or 0 */
	THD*	thd)	/*!< in: user thread handle or NULL */
{
	switch (error) {
	case DB_SUCCESS:
		return(0);

	case DB_INTERRUPTED:
		return(HA_ERR_ABORTED_BY_USER);

	case DB_FOREIGN_EXCEED_MAX_CASCADE:
		/* The warning needs a thread to attach to; this is only
		checked in debug builds. */
		ut_ad(thd);
		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
				    HA_ERR_ROW_IS_REFERENCED,
				    "InnoDB: Cannot delete/update "
				    "rows with cascading foreign key "
				    "constraints that exceed max "
				    "depth of %d. Please "
				    "drop extra constraints and try "
				    "again", DICT_FK_MAX_RECURSIVE_LOAD);
		return(HA_ERR_FK_DEPTH_EXCEEDED);

	case DB_CANT_CREATE_GEOMETRY_OBJECT:
		my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, MYF(0));
		return(HA_ERR_NULL_IN_SPATIAL);

	case DB_ERROR:
	default:
		/* Note: the default label sits in the middle of the switch;
		the explicit cases below are still matched normally. */
		return(HA_ERR_GENERIC); /* unspecified error */

	case DB_DUPLICATE_KEY:
		/* Be cautious with returning this error, since
		mysql could re-enter the storage layer to get
		duplicated key info, the operation requires a
		valid table handle and/or transaction information,
		which might not always be available in the error
		handling stage. */
		return(HA_ERR_FOUND_DUPP_KEY);

	case DB_READ_ONLY:
		return(HA_ERR_TABLE_READONLY);

	case DB_FOREIGN_DUPLICATE_KEY:
		return(HA_ERR_FOREIGN_DUPLICATE_KEY);

	case DB_MISSING_HISTORY:
		return(HA_ERR_TABLE_DEF_CHANGED);

	case DB_RECORD_NOT_FOUND:
		return(HA_ERR_NO_ACTIVE_RECORD);

	case DB_DEADLOCK:
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */

		if (thd != NULL) {
			thd_mark_transaction_to_rollback(thd, 1);
		}

		return(HA_ERR_LOCK_DEADLOCK);

	case DB_LOCK_WAIT_TIMEOUT:
		/* Starting from 5.0.13, we let MySQL just roll back the
		latest SQL statement in a lock wait timeout. Previously, we
		rolled back the whole transaction. The scope passed to MySQL
		here follows row_rollback_on_timeout. */

		if (thd) {
			thd_mark_transaction_to_rollback(
				thd, (bool) row_rollback_on_timeout);
		}

		return(HA_ERR_LOCK_WAIT_TIMEOUT);

	case DB_NO_REFERENCED_ROW:
		return(HA_ERR_NO_REFERENCED_ROW);

	case DB_ROW_IS_REFERENCED:
		return(HA_ERR_ROW_IS_REFERENCED);

	/* All of these foreign key definition problems share one
	MySQL error code. */
	case DB_NO_FK_ON_S_BASE_COL:
	case DB_CANNOT_ADD_CONSTRAINT:
	case DB_CHILD_NO_INDEX:
	case DB_PARENT_NO_INDEX:
		return(HA_ERR_CANNOT_ADD_FOREIGN);

	case DB_CANNOT_DROP_CONSTRAINT:

		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
						misleading, a new MySQL error
						code should be introduced */

	case DB_CORRUPTION:
		return(HA_ERR_CRASHED);

	case DB_OUT_OF_FILE_SPACE:
		return(HA_ERR_RECORD_FILE_FULL);

	case DB_TEMP_FILE_WRITE_FAIL:
		/* Report the InnoDB error number and its text through the
		generic ER_GET_ERRMSG message. */
		my_error(ER_GET_ERRMSG, MYF(0),
                         DB_TEMP_FILE_WRITE_FAIL,
                         ut_strerr(DB_TEMP_FILE_WRITE_FAIL),
                         "InnoDB");
		return(HA_ERR_INTERNAL_ERROR);

	case DB_TABLE_IN_FK_CHECK:
		return(HA_ERR_TABLE_IN_FK_CHECK);

	case DB_TABLE_IS_BEING_USED:
		return(HA_ERR_WRONG_COMMAND);

	case DB_TABLE_NOT_FOUND:
		return(HA_ERR_NO_SUCH_TABLE);

	case DB_DECRYPTION_FAILED:
		return(HA_ERR_DECRYPTION_FAILED);

	case DB_TABLESPACE_NOT_FOUND:
		return(HA_ERR_TABLESPACE_MISSING);

	case DB_TOO_BIG_RECORD: {
		/* If prefix is true then a 768-byte prefix is stored
		locally for BLOB fields. Refer to dict_table_get_format().
		We limit max record size to 16k for 64k page size. */
		bool prefix = !DICT_TF_HAS_ATOMIC_BLOBS(flags);
		bool comp = !!(flags & DICT_TF_COMPACT);
		/* The limit reported to the user starts out as half of the
		free space of an empty page of the given format. */
		ulint free_space = page_get_free_space_of_empty(comp) / 2;

		/* Clamp the reported limit to one less than the maximum
		record data size of the row format. */
		if (free_space >= ulint(comp ? COMPRESSED_REC_MAX_DATA_SIZE :
				          REDUNDANT_REC_MAX_DATA_SIZE)) {
			free_space = (comp ? COMPRESSED_REC_MAX_DATA_SIZE :
				REDUNDANT_REC_MAX_DATA_SIZE) - 1;
		}

		/* The ROW_FORMAT hint and the inline prefix length are only
		meaningful when the table stores BLOB prefixes inline. */
		my_printf_error(ER_TOO_BIG_ROWSIZE,
			"Row size too large (> " ULINTPF "). Changing some columns "
			"to TEXT or BLOB %smay help. In current row "
			"format, BLOB prefix of %d bytes is stored inline.",
			MYF(0),
			free_space,
			prefix
			? "or using ROW_FORMAT=DYNAMIC or"
			  " ROW_FORMAT=COMPRESSED "
			: "",
			prefix
			? DICT_MAX_FIXED_COL_LEN
			: 0);
		return(HA_ERR_TO_BIG_ROW);
	}

	case DB_TOO_BIG_INDEX_COL:
		/* The message carries the maximum index column length
		allowed by the table flags. */
		my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
			 (ulong) DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
		return(HA_ERR_INDEX_COL_TOO_LONG);

	case DB_NO_SAVEPOINT:
		return(HA_ERR_NO_SAVEPOINT);

	case DB_LOCK_TABLE_FULL:
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */

		if (thd) {
			thd_mark_transaction_to_rollback(thd, 1);
		}

		return(HA_ERR_LOCK_TABLE_FULL);

	case DB_FTS_INVALID_DOCID:
		return(HA_FTS_INVALID_DOCID);
	case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
		return(HA_ERR_OUT_OF_MEM);
	case DB_TOO_MANY_CONCURRENT_TRXS:
		return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
	case DB_UNSUPPORTED:
		return(HA_ERR_UNSUPPORTED);
	case DB_INDEX_CORRUPT:
		return(HA_ERR_INDEX_CORRUPT);
	case DB_UNDO_RECORD_TOO_BIG:
		return(HA_ERR_UNDO_REC_TOO_BIG);
	case DB_OUT_OF_MEMORY:
		return(HA_ERR_OUT_OF_MEM);
	case DB_TABLESPACE_EXISTS:
		return(HA_ERR_TABLESPACE_EXISTS);
	case DB_TABLESPACE_DELETED:
		return(HA_ERR_TABLESPACE_MISSING);
	case DB_IDENTIFIER_TOO_LONG:
		return(HA_ERR_INTERNAL_ERROR);
	case DB_TABLE_CORRUPT:
		return(HA_ERR_TABLE_CORRUPT);
	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
		return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
	case DB_COMPUTE_VALUE_FAILED:
		return(HA_ERR_GENERIC); // impossible
	}
}
2086 
2087 /*************************************************************//**
2088 Prints info of a THD object (== user session thread) to the given file. */
2089 void
innobase_mysql_print_thd(FILE * f,THD * thd,uint max_query_len)2090 innobase_mysql_print_thd(
2091 /*=====================*/
2092 	FILE*	f,		/*!< in: output stream */
2093 	THD*	thd,		/*!< in: MySQL THD object */
2094 	uint	max_query_len)	/*!< in: max query length to print, or 0 to
2095 				use the default max length */
2096 {
2097 	char	buffer[1024];
2098 
2099 	fputs(thd_get_error_context_description(thd, buffer, sizeof buffer,
2100 						max_query_len), f);
2101 	putc('\n', f);
2102 }
2103 
2104 /******************************************************************//**
2105 Get the variable length bounds of the given character set. */
2106 void
innobase_get_cset_width(ulint cset,ulint * mbminlen,ulint * mbmaxlen)2107 innobase_get_cset_width(
2108 /*====================*/
2109 	ulint	cset,		/*!< in: MySQL charset-collation code */
2110 	ulint*	mbminlen,	/*!< out: minimum length of a char (in bytes) */
2111 	ulint*	mbmaxlen)	/*!< out: maximum length of a char (in bytes) */
2112 {
2113 	CHARSET_INFO*	cs;
2114 	ut_ad(cset <= MAX_CHAR_COLL_NUM);
2115 	ut_ad(mbminlen);
2116 	ut_ad(mbmaxlen);
2117 
2118 	cs = all_charsets[cset];
2119 	if (cs) {
2120 		*mbminlen = cs->mbminlen;
2121 		*mbmaxlen = cs->mbmaxlen;
2122 		ut_ad(*mbminlen < DATA_MBMAX);
2123 		ut_ad(*mbmaxlen < DATA_MBMAX);
2124 	} else {
2125 		THD*	thd = current_thd;
2126 
2127 		if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
2128 
2129 			/* Fix bug#46256: allow tables to be dropped if the
2130 			collation is not found, but issue a warning. */
2131 			if (cset != 0) {
2132 
2133 				sql_print_warning(
2134 					"Unknown collation #" ULINTPF ".",
2135 					cset);
2136 			}
2137 		} else {
2138 
2139 			ut_a(cset == 0);
2140 		}
2141 
2142 		*mbminlen = *mbmaxlen = 0;
2143 	}
2144 }
2145 
2146 /******************************************************************//**
2147 Converts an identifier to a table name. */
2148 void
innobase_convert_from_table_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2149 innobase_convert_from_table_id(
2150 /*===========================*/
2151 	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
2152 	char*		to,	/*!< out: converted identifier */
2153 	const char*	from,	/*!< in: identifier to convert */
2154 	ulint		len)	/*!< in: length of 'to', in bytes */
2155 {
2156 	uint	errors;
2157 
2158 	strconvert(cs, from, FN_REFLEN, &my_charset_filename, to, (uint) len, &errors);
2159 }
2160 
2161 /**********************************************************************
2162 Check if the length of the identifier exceeds the maximum allowed.
2163 return true when length of identifier is too long. */
2164 my_bool
innobase_check_identifier_length(const char * id)2165 innobase_check_identifier_length(
2166 /*=============================*/
2167 	const char*	id)	/* in: FK identifier to check excluding the
2168 				database portion. */
2169 {
2170 	int		well_formed_error = 0;
2171 	CHARSET_INFO	*cs = system_charset_info;
2172 	DBUG_ENTER("innobase_check_identifier_length");
2173 
2174 	size_t len = my_well_formed_length(
2175 		cs, id, id + strlen(id),
2176 		NAME_CHAR_LEN, &well_formed_error);
2177 
2178 	if (well_formed_error || len == NAME_CHAR_LEN) {
2179 		my_error(ER_TOO_LONG_IDENT, MYF(0), id);
2180 		DBUG_RETURN(true);
2181 	}
2182 	DBUG_RETURN(false);
2183 }
2184 
2185 /******************************************************************//**
2186 Converts an identifier to UTF-8. */
2187 void
innobase_convert_from_id(CHARSET_INFO * cs,char * to,const char * from,ulint len)2188 innobase_convert_from_id(
2189 /*=====================*/
2190 	CHARSET_INFO*	cs,	/*!< in: the 'from' character set */
2191 	char*		to,	/*!< out: converted identifier */
2192 	const char*	from,	/*!< in: identifier to convert */
2193 	ulint		len)	/*!< in: length of 'to', in bytes */
2194 {
2195 	uint	errors;
2196 
2197 	strconvert(cs, from, FN_REFLEN, system_charset_info, to, (uint) len, &errors);
2198 }
2199 
2200 /******************************************************************//**
2201 Compares NUL-terminated UTF-8 strings case insensitively.
2202 @return 0 if a=b, <0 if a<b, >1 if a>b */
2203 int
innobase_strcasecmp(const char * a,const char * b)2204 innobase_strcasecmp(
2205 /*================*/
2206 	const char*	a,	/*!< in: first string to compare */
2207 	const char*	b)	/*!< in: second string to compare */
2208 {
2209 	if (!a) {
2210 		if (!b) {
2211 			return(0);
2212 		} else {
2213 			return(-1);
2214 		}
2215 	} else if (!b) {
2216 		return(1);
2217 	}
2218 
2219 	return(my_strcasecmp(system_charset_info, a, b));
2220 }
2221 
2222 /******************************************************************//**
2223 Compares NUL-terminated UTF-8 strings case insensitively. The
2224 second string contains wildcards.
2225 @return 0 if a match is found, 1 if not */
2226 static
2227 int
innobase_wildcasecmp(const char * a,const char * b)2228 innobase_wildcasecmp(
2229 /*=================*/
2230 	const char*	a,	/*!< in: string to compare */
2231 	const char*	b)	/*!< in: wildcard string to compare */
2232 {
2233 	return(wild_case_compare(system_charset_info, a, b));
2234 }
2235 
2236 /** Strip dir name from a full path name and return only the file name
2237 @param[in]	path_name	full path name
2238 @return file name or "null" if no file name */
2239 const char*
innobase_basename(const char * path_name)2240 innobase_basename(
2241 	const char*	path_name)
2242 {
2243 	const char*	name = base_name(path_name);
2244 
2245 	return((name) ? name : "null");
2246 }
2247 
2248 /******************************************************************//**
2249 Makes all characters in a NUL-terminated UTF-8 string lower case. */
2250 void
innobase_casedn_str(char * a)2251 innobase_casedn_str(
2252 /*================*/
2253 	char*	a)	/*!< in/out: string to put in lower case */
2254 {
2255 	my_casedn_str(system_charset_info, a);
2256 }
2257 
2258 /** Determines the current SQL statement.
2259 Thread unsafe, can only be called from the thread owning the THD.
2260 @param[in]	thd	MySQL thread handle
2261 @param[out]	length	Length of the SQL statement
2262 @return			SQL statement string */
2263 const char*
innobase_get_stmt_unsafe(THD * thd,size_t * length)2264 innobase_get_stmt_unsafe(
2265 	THD*	thd,
2266 	size_t*	length)
2267 {
2268 	if (const LEX_STRING *stmt = thd_query_string(thd)) {
2269 		*length = stmt->length;
2270 		return stmt->str;
2271 	}
2272 
2273 	*length = 0;
2274 	return NULL;
2275 }
2276 
2277 /**********************************************************************//**
2278 Get the current setting of the tdc_size global parameter. We do
2279 a dirty read because for one there is no synchronization object and
2280 secondly there is little harm in doing so even if we get a torn read.
2281 @return	value of tdc_size */
2282 ulint
innobase_get_table_cache_size(void)2283 innobase_get_table_cache_size(void)
2284 /*===============================*/
2285 {
2286 	return(tdc_size);
2287 }
2288 
2289 /**********************************************************************//**
2290 Get the current setting of the lower_case_table_names global parameter from
2291 mysqld.cc. We do a dirty read because for one there is no synchronization
2292 object and secondly there is little harm in doing so even if we get a torn
2293 read.
2294 @return value of lower_case_table_names */
2295 ulint
innobase_get_lower_case_table_names(void)2296 innobase_get_lower_case_table_names(void)
2297 /*=====================================*/
2298 {
2299 	return(lower_case_table_names);
2300 }
2301 
2302 /**
2303   Test a file path whether it is same as mysql data directory path.
2304 
2305   @param path null terminated character string
2306 
2307   @return
2308     @retval TRUE The path is different from mysql data directory.
2309     @retval FALSE The path is same as mysql data directory.
2310 */
is_mysql_datadir_path(const char * path)2311 static bool is_mysql_datadir_path(const char *path)
2312 {
2313   if (path == NULL)
2314     return false;
2315 
2316   char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN];
2317   convert_dirname(path_dir, path, NullS);
2318   convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS);
2319   size_t mysql_data_home_len= dirname_length(mysql_data_dir);
2320   size_t path_len = dirname_length(path_dir);
2321 
2322   if (path_len < mysql_data_home_len)
2323     return true;
2324 
2325   if (!lower_case_file_system)
2326     return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len));
2327 
2328   return(files_charset_info->coll->strnncoll(files_charset_info,
2329                                             (uchar *) path_dir, path_len,
2330                                             (uchar *) mysql_data_dir,
2331                                             mysql_data_home_len,
2332                                             TRUE));
2333 }
2334 
mysql_tmpfile_path(const char * path,const char * prefix)2335 static int mysql_tmpfile_path(const char *path, const char *prefix)
2336 {
2337   DBUG_ASSERT(path != NULL);
2338   DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
2339 
2340   char filename[FN_REFLEN];
2341   File fd = create_temp_file(filename, path, prefix, O_BINARY | O_SEQUENTIAL,
2342                              MYF(MY_WME | MY_TEMPORARY));
2343   return fd;
2344 }
2345 
2346 /** Creates a temporary file in the location specified by the parameter
2347 path. If the path is NULL, then it will be created in tmpdir.
2348 @param[in]	path	location for creating temporary file
2349 @return temporary file descriptor, or < 0 on error */
2350 os_file_t
innobase_mysql_tmpfile(const char * path)2351 innobase_mysql_tmpfile(
2352 	const char*	path)
2353 {
2354 #ifdef WITH_INNODB_DISALLOW_WRITES
2355 	os_event_wait(srv_allow_writes_event);
2356 #endif /* WITH_INNODB_DISALLOW_WRITES */
2357 	File	fd;
2358 
2359 	DBUG_EXECUTE_IF(
2360 		"innobase_tmpfile_creation_failure",
2361 		return(OS_FILE_CLOSED);
2362 	);
2363 
2364 	if (path == NULL) {
2365 		fd = mysql_tmpfile("ib");
2366 	} else {
2367 		fd = mysql_tmpfile_path(path, "ib");
2368 	}
2369 
2370 	if (fd < 0)
2371 		return OS_FILE_CLOSED;
2372 
2373 	/* Copy the file descriptor, so that the additional resources
2374 	allocated by create_temp_file() can be freed by invoking
2375 	my_close().
2376 
2377 	Because the file descriptor returned by this function
2378 	will be passed to fdopen(), it will be closed by invoking
2379 	fclose(), which in turn will invoke close() instead of
2380 	my_close(). */
2381 
2382 #ifdef _WIN32
2383 	/* Note that on Windows, the integer returned by mysql_tmpfile
2384 	has no relation to C runtime file descriptor. Here, we need
2385 	to call my_get_osfhandle to get the HANDLE and then convert it
2386 	to C runtime filedescriptor. */
2387 
2388 	HANDLE hFile = my_get_osfhandle(fd);
2389 	HANDLE hDup;
2390 	BOOL bOK = DuplicateHandle(
2391 			GetCurrentProcess(),
2392 			hFile, GetCurrentProcess(),
2393 			&hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
2394 	my_close(fd, MYF(MY_WME));
2395 
2396 	if (!bOK) {
2397 		my_osmaperr(GetLastError());
2398 		goto error;
2399 	}
2400 	return hDup;
2401 #else
2402 #ifdef F_DUPFD_CLOEXEC
2403 	int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2404 #else
2405 	int fd2 = dup(fd);
2406 #endif
2407 	my_close(fd, MYF(MY_WME));
2408 	if (fd2 < 0) {
2409 		set_my_errno(errno);
2410 		goto error;
2411 	}
2412 	return fd2;
2413 #endif
2414 
2415 error:
2416 	char errbuf[MYSYS_STRERROR_SIZE];
2417 
2418 	my_error(EE_OUT_OF_FILERESOURCES,
2419 		MYF(0),
2420 		"ib*", errno,
2421 		my_strerror(errbuf, sizeof(errbuf), errno));
2422 	return (OS_FILE_CLOSED);
2423 }
2424 
2425 /*********************************************************************//**
2426 Wrapper around MySQL's copy_and_convert function.
2427 @return number of bytes copied to 'to' */
2428 static
2429 ulint
innobase_convert_string(void * to,ulint to_length,CHARSET_INFO * to_cs,const void * from,ulint from_length,CHARSET_INFO * from_cs,uint * errors)2430 innobase_convert_string(
2431 /*====================*/
2432 	void*		to,		/*!< out: converted string */
2433 	ulint		to_length,	/*!< in: number of bytes reserved
2434 					for the converted string */
2435 	CHARSET_INFO*	to_cs,		/*!< in: character set to convert to */
2436 	const void*	from,		/*!< in: string to convert */
2437 	ulint		from_length,	/*!< in: number of bytes to convert */
2438 	CHARSET_INFO*	from_cs,	/*!< in: character set to convert
2439 					from */
2440 	uint*		errors)		/*!< out: number of errors encountered
2441 					during the conversion */
2442 {
2443 	return(copy_and_convert(
2444 			(char*) to, (uint32) to_length, to_cs,
2445 			(const char*) from, (uint32) from_length, from_cs,
2446 			errors));
2447 }
2448 
2449 /*******************************************************************//**
2450 Formats the raw data in "data" (in InnoDB on-disk format) that is of
2451 type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
2452 the result to "buf". The result is converted to "system_charset_info".
2453 Not more than "buf_size" bytes are written to "buf".
2454 The result is always NUL-terminated (provided buf_size > 0) and the
2455 number of bytes that were written to "buf" is returned (including the
2456 terminating NUL).
2457 @return number of bytes that were written */
2458 ulint
innobase_raw_format(const char * data,ulint data_len,ulint charset_coll,char * buf,ulint buf_size)2459 innobase_raw_format(
2460 /*================*/
2461 	const char*	data,		/*!< in: raw data */
2462 	ulint		data_len,	/*!< in: raw data length
2463 					in bytes */
2464 	ulint		charset_coll,	/*!< in: charset collation */
2465 	char*		buf,		/*!< out: output buffer */
2466 	ulint		buf_size)	/*!< in: output buffer size
2467 					in bytes */
2468 {
2469 	/* XXX we use a hard limit instead of allocating
2470 	but_size bytes from the heap */
2471 	CHARSET_INFO*	data_cs;
2472 	char		buf_tmp[8192];
2473 	ulint		buf_tmp_used;
2474 	uint		num_errors;
2475 
2476 	data_cs = all_charsets[charset_coll];
2477 
2478 	buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
2479 					       system_charset_info,
2480 					       data, data_len, data_cs,
2481 					       &num_errors);
2482 
2483 	return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
2484 }
2485 
/*
The helper function nlz(x) calculates the number of leading zeros
in the binary representation of the 64-bit number "x", either using a
built-in compiler function or a substitute trick based on the use
of the multiplication operation and a table indexed by the prefix
of the multiplication result.

Three implementations are selected at compile time:
 - GCC-compatible compilers: a macro over __builtin_clzll();
 - MSVC on x86, x64 and ARM64: compiler intrinsics;
 - anything else: a portable table-driven function.

NOTE(review): the behaviour of the builtin/intrinsic variants for x == 0
should be confirmed against the compiler documentation; the portable
variant returns 64 in that case.
*/
#ifdef __GNUC__
#define nlz(x) __builtin_clzll(x)
#elif defined(_MSC_VER) && !defined(_M_CEE_PURE) && \
  (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
#ifndef __INTRIN_H_
/* Include <intrin.h> with warnings 4255 and 4668 disabled. */
#pragma warning(push, 4)
#pragma warning(disable: 4255 4668)
#include <intrin.h>
#pragma warning(pop)
#endif
/* MSVC variant: number of leading zero bits of a 64-bit value. */
__forceinline unsigned int nlz (ulonglong x)
{
#if defined(_M_IX86) || defined(_M_X64)
  unsigned long n;
#ifdef _M_X64
  /* n receives the index of the highest set bit; for n in 0..63,
  n ^ 63 equals 63 - n, i.e. the number of leading zeros. */
  _BitScanReverse64(&n, x);
  return (unsigned int) n ^ 63;
#else
  /* 32-bit x86: scan the upper word first and fall back to the lower
  word when the upper one is zero. m is chosen so that n ^ m is the
  leading zero count within the full 64 bits. */
  unsigned long y = (unsigned long) (x >> 32);
  unsigned int m = 31;
  if (y == 0)
  {
    y = (unsigned long) x;
    m = 63;
  }
  _BitScanReverse(&n, y);
  return (unsigned int) n ^ m;
#endif
#elif defined(_M_ARM64)
  return _CountLeadingZeros(x);
#endif
}
#else
/* Portable variant: number of leading zero bits of a 64-bit value,
computed on the upper 32-bit word, or on the lower one (plus 32) when
the upper word is zero. */
inline unsigned int nlz (ulonglong x)
{
  /* Lookup table indexed by the top 6 bits of the product below. */
  static unsigned char table [48] = {
    32,  6,  5,  0,  4, 12,  0, 20,
    15,  3, 11,  0,  0, 18, 25, 31,
     8, 14,  2,  0, 10,  0,  0,  0,
     0,  0,  0, 21,  0,  0, 19, 26,
     7,  0, 13,  0, 16,  1, 22, 27,
     9,  0, 17, 23, 28, 24, 29, 30
  };
  unsigned int y= (unsigned int) (x >> 32);
  unsigned int n= 0;
  if (y == 0) {
    /* Upper word is empty: count within the lower word and add 32. */
    y= (unsigned int) x;
    n= 32;
  }
  y = y | (y >> 1); // Propagate leftmost 1-bit to the right.
  y = y | (y >> 2);
  y = y | (y >> 4);
  y = y | (y >> 8);
  y = y & ~(y >> 16);
  y = y * 0x3EF5D037;
  /* The top 6 bits of the 32-bit product select the table entry. */
  return n + table[y >> 26];
}
#endif
2551 
/*********************************************************************//**
Compute the next autoinc value.

For MySQL replication the autoincrement values can be partitioned among
the nodes. The offset is the start or origin of the autoincrement value
for a particular node. For n nodes the increment will be n and the offset
will be in the interval [1, n]. The formula tries to allocate the next
value for a particular node.

Note: This function is also called with increment set to the number of
values we want to reserve for multi-value inserts e.g.,

	INSERT INTO T VALUES(), (), ();

innobase_next_autoinc() will be called with increment set to 3 where
autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
the multi-value INSERT above.

The result is computed as
	(|current - offset| rounded down to a multiple of step)
	+ offset + step * need
where an offset greater than step is treated as 0. If current exceeds
max_value, or any of the intermediate operations would overflow an
unsigned 64-bit integer, the maximum ulonglong value is returned instead.
The result is not clamped to max_value.
@return the next value, or ~(ulonglong) 0 on overflow */
ulonglong
innobase_next_autoinc(
/*==================*/
	ulonglong	current,	/*!< in: Current value */
	ulonglong	need,		/*!< in: count of values needed */
	ulonglong	step,		/*!< in: AUTOINC increment step */
	ulonglong	offset,		/*!< in: AUTOINC offset */
	ulonglong	max_value)	/*!< in: max value for type */
{
	ulonglong	next_value;
	ulonglong	block;

	/* Should never be 0. */
	ut_a(need > 0);
	ut_a(step > 0);
	ut_a(max_value > 0);

	/*
	  We need to calculate the "block" value equal to the product
	  "step * need". However, when calculating this product, an integer
	  overflow can occur, so we cannot simply use the usual multiplication
	  operation. The snippet below calculates the product of two numbers
	  and detects an unsigned integer overflow:
	*/
	unsigned int	m= nlz(need);
	unsigned int	n= nlz(step);
	if (m + n <= 8 * sizeof(ulonglong) - 2) {
		// The bit width of the original values is too large,
		// therefore we are guaranteed to get an overflow.
		goto overflow;
	}
	/* Multiply by half of step first; if the top bit of that partial
	product is set, doubling it below would overflow. */
	block = need * (step >> 1);
	if ((longlong) block < 0) {
		goto overflow;
	}
	block += block;
	if (step & 1) {
		/* step was odd: add the "need" term that the halving
		dropped, and detect wrap-around of the addition. */
		block += need;
		if (block < need) {
			goto overflow;
		}
	}

	/* Check for overflow. Current can be > max_value if the value
	is in reality a negative value. Also, the visual studio compiler
	converts large double values (which hypothetically can then be
	passed here as the values of the "current" parameter) automatically
	into unsigned long long datatype maximum value: */
	if (current > max_value) {
		goto overflow;
	}

	/* According to MySQL documentation, if the offset is greater than
	the step then the offset is ignored. */
	if (offset > step) {
		offset = 0;
	}

	/*
	  Let's round the current value to within a step-size block:
	  take the absolute distance between current and offset and
	  round it down to a multiple of step.
	*/
	if (current > offset) {
		next_value = current - offset;
	} else {
		next_value = offset - current;
	}
	next_value -= next_value % step;

	/*
	  Add an offset to the next value and check that the addition
	  does not cause an integer overflow:
	*/
	next_value += offset;
	if (next_value < offset) {
		goto overflow;
	}

	/*
	  Add a block to the next value and check that the addition
	  does not cause an integer overflow:
	*/
	next_value += block;
	if (next_value < block) {
		goto overflow;
	}

	return(next_value);

overflow:
	/*
	  Allow auto_increment to go over max_value up to max ulonglong.
	  This allows us to detect that all values are exhausted.
	  If we don't do this, we will return max_value several times
	  and get duplicate key errors instead of auto increment value
	  out of range:
	*/
	return(~(ulonglong) 0);
}
2668 
2669 /********************************************************************//**
2670 Reset the autoinc value in the table.
2671 @return	DB_SUCCESS if all went well else error code */
2672 UNIV_INTERN
2673 dberr_t
innobase_reset_autoinc(ulonglong autoinc)2674 ha_innobase::innobase_reset_autoinc(
2675 /*================================*/
2676 	ulonglong	autoinc)	/*!< in: value to store */
2677 {
2678 	dberr_t		error;
2679 
2680 	error = innobase_lock_autoinc();
2681 
2682 	if (error == DB_SUCCESS) {
2683 
2684 		dict_table_autoinc_initialize(m_prebuilt->table, autoinc);
2685 		m_prebuilt->table->autoinc_mutex.unlock();
2686 	}
2687 
2688 	return(error);
2689 }
2690 
2691 /*******************************************************************//**
2692 Reset the auto-increment counter to the given value, i.e. the next row
2693 inserted will get the given value. This is called e.g. after TRUNCATE
2694 is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
2695 returned by storage engines that don't support this operation.
2696 @return	0 or error code */
2697 UNIV_INTERN
2698 int
reset_auto_increment(ulonglong value)2699 ha_innobase::reset_auto_increment(
2700 /*==============================*/
2701 	ulonglong	value)		/*!< in: new value for table autoinc */
2702 {
2703 	DBUG_ENTER("ha_innobase::reset_auto_increment");
2704 
2705 	dberr_t	error;
2706 
2707 	update_thd(ha_thd());
2708 
2709 	error = row_lock_table_autoinc_for_mysql(m_prebuilt);
2710 
2711 	if (error != DB_SUCCESS) {
2712 		DBUG_RETURN(convert_error_code_to_mysql(
2713 				    error, m_prebuilt->table->flags, m_user_thd));
2714 	}
2715 
2716 	/* The next value can never be 0. */
2717 	if (value == 0) {
2718 		value = 1;
2719 	}
2720 
2721 	innobase_reset_autoinc(value);
2722 
2723 	DBUG_RETURN(0);
2724 }
2725 
2726 /*********************************************************************//**
2727 Initializes some fields in an InnoDB transaction object. */
2728 static
2729 void
innobase_trx_init(THD * thd,trx_t * trx)2730 innobase_trx_init(
2731 /*==============*/
2732 	THD*	thd,	/*!< in: user thread handle */
2733 	trx_t*	trx)	/*!< in/out: InnoDB transaction handle */
2734 {
2735 	DBUG_ENTER("innobase_trx_init");
2736 	DBUG_ASSERT(thd == trx->mysql_thd);
2737 
2738 	/* Ensure that thd_lock_wait_timeout(), which may be called
2739 	while holding lock_sys.mutex, by lock_rec_enqueue_waiting(),
2740 	will not end up acquiring LOCK_global_system_variables in
2741 	intern_sys_var_ptr(). */
2742 	THDVAR(thd, lock_wait_timeout);
2743 
2744 	trx->check_foreigns = !thd_test_options(
2745 		thd, OPTION_NO_FOREIGN_KEY_CHECKS);
2746 
2747 	trx->check_unique_secondary = !thd_test_options(
2748 		thd, OPTION_RELAXED_UNIQUE_CHECKS);
2749 #ifdef WITH_WSREP
2750 	trx->wsrep = wsrep_on(thd);
2751 #endif
2752 
2753 	DBUG_VOID_RETURN;
2754 }
2755 
2756 /*********************************************************************//**
2757 Allocates an InnoDB transaction for a MySQL handler object for DML.
2758 @return InnoDB transaction handle */
2759 trx_t*
innobase_trx_allocate(THD * thd)2760 innobase_trx_allocate(
2761 /*==================*/
2762 	THD*	thd)	/*!< in: user thread handle */
2763 {
2764 	trx_t*	trx;
2765 
2766 	DBUG_ENTER("innobase_trx_allocate");
2767 	DBUG_ASSERT(thd != NULL);
2768 	DBUG_ASSERT(EQ_CURRENT_THD(thd));
2769 
2770 	trx = trx_create();
2771 
2772 	trx->mysql_thd = thd;
2773 
2774 	innobase_trx_init(thd, trx);
2775 
2776 	DBUG_RETURN(trx);
2777 }
2778 
2779 /*********************************************************************//**
2780 Gets the InnoDB transaction handle for a MySQL handler object, creates
2781 an InnoDB transaction struct if the corresponding MySQL thread struct still
2782 lacks one.
2783 @return InnoDB transaction handle */
2784 static inline
2785 trx_t*
check_trx_exists(THD * thd)2786 check_trx_exists(
2787 /*=============*/
2788 	THD*	thd)	/*!< in: user thread handle */
2789 {
2790 	if (trx_t* trx = thd_to_trx(thd)) {
2791 		ut_a(trx->magic_n == TRX_MAGIC_N);
2792 		innobase_trx_init(thd, trx);
2793 		return trx;
2794 	} else {
2795 		trx = innobase_trx_allocate(thd);
2796 		thd_set_ha_data(thd, innodb_hton_ptr, trx);
2797 		return trx;
2798 	}
2799 }
2800 
2801 /**
2802   Gets current trx.
2803 
2804   This function may be called during InnoDB initialisation, when
2805   innodb_hton_ptr->slot is not yet set to meaningful value.
2806 */
2807 
current_trx()2808 trx_t *current_trx()
2809 {
2810 	THD *thd=current_thd;
2811 	if (likely(thd != 0) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) {
2812 		return thd_to_trx(thd);
2813 	} else {
2814 		return(NULL);
2815 	}
2816 }
2817 
2818 /*********************************************************************//**
2819 Note that a transaction has been registered with MySQL.
2820 @return true if transaction is registered with MySQL 2PC coordinator */
2821 static inline
2822 bool
trx_is_registered_for_2pc(const trx_t * trx)2823 trx_is_registered_for_2pc(
2824 /*======================*/
2825 	const trx_t*	trx)	/* in: transaction */
2826 {
2827 	return(trx->is_registered == 1);
2828 }
2829 
2830 /*********************************************************************//**
2831 Note that a transaction has been registered with MySQL 2PC coordinator. */
2832 static inline
2833 void
trx_register_for_2pc(trx_t * trx)2834 trx_register_for_2pc(
2835 /*==================*/
2836 	trx_t*	trx)	/* in: transaction */
2837 {
2838 	trx->is_registered = 1;
2839 	ut_ad(!trx->active_commit_ordered);
2840 }
2841 
2842 /*********************************************************************//**
2843 Note that a transaction has been deregistered. */
2844 static inline
2845 void
trx_deregister_from_2pc(trx_t * trx)2846 trx_deregister_from_2pc(
2847 /*====================*/
2848 	trx_t*	trx)	/* in: transaction */
2849 {
2850   trx->is_registered= false;
2851   trx->active_commit_ordered= false;
2852 }
2853 
2854 /*********************************************************************//**
2855 Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
2856 Those flags are stored in .frm file and end up in the MySQL table object,
2857 but are frequently used inside InnoDB so we keep their copies into the
2858 InnoDB table object. */
2859 static
2860 void
innobase_copy_frm_flags_from_create_info(dict_table_t * innodb_table,const HA_CREATE_INFO * create_info)2861 innobase_copy_frm_flags_from_create_info(
2862 /*=====================================*/
2863 	dict_table_t*		innodb_table,	/*!< in/out: InnoDB table */
2864 	const HA_CREATE_INFO*	create_info)	/*!< in: create info */
2865 {
2866 	ibool	ps_on;
2867 	ibool	ps_off;
2868 
2869 	if (innodb_table->is_temporary()
2870 	    || innodb_table->no_rollback()) {
2871 		/* Temp tables do not use persistent stats. */
2872 		ps_on = FALSE;
2873 		ps_off = TRUE;
2874 	} else {
2875 		ps_on = create_info->table_options
2876 			& HA_OPTION_STATS_PERSISTENT;
2877 		ps_off = create_info->table_options
2878 			& HA_OPTION_NO_STATS_PERSISTENT;
2879 	}
2880 
2881 	dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2882 
2883 	dict_stats_auto_recalc_set(
2884 		innodb_table,
2885 		create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2886 		create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2887 
2888 	innodb_table->stats_sample_pages = create_info->stats_sample_pages;
2889 }
2890 
2891 /*********************************************************************//**
2892 Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
2893 Those flags are stored in .frm file and end up in the MySQL table object,
2894 but are frequently used inside InnoDB so we keep their copies into the
2895 InnoDB table object. */
2896 void
innobase_copy_frm_flags_from_table_share(dict_table_t * innodb_table,const TABLE_SHARE * table_share)2897 innobase_copy_frm_flags_from_table_share(
2898 /*=====================================*/
2899 	dict_table_t*		innodb_table,	/*!< in/out: InnoDB table */
2900 	const TABLE_SHARE*	table_share)	/*!< in: table share */
2901 {
2902 	ibool	ps_on;
2903 	ibool	ps_off;
2904 
2905 	if (innodb_table->is_temporary()) {
2906 		/* Temp tables do not use persistent stats */
2907 		ps_on = FALSE;
2908 		ps_off = TRUE;
2909 	} else {
2910 		ps_on = table_share->db_create_options
2911 			& HA_OPTION_STATS_PERSISTENT;
2912 		ps_off = table_share->db_create_options
2913 			& HA_OPTION_NO_STATS_PERSISTENT;
2914 	}
2915 
2916 	dict_stats_set_persistent(innodb_table, ps_on, ps_off);
2917 
2918 	dict_stats_auto_recalc_set(
2919 		innodb_table,
2920 		table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
2921 		table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
2922 
2923 	innodb_table->stats_sample_pages = table_share->stats_sample_pages;
2924 }
2925 
2926 /*********************************************************************//**
2927 Construct ha_innobase handler. */
2928 
ha_innobase(handlerton * hton,TABLE_SHARE * table_arg)2929 ha_innobase::ha_innobase(
2930 /*=====================*/
2931 	handlerton*	hton,
2932 	TABLE_SHARE*	table_arg)
2933 	:handler(hton, table_arg),
2934 	m_prebuilt(),
2935 	m_user_thd(),
2936 	m_int_table_flags(HA_REC_NOT_IN_SEQ
2937 			  | HA_NULL_IN_KEY
2938 			  | HA_CAN_VIRTUAL_COLUMNS
2939 			  | HA_CAN_INDEX_BLOBS
2940 			  | HA_CAN_SQL_HANDLER
2941 			  | HA_REQUIRES_KEY_COLUMNS_FOR_DELETE
2942 			  | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
2943 			  | HA_PRIMARY_KEY_IN_READ_INDEX
2944 			  | HA_BINLOG_ROW_CAPABLE
2945 			  | HA_CAN_GEOMETRY
2946 			  | HA_PARTIAL_COLUMN_READ
2947 			  | HA_TABLE_SCAN_ON_INDEX
2948 			  | HA_CAN_FULLTEXT
2949 			  | HA_CAN_FULLTEXT_EXT
2950 		/* JAN: TODO: MySQL 5.7
2951 			  | HA_CAN_FULLTEXT_HINTS
2952 		*/
2953 			  | HA_CAN_EXPORT
2954 			  | HA_CAN_RTREEKEYS
2955                           | HA_CAN_TABLES_WITHOUT_ROLLBACK
2956                           | HA_CAN_ONLINE_BACKUPS
2957 			  | HA_CONCURRENT_OPTIMIZE
2958 			  |  (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0)
2959 		  ),
2960 	m_start_of_scan(),
2961         m_mysql_has_locked()
2962 {}
2963 
2964 /*********************************************************************//**
2965 Destruct ha_innobase handler. */
2966 
~ha_innobase()2967 ha_innobase::~ha_innobase()
2968 /*======================*/
2969 {
2970 }
2971 
2972 /*********************************************************************//**
2973 Updates the user_thd field in a handle and also allocates a new InnoDB
2974 transaction handle if needed, and updates the transaction fields in the
2975 m_prebuilt struct. */
2976 void
update_thd(THD * thd)2977 ha_innobase::update_thd(
2978 /*====================*/
2979 	THD*	thd)	/*!< in: thd to use the handle */
2980 {
2981 	DBUG_ENTER("ha_innobase::update_thd");
2982 	DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
2983 		   m_user_thd, thd));
2984 
2985 	/* The table should have been opened in ha_innobase::open(). */
2986 	DBUG_ASSERT(m_prebuilt->table->get_ref_count() > 0);
2987 
2988 	trx_t*	trx = check_trx_exists(thd);
2989 
2990 	ut_ad(trx->dict_operation_lock_mode == 0);
2991 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
2992 
2993 	if (m_prebuilt->trx != trx) {
2994 
2995 		row_update_prebuilt_trx(m_prebuilt, trx);
2996 	}
2997 
2998 	m_user_thd = thd;
2999 
3000 	DBUG_ASSERT(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
3001 	DBUG_ASSERT(m_prebuilt->trx == thd_to_trx(m_user_thd));
3002 
3003 	DBUG_VOID_RETURN;
3004 }
3005 
3006 /*********************************************************************//**
3007 Updates the user_thd field in a handle and also allocates a new InnoDB
3008 transaction handle if needed, and updates the transaction fields in the
3009 m_prebuilt struct. */
3010 
3011 void
update_thd()3012 ha_innobase::update_thd()
3013 /*=====================*/
3014 {
3015 	THD*	thd = ha_thd();
3016 
3017 	ut_ad(EQ_CURRENT_THD(thd));
3018 	update_thd(thd);
3019 }
3020 
3021 /*********************************************************************//**
3022 Registers an InnoDB transaction with the MySQL 2PC coordinator, so that
3023 the MySQL XA code knows to call the InnoDB prepare and commit, or rollback
3024 for the transaction. This MUST be called for every transaction for which
3025 the user may call commit or rollback. Calling this several times to register
3026 the same transaction is allowed, too. This function also registers the
3027 current SQL statement. */
3028 static inline
3029 void
innobase_register_trx(handlerton * hton,THD * thd,trx_t * trx)3030 innobase_register_trx(
3031 /*==================*/
3032 	handlerton*	hton,	/* in: Innobase handlerton */
3033 	THD*		thd,	/* in: MySQL thd (connection) object */
3034 	trx_t*		trx)	/* in: transaction to register */
3035 {
3036 	/* JAN: TODO: MySQL 5.7 PSI
3037 	const ulonglong	trx_id = static_cast<const ulonglong>(
3038 		trx_get_id_for_print(trx));
3039 
3040 	trans_register_ha(thd, FALSE, hton, &trx_id);
3041 	*/
3042 	trans_register_ha(thd, FALSE, hton);
3043 
3044 	if (!trx_is_registered_for_2pc(trx)
3045 	    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
3046 
3047 		//trans_register_ha(thd, TRUE, hton, &trx_id);
3048 		trans_register_ha(thd, TRUE, hton);
3049 	}
3050 
3051 	trx_register_for_2pc(trx);
3052 }
3053 
3054 /*	BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
3055 	------------------------------------------------------------
3056 
3057 1) The use of the query cache for TBL is disabled when there is an
3058 uncommitted change to TBL.
3059 
3060 2) When a change to TBL commits, InnoDB stores the current value of
3061 its global trx id counter, let us denote it by INV_TRX_ID, to the table object
3062 in the InnoDB data dictionary, and does only allow such transactions whose
3063 id <= INV_TRX_ID to use the query cache.
3064 
3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.
3068 
3069 How this is implemented inside InnoDB:
3070 
3071 1) Since every modification always sets an IX type table lock on the InnoDB
3072 table, it is easy to check if there can be uncommitted modifications for a
3073 table: just check if there are locks in the lock list of the table.
3074 
3075 2) When a transaction inside InnoDB commits, it reads the global trx id
3076 counter and stores the value INV_TRX_ID to the tables on which it had a lock.
3077 
3078 3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
3079 InnoDB calls an invalidate method for the MySQL query cache for that table.
3080 
3081 How this is implemented inside sql_cache.cc:
3082 
3083 1) The query cache for an InnoDB table TBL is invalidated immediately at an
3084 INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
3085 invalidation to the transaction commit.
3086 
3087 2) To store or retrieve a value from the query cache of an InnoDB table TBL,
3088 any query must first ask InnoDB's permission. We must pass the thd as a
3089 parameter because InnoDB will look at the trx id, if any, associated with
3090 that thd. Also the full_name which is used as key to search for the table
3091 object. The full_name is a string containing the normalized path to the
3092 table in the canonical format.
3093 
3094 3) Use of the query cache for InnoDB tables is now allowed also when
3095 AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
3096 put restrictions on the use of the query cache.
3097 */
3098 
3099 /** Check if mysql can allow the transaction to read from/store to
3100 the query cache.
3101 @param[in]	table	table object
3102 @param[in]	trx	transaction object
3103 @return whether the storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check_low(const dict_table_t * table,trx_t * trx)3104 static bool innobase_query_caching_table_check_low(
3105 	const dict_table_t*	table,
3106 	trx_t*			trx)
3107 {
3108 	/* The following conditions will decide the query cache
3109 	retrieval or storing into:
3110 
3111 	(1) There should not be any locks on the table.
3112 	(2) Someother trx shouldn't invalidate the cache before this
3113 	transaction started.
3114 	(3) Read view shouldn't exist. If exists then the view
3115 	low_limit_id should be greater than or equal to the transaction that
3116 	invalidates the cache for the particular table.
3117 
3118 	For read-only transaction: should satisfy (1) and (3)
3119 	For read-write transaction: should satisfy (1), (2), (3) */
3120 
3121 	if (lock_table_get_n_locks(table)) {
3122 		return false;
3123 	}
3124 
3125 	if (trx->id && trx->id < table->query_cache_inv_trx_id) {
3126 		return false;
3127 	}
3128 
3129 	return !trx->read_view.is_open()
3130 		|| trx->read_view.low_limit_id()
3131 		>= table->query_cache_inv_trx_id;
3132 }
3133 
3134 /** Checks if MySQL at the moment is allowed for this table to retrieve a
3135 consistent read result, or store it to the query cache.
3136 @param[in,out]	trx		transaction
3137 @param[in]	norm_name	concatenation of database name,
3138 				'/' char, table name
3139 @return whether storing or retrieving from the query cache is permitted */
innobase_query_caching_table_check(trx_t * trx,const char * norm_name)3140 static bool innobase_query_caching_table_check(
3141 	trx_t*		trx,
3142 	const char*	norm_name)
3143 {
3144 	dict_table_t*   table = dict_table_open_on_name(
3145 		norm_name, FALSE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
3146 
3147 	if (table == NULL) {
3148 		return false;
3149 	}
3150 
3151 	/* Start the transaction if it is not started yet */
3152 	trx_start_if_not_started(trx, false);
3153 
3154 	bool allow = innobase_query_caching_table_check_low(table, trx);
3155 
3156 	dict_table_close(table, FALSE, FALSE);
3157 
3158 	if (allow) {
3159 		/* If the isolation level is high, assign a read view for the
3160 		transaction if it does not yet have one */
3161 
3162 		if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
3163 		    && !srv_read_only_mode
3164 		    && !trx->read_view.is_open()) {
3165 
3166 			/* Start the transaction if it is not started yet */
3167 			trx_start_if_not_started(trx, false);
3168 
3169 			trx->read_view.open(trx);
3170 		}
3171 	}
3172 
3173 	return allow;
3174 }
3175 
3176 /******************************************************************//**
3177 The MySQL query cache uses this to check from InnoDB if the query cache at
3178 the moment is allowed to operate on an InnoDB table. The SQL query must
3179 be a non-locking SELECT.
3180 
3181 The query cache is allowed to operate on certain query only if this function
3182 returns TRUE for all tables in the query.
3183 
3184 If thd is not in the autocommit state, this function also starts a new
3185 transaction for thd if there is no active trx yet, and assigns a consistent
3186 read view to it if there is no read view yet.
3187 
3188 Why a deadlock of threads is not possible: the query cache calls this function
3189 at the start of a SELECT processing. Then the calling thread cannot be
3190 holding any InnoDB semaphores. The calling thread is holding the
3191 query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
3192 Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
3193 the InnoDB trx_sys.mutex.
3194 @return TRUE if permitted, FALSE if not; note that the value FALSE
3195 does not mean we should invalidate the query cache: invalidation is
3196 called explicitly */
3197 static
3198 my_bool
innobase_query_caching_of_table_permitted(THD * thd,const char * full_name,uint full_name_len,ulonglong *)3199 innobase_query_caching_of_table_permitted(
3200 /*======================================*/
3201 	THD*	thd,		/*!< in: thd of the user who is trying to
3202 				store a result to the query cache or
3203 				retrieve it */
3204 	const char* full_name,	/*!< in: normalized path to the table */
3205 	uint	full_name_len,	/*!< in: length of the normalized path
3206 				to the table */
3207 	ulonglong *)
3208 {
3209 	char	norm_name[1000];
3210 	trx_t*	trx = check_trx_exists(thd);
3211 
3212 	ut_a(full_name_len < 999);
3213 
3214 	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
3215 		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
3216 		plain SELECT if AUTOCOMMIT is not on. */
3217 
3218 		return(false);
3219 	}
3220 
3221 	innobase_srv_conc_force_exit_innodb(trx);
3222 
3223 	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
3224 	    && trx->n_mysql_tables_in_use == 0) {
3225 		/* We are going to retrieve the query result from the query
3226 		cache. This cannot be a store operation to the query cache
3227 		because then MySQL would have locks on tables already.
3228 
3229 		TODO: if the user has used LOCK TABLES to lock the table,
3230 		then we open a transaction in the call of row_.. below.
3231 		That trx can stay open until UNLOCK TABLES. The same problem
3232 		exists even if we do not use the query cache. MySQL should be
3233 		modified so that it ALWAYS calls some cleanup function when
3234 		the processing of a query ends!
3235 
3236 		We can imagine we instantaneously serialize this consistent
3237 		read trx to the current trx id counter. If trx2 would have
3238 		changed the tables of a query result stored in the cache, and
3239 		trx2 would have already committed, making the result obsolete,
3240 		then trx2 would have already invalidated the cache. Thus we
3241 		can trust the result in the cache is ok for this query. */
3242 
3243 		return(true);
3244 	}
3245 
3246 	/* Normalize the table name to InnoDB format */
3247 	normalize_table_name(norm_name, full_name);
3248 
3249 	innobase_register_trx(innodb_hton_ptr, thd, trx);
3250 
3251 	return innobase_query_caching_table_check(trx, norm_name);
3252 }
3253 
3254 /*****************************************************************//**
3255 Invalidates the MySQL query cache for the table. */
3256 void
innobase_invalidate_query_cache(trx_t * trx,const char * full_name)3257 innobase_invalidate_query_cache(
3258 /*============================*/
3259 	trx_t*		trx,		/*!< in: transaction which
3260 					modifies the table */
3261 	const char*	full_name)	/*!< in: concatenation of
3262 					database name, path separator,
3263 					table name, null char NUL;
3264 					NOTE that in Windows this is
3265 					always in LOWER CASE! */
3266 {
3267 	/* Note that the sync0mutex.h rank of the query cache mutex is just
3268 	above the InnoDB trx_sys_t->lock. The caller of this function must
3269 	not have latches of a lower rank. */
3270 
3271 #ifdef HAVE_QUERY_CACHE
3272         char    qcache_key_name[2 * (NAME_LEN + 1)];
3273         char db_name[NAME_CHAR_LEN * MY_CS_MBMAXLEN + 1];
3274         const char *key_ptr;
3275         size_t  tabname_len;
3276 
3277         // Extract the database name.
3278         key_ptr= strchr(full_name, '/');
3279         DBUG_ASSERT(key_ptr != NULL); // Database name should be present
3280         size_t  dbname_len= size_t(key_ptr - full_name);
3281         memcpy(db_name, full_name, dbname_len);
3282         db_name[dbname_len]= '\0';
3283 
3284         /* Construct the key("db-name\0table$name\0") for the query cache using
3285         the path name("db@002dname\0table@0024name\0") of the table in its
3286         canonical form. */
3287         dbname_len = filename_to_tablename(db_name, qcache_key_name,
3288                                            sizeof(qcache_key_name));
3289         tabname_len = filename_to_tablename(++key_ptr,
3290                                             (qcache_key_name + dbname_len + 1),
3291                                             sizeof(qcache_key_name) -
3292                                             dbname_len - 1);
3293 
3294         /* Argument TRUE below means we are using transactions */
3295         mysql_query_cache_invalidate4(trx->mysql_thd,
3296                                       qcache_key_name,
3297                                       uint(dbname_len + tabname_len + 2),
3298                                       TRUE);
3299 #endif
3300 }
3301 
3302 /** Quote a standard SQL identifier like index or column name.
3303 @param[in]	file	output stream
3304 @param[in]	trx	InnoDB transaction, or NULL
3305 @param[in]	id	identifier to quote */
3306 void
innobase_quote_identifier(FILE * file,trx_t * trx,const char * id)3307 innobase_quote_identifier(
3308 	FILE*		file,
3309 	trx_t*		trx,
3310 	const char*	id)
3311 {
3312 	const int	q = trx != NULL && trx->mysql_thd != NULL
3313 		? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3314 		: '`';
3315 
3316 	if (q == EOF) {
3317 		fputs(id, file);
3318 	} else {
3319 		putc(q, file);
3320 
3321 		while (int c = *id++) {
3322 			if (c == q) {
3323 				putc(c, file);
3324 			}
3325 			putc(c, file);
3326 		}
3327 
3328 		putc(q, file);
3329 	}
3330 }
3331 
3332 /** Quote a standard SQL identifier like tablespace, index or column name.
3333 @param[in]	trx	InnoDB transaction, or NULL
3334 @param[in]	id	identifier to quote
3335 @return quoted identifier */
3336 std::string
innobase_quote_identifier(trx_t * trx,const char * id)3337 innobase_quote_identifier(
3338 /*======================*/
3339 	trx_t*		trx,
3340 	const char*	id)
3341 {
3342 	std::string quoted_identifier;
3343 	const int	q = trx != NULL && trx->mysql_thd != NULL
3344 		? get_quote_char_for_identifier(trx->mysql_thd, id, strlen(id))
3345 		: '`';
3346 
3347 	if (q == EOF) {
3348 		quoted_identifier.append(id);
3349 	} else {
3350 		quoted_identifier += char(q);
3351 		quoted_identifier.append(id);
3352 		quoted_identifier += char(q);
3353 	}
3354 
3355 	return (quoted_identifier);
3356 }
3357 
3358 /** Convert a table name to the MySQL system_charset_info (UTF-8)
3359 and quote it.
3360 @param[out]	buf	buffer for converted identifier
3361 @param[in]	buflen	length of buf, in bytes
3362 @param[in]	id	identifier to convert
3363 @param[in]	idlen	length of id, in bytes
3364 @param[in]	thd	MySQL connection thread, or NULL
3365 @return pointer to the end of buf */
3366 static
3367 char*
innobase_convert_identifier(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3368 innobase_convert_identifier(
3369 	char*		buf,
3370 	ulint		buflen,
3371 	const char*	id,
3372 	ulint		idlen,
3373 	THD*		thd)
3374 {
3375 	const char*	s	= id;
3376 
3377 	char nz[MAX_TABLE_NAME_LEN + 1];
3378 	char nz2[MAX_TABLE_NAME_LEN + 1];
3379 
3380 	/* Decode the table name.  The MySQL function expects
3381 	a NUL-terminated string.  The input and output strings
3382 	buffers must not be shared. */
3383 	ut_a(idlen <= MAX_TABLE_NAME_LEN);
3384 	memcpy(nz, id, idlen);
3385 	nz[idlen] = 0;
3386 
3387 	s = nz2;
3388 	idlen = explain_filename(thd, nz, nz2, sizeof nz2,
3389 				 EXPLAIN_PARTITIONS_AS_COMMENT);
3390 	if (idlen > buflen) {
3391 		idlen = buflen;
3392 	}
3393 	memcpy(buf, s, idlen);
3394 	return(buf + idlen);
3395 }
3396 
3397 /*****************************************************************//**
3398 Convert a table name to the MySQL system_charset_info (UTF-8).
3399 @return pointer to the end of buf */
3400 char*
innobase_convert_name(char * buf,ulint buflen,const char * id,ulint idlen,THD * thd)3401 innobase_convert_name(
3402 /*==================*/
3403 	char*		buf,	/*!< out: buffer for converted identifier */
3404 	ulint		buflen,	/*!< in: length of buf, in bytes */
3405 	const char*	id,	/*!< in: table name to convert */
3406 	ulint		idlen,	/*!< in: length of id, in bytes */
3407 	THD*		thd)	/*!< in: MySQL connection thread, or NULL */
3408 {
3409 	char*		s	= buf;
3410 	const char*	bufend	= buf + buflen;
3411 
3412 	const char*	slash = (const char*) memchr(id, '/', idlen);
3413 
3414 	if (slash == NULL) {
3415 		return(innobase_convert_identifier(
3416 				buf, buflen, id, idlen, thd));
3417 	}
3418 
3419 	/* Print the database name and table name separately. */
3420 	s = innobase_convert_identifier(s, ulint(bufend - s),
3421 					id, ulint(slash - id), thd);
3422 	if (s < bufend) {
3423 		*s++ = '.';
3424 		s = innobase_convert_identifier(s, ulint(bufend - s),
3425 						slash + 1, idlen
3426 						- ulint(slash - id) - 1,
3427 						thd);
3428 	}
3429 
3430 	return(s);
3431 }
3432 
3433 /*****************************************************************//**
3434 A wrapper function of innobase_convert_name(), convert a table name
3435 to the MySQL system_charset_info (UTF-8) and quote it if needed.
3436 @return pointer to the end of buf */
3437 void
innobase_format_name(char * buf,ulint buflen,const char * name)3438 innobase_format_name(
3439 /*==================*/
3440 	char*		buf,	/*!< out: buffer for converted identifier */
3441 	ulint		buflen,	/*!< in: length of buf, in bytes */
3442 	const char*	name)	/*!< in: table name to format */
3443 {
3444 	const char*     bufend;
3445 
3446 	bufend = innobase_convert_name(buf, buflen, name, strlen(name), NULL);
3447 
3448 	ut_ad((ulint) (bufend - buf) < buflen);
3449 
3450 	buf[bufend - buf] = '\0';
3451 }
3452 
3453 /**********************************************************************//**
3454 Determines if the currently running transaction has been interrupted.
3455 @return true if interrupted */
3456 bool
trx_is_interrupted(const trx_t * trx)3457 trx_is_interrupted(
3458 /*===============*/
3459 	const trx_t*	trx)	/*!< in: transaction */
3460 {
3461 	return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
3462 }
3463 
3464 /**************************************************************//**
3465 Resets some fields of a m_prebuilt struct. The template is used in fast
3466 retrieval of just those column values MySQL needs in its processing. */
3467 void
reset_template(void)3468 ha_innobase::reset_template(void)
3469 /*=============================*/
3470 {
3471 	ut_ad(m_prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
3472 	ut_ad(m_prebuilt->magic_n2 == m_prebuilt->magic_n);
3473 
3474 	/* Force table to be freed in close_thread_table(). */
3475 	DBUG_EXECUTE_IF("free_table_in_fts_query",
3476 		if (m_prebuilt->in_fts_query) {
3477                   table->mark_table_for_reopen();
3478 		}
3479 	);
3480 
3481 	m_prebuilt->keep_other_fields_on_keyread = false;
3482 	m_prebuilt->read_just_key = 0;
3483 	m_prebuilt->in_fts_query = 0;
3484 
3485 	/* Reset index condition pushdown state. */
3486 	if (m_prebuilt->idx_cond) {
3487 		m_prebuilt->idx_cond = NULL;
3488 		m_prebuilt->idx_cond_n_cols = 0;
3489 		/* Invalidate m_prebuilt->mysql_template
3490 		in ha_innobase::write_row(). */
3491 		m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3492 	}
3493 	if (m_prebuilt->pk_filter) {
3494 		m_prebuilt->pk_filter = NULL;
3495 		m_prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
3496 	}
3497 }
3498 
3499 /*****************************************************************//**
3500 Call this when you have opened a new table handle in HANDLER, before you
3501 call index_read_map() etc. Actually, we can let the cursor stay open even
3502 over a transaction commit! Then you should call this before every operation,
3503 fetch next etc. This function inits the necessary things even after a
3504 transaction commit. */
3505 
3506 void
init_table_handle_for_HANDLER(void)3507 ha_innobase::init_table_handle_for_HANDLER(void)
3508 /*============================================*/
3509 {
3510 	/* If current thd does not yet have a trx struct, create one.
3511 	If the current handle does not yet have a m_prebuilt struct, create
3512 	one. Update the trx pointers in the m_prebuilt struct. Normally
3513 	this operation is done in external_lock. */
3514 
3515 	update_thd(ha_thd());
3516 
3517 	/* Initialize the m_prebuilt struct much like it would be inited in
3518 	external_lock */
3519 
3520 	innobase_srv_conc_force_exit_innodb(m_prebuilt->trx);
3521 
3522 	/* If the transaction is not started yet, start it */
3523 
3524 	trx_start_if_not_started_xa(m_prebuilt->trx, false);
3525 
3526 	/* Assign a read view if the transaction does not have it yet */
3527 
3528 	m_prebuilt->trx->read_view.open(m_prebuilt->trx);
3529 
3530 	innobase_register_trx(ht, m_user_thd, m_prebuilt->trx);
3531 
3532 	/* We did the necessary inits in this function, no need to repeat them
3533 	in row_search_for_mysql */
3534 
3535 	m_prebuilt->sql_stat_start = FALSE;
3536 
3537 	/* We let HANDLER always to do the reads as consistent reads, even
3538 	if the trx isolation level would have been specified as SERIALIZABLE */
3539 
3540 	m_prebuilt->select_lock_type = LOCK_NONE;
3541 	m_prebuilt->stored_select_lock_type = LOCK_NONE;
3542 
3543 	/* Always fetch all columns in the index record */
3544 
3545 	m_prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
3546 
3547 	/* We want always to fetch all columns in the whole row? Or do
3548 	we???? */
3549 
3550 	m_prebuilt->used_in_HANDLER = TRUE;
3551 
3552 	reset_template();
3553 }
3554 
3555 /*********************************************************************//**
3556 Free any resources that were allocated and return failure.
3557 @return always return 1 */
innodb_init_abort()3558 static int innodb_init_abort()
3559 {
3560 	DBUG_ENTER("innodb_init_abort");
3561 
3562 	if (fil_system.temp_space) {
3563 		fil_system.temp_space->close();
3564 	}
3565 
3566 	srv_sys_space.shutdown();
3567 	if (srv_tmp_space.get_sanity_check_status()) {
3568 		srv_tmp_space.delete_files();
3569 	}
3570 	srv_tmp_space.shutdown();
3571 
3572 #ifdef WITH_INNODB_DISALLOW_WRITES
3573 	os_event_destroy(srv_allow_writes_event);
3574 #endif /* WITH_INNODB_DISALLOW_WRITES */
3575 	DBUG_RETURN(1);
3576 }
3577 
3578 /** Return the minimum buffer pool size based on page size */
min_buffer_pool_size()3579 static inline ulint min_buffer_pool_size()
3580 {
3581   ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size;
3582   /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */
3583   ulint alignment= 1U << 20;
3584   return UT_CALC_ALIGN(s, alignment);
3585 }
3586 
/** Validate the requested buffer pool size.  Also, reserve the necessary
memory needed for buffer pool resize.
This is only a forward declaration, needed because the function is
referenced as the check callback of MYSQL_SYSVAR_ULONGLONG(buffer_pool_size)
below; being static, it is defined elsewhere in this translation unit.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	save	immediate result for update function
@param[in]	value	incoming string
@return 0 on success, 1 on failure.
*/
static
int
innodb_buffer_pool_size_validate(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				save,
	struct st_mysql_value*		value);
3602 
/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
This is only a forward declaration, needed because the function is
referenced as the update callback of
MYSQL_SYSVAR_ULONGLONG(buffer_pool_size) below; being static, it is
defined elsewhere in this translation unit.
@param[in]	thd	thread handle
@param[in]	var	pointer to system variable
@param[out]	var_ptr	where the formal string goes
			(NOTE(review): wording looks inherited from a
			string variable; this variable is declared as
			ULONGLONG - confirm against the definition)
@param[in]	save	immediate result from check function */
static
void
innodb_buffer_pool_size_update(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				var_ptr,
	const void*			save);
3616 
/* Definition of the system variable innodb_buffer_pool_size, backed by
innobase_buffer_pool_size.

If the default value of innodb_buffer_pool_size is increased to be more than
BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default
can be removed and 8 used instead. The problem with the current setup is that
with 128MiB default buffer pool size and 8 instances by default we would emit
a warning when no options are specified.

Note that the minimum given here is replaced at startup:
innodb_init_params() assigns min_buffer_pool_size() to the min_val of this
variable. */
static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  innodb_buffer_pool_size_validate,	/* check callback */
  innodb_buffer_pool_size_update,	/* update callback */
  128ULL << 20,				/* default: 128 MiB */
  2ULL << 20,				/* compiled-in minimum: 2 MiB */
  LLONG_MAX, 1024*1024L);		/* maximum; block size 1 MiB */
3630 
/** Deprecation message about innodb_idle_flush_pct */
static const char*	deprecated_idle_flush_pct
	= "innodb_idle_flush_pct is DEPRECATED and has no effect.";

/** Deprecation message about innodb_checksum_algorithm values other than
the four crc32-based ones; emitted by innodb_checksum_algorithm_update()
and by innodb_init_params() */
static const char*	deprecated_innodb_checksum_algorithm
	= "Setting innodb_checksum_algorithm to values other than"
	" crc32, full_crc32, strict_crc32 or strict_full_crc32"
	" is UNSAFE and DEPRECATED."
	" These deprecated values will be disallowed in MariaDB 10.6.";

/** Value of the deprecated setting innodb_idle_flush_pct;
innodb_init_params() only checks whether it differs from 100 in order
to emit deprecated_idle_flush_pct */
static ulong innodb_idle_flush_pct;
3642 
3643 /** If applicable, emit a message that log checksums cannot be disabled.
3644 @param[in,out]	thd	client session, or NULL if at startup
3645 @param[in]	check	whether redo log block checksums are enabled
3646 @return whether redo log block checksums are enabled */
3647 static inline
3648 bool
innodb_log_checksums_func_update(THD * thd,bool check)3649 innodb_log_checksums_func_update(THD* thd, bool check)
3650 {
3651 	static const char msg[] = "innodb_log_checksums is deprecated"
3652 		" and has no effect outside recovery";
3653 
3654 	ut_ad(!thd == !srv_was_started);
3655 
3656 	if (!check) {
3657 		if (thd) {
3658 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3659 					    HA_ERR_UNSUPPORTED, msg);
3660 			check = true;
3661 		} else {
3662 			sql_print_warning(msg);
3663 		}
3664 	}
3665 
3666 	return(check);
3667 }
3668 
innodb_checksum_algorithm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)3669 static void innodb_checksum_algorithm_update(THD *thd, st_mysql_sys_var*,
3670                                              void *, const void *save)
3671 {
3672   srv_checksum_algorithm= *static_cast<const ulong*>(save);
3673   switch (srv_checksum_algorithm) {
3674   case SRV_CHECKSUM_ALGORITHM_CRC32:
3675   case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
3676   case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
3677   case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
3678     break;
3679   default:
3680     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
3681                         HA_ERR_UNSUPPORTED,
3682                         deprecated_innodb_checksum_algorithm);
3683   }
3684 }
3685 
/****************************************************************//**
Gives the file extension of an InnoDB single-table tablespace.
The list is terminated by NullS and is installed as
handlerton::tablefile_extensions in innodb_init(). The entries are taken
from dot_ext[] (presumably ".ibd" and ".isl" - confirm against the
definition of dot_ext). */
static const char* ha_innobase_exts[] = {
	dot_ext[IBD],
	dot_ext[ISL],
	NullS
};
3693 
3694 /** Determine if system-versioned data was modified by the transaction.
3695 @param[in,out]	thd	current session
3696 @param[out]	trx_id	transaction start ID
3697 @return	transaction commit ID
3698 @retval	0	if no system-versioned data was affected by the transaction */
innodb_prepare_commit_versioned(THD * thd,ulonglong * trx_id)3699 static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
3700 {
3701 	if (const trx_t* trx = thd_to_trx(thd)) {
3702 		*trx_id = trx->id;
3703 
3704 		for (trx_mod_tables_t::const_iterator t
3705 			     = trx->mod_tables.begin();
3706 		     t != trx->mod_tables.end(); t++) {
3707 			if (t->second.is_versioned()) {
3708 				DBUG_ASSERT(t->first->versioned_by_id());
3709 				DBUG_ASSERT(trx->rsegs.m_redo.rseg);
3710 
3711 				return trx_sys.get_new_trx_id();
3712 			}
3713 		}
3714 
3715 		return 0;
3716 	}
3717 
3718 	*trx_id = 0;
3719 	return 0;
3720 }
3721 
3722 /** Initialize and normalize innodb_buffer_pool_size. */
innodb_buffer_pool_size_init()3723 static void innodb_buffer_pool_size_init()
3724 {
3725 	if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
3726 
3727 		if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
3728 #if defined(_WIN32) && !defined(_WIN64)
3729 			/* Do not allocate too large of a buffer pool on
3730 			Windows 32-bit systems, which can have trouble
3731 			allocating larger single contiguous memory blocks. */
3732 			srv_buf_pool_size = ulint(
3733 				ut_uint64_align_up(srv_buf_pool_size,
3734 						   srv_buf_pool_chunk_unit));
3735 			srv_buf_pool_instances = std::min<ulong>(
3736 				MAX_BUFFER_POOLS,
3737 				ulong(srv_buf_pool_size
3738 				      / srv_buf_pool_chunk_unit));
3739 #else /* defined(_WIN32) && !defined(_WIN64) */
3740 			/* Default to 8 instances when size > 1GB. */
3741 			srv_buf_pool_instances = 8;
3742 #endif /* defined(_WIN32) && !defined(_WIN64) */
3743 		}
3744 	} else {
3745 		/* If buffer pool is less than 1 GiB, assume fewer
3746 		threads. Also use only one buffer pool instance. */
3747 		if (srv_buf_pool_instances != srv_buf_pool_instances_default
3748 		    && srv_buf_pool_instances != 1) {
3749 			/* We can't distinguish whether the user has explicitly
3750 			started mysqld with --innodb-buffer-pool-instances=0,
3751 			(srv_buf_pool_instances_default is 0) or has not
3752 			specified that option at all. Thus we have the
3753 			limitation that if the user started with =0, we
3754 			will not emit a warning here, but we should actually
3755 			do so. */
3756 			ib::info()
3757 				<< "Adjusting innodb_buffer_pool_instances"
3758 				" from " << srv_buf_pool_instances << " to 1"
3759 				" since innodb_buffer_pool_size is less than "
3760 				<< BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
3761 				<< " MiB";
3762 		}
3763 
3764 		srv_buf_pool_instances = 1;
3765 	}
3766 
3767 	if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
3768 	    > srv_buf_pool_size) {
3769 		/* Size unit of buffer pool is larger than srv_buf_pool_size.
3770 		adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
3771 		srv_buf_pool_chunk_unit
3772 			= static_cast<ulong>(srv_buf_pool_size)
3773 			  / srv_buf_pool_instances;
3774 		if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
3775 			++srv_buf_pool_chunk_unit;
3776 		}
3777 	}
3778 
3779 	srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
3780 	innobase_buffer_pool_size = srv_buf_pool_size;
3781 }
3782 
/** Initialize, validate and normalize the InnoDB startup parameters.
The function emits deprecation warnings, rejects settings that cannot be
honoured (unsupported compression library, missing encryption key,
unparsable data file paths, invalid page size, too large redo log),
derives the default directories, and copies the normalized values into
the srv_* variables. It returns on the first fatal problem.
@return failure code
@retval 0 on success
@retval HA_ERR_OUT_OF_MEM	when innodb_buffer_pool_size exceeds
				UINT_MAX32 while sizeof(ulint) == 4
@retval HA_ERR_INITIALIZATION	when some parameters are out of range */
static int innodb_init_params()
{
	DBUG_ENTER("innodb_init_params");

	/* static: fil_path_to_mysql_datadir and srv_data_home may keep
	pointing to this buffer after the function returns */
	static char	current_dir[3];
	char		*default_path;
	ulong		num_pll_degree;

	if (innodb_large_prefix || innodb_file_format) {
		/* Only one of the two names is reported; file_format
		takes precedence when both are set. */
		const char* p = innodb_file_format
			? "file_format"
			: "large_prefix";
		sql_print_warning("The parameter innodb_%s is deprecated"
				  " and has no effect."
				  " It may be removed in future releases."
				  " See https://mariadb.com/kb/en/library/"
				  "xtradbinnodb-file-format/", p);
	}

	/* Check that values don't overflow on 32-bit systems. */
	if (sizeof(ulint) == 4) {
		if (innobase_buffer_pool_size > UINT_MAX32) {
			sql_print_error(
				"innodb_buffer_pool_size can't be over 4GB"
				" on 32-bit systems");
			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
		}
	}

	/* The buffer pool needs to be able to accommodate enough many
	pages, even for larger pages */
	MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size();

	if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
		ib::error() << "innodb_page_size="
			<< srv_page_size << " requires "
			<< "innodb_buffer_pool_size >= "
			<< (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20)
			<< "MiB current " << (innobase_buffer_pool_size >> 20)
			<< "MiB";
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
		ib::warn() << "The parameter innodb_lock_schedule_algorithm"
			" is deprecated, and the setting"
			" innodb_lock_schedule_algorithm=vats"
			" may cause corruption. The parameter may be removed"
			" in future releases.";

#ifdef WITH_WSREP
		/* Currently, Galera does not support VATS lock schedule algorithm. */
		if (global_system_variables.wsrep_on) {
			ib::info() << "For Galera, using innodb_lock_schedule_algorithm=fcfs";
			innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
		}
#endif /* WITH_WSREP */
	}

#ifdef WITH_WSREP
	/* Print deprecation info if xtrabackup is used for SST method */
	if (global_system_variables.wsrep_on
	    && wsrep_sst_method
	    && (!strcmp(wsrep_sst_method, "xtrabackup")
	        || !strcmp(wsrep_sst_method, "xtrabackup-v2"))) {
		ib::info() << "Galera SST method xtrabackup is deprecated and the "
			" support for it may be removed in future releases.";
	}
#endif /* WITH_WSREP */

	/* Reject a page compression algorithm whose library was not
	available at build time (one check per HAVE_* macro). */
#ifndef HAVE_LZ4
	if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblz4 is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_LZO
	if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblzo is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_LZMA
	if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: liblzma is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_BZIP2
	if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: libbz2 is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

#ifndef HAVE_SNAPPY
	if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
		sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
				"InnoDB: libsnappy is not installed. \n",
				innodb_compression_algorithm);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

	/* Any form of encryption requires that the default encryption
	key can be found. */
	if ((srv_encrypt_tables || srv_encrypt_log
	     || innodb_encrypt_temporary_tables)
	     && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
		sql_print_error("InnoDB: cannot enable encryption, "
				"encryption plugin is not available");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

#ifdef _WIN32
	if (!is_filename_allowed(srv_buf_dump_filename,
				 strlen(srv_buf_dump_filename), FALSE)) {
		sql_print_error("InnoDB: innodb_buffer_pool_filename"
			" cannot have colon (:) in the file name.");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}
#endif

	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

	if (mysqld_embedded) {
		default_path = mysql_real_data_home;
	} else {
		/* It's better to use current lib, to keep paths short */
		current_dir[0] = FN_CURLIB;
		current_dir[1] = FN_LIBCHAR;
		current_dir[2] = 0;
		default_path = current_dir;
	}

	ut_a(default_path);

	fil_path_to_mysql_datadir = default_path;

	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */

	/* The default dir for data files is the datadir of MySQL */

	srv_data_home = innobase_data_home_dir
		? innobase_data_home_dir : default_path;
#ifdef WITH_WSREP
	/* If we use the wsrep API, then we need to tell the server
	the path to the data files (for passing it to the SST scripts): */
	wsrep_set_data_home_dir(srv_data_home);
#endif /* WITH_WSREP */


	/*--------------- Shared tablespaces -------------------------*/

	/* Check that the value of system variable innodb_page_size was
	set correctly.  Its value was put into srv_page_size. If valid,
	return the associated srv_page_size_shift. */
	srv_page_size_shift = innodb_page_size_validate(srv_page_size);
	if (!srv_page_size_shift) {
		sql_print_error("InnoDB: Invalid page size=%lu.\n",
				srv_page_size);
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_sys_space.set_space_id(TRX_SYS_SPACE);

	/* The flags of the system tablespace depend on whether a
	full_crc32 checksum algorithm was chosen. */
	switch (srv_checksum_algorithm) {
	case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
		srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER
					| FSP_FLAGS_FCRC32_PAGE_SSIZE());
		break;
	default:
		srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
	}

	srv_sys_space.set_name("innodb_system");
	srv_sys_space.set_path(srv_data_home);

	/* Supports raw devices */
	if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
		ib::error() << "Unable to parse innodb_data_file_path="
			    << innobase_data_file_path;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_tmp_space.set_name("innodb_temporary");
	srv_tmp_space.set_path(srv_data_home);

	/* Temporary tablespace is in full crc32 format. */
	srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER
				| FSP_FLAGS_FCRC32_PAGE_SSIZE());

	if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) {
		ib::error() << "Unable to parse innodb_temp_data_file_path="
			    << innobase_temp_data_file_path;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	/* Perform all sanity check before we take action of deleting files*/
	if (srv_sys_space.intersection(&srv_tmp_space)) {
		sql_print_error("%s and %s file names seem to be the same.",
			srv_tmp_space.name(), srv_sys_space.name());
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	srv_sys_space.normalize_size();
	srv_tmp_space.normalize_size();

	/* ------------ UNDO tablespaces files ---------------------*/
	if (!srv_undo_dir) {
		srv_undo_dir = default_path;
	}

	os_normalize_path(srv_undo_dir);

	/* Only a single directory is accepted: ';' is rejected. */
	if (strchr(srv_undo_dir, ';')) {
		sql_print_error("syntax error in innodb_undo_directory");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	/* -------------- All log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */

	if (!srv_log_group_home_dir) {
		srv_log_group_home_dir = default_path;
	}

	os_normalize_path(srv_log_group_home_dir);

	/* Only a single directory is accepted: ';' is rejected. */
	if (strchr(srv_log_group_home_dir, ';')) {
		sql_print_error("syntax error in innodb_log_group_home_dir");
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	if (srv_n_log_files * srv_log_file_size >= log_group_max_size) {
		/* Log group size is limited by the size of page number.
		Remove this limitation when fil_io() is not used for
		recovery log io. */
		ib::error() << "Combined size of log files must be < "
			<< log_group_max_size;
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL);

	/* Check that interdependent parameters have sane values. */
	if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
		sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
				  " cannot be set higher than"
				  " innodb_max_dirty_pages_pct.\n"
				  "InnoDB: Setting"
				  " innodb_max_dirty_pages_pct_lwm to %lf\n",
				  srv_max_buf_pool_modified_pct);

		srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
	}

	if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {

		if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
			/* Avoid overflow. */
			srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
		} else {
			/* The user has not set the value. We should
			set it based on innodb_io_capacity. */
			srv_max_io_capacity =
				ut_max(2 * srv_io_capacity, 2000UL);
		}

	} else if (srv_max_io_capacity < srv_io_capacity) {
		/* An explicit innodb_io_capacity_max wins: lower
		innodb_io_capacity to it. */
		sql_print_warning("InnoDB: innodb_io_capacity"
				  " cannot be set higher than"
				  " innodb_io_capacity_max."
				  "Setting innodb_io_capacity=%lu",
				  srv_max_io_capacity);

		srv_io_capacity = srv_max_io_capacity;
	}

	if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
		ib::info() << "innodb_page_size=" << srv_page_size;

		/* Raise innodb_max_undo_log_size to at least
		SRV_UNDO_TABLESPACE_SIZE_IN_PAGES pages of the
		configured page size. */
		srv_max_undo_log_size = std::max(
			srv_max_undo_log_size,
			ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
			<< srv_page_size_shift);
	}

	/* Normalize innodb_log_write_ahead_size: cap it at the page size;
	otherwise, unless it already equals OS_FILE_LOG_BLOCK_SIZE times a
	power of two, replace it with half of the first such value that
	exceeds it. */
	if (srv_log_write_ahead_size > srv_page_size) {
		srv_log_write_ahead_size = srv_page_size;
	} else {
		ulong	srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE;

		while (srv_log_write_ahead_size_tmp
		       < srv_log_write_ahead_size) {
			srv_log_write_ahead_size_tmp
				= srv_log_write_ahead_size_tmp * 2;
		}
		if (srv_log_write_ahead_size_tmp
		    != srv_log_write_ahead_size) {
			srv_log_write_ahead_size
				= srv_log_write_ahead_size_tmp / 2;
		}
	}

	srv_buf_pool_size = ulint(innobase_buffer_pool_size);

	if (!innobase_use_checksums) {
		ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
			" This option was removed in MariaDB 10.5.";
		srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
	} else {
		/* Same set of accepted values as in
		innodb_checksum_algorithm_update(). */
		switch (srv_checksum_algorithm) {
		case SRV_CHECKSUM_ALGORITHM_CRC32:
		case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
		case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
		case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
			break;
		default:
			ib::warn() << deprecated_innodb_checksum_algorithm;
		}
	}

	/* thd=NULL selects the startup behaviour: a warning is written
	to the server log and the value is left as it was. */
	innodb_log_checksums = innodb_log_checksums_func_update(
		NULL, innodb_log_checksums);

	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;

	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
	if (innobase_locks_unsafe_for_binlog) {
		ib::warn() << "Using innodb_locks_unsafe_for_binlog is"
			" DEPRECATED. This option may be removed in future"
			" releases. Please use READ COMMITTED transaction"
			" isolation level instead; " << SET_TRANSACTION_MSG;
	}

	/* Values below 10 are replaced: by 300, or by tc_size when
	file-per-table is on and 300 < tc_size < open_files_limit. */
	if (innobase_open_files < 10) {
		innobase_open_files = 300;
		if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) {
			innobase_open_files = tc_size;
		}
	}

	/* Above open_files_limit: warn, and reduce to tc_size if that
	is smaller. */
	if (innobase_open_files > open_files_limit) {
		ib::warn() << "innodb_open_files " << innobase_open_files
			   << " should not be greater"
			   << " than the open_files_limit " << open_files_limit;
		if (innobase_open_files > tc_size) {
			innobase_open_files = tc_size;
		}
	}

	srv_max_n_open_files = innobase_open_files;
	srv_innodb_status = (ibool) innobase_create_status_file;

	srv_print_verbose_log = mysqld_embedded ? 0 : 1;

	/* Round up fts_sort_pll_degree to nearest power of 2 number */
	for (num_pll_degree = 1;
	     num_pll_degree < fts_sort_pll_degree;
	     num_pll_degree <<= 1) {

		/* No op */
	}

	fts_sort_pll_degree = num_pll_degree;

	/* Store the default charset-collation number of this MySQL
	installation */

	data_mysql_default_charset_coll = (ulint) default_charset_info->number;

	innobase_commit_concurrency_init_default();

	if (innodb_idle_flush_pct != 100) {
		ib::warn() << deprecated_idle_flush_pct;
	}

#ifndef _WIN32
	if (srv_use_atomic_writes && my_may_have_atomic_write) {
		/*
		  Force O_DIRECT on Unixes (on Windows writes are always
		  unbuffered)
		*/
		switch (innodb_flush_method) {
		case SRV_O_DIRECT:
		case SRV_O_DIRECT_NO_FSYNC:
			break;
		default:
			innodb_flush_method = SRV_O_DIRECT;
			fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
		}
	}
#endif

	if (srv_read_only_mode) {
		ib::info() << "Started in read only mode";
		srv_use_doublewrite_buf = FALSE;
	}

#ifdef LINUX_NATIVE_AIO
	if (srv_use_native_aio) {
		ib::info() << "Using Linux native AIO";
	}
#elif !defined _WIN32
	/* Currently native AIO is supported only on windows and linux
	and that also when the support is compiled in. In all other
	cases, we ignore the setting of innodb_use_native_aio. */
	srv_use_native_aio = FALSE;
#endif

#ifndef _WIN32
	ut_ad(innodb_flush_method <= SRV_O_DIRECT_NO_FSYNC);
#else
	/* On Windows, map the two alias values that follow
	SRV_ALL_O_DIRECT_FSYNC onto the methods they stand for. */
	switch (innodb_flush_method) {
	case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */:
		innodb_flush_method = SRV_ALL_O_DIRECT_FSYNC;
		break;
	case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */:
		innodb_flush_method = SRV_FSYNC;
		break;
	default:
		ut_ad(innodb_flush_method <= SRV_ALL_O_DIRECT_FSYNC);
	}
#endif
	srv_file_flush_method = srv_flush_t(innodb_flush_method);

	/* May change srv_buf_pool_size, srv_buf_pool_instances and
	srv_buf_pool_chunk_unit; the statements below rely on the
	adjusted values. */
	innodb_buffer_pool_size_init();

	if (srv_n_page_cleaners > srv_buf_pool_instances) {
		/* limit of page_cleaner parallelizability
		is number of buffer pool instances. */
		srv_n_page_cleaners = srv_buf_pool_instances;
	}

	/* Five times the buffer pool size expressed in pages. */
	srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift);
	DBUG_RETURN(0);
}
4242 
4243 /** Initialize the InnoDB storage engine plugin.
4244 @param[in,out]	p	InnoDB handlerton
4245 @return error code
4246 @retval 0 on success */
innodb_init(void * p)4247 static int innodb_init(void* p)
4248 {
4249 	DBUG_ENTER("innodb_init");
4250 	handlerton* innobase_hton= static_cast<handlerton*>(p);
4251 	innodb_hton_ptr = innobase_hton;
4252 
4253 	innobase_hton->state = SHOW_OPTION_YES;
4254 	innobase_hton->db_type = DB_TYPE_INNODB;
4255 	innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
4256 	innobase_hton->close_connection = innobase_close_connection;
4257 	innobase_hton->kill_query = innobase_kill_query;
4258 	innobase_hton->savepoint_set = innobase_savepoint;
4259 	innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
4260 
4261 	innobase_hton->savepoint_rollback_can_release_mdl =
4262 				innobase_rollback_to_savepoint_can_release_mdl;
4263 
4264 	innobase_hton->savepoint_release = innobase_release_savepoint;
4265 	innobase_hton->prepare_ordered= NULL;
4266 	innobase_hton->commit_ordered= innobase_commit_ordered;
4267 	innobase_hton->commit = innobase_commit;
4268 	innobase_hton->rollback = innobase_rollback;
4269 	innobase_hton->prepare = innobase_xa_prepare;
4270 	innobase_hton->recover = innobase_xa_recover;
4271 	innobase_hton->commit_by_xid = innobase_commit_by_xid;
4272 	innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
4273 	innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
4274 	innobase_hton->create = innobase_create_handler;
4275 
4276 	innobase_hton->drop_database = innobase_drop_database;
4277 	innobase_hton->panic = innobase_end;
4278 
4279 	innobase_hton->start_consistent_snapshot =
4280 		innobase_start_trx_and_assign_read_view;
4281 
4282 	innobase_hton->flush_logs = innobase_flush_logs;
4283 	innobase_hton->show_status = innobase_show_status;
4284 	innobase_hton->flags =
4285 		HTON_SUPPORTS_EXTENDED_KEYS | HTON_SUPPORTS_FOREIGN_KEYS |
4286 		HTON_NATIVE_SYS_VERSIONING |
4287 		HTON_WSREP_REPLICATION |
4288 		HTON_REQUIRES_CLOSE_AFTER_TRUNCATE;
4289 
4290 #ifdef WITH_WSREP
4291 	innobase_hton->abort_transaction=wsrep_abort_transaction;
4292 	innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
4293 	innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
4294 #endif /* WITH_WSREP */
4295 
4296 	innobase_hton->tablefile_extensions = ha_innobase_exts;
4297 	innobase_hton->table_options = innodb_table_option_list;
4298 
4299 	/* System Versioning */
4300 	innobase_hton->prepare_commit_versioned
4301 		= innodb_prepare_commit_versioned;
4302 
4303 	innodb_remember_check_sysvar_funcs();
4304 
4305 	compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);
4306 
4307 #ifndef DBUG_OFF
4308 	static const char	test_filename[] = "-@";
4309 	char			test_tablename[sizeof test_filename
4310 				+ sizeof(srv_mysql50_table_name_prefix) - 1];
4311 	DBUG_ASSERT(sizeof test_tablename - 1
4312 		    == filename_to_tablename(test_filename,
4313 					     test_tablename,
4314 					     sizeof test_tablename, true));
4315 	DBUG_ASSERT(!strncmp(test_tablename,
4316 			     srv_mysql50_table_name_prefix,
4317 			     sizeof srv_mysql50_table_name_prefix - 1));
4318 	DBUG_ASSERT(!strcmp(test_tablename
4319 			    + sizeof srv_mysql50_table_name_prefix - 1,
4320 			    test_filename));
4321 #endif /* DBUG_OFF */
4322 
4323 	os_file_set_umask(my_umask);
4324 
4325 	/* Setup the memory alloc/free tracing mechanisms before calling
4326 	any functions that could possibly allocate memory. */
4327 	ut_new_boot();
4328 
4329 	if (int error = innodb_init_params()) {
4330 		DBUG_RETURN(error);
4331 	}
4332 
4333 	/* After this point, error handling has to use
4334 	innodb_init_abort(). */
4335 
4336 #ifdef HAVE_PSI_INTERFACE
4337 	/* Register keys with MySQL performance schema */
4338 	int	count;
4339 
4340 	count = array_elements(all_pthread_mutexes);
4341 	mysql_mutex_register("innodb", all_pthread_mutexes, count);
4342 
4343 # ifdef UNIV_PFS_MUTEX
4344 	count = array_elements(all_innodb_mutexes);
4345 	mysql_mutex_register("innodb", all_innodb_mutexes, count);
4346 # endif /* UNIV_PFS_MUTEX */
4347 
4348 # ifdef UNIV_PFS_RWLOCK
4349 	count = array_elements(all_innodb_rwlocks);
4350 	mysql_rwlock_register("innodb", all_innodb_rwlocks, count);
4351 # endif /* UNIV_PFS_MUTEX */
4352 
4353 # ifdef UNIV_PFS_THREAD
4354 	count = array_elements(all_innodb_threads);
4355 	mysql_thread_register("innodb", all_innodb_threads, count);
4356 # endif /* UNIV_PFS_THREAD */
4357 
4358 # ifdef UNIV_PFS_IO
4359 	count = array_elements(all_innodb_files);
4360 	mysql_file_register("innodb", all_innodb_files, count);
4361 # endif /* UNIV_PFS_IO */
4362 
4363 	count = array_elements(all_innodb_conds);
4364 	mysql_cond_register("innodb", all_innodb_conds, count);
4365 #endif /* HAVE_PSI_INTERFACE */
4366 
4367 	bool	create_new_db = false;
4368 
4369 	/* Check whether the data files exist. */
4370 	dberr_t	err = srv_sys_space.check_file_spec(&create_new_db, 5U << 20);
4371 
4372 	if (err != DB_SUCCESS) {
4373 		DBUG_RETURN(innodb_init_abort());
4374 	}
4375 
4376 	err = srv_start(create_new_db);
4377 
4378 	if (err != DB_SUCCESS) {
4379 		innodb_shutdown();
4380 		DBUG_RETURN(innodb_init_abort());
4381 	} else if (!srv_read_only_mode) {
4382 		mysql_thread_create(thd_destructor_thread_key,
4383 				    &thd_destructor_thread,
4384 				    NULL, thd_destructor_proxy, NULL);
4385 		while (!srv_running.load(std::memory_order_relaxed))
4386 			os_thread_sleep(20);
4387 	}
4388 
4389 	srv_was_started = true;
4390 	innodb_params_adjust();
4391 
4392 	innobase_old_blocks_pct = static_cast<uint>(
4393 		buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));
4394 
4395 	ibuf_max_size_update(srv_change_buffer_max_size);
4396 
4397 	mysql_mutex_init(commit_cond_mutex_key,
4398 			 &commit_cond_m, MY_MUTEX_INIT_FAST);
4399 	mysql_cond_init(commit_cond_key, &commit_cond, 0);
4400 	mysql_mutex_init(pending_checkpoint_mutex_key,
4401 			 &pending_checkpoint_mutex,
4402 			 MY_MUTEX_INIT_FAST);
4403 #ifdef MYSQL_DYNAMIC_PLUGIN
4404 	if (innobase_hton != p) {
4405 		innobase_hton = reinterpret_cast<handlerton*>(p);
4406 		*innobase_hton = *innodb_hton_ptr;
4407 	}
4408 #endif /* MYSQL_DYNAMIC_PLUGIN */
4409 
4410 	memset(innodb_counter_value, 0, sizeof innodb_counter_value);
4411 
	/* Do this as late as possible so that the server has fully started
	up, since we might get some initial stats if the user chooses to
	turn on some counters at startup. */
4415 	if (innobase_enable_monitor_counter) {
4416 		innodb_enable_monitor_at_startup(
4417 			innobase_enable_monitor_counter);
4418 	}
4419 
4420 	/* Turn on monitor counters that are default on */
4421 	srv_mon_default_on();
4422 
4423 	/* Unit Tests */
4424 #ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
4425 	unit_test_os_file_get_parent_dir();
4426 #endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
4427 
4428 #ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
4429 	test_make_filepath();
4430 #endif /*UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
4431 
4432 #ifdef UNIV_ENABLE_DICT_STATS_TEST
4433 	test_dict_stats_all();
4434 #endif /*UNIV_ENABLE_DICT_STATS_TEST */
4435 
4436 #ifdef UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT
4437 # ifdef HAVE_UT_CHRONO_T
4438 	test_row_raw_format_int();
4439 # endif /* HAVE_UT_CHRONO_T */
4440 #endif /* UNIV_ENABLE_UNIT_TEST_ROW_RAW_FORMAT_INT */
4441 
4442 	DBUG_RETURN(0);
4443 }
4444 
4445 /** Shut down the InnoDB storage engine.
4446 @return	0 */
4447 static
4448 int
innobase_end(handlerton *,ha_panic_function)4449 innobase_end(handlerton*, ha_panic_function)
4450 {
4451 	DBUG_ENTER("innobase_end");
4452 
4453 	if (srv_was_started) {
4454 		THD *thd= current_thd;
4455 		if (thd) { // may be UNINSTALL PLUGIN statement
4456 		 	if (trx_t* trx = thd_to_trx(thd)) {
4457 				trx->free();
4458 		 	}
4459 		}
4460 
4461 		if (auto r = srv_running.load(std::memory_order_relaxed)) {
4462 			ut_ad(!srv_read_only_mode);
4463 			if (!abort_loop) {
4464 				// may be UNINSTALL PLUGIN statement
4465 				mysql_mutex_lock(r->current_mutex);
4466 				r->abort = 1;
4467 				mysql_cond_broadcast(r->current_cond);
4468 				mysql_mutex_unlock(r->current_mutex);
4469 			}
4470 			pthread_join(thd_destructor_thread, NULL);
4471 		}
4472 
4473 		innodb_shutdown();
4474 
4475 		mysql_mutex_destroy(&commit_cond_m);
4476 		mysql_cond_destroy(&commit_cond);
4477 		mysql_mutex_destroy(&pending_checkpoint_mutex);
4478 	}
4479 
4480 	DBUG_RETURN(0);
4481 }
4482 
4483 /*****************************************************************//**
4484 Commits a transaction in an InnoDB database. */
4485 void
innobase_commit_low(trx_t * trx)4486 innobase_commit_low(
4487 /*================*/
4488 	trx_t*	trx)	/*!< in: transaction handle */
4489 {
4490 #ifdef WITH_WSREP
4491 	const char* tmp = 0;
4492 	const bool is_wsrep = trx->is_wsrep();
4493 	THD* thd = trx->mysql_thd;
4494 	if (is_wsrep) {
4495 		tmp = thd_proc_info(thd, "innobase_commit_low()");
4496 	}
4497 #endif /* WITH_WSREP */
4498 	if (trx_is_started(trx)) {
4499 		trx_commit_for_mysql(trx);
4500 	} else {
4501 		trx->will_lock = false;
4502 #ifdef WITH_WSREP
4503 		trx->wsrep = false;
4504 #endif /* WITH_WSREP */
4505 	}
4506 
4507 #ifdef WITH_WSREP
4508 	if (is_wsrep) {
4509 		thd_proc_info(thd, tmp);
4510 	}
4511 #endif /* WITH_WSREP */
4512 }
4513 
4514 /*****************************************************************//**
4515 Creates an InnoDB transaction struct for the thd if it does not yet have one.
4516 Starts a new InnoDB transaction if a transaction is not yet started. And
4517 assigns a new snapshot for a consistent read if the transaction does not yet
4518 have one.
4519 @return 0 */
4520 static
4521 int
innobase_start_trx_and_assign_read_view(handlerton * hton,THD * thd)4522 innobase_start_trx_and_assign_read_view(
4523 /*====================================*/
4524 	handlerton*	hton,	/*!< in: InnoDB handlerton */
4525 	THD*		thd)	/*!< in: MySQL thread handle of the user for
4526 				whom the transaction should be committed */
4527 {
4528 	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
4529 	DBUG_ASSERT(hton == innodb_hton_ptr);
4530 
4531 	/* Create a new trx struct for thd, if it does not yet have one */
4532 
4533 	trx_t*	trx = check_trx_exists(thd);
4534 
4535 	innobase_srv_conc_force_exit_innodb(trx);
4536 
4537 	/* The transaction should not be active yet, start it */
4538 
4539 	ut_ad(!trx_is_started(trx));
4540 
4541 	trx_start_if_not_started_xa(trx, false);
4542 
4543 	/* Assign a read view if the transaction does not have it yet.
4544 	Do this only if transaction is using REPEATABLE READ isolation
4545 	level. */
4546 	trx->isolation_level = innobase_map_isolation_level(
4547 		thd_get_trx_isolation(thd));
4548 
4549 	if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) {
4550 		trx->read_view.open(trx);
4551 	} else {
4552 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4553 				    HA_ERR_UNSUPPORTED,
4554 				    "InnoDB: WITH CONSISTENT SNAPSHOT"
4555 				    " was ignored because this phrase"
4556 				    " can only be used with"
4557 				    " REPEATABLE READ isolation level.");
4558 	}
4559 
4560 	/* Set the MySQL flag to mark that there is an active transaction */
4561 
4562 	innobase_register_trx(hton, current_thd, trx);
4563 
4564 	DBUG_RETURN(0);
4565 }
4566 
4567 static
4568 void
innobase_commit_ordered_2(trx_t * trx,THD * thd)4569 innobase_commit_ordered_2(
4570 /*======================*/
4571 	trx_t*	trx, 	/*!< in: Innodb transaction */
4572 	THD*	thd)	/*!< in: MySQL thread handle */
4573 {
4574 	DBUG_ENTER("innobase_commit_ordered_2");
4575 
4576 	bool	read_only = trx->read_only || trx->id == 0;
4577 
4578 	if (!read_only) {
4579 
4580 		while (innobase_commit_concurrency > 0) {
4581 
4582 			mysql_mutex_lock(&commit_cond_m);
4583 
4584 			++commit_threads;
4585 
4586 			if (commit_threads
4587 				<= innobase_commit_concurrency) {
4588 
4589 				mysql_mutex_unlock(&commit_cond_m);
4590 				break;
4591 			}
4592 
4593 			--commit_threads;
4594 
4595 			mysql_cond_wait(&commit_cond, &commit_cond_m);
4596 
4597 			mysql_mutex_unlock(&commit_cond_m);
4598 		}
4599 
4600 		/* The following call reads the binary log position of
4601 		the transaction being committed.
4602 
4603 		Binary logging of other engines is not relevant to
4604 		InnoDB as all InnoDB requires is that committing
4605 		InnoDB transactions appear in the same order in the
4606 		MySQL binary log as they appear in InnoDB logs, which
4607 		is guaranteed by the server.
4608 
4609 		If the binary log is not enabled, or the transaction
4610 		is not written to the binary log, the file name will
4611 		be a NULL pointer. */
4612 		thd_binlog_pos(thd, &trx->mysql_log_file_name,
4613 			       &trx->mysql_log_offset);
4614 
4615 		/* Don't do write + flush right now. For group commit
4616 		to work we want to do the flush later. */
4617 		trx->flush_log_later = true;
4618 	}
4619 
4620 #ifdef WITH_WSREP
4621 	/* If the transaction is not run in 2pc, we must assign wsrep
4622 	XID here in order to get it written in rollback segment. */
4623 	if (trx->is_wsrep()) {
4624 		thd_get_xid(thd, (MYSQL_XID*)trx->xid);
4625 	}
4626 #endif /* WITH_WSREP */
4627 
4628 	innobase_commit_low(trx);
4629 
4630 	if (!read_only) {
4631 		trx->flush_log_later = false;
4632 
4633 		if (innobase_commit_concurrency > 0) {
4634 
4635 			mysql_mutex_lock(&commit_cond_m);
4636 
4637 			ut_ad(commit_threads > 0);
4638 			--commit_threads;
4639 
4640 			mysql_cond_signal(&commit_cond);
4641 
4642 			mysql_mutex_unlock(&commit_cond_m);
4643 		}
4644 	}
4645 
4646 	DBUG_VOID_RETURN;
4647 }
4648 
4649 /*****************************************************************//**
4650 Perform the first, fast part of InnoDB commit.
4651 
4652 Doing it in this call ensures that we get the same commit order here
4653 as in binlog and any other participating transactional storage engines.
4654 
4655 Note that we want to do as little as really needed here, as we run
4656 under a global mutex. The expensive fsync() is done later, in
4657 innobase_commit(), without a lock so group commit can take place.
4658 
4659 Note also that this method can be called from a different thread than
4660 the one handling the rest of the transaction. */
4661 static
4662 void
innobase_commit_ordered(handlerton * hton,THD * thd,bool all)4663 innobase_commit_ordered(
4664 /*====================*/
4665 	handlerton *hton, /*!< in: Innodb handlerton */
4666 	THD*	thd,	/*!< in: MySQL thread handle of the user for whom
4667 			the transaction should be committed */
4668 	bool	all)	/*!< in:	TRUE - commit transaction
4669 				FALSE - the current SQL statement ended */
4670 {
4671 	trx_t*		trx;
4672 	DBUG_ENTER("innobase_commit_ordered");
4673 	DBUG_ASSERT(hton == innodb_hton_ptr);
4674 
4675 	trx = check_trx_exists(thd);
4676 
4677 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4678 		/* We cannot throw error here; instead we will catch this error
4679 		again in innobase_commit() and report it from there. */
4680 		DBUG_VOID_RETURN;
4681 	}
4682 
4683 	/* commit_ordered is only called when committing the whole transaction
4684 	(or an SQL statement when autocommit is on). */
4685 	DBUG_ASSERT(all ||
4686 		(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
4687 
4688 	innobase_commit_ordered_2(trx, thd);
4689 	trx->active_commit_ordered = true;
4690 
4691 	DBUG_VOID_RETURN;
4692 }
4693 
4694 /*****************************************************************//**
4695 Commits a transaction in an InnoDB database or marks an SQL statement
4696 ended.
4697 @return 0 or deadlock error if the transaction was aborted by another
4698 	higher priority transaction. */
4699 static
4700 int
innobase_commit(handlerton * hton,THD * thd,bool commit_trx)4701 innobase_commit(
4702 /*============*/
4703 	handlerton*	hton,		/*!< in: InnoDB handlerton */
4704 	THD*		thd,		/*!< in: MySQL thread handle of the
4705 					user for whom the transaction should
4706 					be committed */
4707 	bool		commit_trx)	/*!< in: true - commit transaction
4708 					false - the current SQL statement
4709 					ended */
4710 {
4711 	DBUG_ENTER("innobase_commit");
4712 	DBUG_PRINT("enter", ("commit_trx: %d", commit_trx));
4713 	DBUG_ASSERT(hton == innodb_hton_ptr);
4714 	DBUG_PRINT("trans", ("ending transaction"));
4715 
4716 	trx_t*	trx = check_trx_exists(thd);
4717 
4718 	ut_ad(trx->dict_operation_lock_mode == 0);
4719 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4720 
4721 	/* Transaction is deregistered only in a commit or a rollback. If
4722 	it is deregistered we know there cannot be resources to be freed
4723 	and we could return immediately.  For the time being, we play safe
4724 	and do the cleanup though there should be nothing to clean up. */
4725 
4726 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
4727 
4728 		sql_print_error("Transaction not registered for MariaDB 2PC,"
4729 				" but transaction is active");
4730 	}
4731 
4732 	bool	read_only = trx->read_only || trx->id == 0;
4733 	DBUG_PRINT("info", ("readonly: %d", read_only));
4734 
4735 	if (commit_trx
4736 	    || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
4737 
4738 		DBUG_EXECUTE_IF("crash_innodb_before_commit",
4739 				DBUG_SUICIDE(););
4740 
4741 		/* Run the fast part of commit if we did not already. */
4742 		if (!trx->active_commit_ordered) {
4743 			innobase_commit_ordered_2(trx, thd);
4744 
4745 		}
4746 
4747 		/* We were instructed to commit the whole transaction, or
4748 		this is an SQL statement end and autocommit is on */
4749 
4750 		/* At this point commit order is fixed and transaction is
4751 		visible to others. So we can wakeup other commits waiting for
4752 		this one, to allow then to group commit with us. */
4753 		thd_wakeup_subsequent_commits(thd, 0);
4754 
4755 		/* Now do a write + flush of logs. */
4756 		trx_commit_complete_for_mysql(trx);
4757 
4758 		trx_deregister_from_2pc(trx);
4759 	} else {
4760 		/* We just mark the SQL statement ended and do not do a
4761 		transaction commit */
4762 
4763 		/* If we had reserved the auto-inc lock for some
4764 		table in this SQL statement we release it now */
4765 
4766 		if (!read_only) {
4767 			lock_unlock_table_autoinc(trx);
4768 		}
4769 
4770 		/* Store the current undo_no of the transaction so that we
4771 		know where to roll back if we have to roll back the next
4772 		SQL statement */
4773 
4774 		trx_mark_sql_stat_end(trx);
4775 	}
4776 
4777 	/* Reset the number AUTO-INC rows required */
4778 	trx->n_autoinc_rows = 0;
4779 
4780 	/* This is a statement level variable. */
4781 	trx->fts_next_doc_id = 0;
4782 
4783 	innobase_srv_conc_force_exit_innodb(trx);
4784 
4785 	DBUG_RETURN(0);
4786 }
4787 
4788 /*****************************************************************//**
4789 Rolls back a transaction or the latest SQL statement.
4790 @return 0 or error number */
4791 static
4792 int
innobase_rollback(handlerton * hton,THD * thd,bool rollback_trx)4793 innobase_rollback(
4794 /*==============*/
4795 	handlerton*	hton,		/*!< in: InnoDB handlerton */
4796 	THD*		thd,		/*!< in: handle to the MySQL thread
4797 					of the user whose transaction should
4798 					be rolled back */
4799 	bool		rollback_trx)	/*!< in: TRUE - rollback entire
4800 					transaction FALSE - rollback the current
4801 					statement only */
4802 {
4803 	DBUG_ENTER("innobase_rollback");
4804 	DBUG_ASSERT(hton == innodb_hton_ptr);
4805 	DBUG_PRINT("trans", ("aborting transaction"));
4806 
4807 	trx_t*	trx = check_trx_exists(thd);
4808 
4809 	ut_ad(trx->dict_operation_lock_mode == 0);
4810 	ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
4811 
4812 	innobase_srv_conc_force_exit_innodb(trx);
4813 
4814 	/* Reset the number AUTO-INC rows required */
4815 
4816 	trx->n_autoinc_rows = 0;
4817 
4818 	/* If we had reserved the auto-inc lock for some table (if
4819 	we come here to roll back the latest SQL statement) we
4820 	release it now before a possibly lengthy rollback */
4821 	lock_unlock_table_autoinc(trx);
4822 
4823 	/* This is a statement level variable. */
4824 
4825 	trx->fts_next_doc_id = 0;
4826 
4827 	dberr_t		error;
4828 
4829 #ifdef WITH_WSREP
4830 	/* If trx was assigned wsrep XID in prepare phase and the
4831 	trx is being rolled back due to BF abort, clear XID in order
4832 	to avoid writing it to rollback segment out of order. The XID
4833 	will be reassigned when the transaction is replayed. */
4834 	if (trx->state != TRX_STATE_NOT_STARTED && wsrep_is_wsrep_xid(trx->xid)) {
4835 		trx->xid->null();
4836 	}
4837 #endif /* WITH_WSREP */
4838 	if (rollback_trx
4839 	    || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
4840 
4841 		error = trx_rollback_for_mysql(trx);
4842 
4843 		trx_deregister_from_2pc(trx);
4844 	} else {
4845 
4846 		error = trx_rollback_last_sql_stat_for_mysql(trx);
4847 	}
4848 
4849 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, trx->mysql_thd));
4850 }
4851 
4852 /*****************************************************************//**
4853 Rolls back a transaction
4854 @return 0 or error number */
4855 static
4856 int
innobase_rollback_trx(trx_t * trx)4857 innobase_rollback_trx(
4858 /*==================*/
4859 	trx_t*	trx)	/*!< in: transaction */
4860 {
4861 	DBUG_ENTER("innobase_rollback_trx");
4862 	DBUG_PRINT("trans", ("aborting transaction"));
4863 
4864 	innobase_srv_conc_force_exit_innodb(trx);
4865 
4866 	/* If we had reserved the auto-inc lock for some table (if
4867 	we come here to roll back the latest SQL statement) we
4868 	release it now before a possibly lengthy rollback */
4869 	lock_unlock_table_autoinc(trx);
4870 
4871 	if (!trx->has_logged()) {
4872 		trx->will_lock = false;
4873 #ifdef WITH_WSREP
4874 		trx->wsrep= false;
4875 		trx->lock.was_chosen_as_wsrep_victim= false;
4876 #endif
4877 		DBUG_RETURN(0);
4878 	}
4879 
4880 	DBUG_RETURN(convert_error_code_to_mysql(trx_rollback_for_mysql(trx),
4881 						0, trx->mysql_thd));
4882 }
4883 
4884 
4885 struct pending_checkpoint {
4886 	struct pending_checkpoint *next;
4887 	handlerton *hton;
4888 	void *cookie;
4889 	ib_uint64_t lsn;
4890 };
4891 static struct pending_checkpoint *pending_checkpoint_list;
4892 static struct pending_checkpoint *pending_checkpoint_list_end;
4893 
4894 /*****************************************************************//**
4895 Handle a commit checkpoint request from server layer.
4896 We put the request in a queue, so that we can notify upper layer about
4897 checkpoint complete when we have flushed the redo log.
4898 If we have already flushed all relevant redo log, we notify immediately.*/
4899 static
4900 void
innobase_checkpoint_request(handlerton * hton,void * cookie)4901 innobase_checkpoint_request(
4902 	handlerton *hton,
4903 	void *cookie)
4904 {
4905 	ib_uint64_t			lsn;
4906 	ib_uint64_t			flush_lsn;
4907 	struct pending_checkpoint *	entry;
4908 
4909 	/* Do the allocation outside of lock to reduce contention. The normal
4910 	case is that not everything is flushed, so we will need to enqueue. */
4911 	entry = static_cast<struct pending_checkpoint *>
4912 		(my_malloc(sizeof(*entry), MYF(MY_WME)));
4913 	if (!entry) {
4914 		sql_print_error("Failed to allocate %u bytes."
4915 				" Commit checkpoint will be skipped.",
4916 				static_cast<unsigned>(sizeof(*entry)));
4917 		return;
4918 	}
4919 
4920 	entry->next = NULL;
4921 	entry->hton = hton;
4922 	entry->cookie = cookie;
4923 
4924 	mysql_mutex_lock(&pending_checkpoint_mutex);
4925 	lsn = log_get_lsn();
4926 	flush_lsn = log_get_flush_lsn();
4927 	if (lsn > flush_lsn) {
4928 		/* Put the request in queue.
4929 		When the log gets flushed past the lsn, we will remove the
4930 		entry from the queue and notify the upper layer. */
4931 		entry->lsn = lsn;
4932 		if (pending_checkpoint_list_end) {
4933 			pending_checkpoint_list_end->next = entry;
4934 			/* There is no need to order the entries in the list
4935 			by lsn. The upper layer can accept notifications in
4936 			any order, and short delays in notifications do not
4937 			significantly impact performance. */
4938 		} else {
4939 			pending_checkpoint_list = entry;
4940 		}
4941 		pending_checkpoint_list_end = entry;
4942 		entry = NULL;
4943 	}
4944 	mysql_mutex_unlock(&pending_checkpoint_mutex);
4945 
4946 	if (entry) {
4947 		/* We are already flushed. Notify the checkpoint immediately. */
4948 		commit_checkpoint_notify_ha(entry->hton, entry->cookie);
4949 		my_free(entry);
4950 	}
4951 }
4952 
4953 /*****************************************************************//**
4954 Log code calls this whenever log has been written and/or flushed up
4955 to a new position. We use this to notify upper layer of a new commit
4956 checkpoint when necessary.*/
4957 UNIV_INTERN
4958 void
innobase_mysql_log_notify(ib_uint64_t flush_lsn)4959 innobase_mysql_log_notify(
4960 /*======================*/
4961 	ib_uint64_t	flush_lsn)	/*!< in: LSN flushed to disk */
4962 {
4963 	struct pending_checkpoint *	pending;
4964 	struct pending_checkpoint *	entry;
4965 	struct pending_checkpoint *	last_ready;
4966 
4967 	/* It is safe to do a quick check for NULL first without lock.
4968 	Even if we should race, we will at most skip one checkpoint and
4969 	take the next one, which is harmless. */
4970 	if (!pending_checkpoint_list)
4971 		return;
4972 
4973 	mysql_mutex_lock(&pending_checkpoint_mutex);
4974 	pending = pending_checkpoint_list;
4975 	if (!pending)
4976 	{
4977 		mysql_mutex_unlock(&pending_checkpoint_mutex);
4978 		return;
4979 	}
4980 
4981 	last_ready = NULL;
4982 	for (entry = pending; entry != NULL; entry = entry -> next)
4983 	{
4984 		/* Notify checkpoints up until the first entry that has not
4985 		been fully flushed to the redo log. Since we do not maintain
4986 		the list ordered, in principle there could be more entries
4987 		later than were also flushed. But there is no harm in
4988 		delaying notifications for those a bit. And in practise, the
4989 		list is unlikely to have more than one element anyway, as we
4990 		flush the redo log at least once every second. */
4991 		if (entry->lsn > flush_lsn)
4992 			break;
4993 		last_ready = entry;
4994 	}
4995 
4996 	if (last_ready)
4997 	{
4998 		/* We found some pending checkpoints that are now flushed to
4999 		disk. So remove them from the list. */
5000 		pending_checkpoint_list = entry;
5001 		if (!entry)
5002 			pending_checkpoint_list_end = NULL;
5003 	}
5004 
5005 	mysql_mutex_unlock(&pending_checkpoint_mutex);
5006 
5007 	if (!last_ready)
5008 		return;
5009 
5010 	/* Now that we have released the lock, notify upper layer about all
5011 	commit checkpoints that have now completed. */
5012 	for (;;) {
5013 		entry = pending;
5014 		pending = pending->next;
5015 
5016 		commit_checkpoint_notify_ha(entry->hton, entry->cookie);
5017 
5018 		my_free(entry);
5019 		if (entry == last_ready)
5020 			break;
5021 	}
5022 }
5023 
5024 /*****************************************************************//**
5025 Rolls back a transaction to a savepoint.
5026 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5027 given name */
5028 static
5029 int
innobase_rollback_to_savepoint(handlerton * hton,THD * thd,void * savepoint)5030 innobase_rollback_to_savepoint(
5031 /*===========================*/
5032 	handlerton*	hton,		/*!< in: InnoDB handlerton */
5033 	THD*		thd,		/*!< in: handle to the MySQL thread
5034 					of the user whose transaction should
5035 					be rolled back to savepoint */
5036 	void*		savepoint)	/*!< in: savepoint data */
5037 {
5038 
5039 	DBUG_ENTER("innobase_rollback_to_savepoint");
5040 	DBUG_ASSERT(hton == innodb_hton_ptr);
5041 
5042 	trx_t*	trx = check_trx_exists(thd);
5043 
5044 	innobase_srv_conc_force_exit_innodb(trx);
5045 
5046 	/* TODO: use provided savepoint data area to store savepoint data */
5047 
5048 	char	name[64];
5049 
5050 	longlong2str(longlong(savepoint), name, 36);
5051 
5052 	int64_t	mysql_binlog_cache_pos;
5053 
5054 	dberr_t	error = trx_rollback_to_savepoint_for_mysql(
5055 		trx, name, &mysql_binlog_cache_pos);
5056 
5057 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5058 		fts_savepoint_rollback(trx, name);
5059 	}
5060 
5061 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5062 }
5063 
5064 /*****************************************************************//**
5065 Check whether innodb state allows to safely release MDL locks after
5066 rollback to savepoint.
5067 When binlog is on, MDL locks acquired after savepoint unit are not
5068 released if there are any locks held in InnoDB.
5069 @return true if it is safe, false if its not safe. */
5070 static
5071 bool
innobase_rollback_to_savepoint_can_release_mdl(handlerton * hton,THD * thd)5072 innobase_rollback_to_savepoint_can_release_mdl(
5073 /*===========================================*/
5074 	handlerton*	hton,		/*!< in: InnoDB handlerton */
5075 	THD*		thd)		/*!< in: handle to the MySQL thread
5076 					of the user whose transaction should
5077 					be rolled back to savepoint */
5078 {
5079 	DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
5080 	DBUG_ASSERT(hton == innodb_hton_ptr);
5081 
5082 	trx_t*	trx = check_trx_exists(thd);
5083 
5084 	/* If transaction has not acquired any locks then it is safe
5085 	to release MDL after rollback to savepoint */
5086 	if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
5087 
5088 		DBUG_RETURN(true);
5089 	}
5090 
5091 	DBUG_RETURN(false);
5092 }
5093 
5094 /*****************************************************************//**
5095 Release transaction savepoint name.
5096 @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
5097 given name */
5098 static
5099 int
innobase_release_savepoint(handlerton * hton,THD * thd,void * savepoint)5100 innobase_release_savepoint(
5101 /*=======================*/
5102 	handlerton*	hton,		/*!< in: handlerton for InnoDB */
5103 	THD*		thd,		/*!< in: handle to the MySQL thread
5104 					of the user whose transaction's
5105 					savepoint should be released */
5106 	void*		savepoint)	/*!< in: savepoint data */
5107 {
5108 	dberr_t		error;
5109 	trx_t*		trx;
5110 	char		name[64];
5111 
5112 	DBUG_ENTER("innobase_release_savepoint");
5113 	DBUG_ASSERT(hton == innodb_hton_ptr);
5114 
5115 	trx = check_trx_exists(thd);
5116 
5117 	/* TODO: use provided savepoint data area to store savepoint data */
5118 
5119 	longlong2str(longlong(savepoint), name, 36);
5120 
5121 	error = trx_release_savepoint_for_mysql(trx, name);
5122 
5123 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5124 		fts_savepoint_release(trx, name);
5125 	}
5126 
5127 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5128 }
5129 
5130 /*****************************************************************//**
5131 Sets a transaction savepoint.
5132 @return always 0, that is, always succeeds */
5133 static
5134 int
innobase_savepoint(handlerton * hton,THD * thd,void * savepoint)5135 innobase_savepoint(
5136 /*===============*/
5137 	handlerton*	hton,	/*!< in: handle to the InnoDB handlerton */
5138 	THD*		thd,	/*!< in: handle to the MySQL thread */
5139 	void*		savepoint)/*!< in: savepoint data */
5140 {
5141 	DBUG_ENTER("innobase_savepoint");
5142 	DBUG_ASSERT(hton == innodb_hton_ptr);
5143 
5144 	/* In the autocommit mode there is no sense to set a savepoint
5145 	(unless we are in sub-statement), so SQL layer ensures that
5146 	this method is never called in such situation.  */
5147 
5148 	trx_t*	trx = check_trx_exists(thd);
5149 
5150 	innobase_srv_conc_force_exit_innodb(trx);
5151 
5152 	/* Cannot happen outside of transaction */
5153 	DBUG_ASSERT(trx_is_registered_for_2pc(trx));
5154 
5155 	/* TODO: use provided savepoint data area to store savepoint data */
5156 	char	name[64];
5157 
5158 	longlong2str(longlong(savepoint), name, 36);
5159 
5160 	dberr_t	error = trx_savepoint_for_mysql(trx, name, 0);
5161 
5162 	if (error == DB_SUCCESS && trx->fts_trx != NULL) {
5163 		fts_savepoint_take(trx->fts_trx, name);
5164 	}
5165 
5166 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
5167 }
5168 
5169 /*****************************************************************//**
5170 Frees a possible InnoDB trx object associated with the current THD.
5171 @return 0 or error number */
5172 static
5173 int
innobase_close_connection(handlerton * hton,THD * thd)5174 innobase_close_connection(
5175 /*======================*/
5176 	handlerton*	hton,	/*!< in: innobase handlerton */
5177 	THD*		thd)	/*!< in: handle to the MySQL thread of the user
5178 				whose resources should be free'd */
5179 {
5180 
5181 	DBUG_ENTER("innobase_close_connection");
5182 	DBUG_ASSERT(hton == innodb_hton_ptr);
5183 
5184 	trx_t*	trx = thd_to_trx(thd);
5185 
5186 	/* During server initialization MySQL layer will try to open
5187 	some of the master-slave tables those residing in InnoDB.
5188 	After MySQL layer is done with needed checks these tables
5189 	are closed followed by invocation of close_connection on the
5190 	associated thd.
5191 
5192 	close_connection rolls back the trx and then frees it.
5193 	Once trx is freed thd should avoid maintaining reference to
5194 	it else it can be classified as stale reference.
5195 
5196 	Re-invocation of innodb_close_connection on same thd should
5197 	get trx as NULL. */
5198 
5199 	if (trx) {
5200 
5201 		thd_set_ha_data(thd, hton, NULL);
5202 		if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
5203 
5204 			sql_print_error("Transaction not registered for MariaDB 2PC, "
5205 				"but transaction is active");
5206 		}
5207 
5208 		/* Disconnect causes rollback in the following cases:
5209 		- trx is not started, or
5210 		- trx is in *not* in PREPARED state, or
5211 		- trx has not updated any persistent data.
5212 		TODO/FIXME: it does not make sense to initiate rollback
5213 		in the 1st and 3rd case. */
5214 		if (trx_is_started(trx)) {
5215 			if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
5216 				if (trx->has_logged_persistent()) {
5217 					trx_disconnect_prepared(trx);
5218 				} else {
5219 					trx_deregister_from_2pc(trx);
5220 					goto rollback_and_free;
5221 				}
5222 			} else {
5223 			sql_print_warning(
5224 				"MariaDB is closing a connection that has an active "
5225 				"InnoDB transaction.  " TRX_ID_FMT " row modifications "
5226 				"will roll back.",
5227 					trx->undo_no);
5228 				goto rollback_and_free;
5229 			}
5230 		} else {
5231 rollback_and_free:
5232 			innobase_rollback_trx(trx);
5233 			trx->free();
5234 		}
5235 	}
5236 
5237 	DBUG_RETURN(0);
5238 }
5239 
5240 void lock_cancel_waiting_and_release(lock_t *lock);
5241 
5242 /** Cancel any pending lock request associated with the current THD.
5243 @sa THD::awake() @sa ha_kill_query() */
innobase_kill_query(handlerton *,THD * thd,enum thd_kill_levels)5244 static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
5245 {
5246   DBUG_ENTER("innobase_kill_query");
5247 
5248   if (trx_t* trx= thd_to_trx(thd))
5249   {
5250     ut_ad(trx->mysql_thd == thd);
5251 #ifdef WITH_WSREP
5252     if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim)
5253       /* if victim has been signaled by BF thread and/or aborting is already
5254       progressing, following query aborting is not necessary any more.
5255       Also, BF thread should own trx mutex for the victim. */
5256       DBUG_VOID_RETURN;
5257 #endif /* WITH_WSREP */
5258     lock_mutex_enter();
5259     if (lock_t *lock= trx->lock.wait_lock)
5260     {
5261       trx_mutex_enter(trx);
5262       if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
5263         trx->lock.was_chosen_as_deadlock_victim= TRUE;
5264       lock_cancel_waiting_and_release(lock);
5265       trx_mutex_exit(trx);
5266     }
5267     lock_mutex_exit();
5268   }
5269 
5270   DBUG_VOID_RETURN;
5271 }
5272 
5273 
5274 /*************************************************************************//**
5275 ** InnoDB database tables
5276 *****************************************************************************/
5277 
5278 /** Get the record format from the data dictionary.
5279 @return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
5280 ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
5281 
5282 enum row_type
get_row_type() const5283 ha_innobase::get_row_type() const
5284 {
5285 	if (m_prebuilt && m_prebuilt->table) {
5286 		const ulint	flags = m_prebuilt->table->flags;
5287 
5288 		switch (dict_tf_get_rec_format(flags)) {
5289 		case REC_FORMAT_REDUNDANT:
5290 			return(ROW_TYPE_REDUNDANT);
5291 		case REC_FORMAT_COMPACT:
5292 			return(ROW_TYPE_COMPACT);
5293 		case REC_FORMAT_COMPRESSED:
5294 			return(ROW_TYPE_COMPRESSED);
5295 		case REC_FORMAT_DYNAMIC:
5296 			return(ROW_TYPE_DYNAMIC);
5297 		}
5298 	}
5299 	ut_ad(0);
5300 	return(ROW_TYPE_NOT_USED);
5301 }
5302 
5303 /****************************************************************//**
5304 Get the table flags to use for the statement.
5305 @return table flags */
5306 
5307 handler::Table_flags
table_flags() const5308 ha_innobase::table_flags() const
5309 /*============================*/
5310 {
5311 	THD*			thd = ha_thd();
5312 	handler::Table_flags	flags = m_int_table_flags;
5313 
5314 	/* Need to use tx_isolation here since table flags is (also)
5315 	called before prebuilt is inited. */
5316 
5317 	if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED) {
5318 		return(flags);
5319 	}
5320 
5321 	return(flags | HA_BINLOG_STMT_CAPABLE);
5322 }
5323 
5324 /****************************************************************//**
5325 Returns the table type (storage engine name).
5326 @return table type */
5327 
5328 const char*
table_type() const5329 ha_innobase::table_type() const
5330 /*===========================*/
5331 {
5332 	return(innobase_hton_name);
5333 }
5334 
5335 /****************************************************************//**
5336 Returns the index type.
5337 @return index type */
5338 
5339 const char*
index_type(uint keynr)5340 ha_innobase::index_type(
5341 /*====================*/
5342 	uint	keynr)		/*!< : index number */
5343 {
5344 	dict_index_t*	index = innobase_get_index(keynr);
5345 
5346 	if (!index) {
5347 		return "Corrupted";
5348 	}
5349 
5350 	if (index->type & DICT_FTS) {
5351 		return("FULLTEXT");
5352 	}
5353 
5354 	if (dict_index_is_spatial(index)) {
5355 		return("SPATIAL");
5356 	}
5357 
5358 	return("BTREE");
5359 }
5360 
5361 /****************************************************************//**
5362 Returns the operations supported for indexes.
5363 @return flags of supported operations */
5364 
5365 ulong
index_flags(uint key,uint,bool) const5366 ha_innobase::index_flags(
5367 /*=====================*/
5368 	uint	key,
5369 	uint,
5370 	bool) const
5371 {
5372 	if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
5373 		return(0);
5374 	}
5375 
5376 	/* For spatial index, we don't support descending scan
5377 	and ICP so far. */
5378 	if (table_share->key_info[key].flags & HA_SPATIAL) {
5379 		return HA_READ_NEXT | HA_READ_ORDER| HA_READ_RANGE
5380 			| HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
5381 	}
5382 
5383 	ulong flags= key == table_share->primary_key
5384 		? HA_CLUSTERED_INDEX : 0;
5385 
5386 	flags |= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
5387 		| HA_READ_RANGE | HA_KEYREAD_ONLY
5388 		| HA_DO_INDEX_COND_PUSHDOWN
5389 		| HA_DO_RANGE_FILTER_PUSHDOWN;
5390 
5391 	return(flags);
5392 }
5393 
5394 /****************************************************************//**
5395 Returns the maximum number of keys.
5396 @return MAX_KEY */
5397 
5398 uint
max_supported_keys() const5399 ha_innobase::max_supported_keys() const
5400 /*===================================*/
5401 {
5402 	return(MAX_KEY);
5403 }
5404 
5405 /****************************************************************//**
5406 Returns the maximum key length.
5407 @return maximum supported key length, in bytes */
5408 
5409 uint
max_supported_key_length() const5410 ha_innobase::max_supported_key_length() const
5411 /*=========================================*/
5412 {
5413 	/* An InnoDB page must store >= 2 keys; a secondary key record
5414 	must also contain the primary key value.  Therefore, if both
5415 	the primary key and the secondary key are at this maximum length,
5416 	it must be less than 1/4th of the free space on a page including
5417 	record overhead.
5418 
5419 	MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072.
5420 
5421 	For page sizes = 16k, InnoDB historically reported 3500 bytes here,
5422 	But the MySQL limit of 3072 was always used through the handler
5423 	interface.
5424 
5425 	Note: Handle 16k and 32k pages the same here since the limits
5426 	are higher than imposed by MySQL. */
5427 
5428 	switch (srv_page_size) {
5429 	case 4096:
5430 		/* Hack: allow mysql.innodb_index_stats to be created. */
5431 		/* FIXME: rewrite this API, and in sql_table.cc consider
5432 		that in index-organized tables (such as InnoDB), secondary
5433 		index records will be padded with the PRIMARY KEY, instead
5434 		of some short ROWID or record heap address. */
5435 		return(1173);
5436 	case 8192:
5437 		return(1536);
5438 	default:
5439 		return(3500);
5440 	}
5441 }
5442 
5443 /****************************************************************//**
5444 Returns the key map of keys that are usable for scanning.
5445 @return key_map_full */
5446 
5447 const key_map*
keys_to_use_for_scanning()5448 ha_innobase::keys_to_use_for_scanning()
5449 /*===================================*/
5450 {
5451 	return(&key_map_full);
5452 }
5453 
5454 /****************************************************************//**
5455 Ensures that if there's a concurrent inplace ADD INDEX, being-indexed virtual
5456 columns are computed. They are not marked as indexed in the old table, so the
5457 server won't add them to the read_set automatically */
5458 void
column_bitmaps_signal()5459 ha_innobase::column_bitmaps_signal()
5460 /*================================*/
5461 {
5462 	if (!table->vfield || table->current_lock != F_WRLCK) {
5463 		return;
5464 	}
5465 
5466 	dict_index_t*	clust_index = dict_table_get_first_index(m_prebuilt->table);
5467 	uint	num_v = 0;
5468 	for (uint j = 0; j < table->s->virtual_fields; j++) {
5469 		if (table->vfield[j]->stored_in_db()) {
5470 			continue;
5471 		}
5472 
5473 		dict_col_t*	col = &m_prebuilt->table->v_cols[num_v].m_col;
5474 		if (col->ord_part ||
5475 		    (dict_index_is_online_ddl(clust_index) &&
5476 		     row_log_col_is_indexed(clust_index, num_v))) {
5477 			table->mark_virtual_column_with_deps(table->vfield[j]);
5478 		}
5479 		num_v++;
5480 	}
5481 }
5482 
5483 
5484 /****************************************************************//**
5485 Determines if table caching is supported.
5486 @return HA_CACHE_TBL_ASKTRANSACT */
5487 
5488 uint8
table_cache_type()5489 ha_innobase::table_cache_type()
5490 /*===========================*/
5491 {
5492 	return(HA_CACHE_TBL_ASKTRANSACT);
5493 }
5494 
5495 /****************************************************************//**
5496 Determines if the primary key is clustered index.
5497 @return true */
5498 
5499 bool
primary_key_is_clustered()5500 ha_innobase::primary_key_is_clustered()
5501 /*===================================*/
5502 {
5503 	return(true);
5504 }
5505 
5506 /** Normalizes a table name string.
5507 A normalized name consists of the database name catenated to '/'
5508 and table name. For example: test/mytable.
5509 On Windows, normalization puts both the database name and the
5510 table name always to lower case if "set_lower_case" is set to TRUE.
5511 @param[out]	norm_name	Normalized name, null-terminated.
5512 @param[in]	name		Name to normalize.
5513 @param[in]	set_lower_case	True if we also should fold to lower case. */
5514 void
normalize_table_name_c_low(char * norm_name,const char * name,ibool set_lower_case)5515 normalize_table_name_c_low(
5516 /*=======================*/
5517 	char*           norm_name,      /* out: normalized name as a
5518 					null-terminated string */
5519 	const char*     name,           /* in: table name string */
5520 	ibool           set_lower_case) /* in: TRUE if we want to set
5521 					 name to lower case */
5522 {
5523 	char*	name_ptr;
5524 	ulint	name_len;
5525 	char*	db_ptr;
5526 	ulint	db_len;
5527 	char*	ptr;
5528 	ulint	norm_len;
5529 
5530 	/* Scan name from the end */
5531 
5532 	ptr = strend(name) - 1;
5533 
5534 	/* seek to the last path separator */
5535 	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5536 		ptr--;
5537 	}
5538 
5539 	name_ptr = ptr + 1;
5540 	name_len = strlen(name_ptr);
5541 
5542 	/* skip any number of path separators */
5543 	while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
5544 		ptr--;
5545 	}
5546 
5547 	DBUG_ASSERT(ptr >= name);
5548 
5549 	/* seek to the last but one path separator or one char before
5550 	the beginning of name */
5551 	db_len = 0;
5552 	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
5553 		ptr--;
5554 		db_len++;
5555 	}
5556 
5557 	db_ptr = ptr + 1;
5558 
5559 	norm_len = db_len + name_len + sizeof "/";
5560 	ut_a(norm_len < FN_REFLEN - 1);
5561 
5562 	memcpy(norm_name, db_ptr, db_len);
5563 
5564 	norm_name[db_len] = '/';
5565 
5566 	/* Copy the name and null-byte. */
5567 	memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
5568 
5569 	if (set_lower_case) {
5570 		innobase_casedn_str(norm_name);
5571 	}
5572 }
5573 
create_table_info_t(THD * thd,const TABLE * form,HA_CREATE_INFO * create_info,char * table_name,char * remote_path,bool file_per_table,trx_t * trx)5574 create_table_info_t::create_table_info_t(
5575 	THD*		thd,
5576 	const TABLE*	form,
5577 	HA_CREATE_INFO*	create_info,
5578 	char*		table_name,
5579 	char*		remote_path,
5580 	bool		file_per_table,
5581 	trx_t*		trx)
5582 	: m_thd(thd),
5583 	  m_trx(trx),
5584 	  m_form(form),
5585 	  m_default_row_format(innodb_default_row_format),
5586 	  m_create_info(create_info),
5587 	  m_table_name(table_name), m_table(NULL),
5588 	  m_drop_before_rollback(false),
5589 	  m_remote_path(remote_path),
5590 	  m_innodb_file_per_table(file_per_table)
5591 {
5592 }
5593 
5594 /** Normalizes a table name string.
5595 A normalized name consists of the database name catenated to '/'
5596 and table name. For example: test/mytable.
5597 On Windows, normalization puts both the database name and the
5598 table name always to lower case if "set_lower_case" is set to TRUE.
5599 @param[out]	norm_name	Normalized name, null-terminated.
5600 @param[in]	name		Name to normalize.
5601 @param[in]	set_lower_case	True if we also should fold to lower case. */
5602 void
normalize_table_name_low(char * norm_name,const char * name,ibool set_lower_case)5603 create_table_info_t::normalize_table_name_low(
5604 	char*		norm_name,
5605 	const char*	name,
5606 	ibool		set_lower_case)
5607 {
5608 	normalize_table_name_c_low(norm_name, name, set_lower_case);
5609 }
5610 
5611 #if !defined(DBUG_OFF)
5612 /*********************************************************************
5613 Test normalize_table_name_low(). */
5614 static
5615 void
test_normalize_table_name_low()5616 test_normalize_table_name_low()
5617 /*===========================*/
5618 {
5619 	char		norm_name[FN_REFLEN];
5620 	const char*	test_data[][2] = {
5621 		/* input, expected result */
5622 		{"./mysqltest/t1", "mysqltest/t1"},
5623 		{"./test/#sql-842b_2", "test/#sql-842b_2"},
5624 		{"./test/#sql-85a3_10", "test/#sql-85a3_10"},
5625 		{"./test/#sql2-842b-2", "test/#sql2-842b-2"},
5626 		{"./test/bug29807", "test/bug29807"},
5627 		{"./test/foo", "test/foo"},
5628 		{"./test/innodb_bug52663", "test/innodb_bug52663"},
5629 		{"./test/t", "test/t"},
5630 		{"./test/t1", "test/t1"},
5631 		{"./test/t10", "test/t10"},
5632 		{"/a/b/db/table", "db/table"},
5633 		{"/a/b/db///////table", "db/table"},
5634 		{"/a/b////db///////table", "db/table"},
5635 		{"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5636 		{"db/table", "db/table"},
5637 		{"ddd/t", "ddd/t"},
5638 		{"d/ttt", "d/ttt"},
5639 		{"d/t", "d/t"},
5640 		{".\\mysqltest\\t1", "mysqltest/t1"},
5641 		{".\\test\\#sql-842b_2", "test/#sql-842b_2"},
5642 		{".\\test\\#sql-85a3_10", "test/#sql-85a3_10"},
5643 		{".\\test\\#sql2-842b-2", "test/#sql2-842b-2"},
5644 		{".\\test\\bug29807", "test/bug29807"},
5645 		{".\\test\\foo", "test/foo"},
5646 		{".\\test\\innodb_bug52663", "test/innodb_bug52663"},
5647 		{".\\test\\t", "test/t"},
5648 		{".\\test\\t1", "test/t1"},
5649 		{".\\test\\t10", "test/t10"},
5650 		{"C:\\a\\b\\db\\table", "db/table"},
5651 		{"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"},
5652 		{"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"},
5653 		{"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
5654 		{"db\\table", "db/table"},
5655 		{"ddd\\t", "ddd/t"},
5656 		{"d\\ttt", "d/ttt"},
5657 		{"d\\t", "d/t"},
5658 	};
5659 
5660 	for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5661 		printf("test_normalize_table_name_low():"
5662 		       " testing \"%s\", expected \"%s\"... ",
5663 		       test_data[i][0], test_data[i][1]);
5664 
5665 		create_table_info_t::normalize_table_name_low(
5666 			norm_name, test_data[i][0], FALSE);
5667 
5668 		if (strcmp(norm_name, test_data[i][1]) == 0) {
5669 			printf("ok\n");
5670 		} else {
5671 			printf("got \"%s\"\n", norm_name);
5672 			ut_error;
5673 		}
5674 	}
5675 }
5676 
5677 /*********************************************************************
5678 Test ut_format_name(). */
5679 static
5680 void
test_ut_format_name()5681 test_ut_format_name()
5682 /*=================*/
5683 {
5684 	char		buf[NAME_LEN * 3];
5685 
5686 	struct {
5687 		const char*	name;
5688 		ulint		buf_size;
5689 		const char*	expected;
5690 	} test_data[] = {
5691 		{"test/t1",	sizeof(buf),	"`test`.`t1`"},
5692 		{"test/t1",	12,		"`test`.`t1`"},
5693 		{"test/t1",	11,		"`test`.`t1"},
5694 		{"test/t1",	10,		"`test`.`t"},
5695 		{"test/t1",	9,		"`test`.`"},
5696 		{"test/t1",	8,		"`test`."},
5697 		{"test/t1",	7,		"`test`"},
5698 		{"test/t1",	6,		"`test"},
5699 		{"test/t1",	5,		"`tes"},
5700 		{"test/t1",	4,		"`te"},
5701 		{"test/t1",	3,		"`t"},
5702 		{"test/t1",	2,		"`"},
5703 		{"test/t1",	1,		""},
5704 		{"test/t1",	0,		"BUF_NOT_CHANGED"},
5705 		{"table",	sizeof(buf),	"`table`"},
5706 		{"ta'le",	sizeof(buf),	"`ta'le`"},
5707 		{"ta\"le",	sizeof(buf),	"`ta\"le`"},
5708 		{"ta`le",	sizeof(buf),	"`ta``le`"},
5709 	};
5710 
5711 	for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
5712 
5713 		memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
5714 
5715 		char*	ret;
5716 
5717 		ret = ut_format_name(test_data[i].name,
5718 				     buf,
5719 				     test_data[i].buf_size);
5720 
5721 		ut_a(ret == buf);
5722 
5723 		if (strcmp(buf, test_data[i].expected) == 0) {
5724 			ib::info() << "ut_format_name(" << test_data[i].name
5725 				<< ", buf, " << test_data[i].buf_size << "),"
5726 				" expected " << test_data[i].expected
5727 				<< ", OK";
5728 		} else {
5729 			ib::error() << "ut_format_name(" << test_data[i].name
5730 				<< ", buf, " << test_data[i].buf_size << "),"
5731 				" expected " << test_data[i].expected
5732 				<< ", ERROR: got " << buf;
5733 			ut_error;
5734 		}
5735 	}
5736 }
5737 #endif /* !DBUG_OFF */
5738 
5739 /** Match index columns between MySQL and InnoDB.
5740 This function checks whether the index column information
5741 is consistent between KEY info from mysql and that from innodb index.
5742 @param[in]	key_info	Index info from mysql
5743 @param[in]	index_info	Index info from InnoDB
5744 @return true if all column types match. */
5745 static
5746 bool
innobase_match_index_columns(const KEY * key_info,const dict_index_t * index_info)5747 innobase_match_index_columns(
5748 	const KEY*		key_info,
5749 	const dict_index_t*	index_info)
5750 {
5751 	const KEY_PART_INFO*	key_part;
5752 	const KEY_PART_INFO*	key_end;
5753 	const dict_field_t*	innodb_idx_fld;
5754 	const dict_field_t*	innodb_idx_fld_end;
5755 
5756 	DBUG_ENTER("innobase_match_index_columns");
5757 
5758 	/* Check whether user defined index column count matches */
5759 	if (key_info->user_defined_key_parts !=
5760 		index_info->n_user_defined_cols) {
5761 		DBUG_RETURN(FALSE);
5762 	}
5763 
5764 	key_part = key_info->key_part;
5765 	key_end = key_part + key_info->user_defined_key_parts;
5766 	innodb_idx_fld = index_info->fields;
5767 	innodb_idx_fld_end = index_info->fields + index_info->n_fields;
5768 
5769 	/* Check each index column's datatype. We do not check
5770 	column name because there exists case that index
5771 	column name got modified in mysql but such change does not
5772 	propagate to InnoDB.
5773 	One hidden assumption here is that the index column sequences
5774 	are matched up between those in mysql and InnoDB. */
5775 	for (; key_part != key_end; ++key_part) {
5776 		ulint	col_type;
5777 		ibool	is_unsigned;
5778 		ulint	mtype = innodb_idx_fld->col->mtype;
5779 
5780 		/* Need to translate to InnoDB column type before
5781 		comparison. */
5782 		col_type = get_innobase_type_from_mysql_type(
5783 			&is_unsigned, key_part->field);
5784 
5785 		/* Ignore InnoDB specific system columns. */
5786 		while (mtype == DATA_SYS) {
5787 			innodb_idx_fld++;
5788 
5789 			if (innodb_idx_fld >= innodb_idx_fld_end) {
5790 				DBUG_RETURN(FALSE);
5791 			}
5792 		}
5793 
5794 		/* MariaDB-5.5 compatibility */
5795 		if ((key_part->field->real_type() == MYSQL_TYPE_ENUM ||
5796 		     key_part->field->real_type() == MYSQL_TYPE_SET) &&
5797 		    mtype == DATA_FIXBINARY) {
5798 			col_type= DATA_FIXBINARY;
5799 		}
5800 
5801 		if (col_type != mtype) {
5802 			/* If the col_type we get from mysql type is a geometry
5803 			data type, we should check if mtype is a legacy type
5804 			from 5.6, either upgraded to DATA_GEOMETRY or not.
5805 			This is indeed not an accurate check, but should be
5806 			safe, since DATA_BLOB would be upgraded once we create
5807 			spatial index on it and we intend to use DATA_GEOMETRY
5808 			for legacy GIS data types which are of var-length. */
5809 			switch (col_type) {
5810 			case DATA_GEOMETRY:
5811 				if (mtype == DATA_BLOB) {
5812 					break;
5813 				}
5814 				/* Fall through */
5815 			default:
5816 				/* Column type mismatches */
5817 				DBUG_RETURN(false);
5818 			}
5819 		}
5820 
5821 		innodb_idx_fld++;
5822 	}
5823 
5824 	DBUG_RETURN(TRUE);
5825 }
5826 
5827 /** Build a template for a base column for a virtual column
5828 @param[in]	table		MySQL TABLE
5829 @param[in]	clust_index	InnoDB clustered index
5830 @param[in]	field		field in MySQL table
5831 @param[in]	col		InnoDB column
5832 @param[in,out]	templ		template to fill
5833 @param[in]	col_no		field index for virtual col
5834 */
5835 static
5836 void
innobase_vcol_build_templ(const TABLE * table,dict_index_t * clust_index,Field * field,const dict_col_t * col,mysql_row_templ_t * templ,ulint col_no)5837 innobase_vcol_build_templ(
5838 	const TABLE*		table,
5839 	dict_index_t*		clust_index,
5840 	Field*			field,
5841 	const dict_col_t*	col,
5842 	mysql_row_templ_t*	templ,
5843 	ulint			col_no)
5844 {
5845 	templ->col_no = col_no;
5846 	templ->is_virtual = col->is_virtual();
5847 
5848 	if (templ->is_virtual) {
5849 		templ->clust_rec_field_no = ULINT_UNDEFINED;
5850 		templ->rec_field_no = col->ind;
5851 	} else {
5852 		templ->clust_rec_field_no = dict_col_get_clust_pos(
5853 						col, clust_index);
5854 		ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
5855 
5856 		templ->rec_field_no = templ->clust_rec_field_no;
5857 	}
5858 
5859 	if (field->real_maybe_null()) {
5860                 templ->mysql_null_byte_offset =
5861                         field->null_offset();
5862 
5863                 templ->mysql_null_bit_mask = (ulint) field->null_bit;
5864         } else {
5865                 templ->mysql_null_bit_mask = 0;
5866         }
5867 
5868         templ->mysql_col_offset = static_cast<ulint>(
5869 					get_field_offset(table, field));
5870 	templ->mysql_col_len = static_cast<ulint>(field->pack_length());
5871         templ->type = col->mtype;
5872         templ->mysql_type = static_cast<ulint>(field->type());
5873 
5874 	if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
5875 		templ->mysql_length_bytes = static_cast<ulint>(
5876 			((Field_varstring*) field)->length_bytes);
5877 	}
5878 
5879         templ->charset = dtype_get_charset_coll(col->prtype);
5880         templ->mbminlen = dict_col_get_mbminlen(col);
5881         templ->mbmaxlen = dict_col_get_mbmaxlen(col);
5882         templ->is_unsigned = col->prtype & DATA_UNSIGNED;
5883 }
5884 
5885 /** Build template for the virtual columns and their base columns. This
5886 is done when the table first opened.
5887 @param[in]	table		MySQL TABLE
5888 @param[in]	ib_table	InnoDB dict_table_t
5889 @param[in,out]	s_templ		InnoDB template structure
5890 @param[in]	add_v		new virtual columns added along with
5891 				add index call
5892 @param[in]	locked		true if dict_sys mutex is held */
5893 void
innobase_build_v_templ(const TABLE * table,const dict_table_t * ib_table,dict_vcol_templ_t * s_templ,const dict_add_v_col_t * add_v,bool locked)5894 innobase_build_v_templ(
5895 	const TABLE*		table,
5896 	const dict_table_t*	ib_table,
5897 	dict_vcol_templ_t*	s_templ,
5898 	const dict_add_v_col_t*	add_v,
5899 	bool			locked)
5900 {
5901 	ulint	ncol = unsigned(ib_table->n_cols) - DATA_N_SYS_COLS;
5902 	ulint	n_v_col = ib_table->n_v_cols;
5903 	bool	marker[REC_MAX_N_FIELDS];
5904 
5905 	DBUG_ENTER("innobase_build_v_templ");
5906 	ut_ad(ncol < REC_MAX_N_FIELDS);
5907 
5908 	if (add_v != NULL) {
5909 		n_v_col += add_v->n_v_col;
5910 	}
5911 
5912 	ut_ad(n_v_col > 0);
5913 
5914 	if (!locked) {
5915 		mutex_enter(&dict_sys.mutex);
5916 	}
5917 
5918 	if (s_templ->vtempl) {
5919 		if (!locked) {
5920 			mutex_exit(&dict_sys.mutex);
5921 		}
5922 		DBUG_VOID_RETURN;
5923 	}
5924 
5925 	memset(marker, 0, sizeof(bool) * ncol);
5926 
5927 	s_templ->vtempl = static_cast<mysql_row_templ_t**>(
5928 		ut_zalloc_nokey((ncol + n_v_col)
5929 				* sizeof *s_templ->vtempl));
5930 	s_templ->n_col = ncol;
5931 	s_templ->n_v_col = n_v_col;
5932 	s_templ->rec_len = table->s->reclength;
5933 	s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len);
5934 	memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len);
5935 
5936 	/* Mark those columns could be base columns */
5937 	for (ulint i = 0; i < ib_table->n_v_cols; i++) {
5938 		const dict_v_col_t*	vcol = dict_table_get_nth_v_col(
5939 							ib_table, i);
5940 
5941 		for (ulint j = vcol->num_base; j--; ) {
5942 			marker[vcol->base_col[j]->ind] = true;
5943 		}
5944 	}
5945 
5946 	if (add_v) {
5947 		for (ulint i = 0; i < add_v->n_v_col; i++) {
5948 			const dict_v_col_t*	vcol = &add_v->v_col[i];
5949 
5950 			for (ulint j = vcol->num_base; j--; ) {
5951 				marker[vcol->base_col[j]->ind] = true;
5952 			}
5953 		}
5954 	}
5955 
5956 	ulint	j = 0;
5957 	ulint	z = 0;
5958 
5959 	dict_index_t*	clust_index = dict_table_get_first_index(ib_table);
5960 
5961 	for (ulint i = 0; i < table->s->fields; i++) {
5962 		Field*  field = table->field[i];
5963 
5964 		/* Build template for virtual columns */
5965 		if (!field->stored_in_db()) {
5966 #ifdef UNIV_DEBUG
5967 			const char*	name;
5968 
5969 			if (z >= ib_table->n_v_def) {
5970 				name = add_v->v_col_name[z - ib_table->n_v_def];
5971 			} else {
5972 				name = dict_table_get_v_col_name(ib_table, z);
5973 			}
5974 
5975 			ut_ad(!my_strcasecmp(system_charset_info, name,
5976 					     field->field_name.str));
5977 #endif
5978 			const dict_v_col_t*	vcol;
5979 
5980 			if (z >= ib_table->n_v_def) {
5981 				vcol = &add_v->v_col[z - ib_table->n_v_def];
5982 			} else {
5983 				vcol = dict_table_get_nth_v_col(ib_table, z);
5984 			}
5985 
5986 			s_templ->vtempl[z + s_templ->n_col]
5987 				= static_cast<mysql_row_templ_t*>(
5988 					ut_malloc_nokey(
5989 					sizeof *s_templ->vtempl[j]));
5990 
5991 			innobase_vcol_build_templ(
5992 				table, clust_index, field,
5993 				&vcol->m_col,
5994 				s_templ->vtempl[z + s_templ->n_col],
5995 				z);
5996 			z++;
5997 			continue;
5998                 }
5999 
6000 		ut_ad(j < ncol);
6001 
6002 		/* Build template for base columns */
6003 		if (marker[j]) {
6004 			dict_col_t*   col = dict_table_get_nth_col(
6005 						ib_table, j);
6006 
6007 			ut_ad(!my_strcasecmp(system_charset_info,
6008 					     dict_table_get_col_name(
6009 						     ib_table, j),
6010 					     field->field_name.str));
6011 
6012 			s_templ->vtempl[j] = static_cast<
6013 				mysql_row_templ_t*>(
6014 					ut_malloc_nokey(
6015 					sizeof *s_templ->vtempl[j]));
6016 
6017 			innobase_vcol_build_templ(
6018 				table, clust_index, field, col,
6019 				s_templ->vtempl[j], j);
6020 		}
6021 
6022 		j++;
6023 	}
6024 
6025 	if (!locked) {
6026 		mutex_exit(&dict_sys.mutex);
6027 	}
6028 
6029 	s_templ->db_name = table->s->db.str;
6030 	s_templ->tb_name = table->s->table_name.str;
6031 	DBUG_VOID_RETURN;
6032 }
6033 
6034 /** Check consistency between .frm indexes and InnoDB indexes.
6035 @param[in]	table	table object formed from .frm
6036 @param[in]	ib_table	InnoDB table definition
6037 @retval	true if not errors were found */
6038 static bool
check_index_consistency(const TABLE * table,const dict_table_t * ib_table)6039 check_index_consistency(const TABLE* table, const dict_table_t* ib_table)
6040 {
6041 	ulint mysql_num_index = table->s->keys;
6042 	ulint ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
6043 	bool ret = true;
6044 
6045 	/* If there exists inconsistency between MySQL and InnoDB dictionary
6046 	(metadata) information, the number of index defined in MySQL
6047 	could exceed that in InnoDB, return error */
6048 	if (ib_num_index < mysql_num_index) {
6049 		ret = false;
6050 		goto func_exit;
6051 	}
6052 
6053 	/* For each index in the mysql key_info array, fetch its
6054 	corresponding InnoDB index pointer into index_mapping
6055 	array. */
6056 	for (ulint count = 0; count < mysql_num_index; count++) {
6057 		const dict_index_t* index = dict_table_get_index_on_name(
6058 			ib_table, table->key_info[count].name.str);
6059 
6060 		if (index == NULL) {
6061 			sql_print_error("Cannot find index %s in InnoDB"
6062 					" index dictionary.",
6063 					table->key_info[count].name.str);
6064 			ret = false;
6065 			goto func_exit;
6066 		}
6067 
6068 		/* Double check fetched index has the same
6069 		column info as those in mysql key_info. */
6070 		if (!innobase_match_index_columns(&table->key_info[count],
6071 						  index)) {
6072 			sql_print_error("Found index %s whose column info"
6073 					" does not match that of MariaDB.",
6074 					table->key_info[count].name.str);
6075 			ret = false;
6076 			goto func_exit;
6077 		}
6078 	}
6079 
6080 func_exit:
6081 	return ret;
6082 }
6083 
6084 /********************************************************************//**
6085 Get the upper limit of the MySQL integral and floating-point type.
6086 @return maximum allowed value for the field */
6087 UNIV_INTERN
6088 ulonglong
innobase_get_int_col_max_value(const Field * field)6089 innobase_get_int_col_max_value(
6090 /*===========================*/
6091 	const Field*	field)	/*!< in: MySQL field */
6092 {
6093 	ulonglong	max_value = 0;
6094 
6095 	switch (field->key_type()) {
6096 	/* TINY */
6097 	case HA_KEYTYPE_BINARY:
6098 		max_value = 0xFFULL;
6099 		break;
6100 	case HA_KEYTYPE_INT8:
6101 		max_value = 0x7FULL;
6102 		break;
6103 	/* SHORT */
6104 	case HA_KEYTYPE_USHORT_INT:
6105 		max_value = 0xFFFFULL;
6106 		break;
6107 	case HA_KEYTYPE_SHORT_INT:
6108 		max_value = 0x7FFFULL;
6109 		break;
6110 	/* MEDIUM */
6111 	case HA_KEYTYPE_UINT24:
6112 		max_value = 0xFFFFFFULL;
6113 		break;
6114 	case HA_KEYTYPE_INT24:
6115 		max_value = 0x7FFFFFULL;
6116 		break;
6117 	/* LONG */
6118 	case HA_KEYTYPE_ULONG_INT:
6119 		max_value = 0xFFFFFFFFULL;
6120 		break;
6121 	case HA_KEYTYPE_LONG_INT:
6122 		max_value = 0x7FFFFFFFULL;
6123 		break;
6124 	/* BIG */
6125 	case HA_KEYTYPE_ULONGLONG:
6126 		max_value = 0xFFFFFFFFFFFFFFFFULL;
6127 		break;
6128 	case HA_KEYTYPE_LONGLONG:
6129 		max_value = 0x7FFFFFFFFFFFFFFFULL;
6130 		break;
6131 	case HA_KEYTYPE_FLOAT:
6132 		/* We use the maximum as per IEEE754-2008 standard, 2^24 */
6133 		max_value = 0x1000000ULL;
6134 		break;
6135 	case HA_KEYTYPE_DOUBLE:
6136 		/* We use the maximum as per IEEE754-2008 standard, 2^53 */
6137 		max_value = 0x20000000000000ULL;
6138 		break;
6139 	default:
6140 		ut_error;
6141 	}
6142 
6143 	return(max_value);
6144 }
6145 
6146 /** Initialize the AUTO_INCREMENT column metadata.
6147 
6148 Since a partial table definition for a persistent table can already be
6149 present in the InnoDB dict_sys cache before it is accessed from SQL,
6150 we have to initialize the AUTO_INCREMENT counter on the first
6151 ha_innobase::open().
6152 
6153 @param[in,out]	table	persistent table
6154 @param[in]	field	the AUTO_INCREMENT column */
6155 static
6156 void
initialize_auto_increment(dict_table_t * table,const Field * field)6157 initialize_auto_increment(dict_table_t* table, const Field* field)
6158 {
6159 	ut_ad(!table->is_temporary());
6160 
6161 	const unsigned	col_no = innodb_col_no(field);
6162 
6163 	table->autoinc_mutex.lock();
6164 
6165 	table->persistent_autoinc = 1
6166 		+ dict_table_get_nth_col_pos(table, col_no, NULL);
6167 
6168 	if (table->autoinc) {
6169 		/* Already initialized. Our caller checked
6170 		table->persistent_autoinc without
6171 		autoinc_mutex protection, and there might be multiple
6172 		ha_innobase::open() executing concurrently. */
6173 	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
6174 		/* If the recovery level is set so high that writes
6175 		are disabled we force the AUTOINC counter to 0
6176 		value effectively disabling writes to the table.
6177 		Secondly, we avoid reading the table in case the read
6178 		results in failure due to a corrupted table/index.
6179 
6180 		We will not return an error to the client, so that the
6181 		tables can be dumped with minimal hassle.  If an error
6182 		were returned in this case, the first attempt to read
6183 		the table would fail and subsequent SELECTs would succeed. */
6184 	} else if (table->persistent_autoinc) {
6185 		table->autoinc = innobase_next_autoinc(
6186 			btr_read_autoinc_with_fallback(table, col_no),
6187 			1 /* need */,
6188 			1 /* auto_increment_increment */,
6189 			0 /* auto_increment_offset */,
6190 			innobase_get_int_col_max_value(field));
6191 	}
6192 
6193 	table->autoinc_mutex.unlock();
6194 }
6195 
6196 /** Open an InnoDB table
6197 @param[in]	name	table name
6198 @return	error code
6199 @retval	0	on success */
6200 int
open(const char * name,int,uint)6201 ha_innobase::open(const char* name, int, uint)
6202 {
6203 	/* TODO: If trx_rollback_recovered(bool all=false) is ever
6204 	removed, the first-time open() must hold (or acquire and release)
6205 	a table lock that conflicts with trx_resurrect_table_locks(),
6206 	to ensure that any recovered incomplete ALTER TABLE will have been
6207 	rolled back. Otherwise, dict_table_t::instant could be cleared by
6208 	the rollback invoking dict_index_t::clear_instant_alter() while
6209 	open table handles exist in client connections. */
6210 
6211 	char			norm_name[FN_REFLEN];
6212 
6213 	DBUG_ENTER("ha_innobase::open");
6214 
6215 	normalize_table_name(norm_name, name);
6216 
6217 	m_user_thd = NULL;
6218 
6219 	/* Will be allocated if it is needed in ::update_row() */
6220 	m_upd_buf = NULL;
6221 	m_upd_buf_size = 0;
6222 
6223 	char*	is_part = is_partition(norm_name);
6224 	THD*	thd = ha_thd();
6225 	dict_table_t* ib_table = open_dict_table(name, norm_name, is_part,
6226 						 DICT_ERR_IGNORE_FK_NOKEY);
6227 
6228 	DEBUG_SYNC(thd, "ib_open_after_dict_open");
6229 
6230 	if (NULL == ib_table) {
6231 
6232 		if (is_part) {
6233 			sql_print_error("Failed to open table %s.\n",
6234 					norm_name);
6235 		}
6236 		set_my_errno(ENOENT);
6237 
6238 		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
6239 	}
6240 
6241 	size_t n_fields = omits_virtual_cols(*table_share)
6242 		? table_share->stored_fields : table_share->fields;
6243 	size_t n_cols = dict_table_get_n_user_cols(ib_table)
6244 		+ dict_table_get_n_v_cols(ib_table)
6245 		- !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID);
6246 
6247 	if (UNIV_UNLIKELY(n_cols != n_fields)) {
6248 		ib::warn() << "Table " << norm_name << " contains "
6249 			<< n_cols << " user"
6250 			" defined columns in InnoDB, but " << n_fields
6251 			<< " columns in MariaDB. Please check"
6252 			" INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and"
6253 			" https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
6254 			" for how to resolve the issue.";
6255 
6256 		/* Mark this table as corrupted, so the drop table
6257 		or force recovery can still use it, but not others. */
6258 		ib_table->file_unreadable = true;
6259 		ib_table->corrupted = true;
6260 		dict_table_close(ib_table, FALSE, FALSE);
6261 		set_my_errno(ENOENT);
6262 		DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
6263 	}
6264 
6265 	innobase_copy_frm_flags_from_table_share(ib_table, table->s);
6266 
6267 	MONITOR_INC(MONITOR_TABLE_OPEN);
6268 
6269 	if ((ib_table->flags2 & DICT_TF2_DISCARDED)) {
6270 
6271 		ib_senderrf(thd,
6272 			IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
6273 			table->s->table_name.str);
6274 
6275 		/* Allow an open because a proper DISCARD should have set
6276 		all the flags and index root page numbers to FIL_NULL that
6277 		should prevent any DML from running but it should allow DDL
6278 		operations. */
6279 	} else if (!ib_table->is_readable()) {
6280 		const fil_space_t* space = ib_table->space;
6281 		if (!space) {
6282 			ib_senderrf(
6283 				thd, IB_LOG_LEVEL_WARN,
6284 				ER_TABLESPACE_MISSING, norm_name);
6285 		}
6286 
6287 		if (!thd_tablespace_op(thd)) {
6288 			set_my_errno(ENOENT);
6289 			int ret_err = HA_ERR_TABLESPACE_MISSING;
6290 
6291 			if (space && space->crypt_data
6292 			    && space->crypt_data->is_encrypted()) {
6293 				push_warning_printf(
6294 					thd,
6295 					Sql_condition::WARN_LEVEL_WARN,
6296 					HA_ERR_DECRYPTION_FAILED,
6297 					"Table %s in file %s is encrypted"
6298 					" but encryption service or"
6299 					" used key_id %u is not available. "
6300 					" Can't continue reading table.",
6301 					table_share->table_name.str,
6302 					space->chain.start->name,
6303 					space->crypt_data->key_id);
6304 				ret_err = HA_ERR_DECRYPTION_FAILED;
6305 			}
6306 
6307 			dict_table_close(ib_table, FALSE, FALSE);
6308 			DBUG_RETURN(ret_err);
6309 		}
6310 	}
6311 
6312 	m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
6313 
6314 	m_prebuilt->default_rec = table->s->default_values;
6315 	ut_ad(m_prebuilt->default_rec);
6316 
6317 	m_prebuilt->m_mysql_table = table;
6318 
6319 	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
6320 	m_primary_key = table->s->primary_key;
6321 
6322 	key_used_on_scan = m_primary_key;
6323 
6324 	if (ib_table->n_v_cols) {
6325 		mutex_enter(&dict_sys.mutex);
6326 		if (ib_table->vc_templ == NULL) {
6327 			ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());
6328 			innobase_build_v_templ(
6329 				table, ib_table, ib_table->vc_templ, NULL,
6330 				true);
6331 		}
6332 
6333 		mutex_exit(&dict_sys.mutex);
6334 	}
6335 
6336 	if (!check_index_consistency(table, ib_table)) {
6337 		sql_print_error("InnoDB indexes are inconsistent with what "
6338 				"defined in .frm for table %s",
6339 				name);
6340 	}
6341 
6342 	/* Allocate a buffer for a 'row reference'. A row reference is
6343 	a string of bytes of length ref_length which uniquely specifies
6344 	a row in our table. Note that MySQL may also compare two row
6345 	references for equality by doing a simple memcmp on the strings
6346 	of length ref_length! */
6347 	if (!(m_prebuilt->clust_index_was_generated
6348 	      = dict_index_is_auto_gen_clust(ib_table->indexes.start))) {
6349 		if (m_primary_key >= MAX_KEY) {
6350 			ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;
6351 
6352 			/* This mismatch could cause further problems
6353 			if not attended, bring this to the user's attention
6354 			by printing a warning in addition to log a message
6355 			in the errorlog */
6356 
6357 			ib_push_frm_error(thd, ib_table, table, 0, true);
6358 
6359 			/* If m_primary_key >= MAX_KEY, its (m_primary_key)
6360 			value could be out of bound if continue to index
6361 			into key_info[] array. Find InnoDB primary index,
6362 			and assign its key_length to ref_length.
6363 			In addition, since MySQL indexes are sorted starting
6364 			with primary index, unique index etc., initialize
6365 			ref_length to the first index key length in
6366 			case we fail to find InnoDB cluster index.
6367 
6368 			Please note, this will not resolve the primary
6369 			index mismatch problem, other side effects are
6370 			possible if users continue to use the table.
6371 			However, we allow this table to be opened so
6372 			that user can adopt necessary measures for the
6373 			mismatch while still being accessible to the table
6374 			date. */
6375 			if (!table->key_info) {
6376 				ut_ad(!table->s->keys);
6377 				ref_length = 0;
6378 			} else {
6379 				ref_length = table->key_info[0].key_length;
6380 			}
6381 
6382 			/* Find corresponding cluster index
6383 			key length in MySQL's key_info[] array */
6384 			for (uint i = 0; i < table->s->keys; i++) {
6385 				dict_index_t*	index;
6386 				index = innobase_get_index(i);
6387 				if (dict_index_is_clust(index)) {
6388 					ref_length =
6389 						 table->key_info[i].key_length;
6390 				}
6391 			}
6392 		} else {
6393 			/* MySQL allocates the buffer for ref.
6394 			key_info->key_length includes space for all key
6395 			columns + one byte for each column that may be
6396 			NULL. ref_length must be as exact as possible to
6397 			save space, because all row reference buffers are
6398 			allocated based on ref_length. */
6399 
6400 			ref_length = table->key_info[m_primary_key].key_length;
6401 		}
6402 	} else {
6403 		if (m_primary_key != MAX_KEY) {
6404 
6405 			ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;
6406 
6407 			/* This mismatch could cause further problems
6408 			if not attended, bring this to the user attention
6409 			by printing a warning in addition to log a message
6410 			in the errorlog */
6411 			ib_push_frm_error(thd, ib_table, table, 0, true);
6412 		}
6413 
6414 		ref_length = DATA_ROW_ID_LEN;
6415 
6416 		/* If we automatically created the clustered index, then
6417 		MySQL does not know about it, and MySQL must NOT be aware
6418 		of the index used on scan, to make it avoid checking if we
6419 		update the column of the index. That is why we assert below
6420 		that key_used_on_scan is the undefined value MAX_KEY.
6421 		The column is the row id in the automatical generation case,
6422 		and it will never be updated anyway. */
6423 
6424 		if (key_used_on_scan != MAX_KEY) {
6425 			sql_print_warning(
6426 				"Table %s key_used_on_scan is %u even "
6427 				"though there is no primary key inside "
6428 				"InnoDB.", name, key_used_on_scan);
6429 		}
6430 	}
6431 
6432 	/* Index block size in InnoDB: used by MySQL in query optimization */
6433 	stats.block_size = srv_page_size;
6434 
6435 	const my_bool for_vc_purge = THDVAR(thd, background_thread);
6436 
6437 	if (for_vc_purge || !m_prebuilt->table
6438 	    || m_prebuilt->table->is_temporary()
6439 	    || m_prebuilt->table->persistent_autoinc
6440 	    || !m_prebuilt->table->is_readable()) {
6441 	} else if (const Field* ai = table->found_next_number_field) {
6442 		initialize_auto_increment(m_prebuilt->table, ai);
6443 	}
6444 
6445 	/* Set plugin parser for fulltext index */
6446 	for (uint i = 0; i < table->s->keys; i++) {
6447 		if (table->key_info[i].flags & HA_USES_PARSER) {
6448 			dict_index_t*	index = innobase_get_index(i);
6449 			plugin_ref	parser = table->key_info[i].parser;
6450 
6451 			ut_ad(index->type & DICT_FTS);
6452 			index->parser =
6453 				static_cast<st_mysql_ftparser *>(
6454 					plugin_decl(parser)->info);
6455 
6456 			DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
6457 				index->parser = &fts_default_parser;);
6458 		}
6459 	}
6460 
6461 	ut_ad(!m_prebuilt->table
6462 	      || table->versioned() == m_prebuilt->table->versioned());
6463 
6464 	if (!for_vc_purge) {
6465 		info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST
6466 		     | HA_STATUS_OPEN);
6467 	}
6468 
6469 	DBUG_RETURN(0);
6470 }
6471 
6472 /** Convert MySQL column number to dict_table_t::cols[] offset.
6473 @param[in]	field	non-virtual column
6474 @return	column number relative to dict_table_t::cols[] */
6475 unsigned
innodb_col_no(const Field * field)6476 innodb_col_no(const Field* field)
6477 {
6478 	ut_ad(!innobase_is_s_fld(field));
6479 	const TABLE*	table	= field->table;
6480 	unsigned	col_no	= 0;
6481 	ut_ad(field == table->field[field->field_index]);
6482 	for (unsigned i = 0; i < field->field_index; i++) {
6483 		if (table->field[i]->stored_in_db()) {
6484 			col_no++;
6485 		}
6486 	}
6487 	return(col_no);
6488 }
6489 
6490 /** Opens dictionary table object using table name. For partition, we need to
6491 try alternative lower/upper case names to support moving data files across
6492 platforms.
6493 @param[in]	table_name	name of the table/partition
6494 @param[in]	norm_name	normalized name of the table/partition
6495 @param[in]	is_partition	if this is a partition of a table
6496 @param[in]	ignore_err	error to ignore for loading dictionary object
6497 @return dictionary table object or NULL if not found */
6498 dict_table_t*
open_dict_table(const char * table_name,const char * norm_name,bool is_partition,dict_err_ignore_t ignore_err)6499 ha_innobase::open_dict_table(
6500 	const char*
6501 #ifdef _WIN32
6502 	table_name
6503 #endif
6504 	,
6505 	const char*		norm_name,
6506 	bool			is_partition,
6507 	dict_err_ignore_t	ignore_err)
6508 {
6509 	DBUG_ENTER("ha_innobase::open_dict_table");
6510 	dict_table_t*	ib_table = dict_table_open_on_name(norm_name, FALSE,
6511 							   TRUE, ignore_err);
6512 
6513 	if (NULL == ib_table && is_partition) {
6514 		/* MySQL partition engine hard codes the file name
6515 		separator as "#P#". The text case is fixed even if
6516 		lower_case_table_names is set to 1 or 2. This is true
6517 		for sub-partition names as well. InnoDB always
6518 		normalises file names to lower case on Windows, this
6519 		can potentially cause problems when copying/moving
6520 		tables between platforms.
6521 
6522 		1) If boot against an installation from Windows
6523 		platform, then its partition table name could
6524 		be in lower case in system tables. So we will
6525 		need to check lower case name when load table.
6526 
6527 		2) If we boot an installation from other case
6528 		sensitive platform in Windows, we might need to
6529 		check the existence of table name without lower
6530 		case in the system table. */
6531 		if (innobase_get_lower_case_table_names() == 1) {
6532 			char	par_case_name[FN_REFLEN];
6533 
6534 #ifndef _WIN32
6535 			/* Check for the table using lower
6536 			case name, including the partition
6537 			separator "P" */
6538 			strcpy(par_case_name, norm_name);
6539 			innobase_casedn_str(par_case_name);
6540 #else
6541 			/* On Windows platfrom, check
6542 			whether there exists table name in
6543 			system table whose name is
6544 			not being normalized to lower case */
6545 			create_table_info_t::
6546 				normalize_table_name_low(
6547 					par_case_name,
6548 					table_name, FALSE);
6549 #endif
6550 			ib_table = dict_table_open_on_name(
6551 				par_case_name, FALSE, TRUE,
6552 				ignore_err);
6553 		}
6554 
6555 		if (ib_table != NULL) {
6556 #ifndef _WIN32
6557 			sql_print_warning("Partition table %s opened"
6558 					  " after converting to lower"
6559 					  " case. The table may have"
6560 					  " been moved from a case"
6561 					  " in-sensitive file system."
6562 					  " Please recreate table in"
6563 					  " the current file system\n",
6564 					  norm_name);
6565 #else
6566 			sql_print_warning("Partition table %s opened"
6567 					  " after skipping the step to"
6568 					  " lower case the table name."
6569 					  " The table may have been"
6570 					  " moved from a case sensitive"
6571 					  " file system. Please"
6572 					  " recreate table in the"
6573 					  " current file system\n",
6574 					  norm_name);
6575 #endif
6576 		}
6577 	}
6578 
6579 	DBUG_RETURN(ib_table);
6580 }
6581 
6582 handler*
clone(const char * name,MEM_ROOT * mem_root)6583 ha_innobase::clone(
6584 /*===============*/
6585 	const char*	name,		/*!< in: table name */
6586 	MEM_ROOT*	mem_root)	/*!< in: memory context */
6587 {
6588 	DBUG_ENTER("ha_innobase::clone");
6589 
6590 	ha_innobase*	new_handler = static_cast<ha_innobase*>(
6591 		handler::clone(m_prebuilt->table->name.m_name, mem_root));
6592 
6593 	if (new_handler != NULL) {
6594 		DBUG_ASSERT(new_handler->m_prebuilt != NULL);
6595 
6596 		new_handler->m_prebuilt->select_lock_type
6597 			= m_prebuilt->select_lock_type;
6598 	}
6599 
6600 	DBUG_RETURN(new_handler);
6601 }
6602 
6603 
6604 uint
max_supported_key_part_length() const6605 ha_innobase::max_supported_key_part_length() const
6606 /*==============================================*/
6607 {
6608 	/* A table format specific index column length check will be performed
6609 	at ha_innobase::add_index() and row_create_index_for_mysql() */
6610 	return(REC_VERSION_56_MAX_INDEX_COL_LEN);
6611 }
6612 
6613 /******************************************************************//**
6614 Closes a handle to an InnoDB table.
6615 @return 0 */
6616 
6617 int
close()6618 ha_innobase::close()
6619 /*================*/
6620 {
6621 	DBUG_ENTER("ha_innobase::close");
6622 
6623 	row_prebuilt_free(m_prebuilt, FALSE);
6624 
6625 	if (m_upd_buf != NULL) {
6626 		ut_ad(m_upd_buf_size != 0);
6627 		my_free(m_upd_buf);
6628 		m_upd_buf = NULL;
6629 		m_upd_buf_size = 0;
6630 	}
6631 
6632 	MONITOR_INC(MONITOR_TABLE_CLOSE);
6633 
6634 	/* Tell InnoDB server that there might be work for
6635 	utility threads: */
6636 
6637 	srv_active_wake_master_thread();
6638 
6639 	DBUG_RETURN(0);
6640 }
6641 
6642 /* The following accessor functions should really be inside MySQL code! */
6643 
6644 #ifdef WITH_WSREP
/** Replace a string-like key value with its collation sort form, in place.
For BIT, string, VARCHAR and BLOB types the value in str is transformed
with the strnxfrm function of the column's collation and written back
into str. For every other type str is left untouched.
@return str_length, except for string-like types when
wsrep_protocol_version >= 3, where the length returned by strnxfrm
is returned instead */
UNIV_INTERN
ulint
wsrep_innobase_mysql_sort(
/*======================*/
					/* out: str contains sort string */
	int		mysql_type,	/* in: MySQL type */
	uint		charset_number,	/* in: number of the charset */
	unsigned char*	str,		/* in/out: data field; overwritten
					with the sort string for
					string-like types */
	unsigned int	str_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned int	buf_length)	/* in: total str buffer length */

{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	/* Default result: the length is reported unchanged. */
	ulint			ret_length =	str_length;

	DBUG_ASSERT(str_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
	{
		/* Zero-filled scratch copy of the input; it is the source
		of the transformations below, str is the destination. */
		uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;

		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
			  sql_print_error("InnoDB needs charset %lu for doing "
					  "a comparison, but MariaDB cannot "
					  "find that charset.",
					  (ulong) charset_number);
				ut_a(0);
			}
		}

		/* The input must fit into the scratch buffer. */
		ut_a(str_length <= tmp_length);
		memcpy(tmp_str, str, str_length);

		/* First transformation: the source length passed here is
		the whole scratch buffer (tmp_length), not str_length.
		NOTE(review): str is transformed again in both branches
		below, and the first branch uses the length returned here
		as its source length - confirm that the double
		transformation is intended. */
		tmp_length = charset->coll->strnxfrm(charset, str, str_length,
						     str_length, tmp_str,
						     tmp_length, 0);
		DBUG_ASSERT(tmp_length <= str_length);
		if (wsrep_protocol_version < 3) {
			/* Destination limited to str_length; ret_length
			stays at str_length. */
			tmp_length = charset->coll->strnxfrm(
				charset, str, str_length,
				str_length, tmp_str, tmp_length, 0);
			DBUG_ASSERT(tmp_length <= str_length);
		} else {
			/* strnxfrm will expand the destination string,
			   protocols < 3 truncated the sorted string
			   protocols >= 3 gets full sorted string
			*/
			tmp_length = charset->coll->strnxfrm(
				charset, str, buf_length,
				str_length, tmp_str, str_length, 0);
			DBUG_ASSERT(tmp_length <= buf_length);
			ret_length = tmp_length;
		}

		break;
	}
	/* No transformation is applied to the remaining types. */
	case MYSQL_TYPE_DECIMAL :
	case MYSQL_TYPE_TINY :
	case MYSQL_TYPE_SHORT :
	case MYSQL_TYPE_LONG :
	case MYSQL_TYPE_FLOAT :
	case MYSQL_TYPE_DOUBLE :
	case MYSQL_TYPE_NULL :
	case MYSQL_TYPE_TIMESTAMP :
	case MYSQL_TYPE_LONGLONG :
	case MYSQL_TYPE_INT24 :
	case MYSQL_TYPE_DATE :
	case MYSQL_TYPE_TIME :
	case MYSQL_TYPE_DATETIME :
	case MYSQL_TYPE_YEAR :
	case MYSQL_TYPE_NEWDATE :
	case MYSQL_TYPE_NEWDECIMAL :
	case MYSQL_TYPE_ENUM :
	case MYSQL_TYPE_SET :
	case MYSQL_TYPE_GEOMETRY :
		break;
	default:
		break;
	}

	return ret_length;
}
6753 #endif /* WITH_WSREP */
6754 
6755 /******************************************************************//**
6756 compare two character string according to their charset. */
6757 int
innobase_fts_text_cmp(const void * cs,const void * p1,const void * p2)6758 innobase_fts_text_cmp(
6759 /*==================*/
6760 	const void*	cs,		/*!< in: Character set */
6761 	const void*     p1,		/*!< in: key */
6762 	const void*     p2)		/*!< in: node */
6763 {
6764 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6765 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6766 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6767 
6768 	return(ha_compare_text(
6769 		charset, s1->f_str, static_cast<uint>(s1->f_len),
6770 		s2->f_str, static_cast<uint>(s2->f_len), 0));
6771 }
6772 
6773 /******************************************************************//**
6774 compare two character string case insensitively according to their charset. */
6775 int
innobase_fts_text_case_cmp(const void * cs,const void * p1,const void * p2)6776 innobase_fts_text_case_cmp(
6777 /*=======================*/
6778 	const void*	cs,		/*!< in: Character set */
6779 	const void*     p1,		/*!< in: key */
6780 	const void*     p2)		/*!< in: node */
6781 {
6782 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6783 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6784 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6785 	ulint			newlen;
6786 
6787 	my_casedn_str(charset, (char*) s2->f_str);
6788 
6789 	newlen = strlen((const char*) s2->f_str);
6790 
6791 	return(ha_compare_text(
6792 		charset, s1->f_str, static_cast<uint>(s1->f_len),
6793 		s2->f_str, static_cast<uint>(newlen), 0));
6794 }
6795 
6796 /******************************************************************//**
6797 Get the first character's code position for FTS index partition. */
6798 ulint
innobase_strnxfrm(const CHARSET_INFO * cs,const uchar * str,const ulint len)6799 innobase_strnxfrm(
6800 /*==============*/
6801 	const CHARSET_INFO*
6802 			cs,		/*!< in: Character set */
6803 	const uchar*	str,		/*!< in: string */
6804 	const ulint	len)		/*!< in: string length */
6805 {
6806 	uchar		mystr[2];
6807 	ulint		value;
6808 
6809 	if (!str || len == 0) {
6810 		return(0);
6811 	}
6812 
6813 	my_strnxfrm(cs, (uchar*) mystr, 2, str, len);
6814 
6815 	value = mach_read_from_2(mystr);
6816 
6817 	if (value > 255) {
6818 		value = value / 256;
6819 	}
6820 
6821 	return(value);
6822 }
6823 
6824 /******************************************************************//**
6825 compare two character string according to their charset. */
6826 int
innobase_fts_text_cmp_prefix(const void * cs,const void * p1,const void * p2)6827 innobase_fts_text_cmp_prefix(
6828 /*=========================*/
6829 	const void*	cs,		/*!< in: Character set */
6830 	const void*	p1,		/*!< in: prefix key */
6831 	const void*	p2)		/*!< in: value to compare */
6832 {
6833 	const CHARSET_INFO*	charset = (const CHARSET_INFO*) cs;
6834 	const fts_string_t*	s1 = (const fts_string_t*) p1;
6835 	const fts_string_t*	s2 = (const fts_string_t*) p2;
6836 	int			result;
6837 
6838 	result = ha_compare_text(
6839 		charset, s2->f_str, static_cast<uint>(s2->f_len),
6840 		s1->f_str, static_cast<uint>(s1->f_len), 1);
6841 
6842 	/* We switched s1, s2 position in ha_compare_text. So we need
6843 	to negate the result */
6844 	return(-result);
6845 }
6846 
6847 /******************************************************************//**
6848 Makes all characters in a string lower case. */
6849 size_t
innobase_fts_casedn_str(CHARSET_INFO * cs,char * src,size_t src_len,char * dst,size_t dst_len)6850 innobase_fts_casedn_str(
6851 /*====================*/
6852 	CHARSET_INFO*	cs,	/*!< in: Character set */
6853 	char*		src,	/*!< in: string to put in lower case */
6854 	size_t		src_len,/*!< in: input string length */
6855 	char*		dst,	/*!< in: buffer for result string */
6856 	size_t		dst_len)/*!< in: buffer size */
6857 {
6858 	if (cs->casedn_multiply == 1) {
6859 		memcpy(dst, src, src_len);
6860 		dst[src_len] = 0;
6861 		my_casedn_str(cs, dst);
6862 
6863 		return(strlen(dst));
6864 	} else {
6865 		return(cs->cset->casedn(cs, src, src_len, dst, dst_len));
6866 	}
6867 }
6868 
/* Nonzero if the character type bits c contain any of _MY_U, _MY_L or
_MY_NMR (presumably the upper case, lower case and numeral flags of the
charset library - confirm in m_ctype.h), or if the byte ch is '_'. */
#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_')

/* Always 0: the argument is ignored, so no byte is treated as a
"misc" word character. */
#define misc_word_char(X)       0
6872 
/*************************************************************//**
Get the next token from the given string and store it in *token.
It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
The token is not copied: token->f_str points into the input text.
If no word character is found before end, token->f_str is NULL and
token->f_len and token->f_n_char are 0.
@return length of string processed */
ulint
innobase_mysql_fts_get_token(
/*=========================*/
	CHARSET_INFO*	cs,		/*!< in: Character set */
	const byte*	start,		/*!< in: start of text */
	const byte*	end,		/*!< in: one character past end of
					text */
	fts_string_t*	token)		/*!< out: token's text */
{
	int		mbl;
	const uchar*	doc = start;

	ut_a(cs);

	token->f_n_char = token->f_len = 0;
	token->f_str = NULL;

	/* Phase 1: skip everything that is not a word character. */
	for (;;) {

		if (doc >= end) {
			/* Reached the end without finding a token. */
			return ulint(doc - start);
		}

		int	ctype;

		mbl = cs->cset->ctype(
			cs, &ctype, doc, (const uchar*) end);

		if (true_word_char(ctype, *doc)) {
			break;
		}

		/* Advance by |mbl| bytes, or by 1 byte if mbl is 0. */
		doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
	}

	/* mwc counts trailing "misc" word characters. As long as
	misc_word_char() expands to 0, it stays 0. */
	ulint	mwc = 0;
	/* Number of characters (not bytes) in the token. */
	ulint	length = 0;

	token->f_str = const_cast<byte*>(doc);

	/* Phase 2: consume characters until the first one that does not
	belong to the word, or until the end of the text. */
	while (doc < end) {

		int	ctype;

		mbl = cs->cset->ctype(
			cs, &ctype, (uchar*) doc, (uchar*) end);
		if (true_word_char(ctype, *doc)) {
			mwc = 0;
		} else if (!misc_word_char(*doc) || mwc) {
			break;
		} else {
			++mwc;
		}

		++length;

		/* Advance by |mbl| bytes, or by 1 byte if mbl is 0. */
		doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
	}

	/* Token length in bytes, less the mwc count. */
	token->f_len = (uint) (doc - token->f_str) - mwc;
	token->f_n_char = length;

	return ulint(doc - start);
}
6941 
6942 /** Converts a MySQL type to an InnoDB type. Note that this function returns
6943 the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
6944 VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
6945 @param[out]	unsigned_flag	DATA_UNSIGNED if an 'unsigned type'; at least
6946 ENUM and SET, and unsigned integer types are 'unsigned types'
6947 @param[in]	f		MySQL Field
6948 @return DATA_BINARY, DATA_VARCHAR, ... */
6949 ulint
get_innobase_type_from_mysql_type(ulint * unsigned_flag,const void * f)6950 get_innobase_type_from_mysql_type(
6951 	ulint*			unsigned_flag,
6952 	const void*		f)
6953 {
6954 	const class Field* field = reinterpret_cast<const class Field*>(f);
6955 
6956 	/* The following asserts try to check that the MySQL type code fits in
6957 	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
6958 	the type */
6959 
6960 	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
6961 	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
6962 	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
6963 	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
6964 	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
6965 
6966 	if (field->flags & UNSIGNED_FLAG) {
6967 
6968 		*unsigned_flag = DATA_UNSIGNED;
6969 	} else {
6970 		*unsigned_flag = 0;
6971 	}
6972 
6973 	if (field->real_type() == MYSQL_TYPE_ENUM
6974 		|| field->real_type() == MYSQL_TYPE_SET) {
6975 
6976 		/* MySQL has field->type() a string type for these, but the
6977 		data is actually internally stored as an unsigned integer
6978 		code! */
6979 
6980 		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
6981 						flag set to zero, even though
6982 						internally this is an unsigned
6983 						integer type */
6984 		return(DATA_INT);
6985 	}
6986 
6987 	switch (field->type()) {
6988 		/* NOTE that we only allow string types in DATA_MYSQL and
6989 		DATA_VARMYSQL */
6990 	case MYSQL_TYPE_VAR_STRING:	/* old <= 4.1 VARCHAR */
6991 	case MYSQL_TYPE_VARCHAR:	/* new >= 5.0.3 true VARCHAR */
6992 		if (field->binary()) {
6993 			return(DATA_BINARY);
6994 		} else if (field->charset() == &my_charset_latin1) {
6995 			return(DATA_VARCHAR);
6996 		} else {
6997 			return(DATA_VARMYSQL);
6998 		}
6999 	case MYSQL_TYPE_BIT:
7000 	case MYSQL_TYPE_STRING:
7001 		if (field->binary()) {
7002 			return(DATA_FIXBINARY);
7003 		} else if (field->charset() == &my_charset_latin1) {
7004 			return(DATA_CHAR);
7005 		} else {
7006 			return(DATA_MYSQL);
7007 		}
7008 	case MYSQL_TYPE_NEWDECIMAL:
7009 		return(DATA_FIXBINARY);
7010 	case MYSQL_TYPE_LONG:
7011 	case MYSQL_TYPE_LONGLONG:
7012 	case MYSQL_TYPE_TINY:
7013 	case MYSQL_TYPE_SHORT:
7014 	case MYSQL_TYPE_INT24:
7015 	case MYSQL_TYPE_DATE:
7016 	case MYSQL_TYPE_YEAR:
7017 	case MYSQL_TYPE_NEWDATE:
7018 		return(DATA_INT);
7019 	case MYSQL_TYPE_TIME:
7020 	case MYSQL_TYPE_DATETIME:
7021 	case MYSQL_TYPE_TIMESTAMP:
7022 		if (field->key_type() == HA_KEYTYPE_BINARY) {
7023 			return(DATA_FIXBINARY);
7024 		} else {
7025 			return(DATA_INT);
7026 		}
7027 	case MYSQL_TYPE_FLOAT:
7028 		return(DATA_FLOAT);
7029 	case MYSQL_TYPE_DOUBLE:
7030 		return(DATA_DOUBLE);
7031 	case MYSQL_TYPE_DECIMAL:
7032 		return(DATA_DECIMAL);
7033 	case MYSQL_TYPE_GEOMETRY:
7034 		return(DATA_GEOMETRY);
7035 	case MYSQL_TYPE_TINY_BLOB:
7036 	case MYSQL_TYPE_MEDIUM_BLOB:
7037 	case MYSQL_TYPE_BLOB:
7038 	case MYSQL_TYPE_LONG_BLOB:
7039 		return(DATA_BLOB);
7040 	case MYSQL_TYPE_NULL:
7041 		/* MySQL currently accepts "NULL" datatype, but will
7042 		reject such datatype in the next release. We will cope
7043 		with it and not trigger assertion failure in 5.1 */
7044 		break;
7045 	default:
7046 		ut_error;
7047 	}
7048 
7049 	return(0);
7050 }
7051 
7052 /*******************************************************************//**
7053 Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
7054 storage format.
7055 @return value */
7056 static inline
7057 uint
innobase_read_from_2_little_endian(const uchar * buf)7058 innobase_read_from_2_little_endian(
7059 /*===============================*/
7060 	const uchar*	buf)	/*!< in: from where to read */
7061 {
7062 	return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
7063 }
7064 
7065 #ifdef WITH_WSREP
7066 /*******************************************************************//**
7067 Stores a key value for a row to a buffer.
7068 @return	key value length as stored in buff */
7069 UNIV_INTERN
7070 uint
wsrep_store_key_val_for_row(THD * thd,TABLE * table,uint keynr,char * buff,uint buff_len,const uchar * record,ibool * key_is_null)7071 wsrep_store_key_val_for_row(
7072 /*=========================*/
7073 	THD* 		thd,
7074 	TABLE*		table,
7075 	uint		keynr,	/*!< in: key number */
7076 	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
7077 				format) */
7078 	uint		buff_len,/*!< in: buffer length */
7079 	const uchar*	record,
7080 	ibool*          key_is_null)/*!< out: full key was null */
7081 {
7082 	KEY*		key_info	= table->key_info + keynr;
7083 	KEY_PART_INFO*	key_part	= key_info->key_part;
7084 	KEY_PART_INFO*	end		= key_part + key_info->user_defined_key_parts;
7085 	char*		buff_start	= buff;
7086 	enum_field_types mysql_type;
7087 	Field*		field;
7088 	uint buff_space = buff_len;
7089 
7090 	DBUG_ENTER("wsrep_store_key_val_for_row");
7091 
7092 	memset(buff, 0, buff_len);
7093 	*key_is_null = TRUE;
7094 
7095 	for (; key_part != end; key_part++) {
7096 
7097 		uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
7098 		ibool part_is_null = FALSE;
7099 
7100 		if (key_part->null_bit) {
7101 			if (buff_space > 0) {
7102 				if (record[key_part->null_offset]
7103 				    & key_part->null_bit) {
7104 					*buff = 1;
7105 					part_is_null = TRUE;
7106 				} else {
7107 					*buff = 0;
7108 				}
7109 				buff++;
7110 				buff_space--;
7111 			} else {
7112 				fprintf (stderr, "WSREP: key truncated: %s\n",
7113 					 wsrep_thd_query(thd));
7114 			}
7115 		}
7116 		if (!part_is_null)  *key_is_null = FALSE;
7117 
7118 		field = key_part->field;
7119 		mysql_type = field->type();
7120 
7121 		if (mysql_type == MYSQL_TYPE_VARCHAR) {
7122 						/* >= 5.0.3 true VARCHAR */
7123 			ulint		lenlen;
7124 			ulint		len;
7125 			const byte*	data;
7126 			ulint		key_len;
7127 			ulint		true_len;
7128 			const CHARSET_INFO* cs;
7129 			int		error=0;
7130 
7131 			key_len = key_part->length;
7132 
7133 			if (part_is_null) {
7134 				true_len = key_len + 2;
7135 				if (true_len > buff_space) {
7136 					fprintf (stderr,
7137 						 "WSREP: key truncated: %s\n",
7138 						 wsrep_thd_query(thd));
7139 					true_len = buff_space;
7140 				}
7141 				buff       += true_len;
7142 				buff_space -= true_len;
7143 				continue;
7144 			}
7145 			cs = field->charset();
7146 
7147 			lenlen = (ulint)
7148 				(((Field_varstring*)field)->length_bytes);
7149 
7150 			data = row_mysql_read_true_varchar(&len,
7151 				(byte*) (record
7152 				+ (ulint)get_field_offset(table, field)),
7153 				lenlen);
7154 
7155 			true_len = len;
7156 
7157 			/* For multi byte character sets we need to calculate
7158 			the true length of the key */
7159 
7160 			if (len > 0 && cs->mbmaxlen > 1) {
7161 				true_len = (ulint) my_well_formed_length(cs,
7162 						(const char *) data,
7163 						(const char *) data + len,
7164 						(uint) (key_len /
7165 						cs->mbmaxlen),
7166 						&error);
7167 			}
7168 
7169 			/* In a column prefix index, we may need to truncate
7170 			the stored value: */
7171 			if (true_len > key_len) {
7172 				true_len = key_len;
7173 			}
7174 			/* cannot exceed max column lenght either, we may need to truncate
7175 			the stored value: */
7176 			if (true_len > sizeof(sorted)) {
7177 			  true_len = sizeof(sorted);
7178 			}
7179 
7180 			memcpy(sorted, data, true_len);
7181 			true_len = wsrep_innobase_mysql_sort(
7182 				mysql_type, cs->number, sorted, true_len,
7183 				REC_VERSION_56_MAX_INDEX_COL_LEN);
7184 			if (wsrep_protocol_version > 1) {
7185 				/* Note that we always reserve the maximum possible
7186 				length of the true VARCHAR in the key value, though
7187 				only len first bytes after the 2 length bytes contain
7188 				actual data. The rest of the space was reset to zero
7189 				in the bzero() call above. */
7190 				if (true_len > buff_space) {
7191 					WSREP_DEBUG (
7192 						 "write set key truncated for: %s\n",
7193 						 wsrep_thd_query(thd));
7194 					true_len = buff_space;
7195 				}
7196  				memcpy(buff, sorted, true_len);
7197 				buff += true_len;
7198 				buff_space -= true_len;
7199 			} else {
7200 				buff += key_len;
7201 			}
7202 		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
7203 			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
7204 			|| mysql_type == MYSQL_TYPE_BLOB
7205 			|| mysql_type == MYSQL_TYPE_LONG_BLOB
7206 			/* MYSQL_TYPE_GEOMETRY data is treated
7207 			as BLOB data in innodb. */
7208 			|| mysql_type == MYSQL_TYPE_GEOMETRY) {
7209 
7210 			const CHARSET_INFO* cs;
7211 			ulint		key_len;
7212 			ulint		true_len;
7213 			int		error=0;
7214 			ulint		blob_len;
7215 			const byte*	blob_data;
7216 
7217 			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
7218 
7219 			key_len = key_part->length;
7220 
7221 			if (part_is_null) {
7222 				true_len = key_len + 2;
7223 				if (true_len > buff_space) {
7224 					fprintf (stderr,
7225 						 "WSREP: key truncated: %s\n",
7226 						 wsrep_thd_query(thd));
7227 					true_len = buff_space;
7228 				}
7229 				buff       += true_len;
7230 				buff_space -= true_len;
7231 
7232 				continue;
7233 			}
7234 
7235 			cs = field->charset();
7236 
7237 			blob_data = row_mysql_read_blob_ref(&blob_len,
7238 				(byte*) (record
7239 				+ (ulint)get_field_offset(table, field)),
7240 					(ulint) field->pack_length());
7241 
7242 			true_len = blob_len;
7243 
7244 			ut_a(get_field_offset(table, field)
7245 				== key_part->offset);
7246 
7247 			/* For multi byte character sets we need to calculate
7248 			the true length of the key */
7249 
7250 			if (blob_len > 0 && cs->mbmaxlen > 1) {
7251 				true_len = (ulint) my_well_formed_length(cs,
7252 						(const char *) blob_data,
7253 						(const char *) blob_data
7254 							+ blob_len,
7255 						(uint) (key_len /
7256 							cs->mbmaxlen),
7257 						&error);
7258 			}
7259 
7260 			/* All indexes on BLOB and TEXT are column prefix
7261 			indexes, and we may need to truncate the data to be
7262 			stored in the key value: */
7263 
7264 			if (true_len > key_len) {
7265 				true_len = key_len;
7266 			}
7267 
7268 			memcpy(sorted, blob_data, true_len);
7269 			true_len = wsrep_innobase_mysql_sort(
7270 				mysql_type, cs->number, sorted, true_len,
7271 				REC_VERSION_56_MAX_INDEX_COL_LEN);
7272 
7273 
7274 			/* Note that we always reserve the maximum possible
7275 			length of the BLOB prefix in the key value. */
7276 			if (wsrep_protocol_version > 1) {
7277 				if (true_len > buff_space) {
7278 					fprintf (stderr,
7279 						 "WSREP: key truncated: %s\n",
7280 						 wsrep_thd_query(thd));
7281 					true_len = buff_space;
7282 				}
7283 				buff       += true_len;
7284 				buff_space -= true_len;
7285 			} else {
7286 				buff += key_len;
7287 			}
7288 			memcpy(buff, sorted, true_len);
7289 		} else {
7290 			/* Here we handle all other data types except the
7291 			true VARCHAR, BLOB and TEXT. Note that the column
7292 			value we store may be also in a column prefix
7293 			index. */
7294 
7295 			const CHARSET_INFO*	cs = NULL;
7296 			ulint			true_len;
7297 			ulint			key_len;
7298 			const uchar*		src_start;
7299 			int			error=0;
7300 			enum_field_types	real_type;
7301 
7302 			key_len = key_part->length;
7303 
7304 			if (part_is_null) {
7305 				true_len = key_len;
7306 				if (true_len > buff_space) {
7307 					fprintf (stderr,
7308 						 "WSREP: key truncated: %s\n",
7309 						 wsrep_thd_query(thd));
7310 					true_len = buff_space;
7311 				}
7312 				buff       += true_len;
7313 				buff_space -= true_len;
7314 
7315 				continue;
7316 			}
7317 
7318 			src_start = record + key_part->offset;
7319 			real_type = field->real_type();
7320 			true_len = key_len;
7321 
7322 			/* Character set for the field is defined only
7323 			to fields whose type is string and real field
7324 			type is not enum or set. For these fields check
7325 			if character set is multi byte. */
7326 
7327 			if (real_type != MYSQL_TYPE_ENUM
7328 				&& real_type != MYSQL_TYPE_SET
7329 				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
7330 					|| mysql_type == MYSQL_TYPE_STRING)) {
7331 
7332 				cs = field->charset();
7333 
7334 				/* For multi byte character sets we need to
7335 				calculate the true length of the key */
7336 
7337 				if (key_len > 0 && cs->mbmaxlen > 1) {
7338 
7339 					true_len = (ulint)
7340 						my_well_formed_length(cs,
7341 							(const char *)src_start,
7342 							(const char *)src_start
7343 								+ key_len,
7344 							(uint) (key_len /
7345 								cs->mbmaxlen),
7346 							&error);
7347 				}
7348 				memcpy(sorted, src_start, true_len);
7349 				true_len = wsrep_innobase_mysql_sort(
7350 					mysql_type, cs->number, sorted, true_len,
7351 					REC_VERSION_56_MAX_INDEX_COL_LEN);
7352 
7353 				if (true_len > buff_space) {
7354 					fprintf (stderr,
7355 						 "WSREP: key truncated: %s\n",
7356 						 wsrep_thd_query(thd));
7357 					true_len   = buff_space;
7358 				}
7359 				memcpy(buff, sorted, true_len);
7360 			} else {
7361 				memcpy(buff, src_start, true_len);
7362 			}
7363 			buff       += true_len;
7364 			buff_space -= true_len;
7365 		}
7366 	}
7367 
7368 	ut_a(buff <= buff_start + buff_len);
7369 
7370 	DBUG_RETURN((uint)(buff - buff_start));
7371 }
7372 #endif /* WITH_WSREP */
7373 /**************************************************************//**
7374 Determines if a field is needed in a m_prebuilt struct 'template'.
7375 @return field to use, or NULL if the field is not needed */
7376 static
7377 const Field*
build_template_needs_field(bool index_contains,bool read_just_key,bool fetch_all_in_key,bool fetch_primary_key_cols,dict_index_t * index,const TABLE * table,ulint i,ulint num_v)7378 build_template_needs_field(
7379 /*=======================*/
7380 	bool		index_contains,	/*!< in:
7381 					dict_index_t::contains_col_or_prefix(
7382 					i) */
7383 	bool		read_just_key,	/*!< in: TRUE when MySQL calls
7384 					ha_innobase::extra with the
7385 					argument HA_EXTRA_KEYREAD; it is enough
7386 					to read just columns defined in
7387 					the index (i.e., no read of the
7388 					clustered index record necessary) */
7389 	bool		fetch_all_in_key,
7390 					/*!< in: true=fetch all fields in
7391 					the index */
7392 	bool		fetch_primary_key_cols,
7393 					/*!< in: true=fetch the
7394 					primary key columns */
7395 	dict_index_t*	index,		/*!< in: InnoDB index to use */
7396 	const TABLE*	table,		/*!< in: MySQL table object */
7397 	ulint		i,		/*!< in: field index in InnoDB table */
7398 	ulint		num_v)		/*!< in: num virtual column so far */
7399 {
7400 	const Field*	field	= table->field[i];
7401 
7402 	if (!field->stored_in_db()
7403 	    && ha_innobase::omits_virtual_cols(*table->s)) {
7404 		return NULL;
7405 	}
7406 
7407 	if (!index_contains) {
7408 		if (read_just_key) {
7409 			/* If this is a 'key read', we do not need
7410 			columns that are not in the key */
7411 
7412 			return(NULL);
7413 		}
7414 	} else if (fetch_all_in_key) {
7415 		/* This field is needed in the query */
7416 
7417 		return(field);
7418 	}
7419 
7420 	if (bitmap_is_set(table->read_set, static_cast<uint>(i))
7421 	    || bitmap_is_set(table->write_set, static_cast<uint>(i))) {
7422 		/* This field is needed in the query */
7423 
7424 		return(field);
7425 	}
7426 
7427 	ut_ad(i >= num_v);
7428 	if (fetch_primary_key_cols
7429 	    && dict_table_col_in_clustered_key(index->table, i - num_v)) {
7430 		/* This field is needed in the query */
7431 		return(field);
7432 	}
7433 
7434 	/* This field is not needed in the query, skip it */
7435 
7436 	return(NULL);
7437 }
7438 
7439 /**************************************************************//**
7440 Determines if a field is needed in a m_prebuilt struct 'template'.
7441 @return whether the field is needed for index condition pushdown */
7442 inline
7443 bool
build_template_needs_field_in_icp(const dict_index_t * index,const row_prebuilt_t * prebuilt,bool contains,ulint i,bool is_virtual)7444 build_template_needs_field_in_icp(
7445 /*==============================*/
7446 	const dict_index_t*	index,	/*!< in: InnoDB index */
7447 	const row_prebuilt_t*	prebuilt,/*!< in: row fetch template */
7448 	bool			contains,/*!< in: whether the index contains
7449 					column i */
7450 	ulint			i,	/*!< in: column number */
7451 	bool			is_virtual)
7452 					/*!< in: a virtual column or not */
7453 {
7454 	ut_ad(contains == index->contains_col_or_prefix(i, is_virtual));
7455 
7456 	return(index == prebuilt->index
7457 	       ? contains
7458 	       : prebuilt->index->contains_col_or_prefix(i, is_virtual));
7459 }
7460 
7461 /**************************************************************//**
7462 Adds a field to a m_prebuilt struct 'template'.
7463 @return the field template */
7464 static
7465 mysql_row_templ_t*
build_template_field(row_prebuilt_t * prebuilt,dict_index_t * clust_index,dict_index_t * index,TABLE * table,const Field * field,ulint i,ulint v_no)7466 build_template_field(
7467 /*=================*/
7468 	row_prebuilt_t*	prebuilt,	/*!< in/out: template */
7469 	dict_index_t*	clust_index,	/*!< in: InnoDB clustered index */
7470 	dict_index_t*	index,		/*!< in: InnoDB index to use */
7471 	TABLE*		table,		/*!< in: MySQL table object */
7472 	const Field*	field,		/*!< in: field in MySQL table */
7473 	ulint		i,		/*!< in: field index in InnoDB table */
7474 	ulint		v_no)		/*!< in: field index for virtual col */
7475 {
7476 	mysql_row_templ_t*	templ;
7477 	const dict_col_t*	col;
7478 
7479 	ut_ad(clust_index->table == index->table);
7480 
7481 	templ = prebuilt->mysql_template + prebuilt->n_template++;
7482 	MEM_UNDEFINED(templ, sizeof *templ);
7483 	templ->rec_field_is_prefix = FALSE;
7484 	templ->rec_prefix_field_no = ULINT_UNDEFINED;
7485 	templ->is_virtual = !field->stored_in_db();
7486 
7487 	if (!templ->is_virtual) {
7488 		templ->col_no = i;
7489 		col = dict_table_get_nth_col(index->table, i);
7490 		templ->clust_rec_field_no = dict_col_get_clust_pos(
7491 						col, clust_index);
7492 		/* If clustered index record field is not found, lets print out
7493 		field names and all the rest to understand why field is not found. */
7494 		if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
7495 			const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
7496 			dict_field_t* field=NULL;
7497 			size_t size = 0;
7498 
7499 			for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7500 				dict_field_t* ifield = &(clust_index->fields[j]);
7501 				if (ifield && !memcmp(tb_col_name, ifield->name,
7502 						strlen(tb_col_name))) {
7503 					field = ifield;
7504 					break;
7505 				}
7506 			}
7507 
7508 			ib::info() << "Looking for field " << i << " name "
7509 				<< (tb_col_name ? tb_col_name : "NULL")
7510 				<< " from table " << clust_index->table->name;
7511 
7512 
7513 			for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
7514 				dict_field_t* ifield = &(clust_index->fields[j]);
7515 				ib::info() << "InnoDB Table "
7516 					<< clust_index->table->name
7517 					<< "field " << j << " name "
7518 					<< (ifield ? ifield->name() : "NULL");
7519 			}
7520 
7521 			for(ulint j=0; j < table->s->stored_fields; j++) {
7522 				ib::info() << "MySQL table "
7523 					<< table->s->table_name.str
7524 					<< " field " << j << " name "
7525 					<< table->field[j]->field_name.str;
7526 			}
7527 
7528 			ib::fatal() << "Clustered record field for column " << i
7529 				<< " not found table n_user_defined "
7530 				<< clust_index->n_user_defined_cols
7531 				<< " index n_user_defined "
7532 				<< clust_index->table->n_cols - DATA_N_SYS_COLS
7533 				<< " InnoDB table "
7534 				<< clust_index->table->name
7535 				<< " field name "
7536 				<< (field ? field->name() : "NULL")
7537 				<< " MySQL table "
7538 				<< table->s->table_name.str
7539 				<< " field name "
7540 				<< (tb_col_name ? tb_col_name : "NULL")
7541 				<< " n_fields "
7542 				<< table->s->stored_fields
7543 				<< " query "
7544 				<< innobase_get_stmt_unsafe(current_thd, &size);
7545 		}
7546 
7547 		if (dict_index_is_clust(index)) {
7548 			templ->rec_field_no = templ->clust_rec_field_no;
7549 		} else {
7550 			/* If we're in a secondary index, keep track
7551 			* of the original index position even if this
7552 			* is just a prefix index; we will use this
7553 			* later to avoid a cluster index lookup in
7554 			* some cases.*/
7555 
7556 			templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
7557 						&templ->rec_prefix_field_no);
7558 		}
7559 	} else {
7560 		DBUG_ASSERT(!ha_innobase::omits_virtual_cols(*table->s));
7561 		col = &dict_table_get_nth_v_col(index->table, v_no)->m_col;
7562 		templ->clust_rec_field_no = v_no;
7563 
7564 		if (dict_index_is_clust(index)) {
7565 			templ->rec_field_no = templ->clust_rec_field_no;
7566 		} else {
7567 			templ->rec_field_no
7568 				= dict_index_get_nth_col_or_prefix_pos(
7569 					index, v_no, FALSE, true,
7570 					&templ->rec_prefix_field_no);
7571 		}
7572 		templ->icp_rec_field_no = ULINT_UNDEFINED;
7573 	}
7574 
7575 	if (field->real_maybe_null()) {
7576 		templ->mysql_null_byte_offset =
7577 			field->null_offset();
7578 
7579 		templ->mysql_null_bit_mask = (ulint) field->null_bit;
7580 	} else {
7581 		templ->mysql_null_bit_mask = 0;
7582 	}
7583 
7584 
7585 	templ->mysql_col_offset = (ulint) get_field_offset(table, field);
7586 	templ->mysql_col_len = (ulint) field->pack_length();
7587 	templ->type = col->mtype;
7588 	templ->mysql_type = (ulint) field->type();
7589 
7590 	if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
7591 		templ->mysql_length_bytes = (ulint)
7592 			(((Field_varstring*) field)->length_bytes);
7593 	} else {
7594 		templ->mysql_length_bytes = 0;
7595 	}
7596 
7597 	templ->charset = dtype_get_charset_coll(col->prtype);
7598 	templ->mbminlen = dict_col_get_mbminlen(col);
7599 	templ->mbmaxlen = dict_col_get_mbmaxlen(col);
7600 	templ->is_unsigned = col->prtype & DATA_UNSIGNED;
7601 
7602 	if (!dict_index_is_clust(index)
7603 	    && templ->rec_field_no == ULINT_UNDEFINED) {
7604 		prebuilt->need_to_access_clustered = TRUE;
7605 
7606 		if (templ->rec_prefix_field_no != ULINT_UNDEFINED) {
7607 			dict_field_t* field = dict_index_get_nth_field(
7608 						index,
7609 						templ->rec_prefix_field_no);
7610 			templ->rec_field_is_prefix = (field->prefix_len != 0);
7611 		}
7612 	}
7613 
7614 	/* For spatial index, we need to access cluster index. */
7615 	if (dict_index_is_spatial(index)) {
7616 		prebuilt->need_to_access_clustered = TRUE;
7617 	}
7618 
7619 	if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
7620 	    + templ->mysql_col_len) {
7621 		prebuilt->mysql_prefix_len = templ->mysql_col_offset
7622 			+ templ->mysql_col_len;
7623 	}
7624 
7625 	if (DATA_LARGE_MTYPE(templ->type)) {
7626 		prebuilt->templ_contains_blob = TRUE;
7627 	}
7628 
7629 	return(templ);
7630 }
7631 
/**************************************************************//**
Builds a 'template' to the m_prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing.

Outline:
1. Decide whether the whole row must be fetched (forced for LOCK_X reads
and for no-rollback tables) and which extra columns the fetch hint in
m_prebuilt asks for.
2. Choose the index the template positions refer to: the clustered index
for a whole-row template, otherwise m_prebuilt->index.
3. Fill m_prebuilt->mysql_template[] (allocated on first use with one
slot per field of the table share):
   - when an index condition was pushed down for the active index, or a
   rowid filter is active, the columns needed for that check are added
   first (counted in idx_cond_n_cols), followed by the other columns;
   - otherwise, or when a virtual column turns out to be contained in the
   index, the columns are added in a single pass (label no_icp).
4. If the template refers to a secondary index but the clustered index
record has to be read anyway, make every rec_field_no refer to the
clustered index record. */

void
ha_innobase::build_template(
/*========================*/
	bool		whole_row)	/*!< in: true=ROW_MYSQL_WHOLE_ROW,
					false=ROW_MYSQL_REC_FIELDS */
{
	dict_index_t*	index;
	dict_index_t*	clust_index;
	ibool		fetch_all_in_key	= FALSE;
	ibool		fetch_primary_key_cols	= FALSE;

	if (m_prebuilt->select_lock_type == LOCK_X || m_prebuilt->table->no_rollback()) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement or if we are using a no rollback
		table */

		whole_row = true;
	} else if (!whole_row) {
		if (m_prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_ALL_COLS) {

			/* We know we must at least fetch all columns in the
			key, or all columns in the table */

			if (m_prebuilt->read_just_key) {
				/* MySQL has instructed us that it is enough
				to fetch the columns in the key; looks like
				MySQL can set this flag also when there is
				only a prefix of the column in the key: in
				that case we retrieve the whole column from
				the clustered index */

				fetch_all_in_key = TRUE;
			} else {
				whole_row = true;
			}
		} else if (m_prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_PRIMARY_KEY) {
			/* We must at least fetch all primary key cols. Note
			that if the clustered index was internally generated
			by InnoDB on the row id (no primary key was
			defined), then row_search_for_mysql() will always
			retrieve the row id to a special buffer in the
			m_prebuilt struct. */

			fetch_primary_key_cols = TRUE;
		}
	}

	clust_index = dict_table_get_first_index(m_prebuilt->table);

	/* A whole-row template is always expressed in terms of the
	clustered index. */
	index = whole_row ? clust_index : m_prebuilt->index;

	m_prebuilt->versioned_write = table->versioned_write(VERS_TRX_ID);
	m_prebuilt->need_to_access_clustered = (index == clust_index);

	/* Either m_prebuilt->index should be a secondary index, or it
	should be the clustered index. */
	ut_ad(dict_index_is_clust(index) == (index == clust_index));

	/* Below we check column by column if we need to access
	the clustered index. */

	/* An active rowid filter requires the primary key columns and
	registers this handler as the filter in m_prebuilt. */
	if (pushed_rowid_filter && rowid_filter_is_active) {
		fetch_primary_key_cols = TRUE;
		m_prebuilt->pk_filter = this;
	} else {
		m_prebuilt->pk_filter = NULL;
	}

	const bool skip_virtual = omits_virtual_cols(*table_share);
	const ulint n_fields = table_share->fields;

	/* The template array is allocated only once and then reused by
	later invocations. */
	if (!m_prebuilt->mysql_template) {
		m_prebuilt->mysql_template = (mysql_row_templ_t*)
			ut_malloc_nokey(n_fields * sizeof(mysql_row_templ_t));
	}

	m_prebuilt->template_type = whole_row
		? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
	m_prebuilt->null_bitmap_len = table->s->null_bytes;

	/* Prepare to build m_prebuilt->mysql_template[]. */
	m_prebuilt->templ_contains_blob = FALSE;
	m_prebuilt->mysql_prefix_len = 0;
	m_prebuilt->n_template = 0;
	m_prebuilt->idx_cond_n_cols = 0;

	/* Note that in InnoDB, i is the column number in the table.
	MySQL calls columns 'fields'. */

	/* Number of virtual (not stored) fields seen so far in the current
	pass over table->field[]. A stored field i is addressed as column
	i - num_v, a virtual field as virtual column num_v. */
	ulint num_v = 0;

	if ((active_index != MAX_KEY
	     && active_index == pushed_idx_cond_keyno)
	    || (pushed_rowid_filter && rowid_filter_is_active)) {
		/* Push down an index condition or an end_range check. */
		/* First pass: add the columns that are needed for the
		pushed-down check. They occupy the first idx_cond_n_cols
		slots of the template. */
		for (ulint i = 0; i < n_fields; i++) {
			const Field* field = table->field[i];
			const bool is_v = !field->stored_in_db();
			if (is_v && skip_virtual) {
				num_v++;
				continue;
			}
			bool index_contains = index->contains_col_or_prefix(
				is_v ? num_v : i - num_v, is_v);
			if (is_v && index_contains) {
				/* A virtual column is part of the index:
				discard the templates added so far and
				build the template in the single-pass
				branch instead. */
				m_prebuilt->n_template = 0;
				num_v = 0;
				goto no_icp;
			}

			/* Test if an end_range or an index condition
			refers to the field. Note that "index" and
			"index_contains" may refer to the clustered index.
			Index condition pushdown is relative to
			m_prebuilt->index (the index that is being
			looked up first). */

			/* When join_read_always_key() invokes this
			code via handler::ha_index_init() and
			ha_innobase::index_init(), end_range is not
			yet initialized. Because of that, we must
			always check for index_contains, instead of
			the subset
			field->part_of_key.is_set(active_index)
			which would be acceptable if end_range==NULL. */
			if (build_template_needs_field_in_icp(
				    index, m_prebuilt, index_contains,
				    is_v ? num_v : i - num_v, is_v)) {
				if (!whole_row) {
					field = build_template_needs_field(
						index_contains,
						m_prebuilt->read_just_key,
						fetch_all_in_key,
						fetch_primary_key_cols,
						index, table, i, num_v);
					if (!field) {
						if (is_v) {
							num_v++;
						}
						continue;
					}
				}

				ut_ad(!is_v);

				mysql_row_templ_t* templ= build_template_field(
					m_prebuilt, clust_index, index,
					table, field, i - num_v, 0);

				ut_ad(!templ->is_virtual);

				m_prebuilt->idx_cond_n_cols++;
				ut_ad(m_prebuilt->idx_cond_n_cols
				      == m_prebuilt->n_template);

				/* Determine the position of the column in
				m_prebuilt->index, which is the record the
				pushed-down check is evaluated on. */
				if (index == m_prebuilt->index) {
					templ->icp_rec_field_no
						= templ->rec_field_no;
				} else {
					templ->icp_rec_field_no
						= dict_index_get_nth_col_pos(
							m_prebuilt->index,
							i - num_v,
							&templ->rec_prefix_field_no);
				}

				if (dict_index_is_clust(m_prebuilt->index)) {
					ut_ad(templ->icp_rec_field_no
					      != ULINT_UNDEFINED);
					/* If the primary key includes
					a column prefix, use it in
					index condition pushdown,
					because the condition is
					evaluated before fetching any
					off-page (externally stored)
					columns. */
					if (templ->icp_rec_field_no
					    < m_prebuilt->index->n_uniq) {
						/* This is a key column;
						all set. */
						continue;
					}
				} else if (templ->icp_rec_field_no
					   != ULINT_UNDEFINED) {
					continue;
				}

				/* This is a column prefix index.
				The column prefix can be used in
				an end_range comparison. */

				templ->icp_rec_field_no
					= dict_index_get_nth_col_or_prefix_pos(
						m_prebuilt->index, i - num_v,
						true, false,
						&templ->rec_prefix_field_no);
				ut_ad(templ->icp_rec_field_no
				      != ULINT_UNDEFINED);

				/* Index condition pushdown can be used on
				all columns of a secondary index, and on
				the PRIMARY KEY columns. On the clustered
				index, it must never be used on other than
				PRIMARY KEY columns, because those columns
				may be stored off-page, and we will not
				fetch externally stored columns before
				checking the index condition. */
				/* TODO: test the above with an assertion
				like this. Note that index conditions are
				currently pushed down as part of the
				"optimizer phase" while end_range is done
				as part of the execution phase. Therefore,
				we were unable to use an accurate condition
				for end_range in the "if" condition above,
				and the following assertion would fail.
				ut_ad(!dict_index_is_clust(m_prebuilt->index)
				      || templ->rec_field_no
				      < m_prebuilt->index->n_uniq);
				*/
			}

			if (is_v) {
				num_v++;
			}
		}

		ut_ad(m_prebuilt->idx_cond_n_cols > 0);
		ut_ad(m_prebuilt->idx_cond_n_cols == m_prebuilt->n_template);

		/* Restart the virtual column count for the second pass. */
		num_v = 0;

		/* Include the fields that are not needed in index condition
		pushdown. */
		for (ulint i = 0; i < n_fields; i++) {
			const Field*		field = table->field[i];
			const bool is_v = !field->stored_in_db();
			if (is_v && skip_virtual) {
				num_v++;
				continue;
			}

			bool index_contains = index->contains_col_or_prefix(
				is_v ? num_v : i - num_v, is_v);

			if (!build_template_needs_field_in_icp(
				    index, m_prebuilt, index_contains,
				    is_v ? num_v : i - num_v, is_v)) {
				/* Not needed in ICP */
				if (!whole_row) {
					field = build_template_needs_field(
						index_contains,
						m_prebuilt->read_just_key,
						fetch_all_in_key,
						fetch_primary_key_cols,
						index, table, i, num_v);
					if (!field) {
						if (is_v) {
							num_v++;
						}
						continue;
					}
				}

				ut_d(mysql_row_templ_t*	templ =)
				build_template_field(
					m_prebuilt, clust_index, index,
					table, field, i - num_v, num_v);
				ut_ad(templ->is_virtual == (ulint)is_v);

				if (is_v) {
					num_v++;
				}
			}
		}
		/* Register this handler for index condition evaluation only
		when a condition was pushed down for the active index; this
		branch is also entered when just a rowid filter is active. */
		if (active_index == pushed_idx_cond_keyno) {
			m_prebuilt->idx_cond = this;
		}
	} else {
no_icp:
		/* No index condition pushdown */
		m_prebuilt->idx_cond = NULL;
		ut_ad(num_v == 0);

		for (ulint i = 0; i < n_fields; i++) {
			const Field*	field = table->field[i];
			const bool is_v = !field->stored_in_db();

			if (whole_row) {
				if (is_v && skip_virtual) {
					num_v++;
					continue;
				}
				/* Even if this is whole_row, if the search is
				on a virtual column, and read_just_key is
				set, and field is not in this index, we
				will not try to fill the value since they
				are not stored in such index nor in the
				cluster index. */
				if (is_v
				    && m_prebuilt->read_just_key
				    && !m_prebuilt->index->contains_col_or_prefix(
					num_v, true))
				{
					/* Turn off ROW_MYSQL_WHOLE_ROW */
					m_prebuilt->template_type =
						 ROW_MYSQL_REC_FIELDS;
					num_v++;
					continue;
				}
			} else {
				/* Virtual columns are left out when the table
				omits them or when the template refers to the
				primary index. */
				if (is_v
				    && (skip_virtual || index->is_primary())) {
					num_v++;
					continue;
				}

				bool contain = index->contains_col_or_prefix(
					is_v ? num_v: i - num_v, is_v);

				field = build_template_needs_field(
					contain,
					m_prebuilt->read_just_key,
					fetch_all_in_key,
					fetch_primary_key_cols,
					index, table, i, num_v);
				if (!field) {
					if (is_v) {
						num_v++;
					}
					continue;
				}
			}

			ut_d(mysql_row_templ_t* templ =)
			build_template_field(
				m_prebuilt, clust_index, index,
				table, field, i - num_v, num_v);
			ut_ad(templ->is_virtual == (ulint)is_v);
			if (is_v) {
				num_v++;
			}
		}
	}

	if (index != clust_index && m_prebuilt->need_to_access_clustered) {
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (ulint i = 0; i < m_prebuilt->n_template; i++) {
			mysql_row_templ_t*	templ
				= &m_prebuilt->mysql_template[i];

			templ->rec_field_no = templ->clust_rec_field_no;
		}
	}
}
7994 
7995 /********************************************************************//**
7996 This special handling is really to overcome the limitations of MySQL's
7997 binlogging. We need to eliminate the non-determinism that will arise in
7998 INSERT ... SELECT type of statements, since MySQL binlog only stores the
7999 min value of the autoinc interval. Once that is fixed we can get rid of
8000 the special lock handling.
8001 @return DB_SUCCESS if all OK else error code */
8002 
8003 dberr_t
innobase_lock_autoinc(void)8004 ha_innobase::innobase_lock_autoinc(void)
8005 /*====================================*/
8006 {
8007 	DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
8008 	dberr_t		error = DB_SUCCESS;
8009 
8010 	ut_ad(!srv_read_only_mode);
8011 
8012 	switch (innobase_autoinc_lock_mode) {
8013 	case AUTOINC_NO_LOCKING:
8014 		/* Acquire only the AUTOINC mutex. */
8015 		m_prebuilt->table->autoinc_mutex.lock();
8016 		break;
8017 
8018 	case AUTOINC_NEW_STYLE_LOCKING:
8019 		/* For simple (single/multi) row INSERTs/REPLACEs and RBR
8020 		events, we fallback to the old style only if another
8021 		transaction has already acquired the AUTOINC lock on
8022 		behalf of a LOAD FILE or INSERT ... SELECT etc. type of
8023 		statement. */
8024 		switch (thd_sql_command(m_user_thd)) {
8025 		case SQLCOM_INSERT:
8026 		case SQLCOM_REPLACE:
8027 		case SQLCOM_END: // RBR event
8028 			/* Acquire the AUTOINC mutex. */
8029 			m_prebuilt->table->autoinc_mutex.lock();
8030 			/* We need to check that another transaction isn't
8031 			already holding the AUTOINC lock on the table. */
8032 			if (!m_prebuilt->table->n_waiting_or_granted_auto_inc_locks) {
8033 				/* Do not fall back to old style locking. */
8034 				DBUG_RETURN(error);
8035 			}
8036 			m_prebuilt->table->autoinc_mutex.unlock();
8037 		}
8038 		/* Use old style locking. */
8039 		/* fall through */
8040 	case AUTOINC_OLD_STYLE_LOCKING:
8041 		DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
8042 				ut_ad(0););
8043 		error = row_lock_table_autoinc_for_mysql(m_prebuilt);
8044 
8045 		if (error == DB_SUCCESS) {
8046 
8047 			/* Acquire the AUTOINC mutex. */
8048 			m_prebuilt->table->autoinc_mutex.lock();
8049 		}
8050 		break;
8051 
8052 	default:
8053 		ut_error;
8054 	}
8055 
8056 	DBUG_RETURN(error);
8057 }
8058 
8059 /********************************************************************//**
8060 Store the autoinc value in the table. The autoinc value is only set if
8061 it's greater than the existing autoinc value in the table.
8062 @return DB_SUCCESS if all went well else error code */
8063 
8064 dberr_t
innobase_set_max_autoinc(ulonglong auto_inc)8065 ha_innobase::innobase_set_max_autoinc(
8066 /*==================================*/
8067 	ulonglong	auto_inc)	/*!< in: value to store */
8068 {
8069 	dberr_t		error;
8070 
8071 	error = innobase_lock_autoinc();
8072 
8073 	if (error == DB_SUCCESS) {
8074 
8075 		dict_table_autoinc_update_if_greater(m_prebuilt->table, auto_inc);
8076 		m_prebuilt->table->autoinc_mutex.unlock();
8077 	}
8078 
8079 	return(error);
8080 }
8081 
8082 /********************************************************************//**
8083 Stores a row in an InnoDB database, to the table specified in this
8084 handle.
8085 @return error code */
8086 
8087 int
write_row(const uchar * record)8088 ha_innobase::write_row(
8089 /*===================*/
8090 	const uchar*	record)	/*!< in: a row in MySQL format */
8091 {
8092 	dberr_t		error;
8093 #ifdef WITH_WSREP
8094 	bool		wsrep_auto_inc_inserted= false;
8095 #endif
8096 	int		error_result = 0;
8097 	bool		auto_inc_used = false;
8098 
8099 	DBUG_ENTER("ha_innobase::write_row");
8100 
8101 	trx_t*		trx = thd_to_trx(m_user_thd);
8102 
8103 	/* Validation checks before we commence write_row operation. */
8104 	if (high_level_read_only) {
8105 		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8106 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
8107 	}
8108 
8109 	ut_a(m_prebuilt->trx == trx);
8110 
8111 	if (!trx_is_started(trx)) {
8112 		trx->will_lock = true;
8113 	}
8114 
8115 	ins_mode_t	vers_set_fields;
8116 	/* Handling of Auto-Increment Columns. */
8117 	if (table->next_number_field && record == table->record[0]) {
8118 
8119 		/* Reset the error code before calling
8120 		innobase_get_auto_increment(). */
8121 		m_prebuilt->autoinc_error = DB_SUCCESS;
8122 
8123 #ifdef WITH_WSREP
8124 		wsrep_auto_inc_inserted = trx->is_wsrep()
8125 			&& wsrep_drupal_282555_workaround
8126 			&& table->next_number_field->val_int() == 0;
8127 #endif
8128 
8129 		if ((error_result = update_auto_increment())) {
8130 			/* We don't want to mask autoinc overflow errors. */
8131 
8132 			/* Handle the case where the AUTOINC sub-system
8133 			failed during initialization. */
8134 			if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
8135 				error_result = ER_AUTOINC_READ_FAILED;
8136 				/* Set the error message to report too. */
8137 				my_error(ER_AUTOINC_READ_FAILED, MYF(0));
8138 				goto func_exit;
8139 			} else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
8140 				error = m_prebuilt->autoinc_error;
8141 				goto report_error;
8142 			}
8143 
8144 			/* MySQL errors are passed straight back. */
8145 			goto func_exit;
8146 		}
8147 
8148 		auto_inc_used = true;
8149 	}
8150 
8151 	/* Prepare INSERT graph that will be executed for actual INSERT
8152 	(This is a one time operation) */
8153 	if (m_prebuilt->mysql_template == NULL
8154 	    || m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
8155 
8156 		/* Build the template used in converting quickly between
8157 		the two database formats */
8158 
8159 		build_template(true);
8160 	}
8161 
8162 	innobase_srv_conc_enter_innodb(m_prebuilt);
8163 
8164 	vers_set_fields = table->versioned_write(VERS_TRX_ID) ?
8165 		ROW_INS_VERSIONED : ROW_INS_NORMAL;
8166 
8167 	/* Execute insert graph that will result in actual insert. */
8168 	error = row_insert_for_mysql((byte*) record, m_prebuilt, vers_set_fields);
8169 
8170 	DEBUG_SYNC(m_user_thd, "ib_after_row_insert");
8171 
8172 	/* Handling of errors related to auto-increment. */
8173 	if (auto_inc_used) {
8174 		ulonglong	auto_inc;
8175 
8176 		/* Note the number of rows processed for this statement, used
8177 		by get_auto_increment() to determine the number of AUTO-INC
8178 		values to reserve. This is only useful for a mult-value INSERT
8179 		and is a statement level counter. */
8180 		if (trx->n_autoinc_rows > 0) {
8181 			--trx->n_autoinc_rows;
8182 		}
8183 
8184 		/* Get the value that MySQL attempted to store in the table.*/
8185 		auto_inc = table->next_number_field->val_uint();
8186 
8187 		switch (error) {
8188 		case DB_DUPLICATE_KEY:
8189 
8190 			/* A REPLACE command and LOAD DATA INFILE REPLACE
8191 			handle a duplicate key error themselves, but we
8192 			must update the autoinc counter if we are performing
8193 			those statements. */
8194 
8195 			switch (thd_sql_command(m_user_thd)) {
8196 			case SQLCOM_LOAD:
8197 				if (!trx->duplicates) {
8198 					break;
8199 				}
8200 
8201 			case SQLCOM_REPLACE:
8202 			case SQLCOM_INSERT_SELECT:
8203 			case SQLCOM_REPLACE_SELECT:
8204 				goto set_max_autoinc;
8205 
8206 #ifdef WITH_WSREP
8207 			/* workaround for LP bug #355000, retrying the insert */
8208 			case SQLCOM_INSERT:
8209 
8210 				WSREP_DEBUG("DUPKEY error for autoinc\n"
8211 				      "THD %ld, value %llu, off %llu inc %llu",
8212 				      thd_get_thread_id(m_user_thd),
8213 				      auto_inc,
8214 				      m_prebuilt->autoinc_offset,
8215 				      m_prebuilt->autoinc_increment);
8216 
8217                                if (wsrep_auto_inc_inserted &&
8218                                    wsrep_thd_retry_counter(m_user_thd) == 0  &&
8219 				    !thd_test_options(m_user_thd,
8220 						      OPTION_NOT_AUTOCOMMIT |
8221 						      OPTION_BEGIN)) {
8222 					WSREP_DEBUG(
8223 					    "retrying insert: %s",
8224 					    wsrep_thd_query(m_user_thd));
8225 					error= DB_SUCCESS;
8226 					wsrep_thd_self_abort(m_user_thd);
8227                                         innobase_srv_conc_exit_innodb(
8228 						m_prebuilt);
8229                                         /* jump straight to func exit over
8230                                          * later wsrep hooks */
8231                                         goto func_exit;
8232 				}
8233                                 break;
8234 #endif /* WITH_WSREP */
8235 
8236 			default:
8237 				break;
8238 			}
8239 
8240 			break;
8241 
8242 		case DB_SUCCESS:
8243 			/* If the actual value inserted is greater than
8244 			the upper limit of the interval, then we try and
8245 			update the table upper limit. Note: last_value
8246 			will be 0 if get_auto_increment() was not called. */
8247 
8248 			if (auto_inc >= m_prebuilt->autoinc_last_value) {
8249 set_max_autoinc:
8250 				/* We need the upper limit of the col type to check for
8251 				whether we update the table autoinc counter or not. */
8252 				ulonglong	col_max_value =
8253 					table->next_number_field->get_max_int_value();
8254 
8255 				/* This should filter out the negative
8256 				values set explicitly by the user. */
8257 				if (auto_inc <= col_max_value) {
8258 					ut_ad(m_prebuilt->autoinc_increment > 0);
8259 
8260 					ulonglong	offset;
8261 					ulonglong	increment;
8262 					dberr_t		err;
8263 
8264 					offset = m_prebuilt->autoinc_offset;
8265 					increment = m_prebuilt->autoinc_increment;
8266 
8267 					auto_inc = innobase_next_autoinc(
8268 						auto_inc, 1, increment, offset,
8269 						col_max_value);
8270 
8271 					err = innobase_set_max_autoinc(
8272 						auto_inc);
8273 
8274 					if (err != DB_SUCCESS) {
8275 						error = err;
8276 					}
8277 				}
8278 			}
8279 			break;
8280 		default:
8281 			break;
8282 		}
8283 	}
8284 
8285 	innobase_srv_conc_exit_innodb(m_prebuilt);
8286 
8287 report_error:
8288 	/* Cleanup and exit. */
8289 	if (error == DB_TABLESPACE_DELETED) {
8290 		ib_senderrf(
8291 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
8292 			ER_TABLESPACE_DISCARDED,
8293 			table->s->table_name.str);
8294 	}
8295 
8296 	error_result = convert_error_code_to_mysql(
8297 		error, m_prebuilt->table->flags, m_user_thd);
8298 
8299 #ifdef WITH_WSREP
8300 	if (!error_result && trx->is_wsrep()
8301 	    && wsrep_thd_is_local(m_user_thd)
8302 	    && !wsrep_thd_ignore_table(m_user_thd)
8303 	    && !wsrep_consistency_check(m_user_thd)
8304 	    && (thd_sql_command(m_user_thd) != SQLCOM_CREATE_TABLE)
8305 	    && (thd_sql_command(m_user_thd) != SQLCOM_LOAD ||
8306 	        thd_binlog_format(m_user_thd) == BINLOG_FORMAT_ROW)) {
8307 		if (wsrep_append_keys(m_user_thd, WSREP_SERVICE_KEY_EXCLUSIVE,
8308 				      record,
8309 				      NULL)) {
8310 			DBUG_PRINT("wsrep", ("row key failed"));
8311 			error_result = HA_ERR_INTERNAL_ERROR;
8312 			goto func_exit;
8313 		}
8314 	}
8315 #endif /* WITH_WSREP */
8316 
8317 	if (error_result == HA_FTS_INVALID_DOCID) {
8318 		my_error(HA_FTS_INVALID_DOCID, MYF(0));
8319 	}
8320 
8321 func_exit:
8322 	innobase_active_small();
8323 
8324 	DBUG_RETURN(error_result);
8325 }
8326 
8327 /** Fill the update vector's "old_vrow" field for those non-updated,
8328 but indexed columns. Such columns could stil present in the virtual
8329 index rec fields even if they are not updated (some other fields updated),
8330 so needs to be logged.
8331 @param[in]	prebuilt		InnoDB prebuilt struct
8332 @param[in,out]	vfield			field to filled
8333 @param[in]	o_len			actual column length
8334 @param[in,out]	col			column to be filled
8335 @param[in]	old_mysql_row_col	MySQL old field ptr
8336 @param[in]	col_pack_len		MySQL field col length
8337 @param[in,out]	buf			buffer for a converted integer value
8338 @return used buffer ptr from row_mysql_store_col_in_innobase_format() */
8339 static
8340 byte*
innodb_fill_old_vcol_val(row_prebuilt_t * prebuilt,dfield_t * vfield,ulint o_len,dict_col_t * col,const byte * old_mysql_row_col,ulint col_pack_len,byte * buf)8341 innodb_fill_old_vcol_val(
8342 	row_prebuilt_t*	prebuilt,
8343 	dfield_t*	vfield,
8344 	ulint		o_len,
8345 	dict_col_t*	col,
8346 	const byte*	old_mysql_row_col,
8347 	ulint		col_pack_len,
8348 	byte*		buf)
8349 {
8350 	dict_col_copy_type(
8351 		col, dfield_get_type(vfield));
8352 	if (o_len != UNIV_SQL_NULL) {
8353 
8354 		buf = row_mysql_store_col_in_innobase_format(
8355 			vfield,
8356 			buf,
8357 			TRUE,
8358 			old_mysql_row_col,
8359 			col_pack_len,
8360 			dict_table_is_comp(prebuilt->table));
8361 	} else {
8362 		dfield_set_null(vfield);
8363 	}
8364 
8365 	return(buf);
8366 }
8367 
8368 /** Calculate an update vector corresponding to the changes
8369 between old_row and new_row.
8370 @param[out]	uvect		update vector
8371 @param[in]	old_row		current row in MySQL format
8372 @param[in]	new_row		intended updated row in MySQL format
8373 @param[in]	table		MySQL table handle
8374 @param[in,out]	upd_buff	buffer to use for converted values
8375 @param[in]	buff_len	length of upd_buff
8376 @param[in,out]	prebuilt	InnoDB execution context
8377 @param[out]	auto_inc	updated AUTO_INCREMENT value, or 0 if none
8378 @return DB_SUCCESS or error code */
8379 static
8380 dberr_t
calc_row_difference(upd_t * uvect,const uchar * old_row,const uchar * new_row,TABLE * table,uchar * upd_buff,ulint buff_len,row_prebuilt_t * prebuilt,ib_uint64_t & auto_inc)8381 calc_row_difference(
8382 	upd_t*		uvect,
8383 	const uchar*	old_row,
8384 	const uchar*	new_row,
8385 	TABLE*		table,
8386 	uchar*		upd_buff,
8387 	ulint		buff_len,
8388 	row_prebuilt_t*	prebuilt,
8389 	ib_uint64_t&	auto_inc)
8390 {
8391 	uchar*		original_upd_buff = upd_buff;
8392 	Field*		field;
8393 	enum_field_types field_mysql_type;
8394 	ulint		o_len;
8395 	ulint		n_len;
8396 	ulint		col_pack_len;
8397 	const byte*	new_mysql_row_col;
8398 	const byte*	old_mysql_row_col;
8399 	const byte*	o_ptr;
8400 	const byte*	n_ptr;
8401 	byte*		buf;
8402 	upd_field_t*	ufield;
8403 	ulint		col_type;
8404 	ulint		n_changed = 0;
8405 	dfield_t	dfield;
8406 	dict_index_t*	clust_index;
8407 	ibool		changes_fts_column = FALSE;
8408 	ibool		changes_fts_doc_col = FALSE;
8409 	trx_t* const	trx = prebuilt->trx;
8410 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
8411 	ulint		num_v = 0;
8412 	const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s);
8413 
8414 	ut_ad(!srv_read_only_mode);
8415 
8416 	clust_index = dict_table_get_first_index(prebuilt->table);
8417 	auto_inc = 0;
8418 
8419 	/* We use upd_buff to convert changed fields */
8420 	buf = (byte*) upd_buff;
8421 
8422 	for (uint i = 0; i < table->s->fields; i++) {
8423 		field = table->field[i];
8424 		const bool is_virtual = !field->stored_in_db();
8425 		if (is_virtual && skip_virtual) {
8426 			num_v++;
8427 			continue;
8428 		}
8429 		dict_col_t* col = is_virtual
8430 			? &prebuilt->table->v_cols[num_v].m_col
8431 			: &prebuilt->table->cols[i - num_v];
8432 
8433 		o_ptr = (const byte*) old_row + get_field_offset(table, field);
8434 		n_ptr = (const byte*) new_row + get_field_offset(table, field);
8435 
8436 		/* Use new_mysql_row_col and col_pack_len save the values */
8437 
8438 		new_mysql_row_col = n_ptr;
8439 		old_mysql_row_col = o_ptr;
8440 		col_pack_len = field->pack_length();
8441 
8442 		o_len = col_pack_len;
8443 		n_len = col_pack_len;
8444 
8445 		/* We use o_ptr and n_ptr to dig up the actual data for
8446 		comparison. */
8447 
8448 		field_mysql_type = field->type();
8449 
8450 		col_type = col->mtype;
8451 
8452 		switch (col_type) {
8453 
8454 		case DATA_BLOB:
8455 		case DATA_GEOMETRY:
8456 			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
8457 			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
8458 
8459 			break;
8460 
8461 		case DATA_VARCHAR:
8462 		case DATA_BINARY:
8463 		case DATA_VARMYSQL:
8464 			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
8465 				/* This is a >= 5.0.3 type true VARCHAR where
8466 				the real payload data length is stored in
8467 				1 or 2 bytes */
8468 
8469 				o_ptr = row_mysql_read_true_varchar(
8470 					&o_len, o_ptr,
8471 					(ulint)
8472 					(((Field_varstring*) field)->length_bytes));
8473 
8474 				n_ptr = row_mysql_read_true_varchar(
8475 					&n_len, n_ptr,
8476 					(ulint)
8477 					(((Field_varstring*) field)->length_bytes));
8478 			}
8479 
8480 			break;
8481 		default:
8482 			;
8483 		}
8484 
8485 		if (field_mysql_type == MYSQL_TYPE_LONGLONG
8486 		    && prebuilt->table->fts
8487 		    && innobase_strcasecmp(
8488 			field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
8489 			doc_id = mach_read_uint64_little_endian(n_ptr);
8490 			if (doc_id == 0) {
8491 				return(DB_FTS_INVALID_DOCID);
8492 			}
8493 		}
8494 
8495 		if (field->real_maybe_null()) {
8496 			if (field->is_null_in_record(old_row)) {
8497 				o_len = UNIV_SQL_NULL;
8498 			}
8499 
8500 			if (field->is_null_in_record(new_row)) {
8501 				n_len = UNIV_SQL_NULL;
8502 			}
8503 		}
8504 
8505 #ifdef UNIV_DEBUG
8506 		bool	online_ord_part = false;
8507 #endif
8508 
8509 		if (is_virtual) {
8510 			/* If the virtual column is not indexed,
8511 			we shall ignore it for update */
8512 			if (!col->ord_part) {
8513 				/* Check whether there is a table-rebuilding
8514 				online ALTER TABLE in progress, and this
8515 				virtual column could be newly indexed, thus
8516 				it will be materialized. Then we will have
8517 				to log its update.
8518 				Note, we do not support online dropping virtual
8519 				column while adding new index, nor with
8520 				online alter column order while adding index,
8521 				so the virtual column sequence must not change
8522 				if it is online operation */
8523 				if (dict_index_is_online_ddl(clust_index)
8524 				    && row_log_col_is_indexed(clust_index,
8525 							      num_v)) {
8526 #ifdef UNIV_DEBUG
8527 					online_ord_part = true;
8528 #endif
8529 				} else {
8530 					num_v++;
8531 					continue;
8532 				}
8533 			}
8534 
8535 			if (!uvect->old_vrow) {
8536 				uvect->old_vrow = dtuple_create_with_vcol(
8537 					uvect->heap, 0, prebuilt->table->n_v_cols);
8538 			}
8539 
8540 			ulint   max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(
8541 						prebuilt->table);
8542 
8543 			/* for virtual columns, we only materialize
8544 			its index, and index field length would not
8545 			exceed max_field_len. So continue if the
8546 			first max_field_len bytes are matched up */
8547 			if (o_len != UNIV_SQL_NULL
8548 			   && n_len != UNIV_SQL_NULL
8549 			   && o_len >= max_field_len
8550 			   && n_len >= max_field_len
8551 			   && memcmp(o_ptr, n_ptr, max_field_len) == 0) {
8552 				dfield_t*	vfield = dtuple_get_nth_v_field(
8553 					uvect->old_vrow, num_v);
8554 				buf = innodb_fill_old_vcol_val(
8555 					prebuilt, vfield, o_len,
8556 					col, old_mysql_row_col,
8557 					col_pack_len, buf);
8558 			       num_v++;
8559 			       continue;
8560 			}
8561 		}
8562 
8563 		if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
8564 				       && 0 != memcmp(o_ptr, n_ptr, o_len))) {
8565 			/* The field has changed */
8566 
8567 			ufield = uvect->fields + n_changed;
8568 			MEM_UNDEFINED(ufield, sizeof *ufield);
8569 
8570 			/* Let us use a dummy dfield to make the conversion
8571 			from the MySQL column format to the InnoDB format */
8572 
8573 
8574 			/* If the length of new geometry object is 0, means
8575 			this object is invalid geometry object, we need
8576 			to block it. */
8577 			if (DATA_GEOMETRY_MTYPE(col_type)
8578 			    && o_len != 0 && n_len == 0) {
8579 				return(DB_CANT_CREATE_GEOMETRY_OBJECT);
8580 			}
8581 
8582 			if (n_len != UNIV_SQL_NULL) {
8583 				dict_col_copy_type(
8584 					col, dfield_get_type(&dfield));
8585 
8586 				buf = row_mysql_store_col_in_innobase_format(
8587 					&dfield,
8588 					(byte*) buf,
8589 					TRUE,
8590 					new_mysql_row_col,
8591 					col_pack_len,
8592 					dict_table_is_comp(prebuilt->table));
8593 				dfield_copy(&ufield->new_val, &dfield);
8594 			} else {
8595 				dict_col_copy_type(
8596 					col, dfield_get_type(&ufield->new_val));
8597 				dfield_set_null(&ufield->new_val);
8598 			}
8599 
8600 			ufield->exp = NULL;
8601 			ufield->orig_len = 0;
8602 			if (is_virtual) {
8603 				dfield_t*	vfield = dtuple_get_nth_v_field(
8604 					uvect->old_vrow, num_v);
8605 				upd_fld_set_virtual_col(ufield);
8606 				ufield->field_no = num_v;
8607 
8608 				ut_ad(col->ord_part || online_ord_part);
8609 				ufield->old_v_val = static_cast<dfield_t*>(
8610 					mem_heap_alloc(
8611 						uvect->heap,
8612 						sizeof *ufield->old_v_val));
8613 
8614 				if (!field->is_null_in_record(old_row)) {
8615 					if (n_len == UNIV_SQL_NULL) {
8616 						dict_col_copy_type(
8617 							col, dfield_get_type(
8618 								&dfield));
8619 					}
8620 
8621 					buf = row_mysql_store_col_in_innobase_format(
8622 						&dfield,
8623 						(byte*) buf,
8624 						TRUE,
8625 						old_mysql_row_col,
8626 						col_pack_len,
8627 						dict_table_is_comp(
8628 						prebuilt->table));
8629 					dfield_copy(ufield->old_v_val,
8630 						    &dfield);
8631 					dfield_copy(vfield, &dfield);
8632 				} else {
8633 					dict_col_copy_type(
8634 						col, dfield_get_type(
8635 						ufield->old_v_val));
8636 					dfield_set_null(ufield->old_v_val);
8637 					dfield_set_null(vfield);
8638 				}
8639 				num_v++;
8640 				ut_ad(field != table->found_next_number_field);
8641 			} else {
8642 				ufield->field_no = dict_col_get_clust_pos(
8643 					&prebuilt->table->cols[i - num_v],
8644 					clust_index);
8645 				ufield->old_v_val = NULL;
8646 				if (field != table->found_next_number_field
8647 				    || dfield_is_null(&ufield->new_val)) {
8648 				} else {
8649 					auto_inc = field->val_uint();
8650 				}
8651 			}
8652 			n_changed++;
8653 
8654 			/* If an FTS indexed column was changed by this
8655 			UPDATE then we need to inform the FTS sub-system.
8656 
8657 			NOTE: Currently we re-index all FTS indexed columns
8658 			even if only a subset of the FTS indexed columns
8659 			have been updated. That is the reason we are
8660 			checking only once here. Later we will need to
8661 			note which columns have been updated and do
8662 			selective processing. */
8663 			if (prebuilt->table->fts != NULL && !is_virtual) {
8664 				ulint		offset;
8665 				dict_table_t*   innodb_table;
8666 
8667 				innodb_table = prebuilt->table;
8668 
8669 				if (!changes_fts_column) {
8670 					offset = row_upd_changes_fts_column(
8671 						innodb_table, ufield);
8672 
8673 					if (offset != ULINT_UNDEFINED) {
8674 						changes_fts_column = TRUE;
8675 					}
8676 				}
8677 
8678 				if (!changes_fts_doc_col) {
8679 					changes_fts_doc_col =
8680 					row_upd_changes_doc_id(
8681 						innodb_table, ufield);
8682 				}
8683 			}
8684 		} else if (is_virtual) {
8685 			dfield_t*	vfield = dtuple_get_nth_v_field(
8686 				uvect->old_vrow, num_v);
8687 			buf = innodb_fill_old_vcol_val(
8688 				prebuilt, vfield, o_len,
8689 				col, old_mysql_row_col,
8690 				col_pack_len, buf);
8691 			ut_ad(col->ord_part || online_ord_part);
8692 			num_v++;
8693 		}
8694 	}
8695 
8696 	/* If the update changes a column with an FTS index on it, we
8697 	then add an update column node with a new document id to the
8698 	other changes. We piggy back our changes on the normal UPDATE
8699 	to reduce processing and IO overhead. */
8700 	if (!prebuilt->table->fts) {
8701 		trx->fts_next_doc_id = 0;
8702 	} else if (changes_fts_column || changes_fts_doc_col) {
8703 		dict_table_t*   innodb_table = prebuilt->table;
8704 
8705 		ufield = uvect->fields + n_changed;
8706 
8707 		if (!DICT_TF2_FLAG_IS_SET(
8708 			innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) {
8709 
8710 			/* If Doc ID is managed by user, and if any
8711 			FTS indexed column has been updated, its corresponding
8712 			Doc ID must also be updated. Otherwise, return
8713 			error */
8714 			if (changes_fts_column && !changes_fts_doc_col) {
8715 				ib::warn() << "A new Doc ID must be supplied"
8716 					" while updating FTS indexed columns.";
8717 				return(DB_FTS_INVALID_DOCID);
8718 			}
8719 
8720 			/* Doc ID must monotonically increase */
8721 			ut_ad(innodb_table->fts->cache);
8722 			if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
8723 
8724 				ib::warn() << "FTS Doc ID must be larger than "
8725 					<< innodb_table->fts->cache->next_doc_id
8726 					- 1  << " for table "
8727 					<< innodb_table->name;
8728 
8729 				return(DB_FTS_INVALID_DOCID);
8730 			}
8731 
8732 
8733 			trx->fts_next_doc_id = doc_id;
8734 		} else {
8735 			/* If the Doc ID is a hidden column, it can't be
8736 			changed by user */
8737 			ut_ad(!changes_fts_doc_col);
8738 
8739 			/* Doc ID column is hidden, a new Doc ID will be
8740 			generated by following fts_update_doc_id() call */
8741 			trx->fts_next_doc_id = 0;
8742 		}
8743 
8744 		fts_update_doc_id(
8745 			innodb_table, ufield, &trx->fts_next_doc_id);
8746 
8747 		++n_changed;
8748 	} else {
8749 		/* We have a Doc ID column, but none of FTS indexed
8750 		columns are touched, nor the Doc ID column, so set
8751 		fts_next_doc_id to UINT64_UNDEFINED, which means do not
8752 		update the Doc ID column */
8753 		trx->fts_next_doc_id = UINT64_UNDEFINED;
8754 	}
8755 
8756 	uvect->n_fields = n_changed;
8757 	uvect->info_bits = 0;
8758 
8759 	ut_a(buf <= (byte*) original_upd_buff + buff_len);
8760 
8761 	ut_ad(uvect->validate());
8762 	return(DB_SUCCESS);
8763 }
8764 
#ifdef WITH_WSREP
/** Compute an MD5 digest over the stored columns of a row.
Columns that are not stored in the database are skipped. For every other
column one marker byte is hashed (0 if the column is NULL in the row,
1 otherwise), followed by the column data when the column is not NULL.
For BLOB and GEOMETRY columns the referenced data is hashed, and for true
VARCHAR columns only the payload without the length bytes.
@return 0 */
static
int
wsrep_calc_row_hash(
/*================*/
	byte*		digest,		/*!< in/out: md5 sum */
	const uchar*	row,		/*!< in: row in MySQL format */
	TABLE*		table,		/*!< in: table in MySQL data
					dictionary */
	row_prebuilt_t*	prebuilt)	/*!< in: InnoDB prebuilt struct
					(not used) */
{
	/* Marker bytes hashed in front of each column */
	byte	null_byte = 0;
	byte	true_byte = 1;

	void*	ctx = alloca(my_md5_context_size());
	my_md5_init(ctx);

	for (uint i = 0; i < table->s->fields; i++) {
		const Field*	field = table->field[i];

		if (!field->stored_in_db()) {
			continue;
		}

		/* By default the whole in-row column image is hashed. */
		const byte*	ptr = (const byte*) row
			+ get_field_offset(table, field);
		ulint		len = field->pack_length();
		ulint		is_unsigned;

		switch (get_innobase_type_from_mysql_type(
				&is_unsigned, field)) {
		case DATA_BLOB:
		case DATA_GEOMETRY:
			/* The row only holds a reference (length and
			pointer) to the data. calc_row_difference() reads
			GEOMETRY columns through the same reference format
			as BLOBs, so do the same here: hash the referenced
			data, not the in-memory pointer. */
			ptr = row_mysql_read_blob_ref(&len, ptr, len);
			break;

		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
			if (field->type() == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
				ptr = row_mysql_read_true_varchar(
					&len, ptr,
					(ulint)
					(((Field_varstring*)field)->length_bytes));
			}
			break;

		default:
			break;
		}

		if (field->is_null_in_record(row)) {
			my_md5_input(ctx, &null_byte, 1);
		} else {
			my_md5_input(ctx, &true_byte, 1);
			my_md5_input(ctx, ptr, len);
		}
	}

	my_md5_result(ctx, digest);

	return(0);
}
#endif /* WITH_WSREP */
8841 
8842 /**
8843 Updates a row given as a parameter to a new value. Note that we are given
8844 whole rows, not just the fields which are updated: this incurs some
8845 overhead for CPU when we check which fields are actually updated.
8846 TODO: currently InnoDB does not prevent the 'Halloween problem':
8847 in a searched update a single row can get updated several times
8848 if its index columns are updated!
8849 @param[in] old_row	Old row contents in MySQL format
8850 @param[out] new_row	Updated row contents in MySQL format
8851 @return error number or 0 */
8852 
8853 int
update_row(const uchar * old_row,const uchar * new_row)8854 ha_innobase::update_row(
8855 	const uchar*	old_row,
8856 	const uchar*	new_row)
8857 {
8858 	int		err;
8859 
8860 	dberr_t		error;
8861 	trx_t*		trx = thd_to_trx(m_user_thd);
8862 
8863 	DBUG_ENTER("ha_innobase::update_row");
8864 
8865 	ut_a(m_prebuilt->trx == trx);
8866 
8867 	if (high_level_read_only) {
8868 		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
8869 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
8870 	} else if (!trx_is_started(trx)) {
8871 		trx->will_lock = true;
8872 	}
8873 
8874 	if (m_upd_buf == NULL) {
8875 		ut_ad(m_upd_buf_size == 0);
8876 
8877 		/* Create a buffer for packing the fields of a record. Why
8878 		table->reclength did not work here? Obviously, because char
8879 		fields when packed actually became 1 byte longer, when we also
8880 		stored the string length as the first byte. */
8881 
8882 		m_upd_buf_size = table->s->reclength + table->s->max_key_length
8883 			+ MAX_REF_PARTS * 3;
8884 
8885 		m_upd_buf = reinterpret_cast<uchar*>(
8886 			my_malloc(//PSI_INSTRUMENT_ME,
8887                                   m_upd_buf_size,
8888 				MYF(MY_WME)));
8889 
8890 		if (m_upd_buf == NULL) {
8891 			m_upd_buf_size = 0;
8892 			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
8893 		}
8894 	}
8895 
8896 	upd_t*		uvect = row_get_prebuilt_update_vector(m_prebuilt);
8897 	ib_uint64_t	autoinc;
8898 
8899 	/* Build an update vector from the modified fields in the rows
8900 	(uses m_upd_buf of the handle) */
8901 
8902 	error = calc_row_difference(
8903 		uvect, old_row, new_row, table, m_upd_buf, m_upd_buf_size,
8904 		m_prebuilt, autoinc);
8905 
8906 	if (error != DB_SUCCESS) {
8907 		goto func_exit;
8908 	}
8909 
8910 	if (!uvect->n_fields) {
8911 		/* This is the same as success, but instructs
8912 		MySQL that the row is not really updated and it
8913 		should not increase the count of updated rows.
8914 		This is fix for http://bugs.mysql.com/29157 */
8915 		if (m_prebuilt->versioned_write
8916 		    && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
8917 		    /* Multiple UPDATE of same rows in single transaction create
8918 		       historical rows only once. */
8919 		    && trx->id != table->vers_start_id()) {
8920 			error = row_insert_for_mysql((byte*) old_row,
8921 						     m_prebuilt,
8922 						     ROW_INS_HISTORICAL);
8923 			if (error != DB_SUCCESS) {
8924 				goto func_exit;
8925 			}
8926 			innobase_srv_conc_exit_innodb(m_prebuilt);
8927 			innobase_active_small();
8928 		}
8929 		DBUG_RETURN(HA_ERR_RECORD_IS_THE_SAME);
8930 	} else {
8931 		const bool vers_set_fields = m_prebuilt->versioned_write
8932 			&& m_prebuilt->upd_node->update->affects_versioned();
8933 		const bool vers_ins_row = vers_set_fields
8934 			&& thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE;
8935 
8936 		/* This is not a delete */
8937 		m_prebuilt->upd_node->is_delete =
8938 			(vers_set_fields && !vers_ins_row) ||
8939 			(thd_sql_command(m_user_thd) == SQLCOM_DELETE &&
8940 				table->versioned(VERS_TIMESTAMP))
8941 			? VERSIONED_DELETE
8942 			: NO_DELETE;
8943 
8944 		innobase_srv_conc_enter_innodb(m_prebuilt);
8945 
8946 		error = row_update_for_mysql(m_prebuilt);
8947 
8948 		if (error == DB_SUCCESS && vers_ins_row
8949 		    /* Multiple UPDATE of same rows in single transaction create
8950 		       historical rows only once. */
8951 		    && trx->id != table->vers_start_id()) {
8952 			error = row_insert_for_mysql((byte*) old_row,
8953 						     m_prebuilt,
8954 						     ROW_INS_HISTORICAL);
8955 		}
8956 	}
8957 
8958 	if (error == DB_SUCCESS && autoinc) {
8959 		/* A value for an AUTO_INCREMENT column
8960 		was specified in the UPDATE statement. */
8961 
8962 		/* We need the upper limit of the col type to check for
8963 		whether we update the table autoinc counter or not. */
8964 		ulonglong	col_max_value =
8965 			table->found_next_number_field->get_max_int_value();
8966 
8967 		/* This should filter out the negative
8968 		values set explicitly by the user. */
8969 		if (autoinc <= col_max_value) {
8970 			ulonglong	offset;
8971 			ulonglong	increment;
8972 
8973 			offset = m_prebuilt->autoinc_offset;
8974 			increment = m_prebuilt->autoinc_increment;
8975 
8976 			autoinc = innobase_next_autoinc(
8977 				autoinc, 1, increment, offset,
8978 				col_max_value);
8979 
8980 			error = innobase_set_max_autoinc(autoinc);
8981 
8982 			if (m_prebuilt->table->persistent_autoinc) {
8983 				/* Update the PAGE_ROOT_AUTO_INC. Yes, we do
8984 				this even if dict_table_t::autoinc already was
8985 				greater than autoinc, because we cannot know
8986 				if any INSERT actually used (and wrote to
8987 				PAGE_ROOT_AUTO_INC) a value bigger than our
8988 				autoinc. */
8989 				btr_write_autoinc(dict_table_get_first_index(
8990 							  m_prebuilt->table),
8991 						  autoinc);
8992 			}
8993 		}
8994 	}
8995 
8996 	innobase_srv_conc_exit_innodb(m_prebuilt);
8997 
8998 func_exit:
8999 	if (error == DB_FTS_INVALID_DOCID) {
9000 		err = HA_FTS_INVALID_DOCID;
9001 		my_error(HA_FTS_INVALID_DOCID, MYF(0));
9002 	} else {
9003 		err = convert_error_code_to_mysql(
9004 			error, m_prebuilt->table->flags, m_user_thd);
9005 	}
9006 
9007 	/* Tell InnoDB server that there might be work for
9008 	utility threads: */
9009 
9010 	innobase_active_small();
9011 
9012 #ifdef WITH_WSREP
9013 	if (error == DB_SUCCESS && trx->is_wsrep()
9014 	    && wsrep_thd_is_local(m_user_thd)
9015 	    && !wsrep_thd_ignore_table(m_user_thd)) {
9016 		DBUG_PRINT("wsrep", ("update row key"));
9017 
9018 		if (wsrep_append_keys(m_user_thd,
9019 				      wsrep_protocol_version >= 4
9020 				      ? WSREP_SERVICE_KEY_UPDATE
9021 				      : WSREP_SERVICE_KEY_EXCLUSIVE,
9022 				      old_row, new_row)){
9023 			WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
9024 			DBUG_PRINT("wsrep", ("row key failed"));
9025 			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9026 		}
9027 	}
9028 #endif /* WITH_WSREP */
9029 
9030 	DBUG_RETURN(err);
9031 }
9032 
9033 /**********************************************************************//**
9034 Deletes a row given as the parameter.
9035 @return error number or 0 */
9036 
9037 int
delete_row(const uchar * record)9038 ha_innobase::delete_row(
9039 /*====================*/
9040 	const uchar*	record)	/*!< in: a row in MySQL format */
9041 {
9042 	dberr_t		error;
9043 	trx_t*		trx = thd_to_trx(m_user_thd);
9044 
9045 	DBUG_ENTER("ha_innobase::delete_row");
9046 
9047 	ut_a(m_prebuilt->trx == trx);
9048 
9049 	if (high_level_read_only) {
9050 		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
9051 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
9052 	} else if (!trx_is_started(trx)) {
9053 		trx->will_lock = true;
9054 	}
9055 
9056 	if (!m_prebuilt->upd_node) {
9057 		row_get_prebuilt_update_vector(m_prebuilt);
9058 	}
9059 
9060 	/* This is a delete */
9061 	m_prebuilt->upd_node->is_delete = table->versioned_write(VERS_TRX_ID)
9062 		&& table->vers_end_field()->is_max()
9063 		&& trx->id != table->vers_start_id()
9064 		? VERSIONED_DELETE
9065 		: PLAIN_DELETE;
9066 
9067 	innobase_srv_conc_enter_innodb(m_prebuilt);
9068 
9069 	error = row_update_for_mysql(m_prebuilt);
9070 
9071 	innobase_srv_conc_exit_innodb(m_prebuilt);
9072 
9073 	/* Tell the InnoDB server that there might be work for
9074 	utility threads: */
9075 
9076 	innobase_active_small();
9077 
9078 #ifdef WITH_WSREP
9079 	if (error == DB_SUCCESS && trx->is_wsrep()
9080 	    && wsrep_thd_is_local(m_user_thd)
9081 	    && !wsrep_thd_ignore_table(m_user_thd)) {
9082 		if (wsrep_append_keys(m_user_thd, WSREP_SERVICE_KEY_EXCLUSIVE,
9083 				      record,
9084 				      NULL)) {
9085 			DBUG_PRINT("wsrep", ("delete fail"));
9086 			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
9087 		}
9088 	}
9089 #endif /* WITH_WSREP */
9090 	DBUG_RETURN(convert_error_code_to_mysql(
9091 			    error, m_prebuilt->table->flags, m_user_thd));
9092 }
9093 
9094 /** Delete all rows from the table.
9095 @return error number or 0 */
9096 
9097 int
delete_all_rows()9098 ha_innobase::delete_all_rows()
9099 {
9100 	DBUG_ENTER("ha_innobase::delete_all_rows");
9101 	DBUG_RETURN(HA_ERR_WRONG_COMMAND);
9102 }
9103 
9104 /**********************************************************************//**
9105 Removes a new lock set on a row, if it was not read optimistically. This can
9106 be called after a row has been read in the processing of an UPDATE or a DELETE
9107 query, if the option innodb_locks_unsafe_for_binlog is set. */
9108 
9109 void
unlock_row(void)9110 ha_innobase::unlock_row(void)
9111 /*=========================*/
9112 {
9113 	DBUG_ENTER("ha_innobase::unlock_row");
9114 
9115 	if (m_prebuilt->select_lock_type == LOCK_NONE) {
9116 		DBUG_VOID_RETURN;
9117 	}
9118 
9119 	ut_ad(trx_state_eq(m_prebuilt->trx, TRX_STATE_ACTIVE, true));
9120 
9121 	switch (m_prebuilt->row_read_type) {
9122 	case ROW_READ_WITH_LOCKS:
9123 		if (!srv_locks_unsafe_for_binlog
9124 		    && m_prebuilt->trx->isolation_level
9125 		    > TRX_ISO_READ_COMMITTED) {
9126 			break;
9127 		}
9128 		/* fall through */
9129 	case ROW_READ_TRY_SEMI_CONSISTENT:
9130 		row_unlock_for_mysql(m_prebuilt, FALSE);
9131 		break;
9132 	case ROW_READ_DID_SEMI_CONSISTENT:
9133 		m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9134 		break;
9135 	}
9136 
9137 	DBUG_VOID_RETURN;
9138 }
9139 
9140 /* See handler.h and row0mysql.h for docs on this function. */
9141 
9142 bool
was_semi_consistent_read(void)9143 ha_innobase::was_semi_consistent_read(void)
9144 /*=======================================*/
9145 {
9146 	return(m_prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
9147 }
9148 
9149 /* See handler.h and row0mysql.h for docs on this function. */
9150 
9151 void
try_semi_consistent_read(bool yes)9152 ha_innobase::try_semi_consistent_read(bool yes)
9153 /*===========================================*/
9154 {
9155 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9156 
9157 	/* Row read type is set to semi consistent read if this was
9158 	requested by the MySQL and either innodb_locks_unsafe_for_binlog
9159 	option is used or this session is using READ COMMITTED isolation
9160 	level. */
9161 
9162 	if (yes
9163 	    && (srv_locks_unsafe_for_binlog
9164 		|| m_prebuilt->trx->isolation_level
9165 		<= TRX_ISO_READ_COMMITTED)) {
9166 
9167 		m_prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
9168 
9169 	} else {
9170 		m_prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
9171 	}
9172 }
9173 
9174 /******************************************************************//**
9175 Initializes a handle to use an index.
9176 @return 0 or error number */
9177 
9178 int
index_init(uint keynr,bool)9179 ha_innobase::index_init(
9180 /*====================*/
9181 	uint		keynr,	/*!< in: key (index) number */
9182 	bool)
9183 {
9184 	DBUG_ENTER("index_init");
9185 
9186 	DBUG_RETURN(change_active_index(keynr));
9187 }
9188 
9189 /******************************************************************//**
9190 Currently does nothing.
9191 @return 0 */
9192 
9193 int
index_end(void)9194 ha_innobase::index_end(void)
9195 /*========================*/
9196 {
9197 	DBUG_ENTER("index_end");
9198 
9199 	active_index = MAX_KEY;
9200 
9201 	in_range_check_pushed_down = FALSE;
9202 
9203 	m_ds_mrr.dsmrr_close();
9204 
9205 	DBUG_RETURN(0);
9206 }
9207 
9208 /*********************************************************************//**
9209 Converts a search mode flag understood by MySQL to a flag understood
9210 by InnoDB. */
9211 page_cur_mode_t
convert_search_mode_to_innobase(ha_rkey_function find_flag)9212 convert_search_mode_to_innobase(
9213 /*============================*/
9214 	ha_rkey_function	find_flag)
9215 {
9216 	switch (find_flag) {
9217 	case HA_READ_KEY_EXACT:
9218 		/* this does not require the index to be UNIQUE */
9219 	case HA_READ_KEY_OR_NEXT:
9220 		return(PAGE_CUR_GE);
9221 	case HA_READ_AFTER_KEY:
9222 		return(PAGE_CUR_G);
9223 	case HA_READ_BEFORE_KEY:
9224 		return(PAGE_CUR_L);
9225 	case HA_READ_KEY_OR_PREV:
9226 	case HA_READ_PREFIX_LAST:
9227 	case HA_READ_PREFIX_LAST_OR_PREV:
9228 		return(PAGE_CUR_LE);
9229 	case HA_READ_MBR_CONTAIN:
9230 		return(PAGE_CUR_CONTAIN);
9231 	case HA_READ_MBR_INTERSECT:
9232 		return(PAGE_CUR_INTERSECT);
9233 	case HA_READ_MBR_WITHIN:
9234 		return(PAGE_CUR_WITHIN);
9235 	case HA_READ_MBR_DISJOINT:
9236 		return(PAGE_CUR_DISJOINT);
9237 	case HA_READ_MBR_EQUAL:
9238 		return(PAGE_CUR_MBR_EQUAL);
9239 	case HA_READ_PREFIX:
9240 		return(PAGE_CUR_UNSUPP);
9241 	/* do not use "default:" in order to produce a gcc warning:
9242 	enumeration value '...' not handled in switch
9243 	(if -Wswitch or -Wall is used) */
9244 	}
9245 
9246 	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
9247 
9248 	return(PAGE_CUR_UNSUPP);
9249 }
9250 
9251 /*
9252    BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
9253    ---------------------------------------------------
9254 The following does not cover all the details, but explains how we determine
9255 the start of a new SQL statement, and what is associated with it.
9256 
9257 For each table in the database the MySQL interpreter may have several
9258 table handle instances in use, also in a single SQL query. For each table
9259 handle instance there is an InnoDB  'm_prebuilt' struct which contains most
9260 of the InnoDB data associated with this table handle instance.
9261 
9262   A) if the user has not explicitly set any MySQL table level locks:
9263 
9264   1) MySQL calls ::external_lock to set an 'intention' table level lock on
9265 the table of the handle instance. There we set
9266 m_prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
9267 true if we are taking this table handle instance to use in a new SQL
9268 statement issued by the user. We also increment trx->n_mysql_tables_in_use.
9269 
9270   2) If m_prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
9271 instructions to m_prebuilt->template of the table handle instance in
9272 ::index_read. The template is used to save CPU time in large joins.
9273 
9274   3) In row_search_for_mysql, if m_prebuilt->sql_stat_start is true, we
9275 allocate a new consistent read view for the trx if it does not yet have one,
9276 or in the case of a locking read, set an InnoDB 'intention' table level
9277 lock on the table.
9278 
9279   4) We do the SELECT. MySQL may repeatedly call ::index_read for the
9280 same table handle instance, if it is a join.
9281 
9282   5) When the SELECT ends, MySQL removes its intention table level locks
9283 in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
9284  (a) we execute a COMMIT there if the autocommit is on,
9285  (b) we also release possible 'SQL statement level resources' InnoDB may
9286 have for this SQL statement. The MySQL interpreter does NOT execute
9287 autocommit for pure read transactions, though it should. That is why the
9288 table handler in that case has to execute the COMMIT in ::external_lock.
9289 
  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */
9298 
9299 
/**********************************************************************//**
Positions an index cursor to the index specified in the handle. Fetches the
row if any.

Before searching, the function returns early with:
- HA_ERR_CRASHED if there is no current index or it is corrupted;
- HA_ERR_INDEX_CORRUPT or HA_ERR_TABLE_DEF_CHANGED if the index was
  flagged as not usable;
- HA_ERR_KEY_NOT_FOUND if the index is a fulltext index;
- HA_ERR_READ_ONLY_TRANSACTION if the index is spatial and the transaction
  has already been started without will_lock.

After the search, table->status is 0 when a row was fetched and
STATUS_NOT_FOUND in every other case. The match mode derived from
find_flag is remembered in m_last_match_mode.
@return 0, HA_ERR_KEY_NOT_FOUND, or error number */

int
ha_innobase::index_read(
/*====================*/
	uchar*		buf,		/*!< in/out: buffer for the returned
					row */
	const uchar*	key_ptr,	/*!< in: key value; if this is NULL
					we position the cursor at the
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
	uint			key_len,/*!< in: key value length */
	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
{
	DBUG_ENTER("index_read");
	DEBUG_SYNC_C("ha_innobase_index_read_begin");

	ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));
	ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);

	dict_index_t*	index = m_prebuilt->index;

	/* Without a usable index object there is nothing to search. */
	if (index == NULL || index->is_corrupted()) {
		m_prebuilt->index_usable = FALSE;
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* The index was flagged unusable earlier; report why. */
	if (!m_prebuilt->index_usable) {
		DBUG_RETURN(index->is_corrupted()
			    ? HA_ERR_INDEX_CORRUPT
			    : HA_ERR_TABLE_DEF_CHANGED);
	}

	/* Fulltext indexes are not searched through this interface. */
	if (index->type & DICT_FTS) {
		DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
	}

	/* For R-Tree index, we will always place the page lock to
	pages being searched */
	if (index->is_spatial() && !m_prebuilt->trx->will_lock) {
		if (trx_is_started(m_prebuilt->trx)) {
			DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION);
		} else {
			m_prebuilt->trx->will_lock = true;
		}
	}

	/* Note that the index for which the search template is built is not
	necessarily m_prebuilt->index, but can also be the clustered index */

	if (m_prebuilt->sql_stat_start) {
		build_template(false);
	}

	if (key_ptr != NULL) {
		/* Convert the search key value to InnoDB format into
		m_prebuilt->search_tuple */

		row_sel_convert_mysql_key_to_innobase(
			m_prebuilt->search_tuple,
			m_prebuilt->srch_key_val1,
			m_prebuilt->srch_key_val_len,
			index,
			(byte*) key_ptr,
			(ulint) key_len);

		DBUG_ASSERT(m_prebuilt->search_tuple->n_fields > 0);
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

		dtuple_set_n_fields(m_prebuilt->search_tuple, 0);
	}

	page_cur_mode_t	mode = convert_search_mode_to_innobase(find_flag);

	/* Only two flags request a matching restriction; every other
	flag searches with match_mode 0. */
	ulint	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {

		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX_LAST) {

		match_mode = ROW_SEL_EXACT_PREFIX;
	}

	/* Remembered for later use by index_next_same(). */
	m_last_match_mode = (uint) match_mode;

	dberr_t		ret;

	if (mode != PAGE_CUR_UNSUPP) {

		innobase_srv_conc_enter_innodb(m_prebuilt);

		ret = row_search_mvcc(
			buf, mode, m_prebuilt, match_mode, 0);

		innobase_srv_conc_exit_innodb(m_prebuilt);
	} else {

		/* The search flag has no InnoDB cursor mode; no search
		is attempted. */
		ret = DB_UNSUPPORTED;
	}

	/* Debug builds can inject a failure here. */
	DBUG_EXECUTE_IF("ib_select_query_failure", ret = DB_ERROR;);

	int	error;

	/* Map the InnoDB result to a handler error code and set
	table->status accordingly. */
	switch (ret) {
	case DB_SUCCESS:
		error = 0;
		table->status = 0;
		/* Count the row read in the counter that matches the
		kind of table. */
		if (m_prebuilt->table->is_system_db) {
			srv_stats.n_system_rows_read.add(
				thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
		} else {
			srv_stats.n_rows_read.add(
				thd_get_thread_id(m_prebuilt->trx->mysql_thd), 1);
		}
		break;

	case DB_RECORD_NOT_FOUND:
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
		break;

	case DB_END_OF_INDEX:
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
		break;

	case DB_TABLESPACE_DELETED:
		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_TABLESPACE_DISCARDED,
			table->s->table_name.str);

		table->status = STATUS_NOT_FOUND;
		error = HA_ERR_TABLESPACE_MISSING;
		break;

	case DB_TABLESPACE_NOT_FOUND:

		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			ER_TABLESPACE_MISSING,
			table->s->table_name.str);

		table->status = STATUS_NOT_FOUND;
		error = HA_ERR_TABLESPACE_MISSING;
		break;

	default:
		/* Any other result goes through the generic conversion. */
		error = convert_error_code_to_mysql(
			ret, m_prebuilt->table->flags, m_user_thd);

		table->status = STATUS_NOT_FOUND;
		break;
	}

	DBUG_RETURN(error);
}
9470 
9471 /*******************************************************************//**
9472 The following functions works like index_read, but it find the last
9473 row with the current key value or prefix.
9474 @return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
9475 
9476 int
index_read_last(uchar * buf,const uchar * key_ptr,uint key_len)9477 ha_innobase::index_read_last(
9478 /*=========================*/
9479 	uchar*		buf,	/*!< out: fetched row */
9480 	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
9481 				key value */
9482 	uint		key_len)/*!< in: length of the key val or prefix
9483 				in bytes */
9484 {
9485 	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
9486 }
9487 
9488 /********************************************************************//**
9489 Get the index for a handle. Does not change active index.
9490 @return NULL or index instance. */
9491 
9492 dict_index_t*
innobase_get_index(uint keynr)9493 ha_innobase::innobase_get_index(
9494 /*============================*/
9495 	uint		keynr)	/*!< in: use this index; MAX_KEY means always
9496 				clustered index, even if it was internally
9497 				generated by InnoDB */
9498 {
9499 	KEY*		key = NULL;
9500 	dict_table_t*	ib_table = m_prebuilt->table;
9501 	dict_index_t*	index;
9502 
9503 	DBUG_ENTER("innobase_get_index");
9504 
9505 	if (keynr != MAX_KEY && table->s->keys > 0) {
9506 		key = &table->key_info[keynr];
9507 		index = dict_table_get_index_on_name(ib_table, key->name.str);
9508 	} else {
9509 		index = dict_table_get_first_index(ib_table);
9510 	}
9511 
9512 	if (index == NULL) {
9513 		sql_print_error(
9514 			"InnoDB could not find key no %u with name %s"
9515 			" from dict cache for table %s",
9516 			keynr, key ? key->name.str : "NULL",
9517 			ib_table->name.m_name);
9518 	}
9519 
9520 	DBUG_RETURN(index);
9521 }
9522 
/********************************************************************//**
Changes the active index of a handle.

Sets active_index and m_prebuilt->index, re-evaluates
m_prebuilt->index_usable, prepares m_prebuilt->search_tuple for the new
index (not for fulltext indexes) and rebuilds the row template.

Return values visible in this function:
- 0 on success;
- 1 if no InnoDB index could be found for keynr;
- ER_TABLE_CORRUPT if the index is unusable, corrupted and primary;
- HA_ERR_INDEX_CORRUPT if the index is unusable, corrupted and not primary;
- the conversion of DB_MISSING_HISTORY if the index is unusable for
  another reason.
@return 0 or error code */

int
ha_innobase::change_active_index(
/*=============================*/
	uint	keynr)	/*!< in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
			InnoDB */
{
	DBUG_ENTER("change_active_index");

	ut_ad(m_user_thd == ha_thd());
	ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));

	active_index = keynr;

	m_prebuilt->index = innobase_get_index(keynr);

	if (m_prebuilt->index == NULL) {
		sql_print_warning("InnoDB: change_active_index(%u) failed",
				  keynr);
		m_prebuilt->index_usable = FALSE;
		DBUG_RETURN(1);
	}

	m_prebuilt->index_usable = row_merge_is_index_usable(
		m_prebuilt->trx, m_prebuilt->index);

	if (!m_prebuilt->index_usable) {
		if (m_prebuilt->index->is_corrupted()) {
			char	table_name[MAX_FULL_NAME_LEN + 1];

			/* Format the table name for the warnings below. */
			innobase_format_name(
				table_name, sizeof table_name,
				m_prebuilt->index->table->name.m_name);

			if (m_prebuilt->index->is_primary()) {
				/* A corrupted primary index is reported as
				corruption of the whole table. */
				ut_ad(m_prebuilt->index->table->corrupted);
				push_warning_printf(
					m_user_thd, Sql_condition::WARN_LEVEL_WARN,
					ER_TABLE_CORRUPT,
					"InnoDB: Table %s is corrupted.",
					table_name);
				DBUG_RETURN(ER_TABLE_CORRUPT);
			} else {
				push_warning_printf(
					m_user_thd, Sql_condition::WARN_LEVEL_WARN,
					HA_ERR_INDEX_CORRUPT,
					"InnoDB: Index %s for table %s is"
					" marked as corrupted",
					m_prebuilt->index->name(),
					table_name);
				DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
			}
		} else {
			/* Not corrupted, yet not usable by this
			transaction: warn and fall through to the
			DB_MISSING_HISTORY return below. */
			push_warning_printf(
				m_user_thd, Sql_condition::WARN_LEVEL_WARN,
				HA_ERR_TABLE_DEF_CHANGED,
				"InnoDB: insufficient history for index %u",
				keynr);
		}

		/* The caller seems to ignore this.  Thus, we must check
		this again in row_search_for_mysql(). */
		DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY,
				0, NULL));
	}

	ut_a(m_prebuilt->search_tuple != 0);

	/* Initialization of search_tuple is not needed for FT index
	since FT search returns rank only. In addition engine should
	be able to retrieve FTS_DOC_ID column value if necessary. */
	if (m_prebuilt->index->type & DICT_FTS) {
		/* Set fts_doc_id_in_read_set if only the key is read and
		the read set contains the column named
		FTS_DOC_ID_COL_NAME. */
		for (uint i = 0; i < table->s->fields; i++) {
			if (m_prebuilt->read_just_key
			    && bitmap_is_set(table->read_set, i)
			    && !strcmp(table->s->field[i]->field_name.str,
				       FTS_DOC_ID_COL_NAME)) {
				m_prebuilt->fts_doc_id_in_read_set = true;
				break;
			}
		}
	} else {
		/* Size the search tuple for the new index and copy the
		field types of the index into it. */
		ulint n_fields = dict_index_get_n_unique_in_tree(
			m_prebuilt->index);

		dtuple_set_n_fields(m_prebuilt->search_tuple, n_fields);

		dict_index_copy_types(
			m_prebuilt->search_tuple, m_prebuilt->index,
			n_fields);

		/* If it's FTS query and FTS_DOC_ID exists FTS_DOC_ID field is
		always added to read_set. */
		m_prebuilt->fts_doc_id_in_read_set = m_prebuilt->in_fts_query
			&& m_prebuilt->read_just_key
			&& m_prebuilt->index->contains_col_or_prefix(
				m_prebuilt->table->fts->doc_col, false);
	}

	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */

	build_template(false);

	DBUG_RETURN(0);
}
9636 
9637 /***********************************************************************//**
9638 Reads the next or previous row from a cursor, which must have previously been
9639 positioned using index_read.
9640 @return 0, HA_ERR_END_OF_FILE, or error number */
9641 
9642 int
general_fetch(uchar * buf,uint direction,uint match_mode)9643 ha_innobase::general_fetch(
9644 /*=======================*/
9645 	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
9646 				format */
9647 	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
9648 	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
9649 				ROW_SEL_EXACT_PREFIX */
9650 {
9651 	DBUG_ENTER("general_fetch");
9652 
9653 	const trx_t*	trx = m_prebuilt->trx;
9654 
9655 	ut_ad(trx == thd_to_trx(m_user_thd));
9656 
9657 	if (m_prebuilt->table->is_readable()) {
9658 	} else if (m_prebuilt->table->corrupted) {
9659 		DBUG_RETURN(HA_ERR_CRASHED);
9660 	} else {
9661 		DBUG_RETURN(m_prebuilt->table->space
9662 			    ? HA_ERR_DECRYPTION_FAILED
9663 			    : HA_ERR_NO_SUCH_TABLE);
9664 	}
9665 
9666 	innobase_srv_conc_enter_innodb(m_prebuilt);
9667 
9668 	dberr_t	ret = row_search_mvcc(
9669 		buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, direction);
9670 
9671 	innobase_srv_conc_exit_innodb(m_prebuilt);
9672 
9673 	int	error;
9674 
9675 	switch (ret) {
9676 	case DB_SUCCESS:
9677 		error = 0;
9678 		table->status = 0;
9679 		if (m_prebuilt->table->is_system_db) {
9680 			srv_stats.n_system_rows_read.add(
9681 				thd_get_thread_id(trx->mysql_thd), 1);
9682 		} else {
9683 			srv_stats.n_rows_read.add(
9684 				thd_get_thread_id(trx->mysql_thd), 1);
9685 		}
9686 		break;
9687 	case DB_RECORD_NOT_FOUND:
9688 		error = HA_ERR_END_OF_FILE;
9689 		table->status = STATUS_NOT_FOUND;
9690 		break;
9691 	case DB_END_OF_INDEX:
9692 		error = HA_ERR_END_OF_FILE;
9693 		table->status = STATUS_NOT_FOUND;
9694 		break;
9695 	case DB_TABLESPACE_DELETED:
9696 		ib_senderrf(
9697 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9698 			ER_TABLESPACE_DISCARDED,
9699 			table->s->table_name.str);
9700 
9701 		table->status = STATUS_NOT_FOUND;
9702 		error = HA_ERR_TABLESPACE_MISSING;
9703 		break;
9704 	case DB_TABLESPACE_NOT_FOUND:
9705 
9706 		ib_senderrf(
9707 			trx->mysql_thd, IB_LOG_LEVEL_ERROR,
9708 			ER_TABLESPACE_MISSING,
9709 			table->s->table_name.str);
9710 
9711 		table->status = STATUS_NOT_FOUND;
9712 		error = HA_ERR_TABLESPACE_MISSING;
9713 		break;
9714 	default:
9715 		error = convert_error_code_to_mysql(
9716 			ret, m_prebuilt->table->flags, m_user_thd);
9717 
9718 		table->status = STATUS_NOT_FOUND;
9719 		break;
9720 	}
9721 
9722 	DBUG_RETURN(error);
9723 }
9724 
9725 /***********************************************************************//**
9726 Reads the next row from a cursor, which must have previously been
9727 positioned using index_read.
9728 @return 0, HA_ERR_END_OF_FILE, or error number */
9729 
9730 int
index_next(uchar * buf)9731 ha_innobase::index_next(
9732 /*====================*/
9733 	uchar*		buf)	/*!< in/out: buffer for next row in MySQL
9734 				format */
9735 {
9736 	return(general_fetch(buf, ROW_SEL_NEXT, 0));
9737 }
9738 
9739 /*******************************************************************//**
9740 Reads the next row matching to the key value given as the parameter.
9741 @return 0, HA_ERR_END_OF_FILE, or error number */
9742 
9743 int
index_next_same(uchar * buf,const uchar *,uint)9744 ha_innobase::index_next_same(
9745 /*=========================*/
9746 	uchar*		buf,	/*!< in/out: buffer for the row */
9747 	const uchar*, uint)
9748 {
9749 	return(general_fetch(buf, ROW_SEL_NEXT, m_last_match_mode));
9750 }
9751 
9752 /***********************************************************************//**
9753 Reads the previous row from a cursor, which must have previously been
9754 positioned using index_read.
9755 @return 0, HA_ERR_END_OF_FILE, or error number */
9756 
9757 int
index_prev(uchar * buf)9758 ha_innobase::index_prev(
9759 /*====================*/
9760 	uchar*	buf)	/*!< in/out: buffer for previous row in MySQL format */
9761 {
9762 	return(general_fetch(buf, ROW_SEL_PREV, 0));
9763 }
9764 
9765 /********************************************************************//**
9766 Positions a cursor on the first record in an index and reads the
9767 corresponding row to buf.
9768 @return 0, HA_ERR_END_OF_FILE, or error code */
9769 
9770 int
index_first(uchar * buf)9771 ha_innobase::index_first(
9772 /*=====================*/
9773 	uchar*	buf)	/*!< in/out: buffer for the row */
9774 {
9775 	DBUG_ENTER("index_first");
9776 
9777 	int	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
9778 
9779 	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9780 
9781 	if (error == HA_ERR_KEY_NOT_FOUND) {
9782 		error = HA_ERR_END_OF_FILE;
9783 	}
9784 
9785 	DBUG_RETURN(error);
9786 }
9787 
9788 /********************************************************************//**
9789 Positions a cursor on the last record in an index and reads the
9790 corresponding row to buf.
9791 @return 0, HA_ERR_END_OF_FILE, or error code */
9792 
9793 int
index_last(uchar * buf)9794 ha_innobase::index_last(
9795 /*====================*/
9796 	uchar*	buf)	/*!< in/out: buffer for the row */
9797 {
9798 	DBUG_ENTER("index_last");
9799 
9800 	int	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
9801 
9802 	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
9803 
9804 	if (error == HA_ERR_KEY_NOT_FOUND) {
9805 		error = HA_ERR_END_OF_FILE;
9806 	}
9807 
9808 	DBUG_RETURN(error);
9809 }
9810 
9811 /****************************************************************//**
9812 Initialize a table scan.
9813 @return 0 or error number */
9814 
9815 int
rnd_init(bool scan)9816 ha_innobase::rnd_init(
9817 /*==================*/
9818 	bool	scan)	/*!< in: true if table/index scan FALSE otherwise */
9819 {
9820 	int		err;
9821 
9822 	/* Store the active index value so that we can restore the original
9823 	value after a scan */
9824 
9825 	if (m_prebuilt->clust_index_was_generated) {
9826 		err = change_active_index(MAX_KEY);
9827 	} else {
9828 		err = change_active_index(m_primary_key);
9829 	}
9830 
9831 	/* Don't use semi-consistent read in random row reads (by position).
9832 	This means we must disable semi_consistent_read if scan is false */
9833 
9834 	if (!scan) {
9835 		try_semi_consistent_read(0);
9836 	}
9837 
9838 	m_start_of_scan = true;
9839 
9840 	return(err);
9841 }
9842 
9843 /*****************************************************************//**
9844 Ends a table scan.
9845 @return 0 or error number */
9846 
9847 int
rnd_end(void)9848 ha_innobase::rnd_end(void)
9849 /*======================*/
9850 {
9851 	return(index_end());
9852 }
9853 
9854 /*****************************************************************//**
9855 Reads the next row in a table scan (also used to read the FIRST row
9856 in a table scan).
9857 @return 0, HA_ERR_END_OF_FILE, or error number */
9858 
9859 int
rnd_next(uchar * buf)9860 ha_innobase::rnd_next(
9861 /*==================*/
9862 	uchar*	buf)	/*!< in/out: returns the row in this buffer,
9863 			in MySQL format */
9864 {
9865 	int	error;
9866 
9867 	DBUG_ENTER("rnd_next");
9868 
9869 	if (m_start_of_scan) {
9870 		error = index_first(buf);
9871 
9872 		if (error == HA_ERR_KEY_NOT_FOUND) {
9873 			error = HA_ERR_END_OF_FILE;
9874 		}
9875 
9876 		m_start_of_scan = false;
9877 	} else {
9878 		error = general_fetch(buf, ROW_SEL_NEXT, 0);
9879 	}
9880 
9881 	DBUG_RETURN(error);
9882 }
9883 
9884 /**********************************************************************//**
9885 Fetches a row from the table based on a row reference.
9886 @return 0, HA_ERR_KEY_NOT_FOUND, or error code */
9887 
9888 int
rnd_pos(uchar * buf,uchar * pos)9889 ha_innobase::rnd_pos(
9890 /*=================*/
9891 	uchar*	buf,	/*!< in/out: buffer for the row */
9892 	uchar*	pos)	/*!< in: primary key value of the row in the
9893 			MySQL format, or the row id if the clustered
9894 			index was internally generated by InnoDB; the
9895 			length of data in pos has to be ref_length */
9896 {
9897 	DBUG_ENTER("rnd_pos");
9898 	DBUG_DUMP("key", pos, ref_length);
9899 
9900 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
9901 
9902 	/* Note that we assume the length of the row reference is fixed
9903 	for the table, and it is == ref_length */
9904 
9905 	int	error = index_read(buf, pos, (uint)ref_length, HA_READ_KEY_EXACT);
9906 
9907 	if (error != 0) {
9908 		DBUG_PRINT("error", ("Got error: %d", error));
9909 	}
9910 
9911 	DBUG_RETURN(error);
9912 }
9913 
9914 /**********************************************************************//**
9915 Initialize FT index scan
9916 @return 0 or error number */
9917 
9918 int
ft_init()9919 ha_innobase::ft_init()
9920 /*==================*/
9921 {
9922 	DBUG_ENTER("ft_init");
9923 
9924 	trx_t*	trx = check_trx_exists(ha_thd());
9925 
9926 	/* FTS queries are not treated as autocommit non-locking selects.
9927 	This is because the FTS implementation can acquire locks behind
9928 	the scenes. This has not been verified but it is safer to treat
9929 	them as regular read only transactions for now. */
9930 
9931 	if (!trx_is_started(trx)) {
9932 		trx->will_lock = true;
9933 	}
9934 
9935 	DBUG_RETURN(rnd_init(false));
9936 }
9937 
9938 /**********************************************************************//**
9939 Initialize FT index scan
9940 @return FT_INFO structure if successful or NULL */
9941 
9942 FT_INFO*
ft_init_ext(uint flags,uint keynr,String * key)9943 ha_innobase::ft_init_ext(
9944 /*=====================*/
9945 	uint			flags,	/* in: */
9946 	uint			keynr,	/* in: */
9947 	String*			key)	/* in: */
9948 {
9949 	NEW_FT_INFO*		fts_hdl = NULL;
9950 	dict_index_t*		index;
9951 	fts_result_t*		result;
9952 	char			buf_tmp[8192];
9953 	ulint			buf_tmp_used;
9954 	uint			num_errors;
9955 	ulint			query_len = key->length();
9956 	const CHARSET_INFO*	char_set = key->charset();
9957 	const char*		query = key->ptr();
9958 
9959 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
9960 		{
9961 			ib::info	out;
9962 			out << "keynr=" << keynr << ", '";
9963 			out.write(key->ptr(), key->length());
9964 		}
9965 
9966 		if (flags & FT_BOOL) {
9967 			ib::info() << "BOOL search";
9968 		} else {
9969 			ib::info() << "NL search";
9970 		}
9971 	}
9972 
9973 	/* FIXME: utf32 and utf16 are not compatible with some
9974 	string function used. So to convert them to uft8 before
9975 	we proceed. */
9976 	if (strcmp(char_set->csname, "utf32") == 0
9977 	    || strcmp(char_set->csname, "utf16") == 0) {
9978 
9979 		buf_tmp_used = innobase_convert_string(
9980 			buf_tmp, sizeof(buf_tmp) - 1,
9981 			&my_charset_utf8_general_ci,
9982 			query, query_len, (CHARSET_INFO*) char_set,
9983 			&num_errors);
9984 
9985 		buf_tmp[buf_tmp_used] = 0;
9986 		query = buf_tmp;
9987 		query_len = buf_tmp_used;
9988 	}
9989 
9990 	trx_t*	trx = m_prebuilt->trx;
9991 
9992 	/* FTS queries are not treated as autocommit non-locking selects.
9993 	This is because the FTS implementation can acquire locks behind
9994 	the scenes. This has not been verified but it is safer to treat
9995 	them as regular read only transactions for now. */
9996 
9997 	if (!trx_is_started(trx)) {
9998 		trx->will_lock = true;
9999 	}
10000 
10001 	dict_table_t*	ft_table = m_prebuilt->table;
10002 
10003 	/* Table does not have an FTS index */
10004 	if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
10005 		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10006 		return(NULL);
10007 	}
10008 
10009 	/* If tablespace is discarded, we should return here */
10010 	if (!ft_table->space) {
10011 		my_error(ER_TABLESPACE_MISSING, MYF(0), table->s->db.str,
10012 			 table->s->table_name.str);
10013 		return(NULL);
10014 	}
10015 
10016 	if (keynr == NO_SUCH_KEY) {
10017 		/* FIXME: Investigate the NO_SUCH_KEY usage */
10018 		index = reinterpret_cast<dict_index_t*>
10019 			(ib_vector_getp(ft_table->fts->indexes, 0));
10020 	} else {
10021 		index = innobase_get_index(keynr);
10022 	}
10023 
10024 	if (index == NULL || index->type != DICT_FTS) {
10025 		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
10026 		return(NULL);
10027 	}
10028 
10029 	if (!(ft_table->fts->added_synced)) {
10030 		fts_init_index(ft_table, FALSE);
10031 
10032 		ft_table->fts->added_synced = true;
10033 	}
10034 
10035 	const byte*	q = reinterpret_cast<const byte*>(
10036 		const_cast<char*>(query));
10037 
10038 	// FIXME: support ft_init_ext_with_hints(), pass LIMIT
10039 	dberr_t	error = fts_query(trx, index, flags, q, query_len, &result);
10040 
10041 	if (error != DB_SUCCESS) {
10042 		my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0));
10043 		return(NULL);
10044 	}
10045 
10046 	/* Allocate FTS handler, and instantiate it before return */
10047 	fts_hdl = reinterpret_cast<NEW_FT_INFO*>(
10048 		my_malloc(/*PSI_INSTRUMENT_ME,*/ sizeof(NEW_FT_INFO), MYF(0)));
10049 
10050 	fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
10051 	fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
10052 	fts_hdl->ft_prebuilt = m_prebuilt;
10053 	fts_hdl->ft_result = result;
10054 
10055 	/* FIXME: Re-evaluate the condition when Bug 14469540 is resolved */
10056 	m_prebuilt->in_fts_query = true;
10057 
10058 	return(reinterpret_cast<FT_INFO*>(fts_hdl));
10059 }
10060 
10061 /*****************************************************************//**
10062 Set up search tuple for a query through FTS_DOC_ID_INDEX on
10063 supplied Doc ID. This is used by MySQL to retrieve the documents
10064 once the search result (Doc IDs) is available */
10065 static
10066 void
innobase_fts_create_doc_id_key(dtuple_t * tuple,const dict_index_t * index,doc_id_t * doc_id)10067 innobase_fts_create_doc_id_key(
10068 /*===========================*/
10069 	dtuple_t*	tuple,		/* in/out: m_prebuilt->search_tuple */
10070 	const dict_index_t*
10071 			index,		/* in: index (FTS_DOC_ID_INDEX) */
10072 	doc_id_t*	doc_id)		/* in/out: doc id to search, value
10073 					could be changed to storage format
10074 					used for search. */
10075 {
10076 	doc_id_t	temp_doc_id;
10077 	dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
10078 
10079 	ut_a(dict_index_get_n_unique(index) == 1);
10080 
10081 	dtuple_set_n_fields(tuple, index->n_fields);
10082 	dict_index_copy_types(tuple, index, index->n_fields);
10083 
10084 #ifdef UNIV_DEBUG
10085 	/* The unique Doc ID field should be an eight-bytes integer */
10086 	dict_field_t*	field = dict_index_get_nth_field(index, 0);
10087         ut_a(field->col->mtype == DATA_INT);
10088 	ut_ad(sizeof(*doc_id) == field->fixed_len);
10089 	ut_ad(!strcmp(index->name, FTS_DOC_ID_INDEX_NAME));
10090 #endif /* UNIV_DEBUG */
10091 
10092 	/* Convert to storage byte order */
10093 	mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
10094 	*doc_id = temp_doc_id;
10095 	dfield_set_data(dfield, doc_id, sizeof(*doc_id));
10096 
10097         dtuple_set_n_fields_cmp(tuple, 1);
10098 
10099 	for (ulint i = 1; i < index->n_fields; i++) {
10100 		dfield = dtuple_get_nth_field(tuple, i);
10101 		dfield_set_null(dfield);
10102 	}
10103 }
10104 
10105 /**********************************************************************//**
10106 Fetch next result from the FT result set
10107 @return error code */
10108 
10109 int
ft_read(uchar * buf)10110 ha_innobase::ft_read(
10111 /*=================*/
10112 	uchar*		buf)		/*!< in/out: buf contain result row */
10113 {
10114 	row_prebuilt_t*	ft_prebuilt;
10115 
10116 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_prebuilt;
10117 
10118 	ut_a(ft_prebuilt == m_prebuilt);
10119 
10120 	fts_result_t*	result;
10121 
10122 	result = reinterpret_cast<NEW_FT_INFO*>(ft_handler)->ft_result;
10123 
10124 	if (result->current == NULL) {
10125 		/* This is the case where the FTS query did not
10126 		contain and matching documents. */
10127 		if (result->rankings_by_id != NULL) {
10128 			/* Now that we have the complete result, we
10129 			need to sort the document ids on their rank
10130 			calculation. */
10131 
10132 			fts_query_sort_result_on_rank(result);
10133 
10134 			result->current = const_cast<ib_rbt_node_t*>(
10135 				rbt_first(result->rankings_by_rank));
10136 		} else {
10137 			ut_a(result->current == NULL);
10138 		}
10139 	} else {
10140 		result->current = const_cast<ib_rbt_node_t*>(
10141 			rbt_next(result->rankings_by_rank, result->current));
10142 	}
10143 
10144 next_record:
10145 
10146 	if (result->current != NULL) {
10147 		doc_id_t	search_doc_id;
10148 		dtuple_t*	tuple = m_prebuilt->search_tuple;
10149 
10150 		/* If we only need information from result we can return
10151 		   without fetching the table row */
10152 		if (ft_prebuilt->read_just_key) {
10153 #ifdef MYSQL_STORE_FTS_DOC_ID
10154 			if (m_prebuilt->fts_doc_id_in_read_set) {
10155 				fts_ranking_t* ranking;
10156 				ranking = rbt_value(fts_ranking_t,
10157 						    result->current);
10158 				innobase_fts_store_docid(
10159 					table, ranking->doc_id);
10160 			}
10161 #endif
10162 			table->status= 0;
10163 			return(0);
10164 		}
10165 
10166 		dict_index_t*	index;
10167 
10168 		index = m_prebuilt->table->fts_doc_id_index;
10169 
10170 		/* Must find the index */
10171 		ut_a(index != NULL);
10172 
10173 		/* Switch to the FTS doc id index */
10174 		m_prebuilt->index = index;
10175 
10176 		fts_ranking_t*	ranking = rbt_value(
10177 			fts_ranking_t, result->current);
10178 
10179 		search_doc_id = ranking->doc_id;
10180 
10181 		/* We pass a pointer of search_doc_id because it will be
10182 		converted to storage byte order used in the search
10183 		tuple. */
10184 		innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
10185 
10186 		innobase_srv_conc_enter_innodb(m_prebuilt);
10187 
10188 		dberr_t ret = row_search_for_mysql(
10189 			(byte*) buf, PAGE_CUR_GE, m_prebuilt, ROW_SEL_EXACT, 0);
10190 
10191 		innobase_srv_conc_exit_innodb(m_prebuilt);
10192 
10193 		int	error;
10194 
10195 		switch (ret) {
10196 		case DB_SUCCESS:
10197 			error = 0;
10198 			table->status = 0;
10199 			break;
10200 		case DB_RECORD_NOT_FOUND:
10201 			result->current = const_cast<ib_rbt_node_t*>(
10202 				rbt_next(result->rankings_by_rank,
10203 					 result->current));
10204 
10205 			if (!result->current) {
10206 				/* exhaust the result set, should return
10207 				HA_ERR_END_OF_FILE just like
10208 				ha_innobase::general_fetch() and/or
10209 				ha_innobase::index_first() etc. */
10210 				error = HA_ERR_END_OF_FILE;
10211 				table->status = STATUS_NOT_FOUND;
10212 			} else {
10213 				goto next_record;
10214 			}
10215 			break;
10216 		case DB_END_OF_INDEX:
10217 			error = HA_ERR_END_OF_FILE;
10218 			table->status = STATUS_NOT_FOUND;
10219 			break;
10220 		case DB_TABLESPACE_DELETED:
10221 
10222 			ib_senderrf(
10223 				m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10224 				ER_TABLESPACE_DISCARDED,
10225 				table->s->table_name.str);
10226 
10227 			table->status = STATUS_NOT_FOUND;
10228 			error = HA_ERR_TABLESPACE_MISSING;
10229 			break;
10230 		case DB_TABLESPACE_NOT_FOUND:
10231 
10232 			ib_senderrf(
10233 				m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
10234 				ER_TABLESPACE_MISSING,
10235 				table->s->table_name.str);
10236 
10237 			table->status = STATUS_NOT_FOUND;
10238 			error = HA_ERR_TABLESPACE_MISSING;
10239 			break;
10240 		default:
10241 			error = convert_error_code_to_mysql(
10242 				ret, 0, m_user_thd);
10243 
10244 			table->status = STATUS_NOT_FOUND;
10245 			break;
10246 		}
10247 
10248 		return(error);
10249 	}
10250 
10251 	return(HA_ERR_END_OF_FILE);
10252 }
10253 
10254 #ifdef WITH_WSREP
10255 inline
10256 const char*
wsrep_key_type_to_str(Wsrep_service_key_type type)10257 wsrep_key_type_to_str(Wsrep_service_key_type type)
10258 {
10259 	switch (type) {
10260 	case WSREP_SERVICE_KEY_SHARED:
10261 		return "shared";
10262 	case WSREP_SERVICE_KEY_REFERENCE:
10263 		return "reference";
10264 	case WSREP_SERVICE_KEY_UPDATE:
10265 		return "update";
10266 	case WSREP_SERVICE_KEY_EXCLUSIVE:
10267 		return "exclusive";
10268 	};
10269 	return "unknown";
10270 }
10271 
10272 extern dberr_t
wsrep_append_foreign_key(trx_t * trx,dict_foreign_t * foreign,const rec_t * rec,dict_index_t * index,bool referenced,upd_node_t * upd_node,bool pa_disable,Wsrep_service_key_type key_type)10273 wsrep_append_foreign_key(
10274 /*===========================*/
10275 	trx_t*		trx,		/*!< in: trx */
10276 	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
10277 	const rec_t*	rec,		/*!<in: clustered index record */
10278 	dict_index_t*	index,		/*!<in: clustered index */
10279 	bool		referenced,	/*!<in: is check for
10280 					referenced table */
10281 	upd_node_t*	upd_node,	/*<!in: update node */
10282 	bool		pa_disable,	/*<!in: disable parallel apply ?*/
10283 	Wsrep_service_key_type	key_type)	/*!< in: access type of this key
10284 					(shared, exclusive, reference...) */
10285 {
10286 	ut_ad(trx->is_wsrep());
10287 
10288 	if (!wsrep_thd_is_local(trx->mysql_thd))
10289 		return DB_SUCCESS;
10290 
10291 	if (upd_node && wsrep_protocol_version < 4) {
10292 		key_type = WSREP_SERVICE_KEY_SHARED;
10293 	}
10294 
10295 	THD* thd = trx->mysql_thd;
10296 
10297 	if (!foreign ||
10298 	    (!foreign->referenced_table && !foreign->foreign_table)) {
10299 		WSREP_INFO("FK: %s missing in: %s",
10300 			   (!foreign ? "constraint" :
10301 			    (!foreign->referenced_table ?
10302 			     "referenced table" : "foreign table")),
10303 			   wsrep_thd_query(thd));
10304 		return DB_ERROR;
10305 	}
10306 
10307 	ulint rcode = DB_SUCCESS;
10308 	char  cache_key[513] = {'\0'};
10309 	int   cache_key_len=0;
10310 
10311 	if ( !((referenced) ?
10312 		foreign->referenced_table : foreign->foreign_table)) {
10313 		WSREP_DEBUG("pulling %s table into cache",
10314 			    (referenced) ? "referenced" : "foreign");
10315 		mutex_enter(&dict_sys.mutex);
10316 
10317 		if (referenced) {
10318 			foreign->referenced_table =
10319 				dict_table_get_low(
10320 					foreign->referenced_table_name_lookup);
10321 			if (foreign->referenced_table) {
10322 				foreign->referenced_index =
10323 					dict_foreign_find_index(
10324 						foreign->referenced_table, NULL,
10325 						foreign->referenced_col_names,
10326 						foreign->n_fields,
10327 						foreign->foreign_index,
10328 						TRUE, FALSE);
10329 			}
10330 		} else {
10331 	  		foreign->foreign_table =
10332 				dict_table_get_low(
10333 					foreign->foreign_table_name_lookup);
10334 
10335 			if (foreign->foreign_table) {
10336 				foreign->foreign_index =
10337 					dict_foreign_find_index(
10338 						foreign->foreign_table, NULL,
10339 						foreign->foreign_col_names,
10340 						foreign->n_fields,
10341 						foreign->referenced_index,
10342 						TRUE, FALSE);
10343 			}
10344 		}
10345 		mutex_exit(&dict_sys.mutex);
10346 	}
10347 
10348 	if ( !((referenced) ?
10349 		foreign->referenced_table : foreign->foreign_table)) {
10350 		WSREP_WARN("FK: %s missing in query: %s",
10351 			   (!foreign->referenced_table) ?
10352 			   "referenced table" : "foreign table",
10353 			   wsrep_thd_query(thd));
10354 		return DB_ERROR;
10355 	}
10356 
10357 	byte  key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
10358 	ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
10359 
10360 	dict_index_t *idx_target = (referenced) ?
10361 		foreign->referenced_index : index;
10362 	dict_index_t *idx = (referenced) ?
10363 		UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
10364 		UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
10365 	int i = 0;
10366 
10367 	while (idx != NULL && idx != idx_target) {
10368 		if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
10369 			i++;
10370 		}
10371 		idx = UT_LIST_GET_NEXT(indexes, idx);
10372 	}
10373 
10374 	ut_a(idx);
10375 	key[0] = byte(i);
10376 
10377 	rcode = wsrep_rec_get_foreign_key(
10378 		&key[1], &len, rec, index, idx,
10379 		wsrep_protocol_version > 1);
10380 
10381 	if (rcode != DB_SUCCESS) {
10382 		WSREP_ERROR(
10383 			"FK key set failed: " ULINTPF
10384 			" (" ULINTPF "%s), index: %s %s, %s",
10385 			rcode, referenced, wsrep_key_type_to_str(key_type),
10386 			(index)       ? index->name() : "void index",
10387 			(index && index->table) ? index->table->name.m_name :
10388 				"void table",
10389 			wsrep_thd_query(thd));
10390 		return DB_ERROR;
10391 	}
10392 
10393 	strncpy(cache_key,
10394 		(wsrep_protocol_version > 1) ?
10395 		((referenced) ?
10396 			foreign->referenced_table->name.m_name :
10397 			foreign->foreign_table->name.m_name) :
10398 		foreign->foreign_table->name.m_name, sizeof(cache_key) - 1);
10399 	cache_key_len = strlen(cache_key);
10400 
10401 #ifdef WSREP_DEBUG_PRINT
10402 	ulint j;
10403 	fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
10404 		cache_key, wsrep_key_type_to_str(key_type), len+1);
10405 	for (j=0; j<len+1; j++) {
10406 		fprintf(stderr, " %hhX, ", key[j]);
10407 	}
10408 	fprintf(stderr, "\n");
10409 #endif
10410 	char *p = strchr(cache_key, '/');
10411 
10412 	if (p) {
10413 		*p = '\0';
10414 	} else {
10415 		WSREP_WARN("unexpected foreign key table %s %s",
10416 			   foreign->referenced_table->name.m_name,
10417 			   foreign->foreign_table->name.m_name);
10418 	}
10419 
10420 	wsrep_buf_t wkey_part[3];
10421         wsrep_key_t wkey = {wkey_part, 3};
10422 
10423 	if (!wsrep_prepare_key_for_innodb(
10424 		thd,
10425 		(const uchar*)cache_key,
10426 		cache_key_len +  1,
10427 		(const uchar*)key, len+1,
10428 		wkey_part,
10429 		(size_t*)&wkey.key_parts_num)) {
10430 		WSREP_WARN("key prepare failed for cascaded FK: %s",
10431 			   wsrep_thd_query(thd));
10432 		return DB_ERROR;
10433 	}
10434 
10435 	rcode = wsrep_thd_append_key(thd, &wkey, 1, key_type);
10436 
10437 	if (rcode) {
10438 		WSREP_ERROR("Appending cascaded fk row key failed: %s, "
10439 			    ULINTPF,
10440 			    wsrep_thd_query(thd),
10441 			    rcode);
10442 		return DB_ERROR;
10443 	}
10444 
10445 	if (pa_disable) {
10446 		wsrep_thd_set_PA_unsafe(trx->mysql_thd);
10447 	}
10448 
10449 	return DB_SUCCESS;
10450 }
10451 
10452 static int
wsrep_append_key(THD * thd,trx_t * trx,TABLE_SHARE * table_share,const char * key,uint16_t key_len,Wsrep_service_key_type key_type)10453 wsrep_append_key(
10454 /*=============*/
10455 	THD		*thd,
10456 	trx_t 		*trx,
10457 	TABLE_SHARE 	*table_share,
10458 	const char*	key,
10459 	uint16_t        key_len,
10460 	Wsrep_service_key_type	key_type	/*!< in: access type of this key
10461 					(shared, exclusive, semi...) */
10462 )
10463 {
10464 	DBUG_ENTER("wsrep_append_key");
10465 	DBUG_PRINT("enter",
10466 		    ("thd: %lu trx: %lld", thd_get_thread_id(thd),
10467 		    (long long)trx->id));
10468 #ifdef WSREP_DEBUG_PRINT
10469 	fprintf(stderr, "%s conn %lu, trx " TRX_ID_FMT ", keylen %d, key %s.%s\n",
10470 		wsrep_key_type_to_str(key_type),
10471 		thd_get_thread_id(thd), trx->id, key_len,
10472 		table_share->table_name.str, key);
10473 	for (int i=0; i<key_len; i++) {
10474 		fprintf(stderr, "%hhX, ", key[i]);
10475 	}
10476 	fprintf(stderr, "\n");
10477 #endif
10478 	wsrep_buf_t wkey_part[3];
10479         wsrep_key_t wkey = {wkey_part, 3};
10480 
10481 	if (!wsrep_prepare_key_for_innodb(
10482 			thd,
10483 			(const uchar*)table_share->table_cache_key.str,
10484 			table_share->table_cache_key.length,
10485 			(const uchar*)key, key_len,
10486 			wkey_part,
10487 			(size_t*)&wkey.key_parts_num)) {
10488 		WSREP_WARN("key prepare failed for: %s",
10489 			   (wsrep_thd_query(thd)) ?
10490 			   wsrep_thd_query(thd) : "void");
10491 		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10492 	}
10493 
10494 	int rcode = wsrep_thd_append_key(thd, &wkey, 1, key_type);
10495 	if (rcode) {
10496 		DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
10497 		WSREP_WARN("Appending row key failed: %s, %d",
10498 			   (wsrep_thd_query(thd)) ?
10499 			   wsrep_thd_query(thd) : "void", rcode);
10500 		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
10501 	}
10502 
10503 	DBUG_RETURN(0);
10504 }
10505 
10506 static bool
referenced_by_foreign_key2(dict_table_t * table,dict_index_t * index)10507 referenced_by_foreign_key2(
10508 /*=======================*/
10509 	dict_table_t* table,
10510 	dict_index_t* index)
10511 {
10512 	ut_ad(table != NULL);
10513 	ut_ad(index != NULL);
10514 
10515 	const dict_foreign_set* fks = &table->referenced_set;
10516 
10517 	for (dict_foreign_set::const_iterator it = fks->begin();
10518              it != fks->end();
10519              ++it) {
10520                 dict_foreign_t* foreign = *it;
10521 
10522                 if (foreign->referenced_index != index) {
10523                         continue;
10524                 }
10525                 ut_ad(table == foreign->referenced_table);
10526                 return true;
10527         }
10528         return false;
10529 }
10530 
/** Append the wsrep certification keys of a row operation to the write
set of a connection.

No keys are appended for temporary tables. With wsrep protocol version 0
only the value of key number 0 of record0 is appended. Otherwise every
key that qualifies (see the loop below) is appended, each prefixed with
one byte holding the key number. If no qualifying key was appended and
wsrep_certify_nonPK is set, a 16-byte hash of the row is appended
instead.

@return 0 on success, otherwise the error code returned by
wsrep_append_key() */
int
ha_innobase::wsrep_append_keys(
/*===========================*/
	THD 		*thd,		/*!< in: connection handle */
	Wsrep_service_key_type	key_type,	/*!< in: access type of this row
					operation:
					(shared, exclusive, reference...) */
	const uchar*	record0,	/* in: row in MySQL format */
	const uchar*	record1)	/* in: row in MySQL format; a second
					image of the row, or NULL when there
					is none */
{
	/* Sanity check: newly inserted records should always be passed with
	   EXCLUSIVE key type, all the rest are expected to carry a pre-image
	 */
	ut_a(record1 != NULL || key_type == WSREP_SERVICE_KEY_EXCLUSIVE);

	int rcode;
	DBUG_ENTER("wsrep_append_keys");

	/* Set once a key has been appended that makes the row hash
	fallback at the end unnecessary. */
	bool key_appended = false;
	trx_t *trx = thd_to_trx(thd);

#ifdef WSREP_DEBUG_PRINT
	fprintf(stderr, "%s conn %lu, trx " TRX_ID_FMT ", table %s\nSQL: %s\n",
		wsrep_key_type_to_str(key_type),
		thd_get_thread_id(thd), trx->id,
		table_share->table_name.str, wsrep_thd_query(thd));
#endif

	/* Nothing is appended for temporary tables. */
	if (table_share && table_share->tmp_table  != NO_TMP_TABLE) {
		WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
			    thd_get_thread_id(thd),
			    table_share->tmp_table,
			    (wsrep_thd_query(thd)) ?
			    wsrep_thd_query(thd) : "void");
		DBUG_RETURN(0);
	}

	if (wsrep_protocol_version == 0) {
		/* Protocol 0: append the value of key number 0 of record0
		only, without a leading key number byte. */
		uint	len;
		char 	keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
		char 	*key 		= &keyval[0];
		ibool    is_null;

		len = wsrep_store_key_val_for_row(
			thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
			record0, &is_null);

		if (!is_null) {
			rcode = wsrep_append_key(
				thd, trx, table_share, keyval,
				len, key_type);

			if (rcode) {
				DBUG_RETURN(rcode);
			}
		} else {
			WSREP_DEBUG("NULL key skipped (proto 0): %s",
				    wsrep_thd_query(thd));
		}
	} else {
		/* The key number is stored in a single byte of the key
		value below. */
		ut_a(table->s->keys <= 256);
		uint i;
                bool hasPK= false;

		/* hasPK becomes true as soon as any key of the table
		carries the HA_NOSAME flag. */
		for (i=0; i<table->s->keys; ++i) {
			KEY*  key_info	= table->key_info + i;
			if (key_info->flags & HA_NOSAME) {
				hasPK = true;
				break;
			}
		}

		for (i=0; i<table->s->keys; ++i) {
			KEY*  key_info	= table->key_info + i;

			dict_index_t* idx  = innobase_get_index(i);
			dict_table_t* tab  = (idx) ? idx->table : NULL;

			/* keyval[] shall contain an ordinal number at byte 0
			   and the actual key data shall be written at byte 1.
			   Hence the total data length is the key length + 1 */
			char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]= {'\0'};
			char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]= {'\0'};
			keyval0[0] = (char)i;
			keyval1[0] = (char)i;
			char* key0 = &keyval0[1];
			char* key1 = &keyval1[1];

			/* No InnoDB index was found for this key number;
			the key is still considered below. */
			if (!tab) {
				WSREP_WARN("MariaDB-InnoDB key mismatch %s %s",
					   table->s->table_name.str,
					   key_info->name.str);
			}
			/* !hasPK == table with no PK,
			   must append all non-unique keys */
			/* A key qualifies if no key of the table has
			HA_NOSAME, if this key has HA_NOSAME, or if it is
			referenced by a foreign key. */
			if (!hasPK || key_info->flags & HA_NOSAME ||
			    ((tab &&
			      referenced_by_foreign_key2(tab, idx)) ||
			     (!tab && referenced_by_foreign_key()))) {

				ibool is_null0;
				uint len0 = wsrep_store_key_val_for_row(
					thd, table, i, key0,
					WSREP_MAX_SUPPORTED_KEY_LENGTH,
					record0, &is_null0);

				if (record1) {
					ibool is_null1;
					uint len1 = wsrep_store_key_val_for_row(
						thd, table, i, key1,
						WSREP_MAX_SUPPORTED_KEY_LENGTH,
						record1, &is_null1);

					if (is_null0 != is_null1 ||
					    len0 != len1 ||
					    memcmp(key0, key1, len0)) {
						/* This key has changed. If it
						  is unique, this is an exclusive
						  operation -> upgrade key type.
						  Note that key_type is not reset
						  afterwards: the upgrade also
						  applies to the keys appended
						  later in this function. */
						if (key_info->flags & HA_NOSAME) {
						    key_type = WSREP_SERVICE_KEY_EXCLUSIVE;
						}

						if (!is_null1) {
						    rcode = wsrep_append_key(
							thd, trx, table_share,
							keyval1,
						    /* for len1+1 see keyval1
						     initialization comment */
							len1+1, key_type);
						    if (rcode)
							DBUG_RETURN(rcode);
						}
					}
				}

				if (!is_null0) {
					rcode = wsrep_append_key(
						thd, trx, table_share,
						/* for len0+1 see keyval0
						   initialization comment */
						keyval0, len0+1, key_type);
					if (rcode)
						DBUG_RETURN(rcode);

					/* Only a HA_NOSAME key, or a key
					appended as shared or reference,
					suppresses the row hash below. */
					if (key_info->flags & HA_NOSAME  ||
					    key_type == WSREP_SERVICE_KEY_SHARED||
					    key_type == WSREP_SERVICE_KEY_REFERENCE)
						key_appended = true;
				} else {
					WSREP_DEBUG("NULL key skipped: %s",
						    wsrep_thd_query(thd));
				}
			}
		}
	}

	/* if no PK, calculate hash of full row, to be the key value */
	if (!key_appended && wsrep_certify_nonPK) {
		uchar digest[16];

		wsrep_calc_row_hash(digest, record0, table, m_prebuilt);

		/* This rcode is local to the condition and shadows the
		one declared at the top of the function. */
		if (int rcode = wsrep_append_key(thd, trx, table_share,
						 reinterpret_cast<char*>
						 (digest), 16, key_type)) {
			DBUG_RETURN(rcode);
		}

		/* The hash of the second row image is appended too. */
		if (record1) {
			wsrep_calc_row_hash(
				digest, record1, table, m_prebuilt);
			if (int rcode = wsrep_append_key(
				    thd, trx, table_share,
				    reinterpret_cast<char*>(digest), 16,
				    key_type)) {
				DBUG_RETURN(rcode);
			}
		}
		DBUG_RETURN(0);
	}

	DBUG_RETURN(0);
}
10715 #endif /* WITH_WSREP */
10716 
10717 /*********************************************************************//**
10718 Stores a reference to the current row to 'ref' field of the handle. Note
10719 that in the case where we have generated the clustered index for the
10720 table, the function parameter is illogical: we MUST ASSUME that 'record'
10721 is the current 'position' of the handle, because if row ref is actually
10722 the row id internally generated in InnoDB, then 'record' does not contain
10723 it. We just guess that the row id must be for the record where the handle
10724 was positioned the last time. */
10725 
10726 void
position(const uchar * record)10727 ha_innobase::position(
10728 /*==================*/
10729 	const uchar*	record)	/*!< in: row in MySQL format */
10730 {
10731 	uint		len;
10732 
10733 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
10734 
10735 	if (m_prebuilt->clust_index_was_generated) {
10736 		/* No primary key was defined for the table and we
10737 		generated the clustered index from row id: the
10738 		row reference will be the row id, not any key value
10739 		that MySQL knows of */
10740 
10741 		len = DATA_ROW_ID_LEN;
10742 
10743 		memcpy(ref, m_prebuilt->row_id, len);
10744 	} else {
10745 
10746 		/* Copy primary key as the row reference */
10747 		KEY*	key_info = table->key_info + m_primary_key;
10748 		key_copy(ref, (uchar*)record, key_info, key_info->key_length);
10749 		len = key_info->key_length;
10750 	}
10751 
10752 	ut_ad(len == ref_length);
10753 }
10754 
10755 /*****************************************************************//**
10756 Check whether there exist a column named as "FTS_DOC_ID", which is
10757 reserved for InnoDB FTS Doc ID
10758 @return true if there exist a "FTS_DOC_ID" column */
10759 static
10760 bool
create_table_check_doc_id_col(trx_t * trx,const TABLE * form,ulint * doc_id_col)10761 create_table_check_doc_id_col(
10762 /*==========================*/
10763 	trx_t*		trx,		/*!< in: InnoDB transaction handle */
10764 	const TABLE*	form,		/*!< in: information on table
10765 					columns and indexes */
10766 	ulint*		doc_id_col)	/*!< out: Doc ID column number if
10767 					there exist a FTS_DOC_ID column,
10768 					ULINT_UNDEFINED if column is of the
10769 					wrong type/name/size */
10770 {
10771 	for (ulint i = 0; i < form->s->fields; i++) {
10772 		const Field*	field;
10773 		ulint		col_type;
10774 		ulint		col_len;
10775 		ulint		unsigned_type;
10776 
10777 		field = form->field[i];
10778 		if (!field->stored_in_db()) {
10779 			continue;
10780 		}
10781 
10782 		col_type = get_innobase_type_from_mysql_type(
10783 			&unsigned_type, field);
10784 
10785 		col_len = field->pack_length();
10786 
10787 		if (innobase_strcasecmp(field->field_name.str,
10788 					FTS_DOC_ID_COL_NAME) == 0) {
10789 
10790 			/* Note the name is case sensitive due to
10791 			our internal query parser */
10792 			if (col_type == DATA_INT
10793 			    && !field->real_maybe_null()
10794 			    && col_len == sizeof(doc_id_t)
10795 			    && (strcmp(field->field_name.str,
10796 				      FTS_DOC_ID_COL_NAME) == 0)) {
10797 				*doc_id_col = i;
10798 			} else {
10799 				push_warning_printf(
10800 					trx->mysql_thd,
10801 					Sql_condition::WARN_LEVEL_WARN,
10802 					ER_ILLEGAL_HA_CREATE_OPTION,
10803 					"InnoDB: FTS_DOC_ID column must be"
10804 					" of BIGINT NOT NULL type, and named"
10805 					" in all capitalized characters");
10806 				my_error(ER_WRONG_COLUMN_NAME, MYF(0),
10807 					 field->field_name.str);
10808 				*doc_id_col = ULINT_UNDEFINED;
10809 			}
10810 
10811 			return(true);
10812 		}
10813 	}
10814 
10815 	return(false);
10816 }
10817 
10818 
10819 /** Finds all base columns needed to compute a given generated column.
10820 This is returned as a bitmap, in field->table->tmp_set.
10821 Works for both dict_v_col_t and dict_s_col_t columns.
10822 @param[in]	table		InnoDB table
10823 @param[in]	field		MySQL field
10824 @param[in,out]	col		virtual or stored column */
10825 template <typename T>
10826 void
prepare_vcol_for_base_setup(const dict_table_t * table,const Field * field,T * col)10827 prepare_vcol_for_base_setup(
10828 /*========================*/
10829 	const dict_table_t*	table,
10830 	const Field*	field,
10831 	T*		col)
10832 {
10833 	ut_ad(col->num_base == 0);
10834 	ut_ad(col->base_col == NULL);
10835 
10836 	MY_BITMAP *old_read_set = field->table->read_set;
10837 
10838 	field->table->read_set = &field->table->tmp_set;
10839 
10840 	bitmap_clear_all(&field->table->tmp_set);
10841 	field->vcol_info->expr->walk(
10842 		&Item::register_field_in_read_map, 1, field->table);
10843 	col->num_base= bitmap_bits_set(&field->table->tmp_set);
10844 	if (col->num_base != 0) {
10845 		col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
10846 					table->heap, col->num_base * sizeof(
10847 						* col->base_col)));
10848 	}
10849 	field->table->read_set= old_read_set;
10850 }
10851 
10852 
10853 /** Set up base columns for virtual column
10854 @param[in]	table		InnoDB table
10855 @param[in]	field		MySQL field
10856 @param[in,out]	v_col		virtual column */
10857 void
innodb_base_col_setup(dict_table_t * table,const Field * field,dict_v_col_t * v_col)10858 innodb_base_col_setup(
10859 	dict_table_t*	table,
10860 	const Field*	field,
10861 	dict_v_col_t*	v_col)
10862 {
10863 	ulint n = 0;
10864 
10865 	prepare_vcol_for_base_setup(table, field, v_col);
10866 
10867 	for (uint i= 0; i < field->table->s->fields; ++i) {
10868 		const Field* base_field = field->table->field[i];
10869 		if (base_field->stored_in_db()
10870 			&& bitmap_is_set(&field->table->tmp_set, i)) {
10871 			ulint   z;
10872 
10873 			for (z = 0; z < table->n_cols; z++) {
10874 				const char* name = dict_table_get_col_name(table, z);
10875 				if (!innobase_strcasecmp(name,
10876 						base_field->field_name.str)) {
10877 					break;
10878 				}
10879 			}
10880 
10881 			ut_ad(z != table->n_cols);
10882 
10883 			v_col->base_col[n] = dict_table_get_nth_col(table, z);
10884 			ut_ad(v_col->base_col[n]->ind == z);
10885 			n++;
10886 		}
10887 	}
10888 	v_col->num_base= n;
10889 }
10890 
10891 /** Set up base columns for stored column
10892 @param[in]	table	InnoDB table
10893 @param[in]	field	MySQL field
10894 @param[in,out]	s_col	stored column */
10895 void
innodb_base_col_setup_for_stored(const dict_table_t * table,const Field * field,dict_s_col_t * s_col)10896 innodb_base_col_setup_for_stored(
10897 	const dict_table_t*	table,
10898 	const Field*		field,
10899 	dict_s_col_t*		s_col)
10900 {
10901 	ulint	n = 0;
10902 
10903 	prepare_vcol_for_base_setup(table, field, s_col);
10904 
10905 	for (uint i= 0; i < field->table->s->fields; ++i) {
10906 		const Field* base_field = field->table->field[i];
10907 
10908 		if (base_field->stored_in_db()
10909 		    && bitmap_is_set(&field->table->tmp_set, i)) {
10910 			ulint	z;
10911 			for (z = 0; z < table->n_cols; z++) {
10912 				const char* name = dict_table_get_col_name(
10913 						table, z);
10914 				if (!innobase_strcasecmp(
10915 					name, base_field->field_name.str)) {
10916 					break;
10917 				}
10918 			}
10919 
10920 			ut_ad(z != table->n_cols);
10921 
10922 			s_col->base_col[n] = dict_table_get_nth_col(table, z);
10923 			n++;
10924 
10925 			if (n == s_col->num_base) {
10926 				break;
10927 			}
10928 		}
10929 	}
10930 	s_col->num_base= n;
10931 }
10932 
10933 /** Create a table definition to an InnoDB database.
10934 @return ER_* level error */
10935 inline MY_ATTRIBUTE((warn_unused_result))
10936 int
create_table_def()10937 create_table_info_t::create_table_def()
10938 {
10939 	dict_table_t*	table;
10940 	ulint		col_type;
10941 	ulint		col_len;
10942 	ulint		nulls_allowed;
10943 	ulint		unsigned_type;
10944 	ulint		binary_type;
10945 	ulint		long_true_varchar;
10946 	ulint		charset_no;
10947 	ulint		doc_id_col = 0;
10948 	ibool		has_doc_id_col = FALSE;
10949 	mem_heap_t*	heap;
10950 	ha_table_option_struct *options= m_form->s->option_struct;
10951 	dberr_t		err = DB_SUCCESS;
10952 
10953 	DBUG_ENTER("create_table_def");
10954 	DBUG_PRINT("enter", ("table_name: %s", m_table_name));
10955 
10956 	DBUG_ASSERT(m_trx->mysql_thd == m_thd);
10957 	DBUG_ASSERT(!m_drop_before_rollback);
10958 
10959 	/* MySQL does the name length check. But we do additional check
10960 	on the name length here */
10961 	const size_t	table_name_len = strlen(m_table_name);
10962 	if (table_name_len > MAX_FULL_NAME_LEN) {
10963 		push_warning_printf(
10964 			m_thd, Sql_condition::WARN_LEVEL_WARN,
10965 			ER_TABLE_NAME,
10966 			"InnoDB: Table Name or Database Name is too long");
10967 
10968 		DBUG_RETURN(ER_TABLE_NAME);
10969 	}
10970 
10971 	if (m_table_name[table_name_len - 1] == '/') {
10972 		push_warning_printf(
10973 			m_thd, Sql_condition::WARN_LEVEL_WARN,
10974 			ER_TABLE_NAME,
10975 			"InnoDB: Table name is empty");
10976 
10977 		DBUG_RETURN(ER_WRONG_TABLE_NAME);
10978 	}
10979 
10980 	/* Find out the number of virtual columns. */
10981 	ulint num_v = 0;
10982 	const bool omit_virtual = ha_innobase::omits_virtual_cols(*m_form->s);
10983 	const ulint n_cols = omit_virtual
10984 		? m_form->s->stored_fields : m_form->s->fields;
10985 
10986 	if (!omit_virtual) {
10987 		for (ulint i = 0; i < n_cols; i++) {
10988 			num_v += !m_form->field[i]->stored_in_db();
10989 		}
10990 	}
10991 
10992 	/* Check whether there already exists a FTS_DOC_ID column */
10993 	if (create_table_check_doc_id_col(m_trx, m_form, &doc_id_col)){
10994 
10995 		/* Raise error if the Doc ID column is of wrong type or name */
10996 		if (doc_id_col == ULINT_UNDEFINED) {
10997 			DBUG_RETURN(HA_ERR_GENERIC);
10998 		} else {
10999 			has_doc_id_col = TRUE;
11000 		}
11001 	}
11002 
11003 	/* Adjust the number of columns for the FTS hidden field */
11004 	const ulint actual_n_cols = n_cols
11005 		+ (m_flags2 & DICT_TF2_FTS && !has_doc_id_col);
11006 
11007 	table = dict_mem_table_create(m_table_name, NULL,
11008 				      actual_n_cols, num_v, m_flags, m_flags2);
11009 
11010 	/* Set the hidden doc_id column. */
11011 	if (m_flags2 & DICT_TF2_FTS) {
11012 		table->fts->doc_col = has_doc_id_col
11013 				      ? doc_id_col : n_cols - num_v;
11014 	}
11015 
11016 	if (DICT_TF_HAS_DATA_DIR(m_flags)) {
11017 		ut_a(strlen(m_remote_path));
11018 
11019 		table->data_dir_path = mem_heap_strdup(
11020 			table->heap, m_remote_path);
11021 
11022 	} else {
11023 		table->data_dir_path = NULL;
11024 	}
11025 
11026 	heap = mem_heap_create(1000);
11027 	auto _ = make_scope_exit([heap]() { mem_heap_free(heap); });
11028 
11029 	ut_d(bool have_vers_start = false);
11030 	ut_d(bool have_vers_end = false);
11031 
11032 	for (ulint i = 0, j = 0; j < n_cols; i++) {
11033 		Field*	field = m_form->field[i];
11034 		ulint vers_row = 0;
11035 
11036 		if (m_form->versioned()) {
11037 			if (i == m_form->s->vers.start_fieldno) {
11038 				vers_row = DATA_VERS_START;
11039 				ut_d(have_vers_start = true);
11040 			} else if (i == m_form->s->vers.end_fieldno) {
11041 				vers_row = DATA_VERS_END;
11042 				ut_d(have_vers_end = true);
11043 			} else if (!(field->flags
11044 				     & VERS_UPDATE_UNVERSIONED_FLAG)) {
11045 				vers_row = DATA_VERSIONED;
11046 			}
11047 		}
11048 
11049 		col_type = get_innobase_type_from_mysql_type(
11050 			&unsigned_type, field);
11051 
11052 		if (!col_type) {
11053 			push_warning_printf(
11054 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11055 				ER_CANT_CREATE_TABLE,
11056 				"Error creating table '%s' with"
11057 				" column '%s'. Please check its"
11058 				" column type and try to re-create"
11059 				" the table with an appropriate"
11060 				" column type.",
11061 				table->name.m_name, field->field_name.str);
11062 			goto err_col;
11063 		}
11064 
11065 		nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
11066 		binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
11067 
11068 		charset_no = 0;
11069 
11070 		if (dtype_is_string_type(col_type)) {
11071 
11072 			charset_no = (ulint) field->charset()->number;
11073 
11074 			DBUG_EXECUTE_IF("simulate_max_char_col",
11075 					charset_no = MAX_CHAR_COLL_NUM + 1;
11076 					);
11077 
11078 			if (charset_no > MAX_CHAR_COLL_NUM) {
11079 				/* in data0type.h we assume that the
11080 				number fits in one byte in prtype */
11081 				push_warning_printf(
11082 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11083 					ER_CANT_CREATE_TABLE,
11084 					"In InnoDB, charset-collation codes"
11085 					" must be below 256."
11086 					" Unsupported code " ULINTPF ".",
11087 					charset_no);
11088 				dict_mem_table_free(table);
11089 
11090 				DBUG_RETURN(ER_CANT_CREATE_TABLE);
11091 			}
11092 		}
11093 
11094 		col_len = field->pack_length();
11095 
11096 		/* The MySQL pack length contains 1 or 2 bytes length field
11097 		for a true VARCHAR. Let us subtract that, so that the InnoDB
11098 		column length in the InnoDB data dictionary is the real
11099 		maximum byte length of the actual data. */
11100 
11101 		long_true_varchar = 0;
11102 
11103 		if (field->type() == MYSQL_TYPE_VARCHAR) {
11104 			col_len -= ((Field_varstring*) field)->length_bytes;
11105 
11106 			if (((Field_varstring*) field)->length_bytes == 2) {
11107 				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
11108 			}
11109 		}
11110 
11111 		/* First check whether the column to be added has a
11112 		system reserved name. */
11113 		if (dict_col_name_is_reserved(field->field_name.str)){
11114 			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
11115 				 field->field_name.str);
11116 err_col:
11117 			dict_mem_table_free(table);
11118 			ut_ad(trx_state_eq(m_trx, TRX_STATE_NOT_STARTED));
11119 			DBUG_RETURN(HA_ERR_GENERIC);
11120 		}
11121 
11122 		ulint is_virtual = !field->stored_in_db() ? DATA_VIRTUAL : 0;
11123 
11124 		if (!is_virtual) {
11125 			dict_mem_table_add_col(table, heap,
11126 				field->field_name.str, col_type,
11127 				dtype_form_prtype(
11128 					(ulint) field->type()
11129 					| nulls_allowed | unsigned_type
11130 					| binary_type | long_true_varchar
11131 					| vers_row,
11132 					charset_no),
11133 				col_len);
11134 		} else if (!omit_virtual) {
11135 			dict_mem_table_add_v_col(table, heap,
11136 				field->field_name.str, col_type,
11137 				dtype_form_prtype(
11138 					(ulint) field->type()
11139 					| nulls_allowed | unsigned_type
11140 					| binary_type | long_true_varchar
11141 					| vers_row
11142 					| is_virtual,
11143 					charset_no),
11144 				col_len, i, 0);
11145 		}
11146 
11147 		if (innobase_is_s_fld(field)) {
11148 			ut_ad(!is_virtual);
11149 			/* Added stored column in m_s_cols list. */
11150 			dict_mem_table_add_s_col(
11151 				table, 0);
11152 		}
11153 
11154 		if (is_virtual && omit_virtual) {
11155 			continue;
11156 		}
11157 
11158 		j++;
11159 	}
11160 
11161 	ut_ad(have_vers_start == have_vers_end);
11162 	ut_ad(table->versioned() == have_vers_start);
11163 	ut_ad(!table->versioned() || table->vers_start != table->vers_end);
11164 
11165 	if (num_v) {
11166 		for (ulint i = 0, j = 0; i < n_cols; i++) {
11167 			dict_v_col_t*	v_col;
11168 
11169 			const Field* field = m_form->field[i];
11170 
11171 			if (field->stored_in_db()) {
11172 				continue;
11173 			}
11174 
11175 			v_col = dict_table_get_nth_v_col(table, j);
11176 
11177 			j++;
11178 
11179 			innodb_base_col_setup(table, field, v_col);
11180 		}
11181 	}
11182 
11183 	/** Fill base columns for the stored column present in the list. */
11184 	if (table->s_cols && !table->s_cols->empty()) {
11185 		for (ulint i = 0; i < n_cols; i++) {
11186 			Field*  field = m_form->field[i];
11187 
11188 			if (!innobase_is_s_fld(field)) {
11189 				continue;
11190 			}
11191 
11192 			dict_s_col_list::iterator       it;
11193 			for (it = table->s_cols->begin();
11194 			     it != table->s_cols->end(); ++it) {
11195 				dict_s_col_t	s_col = *it;
11196 
11197 				if (s_col.s_pos == i) {
11198 					innodb_base_col_setup_for_stored(
11199 						table, field, &s_col);
11200 					break;
11201 				}
11202 			}
11203 		}
11204 	}
11205 
11206 	/* Add the FTS doc_id hidden column. */
11207 	if (m_flags2 & DICT_TF2_FTS && !has_doc_id_col) {
11208 		fts_add_doc_id_column(table, heap);
11209 	}
11210 
11211 	dict_table_add_system_columns(table, heap);
11212 
11213 	if (table->is_temporary()) {
11214 		if ((options->encryption == 1
11215 		     && !innodb_encrypt_temporary_tables)
11216 		    || (options->encryption == 2
11217 			&& innodb_encrypt_temporary_tables)) {
11218 			push_warning_printf(m_thd,
11219 					    Sql_condition::WARN_LEVEL_WARN,
11220 					    ER_ILLEGAL_HA_CREATE_OPTION,
11221 					    "Ignoring encryption parameter during "
11222 					    "temporary table creation.");
11223 		}
11224 
11225 		m_trx->table_id = table->id
11226 			= dict_sys.get_temporary_table_id();
11227 		ut_ad(dict_tf_get_rec_format(table->flags)
11228 		      != REC_FORMAT_COMPRESSED);
11229 		table->space_id = SRV_TMP_SPACE_ID;
11230 		table->space = fil_system.temp_space;
11231 		table->add_to_cache();
11232 	} else {
11233 		if (err == DB_SUCCESS) {
11234 			err = row_create_table_for_mysql(
11235 				table, m_trx,
11236 				fil_encryption_t(options->encryption),
11237 				uint32_t(options->encryption_key_id));
11238 			m_drop_before_rollback = (err == DB_SUCCESS);
11239 		}
11240 
11241 		DBUG_EXECUTE_IF("ib_crash_during_create_for_encryption",
11242 				DBUG_SUICIDE(););
11243 	}
11244 
11245 	DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
11246 			err = DB_TABLESPACE_EXISTS;);
11247 
11248 	switch (err) {
11249 	case DB_SUCCESS:
11250 		ut_ad(table);
11251 		m_table = table;
11252 		DBUG_RETURN(0);
11253 	default:
11254 		break;
11255 	case DB_DUPLICATE_KEY:
11256 	case DB_TABLESPACE_EXISTS:
11257 		char display_name[FN_REFLEN];
11258 		char* buf_end = innobase_convert_identifier(
11259 			display_name, sizeof(display_name) - 1,
11260 			m_table_name, strlen(m_table_name),
11261 			m_thd);
11262 
11263 		*buf_end = '\0';
11264 
11265 		my_error(err == DB_DUPLICATE_KEY
11266 			 ? ER_TABLE_EXISTS_ERROR
11267 			 : ER_TABLESPACE_EXISTS, MYF(0), display_name);
11268 	}
11269 
11270 	DBUG_RETURN(convert_error_code_to_mysql(err, m_flags, m_thd));
11271 }
11272 
11273 /*****************************************************************//**
11274 Creates an index in an InnoDB database. */
11275 inline
11276 int
create_index(trx_t * trx,const TABLE * form,dict_table_t * table,uint key_num)11277 create_index(
11278 /*=========*/
11279 	trx_t*		trx,		/*!< in: InnoDB transaction handle */
11280 	const TABLE*	form,		/*!< in: information on table
11281 					columns and indexes */
11282 	dict_table_t*	table,		/*!< in,out: table */
11283 	uint		key_num)	/*!< in: index number */
11284 {
11285 	dict_index_t*	index;
11286 	int		error;
11287 	const KEY*	key;
11288 	ulint*		field_lengths;
11289 
11290 	DBUG_ENTER("create_index");
11291 
11292 	key = form->key_info + key_num;
11293 
11294 	/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
11295 	ut_a(innobase_strcasecmp(key->name.str, innobase_index_reserve_name) != 0);
11296 
11297 	if (key->flags & (HA_SPATIAL | HA_FULLTEXT)) {
11298 		/* Only one of these can be specified at a time. */
11299 		ut_ad(~key->flags & (HA_SPATIAL | HA_FULLTEXT));
11300 		ut_ad(!(key->flags & HA_NOSAME));
11301 		index = dict_mem_index_create(table, key->name.str,
11302 					      (key->flags & HA_SPATIAL)
11303 					      ? DICT_SPATIAL : DICT_FTS,
11304 					      key->user_defined_key_parts);
11305 
11306 		for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11307 			const Field* field = key->key_part[i].field;
11308 
11309 			/* We do not support special (Fulltext or Spatial)
11310 			index on virtual columns */
11311 			if (!field->stored_in_db()) {
11312 				ut_ad(0);
11313 				DBUG_RETURN(HA_ERR_UNSUPPORTED);
11314 			}
11315 
11316 			dict_mem_index_add_field(index, field->field_name.str,
11317 						 0);
11318 		}
11319 
11320 		DBUG_RETURN(convert_error_code_to_mysql(
11321 				    row_create_index_for_mysql(
11322 					    index, trx, NULL),
11323 				    table->flags, NULL));
11324 	}
11325 
11326 	ulint ind_type = 0;
11327 
11328 	if (key_num == form->s->primary_key) {
11329 		ind_type |= DICT_CLUSTERED;
11330 	}
11331 
11332 	if (key->flags & HA_NOSAME) {
11333 		ind_type |= DICT_UNIQUE;
11334 	}
11335 
11336 	field_lengths = (ulint*) my_malloc(//PSI_INSTRUMENT_ME,
11337 		key->user_defined_key_parts * sizeof *
11338 				field_lengths, MYF(MY_FAE));
11339 
11340 	/* We pass 0 as the space id, and determine at a lower level the space
11341 	id where to store the table */
11342 
11343 	index = dict_mem_index_create(table, key->name.str,
11344 				      ind_type, key->user_defined_key_parts);
11345 
11346 	for (ulint i = 0; i < key->user_defined_key_parts; i++) {
11347 		KEY_PART_INFO*	key_part = key->key_part + i;
11348 		ulint		prefix_len;
11349 		ulint		col_type;
11350 		ulint		is_unsigned;
11351 
11352 
11353 		/* (The flag HA_PART_KEY_SEG denotes in MySQL a
11354 		column prefix field in an index: we only store a
11355 		specified number of first bytes of the column to
11356 		the index field.) The flag does not seem to be
11357 		properly set by MySQL. Let us fall back on testing
11358 		the length of the key part versus the column.
11359 		We first reach to the table's column; if the index is on a
11360 		prefix, key_part->field is not the table's column (it's a
11361 		"fake" field forged in open_table_from_share() with length
11362 		equal to the length of the prefix); so we have to go to
11363 		form->fied. */
11364 		Field*	field= form->field[key_part->field->field_index];
11365 		if (field == NULL)
11366 		  ut_error;
11367 
11368 		const char*	field_name = key_part->field->field_name.str;
11369 
11370 		col_type = get_innobase_type_from_mysql_type(
11371 			&is_unsigned, key_part->field);
11372 
11373 		if (DATA_LARGE_MTYPE(col_type)
11374 		    || (key_part->length < field->pack_length()
11375 			&& field->type() != MYSQL_TYPE_VARCHAR)
11376 		    || (field->type() == MYSQL_TYPE_VARCHAR
11377 			&& key_part->length < field->pack_length()
11378 			- ((Field_varstring*) field)->length_bytes)) {
11379 
11380 			switch (col_type) {
11381 			default:
11382 				prefix_len = key_part->length;
11383 				break;
11384 			case DATA_INT:
11385 			case DATA_FLOAT:
11386 			case DATA_DOUBLE:
11387 			case DATA_DECIMAL:
11388 				sql_print_error(
11389 					"MariaDB is trying to create a column"
11390 					" prefix index field, on an"
11391 					" inappropriate data type. Table"
11392 					" name %s, column name %s.",
11393 					form->s->table_name.str,
11394 					key_part->field->field_name.str);
11395 
11396 				prefix_len = 0;
11397 			}
11398 		} else {
11399 			prefix_len = 0;
11400 		}
11401 
11402 		ut_ad(prefix_len % field->charset()->mbmaxlen == 0);
11403 
11404 		field_lengths[i] = key_part->length;
11405 
11406 		if (!key_part->field->stored_in_db()) {
11407 			index->type |= DICT_VIRTUAL;
11408 		}
11409 
11410 		dict_mem_index_add_field(index, field_name, prefix_len);
11411 	}
11412 
11413 	ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
11414 
11415 	/* Even though we've defined max_supported_key_part_length, we
11416 	still do our own checking using field_lengths to be absolutely
11417 	sure we don't create too long indexes. */
11418 	ulint flags = table->flags;
11419 
11420 	error = convert_error_code_to_mysql(
11421 		row_create_index_for_mysql(index, trx, field_lengths),
11422 		flags, NULL);
11423 
11424 	my_free(field_lengths);
11425 
11426 	DBUG_RETURN(error);
11427 }
11428 
11429 /** Return a display name for the row format
11430 @param[in]	row_format	Row Format
11431 @return row format name */
11432 static
11433 const char*
get_row_format_name(enum row_type row_format)11434 get_row_format_name(
11435 	enum row_type	row_format)
11436 {
11437 	switch (row_format) {
11438 	case ROW_TYPE_COMPACT:
11439 		return("COMPACT");
11440 	case ROW_TYPE_COMPRESSED:
11441 		return("COMPRESSED");
11442 	case ROW_TYPE_DYNAMIC:
11443 		return("DYNAMIC");
11444 	case ROW_TYPE_REDUNDANT:
11445 		return("REDUNDANT");
11446 	case ROW_TYPE_DEFAULT:
11447 		return("DEFAULT");
11448 	case ROW_TYPE_FIXED:
11449 		return("FIXED");
11450 	case ROW_TYPE_PAGE:
11451 	case ROW_TYPE_NOT_USED:
11452 		break;
11453 	}
11454 	return("NOT USED");
11455 }
11456 
11457 /** Validate DATA DIRECTORY option.
11458 @return true if valid, false if not. */
11459 bool
create_option_data_directory_is_valid()11460 create_table_info_t::create_option_data_directory_is_valid()
11461 {
11462 	bool		is_valid = true;
11463 
11464 	ut_ad(m_create_info->data_file_name
11465 	      && m_create_info->data_file_name[0] != '\0');
11466 
11467 	/* Use DATA DIRECTORY only with file-per-table. */
11468 	if (!m_allow_file_per_table) {
11469 		push_warning(
11470 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11471 			ER_ILLEGAL_HA_CREATE_OPTION,
11472 			"InnoDB: DATA DIRECTORY requires"
11473 			" innodb_file_per_table.");
11474 		is_valid = false;
11475 	}
11476 
11477 	/* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
11478 	if (m_create_info->tmp_table()) {
11479 		push_warning(
11480 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11481 			ER_ILLEGAL_HA_CREATE_OPTION,
11482 			"InnoDB: DATA DIRECTORY cannot be used"
11483 			" for TEMPORARY tables.");
11484 		is_valid = false;
11485 	}
11486 
11487 	/* We check for a DATA DIRECTORY mixed with TABLESPACE in
11488 	create_option_tablespace_is_valid(), no need to here. */
11489 
11490 	return(is_valid);
11491 }
11492 
11493 /** Validate the create options. Check that the options KEY_BLOCK_SIZE,
11494 ROW_FORMAT, DATA DIRECTORY, TEMPORARY are compatible with
11495 each other and other settings.  These CREATE OPTIONS are not validated
11496 here unless innodb_strict_mode is on. With strict mode, this function
11497 will report each problem it finds using a custom message with error
11498 code ER_ILLEGAL_HA_CREATE_OPTION, not its built-in message.
11499 @return NULL if valid, string name of bad option if not. */
11500 const char*
create_options_are_invalid()11501 create_table_info_t::create_options_are_invalid()
11502 {
11503 	bool	has_key_block_size = (m_create_info->key_block_size != 0);
11504 
11505 	const char*	ret = NULL;
11506 	enum row_type	row_format	= m_create_info->row_type;
11507 	const bool	is_temp 	= m_create_info->tmp_table();
11508 
11509 	ut_ad(m_thd != NULL);
11510 
11511 	/* If innodb_strict_mode is not set don't do any more validation. */
11512 	if (!THDVAR(m_thd, strict_mode)) {
11513 		return(NULL);
11514 	}
11515 
11516 	/* Check if a non-zero KEY_BLOCK_SIZE was specified. */
11517 	if (has_key_block_size) {
11518 		if (is_temp) {
11519 			my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11520 				 MYF(0));
11521 			return("KEY_BLOCK_SIZE");
11522 		}
11523 
11524 		switch (m_create_info->key_block_size) {
11525 			ulint	kbs_max;
11526 		case 1:
11527 		case 2:
11528 		case 4:
11529 		case 8:
11530 		case 16:
11531 			/* The maximum KEY_BLOCK_SIZE (KBS) is
11532 			UNIV_PAGE_SIZE_MAX. But if srv_page_size is
11533 			smaller than UNIV_PAGE_SIZE_MAX, the maximum
11534 			KBS is also smaller. */
11535 			kbs_max = ut_min(
11536 				1U << (UNIV_PAGE_SSIZE_MAX - 1),
11537 				1U << (PAGE_ZIP_SSIZE_MAX - 1));
11538 			if (m_create_info->key_block_size > kbs_max) {
11539 				push_warning_printf(
11540 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11541 					ER_ILLEGAL_HA_CREATE_OPTION,
11542 					"InnoDB: KEY_BLOCK_SIZE=%ld"
11543 					" cannot be larger than %ld.",
11544 					m_create_info->key_block_size,
11545 					kbs_max);
11546 				ret = "KEY_BLOCK_SIZE";
11547 			}
11548 
11549 			/* Valid KEY_BLOCK_SIZE, check its dependencies. */
11550 			if (!m_allow_file_per_table) {
11551 				push_warning(
11552 					m_thd, Sql_condition::WARN_LEVEL_WARN,
11553 					ER_ILLEGAL_HA_CREATE_OPTION,
11554 					"InnoDB: KEY_BLOCK_SIZE requires"
11555 					" innodb_file_per_table.");
11556 				ret = "KEY_BLOCK_SIZE";
11557 			}
11558 			break;
11559 		default:
11560 			push_warning_printf(
11561 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11562 				ER_ILLEGAL_HA_CREATE_OPTION,
11563 				"InnoDB: invalid KEY_BLOCK_SIZE = %u."
11564 				" Valid values are [1, 2, 4, 8, 16]",
11565 				(uint) m_create_info->key_block_size);
11566 			ret = "KEY_BLOCK_SIZE";
11567 			break;
11568 		}
11569 	}
11570 
11571 	/* Check for a valid InnoDB ROW_FORMAT specifier and
11572 	other incompatibilities. */
11573 	switch (row_format) {
11574 	case ROW_TYPE_COMPRESSED:
11575 		if (is_temp) {
11576 			my_error(ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE,
11577 				 MYF(0));
11578 			return("ROW_FORMAT");
11579 		}
11580 		if (!m_allow_file_per_table) {
11581 			push_warning_printf(
11582 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11583 				ER_ILLEGAL_HA_CREATE_OPTION,
11584 				"InnoDB: ROW_FORMAT=%s requires"
11585 				" innodb_file_per_table.",
11586 				get_row_format_name(row_format));
11587 			ret = "ROW_FORMAT";
11588 		}
11589 		break;
11590 	case ROW_TYPE_DYNAMIC:
11591 	case ROW_TYPE_COMPACT:
11592 	case ROW_TYPE_REDUNDANT:
11593 		if (has_key_block_size) {
11594 			push_warning_printf(
11595 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11596 				ER_ILLEGAL_HA_CREATE_OPTION,
11597 				"InnoDB: cannot specify ROW_FORMAT = %s"
11598 				" with KEY_BLOCK_SIZE.",
11599 				get_row_format_name(row_format));
11600 			ret = "KEY_BLOCK_SIZE";
11601 		}
11602 		break;
11603 	case ROW_TYPE_DEFAULT:
11604 		break;
11605 	case ROW_TYPE_FIXED:
11606 	case ROW_TYPE_PAGE:
11607 	case ROW_TYPE_NOT_USED:
11608 		push_warning(
11609 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11610 			ER_ILLEGAL_HA_CREATE_OPTION,
11611 			"InnoDB: invalid ROW_FORMAT specifier.");
11612 		ret = "ROW_TYPE";
11613 		break;
11614 	}
11615 
11616 	if (!m_create_info->data_file_name
11617 	    || !m_create_info->data_file_name[0]) {
11618 	} else if (!my_use_symdir) {
11619 		my_error(WARN_OPTION_IGNORED, MYF(ME_WARNING),
11620 			 "DATA DIRECTORY");
11621 	} else if (!create_option_data_directory_is_valid()) {
11622 		ret = "DATA DIRECTORY";
11623 	}
11624 
11625 	/* Do not allow INDEX_DIRECTORY */
11626 	if (m_create_info->index_file_name) {
11627 		push_warning_printf(
11628 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11629 			ER_ILLEGAL_HA_CREATE_OPTION,
11630 			"InnoDB: INDEX DIRECTORY is not supported");
11631 		ret = "INDEX DIRECTORY";
11632 	}
11633 
11634 	/* Don't support compressed table when page size > 16k. */
11635 	if ((has_key_block_size || row_format == ROW_TYPE_COMPRESSED)
11636 	    && srv_page_size > UNIV_PAGE_SIZE_DEF) {
11637 		push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
11638 			     ER_ILLEGAL_HA_CREATE_OPTION,
11639 			     "InnoDB: Cannot create a COMPRESSED table"
11640 			     " when innodb_page_size > 16k.");
11641 
11642 		if (has_key_block_size) {
11643 			ret = "KEY_BLOCK_SIZE";
11644 		} else {
11645 			ret = "ROW_TYPE";
11646 		}
11647 	}
11648 
11649 	return(ret);
11650 }
11651 
11652 /*****************************************************************//**
11653 Check engine specific table options not handled by SQL-parser.
11654 @return	NULL if valid, string if not */
11655 const char*
check_table_options()11656 create_table_info_t::check_table_options()
11657 {
11658 	enum row_type row_format = m_create_info->row_type;
11659 	const ha_table_option_struct *options= m_form->s->option_struct;
11660 
11661 	switch (options->encryption) {
11662 	case FIL_ENCRYPTION_OFF:
11663 		if (options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
11664 			push_warning(
11665 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11666 				HA_WRONG_CREATE_OPTION,
11667 				"InnoDB: ENCRYPTED=NO implies"
11668 				" ENCRYPTION_KEY_ID=1");
11669 			compile_time_assert(FIL_DEFAULT_ENCRYPTION_KEY == 1);
11670 		}
11671 		if (srv_encrypt_tables != 2) {
11672 			break;
11673 		}
11674 		push_warning(
11675 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11676 			HA_WRONG_CREATE_OPTION,
11677 			"InnoDB: ENCRYPTED=NO cannot be used with"
11678 			" innodb_encrypt_tables=FORCE");
11679 		return "ENCRYPTED";
11680 	case FIL_ENCRYPTION_DEFAULT:
11681 		if (!srv_encrypt_tables) {
11682 			break;
11683 		}
11684 		/* fall through */
11685 	case FIL_ENCRYPTION_ON:
11686 		const uint32_t key_id = uint32_t(options->encryption_key_id);
11687 		if (!encryption_key_id_exists(key_id)) {
11688 			push_warning_printf(
11689 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11690 				HA_WRONG_CREATE_OPTION,
11691 				"InnoDB: ENCRYPTION_KEY_ID %u not available",
11692 				key_id);
11693 			return "ENCRYPTION_KEY_ID";
11694 		}
11695 
11696 		/* We do not support encryption for spatial indexes,
11697 		except if innodb_checksum_algorithm=full_crc32.
11698 		Do not allow ENCRYPTED=YES if any SPATIAL INDEX exists. */
11699 		if (options->encryption != FIL_ENCRYPTION_ON
11700 		    || srv_checksum_algorithm
11701 		    >= SRV_CHECKSUM_ALGORITHM_FULL_CRC32) {
11702 			break;
11703 		}
11704 		for (ulint i = 0; i < m_form->s->keys; i++) {
11705 			if (m_form->key_info[i].flags & HA_SPATIAL) {
11706 				push_warning(m_thd,
11707 					     Sql_condition::WARN_LEVEL_WARN,
11708 					     HA_ERR_UNSUPPORTED,
11709 					     "InnoDB: ENCRYPTED=YES is not"
11710 					     " supported for SPATIAL INDEX");
11711 				return "ENCRYPTED";
11712 			}
11713 		}
11714 	}
11715 
11716 	if (!m_allow_file_per_table
11717 	    && options->encryption != FIL_ENCRYPTION_DEFAULT) {
11718 		push_warning(
11719 			m_thd, Sql_condition::WARN_LEVEL_WARN,
11720 			HA_WRONG_CREATE_OPTION,
11721 			"InnoDB: ENCRYPTED requires innodb_file_per_table");
11722 		return "ENCRYPTED";
11723  	}
11724 
11725 	/* Check page compression requirements */
11726 	if (options->page_compressed) {
11727 
11728 		if (row_format == ROW_TYPE_COMPRESSED) {
11729 			push_warning(
11730 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11731 				HA_WRONG_CREATE_OPTION,
11732 				"InnoDB: PAGE_COMPRESSED table can't have"
11733 				" ROW_TYPE=COMPRESSED");
11734 			return "PAGE_COMPRESSED";
11735 		}
11736 
11737 		switch (row_format) {
11738 		default:
11739 			break;
11740 		case ROW_TYPE_DEFAULT:
11741 			if (m_default_row_format
11742 			    != DEFAULT_ROW_FORMAT_REDUNDANT) {
11743 				break;
11744 			}
11745 			/* fall through */
11746 		case ROW_TYPE_REDUNDANT:
11747 			push_warning(
11748 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11749 				HA_WRONG_CREATE_OPTION,
11750 				"InnoDB: PAGE_COMPRESSED table can't have"
11751 				" ROW_TYPE=REDUNDANT");
11752 			return "PAGE_COMPRESSED";
11753 		}
11754 
11755 		if (!m_allow_file_per_table) {
11756 			push_warning(
11757 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11758 				HA_WRONG_CREATE_OPTION,
11759 				"InnoDB: PAGE_COMPRESSED requires"
11760 				" innodb_file_per_table.");
11761 			return "PAGE_COMPRESSED";
11762 		}
11763 
11764 		if (m_create_info->key_block_size) {
11765 			push_warning(
11766 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11767 				HA_WRONG_CREATE_OPTION,
11768 				"InnoDB: PAGE_COMPRESSED table can't have"
11769 				" key_block_size");
11770 			return "PAGE_COMPRESSED";
11771 		}
11772 	}
11773 
11774 	/* Check page compression level requirements, some of them are
11775 	already checked above */
11776 	if (options->page_compression_level != 0) {
11777 		if (options->page_compressed == false) {
11778 			push_warning(
11779 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11780 				HA_WRONG_CREATE_OPTION,
11781 				"InnoDB: PAGE_COMPRESSION_LEVEL requires"
11782 				" PAGE_COMPRESSED");
11783 			return "PAGE_COMPRESSION_LEVEL";
11784 		}
11785 
11786 		if (options->page_compression_level < 1 || options->page_compression_level > 9) {
11787 			push_warning_printf(
11788 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11789 				HA_WRONG_CREATE_OPTION,
11790 				"InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
11791 				" Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
11792 				options->page_compression_level);
11793 			return "PAGE_COMPRESSION_LEVEL";
11794 		}
11795 	}
11796 
11797 	return NULL;
11798 }
11799 
11800 /*****************************************************************//**
11801 Update create_info.  Used in SHOW CREATE TABLE et al. */
11802 
11803 void
update_create_info(HA_CREATE_INFO * create_info)11804 ha_innobase::update_create_info(
11805 /*============================*/
11806 	HA_CREATE_INFO*	create_info)	/*!< in/out: create info */
11807 {
11808 	if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
11809 		info(HA_STATUS_AUTO);
11810 		create_info->auto_increment_value = stats.auto_increment_value;
11811 	}
11812 
11813 	if (m_prebuilt->table->is_temporary()) {
11814 		return;
11815 	}
11816 
11817 	/* Update the DATA DIRECTORY name from SYS_DATAFILES. */
11818 	dict_get_and_save_data_dir_path(m_prebuilt->table, false);
11819 
11820 	if (m_prebuilt->table->data_dir_path) {
11821 		create_info->data_file_name = m_prebuilt->table->data_dir_path;
11822 	}
11823 }
11824 
11825 /*****************************************************************//**
11826 Initialize the table FTS stopword list
11827 @return TRUE if success */
11828 ibool
innobase_fts_load_stopword(dict_table_t * table,trx_t * trx,THD * thd)11829 innobase_fts_load_stopword(
11830 /*=======================*/
11831 	dict_table_t*	table,	/*!< in: Table has the FTS */
11832 	trx_t*		trx,	/*!< in: transaction */
11833 	THD*		thd)	/*!< in: current thread */
11834 {
11835   const char *stopword_table= THDVAR(thd, ft_user_stopword_table);
11836   if (!stopword_table)
11837   {
11838     mysql_mutex_lock(&LOCK_global_system_variables);
11839     if (innobase_server_stopword_table)
11840       stopword_table= thd_strdup(thd, innobase_server_stopword_table);
11841     mysql_mutex_unlock(&LOCK_global_system_variables);
11842   }
11843 
11844   return fts_load_stopword(table, trx, stopword_table,
11845                            THDVAR(thd, ft_enable_stopword), false);
11846 }
11847 
11848 /** Parse the table name into normal name and remote path if needed.
11849 @param[in]	name	Table name (db/table or full path).
11850 @return 0 if successful, otherwise, error number */
11851 int
parse_table_name(const char * name)11852 create_table_info_t::parse_table_name(
11853 	const char*
11854 #ifdef _WIN32
11855 	name
11856 #endif
11857 				      )
11858 {
11859 	DBUG_ENTER("parse_table_name");
11860 
11861 #ifdef _WIN32
11862 	/* Names passed in from server are in two formats:
11863 	1. <database_name>/<table_name>: for normal table creation
11864 	2. full path: for temp table creation, or DATA DIRECTORY.
11865 
11866 	When srv_file_per_table is on and mysqld_embedded is off,
11867 	check for full path pattern, i.e.
11868 	X:\dir\...,		X is a driver letter, or
11869 	\\dir1\dir2\...,	UNC path
11870 	returns error if it is in full path format, but not creating a temp.
11871 	table. Currently InnoDB does not support symbolic link on Windows. */
11872 
11873 	if (m_innodb_file_per_table
11874 	    && !mysqld_embedded
11875 	    && !m_create_info->tmp_table()) {
11876 
11877 		if ((name[1] == ':')
11878 		    || (name[0] == '\\' && name[1] == '\\')) {
11879 			sql_print_error("Cannot create table %s\n", name);
11880 			DBUG_RETURN(HA_ERR_GENERIC);
11881 		}
11882 	}
11883 #endif
11884 
11885 	m_remote_path[0] = '\0';
11886 
11887 	/* Make sure DATA DIRECTORY is compatible with other options
11888 	and set the remote path.  In the case of either;
11889 	  CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
11890 	  CREATE TABLE ... DATA DIRECTORY={path} TABLESPACE={name}... ;
11891 	we ignore the DATA DIRECTORY. */
11892 	if (m_create_info->data_file_name
11893 	    && m_create_info->data_file_name[0]
11894 	    && my_use_symdir) {
11895 		if (!create_option_data_directory_is_valid()) {
11896 			push_warning_printf(
11897 				m_thd, Sql_condition::WARN_LEVEL_WARN,
11898 				WARN_OPTION_IGNORED,
11899 				ER_DEFAULT(WARN_OPTION_IGNORED),
11900 				"DATA DIRECTORY");
11901 
11902 			m_flags &= ~DICT_TF_MASK_DATA_DIR;
11903 		} else {
11904 			strncpy(m_remote_path,
11905 				m_create_info->data_file_name,
11906 				FN_REFLEN - 1);
11907 		}
11908 	}
11909 
11910 	if (m_create_info->index_file_name) {
11911 		my_error(WARN_OPTION_IGNORED, ME_WARNING,
11912 			"INDEX DIRECTORY");
11913 	}
11914 
11915 	DBUG_RETURN(0);
11916 }
11917 
11918 /** @return whether innodb_strict_mode is active */
is_innodb_strict_mode(THD * thd)11919 bool ha_innobase::is_innodb_strict_mode(THD *thd)
11920 {
11921   return THDVAR(thd, strict_mode);
11922 }
11923 
11924 /** Determine InnoDB table flags.
11925 If strict_mode=OFF, this will adjust the flags to what should be assumed.
11926 @retval true on success
11927 @retval false on error */
innobase_table_flags()11928 bool create_table_info_t::innobase_table_flags()
11929 {
11930 	DBUG_ENTER("innobase_table_flags");
11931 
11932 	const char*	fts_doc_id_index_bad = NULL;
11933 	ulint		zip_ssize = 0;
11934 	enum row_type	row_type;
11935 	rec_format_t	innodb_row_format =
11936 		get_row_format(m_default_row_format);
11937 	const bool	is_temp = m_create_info->tmp_table();
11938 	bool		zip_allowed = !is_temp;
11939 
11940 	const ulint	zip_ssize_max =
11941 		ut_min(static_cast<ulint>(UNIV_PAGE_SSIZE_MAX),
11942 		       static_cast<ulint>(PAGE_ZIP_SSIZE_MAX));
11943 
11944 	ha_table_option_struct *options= m_form->s->option_struct;
11945 
11946 	m_flags = 0;
11947 	m_flags2 = 0;
11948 
11949 	/* Check if there are any FTS indexes defined on this table. */
11950 	for (uint i = 0; i < m_form->s->keys; i++) {
11951 		const KEY*	key = &m_form->key_info[i];
11952 
11953 		if (key->flags & HA_FULLTEXT) {
11954 			m_flags2 |= DICT_TF2_FTS;
11955 
11956 			/* We don't support FTS indexes in temporary
11957 			tables. */
11958 			if (is_temp) {
11959 				my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
11960 				DBUG_RETURN(false);
11961 			}
11962 
11963 			if (fts_doc_id_index_bad) {
11964 				goto index_bad;
11965 			}
11966 		}
11967 
11968 		if (innobase_strcasecmp(key->name.str, FTS_DOC_ID_INDEX_NAME)) {
11969 			continue;
11970 		}
11971 
11972 		/* Do a pre-check on FTS DOC ID index */
11973 		if (!(key->flags & HA_NOSAME)
11974 		    || strcmp(key->name.str, FTS_DOC_ID_INDEX_NAME)
11975 		    || strcmp(key->key_part[0].field->field_name.str,
11976 			      FTS_DOC_ID_COL_NAME)) {
11977 			fts_doc_id_index_bad = key->name.str;
11978 		}
11979 
11980 		if (fts_doc_id_index_bad && (m_flags2 & DICT_TF2_FTS)) {
11981 index_bad:
11982 			my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
11983 				 fts_doc_id_index_bad);
11984 			DBUG_RETURN(false);
11985 		}
11986 	}
11987 
11988 	if (m_create_info->key_block_size > 0) {
11989 		/* The requested compressed page size (key_block_size)
11990 		is given in kilobytes. If it is a valid number, store
11991 		that value as the number of log2 shifts from 512 in
11992 		zip_ssize. Zero means it is not compressed. */
11993 		ulint	zssize;		/* Zip Shift Size */
11994 		ulint	kbsize;		/* Key Block Size */
11995 		for (zssize = kbsize = 1;
11996 		     zssize <= zip_ssize_max;
11997 		     zssize++, kbsize <<= 1) {
11998 			if (kbsize == m_create_info->key_block_size) {
11999 				zip_ssize = zssize;
12000 				break;
12001 			}
12002 		}
12003 
12004 		/* Make sure compressed row format is allowed. */
12005 		if (is_temp) {
12006 			push_warning(
12007 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12008 				ER_ILLEGAL_HA_CREATE_OPTION,
12009 				"InnoDB: KEY_BLOCK_SIZE is ignored"
12010 				" for TEMPORARY TABLE.");
12011 			zip_allowed = false;
12012 		} else if (!m_allow_file_per_table) {
12013 			push_warning(
12014 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12015 				ER_ILLEGAL_HA_CREATE_OPTION,
12016 				"InnoDB: KEY_BLOCK_SIZE requires"
12017 				" innodb_file_per_table.");
12018 			zip_allowed = false;
12019 		}
12020 
12021 		if (!zip_allowed
12022 		    || zssize > zip_ssize_max) {
12023 			push_warning_printf(
12024 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12025 				ER_ILLEGAL_HA_CREATE_OPTION,
12026 				"InnoDB: ignoring KEY_BLOCK_SIZE=%u.",
12027 				(uint) m_create_info->key_block_size);
12028 		}
12029 	}
12030 
12031 	row_type = m_create_info->row_type;
12032 
12033 	if (zip_ssize && zip_allowed) {
12034 		/* if ROW_FORMAT is set to default,
12035 		automatically change it to COMPRESSED. */
12036 		if (row_type == ROW_TYPE_DEFAULT) {
12037 			row_type = ROW_TYPE_COMPRESSED;
12038 		} else if (row_type != ROW_TYPE_COMPRESSED) {
12039 			/* ROW_FORMAT other than COMPRESSED
12040 			ignores KEY_BLOCK_SIZE.  It does not
12041 			make sense to reject conflicting
12042 			KEY_BLOCK_SIZE and ROW_FORMAT, because
12043 			such combinations can be obtained
12044 			with ALTER TABLE anyway. */
12045 			push_warning_printf(
12046 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12047 				ER_ILLEGAL_HA_CREATE_OPTION,
12048 				"InnoDB: ignoring KEY_BLOCK_SIZE=%u"
12049 				" unless ROW_FORMAT=COMPRESSED.",
12050 				(uint) m_create_info->key_block_size);
12051 			zip_allowed = false;
12052 		}
12053 	} else {
12054 		/* zip_ssize == 0 means no KEY_BLOCK_SIZE. */
12055 		if (row_type == ROW_TYPE_COMPRESSED && zip_allowed) {
12056 			/* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
12057 			implies half the maximum KEY_BLOCK_SIZE(*1k) or
12058 			srv_page_size, whichever is less. */
12059 			zip_ssize = zip_ssize_max - 1;
12060 		}
12061 	}
12062 
12063 	/* Validate the row format.  Correct it if necessary */
12064 
12065 	switch (row_type) {
12066 	case ROW_TYPE_REDUNDANT:
12067 		innodb_row_format = REC_FORMAT_REDUNDANT;
12068 		break;
12069 	case ROW_TYPE_COMPACT:
12070 		innodb_row_format = REC_FORMAT_COMPACT;
12071 		break;
12072 	case ROW_TYPE_COMPRESSED:
12073 		if (is_temp) {
12074 			push_warning_printf(
12075 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12076 				ER_ILLEGAL_HA_CREATE_OPTION,
12077 				"InnoDB: ROW_FORMAT=%s is ignored for"
12078 				" TEMPORARY TABLE.",
12079 				get_row_format_name(row_type));
12080 		} else if (!m_allow_file_per_table) {
12081 			push_warning_printf(
12082 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12083 				ER_ILLEGAL_HA_CREATE_OPTION,
12084 				"InnoDB: ROW_FORMAT=COMPRESSED requires"
12085 				" innodb_file_per_table.");
12086 		} else {
12087 			innodb_row_format = REC_FORMAT_COMPRESSED;
12088 			break;
12089 		}
12090 		zip_allowed = false;
12091 		/* Set ROW_FORMAT = COMPACT */
12092 		/* fall through */
12093 	case ROW_TYPE_NOT_USED:
12094 	case ROW_TYPE_FIXED:
12095 	case ROW_TYPE_PAGE:
12096 		push_warning(
12097 			m_thd, Sql_condition::WARN_LEVEL_WARN,
12098 			ER_ILLEGAL_HA_CREATE_OPTION,
12099 			"InnoDB: assuming ROW_FORMAT=DYNAMIC.");
12100 		/* fall through */
12101 	case ROW_TYPE_DYNAMIC:
12102 		innodb_row_format = REC_FORMAT_DYNAMIC;
12103 		break;
12104 	case ROW_TYPE_DEFAULT:
12105 		;
12106 	}
12107 
12108 	/* Don't support compressed table when page size > 16k. */
12109 	if (zip_allowed && zip_ssize && srv_page_size > UNIV_PAGE_SIZE_DEF) {
12110 		push_warning(m_thd, Sql_condition::WARN_LEVEL_WARN,
12111 			     ER_ILLEGAL_HA_CREATE_OPTION,
12112 			     "InnoDB: Cannot create a COMPRESSED table"
12113 			     " when innodb_page_size > 16k."
12114 			     " Assuming ROW_FORMAT=DYNAMIC.");
12115 		zip_allowed = false;
12116 	}
12117 
12118 	ut_ad(!is_temp || !zip_allowed);
12119 	ut_ad(!is_temp || innodb_row_format != REC_FORMAT_COMPRESSED);
12120 
12121 	/* Set the table flags */
12122 	if (!zip_allowed) {
12123 		zip_ssize = 0;
12124 	}
12125 
12126 	if (is_temp) {
12127 		m_flags2 |= DICT_TF2_TEMPORARY;
12128 	} else if (m_use_file_per_table) {
12129 		m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE;
12130 	}
12131 
12132 	ulint level = ulint(options->page_compression_level);
12133 	if (!level) {
12134 		level = page_zip_level;
12135 		if (!level && options->page_compressed) {
12136 			push_warning_printf(
12137 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12138 				ER_ILLEGAL_HA_CREATE_OPTION,
12139 				"InnoDB: PAGE_COMPRESSED requires"
12140 				" PAGE_COMPRESSION_LEVEL or"
12141 				" innodb_compression_level > 0");
12142 			DBUG_RETURN(false);
12143 		}
12144 	}
12145 
12146 	/* Set the table flags */
12147 	dict_tf_set(&m_flags, innodb_row_format, zip_ssize,
12148 		    m_use_data_dir, options->page_compressed, level);
12149 
12150 	if (m_form->s->table_type == TABLE_TYPE_SEQUENCE) {
12151 		m_flags |= DICT_TF_MASK_NO_ROLLBACK;
12152 	}
12153 
12154 	/* Set the flags2 when create table or alter tables */
12155 	m_flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
12156 	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
12157 			m_flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
12158 
12159 	DBUG_RETURN(true);
12160 }
12161 
12162 /** Parse MERGE_THRESHOLD value from the string.
12163 @param[in]	thd	connection
12164 @param[in]	str	string which might include 'MERGE_THRESHOLD='
12165 @return	value parsed. 0 means not found or invalid value. */
12166 static
12167 ulint
innobase_parse_merge_threshold(THD * thd,const char * str)12168 innobase_parse_merge_threshold(
12169 	THD*		thd,
12170 	const char*	str)
12171 {
12172 	static const char*	label = "MERGE_THRESHOLD=";
12173 	static const size_t	label_len = strlen(label);
12174 	const char*		pos = str;
12175 
12176 	pos = strstr(str, label);
12177 
12178 	if (pos == NULL) {
12179 		return(0);
12180 	}
12181 
12182 	pos += label_len;
12183 
12184 	lint	ret = atoi(pos);
12185 
12186 	if (ret > 0 && ret <= 50) {
12187 		return(static_cast<ulint>(ret));
12188 	}
12189 
12190 	push_warning_printf(
12191 		thd, Sql_condition::WARN_LEVEL_WARN,
12192 		ER_ILLEGAL_HA_CREATE_OPTION,
12193 		"InnoDB: Invalid value for MERGE_THRESHOLD in the CREATE TABLE"
12194 		" statement. The value is ignored.");
12195 
12196 	return(0);
12197 }
12198 
12199 /** Parse hint for table and its indexes, and update the information
12200 in dictionary.
12201 @param[in]	thd		connection
12202 @param[in,out]	table		target table
12203 @param[in]	table_share	table definition */
12204 void
innobase_parse_hint_from_comment(THD * thd,dict_table_t * table,const TABLE_SHARE * table_share)12205 innobase_parse_hint_from_comment(
12206 	THD*			thd,
12207 	dict_table_t*		table,
12208 	const TABLE_SHARE*	table_share)
12209 {
12210 	ulint	merge_threshold_table;
12211 	ulint	merge_threshold_index[MAX_KEY];
12212 	bool	is_found[MAX_KEY];
12213 
12214 	if (table_share->comment.str != NULL) {
12215 		merge_threshold_table
12216 			= innobase_parse_merge_threshold(
12217 				thd, table_share->comment.str);
12218 	} else {
12219 		merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12220 	}
12221 
12222 	if (merge_threshold_table == 0) {
12223 		merge_threshold_table = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
12224 	}
12225 
12226 	for (uint i = 0; i < table_share->keys; i++) {
12227 		KEY*	key_info = &table_share->key_info[i];
12228 
12229 		ut_ad(i < sizeof(merge_threshold_index)
12230 			  / sizeof(merge_threshold_index[0]));
12231 
12232 		if (key_info->flags & HA_USES_COMMENT
12233 		    && key_info->comment.str != NULL) {
12234 			merge_threshold_index[i]
12235 				= innobase_parse_merge_threshold(
12236 					thd, key_info->comment.str);
12237 		} else {
12238 			merge_threshold_index[i] = merge_threshold_table;
12239 		}
12240 
12241 		if (merge_threshold_index[i] == 0) {
12242 			merge_threshold_index[i] = merge_threshold_table;
12243 		}
12244 	}
12245 
12246 	/* update SYS_INDEX table */
12247 	if (!table->is_temporary()) {
12248 		for (uint i = 0; i < table_share->keys; i++) {
12249 			is_found[i] = false;
12250 		}
12251 
12252 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12253 		     index != NULL;
12254 		     index = UT_LIST_GET_NEXT(indexes, index)) {
12255 
12256 			if (dict_index_is_auto_gen_clust(index)) {
12257 
12258 				/* GEN_CLUST_INDEX should use
12259 				merge_threshold_table */
12260 				dict_index_set_merge_threshold(
12261 					index, merge_threshold_table);
12262 				continue;
12263 			}
12264 
12265 			for (uint i = 0; i < table_share->keys; i++) {
12266 				if (is_found[i]) {
12267 					continue;
12268 				}
12269 
12270 				KEY*	key_info = &table_share->key_info[i];
12271 
12272 				if (innobase_strcasecmp(
12273 					index->name, key_info->name.str) == 0) {
12274 
12275 					dict_index_set_merge_threshold(
12276 						index,
12277 						merge_threshold_index[i]);
12278 					is_found[i] = true;
12279 					break;
12280 				}
12281 			}
12282 		}
12283 	}
12284 
12285 	for (uint i = 0; i < table_share->keys; i++) {
12286 		is_found[i] = false;
12287 	}
12288 
12289 	/* update in memory */
12290 	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
12291 	     index != NULL;
12292 	     index = UT_LIST_GET_NEXT(indexes, index)) {
12293 
12294 		if (dict_index_is_auto_gen_clust(index)) {
12295 
12296 			/* GEN_CLUST_INDEX should use merge_threshold_table */
12297 
12298 			/* x-lock index is needed to exclude concurrent
12299 			pessimistic tree operations */
12300 			rw_lock_x_lock(dict_index_get_lock(index));
12301 			index->merge_threshold = merge_threshold_table;
12302 			rw_lock_x_unlock(dict_index_get_lock(index));
12303 
12304 			continue;
12305 		}
12306 
12307 		for (uint i = 0; i < table_share->keys; i++) {
12308 			if (is_found[i]) {
12309 				continue;
12310 			}
12311 
12312 			KEY*	key_info = &table_share->key_info[i];
12313 
12314 			if (innobase_strcasecmp(
12315 				index->name, key_info->name.str) == 0) {
12316 
12317 				/* x-lock index is needed to exclude concurrent
12318 				pessimistic tree operations */
12319 				rw_lock_x_lock(dict_index_get_lock(index));
12320 				index->merge_threshold
12321 					= merge_threshold_index[i];
12322 				rw_lock_x_unlock(dict_index_get_lock(index));
12323 				is_found[i] = true;
12324 
12325 				break;
12326 			}
12327 		}
12328 	}
12329 }
12330 
12331 /** Set m_use_* flags. */
12332 void
set_tablespace_type(bool table_being_altered_is_file_per_table)12333 create_table_info_t::set_tablespace_type(
12334 	bool	table_being_altered_is_file_per_table)
12335 {
12336 	/** Allow file_per_table for this table either because:
12337 	1) the setting innodb_file_per_table=on,
12338 	2) the table being altered is currently file_per_table */
12339 	m_allow_file_per_table =
12340 		m_innodb_file_per_table
12341 		|| table_being_altered_is_file_per_table;
12342 
12343 	/* Ignore the current innodb-file-per-table setting if we are
12344 	creating a temporary table. */
12345 	m_use_file_per_table = m_allow_file_per_table
12346 		&& !m_create_info->tmp_table();
12347 
12348 	/* DATA DIRECTORY must have m_use_file_per_table but cannot be
12349 	used with TEMPORARY tables. */
12350 	m_use_data_dir =
12351 		m_use_file_per_table
12352 		&& m_create_info->data_file_name
12353 		&& m_create_info->data_file_name[0]
12354 		&& my_use_symdir;
12355 }
12356 
12357 /** Initialize the create_table_info_t object.
12358 @return error number */
12359 int
initialize()12360 create_table_info_t::initialize()
12361 {
12362 	DBUG_ENTER("create_table_info_t::initialize");
12363 
12364 	ut_ad(m_thd != NULL);
12365 	ut_ad(m_create_info != NULL);
12366 
12367 	if (m_form->s->fields > REC_MAX_N_USER_FIELDS) {
12368 		DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
12369 	}
12370 
12371 	/* Check for name conflicts (with reserved name) for
12372 	any user indices to be created. */
12373 	if (innobase_index_name_is_reserved(m_thd, m_form->key_info,
12374 					    m_form->s->keys)) {
12375 		DBUG_RETURN(HA_ERR_WRONG_INDEX);
12376 	}
12377 
12378 	/* Get the transaction associated with the current thd, or create one
12379 	if not yet created */
12380 
12381 	check_trx_exists(m_thd);
12382 
12383 	DBUG_RETURN(0);
12384 }
12385 
12386 
12387 /** Check if a virtual column is part of a fulltext or spatial index. */
12388 bool
gcols_in_fulltext_or_spatial()12389 create_table_info_t::gcols_in_fulltext_or_spatial()
12390 {
12391 	for (ulint i = 0; i < m_form->s->keys; i++) {
12392 		const KEY*	key = m_form->key_info + i;
12393 		if (!(key->flags & (HA_SPATIAL | HA_FULLTEXT))) {
12394 			continue;
12395 		}
12396 		for (ulint j = 0; j < key->user_defined_key_parts; j++) {
12397 			/* We do not support special (Fulltext or
12398 			Spatial) index on virtual columns */
12399 			if (!key->key_part[j].field->stored_in_db()) {
12400 				my_error(ER_UNSUPPORTED_ACTION_ON_GENERATED_COLUMN, MYF(0));
12401 				return true;
12402 			}
12403 		}
12404 	}
12405 	return false;
12406 }
12407 
12408 
12409 /** Prepare to create a new table to an InnoDB database.
12410 @param[in]	name	Table name
12411 @return error number */
prepare_create_table(const char * name,bool strict)12412 int create_table_info_t::prepare_create_table(const char* name, bool strict)
12413 {
12414 	DBUG_ENTER("prepare_create_table");
12415 
12416 	ut_ad(m_thd != NULL);
12417 	ut_ad(m_create_info != NULL);
12418 
12419 	set_tablespace_type(false);
12420 
12421 	normalize_table_name(m_table_name, name);
12422 
12423 	/* Validate table options not handled by the SQL-parser */
12424 	if (check_table_options()) {
12425 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12426 	}
12427 
12428 	/* Validate the create options if innodb_strict_mode is set.
12429 	Do not use the regular message for ER_ILLEGAL_HA_CREATE_OPTION
12430 	because InnoDB might actually support the option, but not under
12431 	the current conditions.  The messages revealing the specific
12432 	problems are reported inside this function. */
12433 	if (strict && create_options_are_invalid()) {
12434 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12435 	}
12436 
12437 	/* Create the table flags and flags2 */
12438 	if (!innobase_table_flags()) {
12439 		DBUG_RETURN(HA_WRONG_CREATE_OPTION);
12440 	}
12441 
12442 	if (high_level_read_only) {
12443 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
12444 	}
12445 
12446 	if (gcols_in_fulltext_or_spatial()) {
12447 		DBUG_RETURN(HA_ERR_UNSUPPORTED);
12448 	}
12449 
12450 	for (uint i = 0; i < m_form->s->keys; i++) {
12451 		const size_t max_field_len
12452 		    = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags);
12453 		const KEY& key = m_form->key_info[i];
12454 
12455 		if (key.algorithm == HA_KEY_ALG_FULLTEXT) {
12456 			continue;
12457 		}
12458 
12459 		if (too_big_key_part_length(max_field_len, key)) {
12460 			DBUG_RETURN(convert_error_code_to_mysql(
12461 			    DB_TOO_BIG_INDEX_COL, m_flags, NULL));
12462 		}
12463 	}
12464 
12465 	DBUG_RETURN(parse_table_name(name));
12466 }
12467 
12468 /** Create the internal innodb table.
12469 @param create_fk	whether to add FOREIGN KEY constraints */
create_table(bool create_fk)12470 int create_table_info_t::create_table(bool create_fk)
12471 {
12472 	int		error;
12473 	int		primary_key_no;
12474 	uint		i;
12475 
12476 	DBUG_ENTER("create_table");
12477 
12478 	/* Look for a primary key */
12479 	primary_key_no = (m_form->s->primary_key != MAX_KEY ?
12480 			  (int) m_form->s->primary_key : -1);
12481 
12482 	/* Our function innobase_get_mysql_key_number_for_index assumes
12483 	the primary key is always number 0, if it exists */
12484 	ut_a(primary_key_no == -1 || primary_key_no == 0);
12485 
12486 	error = create_table_def();
12487 
12488 	if (error) {
12489 		DBUG_RETURN(error);
12490 	}
12491 
12492 	DBUG_ASSERT(m_drop_before_rollback
12493 		    == !(m_flags2 & DICT_TF2_TEMPORARY));
12494 
12495 	/* Create the keys */
12496 
12497 	if (m_form->s->keys == 0 || primary_key_no == -1) {
12498 		/* Create an index which is used as the clustered index;
12499 		order the rows by their row id which is internally generated
12500 		by InnoDB */
12501 		ulint flags = m_table->flags;
12502 		dict_index_t* index = dict_mem_index_create(
12503 			m_table, innobase_index_reserve_name,
12504 			DICT_CLUSTERED, 0);
12505 		error = convert_error_code_to_mysql(
12506 			row_create_index_for_mysql(index, m_trx, NULL),
12507 			flags, m_thd);
12508 		if (error) {
12509 			DBUG_RETURN(error);
12510 		}
12511 	}
12512 
12513 	if (primary_key_no != -1) {
12514 		/* In InnoDB the clustered index must always be created
12515 		first */
12516 		if ((error = create_index(m_trx, m_form, m_table,
12517 					  (uint) primary_key_no))) {
12518 			DBUG_RETURN(error);
12519 		}
12520 	}
12521 
12522 	/* Create the ancillary tables that are common to all FTS indexes on
12523 	this table. */
12524 	if (m_flags2 & DICT_TF2_FTS) {
12525 		fts_doc_id_index_enum	ret;
12526 
12527 		/* Check whether there already exists FTS_DOC_ID_INDEX */
12528 		ret = innobase_fts_check_doc_id_index_in_def(
12529 			m_form->s->keys, m_form->key_info);
12530 
12531 		switch (ret) {
12532 		case FTS_INCORRECT_DOC_ID_INDEX:
12533 			push_warning_printf(m_thd,
12534 					    Sql_condition::WARN_LEVEL_WARN,
12535 					    ER_WRONG_NAME_FOR_INDEX,
12536 					    " InnoDB: Index name %s is reserved"
12537 					    " for the unique index on"
12538 					    " FTS_DOC_ID column for FTS"
12539 					    " Document ID indexing"
12540 					    " on table %s. Please check"
12541 					    " the index definition to"
12542 					    " make sure it is of correct"
12543 					    " type\n",
12544 					    FTS_DOC_ID_INDEX_NAME,
12545 					    m_table->name.m_name);
12546 
12547 			if (m_table->fts) {
12548 				fts_free(m_table);
12549 			}
12550 
12551 			my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
12552 				 FTS_DOC_ID_INDEX_NAME);
12553 			DBUG_RETURN(-1);
12554 		case FTS_EXIST_DOC_ID_INDEX:
12555 		case FTS_NOT_EXIST_DOC_ID_INDEX:
12556 			break;
12557 		}
12558 
12559 		dberr_t	err = fts_create_common_tables(
12560 			m_trx, m_table,
12561 			(ret == FTS_EXIST_DOC_ID_INDEX));
12562 
12563 		error = convert_error_code_to_mysql(err, 0, NULL);
12564 
12565 		if (error) {
12566 			DBUG_RETURN(error);
12567 		}
12568 	}
12569 
12570 	for (i = 0; i < m_form->s->keys; i++) {
12571 		if (i != uint(primary_key_no)
12572 		    && (error = create_index(m_trx, m_form, m_table, i))) {
12573 			DBUG_RETURN(error);
12574 		}
12575 	}
12576 
12577 	/* Cache all the FTS indexes on this table in the FTS specific
12578 	structure. They are used for FTS indexed column update handling. */
12579 	if (m_flags2 & DICT_TF2_FTS) {
12580 		fts_t*          fts = m_table->fts;
12581 
12582 		ut_a(fts != NULL);
12583 
12584 		dict_table_get_all_fts_indexes(m_table, fts->indexes);
12585 	}
12586 
12587 	size_t stmt_len;
12588 	if (const char* stmt = innobase_get_stmt_unsafe(m_thd, &stmt_len)) {
12589 		dberr_t err = create_fk
12590 			? dict_create_foreign_constraints(
12591 				m_trx, stmt, stmt_len, m_table_name,
12592 				m_flags2 & DICT_TF2_TEMPORARY)
12593 			: DB_SUCCESS;
12594 		if (err == DB_SUCCESS) {
12595 			/* Check that also referencing constraints are ok */
12596 			dict_names_t	fk_tables;
12597 			err = dict_load_foreigns(m_table_name, NULL,
12598 						 false, true,
12599 						 DICT_ERR_IGNORE_NONE,
12600 						 fk_tables);
12601 			while (err == DB_SUCCESS && !fk_tables.empty()) {
12602 				dict_load_table(fk_tables.front(),
12603 						DICT_ERR_IGNORE_NONE);
12604 				fk_tables.pop_front();
12605 			}
12606 		}
12607 
12608 		switch (err) {
12609 		case DB_PARENT_NO_INDEX:
12610 			push_warning_printf(
12611 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12612 				HA_ERR_CANNOT_ADD_FOREIGN,
12613 				"Create table '%s' with foreign key constraint"
12614 				" failed. There is no index in the referenced"
12615 				" table where the referenced columns appear"
12616 				" as the first columns.\n", m_table_name);
12617 			break;
12618 
12619 		case DB_CHILD_NO_INDEX:
12620 			push_warning_printf(
12621 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12622 				HA_ERR_CANNOT_ADD_FOREIGN,
12623 				"Create table '%s' with foreign key constraint"
12624 				" failed. There is no index in the referencing"
12625 				" table where referencing columns appear"
12626 				" as the first columns.\n", m_table_name);
12627 			break;
12628 		case DB_NO_FK_ON_S_BASE_COL:
12629 			push_warning_printf(
12630 				m_thd, Sql_condition::WARN_LEVEL_WARN,
12631 				HA_ERR_CANNOT_ADD_FOREIGN,
12632 				"Create table '%s' with foreign key constraint"
12633 				" failed. Cannot add foreign key constraint"
12634 				" placed on the base column of stored"
12635 				" column. \n",
12636 				m_table_name);
12637 		default:
12638 			break;
12639 		}
12640 
12641 		if (err != DB_SUCCESS) {
12642 			DBUG_RETURN(convert_error_code_to_mysql(
12643 					    err, m_flags, NULL));
12644 		}
12645 	}
12646 
12647 	/* In TRUNCATE TABLE, we will merely warn about the maximum
12648 	row size being too large. */
12649 	if (!row_size_is_acceptable(*m_table, create_fk)) {
12650 		DBUG_RETURN(convert_error_code_to_mysql(
12651 			    DB_TOO_BIG_RECORD, m_flags, NULL));
12652 	}
12653 
12654 	DBUG_RETURN(0);
12655 }
12656 
row_size_is_acceptable(const dict_table_t & table,bool strict) const12657 bool create_table_info_t::row_size_is_acceptable(
12658   const dict_table_t &table, bool strict) const
12659 {
12660   for (dict_index_t *index= dict_table_get_first_index(&table); index;
12661        index= dict_table_get_next_index(index))
12662     if (!row_size_is_acceptable(*index, strict))
12663       return false;
12664   return true;
12665 }
12666 
12667 /* FIXME: row size check has some flaws and should be improved */
record_size_info() const12668 dict_index_t::record_size_info_t dict_index_t::record_size_info() const
12669 {
12670   ut_ad(!(type & DICT_FTS));
12671 
12672   /* maximum allowed size of a node pointer record */
12673   ulint page_ptr_max;
12674   const bool comp= table->not_redundant();
12675   /* table->space == NULL after DISCARD TABLESPACE */
12676   const ulint zip_size= dict_tf_get_zip_size(table->flags);
12677   record_size_info_t result;
12678 
12679   if (zip_size && zip_size < srv_page_size)
12680   {
12681     /* On a ROW_FORMAT=COMPRESSED page, two records must fit in the
12682     uncompressed page modification log. On compressed pages
12683     with size.physical() == univ_page_size.physical(),
12684     this limit will never be reached. */
12685     ut_ad(comp);
12686     /* The maximum allowed record size is the size of
12687     an empty page, minus a byte for recoding the heap
12688     number in the page modification log.  The maximum
12689     allowed node pointer size is half that. */
12690     result.max_leaf_size= page_zip_empty_size(n_fields, zip_size);
12691     if (result.max_leaf_size)
12692     {
12693       result.max_leaf_size--;
12694     }
12695     page_ptr_max= result.max_leaf_size / 2;
12696     /* On a compressed page, there is a two-byte entry in
12697     the dense page directory for every record.  But there
12698     is no record header. */
12699     result.shortest_size= 2;
12700   }
12701   else
12702   {
12703     /* The maximum allowed record size is half a B-tree
12704     page(16k for 64k page size).  No additional sparse
12705     page directory entry will be generated for the first
12706     few user records. */
12707     result.max_leaf_size= (comp || srv_page_size < UNIV_PAGE_SIZE_MAX)
12708                               ? page_get_free_space_of_empty(comp) / 2
12709                               : REDUNDANT_REC_MAX_DATA_SIZE;
12710 
12711     page_ptr_max= result.max_leaf_size;
12712     /* Each record has a header. */
12713     result.shortest_size= comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;
12714   }
12715 
12716   if (comp)
12717   {
12718     /* Include the "null" flags in the
12719     maximum possible record size. */
12720     result.shortest_size+= UT_BITS_IN_BYTES(n_nullable);
12721   }
12722   else
12723   {
12724     /* For each column, include a 2-byte offset and a
12725     "null" flag.  The 1-byte format is only used in short
12726     records that do not contain externally stored columns.
12727     Such records could never exceed the page limit, even
12728     when using the 2-byte format. */
12729     result.shortest_size+= 2 * n_fields;
12730   }
12731 
12732   const ulint max_local_len= table->get_overflow_field_local_len();
12733 
12734   /* Compute the maximum possible record size. */
12735   for (unsigned i= 0; i < n_fields; i++)
12736   {
12737     const dict_field_t &f= fields[i];
12738     const dict_col_t &col= *f.col;
12739 
12740     /* In dtuple_convert_big_rec(), variable-length columns
12741     that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE
12742     may be chosen for external storage.
12743 
12744     Fixed-length columns, and all columns of secondary
12745     index records are always stored inline. */
12746 
12747     /* Determine the maximum length of the index field.
12748     The field_ext_max_size should be computed as the worst
12749     case in rec_get_converted_size_comp() for
12750     REC_STATUS_ORDINARY records. */
12751 
12752     size_t field_max_size= dict_col_get_fixed_size(&col, comp);
12753     if (field_max_size && f.fixed_len != 0)
12754     {
12755       /* dict_index_add_col() should guarantee this */
12756       ut_ad(!f.prefix_len || f.fixed_len == f.prefix_len);
12757       /* Fixed lengths are not encoded
12758       in ROW_FORMAT=COMPACT. */
12759       goto add_field_size;
12760     }
12761 
12762     field_max_size= dict_col_get_max_size(&col);
12763 
12764     if (f.prefix_len)
12765     {
12766       if (f.prefix_len < field_max_size)
12767       {
12768         field_max_size= f.prefix_len;
12769       }
12770 
12771       /* those conditions were copied from dtuple_convert_big_rec()*/
12772     }
12773     else if (field_max_size > max_local_len &&
12774              field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
12775              DATA_BIG_COL(&col) && dict_index_is_clust(this))
12776     {
12777 
12778       /* In the worst case, we have a locally stored
12779       column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes.
12780       The length can be stored in one byte.  If the
12781       column were stored externally, the lengths in
12782       the clustered index page would be
12783       BTR_EXTERN_FIELD_REF_SIZE and 2. */
12784       field_max_size= max_local_len;
12785     }
12786 
12787     if (comp)
12788     {
12789       /* Add the extra size for ROW_FORMAT=COMPACT.
12790       For ROW_FORMAT=REDUNDANT, these bytes were
12791       added to result.shortest_size before this loop. */
12792       result.shortest_size+= field_max_size < 256 ? 1 : 2;
12793     }
12794   add_field_size:
12795     result.shortest_size+= field_max_size;
12796 
12797     /* Check the size limit on leaf pages. */
12798     if (result.shortest_size >= result.max_leaf_size)
12799     {
12800       result.set_too_big(i);
12801     }
12802 
12803     /* Check the size limit on non-leaf pages.  Records
12804     stored in non-leaf B-tree pages consist of the unique
12805     columns of the record (the key columns of the B-tree)
12806     and a node pointer field.  When we have processed the
12807     unique columns, result.shortest_size equals the size of the
12808     node pointer record minus the node pointer column. */
12809     if (i + 1 == dict_index_get_n_unique_in_tree(this) &&
12810         result.shortest_size + REC_NODE_PTR_SIZE >= page_ptr_max)
12811     {
12812       result.set_too_big(i);
12813     }
12814   }
12815 
12816   return result;
12817 }
12818 
12819 /** Issue a warning that the row is too big. */
ib_warn_row_too_big(THD * thd,const dict_table_t * table)12820 static void ib_warn_row_too_big(THD *thd, const dict_table_t *table)
12821 {
12822   /* FIXME: this row size check should be improved */
12823   /* If prefix is true then a 768-byte prefix is stored
12824   locally for BLOB fields. Refer to dict_table_get_format() */
12825   const bool prefix= !dict_table_has_atomic_blobs(table);
12826 
12827   const ulint free_space=
12828       page_get_free_space_of_empty(table->flags & DICT_TF_COMPACT) / 2;
12829 
12830   push_warning_printf(
12831       thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
12832       "Row size too large (> " ULINTPF "). Changing some columns to TEXT"
12833       " or BLOB %smay help. In current row format, BLOB prefix of"
12834       " %d bytes is stored inline.",
12835       free_space,
12836       prefix ? "or using ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED " : "",
12837       prefix ? DICT_MAX_FIXED_COL_LEN : 0);
12838 }
12839 
row_size_is_acceptable(const dict_index_t & index,bool strict) const12840 bool create_table_info_t::row_size_is_acceptable(
12841     const dict_index_t &index, bool strict) const
12842 {
12843   if ((index.type & DICT_FTS) || index.table->is_system_db)
12844   {
12845     /* Ignore system tables check because innodb_table_stats
12846     maximum row size can not fit on 4k page. */
12847     return true;
12848   }
12849 
12850   const bool innodb_strict_mode= THDVAR(m_thd, strict_mode);
12851   dict_index_t::record_size_info_t info= index.record_size_info();
12852 
12853   if (info.row_is_too_big())
12854   {
12855     ut_ad(info.get_overrun_size() != 0);
12856     ut_ad(info.max_leaf_size != 0);
12857 
12858     const size_t idx= info.get_first_overrun_field_index();
12859     const dict_field_t *field= dict_index_get_nth_field(&index, idx);
12860 
12861     ut_ad((!field->name) == field->col->is_dropped());
12862     if (innodb_strict_mode || global_system_variables.log_warnings > 2)
12863     {
12864       ib::error_or_warn eow(strict && innodb_strict_mode);
12865       if (field->name)
12866         eow << "Cannot add field " << field->name << " in table ";
12867       else
12868         eow << "Cannot add an instantly dropped column in table ";
12869       eow << index.table->name << " because after adding it, the row size is "
12870           << info.get_overrun_size()
12871           << " which is greater than maximum allowed size ("
12872           << info.max_leaf_size << " bytes) for a record on index leaf page.";
12873     }
12874 
12875     if (strict && innodb_strict_mode)
12876       return false;
12877 
12878     ib_warn_row_too_big(m_thd, index.table);
12879   }
12880 
12881   return true;
12882 }
12883 
12884 /** Update a new table in an InnoDB database.
12885 @return error number */
12886 int
create_table_update_dict()12887 create_table_info_t::create_table_update_dict()
12888 {
12889 	dict_table_t*	innobase_table;
12890 
12891 	DBUG_ENTER("create_table_update_dict");
12892 
12893 	innobase_table = dict_table_open_on_name(
12894 		m_table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
12895 
12896 	DBUG_ASSERT(innobase_table != 0);
12897 	if (innobase_table->fts != NULL) {
12898 		if (innobase_table->fts_doc_id_index == NULL) {
12899 			innobase_table->fts_doc_id_index
12900 				= dict_table_get_index_on_name(
12901 					innobase_table, FTS_DOC_ID_INDEX_NAME);
12902 			DBUG_ASSERT(innobase_table->fts_doc_id_index != NULL);
12903 		} else {
12904 			DBUG_ASSERT(innobase_table->fts_doc_id_index
12905 				    == dict_table_get_index_on_name(
12906 						innobase_table,
12907 						FTS_DOC_ID_INDEX_NAME));
12908 		}
12909 	}
12910 
12911 	DBUG_ASSERT((innobase_table->fts == NULL)
12912 		    == (innobase_table->fts_doc_id_index == NULL));
12913 
12914 	innobase_copy_frm_flags_from_create_info(innobase_table, m_create_info);
12915 
12916 	dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
12917 
12918 	/* Load server stopword into FTS cache */
12919 	if (m_flags2 & DICT_TF2_FTS) {
12920 		if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) {
12921 			dict_table_close(innobase_table, FALSE, FALSE);
12922 			srv_active_wake_master_thread();
12923 			DBUG_RETURN(-1);
12924 		}
12925 
12926 		mutex_enter(&dict_sys.mutex);
12927 		fts_optimize_add_table(innobase_table);
12928 		mutex_exit(&dict_sys.mutex);
12929 	}
12930 
12931 	if (const Field* ai = m_form->found_next_number_field) {
12932 		ut_ad(ai->stored_in_db());
12933 
12934 		ib_uint64_t	autoinc = m_create_info->auto_increment_value;
12935 
12936 		if (autoinc == 0) {
12937 			autoinc = 1;
12938 		}
12939 
12940 		innobase_table->autoinc_mutex.lock();
12941 		dict_table_autoinc_initialize(innobase_table, autoinc);
12942 
12943 		if (innobase_table->is_temporary()) {
12944 			/* AUTO_INCREMENT is not persistent for
12945 			TEMPORARY TABLE. Temporary tables are never
12946 			evicted. Keep the counter in memory only. */
12947 		} else {
12948 			const unsigned	col_no = innodb_col_no(ai);
12949 
12950 			innobase_table->persistent_autoinc = 1
12951 				+ dict_table_get_nth_col_pos(
12952 					innobase_table, col_no, NULL);
12953 
12954 			/* Persist the "last used" value, which
12955 			typically is AUTO_INCREMENT - 1.
12956 			In btr_create(), the value 0 was already written. */
12957 			if (--autoinc) {
12958 				btr_write_autoinc(
12959 					dict_table_get_first_index(
12960 						innobase_table),
12961 					autoinc);
12962 			}
12963 		}
12964 
12965 		innobase_table->autoinc_mutex.unlock();
12966 	}
12967 
12968 	innobase_parse_hint_from_comment(m_thd, innobase_table, m_form->s);
12969 
12970 	dict_table_close(innobase_table, FALSE, FALSE);
12971 	DBUG_RETURN(0);
12972 }
12973 
12974 /** Allocate a new trx. */
12975 void
allocate_trx()12976 create_table_info_t::allocate_trx()
12977 {
12978 	m_trx = innobase_trx_allocate(m_thd);
12979 
12980 	m_trx->will_lock = true;
12981 	m_trx->ddl = true;
12982 }
12983 
12984 /** Create a new table to an InnoDB database.
12985 @param[in]	name		Table name, format: "db/table_name".
12986 @param[in]	form		Table format; columns and index information.
12987 @param[in]	create_info	Create info (including create statement string).
12988 @param[in]	file_per_table	whether to create .ibd file
12989 @param[in,out]	trx		dictionary transaction, or NULL to create new
12990 @return	0 if success else error number. */
12991 inline int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info,bool file_per_table,trx_t * trx)12992 ha_innobase::create(
12993 	const char*	name,
12994 	TABLE*		form,
12995 	HA_CREATE_INFO*	create_info,
12996 	bool		file_per_table,
12997 	trx_t*		trx)
12998 {
12999 	int		error;
13000 	char		norm_name[FN_REFLEN];	/* {database}/{tablename} */
13001 	char		remote_path[FN_REFLEN];	/* Absolute path of table */
13002 
13003 	DBUG_ENTER("ha_innobase::create");
13004 
13005 	DBUG_ASSERT(form->s == table_share);
13006 	DBUG_ASSERT(table_share->table_type == TABLE_TYPE_SEQUENCE
13007 		    || table_share->table_type == TABLE_TYPE_NORMAL);
13008 
13009 	create_table_info_t	info(ha_thd(),
13010 				     form,
13011 				     create_info,
13012 				     norm_name,
13013 				     remote_path,
13014 				     file_per_table, trx);
13015 
13016 	if ((error = info.initialize())
13017 	    || (error = info.prepare_create_table(name, !trx))) {
13018 		if (trx) {
13019 			trx_rollback_for_mysql(trx);
13020 			row_mysql_unlock_data_dictionary(trx);
13021 		}
13022 		DBUG_RETURN(error);
13023 	}
13024 
13025 	const bool own_trx = !trx;
13026 
13027 	if (own_trx) {
13028 		info.allocate_trx();
13029 		trx = info.trx();
13030 		/* Latch the InnoDB data dictionary exclusively so that no deadlocks
13031 		or lock waits can happen in it during a table create operation.
13032 		Drop table etc. do this latching in row0mysql.cc. */
13033 		row_mysql_lock_data_dictionary(trx);
13034 		DBUG_ASSERT(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
13035 	}
13036 
13037 	if ((error = info.create_table(own_trx))) {
13038 		/* Drop the being-created table before rollback,
13039 		so that rollback can possibly rename back a table
13040 		that could have been renamed before the failed creation. */
13041 		if (info.drop_before_rollback()) {
13042 			trx->error_state = DB_SUCCESS;
13043 			row_drop_table_for_mysql(info.table_name(),
13044 						 trx, SQLCOM_TRUNCATE, true,
13045 						 false);
13046 		}
13047 		trx_rollback_for_mysql(trx);
13048 		row_mysql_unlock_data_dictionary(trx);
13049 		goto func_exit;
13050 	}
13051 
13052 	innobase_commit_low(trx);
13053 	row_mysql_unlock_data_dictionary(trx);
13054 
13055 	/* Flush the log to reduce probability that the .frm files and
13056 	the InnoDB data dictionary get out-of-sync if the user runs
13057 	with innodb_flush_log_at_trx_commit = 0 */
13058 	log_buffer_flush_to_disk();
13059 
13060 	ut_ad(!srv_read_only_mode);
13061 
13062 	error = info.create_table_update_dict();
13063 
13064 func_exit:
13065 	if (own_trx) {
13066 		trx->free();
13067 	}
13068 
13069 	/* Tell the InnoDB server that there might be work for
13070 	utility threads: */
13071 
13072 	srv_active_wake_master_thread();
13073 
13074 	DBUG_RETURN(error);
13075 }
13076 
13077 /** Create a new table to an InnoDB database.
13078 @param[in]	name		Table name, format: "db/table_name".
13079 @param[in]	form		Table format; columns and index information.
13080 @param[in]	create_info	Create info (including create statement string).
13081 @return	0 if success else error number. */
13082 int
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)13083 ha_innobase::create(
13084 	const char*	name,
13085 	TABLE*		form,
13086 	HA_CREATE_INFO*	create_info)
13087 {
13088 	return create(name, form, create_info, srv_file_per_table);
13089 }
13090 
13091 /*****************************************************************//**
13092 Discards or imports an InnoDB tablespace.
13093 @return 0 == success, -1 == error */
13094 
13095 int
discard_or_import_tablespace(my_bool discard)13096 ha_innobase::discard_or_import_tablespace(
13097 /*======================================*/
13098 	my_bool		discard)	/*!< in: TRUE if discard, else import */
13099 {
13100 
13101 	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
13102 
13103 	ut_a(m_prebuilt->trx != NULL);
13104 	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
13105 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13106 
13107 	if (high_level_read_only) {
13108 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13109 	}
13110 
13111 	if (m_prebuilt->table->is_temporary()) {
13112 		ib_senderrf(
13113 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13114 			ER_CANNOT_DISCARD_TEMPORARY_TABLE);
13115 
13116 		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13117 	}
13118 
13119 	if (m_prebuilt->table->space == fil_system.sys_space) {
13120 		ib_senderrf(
13121 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13122 			ER_TABLE_IN_SYSTEM_TABLESPACE,
13123 			m_prebuilt->table->name.m_name);
13124 
13125 		DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
13126 	}
13127 
13128 	trx_start_if_not_started(m_prebuilt->trx, true);
13129 
13130 	/* Obtain an exclusive lock on the table. */
13131 	dberr_t	err = row_mysql_lock_table(
13132 		m_prebuilt->trx, m_prebuilt->table, LOCK_X,
13133 		discard ? "setting table lock for DISCARD TABLESPACE"
13134 			: "setting table lock for IMPORT TABLESPACE");
13135 
13136 	if (err != DB_SUCCESS) {
13137 		/* unable to lock the table: do nothing */
13138 	} else if (discard) {
13139 
13140 		/* Discarding an already discarded tablespace should be an
13141 		idempotent operation. Also, if the .ibd file is missing the
13142 		user may want to set the DISCARD flag in order to IMPORT
13143 		a new tablespace. */
13144 
13145 		if (!m_prebuilt->table->is_readable()) {
13146 			ib_senderrf(
13147 				m_prebuilt->trx->mysql_thd,
13148 				IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
13149 				m_prebuilt->table->name.m_name);
13150 		}
13151 
13152 		err = row_discard_tablespace_for_mysql(
13153 			m_prebuilt->table->name.m_name, m_prebuilt->trx);
13154 
13155 	} else if (m_prebuilt->table->is_readable()) {
13156 		/* Commit the transaction in order to
13157 		release the table lock. */
13158 		trx_commit_for_mysql(m_prebuilt->trx);
13159 
13160 		ib::error() << "Unable to import tablespace "
13161 			<< m_prebuilt->table->name << " because it already"
13162 			" exists.  Please DISCARD the tablespace"
13163 			" before IMPORT.";
13164 		ib_senderrf(
13165 			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
13166 			ER_TABLESPACE_EXISTS, m_prebuilt->table->name.m_name);
13167 
13168 		DBUG_RETURN(HA_ERR_TABLE_EXIST);
13169 	} else {
13170 		err = row_import_for_mysql(m_prebuilt->table, m_prebuilt);
13171 
13172 		if (err == DB_SUCCESS) {
13173 
13174 			info(HA_STATUS_TIME
13175 			     | HA_STATUS_CONST
13176 			     | HA_STATUS_VARIABLE
13177 			     | HA_STATUS_AUTO);
13178 
13179 			fil_crypt_set_encrypt_tables(srv_encrypt_tables);
13180 		}
13181 	}
13182 
13183 	/* Commit the transaction in order to release the table lock. */
13184 	trx_commit_for_mysql(m_prebuilt->trx);
13185 
13186 	if (discard || err != DB_SUCCESS) {
13187 		DBUG_RETURN(convert_error_code_to_mysql(
13188 				    err, m_prebuilt->table->flags, NULL));
13189 	}
13190 
13191 	if (dict_stats_is_persistent_enabled(m_prebuilt->table)) {
13192 		dberr_t		ret;
13193 
13194 		/* Adjust the persistent statistics. */
13195 		ret = dict_stats_update(m_prebuilt->table,
13196 					DICT_STATS_RECALC_PERSISTENT);
13197 
13198 		if (ret != DB_SUCCESS) {
13199 			push_warning_printf(
13200 				ha_thd(),
13201 				Sql_condition::WARN_LEVEL_WARN,
13202 				ER_ALTER_INFO,
13203 				"Error updating stats for table '%s'"
13204 				" after table rebuild: %s",
13205 				m_prebuilt->table->name.m_name,
13206 				ut_strerr(ret));
13207 		}
13208 	}
13209 
13210 	DBUG_RETURN(0);
13211 }
13212 
13213 /**
13214 Drops a table from an InnoDB database. Before calling this function,
13215 MySQL calls innobase_commit to commit the transaction of the current user.
13216 Then the current user cannot have locks set on the table. Drop table
13217 operation inside InnoDB will remove all locks any user has on the table
13218 inside InnoDB.
13219 @param[in]	name	table name
13220 @param[in]	sqlcom	SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ...
13221 @return error number */
delete_table(const char * name,enum_sql_command sqlcom)13222 inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom)
13223 {
13224 	dberr_t	err;
13225 	THD*	thd = ha_thd();
13226 	char	norm_name[FN_REFLEN];
13227 
13228 	DBUG_ENTER("ha_innobase::delete_table");
13229 
13230 	DBUG_EXECUTE_IF(
13231 		"test_normalize_table_name_low",
13232 		test_normalize_table_name_low();
13233 	);
13234 	DBUG_EXECUTE_IF(
13235 		"test_ut_format_name",
13236 		test_ut_format_name();
13237 	);
13238 
13239 	/* Strangely, MySQL passes the table name without the '.frm'
13240 	extension, in contrast to ::create */
13241 	normalize_table_name(norm_name, name);
13242 
13243 	if (high_level_read_only) {
13244 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13245 	}
13246 
13247 	trx_t*	parent_trx = check_trx_exists(thd);
13248 
13249 	/* Remove the to-be-dropped table from the list of modified tables
13250 	by parent_trx. Otherwise we may end up with an orphaned pointer to
13251 	the table object from parent_trx::mod_tables. This could happen in:
13252 	SET AUTOCOMMIT=0;
13253 	CREATE TABLE t (PRIMARY KEY (a)) ENGINE=INNODB SELECT 1 AS a UNION
13254 	ALL SELECT 1 AS a; */
13255 	trx_mod_tables_t::const_iterator	iter;
13256 
13257 	for (iter = parent_trx->mod_tables.begin();
13258 	     iter != parent_trx->mod_tables.end();
13259 	     ++iter) {
13260 
13261 		dict_table_t*	table_to_drop = iter->first;
13262 
13263 		if (strcmp(norm_name, table_to_drop->name.m_name) == 0) {
13264 			parent_trx->mod_tables.erase(table_to_drop);
13265 			break;
13266 		}
13267 	}
13268 
13269 	trx_t*	trx = innobase_trx_allocate(thd);
13270 
13271 	ulint	name_len = strlen(name);
13272 
13273 	ut_a(name_len < 1000);
13274 
13275 	trx->will_lock = true;
13276 
13277 	/* Drop the table in InnoDB */
13278 
13279 	err = row_drop_table_for_mysql(norm_name, trx, sqlcom);
13280 
13281 	if (err == DB_TABLE_NOT_FOUND
13282 	    && innobase_get_lower_case_table_names() == 1) {
13283 		char*	is_part = is_partition(norm_name);
13284 
13285 		if (is_part) {
13286 			char	par_case_name[FN_REFLEN];
13287 
13288 #ifndef __WIN__
13289 			/* Check for the table using lower
13290 			case name, including the partition
13291 			separator "P" */
13292 			strcpy(par_case_name, norm_name);
13293 			innobase_casedn_str(par_case_name);
13294 #else
13295 			/* On Windows platfrom, check
13296 			whether there exists table name in
13297 			system table whose name is
13298 			not being normalized to lower case */
13299 			normalize_table_name_c_low(
13300 				par_case_name, name, FALSE);
13301 #endif
13302 			err = row_drop_table_for_mysql(
13303 				par_case_name, trx, sqlcom);
13304 		}
13305 	}
13306 
13307 	if (err == DB_TABLE_NOT_FOUND) {
13308 		/* Test to drop all tables which matches db/tablename + '#'.
13309 		Only partitions can have '#' as non-first character in
13310 		the table name!
13311 
13312 		Temporary table names always start with '#', partitions are
13313 		the only 'tables' that can have '#' after the first character
13314 		and table name must have length > 0. User tables cannot have
13315 		'#' since it would be translated to @0023. Therefor this should
13316 		only match partitions. */
13317 		uint	len = (uint) strlen(norm_name);
13318 		ulint	num_partitions;
13319 		ut_a(len < FN_REFLEN);
13320 		norm_name[len] = '#';
13321 		norm_name[len + 1] = 0;
13322 		err = row_drop_database_for_mysql(norm_name, trx,
13323 			&num_partitions);
13324 		norm_name[len] = 0;
13325 		table_name_t tbl_name(norm_name);
13326 		if (num_partitions == 0 && !tbl_name.is_temporary()) {
13327 			ib::error() << "Table " << tbl_name <<
13328 				" does not exist in the InnoDB"
13329 				" internal data dictionary though MariaDB is"
13330 				" trying to drop it. Have you copied the .frm"
13331 				" file of the table to the MariaDB database"
13332 				" directory from another database? "
13333 				<< TROUBLESHOOTING_MSG;
13334 		}
13335 		if (num_partitions == 0) {
13336 			err = DB_TABLE_NOT_FOUND;
13337 		}
13338 	}
13339 
13340 	if (err == DB_TABLE_NOT_FOUND
13341 	    && innobase_get_lower_case_table_names() == 1) {
13342 		char*	is_part = is_partition(norm_name);
13343 
13344 		if (is_part != NULL) {
13345 			char	par_case_name[FN_REFLEN];
13346 
13347 #ifndef _WIN32
13348 			/* Check for the table using lower
13349 			case name, including the partition
13350 			separator "P" */
13351 			strcpy(par_case_name, norm_name);
13352 			innobase_casedn_str(par_case_name);
13353 #else
13354 			/* On Windows platfrom, check
13355 			whether there exists table name in
13356 			system table whose name is
13357 			not being normalized to lower case */
13358 			create_table_info_t::normalize_table_name_low(
13359 				par_case_name, name, FALSE);
13360 #endif /* _WIN32 */
13361 			err = row_drop_table_for_mysql(
13362 				par_case_name, trx, sqlcom, true);
13363 		}
13364 	}
13365 
13366 	ut_ad(!srv_read_only_mode);
13367 	/* Flush the log to reduce probability that the .frm files and
13368 	the InnoDB data dictionary get out-of-sync if the user runs
13369 	with innodb_flush_log_at_trx_commit = 0 */
13370 
13371 	log_buffer_flush_to_disk();
13372 
13373 	innobase_commit_low(trx);
13374 
13375 	trx->free();
13376 
13377 	DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
13378 }
13379 
13380 /** Drop an InnoDB table.
13381 @param[in]	name	table name
13382 @return error number */
delete_table(const char * name)13383 int ha_innobase::delete_table(const char* name)
13384 {
13385 	enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd()));
13386 	/* SQLCOM_TRUNCATE should be passed via ha_innobase::truncate() only.
13387 
13388 	On client disconnect, when dropping temporary tables, the
13389 	previous sqlcom would not be overwritten.  In such a case, we
13390 	will have thd_kill_level() != NOT_KILLED, !m_prebuilt can
13391 	hold, and sqlcom could be anything, including TRUNCATE.
13392 
13393 	The sqlcom only matters for persistent tables; no persistent
13394 	metadata or FOREIGN KEY metadata is kept for temporary
13395 	tables. Therefore, we relax the assertion. If there is a bug
13396 	that slips through this assertion due to !m_prebuilt, the
13397 	worst impact should be that on DROP TABLE of a persistent
13398 	table, FOREIGN KEY constraints will be ignored and their
13399 	metadata will not be removed. */
13400 	DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE
13401 		    || (thd_kill_level(ha_thd()) != THD_IS_NOT_KILLED
13402 			&& (!m_prebuilt
13403 			    || m_prebuilt->table->is_temporary())));
13404 	return delete_table(name, sqlcom);
13405 }
13406 
13407 /** Remove all tables in the named database inside InnoDB.
13408 @param[in]	hton	handlerton from InnoDB
13409 @param[in]	path	Database path; Inside InnoDB the name of the last
13410 directory in the path is used as the database name.
13411 For example, in 'mysql/data/test' the database name is 'test'. */
13412 
13413 static
13414 void
innobase_drop_database(handlerton * hton,char * path)13415 innobase_drop_database(
13416 	handlerton*	hton,
13417 	char*		path)
13418 {
13419 	char*	namebuf;
13420 
13421 	/* Get the transaction associated with the current thd, or create one
13422 	if not yet created */
13423 
13424 	DBUG_ASSERT(hton == innodb_hton_ptr);
13425 
13426 	if (high_level_read_only) {
13427 		return;
13428 	}
13429 
13430 	THD*	thd = current_thd;
13431 
13432 	ulint	len = 0;
13433 	char*	ptr = strend(path) - 2;
13434 
13435 	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
13436 		ptr--;
13437 		len++;
13438 	}
13439 
13440 	ptr++;
13441 	namebuf = (char*) my_malloc(/*PSI_INSTRUMENT_ME,*/ (uint) len + 2, MYF(0));
13442 
13443 	memcpy(namebuf, ptr, len);
13444 	namebuf[len] = '/';
13445 	namebuf[len + 1] = '\0';
13446 
13447 #ifdef	_WIN32
13448 	innobase_casedn_str(namebuf);
13449 #endif /* _WIN32 */
13450 
13451 	trx_t*	trx = innobase_trx_allocate(thd);
13452 	trx->will_lock = true;
13453 
13454 	ulint	dummy;
13455 
13456 	row_drop_database_for_mysql(namebuf, trx, &dummy);
13457 
13458 	my_free(namebuf);
13459 
13460 	/* Flush the log to reduce probability that the .frm files and
13461 	the InnoDB data dictionary get out-of-sync if the user runs
13462 	with innodb_flush_log_at_trx_commit = 0 */
13463 
13464 	log_buffer_flush_to_disk();
13465 
13466 	innobase_commit_low(trx);
13467 
13468 	trx->free();
13469 }
13470 
13471 /** Rename an InnoDB table.
13472 @param[in,out]	trx	InnoDB data dictionary transaction
13473 @param[in]	from	old table name
13474 @param[in]	to	new table name
13475 @param[in]	commit	whether to commit trx (and to enforce FOREIGN KEY)
13476 @return DB_SUCCESS or error code */
innobase_rename_table(trx_t * trx,const char * from,const char * to,bool commit)13477 inline dberr_t innobase_rename_table(trx_t *trx, const char *from,
13478                                      const char *to, bool commit)
13479 {
13480 	dberr_t	error;
13481 	char	norm_to[FN_REFLEN];
13482 	char	norm_from[FN_REFLEN];
13483 
13484 	DBUG_ENTER("innobase_rename_table");
13485 	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX
13486 		    || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
13487 
13488 	ut_ad(!srv_read_only_mode);
13489 
13490 	normalize_table_name(norm_to, to);
13491 	normalize_table_name(norm_from, from);
13492 
13493 	DEBUG_SYNC_C("innodb_rename_table_ready");
13494 
13495 	trx_start_if_not_started(trx, true);
13496 	ut_ad(trx->will_lock);
13497 
13498 	if (commit) {
13499 		/* Serialize data dictionary operations with dictionary mutex:
13500 		no deadlocks can occur then in these operations. */
13501 		row_mysql_lock_data_dictionary(trx);
13502 	}
13503 
13504 	dict_table_t*   table = dict_table_open_on_name(
13505 		norm_from, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
13506 
13507 	/* Since DICT_BG_YIELD has sleep for 250 milliseconds,
13508 	Convert lock_wait_timeout unit from second to 250 milliseconds */
13509 	long int lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd) * 4;
13510 	if (table != NULL) {
13511 		if (commit) {
13512 			dict_stats_wait_bg_to_stop_using_table(table, trx);
13513 		}
13514 		for (dict_index_t* index = dict_table_get_first_index(table);
13515 		     index != NULL;
13516 		     index = dict_table_get_next_index(index)) {
13517 
13518 			if (index->type & DICT_FTS) {
13519 				/* Found */
13520 				while (index->index_fts_syncing
13521 					&& !trx_is_interrupted(trx)
13522 					&& (lock_wait_timeout--) > 0) {
13523 					DICT_BG_YIELD(trx);
13524 				}
13525 			}
13526 		}
13527 		if (!commit) {
13528 			dict_table_close(table, TRUE, FALSE);
13529 		}
13530 	}
13531 
13532 	/* FTS sync is in progress. We shall timeout this operation */
13533 	if (lock_wait_timeout < 0) {
13534 		error = DB_LOCK_WAIT_TIMEOUT;
13535 		goto func_exit;
13536 	}
13537 
13538 	error = row_rename_table_for_mysql(norm_from, norm_to, trx, commit,
13539 					   commit);
13540 
13541 	if (error != DB_SUCCESS) {
13542 		if (error == DB_TABLE_NOT_FOUND
13543 		    && innobase_get_lower_case_table_names() == 1) {
13544 			char*	is_part = is_partition(norm_from);
13545 
13546 			if (is_part) {
13547 				char	par_case_name[FN_REFLEN];
13548 #ifndef _WIN32
13549 				/* Check for the table using lower
13550 				case name, including the partition
13551 				separator "P" */
13552 				strcpy(par_case_name, norm_from);
13553 				innobase_casedn_str(par_case_name);
13554 #else
13555 				/* On Windows platfrom, check
13556 				whether there exists table name in
13557 				system table whose name is
13558 				not being normalized to lower case */
13559 				create_table_info_t::normalize_table_name_low(
13560 					par_case_name, from, FALSE);
13561 #endif /* _WIN32 */
13562 				trx_start_if_not_started(trx, true);
13563 				error = row_rename_table_for_mysql(
13564 					par_case_name, norm_to, trx,
13565 					true, false);
13566 			}
13567 		}
13568 
13569 		if (error == DB_SUCCESS) {
13570 #ifndef _WIN32
13571 			sql_print_warning("Rename partition table %s"
13572 					  " succeeds after converting to lower"
13573 					  " case. The table may have"
13574 					  " been moved from a case"
13575 					  " in-sensitive file system.\n",
13576 					  norm_from);
13577 #else
13578 			sql_print_warning("Rename partition table %s"
13579 					  " succeeds after skipping the step to"
13580 					  " lower case the table name."
13581 					  " The table may have been"
13582 					  " moved from a case sensitive"
13583 					  " file system.\n",
13584 					  norm_from);
13585 #endif /* _WIN32 */
13586 		}
13587 	}
13588 
13589 func_exit:
13590 	if (commit) {
13591 		if (table) {
13592 			table->stats_bg_flag &= ~BG_STAT_SHOULD_QUIT;
13593 			dict_table_close(table, TRUE, FALSE);
13594 		}
13595 		row_mysql_unlock_data_dictionary(trx);
13596 	}
13597 
13598 	/* Flush the log to reduce probability that the .frm
13599 	files and the InnoDB data dictionary get out-of-sync
13600 	if the user runs with innodb_flush_log_at_trx_commit = 0 */
13601 
13602 	log_buffer_flush_to_disk();
13603 
13604 	DBUG_RETURN(error);
13605 }
13606 
13607 /** TRUNCATE TABLE
13608 @return	error code
13609 @retval	0	on success */
truncate()13610 int ha_innobase::truncate()
13611 {
13612 	DBUG_ENTER("ha_innobase::truncate");
13613 
13614 	if (high_level_read_only) {
13615 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13616 	}
13617 
13618 	update_thd();
13619 
13620 	HA_CREATE_INFO	info;
13621 	mem_heap_t*	heap = mem_heap_create(1000);
13622 	dict_table_t*	ib_table = m_prebuilt->table;
13623 	const time_t	update_time = ib_table->update_time;
13624 	const ulint	stored_lock = m_prebuilt->stored_select_lock_type;
13625 	info.init();
13626 	update_create_info_from_table(&info, table);
13627 
13628 	if (ib_table->is_temporary()) {
13629 		info.options|= HA_LEX_CREATE_TMP_TABLE;
13630 	} else {
13631 		dict_get_and_save_data_dir_path(ib_table, false);
13632 	}
13633 
13634 	char* data_file_name = ib_table->data_dir_path;
13635 
13636 	if (data_file_name) {
13637 		info.data_file_name = data_file_name
13638 			= mem_heap_strdup(heap, data_file_name);
13639 	}
13640 
13641 	const char* temp_name = dict_mem_create_temporary_tablename(
13642 		heap, ib_table->name.m_name, ib_table->id);
13643 	const char* name = mem_heap_strdup(heap, ib_table->name.m_name);
13644 	trx_t*	trx = innobase_trx_allocate(m_user_thd);
13645 	trx->will_lock = true;
13646 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
13647 	row_mysql_lock_data_dictionary(trx);
13648 	dict_stats_wait_bg_to_stop_using_table(ib_table, trx);
13649 
13650 	int err = convert_error_code_to_mysql(
13651 		innobase_rename_table(trx, ib_table->name.m_name, temp_name,
13652 				      false),
13653 		ib_table->flags, m_user_thd);
13654 	if (err) {
13655 		trx_rollback_for_mysql(trx);
13656 		row_mysql_unlock_data_dictionary(trx);
13657 	} else {
13658 		switch (dict_tf_get_rec_format(ib_table->flags)) {
13659 		case REC_FORMAT_REDUNDANT:
13660 			info.row_type = ROW_TYPE_REDUNDANT;
13661 			break;
13662 		case REC_FORMAT_COMPACT:
13663 			info.row_type = ROW_TYPE_COMPACT;
13664 			break;
13665 		case REC_FORMAT_COMPRESSED:
13666 			info.row_type = ROW_TYPE_COMPRESSED;
13667 			break;
13668 		case REC_FORMAT_DYNAMIC:
13669 			info.row_type = ROW_TYPE_DYNAMIC;
13670 			break;
13671 		}
13672 
13673 		err = create(name, table, &info,
13674 			     ib_table->is_temporary()
13675 			     || dict_table_is_file_per_table(ib_table), trx);
13676 	}
13677 
13678 	trx->free();
13679 
13680 	if (!err) {
13681 		/* Reopen the newly created table, and drop the
13682 		original table that was renamed to temp_name. */
13683 
13684 		row_prebuilt_t* prebuilt = m_prebuilt;
13685 		uchar* upd_buf = m_upd_buf;
13686 		ulint upd_buf_size = m_upd_buf_size;
13687 		/* Mimic ha_innobase::close(). */
13688 		m_prebuilt = NULL;
13689 		m_upd_buf = NULL;
13690 		m_upd_buf_size = 0;
13691 		err = open(name, 0, 0);
13692 		if (!err) {
13693 			m_prebuilt->stored_select_lock_type = stored_lock;
13694 			m_prebuilt->table->update_time = update_time;
13695 			row_prebuilt_free(prebuilt, FALSE);
13696 			delete_table(temp_name, SQLCOM_TRUNCATE);
13697 			my_free(upd_buf);
13698 		} else {
13699 			/* Revert to the old table before truncation. */
13700 			m_prebuilt = prebuilt;
13701 			m_upd_buf = upd_buf;
13702 			m_upd_buf_size = upd_buf_size;
13703 		}
13704 	}
13705 
13706 	mem_heap_free(heap);
13707 	DBUG_RETURN(err);
13708 }
13709 
13710 /*********************************************************************//**
13711 Renames an InnoDB table.
13712 @return 0 or error code */
13713 
13714 int
rename_table(const char * from,const char * to)13715 ha_innobase::rename_table(
13716 /*======================*/
13717 	const char*	from,	/*!< in: old name of the table */
13718 	const char*	to)	/*!< in: new name of the table */
13719 {
13720 	THD*	thd = ha_thd();
13721 
13722 	DBUG_ENTER("ha_innobase::rename_table");
13723 
13724 	if (high_level_read_only) {
13725 		ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
13726 		DBUG_RETURN(HA_ERR_TABLE_READONLY);
13727 	}
13728 
13729 	trx_t*	trx = innobase_trx_allocate(thd);
13730 	trx->will_lock = true;
13731 	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
13732 
13733 	dberr_t	error = innobase_rename_table(trx, from, to, true);
13734 
13735 	DEBUG_SYNC(thd, "after_innobase_rename_table");
13736 
13737 	innobase_commit_low(trx);
13738 
13739 	trx->free();
13740 
13741 	if (error == DB_SUCCESS) {
13742 		char	norm_from[MAX_FULL_NAME_LEN];
13743 		char	norm_to[MAX_FULL_NAME_LEN];
13744 		char	errstr[512];
13745 		dberr_t	ret;
13746 
13747 		normalize_table_name(norm_from, from);
13748 		normalize_table_name(norm_to, to);
13749 
13750 		ret = dict_stats_rename_table(norm_from, norm_to,
13751 					      errstr, sizeof(errstr));
13752 
13753 		if (ret != DB_SUCCESS) {
13754 			ib::error() << errstr;
13755 
13756 			push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
13757 				     ER_LOCK_WAIT_TIMEOUT, errstr);
13758 		}
13759 	}
13760 
13761 	/* Add a special case to handle the Duplicated Key error
13762 	and return DB_ERROR instead.
13763 	This is to avoid a possible SIGSEGV error from mysql error
13764 	handling code. Currently, mysql handles the Duplicated Key
13765 	error by re-entering the storage layer and getting dup key
13766 	info by calling get_dup_key(). This operation requires a valid
13767 	table handle ('row_prebuilt_t' structure) which could no
13768 	longer be available in the error handling stage. The suggested
13769 	solution is to report a 'table exists' error message (since
13770 	the dup key error here is due to an existing table whose name
13771 	is the one we are trying to rename to) and return the generic
13772 	error code. */
13773 	if (error == DB_DUPLICATE_KEY) {
13774 		my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
13775 
13776 		error = DB_ERROR;
13777 	} else if (error == DB_LOCK_WAIT_TIMEOUT) {
13778 		my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0), to);
13779 
13780 		error = DB_LOCK_WAIT;
13781 	}
13782 
13783 	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
13784 }
13785 
13786 /*********************************************************************//**
13787 Estimates the number of index records in a range.
13788 @return estimated number of rows */
13789 
13790 ha_rows
records_in_range(uint keynr,key_range * min_key,key_range * max_key)13791 ha_innobase::records_in_range(
13792 /*==========================*/
13793 	uint			keynr,		/*!< in: index number */
13794 	key_range		*min_key,	/*!< in: start key value of the
13795 						range, may also be 0 */
13796 	key_range		*max_key)	/*!< in: range end key val, may
13797 						also be 0 */
13798 {
13799 	KEY*		key;
13800 	dict_index_t*	index;
13801 	dtuple_t*	range_start;
13802 	dtuple_t*	range_end;
13803 	ha_rows		n_rows;
13804 	page_cur_mode_t	mode1;
13805 	page_cur_mode_t	mode2;
13806 	mem_heap_t*	heap;
13807 
13808 	DBUG_ENTER("records_in_range");
13809 
13810 	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
13811 
13812 	m_prebuilt->trx->op_info = "estimating records in index range";
13813 
13814 	active_index = keynr;
13815 
13816 	key = table->key_info + active_index;
13817 
13818 	index = innobase_get_index(keynr);
13819 
13820 	/* There exists possibility of not being able to find requested
13821 	index due to inconsistency between MySQL and InoDB dictionary info.
13822 	Necessary message should have been printed in innobase_get_index() */
13823 	if (!m_prebuilt->table->space) {
13824 		n_rows = HA_POS_ERROR;
13825 		goto func_exit;
13826 	}
13827 	if (!index) {
13828 		n_rows = HA_POS_ERROR;
13829 		goto func_exit;
13830 	}
13831 	if (index->is_corrupted()) {
13832 		n_rows = HA_ERR_INDEX_CORRUPT;
13833 		goto func_exit;
13834 	}
13835 	if (!row_merge_is_index_usable(m_prebuilt->trx, index)) {
13836 		n_rows = HA_ERR_TABLE_DEF_CHANGED;
13837 		goto func_exit;
13838 	}
13839 
13840 	heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t)
13841 				    + sizeof(dtuple_t)));
13842 
13843 	range_start = dtuple_create(heap, key->ext_key_parts);
13844 	dict_index_copy_types(range_start, index, key->ext_key_parts);
13845 
13846 	range_end = dtuple_create(heap, key->ext_key_parts);
13847 	dict_index_copy_types(range_end, index, key->ext_key_parts);
13848 
13849 	row_sel_convert_mysql_key_to_innobase(
13850 		range_start,
13851 		m_prebuilt->srch_key_val1,
13852 		m_prebuilt->srch_key_val_len,
13853 		index,
13854 		(byte*) (min_key ? min_key->key : (const uchar*) 0),
13855 		(ulint) (min_key ? min_key->length : 0));
13856 
13857 	DBUG_ASSERT(min_key
13858 		    ? range_start->n_fields > 0
13859 		    : range_start->n_fields == 0);
13860 
13861 	row_sel_convert_mysql_key_to_innobase(
13862 		range_end,
13863 		m_prebuilt->srch_key_val2,
13864 		m_prebuilt->srch_key_val_len,
13865 		index,
13866 		(byte*) (max_key ? max_key->key : (const uchar*) 0),
13867 		(ulint) (max_key ? max_key->length : 0));
13868 
13869 	DBUG_ASSERT(max_key
13870 		    ? range_end->n_fields > 0
13871 		    : range_end->n_fields == 0);
13872 
13873 	mode1 = convert_search_mode_to_innobase(
13874 		min_key ? min_key->flag : HA_READ_KEY_EXACT);
13875 
13876 	mode2 = convert_search_mode_to_innobase(
13877 		max_key ? max_key->flag : HA_READ_KEY_EXACT);
13878 
13879 	if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
13880 
13881 		if (dict_index_is_spatial(index)) {
13882 			/*Only min_key used in spatial index. */
13883 			n_rows = rtr_estimate_n_rows_in_range(
13884 				index, range_start, mode1);
13885 		} else {
13886 			n_rows = btr_estimate_n_rows_in_range(
13887 				index, range_start, mode1, range_end, mode2);
13888 		}
13889 	} else {
13890 
13891 		n_rows = HA_POS_ERROR;
13892 	}
13893 
13894 	mem_heap_free(heap);
13895 
13896 	DBUG_EXECUTE_IF(
13897 		"print_btr_estimate_n_rows_in_range_return_value",
13898 		push_warning_printf(
13899 			ha_thd(), Sql_condition::WARN_LEVEL_WARN,
13900 			ER_NO_DEFAULT,
13901 			"btr_estimate_n_rows_in_range(): %lld",
13902                         (longlong) n_rows);
13903 	);
13904 
13905 func_exit:
13906 
13907 	m_prebuilt->trx->op_info = (char*)"";
13908 
13909 	/* The MySQL optimizer seems to believe an estimate of 0 rows is
13910 	always accurate and may return the result 'Empty set' based on that.
13911 	The accuracy is not guaranteed, and even if it were, for a locking
13912 	read we should anyway perform the search to set the next-key lock.
13913 	Add 1 to the value to make sure MySQL does not make the assumption! */
13914 
13915 	if (n_rows == 0) {
13916 		n_rows = 1;
13917 	}
13918 
13919 	DBUG_RETURN((ha_rows) n_rows);
13920 }
13921 
13922 /*********************************************************************//**
13923 Gives an UPPER BOUND to the number of rows in a table. This is used in
13924 filesort.cc.
13925 @return upper bound of rows */
13926 
13927 ha_rows
estimate_rows_upper_bound()13928 ha_innobase::estimate_rows_upper_bound()
13929 /*====================================*/
13930 {
13931 	const dict_index_t*	index;
13932 	ulonglong		estimate;
13933 	ulonglong		local_data_file_length;
13934 
13935 	DBUG_ENTER("estimate_rows_upper_bound");
13936 
13937 	/* We do not know if MySQL can call this function before calling
13938 	external_lock(). To be safe, update the thd of the current table
13939 	handle. */
13940 
13941 	update_thd(ha_thd());
13942 
13943 	m_prebuilt->trx->op_info = "calculating upper bound for table rows";
13944 
13945 	index = dict_table_get_first_index(m_prebuilt->table);
13946 
13947 	ulint	stat_n_leaf_pages = index->stat_n_leaf_pages;
13948 
13949 	ut_a(stat_n_leaf_pages > 0);
13950 
13951 	local_data_file_length = ulonglong(stat_n_leaf_pages)
13952 		<< srv_page_size_shift;
13953 
13954 	/* Calculate a minimum length for a clustered index record and from
13955 	that an upper bound for the number of rows. Since we only calculate
13956 	new statistics in row0mysql.cc when a table has grown by a threshold
13957 	factor, we must add a safety factor 2 in front of the formula below. */
13958 
13959 	estimate = 2 * local_data_file_length
13960 		/ dict_index_calc_min_rec_len(index);
13961 
13962 	m_prebuilt->trx->op_info = "";
13963 
13964         /* Set num_rows less than MERGEBUFF to simulate the case where we do
13965         not have enough space to merge the externally sorted file blocks. */
13966         DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF",
13967                         estimate = 2;
13968                         DBUG_SET("-d,set_num_rows_lt_MERGEBUFF");
13969                        );
13970 
13971 	DBUG_RETURN((ha_rows) estimate);
13972 }
13973 
13974 /*********************************************************************//**
13975 How many seeks it will take to read through the table. This is to be
13976 comparable to the number returned by records_in_range so that we can
13977 decide if we should scan the table or use keys.
13978 @return estimated time measured in disk seeks */
13979 
13980 double
scan_time()13981 ha_innobase::scan_time()
13982 /*====================*/
13983 {
13984 	/* Since MySQL seems to favor table scans too much over index
13985 	searches, we pretend that a sequential read takes the same time
13986 	as a random disk read, that is, we do not divide the following
13987 	by 10, which would be physically realistic. */
13988 
13989 	/* The locking below is disabled for performance reasons. Without
13990 	it we could end up returning uninitialized value to the caller,
13991 	which in the worst case could make some query plan go bogus or
13992 	issue a Valgrind warning. */
13993 	if (m_prebuilt == NULL) {
13994 		/* In case of derived table, Optimizer will try to fetch stat
13995 		for table even before table is create or open. In such
13996 		cases return default value of 1.
13997 		TODO: This will be further improved to return some approximate
13998 		estimate but that would also needs pre-population of stats
13999 		structure. As of now approach is in sync with MyISAM. */
14000 		return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
14001 	}
14002 
14003 	ulint	stat_clustered_index_size;
14004 
14005 	ut_a(m_prebuilt->table->stat_initialized);
14006 
14007 	stat_clustered_index_size =
14008 		m_prebuilt->table->stat_clustered_index_size;
14009 
14010 	return((double) stat_clustered_index_size);
14011 }
14012 
14013 /******************************************************************//**
14014 Calculate the time it takes to read a set of ranges through an index
14015 This enables us to optimise reads for clustered indexes.
14016 @return estimated time measured in disk seeks */
14017 
14018 double
read_time(uint index,uint ranges,ha_rows rows)14019 ha_innobase::read_time(
14020 /*===================*/
14021 	uint	index,	/*!< in: key number */
14022 	uint	ranges,	/*!< in: how many ranges */
14023 	ha_rows rows)	/*!< in: estimated number of rows in the ranges */
14024 {
14025 	ha_rows total_rows;
14026 
14027 	if (index != table->s->primary_key) {
14028 		/* Not clustered */
14029 		return(handler::read_time(index, ranges, rows));
14030 	}
14031 
14032 	/* Assume that the read time is proportional to the scan time for all
14033 	rows + at most one seek per range. */
14034 
14035 	double	time_for_scan = scan_time();
14036 
14037 	if ((total_rows = estimate_rows_upper_bound()) < rows) {
14038 
14039 		return(time_for_scan);
14040 	}
14041 
14042 	return(ranges + (double) rows / (double) total_rows * time_for_scan);
14043 }
14044 
/** Update the system variable with the given value of the InnoDB
buffer pool size. The value is stored unchanged in
innobase_buffer_pool_size; no validation is done here.
@param[in]	buf_pool_size	given value of buffer pool size. */
void
innodb_set_buf_pool_size(ulonglong buf_pool_size)
{
	innobase_buffer_pool_size = buf_pool_size;
}
14053 
14054 /*********************************************************************//**
14055 Calculates the key number used inside MySQL for an Innobase index.
14056 @return the key number used inside MySQL */
14057 static
14058 unsigned
innobase_get_mysql_key_number_for_index(const TABLE * table,dict_table_t * ib_table,const dict_index_t * index)14059 innobase_get_mysql_key_number_for_index(
14060 /*====================================*/
14061 	const TABLE*		table,	/*!< in: table in MySQL data
14062 					dictionary */
14063 	dict_table_t*		ib_table,/*!< in: table in InnoDB data
14064 					dictionary */
14065 	const dict_index_t*	index)	/*!< in: index */
14066 {
14067 	const dict_index_t*	ind;
14068 	unsigned int		i;
14069 
14070 	/* If index does not belong to the table object of share structure
14071 	(ib_table comes from the share structure) search the index->table
14072 	object instead */
14073 	if (index->table != ib_table) {
14074 		i = 0;
14075 		ind = dict_table_get_first_index(index->table);
14076 
14077 		while (index != ind) {
14078 			ind = dict_table_get_next_index(ind);
14079 			i++;
14080 		}
14081 
14082 		if (dict_index_is_auto_gen_clust(index)) {
14083 			ut_a(i > 0);
14084 			i--;
14085 		}
14086 
14087 		return(i);
14088 	}
14089 
14090 	/* Directly find matching index with information from mysql TABLE
14091 	structure and InnoDB dict_index_t list */
14092 	for (i = 0; i < table->s->keys; i++) {
14093 		ind = dict_table_get_index_on_name(
14094 			ib_table, table->key_info[i].name.str);
14095 
14096 		if (index == ind) {
14097 			return(i);
14098 		}
14099 	}
14100 
14101 	/* Loop through each index of the table and lock them */
14102 	for (ind = dict_table_get_first_index(ib_table);
14103 	     ind != NULL;
14104 	     ind = dict_table_get_next_index(ind)) {
14105 		if (index == ind) {
14106 			/* Temp index is internal to InnoDB, that is
14107 			not present in the MySQL index list, so no
14108 			need to print such mismatch warning. */
14109 			if (index->is_committed()) {
14110 				sql_print_warning(
14111 					"Found index %s in InnoDB index list"
14112 					" but not its MariaDB index number."
14113 					" It could be an InnoDB internal"
14114 					" index.",
14115 					index->name());
14116 			}
14117 			return(~0U);
14118 		}
14119 	}
14120 
14121 	ut_error;
14122 
14123 	return(~0U);
14124 }
14125 
14126 /*********************************************************************//**
14127 Calculate Record Per Key value. Need to exclude the NULL value if
14128 innodb_stats_method is set to "nulls_ignored"
14129 @return estimated record per key value */
14130 rec_per_key_t
innodb_rec_per_key(dict_index_t * index,ulint i,ha_rows records)14131 innodb_rec_per_key(
14132 /*===============*/
14133 	dict_index_t*	index,		/*!< in: dict_index_t structure */
14134 	ulint		i,		/*!< in: the column we are
14135 					calculating rec per key */
14136 	ha_rows		records)	/*!< in: estimated total records */
14137 {
14138 	rec_per_key_t	rec_per_key;
14139 	ib_uint64_t	n_diff;
14140 
14141 	ut_a(index->table->stat_initialized);
14142 
14143 	ut_ad(i < dict_index_get_n_unique(index));
14144 	ut_ad(!dict_index_is_spatial(index));
14145 
14146 	if (records == 0) {
14147 		/* "Records per key" is meaningless for empty tables.
14148 		Return 1.0 because that is most convenient to the Optimizer. */
14149 		return(1.0);
14150 	}
14151 
14152 	n_diff = index->stat_n_diff_key_vals[i];
14153 
14154 	if (n_diff == 0) {
14155 
14156 		rec_per_key = static_cast<rec_per_key_t>(records);
14157 	} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
14158 		ib_uint64_t	n_null;
14159 		ib_uint64_t	n_non_null;
14160 
14161 		n_non_null = index->stat_n_non_null_key_vals[i];
14162 
14163 		/* In theory, index->stat_n_non_null_key_vals[i]
14164 		should always be less than the number of records.
14165 		Since this is statistics value, the value could
14166 		have slight discrepancy. But we will make sure
14167 		the number of null values is not a negative number. */
14168 		if (records < n_non_null) {
14169 			n_null = 0;
14170 		} else {
14171 			n_null = records - n_non_null;
14172 		}
14173 
14174 		/* If the number of NULL values is the same as or
14175 		larger than that of the distinct values, we could
14176 		consider that the table consists mostly of NULL value.
14177 		Set rec_per_key to 1. */
14178 		if (n_diff <= n_null) {
14179 			rec_per_key = 1.0;
14180 		} else {
14181 			/* Need to exclude rows with NULL values from
14182 			rec_per_key calculation */
14183 			rec_per_key
14184 				= static_cast<rec_per_key_t>(records - n_null)
14185 				/ (n_diff - n_null);
14186 		}
14187 	} else {
14188 		DEBUG_SYNC_C("after_checking_for_0");
14189 		rec_per_key = static_cast<rec_per_key_t>(records) / n_diff;
14190 	}
14191 
14192 	if (rec_per_key < 1.0) {
14193 		/* Values below 1.0 are meaningless and must be due to the
14194 		stats being imprecise. */
14195 		rec_per_key = 1.0;
14196 	}
14197 
14198 	return(rec_per_key);
14199 }
14200 
14201 /** Calculate how many KiB of new data we will be able to insert to the
14202 tablespace without running out of space. Start with a space object that has
14203 been acquired by the caller who holds it for the calculation,
14204 @param[in]	space		tablespace object from fil_space_acquire()
14205 @return available space in KiB */
14206 static uintmax_t
fsp_get_available_space_in_free_extents(const fil_space_t & space)14207 fsp_get_available_space_in_free_extents(const fil_space_t& space)
14208 {
14209 	ulint	size_in_header = space.size_in_header;
14210 	if (size_in_header < FSP_EXTENT_SIZE) {
14211 		return 0;		/* TODO: count free frag pages and
14212 					return a value based on that */
14213 	}
14214 
14215 	/* Below we play safe when counting free extents above the free limit:
14216 	some of them will contain extent descriptor pages, and therefore
14217 	will not be free extents */
14218 	ut_ad(size_in_header >= space.free_limit);
14219 	ulint	n_free_up =
14220 		(size_in_header - space.free_limit) / FSP_EXTENT_SIZE;
14221 
14222 	const ulint size = space.physical_size();
14223 	if (n_free_up > 0) {
14224 		n_free_up--;
14225 		n_free_up -= n_free_up / (size / FSP_EXTENT_SIZE);
14226 	}
14227 
14228 	/* We reserve 1 extent + 0.5 % of the space size to undo logs
14229 	and 1 extent + 0.5 % to cleaning operations; NOTE: this source
14230 	code is duplicated in the function above! */
14231 
14232 	ulint	reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
14233 	ulint	n_free = space.free_len + n_free_up;
14234 
14235 	if (reserve > n_free) {
14236 		return(0);
14237 	}
14238 
14239 	return(static_cast<uintmax_t>(n_free - reserve)
14240 	       * FSP_EXTENT_SIZE * (size / 1024));
14241 }
14242 
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
Which fields are filled in depends on the HA_STATUS_* bits set in flag:
HA_STATUS_TIME (optional statistics refresh and stats.update_time),
HA_STATUS_VARIABLE (row count and size figures), HA_STATUS_CONST
(index count check, stats.create_time and rec_per_key),
HA_STATUS_ERRKEY (errkey) and HA_STATUS_AUTO (auto-increment value).
@return HA_ERR_* error code or 0; HA_ERR_GENERIC is returned when
dict_stats_update() fails */

int
ha_innobase::info_low(
/*==================*/
	uint	flag,	/*!< in: what information is requested */
	bool	is_analyze)	/*!< in: true when called on behalf of
				ANALYZE (see analyze()); forces a
				statistics update */
{
	dict_table_t*	ib_table;
	ib_uint64_t	n_rows;
	char		path[FN_REFLEN];
	os_file_stat_t	stat_info;

	DBUG_ENTER("info");

	DEBUG_SYNC_C("ha_innobase_info_low");

	ut_ad(!mutex_own(&dict_sys.mutex));

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */
	/* NOTE(review): no statement follows the comment above at this
	point; the only srv_force_recovery check in this function is near
	the end, before the HA_STATUS_ERRKEY handling. */

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "returning various info to MariaDB";

	ib_table = m_prebuilt->table;
	DBUG_ASSERT(ib_table->get_ref_count() > 0);

	/* An unreadable table is marked as having initialized statistics;
	the ut_a(ib_table->stat_initialized) checks below rely on the flag
	being set. */
	if (!ib_table->is_readable()) {
		ib_table->stat_initialized = true;
	}

	if (flag & HA_STATUS_TIME) {
		/* Refresh the statistics only for ANALYZE or when
		innobase_stats_on_metadata is set. */
		if (is_analyze || innobase_stats_on_metadata) {

			dict_stats_upd_option_t	opt;
			dberr_t			ret;

			m_prebuilt->trx->op_info = "updating table statistics";

			if (dict_stats_is_persistent_enabled(ib_table)) {

				if (is_analyze) {
					/* Take the table out of the
					recalculation pool and wait for the
					background statistics code to stop
					using it before recalculating. */
					row_mysql_lock_data_dictionary(
						m_prebuilt->trx);
					dict_stats_recalc_pool_del(ib_table);
					dict_stats_wait_bg_to_stop_using_table(
						ib_table, m_prebuilt->trx);
					row_mysql_unlock_data_dictionary(
						m_prebuilt->trx);
					opt = DICT_STATS_RECALC_PERSISTENT;
				} else {
					/* This is e.g. 'SHOW INDEXES', fetch
					the persistent stats from disk. */
					opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
				}
			} else {
				opt = DICT_STATS_RECALC_TRANSIENT;
			}

			ret = dict_stats_update(ib_table, opt);

			if (opt == DICT_STATS_RECALC_PERSISTENT) {
				/* Clear the BG_STAT_SHOULD_QUIT bit again,
				regardless of the outcome of the update. */
				mutex_enter(&dict_sys.mutex);
				ib_table->stats_bg_flag
					&= byte(~BG_STAT_SHOULD_QUIT);
				mutex_exit(&dict_sys.mutex);
			}

			if (ret != DB_SUCCESS) {
				m_prebuilt->trx->op_info = "";
				DBUG_RETURN(HA_ERR_GENERIC);
			}

			m_prebuilt->trx->op_info =
				"returning various info to MariaDB";
		}


		stats.update_time = (ulong) ib_table->update_time;
	}

	/* Debug hook: skip everything below that acquires
	dict_sys.mutex. */
	DBUG_EXECUTE_IF("dict_sys_mutex_avoid", goto func_exit;);

	dict_stats_init(ib_table);

	if (flag & HA_STATUS_VARIABLE) {

		ulint	stat_clustered_index_size;
		ulint	stat_sum_of_other_index_sizes;

		/* Copy the table statistics while holding
		dict_sys.mutex. */
		mutex_enter(&dict_sys.mutex);

		ut_a(ib_table->stat_initialized);

		n_rows = ib_table->stat_n_rows;

		stat_clustered_index_size
			= ib_table->stat_clustered_index_size;

		stat_sum_of_other_index_sizes
			= ib_table->stat_sum_of_other_index_sizes;

		mutex_exit(&dict_sys.mutex);

		/*
		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) {
			n_rows++;
		}

		/* Fix bug#40386: Not flushing query cache after truncate.
		n_rows can not be 0 unless the table is empty, set to 1
		instead. The original problem of bug#29507 is actually
		fixed in the server code. */
		if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 1;

			/* We need to reset the m_prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			m_prebuilt->autoinc_last_value = 0;
		}

		stats.records = (ha_rows) n_rows;
		stats.deleted = 0;
		/* The length fields are only filled in when the table has
		a tablespace object; the index sizes are multiplied by the
		physical page size of that tablespace. */
		if (fil_space_t* space = ib_table->space) {
			const ulint size = space->physical_size();
			stats.data_file_length
				= ulonglong(stat_clustered_index_size)
				* size;
			stats.index_file_length
				= ulonglong(stat_sum_of_other_index_sizes)
				* size;
			/* The helper returns KiB; convert to bytes. */
			stats.delete_length = 1024
				* fsp_get_available_space_in_free_extents(
					*space);
		}
		stats.check_time = 0;
		stats.mrr_length_per_rec= (uint)ref_length +  8; // 8 = max(sizeof(void *));

		if (stats.records == 0) {
			stats.mean_rec_length = 0;
		} else {
			stats.mean_rec_length = (ulong)
				(stats.data_file_length / stats.records);
		}
	}

	if (flag & HA_STATUS_CONST) {
		ulong	i;
		/* Verify the number of index in InnoDB and MySQL
		matches up. If m_prebuilt->clust_index_was_generated
		holds, InnoDB defines GEN_CLUST_INDEX internally */
		ulint	num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
			- m_prebuilt->clust_index_was_generated;
		if (table->s->keys < num_innodb_index) {
			/* If there are too many indexes defined
			inside InnoDB, ignore those that are being
			created, because MySQL will only consider
			the fully built indexes here. */

			for (const dict_index_t* index
				     = UT_LIST_GET_FIRST(ib_table->indexes);
			     index != NULL;
			     index = UT_LIST_GET_NEXT(indexes, index)) {

				/* First, online index creation is
				completed inside InnoDB, and then
				MySQL attempts to upgrade the
				meta-data lock so that it can rebuild
				the .frm file. If we get here in that
				time frame, dict_index_is_online_ddl()
				would not hold and the index would
				still not be included in TABLE_SHARE. */
				if (!index->is_committed()) {
					num_innodb_index--;
				}
			}

			/* If InnoDB still has more indexes and a doc ID
			index exists, leave that one out of the count as
			well. */
			if (table->s->keys < num_innodb_index
			    && innobase_fts_check_doc_id_index(
				    ib_table, NULL, NULL)
			    == FTS_EXIST_DOC_ID_INDEX) {
				num_innodb_index--;
			}
		}

		if (table->s->keys != num_innodb_index) {
			ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
			ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
		}

		/* Build the path of the table definition file from the
		data home, the normalized table path and reg_ext; its
		ctime is reported as the table creation time. */
		snprintf(path, sizeof(path), "%s/%s%s",
			 mysql_data_home, table->s->normalized_path.str,
			 reg_ext);

		unpack_filename(path,path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(
			    path, &stat_info, false,
			    srv_read_only_mode) == DB_SUCCESS) {
			stats.create_time = (ulong) stat_info.ctime;
		}

		/* Hold dict_sys.mutex until the end of the enclosing
		HA_STATUS_CONST block; the destructor releases it on every
		exit path of that block. */
		struct Locking {
			Locking() { mutex_enter(&dict_sys.mutex); }
			~Locking() { mutex_exit(&dict_sys.mutex); }
		} locking;

		ut_a(ib_table->stat_initialized);

		for (i = 0; i < table->s->keys; i++) {
			ulong	j;

			dict_index_t* index = innobase_get_index(i);

			if (index == NULL) {
				ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
				ib_push_frm_error(m_user_thd, ib_table, table, num_innodb_index, true);
				break;
			}

			KEY*	key = &table->key_info[i];

			for (j = 0; j < key->ext_key_parts; j++) {

				if ((key->flags & HA_FULLTEXT)
				    || (key->flags & HA_SPATIAL)) {

					/* The record per key does not apply to
					FTS or Spatial indexes. */
				/*
					key->rec_per_key[j] = 1;
					key->set_records_per_key(j, 1.0);
				*/
					continue;
				}

				/* Stop at the first key part for which
				InnoDB has no unique-prefix statistics. */
				if (j + 1 > index->n_uniq) {
					sql_print_error(
						"Index %s of %s has %u columns"
					        " unique inside InnoDB, but "
						"MySQL is asking statistics for"
					        " %lu columns. Have you mixed "
						"up .frm files from different "
						" installations? %s",
						index->name(),
						ib_table->name.m_name,
						index->n_uniq, j + 1,
						TROUBLESHOOTING_MSG);
					break;
				}

				/* innodb_rec_per_key() will use
				index->stat_n_diff_key_vals[] and the value we
				pass index->table->stat_n_rows. Both are
				calculated by ANALYZE and by the background
				stats gathering thread (which kicks in when too
				much of the table has been changed). In
				addition table->stat_n_rows is adjusted with
				each DML (e.g. ++ on row insert). Those
				adjustments are not MVCC'ed and not even
				reversed on rollback. So,
				index->stat_n_diff_key_vals[] and
				index->table->stat_n_rows could have been
				calculated at different time. This is
				acceptable. */

				ulong	rec_per_key_int = static_cast<ulong>(
					innodb_rec_per_key(index, j,
							   stats.records));

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key_int = rec_per_key_int / 2;

				if (rec_per_key_int == 0) {
					rec_per_key_int = 1;
				}

				key->rec_per_key[j] = rec_per_key_int;
			}
		}
	}

	/* With srv_force_recovery at or above SRV_FORCE_NO_IBUF_MERGE,
	both the error key lookup and the auto-increment value below are
	skipped. */
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		goto func_exit;

	} else if (flag & HA_STATUS_ERRKEY) {
		const dict_index_t*	err_index;

		ut_a(m_prebuilt->trx);
		ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);

		err_index = trx_get_error_info(m_prebuilt->trx);

		if (err_index) {
			errkey = innobase_get_mysql_key_number_for_index(
					table, ib_table, err_index);
		} else {
			/* No index recorded: fall back to the key number
			stored in the transaction, mapping
			ULINT_UNDEFINED to ~0U. */
			errkey = (unsigned int) (
				(m_prebuilt->trx->error_key_num
				 == ULINT_UNDEFINED)
					? ~0U
					: m_prebuilt->trx->error_key_num);
		}
	}

	if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
		stats.auto_increment_value = innobase_peek_autoinc();
	}

func_exit:
	m_prebuilt->trx->op_info = (char*)"";

	DBUG_RETURN(0);
}
14589 
14590 /*********************************************************************//**
14591 Returns statistics information of the table to the MySQL interpreter,
14592 in various fields of the handle object.
14593 @return HA_ERR_* error code or 0 */
14594 
14595 int
info(uint flag)14596 ha_innobase::info(
14597 /*==============*/
14598 	uint	flag)	/*!< in: what information is requested */
14599 {
14600 	return(info_low(flag, false /* not ANALYZE */));
14601 }
14602 
14603 /*
14604 Updates index cardinalities of the table, based on random dives into
14605 each index tree. This does NOT calculate exact statistics on the table.
14606 @return HA_ADMIN_* error code or HA_ADMIN_OK */
14607 
14608 int
analyze(THD *,HA_CHECK_OPT *)14609 ha_innobase::analyze(THD*, HA_CHECK_OPT*)
14610 {
14611 	/* Simply call info_low() with all the flags
14612 	and request recalculation of the statistics */
14613 	int	ret = info_low(
14614 		HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
14615 		true /* this is ANALYZE */);
14616 
14617 	if (ret != 0) {
14618 		return(HA_ADMIN_FAILED);
14619 	}
14620 
14621 	return(HA_ADMIN_OK);
14622 }
14623 
14624 /*****************************************************************//**
14625 Defragment table.
14626 @return	error number */
defragment_table(const char * name)14627 inline int ha_innobase::defragment_table(const char *name)
14628 {
14629 	char		norm_name[FN_REFLEN];
14630 	dict_table_t*	table = NULL;
14631 	dict_index_t*	index = NULL;
14632 	int		ret = 0;
14633 	dberr_t		err = DB_SUCCESS;
14634 
14635 	normalize_table_name(norm_name, name);
14636 
14637 	table = dict_table_open_on_name(norm_name, FALSE,
14638 		FALSE, DICT_ERR_IGNORE_FK_NOKEY);
14639 
14640 	for (index = dict_table_get_first_index(table); index;
14641 	     index = dict_table_get_next_index(index)) {
14642 
14643 		if (index->is_corrupted()) {
14644 			continue;
14645 		}
14646 
14647 		if (dict_index_is_spatial(index)) {
14648 			/* Do not try to defragment spatial indexes,
14649 			because doing it properly would require
14650 			appropriate logic around the SSN (split
14651 			sequence number). */
14652 			continue;
14653 		}
14654 
14655 		if (index->page == FIL_NULL) {
14656 			/* Do not defragment auxiliary tables related
14657 			to FULLTEXT INDEX. */
14658 			ut_ad(index->type & DICT_FTS);
14659 			continue;
14660 		}
14661 
14662 		if (btr_defragment_find_index(index)) {
14663 			// We borrow this error code. When the same index is
14664 			// already in the defragmentation queue, issue another
14665 			// defragmentation only introduces overhead. We return
14666 			// an error here to let the user know this is not
14667 			// necessary. Note that this will fail a query that's
14668 			// trying to defragment a full table if one of the
14669 			// indicies in that table is already in defragmentation.
14670 			// We choose this behavior so user is aware of this
14671 			// rather than silently defragment other indicies of
14672 			// that table.
14673 			ret = ER_SP_ALREADY_EXISTS;
14674 			break;
14675 		}
14676 
14677 		os_event_t event = btr_defragment_add_index(index, &err);
14678 
14679 		if (err != DB_SUCCESS) {
14680 			push_warning_printf(
14681 				current_thd,
14682 				Sql_condition::WARN_LEVEL_WARN,
14683 				ER_NO_SUCH_TABLE,
14684 				"Table %s is encrypted but encryption service or"
14685 				" used key_id is not available. "
14686 				" Can't continue checking table.",
14687 				index->table->name.m_name);
14688 
14689 			ret = convert_error_code_to_mysql(err, 0, current_thd);
14690 			break;
14691 		}
14692 
14693 		if (event) {
14694 			while(os_event_wait_time(event, 1000000)) {
14695 				if (thd_killed(current_thd)) {
14696 					btr_defragment_remove_index(index);
14697 					ret = ER_QUERY_INTERRUPTED;
14698 					break;
14699 				}
14700 			}
14701 			os_event_destroy(event);
14702 		}
14703 
14704 		if (ret) {
14705 			break;
14706 		}
14707 	}
14708 
14709 	dict_table_close(table, FALSE, FALSE);
14710 	return ret;
14711 }
14712 
14713 /**********************************************************************//**
14714 This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
14715 the table in MySQL. */
14716 
14717 int
optimize(THD * thd,HA_CHECK_OPT *)14718 ha_innobase::optimize(
14719 /*==================*/
14720 	THD*		thd,		/*!< in: connection thread handle */
14721 	HA_CHECK_OPT*)
14722 {
14723 
14724 	/* FTS-FIXME: Since MySQL doesn't support engine-specific commands,
14725 	we have to hijack some existing command in order to be able to test
14726 	the new admin commands added in InnoDB's FTS support. For now, we
14727 	use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in
14728 	InnoDB (so it recreates the table anew), and map it to OPTIMIZE.
14729 
14730 	This works OK otherwise, but MySQL locks the entire table during
14731 	calls to OPTIMIZE, which is undesirable. */
14732 	bool try_alter = true;
14733 
14734 	if (!m_prebuilt->table->is_temporary() && srv_defragment) {
14735 		int err = defragment_table(m_prebuilt->table->name.m_name);
14736 
14737 		if (err == 0) {
14738 			try_alter = false;
14739 		} else {
14740 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
14741 					    uint(err),
14742 				"InnoDB: Cannot defragment table %s: returned error code %d\n",
14743 				m_prebuilt->table->name.m_name, err);
14744 
14745 			if(err == ER_SP_ALREADY_EXISTS) {
14746 				try_alter = false;
14747 			}
14748 		}
14749 	}
14750 
14751 	if (innodb_optimize_fulltext_only) {
14752 		if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache
14753 		    && m_prebuilt->table->space) {
14754 			fts_sync_table(m_prebuilt->table);
14755 			fts_optimize_table(m_prebuilt->table);
14756 		}
14757 		try_alter = false;
14758 	}
14759 
14760 	return try_alter ? HA_ADMIN_TRY_ALTER : HA_ADMIN_OK;
14761 }
14762 
/*******************************************************************//**
Tries to check that an InnoDB table is not corrupted (CHECK TABLE).
Every committed index is optionally validated with btr_validate_index()
(skipped when T_QUICK is set in check_opt->flags) and then scanned to count
its records; the count of each non-FTS secondary index is compared with the
count of the first (clustered) index. Problems are reported as warnings via
push_warning_printf()/ib_senderrf(), and an index found to be inconsistent
is flagged with dict_set_corrupted(). The ut_a() assertions below may
also abort the server.
@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */

int
ha_innobase::check(
/*===============*/
	THD*		thd,		/*!< in: user thread handle */
	HA_CHECK_OPT*	check_opt)	/*!< in: check options */
{
	dict_index_t*	index;
	ulint		n_rows;
	/* Record count of the first (clustered) index; stays
	ULINT_UNDEFINED until that index has been scanned. */
	ulint		n_rows_in_table	= ULINT_UNDEFINED;
	bool		is_ok		= true;
	ulint		old_isolation_level;
	dberr_t		ret;

	DBUG_ENTER("ha_innobase::check");
	DBUG_ASSERT(thd == ha_thd());
	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(m_prebuilt->trx == thd_to_trx(thd));

	if (m_prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(true);
	}

	if (!m_prebuilt->table->space) {

		ib_senderrf(
			thd,
			IB_LOG_LEVEL_ERROR,
			ER_TABLESPACE_DISCARDED,
			table->s->table_name.str);

		DBUG_RETURN(HA_ADMIN_CORRUPT);

	/* NOTE(review): this branch is unreachable as written, because
	!m_prebuilt->table->space was already handled by the preceding
	"if". Consequently ER_TABLESPACE_MISSING is never reported from
	here; confirm which condition was intended. */
	} else if (!m_prebuilt->table->is_readable() &&
		   !m_prebuilt->table->space) {

		ib_senderrf(
			thd, IB_LOG_LEVEL_ERROR,
			ER_TABLESPACE_MISSING,
			table->s->table_name.str);

		DBUG_RETURN(HA_ADMIN_CORRUPT);
	}

	m_prebuilt->trx->op_info = "checking table";

	if (m_prebuilt->table->corrupted) {
		/* If some previous operation has marked the table as
		corrupted in memory, and has not propagated such to
		clustered index, we will do so here */
		index = dict_table_get_first_index(m_prebuilt->table);

		if (!index->is_corrupted()) {
			dict_set_corrupted(
				index, m_prebuilt->trx, "CHECK TABLE");
		}

		push_warning_printf(m_user_thd,
				    Sql_condition::WARN_LEVEL_WARN,
				    HA_ERR_INDEX_CORRUPT,
				    "InnoDB: Index %s is marked as"
				    " corrupted",
				    index->name());

		/* Now that the table is already marked as corrupted,
		there is no need to check any index of this table */
		m_prebuilt->trx->op_info = "";

		DBUG_RETURN(HA_ADMIN_CORRUPT);
	}

	/* Saved so that the level can be restored after the index loop. */
	old_isolation_level = m_prebuilt->trx->isolation_level;

	/* We must run the index record counts at an isolation level
	>= READ COMMITTED, because a dirty read can see a wrong number
	of records in some index; to play safe, we normally use
	REPEATABLE READ here. When high_level_read_only is set,
	READ UNCOMMITTED is used instead. */
	m_prebuilt->trx->isolation_level = high_level_read_only
		? TRX_ISO_READ_UNCOMMITTED
		: TRX_ISO_REPEATABLE_READ;

	ut_ad(!m_prebuilt->table->corrupted);

	for (index = dict_table_get_first_index(m_prebuilt->table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {
		/* If this is an index being created or dropped, skip */
		if (!index->is_committed()) {
			continue;
		}

		/* Structural validation of the B-tree; skipped for
		CHECK TABLE ... QUICK and for indexes that are already
		flagged as corrupted. */
		if (!(check_opt->flags & T_QUICK)
		    && !index->is_corrupted()) {

			dberr_t err = btr_validate_index(
					index, m_prebuilt->trx);

			if (err != DB_SUCCESS) {
				is_ok = false;

				if (err == DB_DECRYPTION_FAILED) {
					push_warning_printf(
						thd,
						Sql_condition::WARN_LEVEL_WARN,
						ER_NO_SUCH_TABLE,
						"Table %s is encrypted but encryption service or"
						" used key_id is not available. "
						" Can't continue checking table.",
						index->table->name.m_name);
				} else {
					push_warning_printf(
						thd,
						Sql_condition::WARN_LEVEL_WARN,
						ER_NOT_KEYFILE,
						"InnoDB: The B-tree of"
						" index %s is corrupted.",
						index->name());
				}

				/* The record count of this index is not
				taken. NOTE(review): if this is the first
				index, n_rows_in_table stays ULINT_UNDEFINED
				and every later non-FTS index will fail the
				count comparison below; confirm that this
				is intended. */
				continue;
			}
		}

		/* Instead of invoking change_active_index(), set up
		a dummy template for non-locking reads, disabling
		access to the clustered index. */
		m_prebuilt->index = index;

		m_prebuilt->index_usable = row_merge_is_index_usable(
			m_prebuilt->trx, m_prebuilt->index);

		/* Debug injection point: pretend that every secondary
		index is unusable and flag it as corrupted. */
		DBUG_EXECUTE_IF(
			"dict_set_index_corrupted",
			if (!index->is_primary()) {
				m_prebuilt->index_usable = FALSE;
				// row_mysql_lock_data_dictionary(m_prebuilt->trx);
				dict_set_corrupted(index, m_prebuilt->trx, "dict_set_index_corrupted");
				// row_mysql_unlock_data_dictionary(m_prebuilt->trx);
			});

		if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
			if (index->is_corrupted()) {
				push_warning_printf(
					m_user_thd,
					Sql_condition::WARN_LEVEL_WARN,
					HA_ERR_INDEX_CORRUPT,
					"InnoDB: Index %s is marked as"
					" corrupted",
					index->name());
				is_ok = false;
			} else {
				/* Unusable but not corrupted: only a
				warning is issued, is_ok is left as is. */
				push_warning_printf(
					m_user_thd,
					Sql_condition::WARN_LEVEL_WARN,
					HA_ERR_TABLE_DEF_CHANGED,
					"InnoDB: Insufficient history for"
					" index %s",
					index->name());
			}
			continue;
		}

		/* Prepare m_prebuilt for a non-locking scan with an empty
		search tuple and the dummy template. */
		m_prebuilt->sql_stat_start = TRUE;
		m_prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
		m_prebuilt->n_template = 0;
		m_prebuilt->need_to_access_clustered = FALSE;

		dtuple_set_n_fields(m_prebuilt->search_tuple, 0);

		m_prebuilt->select_lock_type = LOCK_NONE;

		/* Scan this index. */
		if (dict_index_is_spatial(index)) {
			ret = row_count_rtree_recs(m_prebuilt, &n_rows);
		} else {
			ret = row_scan_index_for_mysql(
				m_prebuilt, index, &n_rows);
		}

		/* Debug injection point: simulate a failed scan of every
		secondary index. */
		DBUG_EXECUTE_IF(
			"dict_set_index_corrupted",
			if (!index->is_primary()) {
				ret = DB_CORRUPTION;
			});

		if (ret == DB_INTERRUPTED || thd_killed(m_user_thd)) {
			/* Do not report error since this could happen
			during shutdown */
			break;
		}
		if (ret != DB_SUCCESS) {
			/* Assume some kind of corruption. */
			push_warning_printf(
				thd, Sql_condition::WARN_LEVEL_WARN,
				ER_NOT_KEYFILE,
				"InnoDB: The B-tree of"
				" index %s is corrupted.",
				index->name());
			is_ok = false;
			dict_set_corrupted(
				index, m_prebuilt->trx, "CHECK TABLE-check index");
		}

		/* NOTE(review): the comparison below is executed even when
		the scan above failed, i.e. with whatever value the scan
		left in n_rows; confirm that this is intended. */

		if (index == dict_table_get_first_index(m_prebuilt->table)) {
			n_rows_in_table = n_rows;
		} else if (!(index->type & DICT_FTS)
			   && (n_rows != n_rows_in_table)) {
			/* A non-FTS secondary index must contain as many
			records as the first index. */
			push_warning_printf(
				thd, Sql_condition::WARN_LEVEL_WARN,
				ER_NOT_KEYFILE,
				"InnoDB: Index '%-.200s' contains " ULINTPF
				" entries, should be " ULINTPF ".",
				index->name(), n_rows, n_rows_in_table);
			is_ok = false;
			dict_set_corrupted(
				index, m_prebuilt->trx,
				"CHECK TABLE; Wrong count");
		}
	}

	/* Restore the original isolation level */
	m_prebuilt->trx->isolation_level = old_isolation_level;
#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	/* We validate the whole adaptive hash index for all tables
	at every CHECK TABLE only when QUICK flag is not present. */

	if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
		push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
			     ER_NOT_KEYFILE,
			     "InnoDB: The adaptive hash index is corrupted.");
		is_ok = false;
	}
# endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
#endif /* BTR_CUR_HASH_ADAPT */
	m_prebuilt->trx->op_info = "";

	DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
}
15011 
15012 /*******************************************************************//**
15013 Gets the foreign key create info for a table stored in InnoDB.
15014 @return own: character string in the form which can be inserted to the
15015 CREATE TABLE statement, MUST be freed with
15016 ha_innobase::free_foreign_key_create_info */
15017 
15018 char*
get_foreign_key_create_info(void)15019 ha_innobase::get_foreign_key_create_info(void)
15020 /*==========================================*/
15021 {
15022 	ut_a(m_prebuilt != NULL);
15023 
15024 	/* We do not know if MySQL can call this function before calling
15025 	external_lock(). To be safe, update the thd of the current table
15026 	handle. */
15027 
15028 	update_thd(ha_thd());
15029 
15030 	m_prebuilt->trx->op_info = "getting info on foreign keys";
15031 
15032 	/* Output the data to a temporary string */
15033 	std::string str = dict_print_info_on_foreign_keys(
15034 		TRUE, m_prebuilt->trx,
15035 		m_prebuilt->table);
15036 
15037 	m_prebuilt->trx->op_info = "";
15038 
15039 	/* Allocate buffer for the string */
15040 	char* fk_str = (char*) my_malloc(str.length() + 1, MYF(0));
15041 
15042 	/* JAN: TODO: MySQL 5.7
15043 	fk_str = reinterpret_cast<char*>(
15044 			my_malloc(PSI_INSTRUMENT_ME, str.length() + 1, MYF(0)));
15045 	*/
15046 
15047 
15048 
15049 	if (fk_str) {
15050 		memcpy(fk_str, str.c_str(), str.length());
15051 		fk_str[str.length()]='\0';
15052 	}
15053 
15054 	return(fk_str);
15055 }
15056 
15057 
15058 /***********************************************************************//**
15059 Maps a InnoDB foreign key constraint to a equivalent MySQL foreign key info.
15060 @return pointer to foreign key info */
15061 static
15062 FOREIGN_KEY_INFO*
get_foreign_key_info(THD * thd,dict_foreign_t * foreign)15063 get_foreign_key_info(
15064 /*=================*/
15065 	THD*			thd,	/*!< in: user thread handle */
15066 	dict_foreign_t*		foreign)/*!< in: foreign key constraint */
15067 {
15068 	FOREIGN_KEY_INFO	f_key_info;
15069 	FOREIGN_KEY_INFO*	pf_key_info;
15070 	uint			i = 0;
15071 	size_t			len;
15072 	char			tmp_buff[NAME_LEN+1];
15073 	char			name_buff[NAME_LEN+1];
15074 	const char*		ptr;
15075 	LEX_CSTRING*		referenced_key_name;
15076 	LEX_CSTRING*		name = NULL;
15077 
15078 	if (dict_table_t::is_temporary_name(foreign->foreign_table_name)) {
15079 		return NULL;
15080 	}
15081 
15082 	ptr = dict_remove_db_name(foreign->id);
15083 	f_key_info.foreign_id = thd_make_lex_string(
15084 		thd, 0, ptr, strlen(ptr), 1);
15085 
15086 	/* Name format: database name, '/', table name, '\0' */
15087 
15088 	/* Referenced (parent) database name */
15089 	len = dict_get_db_name_len(foreign->referenced_table_name);
15090 	ut_a(len < sizeof(tmp_buff));
15091 	ut_memcpy(tmp_buff, foreign->referenced_table_name, len);
15092 	tmp_buff[len] = 0;
15093 
15094 	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15095 	f_key_info.referenced_db = thd_make_lex_string(
15096 		thd, 0, name_buff, len, 1);
15097 
15098 	/* Referenced (parent) table name */
15099 	ptr = dict_remove_db_name(foreign->referenced_table_name);
15100 	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15101 	f_key_info.referenced_table = thd_make_lex_string(
15102 		thd, 0, name_buff, len, 1);
15103 
15104 	/* Dependent (child) database name */
15105 	len = dict_get_db_name_len(foreign->foreign_table_name);
15106 	ut_a(len < sizeof(tmp_buff));
15107 	ut_memcpy(tmp_buff, foreign->foreign_table_name, len);
15108 	tmp_buff[len] = 0;
15109 
15110 	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
15111 	f_key_info.foreign_db = thd_make_lex_string(
15112 		thd, 0, name_buff, len, 1);
15113 
15114 	/* Dependent (child) table name */
15115 	ptr = dict_remove_db_name(foreign->foreign_table_name);
15116 	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff), 1);
15117 	f_key_info.foreign_table = thd_make_lex_string(
15118 		thd, 0, name_buff, len, 1);
15119 
15120 	do {
15121 		ptr = foreign->foreign_col_names[i];
15122 		name = thd_make_lex_string(thd, name, ptr,
15123 					   strlen(ptr), 1);
15124 		f_key_info.foreign_fields.push_back(name);
15125 		ptr = foreign->referenced_col_names[i];
15126 		name = thd_make_lex_string(thd, name, ptr,
15127 					   strlen(ptr), 1);
15128 		f_key_info.referenced_fields.push_back(name);
15129 	} while (++i < foreign->n_fields);
15130 
15131 	if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
15132 		f_key_info.delete_method = FK_OPTION_CASCADE;
15133 	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
15134 		f_key_info.delete_method = FK_OPTION_SET_NULL;
15135 	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
15136 		f_key_info.delete_method = FK_OPTION_NO_ACTION;
15137 	} else {
15138 		f_key_info.delete_method = FK_OPTION_RESTRICT;
15139 	}
15140 
15141 
15142 	if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
15143 		f_key_info.update_method = FK_OPTION_CASCADE;
15144 	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
15145 		f_key_info.update_method = FK_OPTION_SET_NULL;
15146 	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
15147 		f_key_info.update_method = FK_OPTION_NO_ACTION;
15148 	} else {
15149 		f_key_info.update_method = FK_OPTION_RESTRICT;
15150 	}
15151 
15152 	/* Load referenced table to update FK referenced key name. */
15153 	if (foreign->referenced_table == NULL) {
15154 
15155 		dict_table_t*	ref_table;
15156 
15157 		ut_ad(mutex_own(&dict_sys.mutex));
15158 		ref_table = dict_table_open_on_name(
15159 			foreign->referenced_table_name_lookup,
15160 			TRUE, FALSE, DICT_ERR_IGNORE_NONE);
15161 
15162 		if (ref_table == NULL) {
15163 
15164 			if (!thd_test_options(
15165 				thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
15166 				ib::info()
15167 					<< "Foreign Key referenced table "
15168 					<< foreign->referenced_table_name
15169 					<< " not found for foreign table "
15170 					<< foreign->foreign_table_name;
15171 			}
15172 		} else {
15173 
15174 			dict_table_close(ref_table, TRUE, FALSE);
15175 		}
15176 	}
15177 
15178 	if (foreign->referenced_index
15179 	    && foreign->referenced_index->name != NULL) {
15180 		referenced_key_name = thd_make_lex_string(
15181 			thd,
15182 			f_key_info.referenced_key_name,
15183 			foreign->referenced_index->name,
15184 			strlen(foreign->referenced_index->name),
15185 			1);
15186 	} else {
15187 		referenced_key_name = NULL;
15188 	}
15189 
15190 	f_key_info.referenced_key_name = referenced_key_name;
15191 
15192 	pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info,
15193 						      sizeof(FOREIGN_KEY_INFO));
15194 
15195 	return(pf_key_info);
15196 }
15197 
15198 /*******************************************************************//**
15199 Gets the list of foreign keys in this table.
15200 @return always 0, that is, always succeeds */
15201 
15202 int
get_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15203 ha_innobase::get_foreign_key_list(
15204 /*==============================*/
15205 	THD*			thd,		/*!< in: user thread handle */
15206 	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
15207 {
15208 	update_thd(ha_thd());
15209 
15210 	m_prebuilt->trx->op_info = "getting list of foreign keys";
15211 
15212 	mutex_enter(&dict_sys.mutex);
15213 
15214 	for (dict_foreign_set::iterator it
15215 		= m_prebuilt->table->foreign_set.begin();
15216 	     it != m_prebuilt->table->foreign_set.end();
15217 	     ++it) {
15218 
15219 		FOREIGN_KEY_INFO*	pf_key_info;
15220 		dict_foreign_t*		foreign = *it;
15221 
15222 		pf_key_info = get_foreign_key_info(thd, foreign);
15223 
15224 		if (pf_key_info != NULL) {
15225 			f_key_list->push_back(pf_key_info);
15226 		}
15227 	}
15228 
15229 	mutex_exit(&dict_sys.mutex);
15230 
15231 	m_prebuilt->trx->op_info = "";
15232 
15233 	return(0);
15234 }
15235 
15236 /*******************************************************************//**
15237 Gets the set of foreign keys where this table is the referenced table.
15238 @return always 0, that is, always succeeds */
15239 
15240 int
get_parent_foreign_key_list(THD * thd,List<FOREIGN_KEY_INFO> * f_key_list)15241 ha_innobase::get_parent_foreign_key_list(
15242 /*=====================================*/
15243 	THD*			thd,		/*!< in: user thread handle */
15244 	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
15245 {
15246 	update_thd(ha_thd());
15247 
15248 	m_prebuilt->trx->op_info = "getting list of referencing foreign keys";
15249 
15250 	mutex_enter(&dict_sys.mutex);
15251 
15252 	for (dict_foreign_set::iterator it
15253 		= m_prebuilt->table->referenced_set.begin();
15254 	     it != m_prebuilt->table->referenced_set.end();
15255 	     ++it) {
15256 
15257 		FOREIGN_KEY_INFO*	pf_key_info;
15258 		dict_foreign_t*		foreign = *it;
15259 
15260 		pf_key_info = get_foreign_key_info(thd, foreign);
15261 
15262 		if (pf_key_info != NULL) {
15263 			f_key_list->push_back(pf_key_info);
15264 		}
15265 	}
15266 
15267 	mutex_exit(&dict_sys.mutex);
15268 
15269 	m_prebuilt->trx->op_info = "";
15270 
15271 	return(0);
15272 }
15273 
/** Table list item: pairs an InnoDB table object with a table name and
stores nothing else. Per the original note it is used by
get_cascade_foreign_key_table_list to hold the intermediate result while
fetching the table set. Both members are plain pointers; nothing in this
definition manages the lifetime of what they point to. */
struct table_list_item {
	/** InnoDB table object */
	const dict_table_t*	table;
	/** Table name */
	const char*		name;
};
15283 
15284 /*****************************************************************//**
15285 Checks if ALTER TABLE may change the storage engine of the table.
15286 Changing storage engines is not allowed for tables for which there
15287 are foreign key constraints (parent or child tables).
15288 @return TRUE if can switch engines */
15289 
15290 bool
can_switch_engines(void)15291 ha_innobase::can_switch_engines(void)
15292 /*=================================*/
15293 {
15294 	DBUG_ENTER("ha_innobase::can_switch_engines");
15295 
15296 	update_thd();
15297 
15298 	m_prebuilt->trx->op_info =
15299 			"determining if there are foreign key constraints";
15300 
15301 	row_mysql_freeze_data_dictionary(m_prebuilt->trx);
15302 
15303 	bool	can_switch = m_prebuilt->table->referenced_set.empty()
15304 		&& m_prebuilt->table->foreign_set.empty();
15305 
15306 	row_mysql_unfreeze_data_dictionary(m_prebuilt->trx);
15307 	m_prebuilt->trx->op_info = "";
15308 
15309 	DBUG_RETURN(can_switch);
15310 }
15311 
15312 /*******************************************************************//**
15313 Checks if a table is referenced by a foreign key. The MySQL manual states that
15314 a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
15315 delete is then allowed internally to resolve a duplicate key conflict in
15316 REPLACE, not an update.
15317 @return > 0 if referenced by a FOREIGN KEY */
15318 
15319 uint
referenced_by_foreign_key(void)15320 ha_innobase::referenced_by_foreign_key(void)
15321 /*========================================*/
15322 {
15323 	if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {
15324 
15325 		return(1);
15326 	}
15327 
15328 	return(0);
15329 }
15330 
15331 /*******************************************************************//**
15332 Frees the foreign key create info for a table stored in InnoDB, if it is
15333 non-NULL. */
15334 
15335 void
free_foreign_key_create_info(char * str)15336 ha_innobase::free_foreign_key_create_info(
15337 /*======================================*/
15338 	char*	str)	/*!< in, own: create info string to free */
15339 {
15340 	if (str != NULL) {
15341 		my_free(str);
15342 	}
15343 }
15344 
15345 /*******************************************************************//**
15346 Tells something additional to the handler about how to do things.
15347 @return 0 or error number */
15348 
15349 int
extra(enum ha_extra_function operation)15350 ha_innobase::extra(
15351 /*===============*/
15352 	enum ha_extra_function operation)
15353 			   /*!< in: HA_EXTRA_FLUSH or some other flag */
15354 {
15355 	check_trx_exists(ha_thd());
15356 
15357 	/* Warning: since it is not sure that MySQL calls external_lock
15358 	before calling this function, the trx field in m_prebuilt can be
15359 	obsolete! */
15360 
15361 	switch (operation) {
15362 	case HA_EXTRA_FLUSH:
15363 		if (m_prebuilt->blob_heap) {
15364 			row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15365 		}
15366 		break;
15367 	case HA_EXTRA_RESET_STATE:
15368 		reset_template();
15369 		thd_to_trx(ha_thd())->duplicates = 0;
15370 		break;
15371 	case HA_EXTRA_NO_KEYREAD:
15372 		m_prebuilt->read_just_key = 0;
15373 		break;
15374 	case HA_EXTRA_KEYREAD:
15375 		m_prebuilt->read_just_key = 1;
15376 		break;
15377 	case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
15378 		m_prebuilt->keep_other_fields_on_keyread = 1;
15379 		break;
15380 
15381 		/* IMPORTANT: m_prebuilt->trx can be obsolete in
15382 		this method, because it is not sure that MySQL
15383 		calls external_lock before this method with the
15384 		parameters below.  We must not invoke update_thd()
15385 		either, because the calling threads may change.
15386 		CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
15387 	case HA_EXTRA_INSERT_WITH_UPDATE:
15388 		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
15389 		break;
15390 	case HA_EXTRA_NO_IGNORE_DUP_KEY:
15391 		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
15392 		break;
15393 	case HA_EXTRA_WRITE_CAN_REPLACE:
15394 		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
15395 		break;
15396 	case HA_EXTRA_WRITE_CANNOT_REPLACE:
15397 		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
15398 		break;
15399 	case HA_EXTRA_BEGIN_ALTER_COPY:
15400 		m_prebuilt->table->skip_alter_undo = 1;
15401 		if (m_prebuilt->table->is_temporary()
15402 		    || !m_prebuilt->table->versioned_by_id()) {
15403 			break;
15404 		}
15405 		trx_start_if_not_started(m_prebuilt->trx, true);
15406 		m_prebuilt->trx->mod_tables.insert(
15407 			trx_mod_tables_t::value_type(
15408 				const_cast<dict_table_t*>(m_prebuilt->table),
15409 				0))
15410 			.first->second.set_versioned(0);
15411 		break;
15412 	case HA_EXTRA_END_ALTER_COPY:
15413 		m_prebuilt->table->skip_alter_undo = 0;
15414 		break;
15415 	default:/* Do nothing */
15416 		;
15417 	}
15418 
15419 	return(0);
15420 }
15421 
15422 /**
15423 MySQL calls this method at the end of each statement */
15424 int
reset()15425 ha_innobase::reset()
15426 {
15427 	if (m_prebuilt->blob_heap) {
15428 		row_mysql_prebuilt_free_blob_heap(m_prebuilt);
15429 	}
15430 
15431 	reset_template();
15432 
15433 	m_ds_mrr.dsmrr_close();
15434 
15435 	/* TODO: This should really be reset in reset_template() but for now
15436 	it's safer to do it explicitly here. */
15437 
15438 	/* This is a statement level counter. */
15439 	m_prebuilt->autoinc_last_value = 0;
15440 
15441 	return(0);
15442 }
15443 
15444 /******************************************************************//**
15445 MySQL calls this function at the start of each SQL statement inside LOCK
15446 TABLES. Inside LOCK TABLES the ::external_lock method does not work to
15447 mark SQL statement borders. Note also a special case: if a temporary table
15448 is created inside LOCK TABLES, MySQL has not called external_lock() at all
15449 on that table.
15450 MySQL-5.0 also calls this before each statement in an execution of a stored
15451 procedure. To make the execution more deterministic for binlogging, MySQL-5.0
15452 locks all tables involved in a stored procedure with full explicit table
15453 locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
15454 procedure.
15455 @return 0 or error code */
15456 
15457 int
start_stmt(THD * thd,thr_lock_type lock_type)15458 ha_innobase::start_stmt(
15459 /*====================*/
15460 	THD*		thd,	/*!< in: handle to the user thread */
15461 	thr_lock_type	lock_type)
15462 {
15463 	trx_t*		trx = m_prebuilt->trx;
15464 
15465 	DBUG_ENTER("ha_innobase::start_stmt");
15466 
15467 	update_thd(thd);
15468 
15469 	ut_ad(m_prebuilt->table != NULL);
15470 
15471 	trx = m_prebuilt->trx;
15472 
15473 	innobase_srv_conc_force_exit_innodb(trx);
15474 
15475 	/* Reset the AUTOINC statement level counter for multi-row INSERTs. */
15476 	trx->n_autoinc_rows = 0;
15477 
15478 	m_prebuilt->sql_stat_start = TRUE;
15479 	m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15480 	reset_template();
15481 
15482 	if (m_prebuilt->table->is_temporary()
15483 	    && m_mysql_has_locked
15484 	    && m_prebuilt->select_lock_type == LOCK_NONE) {
15485 		dberr_t error;
15486 
15487 		switch (thd_sql_command(thd)) {
15488 		case SQLCOM_INSERT:
15489 		case SQLCOM_UPDATE:
15490 		case SQLCOM_DELETE:
15491 		case SQLCOM_REPLACE:
15492 			init_table_handle_for_HANDLER();
15493 			m_prebuilt->select_lock_type = LOCK_X;
15494 			m_prebuilt->stored_select_lock_type = LOCK_X;
15495 			error = row_lock_table(m_prebuilt);
15496 
15497 			if (error != DB_SUCCESS) {
15498 				int	st = convert_error_code_to_mysql(
15499 					error, 0, thd);
15500 				DBUG_RETURN(st);
15501 			}
15502 			break;
15503 		}
15504 	}
15505 
15506 	if (!m_mysql_has_locked) {
15507 		/* This handle is for a temporary table created inside
15508 		this same LOCK TABLES; since MySQL does NOT call external_lock
15509 		in this case, we must use x-row locks inside InnoDB to be
15510 		prepared for an update of a row */
15511 
15512 		m_prebuilt->select_lock_type = LOCK_X;
15513 
15514 	} else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
15515 		   && thd_sql_command(thd) == SQLCOM_SELECT
15516 		   && lock_type == TL_READ) {
15517 
15518 		/* For other than temporary tables, we obtain
15519 		no lock for consistent read (plain SELECT). */
15520 
15521 		m_prebuilt->select_lock_type = LOCK_NONE;
15522 	} else {
15523 		/* Not a consistent read: restore the
15524 		select_lock_type value. The value of
15525 		stored_select_lock_type was decided in:
15526 		1) ::store_lock(),
15527 		2) ::external_lock(),
15528 		3) ::init_table_handle_for_HANDLER(). */
15529 
15530 		ut_a(m_prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
15531 
15532 		m_prebuilt->select_lock_type =
15533 			m_prebuilt->stored_select_lock_type;
15534 	}
15535 
15536 	*trx->detailed_error = 0;
15537 
15538 	innobase_register_trx(ht, thd, trx);
15539 
15540 	if (!trx_is_started(trx)) {
15541 		trx->will_lock = true;
15542 	}
15543 
15544 	DBUG_RETURN(0);
15545 }
15546 
15547 /******************************************************************//**
15548 Maps a MySQL trx isolation level code to the InnoDB isolation level code
15549 @return InnoDB isolation level */
15550 static inline
15551 ulint
innobase_map_isolation_level(enum_tx_isolation iso)15552 innobase_map_isolation_level(
15553 /*=========================*/
15554 	enum_tx_isolation	iso)	/*!< in: MySQL isolation level code */
15555 {
15556 	if (UNIV_UNLIKELY(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN)
15557 	    || UNIV_UNLIKELY(srv_read_only_mode)) {
15558 		return TRX_ISO_READ_UNCOMMITTED;
15559 	}
15560 	switch (iso) {
15561 	case ISO_REPEATABLE_READ:	return(TRX_ISO_REPEATABLE_READ);
15562 	case ISO_READ_COMMITTED:	return(TRX_ISO_READ_COMMITTED);
15563 	case ISO_SERIALIZABLE:		return(TRX_ISO_SERIALIZABLE);
15564 	case ISO_READ_UNCOMMITTED:	return(TRX_ISO_READ_UNCOMMITTED);
15565 	}
15566 
15567 	ut_error;
15568 
15569 	return(0);
15570 }
15571 
15572 /******************************************************************//**
15573 As MySQL will execute an external lock for every new table it uses when it
15574 starts to process an SQL statement (an exception is when MySQL calls
15575 start_stmt for the handle) we can use this function to store the pointer to
15576 the THD in the handle. We will also use this function to communicate
15577 to InnoDB that a new SQL statement has started and that we must store a
15578 savepoint to our transaction handle, so that we are able to roll back
15579 the SQL statement in case of an error.
15580 @return 0 */
15581 
15582 int
external_lock(THD * thd,int lock_type)15583 ha_innobase::external_lock(
15584 /*=======================*/
15585 	THD*	thd,		/*!< in: handle to the user thread */
15586 	int	lock_type)	/*!< in: lock type */
15587 {
15588 	DBUG_ENTER("ha_innobase::external_lock");
15589 	DBUG_PRINT("enter",("lock_type: %d", lock_type));
15590 
15591 	update_thd(thd);
15592 	trx_t* trx = m_prebuilt->trx;
15593 	ut_ad(m_prebuilt->table);
15594 
15595 	/* Statement based binlogging does not work in isolation level
15596 	READ UNCOMMITTED and READ COMMITTED since the necessary
15597 	locks cannot be taken. In this case, we print an
15598 	informative error message and return with an error.
15599 	Note: decide_logging_format would give the same error message,
15600 	except it cannot give the extra details. */
15601 
15602 	if (lock_type == F_WRLCK
15603 	    && !(table_flags() & HA_BINLOG_STMT_CAPABLE)
15604 	    && thd_binlog_format(thd) == BINLOG_FORMAT_STMT
15605 	    && thd_binlog_filter_ok(thd)
15606 	    && thd_sqlcom_can_generate_row_events(thd)) {
15607 		bool skip = false;
15608 #ifdef WITH_WSREP
15609 		skip = trx->is_wsrep() && !wsrep_thd_is_local(thd);
15610 #endif /* WITH_WSREP */
15611 		/* used by test case */
15612 		DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
15613 
15614 		if (!skip) {
15615 			my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
15616 			         " InnoDB is limited to row-logging when"
15617 			         " transaction isolation level is"
15618 			         " READ COMMITTED or READ UNCOMMITTED.");
15619 
15620 			DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
15621 		}
15622 	}
15623 
15624 	/* Check for UPDATEs in read-only mode. */
15625 	if (srv_read_only_mode) {
15626 		switch (thd_sql_command(thd)) {
15627 		case SQLCOM_CREATE_TABLE:
15628 			if (lock_type != F_WRLCK) {
15629 				break;
15630 			}
15631 			/* fall through */
15632 		case SQLCOM_UPDATE:
15633 		case SQLCOM_INSERT:
15634 		case SQLCOM_REPLACE:
15635 		case SQLCOM_DROP_TABLE:
15636 		case SQLCOM_ALTER_TABLE:
15637 		case SQLCOM_OPTIMIZE:
15638 		case SQLCOM_CREATE_INDEX:
15639 		case SQLCOM_DROP_INDEX:
15640 		case SQLCOM_CREATE_SEQUENCE:
15641 		case SQLCOM_DROP_SEQUENCE:
15642 		case SQLCOM_DELETE:
15643 			ib_senderrf(thd, IB_LOG_LEVEL_WARN,
15644 				    ER_READ_ONLY_MODE);
15645 			DBUG_RETURN(HA_ERR_TABLE_READONLY);
15646 		}
15647 	}
15648 
15649 	m_prebuilt->sql_stat_start = TRUE;
15650 	m_prebuilt->hint_need_to_fetch_extra_cols = 0;
15651 
15652 	reset_template();
15653 
15654 	switch (m_prebuilt->table->quiesce) {
15655 	case QUIESCE_START:
15656 		/* Check for FLUSH TABLE t WITH READ LOCK; */
15657 		if (!srv_read_only_mode
15658 		    && thd_sql_command(thd) == SQLCOM_FLUSH
15659 		    && lock_type == F_RDLCK) {
15660 
15661 			if (!m_prebuilt->table->space) {
15662 				ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
15663 					    ER_TABLESPACE_DISCARDED,
15664 					    table->s->table_name.str);
15665 
15666 				DBUG_RETURN(HA_ERR_TABLESPACE_MISSING);
15667 			}
15668 
15669 			row_quiesce_table_start(m_prebuilt->table, trx);
15670 
15671 			/* Use the transaction instance to track UNLOCK
15672 			TABLES. It can be done via START TRANSACTION; too
15673 			implicitly. */
15674 
15675 			++trx->flush_tables;
15676 		}
15677 		break;
15678 
15679 	case QUIESCE_COMPLETE:
15680 		/* Check for UNLOCK TABLES; implicit or explicit
15681 		or trx interruption. */
15682 		if (trx->flush_tables > 0
15683 		    && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
15684 
15685 			row_quiesce_table_complete(m_prebuilt->table, trx);
15686 
15687 			ut_a(trx->flush_tables > 0);
15688 			--trx->flush_tables;
15689 		}
15690 
15691 		break;
15692 
15693 	case QUIESCE_NONE:
15694 		break;
15695 	}
15696 
15697 	if (lock_type == F_WRLCK) {
15698 
15699 		/* If this is a SELECT, then it is in UPDATE TABLE ...
15700 		or SELECT ... FOR UPDATE */
15701 		m_prebuilt->select_lock_type = LOCK_X;
15702 		m_prebuilt->stored_select_lock_type = LOCK_X;
15703 	}
15704 
15705 	if (lock_type != F_UNLCK) {
15706 		/* MySQL is setting a new table lock */
15707 
15708 		*trx->detailed_error = 0;
15709 
15710 		innobase_register_trx(ht, thd, trx);
15711 
15712 		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
15713 		    && m_prebuilt->select_lock_type == LOCK_NONE
15714 		    && thd_test_options(
15715 			    thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15716 
15717 			/* To get serializable execution, we let InnoDB
15718 			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
15719 			which otherwise would have been consistent reads. An
15720 			exception is consistent reads in the AUTOCOMMIT=1 mode:
15721 			we know that they are read-only transactions, and they
15722 			can be serialized also if performed as consistent
15723 			reads. */
15724 
15725 			m_prebuilt->select_lock_type = LOCK_S;
15726 			m_prebuilt->stored_select_lock_type = LOCK_S;
15727 		}
15728 
15729 		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
15730 		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
15731 		an InnoDB table lock if it is released immediately at the end
15732 		of LOCK TABLES, and InnoDB's table locks in that case cause
15733 		VERY easily deadlocks.
15734 
15735 		We do not set InnoDB table locks if user has not explicitly
15736 		requested a table lock. Note that thd_in_lock_tables(thd)
15737 		can hold in some cases, e.g., at the start of a stored
15738 		procedure call (SQLCOM_CALL). */
15739 
15740 		if (m_prebuilt->select_lock_type != LOCK_NONE) {
15741 
15742 			if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
15743 			    && THDVAR(thd, table_locks)
15744 			    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
15745 			    && thd_in_lock_tables(thd)) {
15746 
15747 				dberr_t	error = row_lock_table(m_prebuilt);
15748 
15749 				if (error != DB_SUCCESS) {
15750 
15751 					DBUG_RETURN(
15752 						convert_error_code_to_mysql(
15753 							error, 0, thd));
15754 				}
15755 			}
15756 
15757 			trx->mysql_n_tables_locked++;
15758 		}
15759 
15760 		trx->n_mysql_tables_in_use++;
15761 		m_mysql_has_locked = true;
15762 
15763 		if (!trx_is_started(trx)
15764 		    && (m_prebuilt->select_lock_type != LOCK_NONE
15765 			|| m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15766 
15767 			trx->will_lock = true;
15768 		}
15769 
15770 		DBUG_RETURN(0);
15771 	} else {
15772 		DEBUG_SYNC_C("ha_innobase_end_statement");
15773 	}
15774 
15775 	/* MySQL is releasing a table lock */
15776 
15777 	trx->n_mysql_tables_in_use--;
15778 	m_mysql_has_locked = false;
15779 
15780 	innobase_srv_conc_force_exit_innodb(trx);
15781 
15782 	/* If the MySQL lock count drops to zero we know that the current SQL
15783 	statement has ended */
15784 
15785 	if (trx->n_mysql_tables_in_use == 0) {
15786 
15787 		trx->mysql_n_tables_locked = 0;
15788 		m_prebuilt->used_in_HANDLER = FALSE;
15789 
15790 		if (!thd_test_options(
15791 				thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
15792 
15793 			if (trx_is_started(trx)) {
15794 
15795 				innobase_commit(ht, thd, TRUE);
15796 			}
15797 
15798 		} else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
15799 			trx->read_view.close();
15800 		}
15801 	}
15802 
15803 	if (!trx_is_started(trx)
15804 	    && lock_type != F_UNLCK
15805 	    && (m_prebuilt->select_lock_type != LOCK_NONE
15806 		|| m_prebuilt->stored_select_lock_type != LOCK_NONE)) {
15807 
15808 		trx->will_lock = true;
15809 	}
15810 
15811 	DBUG_RETURN(0);
15812 }
15813 
15814 /************************************************************************//**
15815 Here we export InnoDB status variables to MySQL. */
15816 static
15817 void
innodb_export_status()15818 innodb_export_status()
15819 /*==================*/
15820 {
15821 	if (srv_was_started) {
15822 		srv_export_innodb_status();
15823 	}
15824 }
15825 
15826 /************************************************************************//**
15827 Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
15828 InnoDB Monitor to the client.
15829 @return 0 on success */
15830 static
15831 int
innodb_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)15832 innodb_show_status(
15833 /*===============*/
15834 	handlerton*	hton,	/*!< in: the innodb handlerton */
15835 	THD*		thd,	/*!< in: the MySQL query thread of the caller */
15836 	stat_print_fn*	stat_print)
15837 {
15838 	static const char	truncated_msg[] = "... truncated...\n";
15839 	const long		MAX_STATUS_SIZE = 1048576;
15840 	ulint			trx_list_start = ULINT_UNDEFINED;
15841 	ulint			trx_list_end = ULINT_UNDEFINED;
15842 	bool			ret_val;
15843 
15844 	DBUG_ENTER("innodb_show_status");
15845 	DBUG_ASSERT(hton == innodb_hton_ptr);
15846 
15847 	/* We don't create the temp files or associated
15848 	mutexes in read-only-mode */
15849 
15850 	if (srv_read_only_mode) {
15851 		DBUG_RETURN(0);
15852 	}
15853 
15854 	srv_wake_purge_thread_if_not_active();
15855 
15856 	trx_t*	trx = check_trx_exists(thd);
15857 
15858 	innobase_srv_conc_force_exit_innodb(trx);
15859 
15860 	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
15861 	bytes of text. */
15862 
15863 	char*	str;
15864 	size_t	flen;
15865 
15866 	mutex_enter(&srv_monitor_file_mutex);
15867 	rewind(srv_monitor_file);
15868 
15869 	srv_printf_innodb_monitor(srv_monitor_file, FALSE,
15870 				  &trx_list_start, &trx_list_end);
15871 
15872 	os_file_set_eof(srv_monitor_file);
15873 
15874 	flen = size_t(ftell(srv_monitor_file));
15875 	if (ssize_t(flen) < 0) {
15876 		flen = 0;
15877 	}
15878 
15879 	size_t	usable_len;
15880 
15881 	if (flen > MAX_STATUS_SIZE) {
15882 		usable_len = MAX_STATUS_SIZE;
15883 		srv_truncated_status_writes++;
15884 	} else {
15885 		usable_len = flen;
15886 	}
15887 
15888 	/* allocate buffer for the string, and
15889 	read the contents of the temporary file */
15890 
15891 	if (!(str = (char*) my_malloc(//PSI_INSTRUMENT_ME,
15892 		      usable_len + 1, MYF(0)))) {
15893 		mutex_exit(&srv_monitor_file_mutex);
15894 		DBUG_RETURN(1);
15895 	}
15896 
15897 	rewind(srv_monitor_file);
15898 
15899 	if (flen < MAX_STATUS_SIZE) {
15900 		/* Display the entire output. */
15901 		flen = fread(str, 1, flen, srv_monitor_file);
15902 	} else if (trx_list_end < flen
15903 		   && trx_list_start < trx_list_end
15904 		   && trx_list_start + flen - trx_list_end
15905 		   < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
15906 
15907 		/* Omit the beginning of the list of active transactions. */
15908 		size_t	len = fread(str, 1, trx_list_start, srv_monitor_file);
15909 
15910 		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
15911 		len += sizeof truncated_msg - 1;
15912 		usable_len = (MAX_STATUS_SIZE - 1) - len;
15913 		fseek(srv_monitor_file, long(flen - usable_len), SEEK_SET);
15914 		len += fread(str + len, 1, usable_len, srv_monitor_file);
15915 		flen = len;
15916 	} else {
15917 		/* Omit the end of the output. */
15918 		flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
15919 	}
15920 
15921 	mutex_exit(&srv_monitor_file_mutex);
15922 
15923 	ret_val= stat_print(
15924 		thd, innobase_hton_name,
15925 		static_cast<uint>(strlen(innobase_hton_name)),
15926 		STRING_WITH_LEN(""), str, static_cast<uint>(flen));
15927 
15928 	my_free(str);
15929 
15930 	DBUG_RETURN(ret_val);
15931 }
15932 
15933 /** Callback for collecting mutex statistics */
15934 struct ShowStatus {
15935 
15936 	/** For tracking the mutex metrics */
15937 	struct Value {
15938 
15939 		/** Constructor
15940 		@param[in]	name		Name of the mutex
15941 		@param[in]	spins		Number of spins
15942 		@param[in]	os_waits	OS waits so far
15943 		@param[in]	calls		Number of calls to enter() */
ValueShowStatus::Value15944 		Value(const char*	name,
15945 		      ulint		spins,
15946 		      uint64_t		waits,
15947 		      uint64_t		calls)
15948 			:
15949 			m_name(name),
15950 			m_spins(spins),
15951 			m_waits(waits),
15952 			m_calls(calls)
15953 		{
15954 			/* No op */
15955 		}
15956 
15957 		/** Mutex name */
15958 		std::string		m_name;
15959 
15960 		/** Spins so far */
15961 		ulint			m_spins;
15962 
15963 		/** Waits so far */
15964 		uint64_t		m_waits;
15965 
15966 		/** Number of calls so far */
15967 		uint64_t		m_calls;
15968 	};
15969 
15970 	/** Order by m_waits, in descending order. */
15971 	struct OrderByWaits: public std::binary_function<Value, Value, bool>
15972 	{
15973 		/** @return true if rhs < lhs */
operator ()ShowStatus::OrderByWaits15974 		bool operator()(
15975 			const Value& lhs,
15976 			const Value& rhs) const
15977 			UNIV_NOTHROW
15978 		{
15979 			return(rhs.m_waits < lhs.m_waits);
15980 		}
15981 	};
15982 
15983 	typedef std::vector<Value, ut_allocator<Value> > Values;
15984 
15985 	/** Collect the individual latch counts */
15986 	struct GetCount {
15987 		typedef latch_meta_t::CounterType::Count Count;
15988 
15989 		/** Constructor
15990 		@param[in]	name		Latch name
15991 		@param[in,out]	values		Put the values here */
GetCountShowStatus::GetCount15992 		GetCount(
15993 			const char*	name,
15994 			Values*		values)
15995 			UNIV_NOTHROW
15996 			:
15997 			m_name(name),
15998 			m_values(values)
15999 		{
16000 			/* No op */
16001 		}
16002 
16003 		/** Collect the latch metrics. Ignore entries where the
16004 		spins and waits are zero.
16005 		@param[in]	count		The latch metrics */
operator ()ShowStatus::GetCount16006 		void operator()(Count* count) const UNIV_NOTHROW
16007 		{
16008 			if (count->m_spins > 0 || count->m_waits > 0) {
16009 
16010 				m_values->push_back(Value(
16011 					m_name,
16012 					count->m_spins,
16013 					count->m_waits,
16014 					count->m_calls));
16015 			}
16016 		}
16017 
16018 		/** The latch name */
16019 		const char*	m_name;
16020 
16021 		/** For collecting the active mutex stats. */
16022 		Values*		m_values;
16023 	};
16024 
16025 	/** Constructor */
ShowStatusShowStatus16026 	ShowStatus() { }
16027 
16028 	/** Callback for collecting the stats
16029 	@param[in]	latch_meta		Latch meta data
16030 	@return always returns true */
operator ()ShowStatus16031 	bool operator()(latch_meta_t& latch_meta)
16032 		UNIV_NOTHROW
16033 	{
16034 		latch_meta.get_counter()->iterate(
16035 			GetCount(latch_meta.get_name(), &m_values));
16036 
16037 		return(true);
16038 	}
16039 
16040 	/** Implements the SHOW MUTEX STATUS command, for mutexes.
16041 	The table structure is like so: Engine | Mutex Name | Status
16042 	We store the metrics  in the "Status" column as:
16043 
16044 		spins=N,waits=N,calls=N"
16045 
16046 	The user has to parse the dataunfortunately
16047 	@param[in,out]	thd		the MySQL query thread of the caller
16048 	@param[in,out]	stat_print	function for printing statistics
16049 	@return true on success. */
16050 	bool to_string(
16051 		THD*		thd,
16052 		stat_print_fn*	stat_print)
16053 		UNIV_NOTHROW;
16054 
16055 	/** For collecting the active mutex stats. */
16056 	Values		m_values;
16057 };
16058 
16059 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16060 The table structure is like so: Engine | Mutex Name | Status
16061 We store the metrics  in the "Status" column as:
16062 
16063 	spins=N,waits=N,calls=N"
16064 
16065 The user has to parse the dataunfortunately
16066 @param[in,out]	thd		the MySQL query thread of the caller
16067 @param[in,out]	stat_print	function for printing statistics
16068 @return true on success. */
16069 bool
to_string(THD * thd,stat_print_fn * stat_print)16070 ShowStatus::to_string(
16071 	THD*		thd,
16072 	stat_print_fn*	stat_print)
16073 	UNIV_NOTHROW
16074 {
16075 	uint		hton_name_len = (uint) strlen(innobase_hton_name);
16076 
16077 	std::sort(m_values.begin(), m_values.end(), OrderByWaits());
16078 
16079 	Values::iterator	end = m_values.end();
16080 
16081 	for (Values::iterator it = m_values.begin(); it != end; ++it) {
16082 
16083 		int	name_len;
16084 		char	name_buf[IO_SIZE];
16085 
16086 		name_len = snprintf(
16087 			name_buf, sizeof(name_buf), "%s", it->m_name.c_str());
16088 
16089 		int	status_len;
16090 		char	status_buf[IO_SIZE];
16091 
16092 		status_len = snprintf(
16093 			status_buf, sizeof(status_buf),
16094 			"spins=%lu,waits=%lu,calls=%llu",
16095 			static_cast<ulong>(it->m_spins),
16096 			static_cast<long>(it->m_waits),
16097 			(ulonglong) it->m_calls);
16098 
16099 		if (stat_print(thd, innobase_hton_name,
16100 			       hton_name_len,
16101 			       name_buf, static_cast<uint>(name_len),
16102 			       status_buf, static_cast<uint>(status_len))) {
16103 
16104 			return(false);
16105 		}
16106 	}
16107 
16108 	return(true);
16109 }
16110 
16111 /** Implements the SHOW MUTEX STATUS command, for mutexes.
16112 @param[in,out]	hton		the innodb handlerton
16113 @param[in,out]	thd		the MySQL query thread of the caller
16114 @param[in,out]	stat_print	function for printing statistics
16115 @return 0 on success. */
16116 static
16117 int
innodb_show_mutex_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16118 innodb_show_mutex_status(
16119 	handlerton*
16120 #ifdef DBUG_ASSERT_EXISTS
16121 	hton
16122 #endif
16123 	,
16124 	THD*		thd,
16125 	stat_print_fn*	stat_print)
16126 {
16127 	DBUG_ENTER("innodb_show_mutex_status");
16128 
16129 	ShowStatus	collector;
16130 
16131 	DBUG_ASSERT(hton == innodb_hton_ptr);
16132 
16133 	mutex_monitor.iterate(collector);
16134 
16135 	if (!collector.to_string(thd, stat_print)) {
16136 		DBUG_RETURN(1);
16137 	}
16138 
16139 	DBUG_RETURN(0);
16140 }
16141 
16142 /** Implements the SHOW MUTEX STATUS command.
16143 @param[in,out]	hton		the innodb handlerton
16144 @param[in,out]	thd		the MySQL query thread of the caller
16145 @param[in,out]	stat_print	function for printing statistics
16146 @return 0 on success. */
16147 static
16148 int
innodb_show_rwlock_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16149 innodb_show_rwlock_status(
16150 	handlerton*
16151 #ifdef DBUG_ASSERT_EXISTS
16152 	hton
16153 #endif
16154 	,
16155 	THD*		thd,
16156 	stat_print_fn*	stat_print)
16157 {
16158 	DBUG_ENTER("innodb_show_rwlock_status");
16159 
16160 	rw_lock_t*	block_rwlock = NULL;
16161 	ulint		block_rwlock_oswait_count = 0;
16162 	uint		hton_name_len = (uint) strlen(innobase_hton_name);
16163 
16164 	DBUG_ASSERT(hton == innodb_hton_ptr);
16165 
16166 	mutex_enter(&rw_lock_list_mutex);
16167 
16168 	for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list);
16169 	     rw_lock != NULL;
16170 	     rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) {
16171 
16172 		if (rw_lock->count_os_wait == 0) {
16173 			continue;
16174 		}
16175 
16176 		int		buf1len;
16177 		char		buf1[IO_SIZE];
16178 
16179 		if (rw_lock->is_block_lock) {
16180 
16181 			block_rwlock = rw_lock;
16182 			block_rwlock_oswait_count += rw_lock->count_os_wait;
16183 
16184 			continue;
16185 		}
16186 
16187 		buf1len = snprintf(
16188 			buf1, sizeof buf1, "rwlock: %s:%u",
16189 			innobase_basename(rw_lock->cfile_name),
16190 			rw_lock->cline);
16191 
16192 		int		buf2len;
16193 		char		buf2[IO_SIZE];
16194 
16195 		buf2len = snprintf(
16196 			buf2, sizeof buf2, "waits=%u",
16197 			rw_lock->count_os_wait);
16198 
16199 		if (stat_print(thd, innobase_hton_name,
16200 			       hton_name_len,
16201 			       buf1, static_cast<uint>(buf1len),
16202 			       buf2, static_cast<uint>(buf2len))) {
16203 
16204 			mutex_exit(&rw_lock_list_mutex);
16205 
16206 			DBUG_RETURN(1);
16207 		}
16208 	}
16209 
16210 	if (block_rwlock != NULL) {
16211 
16212 		int		buf1len;
16213 		char		buf1[IO_SIZE];
16214 
16215 		buf1len = snprintf(
16216 			buf1, sizeof buf1, "sum rwlock: %s:%u",
16217 			innobase_basename(block_rwlock->cfile_name),
16218 			block_rwlock->cline);
16219 
16220 		int		buf2len;
16221 		char		buf2[IO_SIZE];
16222 
16223 		buf2len = snprintf(
16224 			buf2, sizeof buf2, "waits=" ULINTPF,
16225 			block_rwlock_oswait_count);
16226 
16227 		if (stat_print(thd, innobase_hton_name,
16228 			       hton_name_len,
16229 			       buf1, static_cast<uint>(buf1len),
16230 			       buf2, static_cast<uint>(buf2len))) {
16231 
16232 			mutex_exit(&rw_lock_list_mutex);
16233 
16234 			DBUG_RETURN(1);
16235 		}
16236 	}
16237 
16238 	mutex_exit(&rw_lock_list_mutex);
16239 
16240 	DBUG_RETURN(0);
16241 }
16242 
16243 /** Implements the SHOW MUTEX STATUS command.
16244 @param[in,out]	hton		the innodb handlerton
16245 @param[in,out]	thd		the MySQL query thread of the caller
16246 @param[in,out]	stat_print	function for printing statistics
16247 @return 0 on success. */
16248 static
16249 int
innodb_show_latch_status(handlerton * hton,THD * thd,stat_print_fn * stat_print)16250 innodb_show_latch_status(
16251 	handlerton*	hton,
16252 	THD*		thd,
16253 	stat_print_fn*	stat_print)
16254 {
16255 	int	ret = innodb_show_mutex_status(hton, thd, stat_print);
16256 
16257 	if (ret != 0) {
16258 		return(ret);
16259 	}
16260 
16261 	return(innodb_show_rwlock_status(hton, thd, stat_print));
16262 }
16263 
16264 /************************************************************************//**
16265 Return 0 on success and non-zero on failure. Note: the bool return type
16266 seems to be abused here, should be an int. */
16267 static
16268 bool
innobase_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)16269 innobase_show_status(
16270 /*=================*/
16271 	handlerton*		hton,	/*!< in: the innodb handlerton */
16272 	THD*			thd,	/*!< in: the MySQL query thread
16273 					of the caller */
16274 	stat_print_fn*		stat_print,
16275 	enum ha_stat_type	stat_type)
16276 {
16277 	DBUG_ASSERT(hton == innodb_hton_ptr);
16278 
16279 	switch (stat_type) {
16280 	case HA_ENGINE_STATUS:
16281 		/* Non-zero return value means there was an error. */
16282 		return(innodb_show_status(hton, thd, stat_print) != 0);
16283 
16284 	case HA_ENGINE_MUTEX:
16285 		return(innodb_show_latch_status(hton, thd, stat_print) != 0);
16286 
16287 	case HA_ENGINE_LOGS:
16288 		/* Not handled */
16289 		break;
16290 	}
16291 
16292 	/* Success */
16293 	return(false);
16294 }
16295 /*********************************************************************//**
16296 Returns number of THR_LOCK locks used for one instance of InnoDB table.
16297 InnoDB no longer relies on THR_LOCK locks so 0 value is returned.
16298 Instead of THR_LOCK locks InnoDB relies on combination of metadata locks
16299 (e.g. for LOCK TABLES and DDL) and its own locking subsystem.
16300 Note that even though this method returns 0, SQL-layer still calls
16301 ::store_lock(), ::start_stmt() and ::external_lock() methods for InnoDB
16302 tables. */
16303 
16304 uint
lock_count(void) const16305 ha_innobase::lock_count(void) const
16306 /*===============================*/
16307 {
16308 	return 0;
16309 }
16310 
/*****************************************************************//**
Supposed to convert a MySQL table lock stored in the 'lock' field of the
handle to a proper type before storing pointer to the lock into an array
of pointers.
In practice, since InnoDB no longer relies on THR_LOCK locks and its
lock_count() method returns 0 it just informs storage engine about type
of THR_LOCK which SQL-layer would have acquired for this specific statement
on this specific table.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement.

What this implementation does with that information:
- refreshes trx->isolation_level when no MySQL table is in use yet;
- in read-only mode, sends an ER_READ_ONLY_MODE warning for the listed
  data- or schema-changing commands;
- starts quiescing the table for FLUSH TABLES ... WITH READ LOCK;
- chooses m_prebuilt->select_lock_type and stored_select_lock_type
  (LOCK_S or LOCK_NONE) from the THR_LOCK type and the SQL command;
- sets trx->will_lock if the transaction has not started and a locking
  read was chosen.
The 'to' array itself is never written.
@return pointer to the current element in the 'to' array. */

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
	THD*			thd,		/*!< in: user thread handle */
	THR_LOCK_DATA**		to,		/*!< in: pointer to the current
						element in an array of pointers
						to lock structs;
						only used as return value */
	thr_lock_type		lock_type)	/*!< in: lock type to store in
						'lock'; this may also be
						TL_IGNORE */
{
	/* Note that trx in this function is NOT necessarily m_prebuilt->trx
	because we call update_thd() later, in ::external_lock()! Failure to
	understand this caused a serious memory corruption bug in 5.1.11. */

	trx_t*	trx = check_trx_exists(thd);

	/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
	Be careful to ignore TL_IGNORE if we are going to do something with
	only 'real' locks! */

	/* If no MySQL table is in use, we need to set the isolation level
	of the transaction. */

	if (lock_type != TL_IGNORE
	    && trx->n_mysql_tables_in_use == 0) {
		trx->isolation_level = innobase_map_isolation_level(
			(enum_tx_isolation) thd_tx_isolation(thd));

		if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {

			/* At low transaction isolation levels we let
			each consistent read set its own snapshot */
			trx->read_view.close();
		}
	}

	DBUG_ASSERT(EQ_CURRENT_THD(thd));
	const bool in_lock_tables = thd_in_lock_tables(thd);
	const int sql_command = thd_sql_command(thd);

	/* The branches below are mutually exclusive and evaluated in this
	order; in particular, in read-only mode DROP TABLE and DROP SEQUENCE
	are caught by the first branch, not by the DROP TABLE branch. */

	if (srv_read_only_mode
	    && (sql_command == SQLCOM_UPDATE
		|| sql_command == SQLCOM_INSERT
		|| sql_command == SQLCOM_REPLACE
		|| sql_command == SQLCOM_DROP_TABLE
		|| sql_command == SQLCOM_ALTER_TABLE
		|| sql_command == SQLCOM_OPTIMIZE
		|| (sql_command == SQLCOM_CREATE_TABLE
		    && (lock_type >= TL_WRITE_CONCURRENT_INSERT
			 && lock_type <= TL_WRITE))
		|| sql_command == SQLCOM_CREATE_INDEX
		|| sql_command == SQLCOM_DROP_INDEX
		|| sql_command == SQLCOM_CREATE_SEQUENCE
		|| sql_command == SQLCOM_DROP_SEQUENCE
		|| sql_command == SQLCOM_DELETE)) {

		/* Only a warning is sent here; m_prebuilt is left
		unchanged by this branch. */
		ib_senderrf(trx->mysql_thd,
			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);

	} else if (sql_command == SQLCOM_FLUSH
		   && lock_type == TL_READ_NO_INSERT) {

		/* Check for FLUSH TABLES ... WITH READ LOCK */

		/* Note: This call can fail, but there is no way to return
		the error to the caller. We simply ignore it for now here
		and push the error code to the caller where the error is
		detected in the function. */

		dberr_t	err = row_quiesce_set_state(
			m_prebuilt->table, QUIESCE_START, trx);

		/* Any result other than these two is treated as fatal. */
		ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
			m_prebuilt->select_lock_type = LOCK_S;
			m_prebuilt->stored_select_lock_type = LOCK_S;
		} else {
			m_prebuilt->select_lock_type = LOCK_NONE;
			m_prebuilt->stored_select_lock_type = LOCK_NONE;
		}

	/* Check for DROP TABLE */
	} else if (sql_command == SQLCOM_DROP_TABLE ||
                   sql_command == SQLCOM_DROP_SEQUENCE) {

		/* MySQL calls this function in DROP TABLE though this table
		handle may belong to another thd that is running a query. Let
		us in that case skip any changes to the m_prebuilt struct. */

	/* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
	} else if ((lock_type == TL_READ && in_lock_tables)
		   || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
		   || lock_type == TL_READ_WITH_SHARED_LOCKS
		   || lock_type == TL_READ_NO_INSERT
		   || (lock_type != TL_IGNORE
		       && sql_command != SQLCOM_SELECT)) {

		/* The OR cases above are in this order:
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
		are processing a stored procedure or function, or
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
		INSERT INTO ... SELECT ... and the logical logging (MySQL
		binlog) requires the use of a locking read, or
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */

		/* Use consistent read for CHECKSUM TABLE, for CREATE
		SEQUENCE, for ANALYZE with TL_READ, and for the statement
		group described inside the branch below.
		NOTE(review): SQLCOM_CREATE_SEQUENCE is already tested
		unconditionally as the second operand of this condition, so
		its second occurrence in the inner command list is
		redundant. */

		if (sql_command == SQLCOM_CHECKSUM
		    || sql_command == SQLCOM_CREATE_SEQUENCE
		    || (sql_command == SQLCOM_ANALYZE && lock_type == TL_READ)
		    || ((srv_locks_unsafe_for_binlog
			|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
			&& trx->isolation_level != TRX_ISO_SERIALIZABLE
			&& (lock_type == TL_READ
			    || lock_type == TL_READ_NO_INSERT)
			&& (sql_command == SQLCOM_INSERT_SELECT
			    || sql_command == SQLCOM_REPLACE_SELECT
			    || sql_command == SQLCOM_UPDATE
			    || sql_command == SQLCOM_CREATE_SEQUENCE
			    || sql_command == SQLCOM_CREATE_TABLE))) {

			/* If we either have srv_locks_unsafe_for_binlog
			set or this session is using READ COMMITTED (or a
			lower) isolation level and isolation level of the
			transaction is not set to serializable and MySQL is
			doing INSERT INTO...SELECT or REPLACE INTO...SELECT
			or UPDATE ... = (SELECT ...) or CREATE  ...
			SELECT... without FOR UPDATE or IN SHARE
			MODE in select, then we use consistent read
			for select. */

			m_prebuilt->select_lock_type = LOCK_NONE;
			m_prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			m_prebuilt->select_lock_type = LOCK_S;
			m_prebuilt->stored_select_lock_type = LOCK_S;
		}

	} else if (lock_type != TL_IGNORE) {

		/* We set possible LOCK_X value in external_lock, not yet
		here even if this would be SELECT ... FOR UPDATE */

		m_prebuilt->select_lock_type = LOCK_NONE;
		m_prebuilt->stored_select_lock_type = LOCK_NONE;
	}

	/* A transaction that has not started yet and is about to do a
	locking read is flagged as one that will take locks. */
	if (!trx_is_started(trx)
	    && (m_prebuilt->select_lock_type != LOCK_NONE
	        || m_prebuilt->stored_select_lock_type != LOCK_NONE)) {

		trx->will_lock = true;
	}

	return(to);
}
16495 
16496 /*********************************************************************//**
16497 Read the next autoinc value. Acquire the relevant locks before reading
16498 the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
16499 on return and all relevant locks acquired.
16500 @return DB_SUCCESS or error code */
16501 
16502 dberr_t
innobase_get_autoinc(ulonglong * value)16503 ha_innobase::innobase_get_autoinc(
16504 /*==============================*/
16505 	ulonglong*	value)		/*!< out: autoinc value */
16506 {
16507 	*value = 0;
16508 
16509 	m_prebuilt->autoinc_error = innobase_lock_autoinc();
16510 
16511 	if (m_prebuilt->autoinc_error == DB_SUCCESS) {
16512 
16513 		/* Determine the first value of the interval */
16514 		*value = dict_table_autoinc_read(m_prebuilt->table);
16515 
16516 		/* It should have been initialized during open. */
16517 		if (*value == 0) {
16518 			m_prebuilt->autoinc_error = DB_UNSUPPORTED;
16519 			m_prebuilt->table->autoinc_mutex.unlock();
16520 		}
16521 	}
16522 
16523 	return(m_prebuilt->autoinc_error);
16524 }
16525 
16526 /*******************************************************************//**
16527 This function reads the global auto-inc counter. It doesn't use the
16528 AUTOINC lock even if the lock mode is set to TRADITIONAL.
16529 @return the autoinc value */
16530 
16531 ulonglong
innobase_peek_autoinc(void)16532 ha_innobase::innobase_peek_autoinc(void)
16533 /*====================================*/
16534 {
16535 	ulonglong	auto_inc;
16536 	dict_table_t*	innodb_table;
16537 
16538 	ut_a(m_prebuilt != NULL);
16539 	ut_a(m_prebuilt->table != NULL);
16540 
16541 	innodb_table = m_prebuilt->table;
16542 
16543 	innodb_table->autoinc_mutex.lock();
16544 
16545 	auto_inc = dict_table_autoinc_read(innodb_table);
16546 
16547 	if (auto_inc == 0) {
16548 		ib::info() << "AUTOINC next value generation is disabled for"
16549 			" '" << innodb_table->name << "'";
16550 	}
16551 
16552 	innodb_table->autoinc_mutex.unlock();
16553 
16554 	return(auto_inc);
16555 }
16556 
/*********************************************************************//**
Returns the value of the auto-inc counter in *first_value and ~0 on failure.
The counter is read through innobase_get_autoinc(); when that succeeds the
table's autoinc_mutex is held, and every return path after that point
releases it. Unless old style AUTOINC locking is in use, the table counter
is also advanced past the reserved interval here. */

void
ha_innobase::get_auto_increment(
/*============================*/
	ulonglong	offset,			/*!< in: table autoinc offset */
	ulonglong	increment,		/*!< in: table autoinc
						increment */
	ulonglong	nb_desired_values,	/*!< in: number of values
						reqd */
	ulonglong*	first_value,		/*!< out: the autoinc value */
	ulonglong*	nb_reserved_values)	/*!< out: count of reserved
						values */
{
	trx_t*		trx;
	dberr_t		error;
	ulonglong	autoinc = 0;

	/* Prepare m_prebuilt->trx in the table handle */
	update_thd(ha_thd());

	error = innobase_get_autoinc(&autoinc);

	if (error != DB_SUCCESS) {
		/* Failure is signalled through *first_value only;
		*nb_reserved_values is not written on this path. */
		*first_value = (~(ulonglong) 0);
		return;
	}

	/* This is a hack, since nb_desired_values seems to be accurate only
	for the first call to get_auto_increment() for multi-row INSERT and
	meaningless for other statements e.g, LOAD etc. Subsequent calls to
	this method for the same statement results in different values which
	don't make sense. Therefore we store the value the first time we are
	called and count down from that as rows are written (see write_row()).
	*/

	trx = m_prebuilt->trx;

	/* Note: We can't rely on *first_value since some MySQL engines,
	in particular the partition engine, don't initialize it to 0 when
	invoking this method. So we are not sure if it's guaranteed to
	be 0 or not. */

	/* We need the upper limit of the col type to check for
	whether we update the table autoinc counter or not. */
	ulonglong col_max_value =
			table->next_number_field->get_max_int_value();

	/** The following logic is needed to avoid duplicate key error
	for autoincrement column.

	(1) InnoDB gives the current autoincrement value with respect
	to increment and offset value.

	(2) Basically it does compute_next_insert_id() logic inside InnoDB
	to avoid the current auto increment value changed by handler layer.

	(3) It is restricted only for insert operations. */

	if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE
	    && autoinc < col_max_value) {

		ulonglong prev_auto_inc = autoinc;

		/* Round autoinc up to a value of the form
		offset + N * increment.
		NOTE(review): the arithmetic is unsigned; this presumably
		relies on offset being small enough that
		(autoinc - 1) + increment - offset does not wrap -- confirm
		against the callers. */
		autoinc = ((autoinc - 1) + increment - offset)/ increment;

		autoinc = autoinc * increment + offset;

		/* If autoinc exceeds the col_max_value then reset
		to old autoinc value. Because in case of non-strict
		sql mode, boundary value is not considered as error. */

		if (autoinc >= col_max_value) {
			autoinc = prev_auto_inc;
		}

		ut_ad(autoinc > 0);
	}

	/* Called for the first time ? */
	if (trx->n_autoinc_rows == 0) {

		trx->n_autoinc_rows = (ulint) nb_desired_values;

		/* It's possible for nb_desired_values to be 0:
		e.g., INSERT INTO T1(C) SELECT C FROM T2; */
		if (nb_desired_values == 0) {

			trx->n_autoinc_rows = 1;
		}

		set_if_bigger(*first_value, autoinc);
	/* Not in the middle of a multi-row INSERT. */
	} else if (m_prebuilt->autoinc_last_value == 0) {
		set_if_bigger(*first_value, autoinc);
	}

	if (*first_value > col_max_value) {
		/* Out of range number. Let handler::update_auto_increment()
		take care of this */
		m_prebuilt->autoinc_last_value = 0;
		/* Early exit: release the mutex that was acquired
		through innobase_get_autoinc(). */
		m_prebuilt->table->autoinc_mutex.unlock();
		*nb_reserved_values= 0;
		return;
	}

	*nb_reserved_values = trx->n_autoinc_rows;

	/* With old style AUTOINC locking we only update the table's
	AUTOINC counter after attempting to insert the row. */
	if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
		ulonglong	current;
		ulonglong	next_value;

		current = *first_value;

		/* Compute the last value in the interval */
		next_value = innobase_next_autoinc(
			current, *nb_reserved_values, increment, offset,
			col_max_value);

		m_prebuilt->autoinc_last_value = next_value;

		if (m_prebuilt->autoinc_last_value < *first_value) {
			/* The computed end of the interval lies below its
			start: report failure instead of updating the
			table counter. */
			*first_value = (~(ulonglong) 0);
		} else {
			/* Update the table autoinc variable */
			dict_table_autoinc_update_if_greater(
				m_prebuilt->table,
				m_prebuilt->autoinc_last_value);
		}
	} else {
		/* This will force write_row() into attempting an update
		of the table's AUTOINC counter. */
		m_prebuilt->autoinc_last_value = 0;
	}

	/* The increment to be used to increase the AUTOINC value, we use
	this in write_row() and update_row() to increase the autoinc counter
	for columns that are filled by the user. We need the offset and
	the increment. */
	m_prebuilt->autoinc_offset = offset;
	m_prebuilt->autoinc_increment = increment;

	/* Normal exit: release the mutex that was acquired through
	innobase_get_autoinc(). */
	m_prebuilt->table->autoinc_mutex.unlock();
}
16704 
16705 /*******************************************************************//**
16706 See comment in handler.cc */
16707 
16708 bool
get_error_message(int error,String * buf)16709 ha_innobase::get_error_message(
16710 /*===========================*/
16711 	int	error,
16712 	String*	buf)
16713 {
16714 	trx_t*	trx = check_trx_exists(ha_thd());
16715 
16716 	if (error == HA_ERR_DECRYPTION_FAILED) {
16717 		const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.";
16718 		buf->copy(msg, (uint)strlen(msg), system_charset_info);
16719 	} else {
16720 		buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
16721 			system_charset_info);
16722 	}
16723 
16724 	return(FALSE);
16725 }
16726 
16727 /** Retrieves the names of the table and the key for which there was a
16728 duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
16729 
16730 If any of the names is not available, then this method will return
16731 false and will not change any of child_table_name or child_key_name.
16732 
16733 @param[out] child_table_name Table name
16734 @param[in] child_table_name_len Table name buffer size
16735 @param[out] child_key_name Key name
16736 @param[in] child_key_name_len Key name buffer size
16737 
16738 @retval true table and key names were available and were written into the
16739 corresponding out parameters.
16740 @retval false table and key names were not available, the out parameters
16741 were not touched. */
16742 bool
get_foreign_dup_key(char * child_table_name,uint child_table_name_len,char * child_key_name,uint child_key_name_len)16743 ha_innobase::get_foreign_dup_key(
16744 /*=============================*/
16745 	char*	child_table_name,
16746 	uint	child_table_name_len,
16747 	char*	child_key_name,
16748 	uint	child_key_name_len)
16749 {
16750 	const dict_index_t*	err_index;
16751 
16752 	ut_a(m_prebuilt->trx != NULL);
16753 	ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
16754 
16755 	err_index = trx_get_error_info(m_prebuilt->trx);
16756 
16757 	if (err_index == NULL) {
16758 		return(false);
16759 	}
16760 	/* else */
16761 
16762 	/* copy table name (and convert from filename-safe encoding to
16763 	system_charset_info) */
16764 	char*	p = strchr(err_index->table->name.m_name, '/');
16765 
16766 	/* strip ".../" prefix if any */
16767 	if (p != NULL) {
16768 		p++;
16769 	} else {
16770 		p = err_index->table->name.m_name;
16771 	}
16772 
16773 	size_t	len;
16774 
16775 	len = filename_to_tablename(p, child_table_name, child_table_name_len);
16776 
16777 	child_table_name[len] = '\0';
16778 
16779 	/* copy index name */
16780 	snprintf(child_key_name, child_key_name_len, "%s",
16781 		    err_index->name());
16782 
16783 	return(true);
16784 }
16785 
16786 /*******************************************************************//**
16787 Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
16788 If there is no explicitly declared non-null unique key or a primary key, then
16789 InnoDB internally uses the row id as the primary key.
16790 @return < 0 if ref1 < ref2, 0 if equal, else > 0 */
16791 
16792 int
cmp_ref(const uchar * ref1,const uchar * ref2)16793 ha_innobase::cmp_ref(
16794 /*=================*/
16795 	const uchar*	ref1,	/*!< in: an (internal) primary key value in the
16796 				MySQL key value format */
16797 	const uchar*	ref2)	/*!< in: an (internal) primary key value in the
16798 				MySQL key value format */
16799 {
16800 	enum_field_types mysql_type;
16801 	Field*		field;
16802 	KEY_PART_INFO*	key_part;
16803 	KEY_PART_INFO*	key_part_end;
16804 	uint		len1;
16805 	uint		len2;
16806 	int		result;
16807 
16808 	if (m_prebuilt->clust_index_was_generated) {
16809 		/* The 'ref' is an InnoDB row id */
16810 
16811 		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
16812 	}
16813 
16814 	/* Do a type-aware comparison of primary key fields. PK fields
16815 	are always NOT NULL, so no checks for NULL are performed. */
16816 
16817 	key_part = table->key_info[table->s->primary_key].key_part;
16818 
16819 	key_part_end = key_part
16820 		+ table->key_info[table->s->primary_key].user_defined_key_parts;
16821 
16822 	for (; key_part != key_part_end; ++key_part) {
16823 		field = key_part->field;
16824 		mysql_type = field->type();
16825 
16826 		if (mysql_type == MYSQL_TYPE_TINY_BLOB
16827 			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
16828 			|| mysql_type == MYSQL_TYPE_BLOB
16829 			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
16830 
16831 			/* In the MySQL key value format, a column prefix of
16832 			a BLOB is preceded by a 2-byte length field */
16833 
16834 			len1 = innobase_read_from_2_little_endian(ref1);
16835 			len2 = innobase_read_from_2_little_endian(ref2);
16836 
16837 			result = ((Field_blob*) field)->cmp(
16838 				ref1 + 2, len1, ref2 + 2, len2);
16839 		} else {
16840 			result = field->key_cmp(ref1, ref2);
16841 		}
16842 
16843 		if (result) {
16844 
16845 			return(result);
16846 		}
16847 
16848 		ref1 += key_part->store_length;
16849 		ref2 += key_part->store_length;
16850 	}
16851 
16852 	return(0);
16853 }
16854 
16855 /*******************************************************************//**
16856 Ask InnoDB if a query to a table can be cached.
16857 @return TRUE if query caching of the table is permitted */
16858 
16859 my_bool
register_query_cache_table(THD * thd,const char * table_key,uint key_length,qc_engine_callback * call_back,ulonglong * engine_data)16860 ha_innobase::register_query_cache_table(
16861 /*====================================*/
16862 	THD*		thd,		/*!< in: user thread handle */
16863 	const char*	table_key,	/*!< in: normalized path to the
16864 					table */
16865 	uint		key_length,	/*!< in: length of the normalized
16866 					path to the table */
16867 	qc_engine_callback*
16868 			call_back,	/*!< out: pointer to function for
16869 					checking if query caching
16870 					is permitted */
16871 	ulonglong	*engine_data)	/*!< in/out: data to call_back */
16872 {
16873 	*engine_data = 0;
16874 	*call_back = innobase_query_caching_of_table_permitted;
16875 
16876 	return(innobase_query_caching_of_table_permitted(
16877 			thd, table_key,
16878 			static_cast<uint>(key_length),
16879 			engine_data));
16880 }
16881 
16882 /******************************************************************//**
16883 This function is used to find the storage length in bytes of the first n
16884 characters for prefix indexes using a multibyte character set. The function
16885 finds charset information and returns length of prefix_len characters in the
16886 index field in bytes.
16887 @return number of bytes occupied by the first n characters */
16888 ulint
innobase_get_at_most_n_mbchars(ulint charset_id,ulint prefix_len,ulint data_len,const char * str)16889 innobase_get_at_most_n_mbchars(
16890 /*===========================*/
16891 	ulint charset_id,	/*!< in: character set id */
16892 	ulint prefix_len,	/*!< in: prefix length in bytes of the index
16893 				(this has to be divided by mbmaxlen to get the
16894 				number of CHARACTERS n in the prefix) */
16895 	ulint data_len,		/*!< in: length of the string in bytes */
16896 	const char* str)	/*!< in: character string */
16897 {
16898 	ulint char_length;	/*!< character length in bytes */
16899 	ulint n_chars;		/*!< number of characters in prefix */
16900 	CHARSET_INFO* charset;	/*!< charset used in the field */
16901 
16902 	charset = get_charset((uint) charset_id, MYF(MY_WME));
16903 
16904 	ut_ad(charset);
16905 	ut_ad(charset->mbmaxlen);
16906 
16907 	/* Calculate how many characters at most the prefix index contains */
16908 
16909 	n_chars = prefix_len / charset->mbmaxlen;
16910 
16911 	/* If the charset is multi-byte, then we must find the length of the
16912 	first at most n chars in the string. If the string contains less
16913 	characters than n, then we return the length to the end of the last
16914 	character. */
16915 
16916 	if (charset->mbmaxlen > 1) {
16917 		/* my_charpos() returns the byte length of the first n_chars
16918 		characters, or a value bigger than the length of str, if
16919 		there were not enough full characters in str.
16920 
16921 		Why does the code below work:
16922 		Suppose that we are looking for n UTF-8 characters.
16923 
16924 		1) If the string is long enough, then the prefix contains at
16925 		least n complete UTF-8 characters + maybe some extra
16926 		characters + an incomplete UTF-8 character. No problem in
16927 		this case. The function returns the pointer to the
16928 		end of the nth character.
16929 
16930 		2) If the string is not long enough, then the string contains
16931 		the complete value of a column, that is, only complete UTF-8
16932 		characters, and we can store in the column prefix index the
16933 		whole string. */
16934 
16935 		char_length= my_charpos(charset, str, str + data_len, n_chars);
16936 		if (char_length > data_len) {
16937 			char_length = data_len;
16938 		}
16939 	} else if (data_len < prefix_len) {
16940 
16941 		char_length = data_len;
16942 
16943 	} else {
16944 
16945 		char_length = prefix_len;
16946 	}
16947 
16948 	return(char_length);
16949 }
16950 
16951 /*******************************************************************//**
16952 This function is used to prepare an X/Open XA distributed transaction.
16953 @return 0 or error number */
16954 static
16955 int
innobase_xa_prepare(handlerton * hton,THD * thd,bool prepare_trx)16956 innobase_xa_prepare(
16957 /*================*/
16958 	handlerton*	hton,		/*!< in: InnoDB handlerton */
16959 	THD*		thd,		/*!< in: handle to the MySQL thread of
16960 					the user whose XA transaction should
16961 					be prepared */
16962 	bool		prepare_trx)	/*!< in: true - prepare transaction
16963 					false - the current SQL statement
16964 					ended */
16965 {
16966 	trx_t*		trx = check_trx_exists(thd);
16967 
16968 	DBUG_ASSERT(hton == innodb_hton_ptr);
16969 
16970 	thd_get_xid(thd, (MYSQL_XID*) trx->xid);
16971 
16972 	innobase_srv_conc_force_exit_innodb(trx);
16973 
16974 	if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
16975 
16976 		sql_print_error("Transaction not registered for MariaDB 2PC,"
16977 				" but transaction is active");
16978 	}
16979 
16980 	if (prepare_trx
16981 	    || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
16982 
16983 		/* We were instructed to prepare the whole transaction, or
16984 		this is an SQL statement end and autocommit is on */
16985 
16986 		ut_ad(trx_is_registered_for_2pc(trx));
16987 
16988 		trx_prepare_for_mysql(trx);
16989 	} else {
16990 		/* We just mark the SQL statement ended and do not do a
16991 		transaction prepare */
16992 
16993 		/* If we had reserved the auto-inc lock for some
16994 		table in this SQL statement we release it now */
16995 
16996 		lock_unlock_table_autoinc(trx);
16997 
16998 		/* Store the current undo_no of the transaction so that we
16999 		know where to roll back if we have to roll back the next
17000 		SQL statement */
17001 
17002 		trx_mark_sql_stat_end(trx);
17003 	}
17004 
17005 	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
17006 	    && (prepare_trx
17007 		|| !thd_test_options(
17008 			thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
17009 
17010 		/* For mysqlbackup to work the order of transactions in binlog
17011 		and InnoDB must be the same. Consider the situation
17012 
17013 		  thread1> prepare; write to binlog; ...
17014 			  <context switch>
17015 		  thread2> prepare; write to binlog; commit
17016 		  thread1>			     ... commit
17017 
17018 		The server guarantees that writes to the binary log
17019 		and commits are in the same order, so we do not have
17020 		to handle this case. */
17021 	}
17022 
17023 	return(0);
17024 }
17025 
17026 /*******************************************************************//**
17027 This function is used to recover X/Open XA distributed transactions.
17028 @return number of prepared transactions stored in xid_list */
17029 static
17030 int
innobase_xa_recover(handlerton * hton,XID * xid_list,uint len)17031 innobase_xa_recover(
17032 /*================*/
17033 	handlerton*	hton,	/*!< in: InnoDB handlerton */
17034 	XID*		xid_list,/*!< in/out: prepared transactions */
17035 	uint		len)	/*!< in: number of slots in xid_list */
17036 {
17037 	DBUG_ASSERT(hton == innodb_hton_ptr);
17038 
17039 	if (len == 0 || xid_list == NULL) {
17040 
17041 		return(0);
17042 	}
17043 
17044 	return(trx_recover_for_mysql(xid_list, len));
17045 }
17046 
17047 /*******************************************************************//**
17048 This function is used to commit one X/Open XA distributed transaction
17049 which is in the prepared state
17050 @return 0 or error number */
17051 static
17052 int
innobase_commit_by_xid(handlerton * hton,XID * xid)17053 innobase_commit_by_xid(
17054 /*===================*/
17055 	handlerton*	hton,
17056 	XID*		xid)	/*!< in: X/Open XA transaction identification */
17057 {
17058 	DBUG_ASSERT(hton == innodb_hton_ptr);
17059 
17060 	if (high_level_read_only) {
17061 		return(XAER_RMFAIL);
17062 	}
17063 
17064 	if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17065 		/* use cases are: disconnected xa, slave xa, recovery */
17066 		innobase_commit_low(trx);
17067 		ut_ad(trx->mysql_thd == NULL);
17068 		trx_deregister_from_2pc(trx);
17069 		ut_ad(!trx->will_lock);    /* trx cache requirement */
17070 		trx->free();
17071 
17072 		return(XA_OK);
17073 	} else {
17074 		return(XAER_NOTA);
17075 	}
17076 }
17077 
17078 /** This function is used to rollback one X/Open XA distributed transaction
17079 which is in the prepared state
17080 
17081 @param[in] hton InnoDB handlerton
17082 @param[in] xid X/Open XA transaction identification
17083 
17084 @return 0 or error number */
innobase_rollback_by_xid(handlerton * hton,XID * xid)17085 int innobase_rollback_by_xid(handlerton* hton, XID* xid)
17086 {
17087 	DBUG_ASSERT(hton == innodb_hton_ptr);
17088 
17089 	if (high_level_read_only) {
17090 		return(XAER_RMFAIL);
17091 	}
17092 
17093 	if (trx_t* trx = trx_get_trx_by_xid(xid)) {
17094 #ifdef WITH_WSREP
17095 		/* If a wsrep transaction is being rolled back during
17096 		the recovery, we must clear the xid in order to avoid
17097 		writing serialisation history for rolled back transaction. */
17098 		if (wsrep_is_wsrep_xid(trx->xid)) {
17099 			trx->xid->null();
17100 		}
17101 #endif /* WITH_WSREP */
17102 		int ret = innobase_rollback_trx(trx);
17103 		trx_deregister_from_2pc(trx);
17104 		ut_ad(!trx->will_lock);
17105 		trx->free();
17106 
17107 		return(ret);
17108 	} else {
17109 		return(XAER_NOTA);
17110 	}
17111 }
17112 
17113 bool
check_if_incompatible_data(HA_CREATE_INFO * info,uint table_changes)17114 ha_innobase::check_if_incompatible_data(
17115 /*====================================*/
17116 	HA_CREATE_INFO*	info,
17117 	uint		table_changes)
17118 {
17119 	ha_table_option_struct *param_old, *param_new;
17120 
17121 	/* Cache engine specific options */
17122 	param_new = info->option_struct;
17123 	param_old = table->s->option_struct;
17124 
17125 	innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info);
17126 
17127 	if (table_changes != IS_EQUAL_YES) {
17128 
17129 		return(COMPATIBLE_DATA_NO);
17130 	}
17131 
17132 	/* Check that auto_increment value was not changed */
17133 	if ((info->used_fields & HA_CREATE_USED_AUTO)
17134 	    && info->auto_increment_value != 0) {
17135 
17136 		return(COMPATIBLE_DATA_NO);
17137 	}
17138 
17139 	/* Check that row format didn't change */
17140 	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
17141 	    && info->row_type != get_row_type()) {
17142 
17143 		return(COMPATIBLE_DATA_NO);
17144 	}
17145 
17146 	/* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
17147 	if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
17148 		return(COMPATIBLE_DATA_NO);
17149 	}
17150 
17151 	/* Changes on engine specific table options requests a rebuild of the table. */
17152 	if (param_new->page_compressed != param_old->page_compressed ||
17153 	    param_new->page_compression_level != param_old->page_compression_level)
17154         {
17155 		return(COMPATIBLE_DATA_NO);
17156 	}
17157 
17158 	return(COMPATIBLE_DATA_YES);
17159 }
17160 
17161 /****************************************************************//**
17162 Update the system variable innodb_io_capacity_max using the "saved"
17163 value. This function is registered as a callback with MySQL. */
17164 static
17165 void
innodb_io_capacity_max_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17166 innodb_io_capacity_max_update(
17167 /*===========================*/
17168 	THD*				thd,	/*!< in: thread handle */
17169 	st_mysql_sys_var*, void*,
17170 	const void*			save)	/*!< in: immediate result
17171 						from check function */
17172 {
17173 	ulong	in_val = *static_cast<const ulong*>(save);
17174 
17175 	if (in_val < srv_io_capacity) {
17176 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17177 				    ER_WRONG_ARGUMENTS,
17178 				    "Setting innodb_io_capacity_max %lu"
17179 			" lower than innodb_io_capacity %lu.",
17180 			in_val, srv_io_capacity);
17181 
17182 		srv_io_capacity = in_val;
17183 
17184 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17185 					    ER_WRONG_ARGUMENTS,
17186 				    "Setting innodb_io_capacity to %lu",
17187 				    srv_io_capacity);
17188 	}
17189 
17190 	srv_max_io_capacity = in_val;
17191 }
17192 
17193 /****************************************************************//**
17194 Update the system variable innodb_io_capacity using the "saved"
17195 value. This function is registered as a callback with MySQL. */
17196 static
17197 void
innodb_io_capacity_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17198 innodb_io_capacity_update(
17199 /*======================*/
17200 	THD*				thd,	/*!< in: thread handle */
17201 	st_mysql_sys_var*, void*,
17202 	const void*			save)	/*!< in: immediate result
17203 						from check function */
17204 {
17205 	ulong	in_val = *static_cast<const ulong*>(save);
17206 
17207 	if (in_val > srv_max_io_capacity) {
17208 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17209 				    ER_WRONG_ARGUMENTS,
17210 				    "Setting innodb_io_capacity to %lu"
17211 				    " higher than innodb_io_capacity_max %lu",
17212 				    in_val, srv_max_io_capacity);
17213 
17214 		srv_max_io_capacity = (in_val & ~(~0UL >> 1))
17215 			? in_val : in_val * 2;
17216 
17217 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17218 				    ER_WRONG_ARGUMENTS,
17219 				    "Setting innodb_max_io_capacity to %lu",
17220 				    srv_max_io_capacity);
17221 	}
17222 
17223 	srv_io_capacity = in_val;
17224 }
17225 
17226 /****************************************************************//**
17227 Update the system variable innodb_max_dirty_pages_pct using the "saved"
17228 value. This function is registered as a callback with MySQL. */
17229 static
17230 void
innodb_max_dirty_pages_pct_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17231 innodb_max_dirty_pages_pct_update(
17232 /*==============================*/
17233 	THD*				thd,	/*!< in: thread handle */
17234 	st_mysql_sys_var*, void*,
17235 	const void*			save)	/*!< in: immediate result
17236 						from check function */
17237 {
17238 	double	in_val = *static_cast<const double*>(save);
17239 	if (in_val < srv_max_dirty_pages_pct_lwm) {
17240 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17241 				    ER_WRONG_ARGUMENTS,
17242 				    "innodb_max_dirty_pages_pct cannot be"
17243 				    " set lower than"
17244 				    " innodb_max_dirty_pages_pct_lwm.");
17245 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17246 				    ER_WRONG_ARGUMENTS,
17247 				    "Lowering"
17248 				    " innodb_max_dirty_page_pct_lwm to %lf",
17249 				    in_val);
17250 
17251 		srv_max_dirty_pages_pct_lwm = in_val;
17252 	}
17253 
17254 	srv_max_buf_pool_modified_pct = in_val;
17255 }
17256 
17257 /****************************************************************//**
17258 Update the system variable innodb_max_dirty_pages_pct_lwm using the
17259 "saved" value. This function is registered as a callback with MySQL. */
17260 static
17261 void
innodb_max_dirty_pages_pct_lwm_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17262 innodb_max_dirty_pages_pct_lwm_update(
17263 /*==================================*/
17264 	THD*				thd,	/*!< in: thread handle */
17265 	st_mysql_sys_var*, void*,
17266 	const void*			save)	/*!< in: immediate result
17267 						from check function */
17268 {
17269 	double	in_val = *static_cast<const double*>(save);
17270 	if (in_val > srv_max_buf_pool_modified_pct) {
17271 		in_val = srv_max_buf_pool_modified_pct;
17272 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17273 				    ER_WRONG_ARGUMENTS,
17274 				    "innodb_max_dirty_pages_pct_lwm"
17275 				    " cannot be set higher than"
17276 				    " innodb_max_dirty_pages_pct.");
17277 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
17278 				    ER_WRONG_ARGUMENTS,
17279 				    "Setting innodb_max_dirty_page_pct_lwm"
17280 				    " to %lf",
17281 				    in_val);
17282 	}
17283 
17284 	srv_max_dirty_pages_pct_lwm = in_val;
17285 }
17286 
17287 /*************************************************************//**
17288 Don't allow to set innodb_fast_shutdown=0 if purge threads are
17289 already down.
17290 @return 0 if innodb_fast_shutdown can be set */
17291 static
17292 int
fast_shutdown_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)17293 fast_shutdown_validate(
17294 /*=============================*/
17295 	THD*				thd,	/*!< in: thread handle */
17296 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
17297 						variable */
17298 	void*				save,	/*!< out: immediate result
17299 						for update function */
17300 	struct st_mysql_value*		value)	/*!< in: incoming string */
17301 {
17302 	if (check_sysvar_int(thd, var, save, value)) {
17303 		return(1);
17304 	}
17305 
17306 	uint new_val = *reinterpret_cast<uint*>(save);
17307 
17308 	if (srv_fast_shutdown && !new_val
17309 	    && !srv_running.load(std::memory_order_relaxed)) {
17310 		return(1);
17311 	}
17312 
17313 	return(0);
17314 }
17315 
17316 /*************************************************************//**
17317 Check whether valid argument given to innobase_*_stopword_table.
17318 This function is registered as a callback with MySQL.
17319 @return 0 for valid stopword table */
17320 static
17321 int
innodb_stopword_table_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17322 innodb_stopword_table_validate(
17323 /*===========================*/
17324 	THD*				thd,	/*!< in: thread handle */
17325 	st_mysql_sys_var*,
17326 	void*				save,	/*!< out: immediate result
17327 						for update function */
17328 	struct st_mysql_value*		value)	/*!< in: incoming string */
17329 {
17330 	const char*	stopword_table_name;
17331 	char		buff[STRING_BUFFER_USUAL_SIZE];
17332 	int		len = sizeof(buff);
17333 	trx_t*		trx;
17334 
17335 	ut_a(save != NULL);
17336 	ut_a(value != NULL);
17337 
17338 	stopword_table_name = value->val_str(value, buff, &len);
17339 
17340 	trx = check_trx_exists(thd);
17341 
17342 	row_mysql_lock_data_dictionary(trx);
17343 
17344 	/* Validate the stopword table's (if supplied) existence and
17345 	of the right format */
17346 	int ret = stopword_table_name && !fts_valid_stopword_table(
17347 		stopword_table_name);
17348 
17349 	row_mysql_unlock_data_dictionary(trx);
17350 
17351 	if (!ret) {
17352 		if (stopword_table_name == buff) {
17353 			ut_ad(static_cast<size_t>(len) < sizeof buff);
17354 			stopword_table_name = thd_strmake(thd,
17355 							  stopword_table_name,
17356 							  len);
17357 		}
17358 
17359 		*static_cast<const char**>(save) = stopword_table_name;
17360 	}
17361 
17362 	return(ret);
17363 }
17364 
17365 /** Update the system variable innodb_buffer_pool_size using the "saved"
17366 value. This function is registered as a callback with MySQL.
17367 @param[in]	save	immediate result from check function */
17368 static
17369 void
innodb_buffer_pool_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17370 innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save)
17371 {
17372         longlong	in_val = *static_cast<const longlong*>(save);
17373 
17374 	snprintf(export_vars.innodb_buffer_pool_resize_status,
17375 	        sizeof(export_vars.innodb_buffer_pool_resize_status),
17376 		"Requested to resize buffer pool.");
17377 
17378 	os_event_set(srv_buf_resize_event);
17379 
17380 	ib::info() << export_vars.innodb_buffer_pool_resize_status
17381 		<< " (new size: " << in_val << " bytes)";
17382 }
17383 
17384 /** The latest assigned innodb_ft_aux_table name */
17385 static char* innodb_ft_aux_table;
17386 
17387 /** Update innodb_ft_aux_table_id on SET GLOBAL innodb_ft_aux_table.
17388 @param[in,out]	thd	connection
17389 @param[out]	save	new value of innodb_ft_aux_table
17390 @param[in]	value	user-specified value */
innodb_ft_aux_table_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17391 static int innodb_ft_aux_table_validate(THD *thd, st_mysql_sys_var*,
17392 					void* save, st_mysql_value* value)
17393 {
17394 	char buf[STRING_BUFFER_USUAL_SIZE];
17395 	int len = sizeof buf;
17396 
17397 	if (const char* table_name = value->val_str(value, buf, &len)) {
17398 		if (dict_table_t* table = dict_table_open_on_name(
17399 			    table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE)) {
17400 			const table_id_t id = dict_table_has_fts_index(table)
17401 				? table->id : 0;
17402 			dict_table_close(table, FALSE, FALSE);
17403 			if (id) {
17404 				innodb_ft_aux_table_id = id;
17405 				if (table_name == buf) {
17406 					ut_ad(static_cast<size_t>(len)
17407 					      < sizeof buf);
17408 					table_name = thd_strmake(thd,
17409 								 table_name,
17410 								 len);
17411 				}
17412 
17413 
17414 				*static_cast<const char**>(save) = table_name;
17415 				return 0;
17416 			}
17417 		}
17418 
17419 		return 1;
17420 	} else {
17421 		*static_cast<char**>(save) = NULL;
17422 		innodb_ft_aux_table_id = 0;
17423 		return 0;
17424 	}
17425 }
17426 
#ifdef BTR_CUR_HASH_ADAPT
/** Update the system variable innodb_adaptive_hash_index using the
"saved" value. This function is registered as a callback with MySQL.
LOCK_global_system_variables is released while the adaptive hash index
is being enabled or disabled, and re-acquired afterwards.
@param[in]	save	immediate result from check function */
static
void
innodb_adaptive_hash_index_update(THD*, st_mysql_sys_var*, void*,
				  const void* save)
{
	mysql_mutex_unlock(&LOCK_global_system_variables);

	const bool	enable = *(my_bool*) save;

	if (!enable) {
		btr_search_disable();
	} else {
		btr_search_enable();
	}

	mysql_mutex_lock(&LOCK_global_system_variables);
}
#endif /* BTR_CUR_HASH_ADAPT */
17445 
17446 /****************************************************************//**
17447 Update the system variable innodb_cmp_per_index using the "saved"
17448 value. This function is registered as a callback with MySQL. */
17449 static
17450 void
innodb_cmp_per_index_update(THD *,st_mysql_sys_var *,void *,const void * save)17451 innodb_cmp_per_index_update(THD*, st_mysql_sys_var*, void*, const void* save)
17452 {
17453 	/* Reset the stats whenever we enable the table
17454 	INFORMATION_SCHEMA.innodb_cmp_per_index. */
17455 	if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
17456 		mysql_mutex_unlock(&LOCK_global_system_variables);
17457 		page_zip_reset_stat_per_index();
17458 		mysql_mutex_lock(&LOCK_global_system_variables);
17459 	}
17460 
17461 	srv_cmp_per_index_enabled = !!(*(my_bool*) save);
17462 }
17463 
17464 /****************************************************************//**
17465 Update the system variable innodb_old_blocks_pct using the "saved"
17466 value. This function is registered as a callback with MySQL. */
17467 static
17468 void
innodb_old_blocks_pct_update(THD *,st_mysql_sys_var *,void *,const void * save)17469 innodb_old_blocks_pct_update(THD*, st_mysql_sys_var*, void*, const void* save)
17470 {
17471 	mysql_mutex_unlock(&LOCK_global_system_variables);
17472 	uint ratio = buf_LRU_old_ratio_update(*static_cast<const uint*>(save),
17473 					      true);
17474 	mysql_mutex_lock(&LOCK_global_system_variables);
17475 	innobase_old_blocks_pct = ratio;
17476 }
17477 
17478 /****************************************************************//**
17479 Update the system variable innodb_old_blocks_pct using the "saved"
17480 value. This function is registered as a callback with MySQL. */
17481 static
17482 void
innodb_change_buffer_max_size_update(THD *,st_mysql_sys_var *,void *,const void * save)17483 innodb_change_buffer_max_size_update(THD*, st_mysql_sys_var*, void*,
17484 				     const void* save)
17485 {
17486 	srv_change_buffer_max_size = *static_cast<const uint*>(save);
17487 	mysql_mutex_unlock(&LOCK_global_system_variables);
17488 	ibuf_max_size_update(srv_change_buffer_max_size);
17489 	mysql_mutex_lock(&LOCK_global_system_variables);
17490 }
17491 
17492 #ifdef UNIV_DEBUG
17493 static ulong srv_fil_make_page_dirty_debug = 0;
17494 static ulong srv_saved_page_number_debug = 0;
17495 
17496 /****************************************************************//**
17497 Save an InnoDB page number. */
17498 static
17499 void
innodb_save_page_no(THD *,st_mysql_sys_var *,void *,const void * save)17500 innodb_save_page_no(THD*, st_mysql_sys_var*, void*, const void* save)
17501 {
17502 	srv_saved_page_number_debug = *static_cast<const ulong*>(save);
17503 
17504 	ib::info() << "Saving InnoDB page number: "
17505 		<< srv_saved_page_number_debug;
17506 }
17507 
17508 /****************************************************************//**
17509 Make the first page of given user tablespace dirty. */
17510 static
17511 void
innodb_make_page_dirty(THD *,st_mysql_sys_var *,void *,const void * save)17512 innodb_make_page_dirty(THD*, st_mysql_sys_var*, void*, const void* save)
17513 {
17514 	mtr_t		mtr;
17515 	ulong		space_id = *static_cast<const ulong*>(save);
17516 	mysql_mutex_unlock(&LOCK_global_system_variables);
17517 	fil_space_t*	space = fil_space_acquire_silent(space_id);
17518 
17519 	if (space == NULL) {
17520 func_exit_no_space:
17521 		mysql_mutex_lock(&LOCK_global_system_variables);
17522 		return;
17523 	}
17524 
17525 	if (srv_saved_page_number_debug >= space->size) {
17526 func_exit:
17527 		space->release();
17528 		goto func_exit_no_space;
17529 	}
17530 
17531 	mtr.start();
17532 	mtr.set_named_space(space);
17533 
17534 	buf_block_t*	block = buf_page_get(
17535 		page_id_t(space_id, srv_saved_page_number_debug),
17536 		space->zip_size(), RW_X_LATCH, &mtr);
17537 
17538 	if (block != NULL) {
17539 		byte*	page = block->frame;
17540 
17541 		ib::info() << "Dirtying page: " << page_id_t(
17542 			page_get_space_id(page), page_get_page_no(page));
17543 
17544 		mlog_write_ulint(page + FIL_PAGE_TYPE,
17545 				 fil_page_get_type(page),
17546 				 MLOG_2BYTES, &mtr);
17547 	}
17548 	mtr.commit();
17549 	goto func_exit;
17550 }
17551 #endif // UNIV_DEBUG
17552 /*************************************************************//**
17553 Just emit a warning that the usage of the variable is deprecated.
17554 @return 0 */
17555 static
17556 void
innodb_stats_sample_pages_update(THD * thd,st_mysql_sys_var *,void *,const void * save)17557 innodb_stats_sample_pages_update(
17558 /*=============================*/
17559 	THD*				thd,	/*!< in: thread handle */
17560 	st_mysql_sys_var*, void*,
17561 	const void*			save)	/*!< in: immediate result
17562 						from check function */
17563 {
17564 
17565 	const char*	STATS_SAMPLE_PAGES_DEPRECATED_MSG =
17566 		"Using innodb_stats_sample_pages is deprecated and"
17567 		" the variable may be removed in future releases."
17568 		" Please use innodb_stats_transient_sample_pages instead.";
17569 
17570 	push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
17571 		     HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
17572 
17573 	ib::warn() << STATS_SAMPLE_PAGES_DEPRECATED_MSG;
17574 
17575 	srv_stats_transient_sample_pages =
17576 		*static_cast<const unsigned long long*>(save);
17577 }
17578 
17579 /****************************************************************//**
17580 Update the monitor counter according to the "set_option",  turn
17581 on/off or reset specified monitor counter. */
17582 static
17583 void
innodb_monitor_set_option(const monitor_info_t * monitor_info,mon_option_t set_option)17584 innodb_monitor_set_option(
17585 /*======================*/
17586 	const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor
17587 					to set */
17588 	mon_option_t	set_option)	/*!< in: Turn on/off reset the
17589 					counter */
17590 {
17591 	monitor_id_t	monitor_id = monitor_info->monitor_id;
17592 
17593 	/* If module type is MONITOR_GROUP_MODULE, it cannot be
17594 	turned on/off individually. It should never use this
17595 	function to set options */
17596 	ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE));
17597 
17598 	switch (set_option) {
17599 	case MONITOR_TURN_ON:
17600 		MONITOR_ON(monitor_id);
17601 		MONITOR_INIT(monitor_id);
17602 		MONITOR_SET_START(monitor_id);
17603 
17604 		/* If the monitor to be turned on uses
17605 		exisitng monitor counter (status variable),
17606 		make special processing to remember existing
17607 		counter value. */
17608 		if (monitor_info->monitor_type & MONITOR_EXISTING) {
17609 			srv_mon_process_existing_counter(
17610 				monitor_id, MONITOR_TURN_ON);
17611 		}
17612 
17613 		if (MONITOR_IS_ON(MONITOR_LATCHES)) {
17614 
17615 			mutex_monitor.enable();
17616 		}
17617 		break;
17618 
17619 	case MONITOR_TURN_OFF:
17620 		if (monitor_info->monitor_type & MONITOR_EXISTING) {
17621 			srv_mon_process_existing_counter(
17622 				monitor_id, MONITOR_TURN_OFF);
17623 		}
17624 
17625 		MONITOR_OFF(monitor_id);
17626 		MONITOR_SET_OFF(monitor_id);
17627 
17628 		if (!MONITOR_IS_ON(MONITOR_LATCHES)) {
17629 
17630 			mutex_monitor.disable();
17631 		}
17632 		break;
17633 
17634 	case MONITOR_RESET_VALUE:
17635 		srv_mon_reset(monitor_id);
17636 
17637 		if (monitor_id == (MONITOR_LATCHES)) {
17638 
17639 			mutex_monitor.reset();
17640 		}
17641 		break;
17642 
17643 	case MONITOR_RESET_ALL_VALUE:
17644 		srv_mon_reset_all(monitor_id);
17645 		mutex_monitor.reset();
17646 		break;
17647 
17648 	default:
17649 		ut_error;
17650 	}
17651 }
17652 
17653 /****************************************************************//**
17654 Find matching InnoDB monitor counters and update their status
17655 according to the "set_option",  turn on/off or reset specified
17656 monitor counter. */
17657 static
17658 void
innodb_monitor_update_wildcard(const char * name,mon_option_t set_option)17659 innodb_monitor_update_wildcard(
17660 /*===========================*/
17661 	const char*	name,		/*!< in: monitor name to match */
17662 	mon_option_t	set_option)	/*!< in: the set option, whether
17663 					to turn on/off or reset the counter */
17664 {
17665 	ut_a(name);
17666 
17667 	for (ulint use = 0; use < NUM_MONITOR; use++) {
17668 		ulint		type;
17669 		monitor_id_t	monitor_id = static_cast<monitor_id_t>(use);
17670 		monitor_info_t*	monitor_info;
17671 
17672 		if (!innobase_wildcasecmp(
17673 			srv_mon_get_name(monitor_id), name)) {
17674 			monitor_info = srv_mon_get_info(monitor_id);
17675 
17676 			type = monitor_info->monitor_type;
17677 
17678 			/* If the monitor counter is of MONITOR_MODULE
17679 			type, skip it. Except for those also marked with
17680 			MONITOR_GROUP_MODULE flag, which can be turned
17681 			on only as a module. */
17682 			if (!(type & MONITOR_MODULE)
17683 			     && !(type & MONITOR_GROUP_MODULE)) {
17684 				innodb_monitor_set_option(monitor_info,
17685 							  set_option);
17686 			}
17687 
17688 			/* Need to special handle counters marked with
17689 			MONITOR_GROUP_MODULE, turn on the whole module if
17690 			any one of it comes here. Currently, only
17691 			"module_buf_page" is marked with MONITOR_GROUP_MODULE */
17692 			if (type & MONITOR_GROUP_MODULE) {
17693 				if ((monitor_id >= MONITOR_MODULE_BUF_PAGE)
17694 				     && (monitor_id < MONITOR_MODULE_OS)) {
17695 					if (set_option == MONITOR_TURN_ON
17696 					    && MONITOR_IS_ON(
17697 						MONITOR_MODULE_BUF_PAGE)) {
17698 						continue;
17699 					}
17700 
17701 					srv_mon_set_module_control(
17702 						MONITOR_MODULE_BUF_PAGE,
17703 						set_option);
17704 				} else {
17705 					/* If new monitor is added with
17706 					MONITOR_GROUP_MODULE, it needs
17707 					to be added here. */
17708 					ut_ad(0);
17709 				}
17710 			}
17711 		}
17712 	}
17713 }
17714 
17715 /*************************************************************//**
17716 Given a configuration variable name, find corresponding monitor counter
17717 and return its monitor ID if found.
17718 @return monitor ID if found, MONITOR_NO_MATCH if there is no match */
17719 static
17720 ulint
innodb_monitor_id_by_name_get(const char * name)17721 innodb_monitor_id_by_name_get(
17722 /*==========================*/
17723 	const char*	name)	/*!< in: monitor counter namer */
17724 {
17725 	ut_a(name);
17726 
17727 	/* Search for wild character '%' in the name, if
17728 	found, we treat it as a wildcard match. We do not search for
17729 	single character wildcard '_' since our monitor names already contain
17730 	such character. To avoid confusion, we request user must include
17731 	at least one '%' character to activate the wildcard search. */
17732 	if (strchr(name, '%')) {
17733 		return(MONITOR_WILDCARD_MATCH);
17734 	}
17735 
17736 	/* Not wildcard match, check for an exact match */
17737 	for (ulint i = 0; i < NUM_MONITOR; i++) {
17738 		if (!innobase_strcasecmp(
17739 			name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) {
17740 			return(i);
17741 		}
17742 	}
17743 
17744 	return(MONITOR_NO_MATCH);
17745 }
17746 /*************************************************************//**
17747 Validate that the passed in monitor name matches at least one
17748 monitor counter name with wildcard compare.
17749 @return TRUE if at least one monitor name matches */
17750 static
17751 ibool
innodb_monitor_validate_wildcard_name(const char * name)17752 innodb_monitor_validate_wildcard_name(
17753 /*==================================*/
17754 	const char*	name)	/*!< in: monitor counter namer */
17755 {
17756 	for (ulint i = 0; i < NUM_MONITOR; i++) {
17757 		if (!innobase_wildcasecmp(
17758 			srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) {
17759 			return(TRUE);
17760 		}
17761 	}
17762 
17763 	return(FALSE);
17764 }
17765 /*************************************************************//**
17766 Validate the passed in monitor name, find and save the
17767 corresponding monitor name in the function parameter "save".
17768 @return 0 if monitor name is valid */
17769 static
17770 int
innodb_monitor_valid_byname(void * save,const char * name)17771 innodb_monitor_valid_byname(
17772 /*========================*/
17773 	void*			save,	/*!< out: immediate result
17774 					for update function */
17775 	const char*		name)	/*!< in: incoming monitor name */
17776 {
17777 	ulint		use;
17778 	monitor_info_t*	monitor_info;
17779 
17780 	if (!name) {
17781 		return(1);
17782 	}
17783 
17784 	use = innodb_monitor_id_by_name_get(name);
17785 
17786 	/* No monitor name matches, nor it is wildcard match */
17787 	if (use == MONITOR_NO_MATCH) {
17788 		return(1);
17789 	}
17790 
17791 	if (use < NUM_MONITOR) {
17792 		monitor_info = srv_mon_get_info((monitor_id_t) use);
17793 
17794 		/* If the monitor counter is marked with
17795 		MONITOR_GROUP_MODULE flag, then this counter
17796 		cannot be turned on/off individually, instead
17797 		it shall be turned on/off as a group using
17798 		its module name */
17799 		if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE)
17800 		    && (!(monitor_info->monitor_type & MONITOR_MODULE))) {
17801 			sql_print_warning(
17802 				"Monitor counter '%s' cannot"
17803 				" be turned on/off individually."
17804 				" Please use its module name"
17805 				" to turn on/off the counters"
17806 				" in the module as a group.\n",
17807 				name);
17808 
17809 			return(1);
17810 		}
17811 
17812 	} else {
17813 		ut_a(use == MONITOR_WILDCARD_MATCH);
17814 
17815 		/* For wildcard match, if there is not a single monitor
17816 		counter name that matches, treat it as an invalid
17817 		value for the system configuration variables */
17818 		if (!innodb_monitor_validate_wildcard_name(name)) {
17819 			return(1);
17820 		}
17821 	}
17822 
17823 	/* Save the configure name for innodb_monitor_update() */
17824 	*static_cast<const char**>(save) = name;
17825 
17826 	return(0);
17827 }
17828 /*************************************************************//**
17829 Validate passed-in "value" is a valid monitor counter name.
17830 This function is registered as a callback with MySQL.
17831 @return 0 for valid name */
17832 static
17833 int
innodb_monitor_validate(THD *,st_mysql_sys_var *,void * save,struct st_mysql_value * value)17834 innodb_monitor_validate(
17835 /*====================*/
17836 	THD*, st_mysql_sys_var*,
17837 	void*				save,	/*!< out: immediate result
17838 						for update function */
17839 	struct st_mysql_value*		value)	/*!< in: incoming string */
17840 {
17841 	const char*	name;
17842 	char*		monitor_name;
17843 	char		buff[STRING_BUFFER_USUAL_SIZE];
17844 	int		len = sizeof(buff);
17845 	int		ret;
17846 
17847 	ut_a(save != NULL);
17848 	ut_a(value != NULL);
17849 
17850 	name = value->val_str(value, buff, &len);
17851 
17852 	/* monitor_name could point to memory from MySQL
17853 	or buff[]. Always dup the name to memory allocated
17854 	by InnoDB, so we can access it in another callback
17855 	function innodb_monitor_update() and free it appropriately */
17856 	if (name) {
17857 		monitor_name = my_strdup(//PSI_INSTRUMENT_ME,
17858                                          name, MYF(0));
17859 	} else {
17860 		return(1);
17861 	}
17862 
17863 	ret = innodb_monitor_valid_byname(save, monitor_name);
17864 
17865 	if (ret) {
17866 		/* Validation failed */
17867 		my_free(monitor_name);
17868 	} else {
17869 		/* monitor_name will be freed in separate callback function
17870 		innodb_monitor_update(). Assert "save" point to
17871 		the "monitor_name" variable */
17872 		ut_ad(*static_cast<char**>(save) == monitor_name);
17873 	}
17874 
17875 	return(ret);
17876 }
17877 
17878 /****************************************************************//**
17879 Update the system variable innodb_enable(disable/reset/reset_all)_monitor
17880 according to the "set_option" and turn on/off or reset specified monitor
17881 counter. */
17882 static
17883 void
innodb_monitor_update(THD * thd,void * var_ptr,const void * save,mon_option_t set_option,ibool free_mem)17884 innodb_monitor_update(
17885 /*==================*/
17886 	THD*			thd,		/*!< in: thread handle */
17887 	void*			var_ptr,	/*!< out: where the
17888 						formal string goes */
17889 	const void*		save,		/*!< in: immediate result
17890 						from check function */
17891 	mon_option_t		set_option,	/*!< in: the set option,
17892 						whether to turn on/off or
17893 						reset the counter */
17894 	ibool			free_mem)	/*!< in: whether we will
17895 						need to free the memory */
17896 {
17897 	monitor_info_t*	monitor_info;
17898 	ulint		monitor_id;
17899 	ulint		err_monitor = 0;
17900 	const char*	name;
17901 
17902 	ut_a(save != NULL);
17903 
17904 	name = *static_cast<const char*const*>(save);
17905 
17906 	if (!name) {
17907 		monitor_id = MONITOR_DEFAULT_START;
17908 	} else {
17909 		monitor_id = innodb_monitor_id_by_name_get(name);
17910 
17911 		/* Double check we have a valid monitor ID */
17912 		if (monitor_id == MONITOR_NO_MATCH) {
17913 			return;
17914 		}
17915 	}
17916 
17917 	if (monitor_id == MONITOR_DEFAULT_START) {
17918 		/* If user set the variable to "default", we will
17919 		print a message and make this set operation a "noop".
17920 		The check is being made here is because "set default"
17921 		does not go through validation function */
17922 		if (thd) {
17923 			push_warning_printf(
17924 				thd, Sql_condition::WARN_LEVEL_WARN,
17925 				ER_NO_DEFAULT,
17926 				"Default value is not defined for"
17927 				" this set option. Please specify"
17928 				" correct counter or module name.");
17929 		} else {
17930 			sql_print_error(
17931 				"Default value is not defined for"
17932 				" this set option. Please specify"
17933 				" correct counter or module name.\n");
17934 		}
17935 
17936 		if (var_ptr) {
17937 			*(const char**) var_ptr = NULL;
17938 		}
17939 	} else if (monitor_id == MONITOR_WILDCARD_MATCH) {
17940 		innodb_monitor_update_wildcard(name, set_option);
17941 	} else {
17942 		monitor_info = srv_mon_get_info(
17943 			static_cast<monitor_id_t>(monitor_id));
17944 
17945 		ut_a(monitor_info);
17946 
17947 		/* If monitor is already truned on, someone could already
17948 		collect monitor data, exit and ask user to turn off the
17949 		monitor before turn it on again. */
17950 		if (set_option == MONITOR_TURN_ON
17951 		    && MONITOR_IS_ON(monitor_id)) {
17952 			err_monitor = monitor_id;
17953 			goto exit;
17954 		}
17955 
17956 		if (var_ptr) {
17957 			*(const char**) var_ptr = monitor_info->monitor_name;
17958 		}
17959 
17960 		/* Depending on the monitor name is for a module or
17961 		a counter, process counters in the whole module or
17962 		individual counter. */
17963 		if (monitor_info->monitor_type & MONITOR_MODULE) {
17964 			srv_mon_set_module_control(
17965 				static_cast<monitor_id_t>(monitor_id),
17966 				set_option);
17967 		} else {
17968 			innodb_monitor_set_option(monitor_info, set_option);
17969 		}
17970 	}
17971 exit:
17972 	/* Only if we are trying to turn on a monitor that already
17973 	been turned on, we will set err_monitor. Print related
17974 	information */
17975 	if (err_monitor) {
17976 		sql_print_warning("InnoDB: Monitor %s is already enabled.",
17977 				  srv_mon_get_name((monitor_id_t) err_monitor));
17978 	}
17979 
17980 	if (free_mem && name) {
17981 		my_free((void*) name);
17982 	}
17983 
17984 	return;
17985 }
17986 
17987 /** Validate SET GLOBAL innodb_buffer_pool_filename.
17988 On Windows, file names with colon (:) are not allowed.
17989 @param thd   connection
17990 @param save  &srv_buf_dump_filename
17991 @param value new value to be validated
17992 @return	0 for valid name */
innodb_srv_buf_dump_filename_validate(THD * thd,st_mysql_sys_var *,void * save,st_mysql_value * value)17993 static int innodb_srv_buf_dump_filename_validate(THD *thd, st_mysql_sys_var*,
17994 						 void *save,
17995 						 st_mysql_value *value)
17996 {
17997   char buff[OS_FILE_MAX_PATH];
17998   int len= sizeof buff;
17999 
18000   if (const char *buf_name= value->val_str(value, buff, &len))
18001   {
18002 #ifdef _WIN32
18003     if (!is_filename_allowed(buf_name, len, FALSE))
18004     {
18005       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18006 			  ER_WRONG_ARGUMENTS,
18007 			  "InnoDB: innodb_buffer_pool_filename "
18008 			  "cannot have colon (:) in the file name.");
18009       return 1;
18010     }
18011 #endif /* _WIN32 */
18012     if (buf_name == buff)
18013     {
18014       ut_ad(static_cast<size_t>(len) < sizeof buff);
18015       buf_name= thd_strmake(thd, buf_name, len);
18016     }
18017 
18018     *static_cast<const char**>(save)= buf_name;
18019     return 0;
18020   }
18021 
18022   return 1;
18023 }
18024 
18025 #ifdef UNIV_DEBUG
/* Debug-only string variable. It is not referenced by the functions
nearby; presumably it is the storage of the innodb_buffer_pool_evict
system variable (TODO confirm). */
static char* srv_buffer_pool_evict;
18027 
18028 /****************************************************************//**
18029 Evict all uncompressed pages of compressed tables from the buffer pool.
18030 Keep the compressed pages in the buffer pool.
18031 @return whether all uncompressed pages were evicted */
innodb_buffer_pool_evict_uncompressed()18032 static bool innodb_buffer_pool_evict_uncompressed()
18033 {
18034 	bool	all_evicted = true;
18035 
18036 	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
18037 		buf_pool_t*	buf_pool = &buf_pool_ptr[i];
18038 
18039 		buf_pool_mutex_enter(buf_pool);
18040 
18041 		for (buf_block_t* block = UT_LIST_GET_LAST(
18042 			     buf_pool->unzip_LRU);
18043 		     block != NULL; ) {
18044 			buf_block_t*	prev_block = UT_LIST_GET_PREV(
18045 				unzip_LRU, block);
18046 			ut_ad(buf_block_get_state(block)
18047 			      == BUF_BLOCK_FILE_PAGE);
18048 			ut_ad(block->in_unzip_LRU_list);
18049 			ut_ad(block->page.in_LRU_list);
18050 
18051 			if (!buf_LRU_free_page(&block->page, false)) {
18052 				all_evicted = false;
18053 				block = prev_block;
18054 			} else {
18055 				/* Because buf_LRU_free_page() may release
18056 				and reacquire buf_pool_t::mutex, prev_block
18057 				may be invalid. */
18058 				block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
18059 			}
18060 		}
18061 
18062 		buf_pool_mutex_exit(buf_pool);
18063 	}
18064 
18065 	return(all_evicted);
18066 }
18067 
18068 /****************************************************************//**
18069 Called on SET GLOBAL innodb_buffer_pool_evict=...
18070 Handles some values specially, to evict pages from the buffer pool.
18071 SET GLOBAL innodb_buffer_pool_evict='uncompressed'
18072 evicts all uncompressed page frames of compressed tablespaces. */
18073 static
18074 void
innodb_buffer_pool_evict_update(THD *,st_mysql_sys_var *,void *,const void * save)18075 innodb_buffer_pool_evict_update(THD*, st_mysql_sys_var*, void*,
18076 				const void* save)
18077 {
18078 	if (const char* op = *static_cast<const char*const*>(save)) {
18079 		if (!strcmp(op, "uncompressed")) {
18080 			mysql_mutex_unlock(&LOCK_global_system_variables);
18081 			for (uint tries = 0; tries < 10000; tries++) {
18082 				if (innodb_buffer_pool_evict_uncompressed()) {
18083 					mysql_mutex_lock(
18084 						&LOCK_global_system_variables);
18085 					return;
18086 				}
18087 
18088 				os_thread_sleep(10000);
18089 			}
18090 
18091 			/* We failed to evict all uncompressed pages. */
18092 			ut_ad(0);
18093 		}
18094 	}
18095 }
18096 #endif /* UNIV_DEBUG */
18097 
18098 /****************************************************************//**
18099 Update the system variable innodb_monitor_enable and enable
18100 specified monitor counter.
18101 This function is registered as a callback with MySQL. */
18102 static
18103 void
innodb_enable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18104 innodb_enable_monitor_update(
18105 /*=========================*/
18106 	THD*				thd,	/*!< in: thread handle */
18107 	st_mysql_sys_var*,
18108 	void*				var_ptr,/*!< out: where the
18109 						formal string goes */
18110 	const void*			save)	/*!< in: immediate result
18111 						from check function */
18112 {
18113 	innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE);
18114 }
18115 
18116 /****************************************************************//**
18117 Update the system variable innodb_monitor_disable and turn
18118 off specified monitor counter. */
18119 static
18120 void
innodb_disable_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18121 innodb_disable_monitor_update(
18122 /*==========================*/
18123 	THD*				thd,	/*!< in: thread handle */
18124 	st_mysql_sys_var*,
18125 	void*				var_ptr,/*!< out: where the
18126 						formal string goes */
18127 	const void*			save)	/*!< in: immediate result
18128 						from check function */
18129 {
18130 	innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE);
18131 }
18132 
18133 /****************************************************************//**
18134 Update the system variable innodb_monitor_reset and reset
18135 specified monitor counter(s).
18136 This function is registered as a callback with MySQL. */
18137 static
18138 void
innodb_reset_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18139 innodb_reset_monitor_update(
18140 /*========================*/
18141 	THD*				thd,	/*!< in: thread handle */
18142 	st_mysql_sys_var*,
18143 	void*				var_ptr,/*!< out: where the
18144 						formal string goes */
18145 	const void*			save)	/*!< in: immediate result
18146 						from check function */
18147 {
18148 	innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE);
18149 }
18150 
18151 /****************************************************************//**
18152 Update the system variable innodb_monitor_reset_all and reset
18153 all value related monitor counter.
18154 This function is registered as a callback with MySQL. */
18155 static
18156 void
innodb_reset_all_monitor_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18157 innodb_reset_all_monitor_update(
18158 /*============================*/
18159 	THD*				thd,	/*!< in: thread handle */
18160 	st_mysql_sys_var*,
18161 	void*				var_ptr,/*!< out: where the
18162 						formal string goes */
18163 	const void*			save)	/*!< in: immediate result
18164 						from check function */
18165 {
18166 	innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE,
18167 			      TRUE);
18168 }
18169 
18170 static
18171 void
innodb_defragment_frequency_update(THD *,st_mysql_sys_var *,void *,const void * save)18172 innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*,
18173 				   const void* save)
18174 {
18175 	srv_defragment_frequency = (*static_cast<const uint*>(save));
18176 	srv_defragment_interval = 1000000000ULL / srv_defragment_frequency;
18177 }
18178 
/** Reentrant string tokenizer: forwards to strtok_s() on Windows and to
strtok_r() elsewhere.
@param[in,out]	str	string to tokenize on the first call, NULL on
			subsequent calls; modified by the tokenizer
@param[in]	delim	set of delimiter characters
@param[in,out]	saveptr	tokenizer position kept between calls
@return the next token, or NULL if there are no more tokens */
static inline char *my_strtok_r(char *str, const char *delim, char **saveptr)
{
#ifdef _WIN32
	char *token = strtok_s(str, delim, saveptr);
#else
	char *token = strtok_r(str, delim, saveptr);
#endif
	return token;
}
18187 
18188 /****************************************************************//**
18189 Parse and enable InnoDB monitor counters during server startup.
18190 User can list the monitor counters/groups to be enable by specifying
18191 "loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
18192 in server configuration file or at the command line. The string
18193 separate could be ";", "," or empty space. */
18194 static
18195 void
innodb_enable_monitor_at_startup(char * str)18196 innodb_enable_monitor_at_startup(
18197 /*=============================*/
18198 	char*	str)	/*!< in/out: monitor counter enable list */
18199 {
18200 	static const char*	sep = " ;,";
18201 	char*			last;
18202 
18203 	ut_a(str);
18204 
18205 	/* Walk through the string, and separate each monitor counter
18206 	and/or counter group name, and calling innodb_monitor_update()
18207 	if successfully updated. Please note that the "str" would be
18208 	changed by strtok_r() as it walks through it. */
18209 	for (char* option = my_strtok_r(str, sep, &last);
18210 	     option;
18211 	     option = my_strtok_r(NULL, sep, &last)) {
18212 		char*	option_name;
18213 		if (!innodb_monitor_valid_byname(&option_name, option)) {
18214 			innodb_monitor_update(NULL, NULL, &option,
18215 					      MONITOR_TURN_ON, FALSE);
18216 		} else {
18217 			sql_print_warning("Invalid monitor counter"
18218 					  " name: '%s'", option);
18219 		}
18220 	}
18221 }
18222 
18223 /****************************************************************//**
18224 Callback function for accessing the InnoDB variables from MySQL:
18225 SHOW VARIABLES. */
show_innodb_vars(THD *,SHOW_VAR * var,char *)18226 static int show_innodb_vars(THD*, SHOW_VAR* var, char*)
18227 {
18228 	innodb_export_status();
18229 	var->type = SHOW_ARRAY;
18230 	var->value = (char*) &innodb_status_variables;
18231 	//var->scope = SHOW_SCOPE_GLOBAL;
18232 
18233 	return(0);
18234 }
18235 
18236 /****************************************************************//**
18237 This function checks each index name for a table against reserved
18238 system default primary index name 'GEN_CLUST_INDEX'. If a name
18239 matches, this function pushes an warning message to the client,
18240 and returns true.
18241 @return true if the index name matches the reserved name */
18242 bool
innobase_index_name_is_reserved(THD * thd,const KEY * key_info,ulint num_of_keys)18243 innobase_index_name_is_reserved(
18244 /*============================*/
18245 	THD*		thd,		/*!< in/out: MySQL connection */
18246 	const KEY*	key_info,	/*!< in: Indexes to be created */
18247 	ulint		num_of_keys)	/*!< in: Number of indexes to
18248 					be created. */
18249 {
18250 	const KEY*	key;
18251 	uint		key_num;	/* index number */
18252 
18253 	for (key_num = 0; key_num < num_of_keys; key_num++) {
18254 		key = &key_info[key_num];
18255 
18256 		if (innobase_strcasecmp(key->name.str,
18257 					innobase_index_reserve_name) == 0) {
18258 			/* Push warning to mysql */
18259 			push_warning_printf(thd,
18260 					    Sql_condition::WARN_LEVEL_WARN,
18261 					    ER_WRONG_NAME_FOR_INDEX,
18262 					    "Cannot Create Index with name"
18263 					    " '%s'. The name is reserved"
18264 					    " for the system default primary"
18265 					    " index.",
18266 					    innobase_index_reserve_name);
18267 
18268 			my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
18269 				 innobase_index_reserve_name);
18270 
18271 			return(true);
18272 		}
18273 	}
18274 
18275 	return(false);
18276 }
18277 
18278 /** Retrieve the FTS Relevance Ranking result for doc with doc_id
18279 of m_prebuilt->fts_doc_id
18280 @param[in,out]	fts_hdl	FTS handler
18281 @return the relevance ranking value */
18282 static
18283 float
innobase_fts_retrieve_ranking(FT_INFO * fts_hdl)18284 innobase_fts_retrieve_ranking(
18285 	FT_INFO*	fts_hdl)
18286 {
18287 	fts_result_t*	result;
18288 	row_prebuilt_t*	ft_prebuilt;
18289 
18290 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18291 
18292 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18293 
18294 	fts_ranking_t*  ranking = rbt_value(fts_ranking_t, result->current);
18295 	ft_prebuilt->fts_doc_id= ranking->doc_id;
18296 
18297 	return(ranking->rank);
18298 }
18299 
18300 /** Free the memory for the FTS handler
18301 @param[in,out]	fts_hdl	FTS handler */
18302 static
18303 void
innobase_fts_close_ranking(FT_INFO * fts_hdl)18304 innobase_fts_close_ranking(
18305 	FT_INFO*	fts_hdl)
18306 {
18307 	fts_result_t*	result;
18308 
18309 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18310 
18311 	fts_query_free_result(result);
18312 
18313 	my_free((uchar*) fts_hdl);
18314 }
18315 
18316 /** Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
18317 of m_prebuilt->fts_doc_id
18318 @param[in,out]	fts_hdl	FTS handler
18319 @return the relevance ranking value */
18320 static
18321 float
innobase_fts_find_ranking(FT_INFO * fts_hdl,uchar *,uint)18322 innobase_fts_find_ranking(FT_INFO* fts_hdl, uchar*, uint)
18323 {
18324 	fts_result_t*	result;
18325 	row_prebuilt_t*	ft_prebuilt;
18326 
18327 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_prebuilt;
18328 	result = reinterpret_cast<NEW_FT_INFO*>(fts_hdl)->ft_result;
18329 
18330 	/* Retrieve the ranking value for doc_id with value of
18331 	m_prebuilt->fts_doc_id */
18332 	return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
18333 }
18334 
18335 #ifdef UNIV_DEBUG
/* Debug-only (UNIV_DEBUG) settings. The three my_bool flags are not read
by the functions nearby; presumably they only back the corresponding debug
system variables (TODO confirm). */
static my_bool	innodb_background_drop_list_empty = TRUE;
static my_bool	innodb_log_checkpoint_now = TRUE;
static my_bool	innodb_buf_flush_list_now = TRUE;
/* Value most recently stored by
innodb_merge_threshold_set_all_debug_update(). */
static uint	innodb_merge_threshold_set_all_debug
	= DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
18341 
18342 /** Wait for the background drop list to become empty. */
18343 static
18344 void
wait_background_drop_list_empty(THD *,st_mysql_sys_var *,void *,const void *)18345 wait_background_drop_list_empty(THD*, st_mysql_sys_var*, void*, const void*)
18346 {
18347 	row_wait_for_background_drop_list_empty();
18348 }
18349 
18350 /****************************************************************//**
18351 Force innodb to checkpoint. */
18352 static
18353 void
checkpoint_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18354 checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18355 {
18356 	if (*(my_bool*) save) {
18357 		mysql_mutex_unlock(&LOCK_global_system_variables);
18358 
18359 		while (log_sys.last_checkpoint_lsn
18360 		       + SIZE_OF_MLOG_CHECKPOINT
18361 		       + (log_sys.append_on_checkpoint != NULL
18362 			  ? log_sys.append_on_checkpoint->size() : 0)
18363 		       < log_sys.lsn) {
18364 			log_make_checkpoint();
18365 			fil_flush_file_spaces(FIL_TYPE_LOG);
18366 		}
18367 
18368 		dberr_t err = fil_write_flushed_lsn(log_sys.lsn);
18369 
18370 		if (err != DB_SUCCESS) {
18371 			ib::warn() << "Checkpoint set failed " << err;
18372 		}
18373 
18374 		mysql_mutex_lock(&LOCK_global_system_variables);
18375 	}
18376 }
18377 
18378 /****************************************************************//**
18379 Force a dirty pages flush now. */
18380 static
18381 void
buf_flush_list_now_set(THD *,st_mysql_sys_var *,void *,const void * save)18382 buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
18383 {
18384 	if (*(my_bool*) save) {
18385 		mysql_mutex_unlock(&LOCK_global_system_variables);
18386 		buf_flush_sync_all_buf_pools();
18387 		mysql_mutex_lock(&LOCK_global_system_variables);
18388 	}
18389 }
18390 
18391 /** Override current MERGE_THRESHOLD setting for all indexes at dictionary
18392 now.
18393 @param[in]	save	immediate result from check function */
18394 static
18395 void
innodb_merge_threshold_set_all_debug_update(THD *,st_mysql_sys_var *,void *,const void * save)18396 innodb_merge_threshold_set_all_debug_update(THD*, st_mysql_sys_var*, void*,
18397 					    const void* save)
18398 {
18399 	innodb_merge_threshold_set_all_debug
18400 		= (*static_cast<const uint*>(save));
18401 	dict_set_merge_threshold_all_debug(
18402 		innodb_merge_threshold_set_all_debug);
18403 }
18404 #endif /* UNIV_DEBUG */
18405 
18406 /** Find and Retrieve the FTS doc_id for the current result row
18407 @param[in,out]	fts_hdl	FTS handler
18408 @return the document ID */
18409 static
18410 ulonglong
innobase_fts_retrieve_docid(FT_INFO_EXT * fts_hdl)18411 innobase_fts_retrieve_docid(
18412 	FT_INFO_EXT*	fts_hdl)
18413 {
18414 	fts_result_t*	result;
18415 	row_prebuilt_t* ft_prebuilt;
18416 
18417 	ft_prebuilt = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_prebuilt;
18418 	result = reinterpret_cast<NEW_FT_INFO *>(fts_hdl)->ft_result;
18419 
18420 	if (ft_prebuilt->read_just_key) {
18421 
18422 		fts_ranking_t* ranking =
18423 			rbt_value(fts_ranking_t, result->current);
18424 
18425 		return(ranking->doc_id);
18426 	}
18427 
18428 	return(ft_prebuilt->fts_doc_id);
18429 }
18430 
/* Dummy backing variables for three "trigger" system variables. InnoDB
never reads or changes them; the MySQL infrastructure needs them so that
the user can invoke buffer_pool_dump_now(), buffer_pool_load_now() and
buffer_pool_load_abort() by doing:
  SET GLOBAL innodb_buffer_pool_dump_now=ON;
  SET GLOBAL innodb_buffer_pool_load_now=ON;
  SET GLOBAL innodb_buffer_pool_load_abort=ON;
Their values are read by MySQL and displayed to the user when the variables
are queried, e.g.:
  SELECT @@innodb_buffer_pool_dump_now;
  SELECT @@innodb_buffer_pool_load_now;
  SELECT @@innodb_buffer_pool_load_abort; */
static my_bool	innodb_buffer_pool_dump_now = FALSE;
static my_bool	innodb_buffer_pool_load_now = FALSE;
static my_bool	innodb_buffer_pool_load_abort = FALSE;
18446 
18447 /****************************************************************//**
18448 Trigger a dump of the buffer pool if innodb_buffer_pool_dump_now is set
18449 to ON. This function is registered as a callback with MySQL. */
18450 static
18451 void
buffer_pool_dump_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18452 buffer_pool_dump_now(
18453 /*=================*/
18454 	THD*				thd	/*!< in: thread handle */
18455 					MY_ATTRIBUTE((unused)),
18456 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18457 						variable */
18458 					MY_ATTRIBUTE((unused)),
18459 	void*				var_ptr	/*!< out: where the formal
18460 						string goes */
18461 					MY_ATTRIBUTE((unused)),
18462 	const void*			save)	/*!< in: immediate result from
18463 						check function */
18464 {
18465 	if (*(my_bool*) save && !srv_read_only_mode) {
18466 		mysql_mutex_unlock(&LOCK_global_system_variables);
18467 		buf_dump_start();
18468 		mysql_mutex_lock(&LOCK_global_system_variables);
18469 	}
18470 }
18471 
18472 /****************************************************************//**
18473 Trigger a load of the buffer pool if innodb_buffer_pool_load_now is set
18474 to ON. This function is registered as a callback with MySQL. */
18475 static
18476 void
buffer_pool_load_now(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18477 buffer_pool_load_now(
18478 /*=================*/
18479 	THD*				thd	/*!< in: thread handle */
18480 					MY_ATTRIBUTE((unused)),
18481 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18482 						variable */
18483 					MY_ATTRIBUTE((unused)),
18484 	void*				var_ptr	/*!< out: where the formal
18485 						string goes */
18486 					MY_ATTRIBUTE((unused)),
18487 	const void*			save)	/*!< in: immediate result from
18488 						check function */
18489 {
18490 	if (*(my_bool*) save && !srv_read_only_mode) {
18491 		mysql_mutex_unlock(&LOCK_global_system_variables);
18492 		buf_load_start();
18493 		mysql_mutex_lock(&LOCK_global_system_variables);
18494 	}
18495 }
18496 
18497 /****************************************************************//**
18498 Abort a load of the buffer pool if innodb_buffer_pool_load_abort
18499 is set to ON. This function is registered as a callback with MySQL. */
18500 static
18501 void
buffer_pool_load_abort(THD * thd MY_ATTRIBUTE ((unused)),struct st_mysql_sys_var * var MY_ATTRIBUTE ((unused)),void * var_ptr MY_ATTRIBUTE ((unused)),const void * save)18502 buffer_pool_load_abort(
18503 /*===================*/
18504 	THD*				thd	/*!< in: thread handle */
18505 					MY_ATTRIBUTE((unused)),
18506 	struct st_mysql_sys_var*	var	/*!< in: pointer to system
18507 						variable */
18508 					MY_ATTRIBUTE((unused)),
18509 	void*				var_ptr	/*!< out: where the formal
18510 						string goes */
18511 					MY_ATTRIBUTE((unused)),
18512 	const void*			save)	/*!< in: immediate result from
18513 						check function */
18514 {
18515 	if (*(my_bool*) save && !srv_read_only_mode) {
18516 		mysql_mutex_unlock(&LOCK_global_system_variables);
18517 		buf_load_abort();
18518 		mysql_mutex_lock(&LOCK_global_system_variables);
18519 	}
18520 }
18521 
18522 /****************************************************************//**
18523 Update the system variable innodb_log_write_ahead_size using the "saved"
18524 value. This function is registered as a callback with MySQL. */
18525 static
18526 void
innodb_log_write_ahead_size_update(THD * thd,st_mysql_sys_var *,void *,const void * save)18527 innodb_log_write_ahead_size_update(
18528 /*===============================*/
18529 	THD*				thd,	/*!< in: thread handle */
18530 	st_mysql_sys_var*, void*,
18531 	const void*			save)	/*!< in: immediate result
18532 						from check function */
18533 {
18534 	ulong	val = OS_FILE_LOG_BLOCK_SIZE;
18535 	ulong	in_val = *static_cast<const ulong*>(save);
18536 
18537 	while (val < in_val) {
18538 		val = val * 2;
18539 	}
18540 
18541 	if (val > srv_page_size) {
18542 		val = srv_page_size;
18543 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18544 				    ER_WRONG_ARGUMENTS,
18545 				    "innodb_log_write_ahead_size cannot"
18546 				    " be set higher than innodb_page_size.");
18547 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18548 				    ER_WRONG_ARGUMENTS,
18549 				    "Setting innodb_log_write_ahead_size"
18550 				    " to %lu",
18551 				    srv_page_size);
18552 	} else if (val != in_val) {
18553 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18554 				    ER_WRONG_ARGUMENTS,
18555 				    "innodb_log_write_ahead_size should be"
18556 				    " set 2^n value and larger than 512.");
18557 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
18558 				    ER_WRONG_ARGUMENTS,
18559 				    "Setting innodb_log_write_ahead_size"
18560 				    " to %lu",
18561 				    val);
18562 	}
18563 
18564 	srv_log_write_ahead_size = val;
18565 }
18566 
18567 /** Update innodb_status_output or innodb_status_output_locks,
18568 which control InnoDB "status monitor" output to the error log.
18569 @param[out]	var	current value
18570 @param[in]	save	to-be-assigned value */
18571 static
18572 void
innodb_status_output_update(THD *,st_mysql_sys_var *,void * var,const void * save)18573 innodb_status_output_update(THD*,st_mysql_sys_var*,void*var,const void*save)
18574 {
18575   *static_cast<my_bool*>(var)= *static_cast<const my_bool*>(save);
18576   if (srv_monitor_event)
18577   {
18578     mysql_mutex_unlock(&LOCK_global_system_variables);
18579     /* Wakeup server monitor thread. */
18580     os_event_set(srv_monitor_event);
18581     mysql_mutex_lock(&LOCK_global_system_variables);
18582   }
18583 }
18584 
18585 /** Update the system variable innodb_encryption_threads.
18586 @param[in]	save	to-be-assigned value */
18587 static
18588 void
innodb_encryption_threads_update(THD *,st_mysql_sys_var *,void *,const void * save)18589 innodb_encryption_threads_update(THD*,st_mysql_sys_var*,void*,const void*save)
18590 {
18591 	mysql_mutex_unlock(&LOCK_global_system_variables);
18592 	fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
18593 	mysql_mutex_lock(&LOCK_global_system_variables);
18594 }
18595 
18596 /** Update the system variable innodb_encryption_rotate_key_age.
18597 @param[in]	save	to-be-assigned value */
18598 static
18599 void
innodb_encryption_rotate_key_age_update(THD *,st_mysql_sys_var *,void *,const void * save)18600 innodb_encryption_rotate_key_age_update(THD*, st_mysql_sys_var*, void*,
18601 					const void* save)
18602 {
18603 	mysql_mutex_unlock(&LOCK_global_system_variables);
18604 	fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
18605 	mysql_mutex_lock(&LOCK_global_system_variables);
18606 }
18607 
18608 /** Update the system variable innodb_encryption_rotation_iops.
18609 @param[in]	save	to-be-assigned value */
18610 static
18611 void
innodb_encryption_rotation_iops_update(THD *,st_mysql_sys_var *,void *,const void * save)18612 innodb_encryption_rotation_iops_update(THD*, st_mysql_sys_var*, void*,
18613 				       const void* save)
18614 {
18615 	mysql_mutex_unlock(&LOCK_global_system_variables);
18616 	fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
18617 	mysql_mutex_lock(&LOCK_global_system_variables);
18618 }
18619 
18620 /** Update the system variable innodb_encrypt_tables.
18621 @param[in]	save	to-be-assigned value */
18622 static
18623 void
innodb_encrypt_tables_update(THD *,st_mysql_sys_var *,void *,const void * save)18624 innodb_encrypt_tables_update(THD*, st_mysql_sys_var*, void*, const void* save)
18625 {
18626 	mysql_mutex_unlock(&LOCK_global_system_variables);
18627 	fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
18628 	mysql_mutex_lock(&LOCK_global_system_variables);
18629 }
18630 
18631 /** Update the innodb_log_checksums parameter.
18632 @param[in,out]	thd	client connection
18633 @param[out]	var_ptr	current value
18634 @param[in]	save	immediate result from check function */
18635 static
18636 void
innodb_log_checksums_update(THD * thd,st_mysql_sys_var *,void * var_ptr,const void * save)18637 innodb_log_checksums_update(THD* thd, st_mysql_sys_var*, void* var_ptr,
18638 			    const void* save)
18639 {
18640 	*static_cast<my_bool*>(var_ptr) = innodb_log_checksums_func_update(
18641 		thd, *static_cast<const my_bool*>(save));
18642 }
18643 
18644 #ifdef UNIV_DEBUG
18645 static
18646 void
innobase_debug_sync_callback(srv_slot_t * slot,const void * value)18647 innobase_debug_sync_callback(srv_slot_t *slot, const void *value)
18648 {
18649 	const char *value_str = *static_cast<const char* const*>(value);
18650 	size_t len = strlen(value_str) + 1;
18651 
18652 
18653 	// One allocation for list node object and value.
18654 	void *buf = ut_malloc_nokey(sizeof(srv_slot_t::debug_sync_t) + len-1);
18655 	srv_slot_t::debug_sync_t *sync = new(buf) srv_slot_t::debug_sync_t();
18656 	strcpy(sync->str, value_str);
18657 
18658 	rw_lock_x_lock(&slot->debug_sync_lock);
18659 	UT_LIST_ADD_LAST(slot->debug_sync, sync);
18660 	rw_lock_x_unlock(&slot->debug_sync_lock);
18661 }
18662 static
18663 void
innobase_debug_sync_set(THD * thd,st_mysql_sys_var *,void *,const void * value)18664 innobase_debug_sync_set(THD *thd, st_mysql_sys_var*, void *, const void *value)
18665 {
18666 	srv_for_each_thread(SRV_WORKER, innobase_debug_sync_callback, value);
18667 	srv_for_each_thread(SRV_PURGE, innobase_debug_sync_callback, value);
18668 }
18669 #endif
18670 
/* Status variable export table: one "Innodb" entry of type SHOW_FUNC that
points at show_innodb_vars, followed by the terminating entry. */
static SHOW_VAR innodb_status_variables_export[]= {
	{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
	{NullS, NullS, SHOW_LONG}
};

/* Storage engine descriptor; only the handlerton interface version is
set here. */
static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
18678 
18679 #ifdef WITH_WSREP
18680 
/** Mark a transaction as a Galera abort victim and ask wsrep to BF-abort
its connection.

Steps, in order:
1. set victim_trx->lock.was_chosen_as_wsrep_victim;
2. call wsrep_thd_set_wsrep_aborter(bf_thd, thd); if it returns true, log
   that the kill is skipped, unlock thd with wsrep_thd_UNLOCK() and return;
3. call wsrep_thd_bf_abort(); if it returns true and the victim is waiting
   for a lock, flag it as deadlock victim and cancel the wait.

NOTE(review): only the early-return path calls wsrep_thd_UNLOCK() here;
presumably the lock taken by the caller is released inside
wsrep_thd_bf_abort() on the other path -- confirm against its definition.

@param[in]	bf_thd		brute force (BF) thread
@param[in,out]	thd		victim connection
@param[in,out]	victim_trx	victim transaction
@param[in]	signal		passed through to wsrep_thd_bf_abort() */
static
void
wsrep_kill_victim(
	MYSQL_THD const bf_thd,
	MYSQL_THD thd,
	trx_t* victim_trx,
	my_bool signal)
{
  DBUG_ENTER("wsrep_kill_victim");

  /* Mark transaction as a victim for Galera abort */
  victim_trx->lock.was_chosen_as_wsrep_victim= true;
  if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
  {
    WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
    wsrep_thd_UNLOCK(thd);
    DBUG_VOID_RETURN;
  }

  if (wsrep_thd_bf_abort(bf_thd, thd, signal))
  {
    /* If the victim is blocked in a lock wait, cancel that wait. */
    lock_t*  wait_lock= victim_trx->lock.wait_lock;
    if (wait_lock)
    {
      DBUG_ASSERT(victim_trx->is_wsrep());
      WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd));
      victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
      lock_cancel_waiting_and_release(wait_lock);
    }
  }

  DBUG_VOID_RETURN;
}
18714 
/** This function is used to kill one transaction.

This transaction was open on this node (not-yet-committed), and a
conflicting writeset from some other node that was being applied
caused a locking conflict.  First committed (from other node)
wins, thus open transaction is rolled back.  BF stands for
brute-force: any transaction can get aborted by galera any time
it is necessary.

This conflict can happen only when the replicated writeset (from
other node) is being applied, not when it's waiting in the queue.
If our local transaction reached its COMMIT and this conflicting
writeset was in the queue, then it should fail the local
certification test instead.

A brute force abort is only triggered by a locking conflict
between a writeset being applied by an applier thread (slave thread)
and an open transaction on the node, not by a Galera writeset
comparison as in the local certification failure.

The caller must hold the lock mutex and the victim's trx mutex; both are
asserted in debug builds.

@param[in]	bf_thd		Brute force (BF) thread
@param[in,out]	victim_trx	Victim trx to be killed
@param[in]	signal		Should victim be signaled */
void
wsrep_innobase_kill_one_trx(
	MYSQL_THD const bf_thd,
	trx_t *victim_trx,
	my_bool signal)
{
  ut_ad(bf_thd);
  ut_ad(victim_trx);
  ut_ad(lock_mutex_own());
  ut_ad(trx_mutex_own(victim_trx));

  DBUG_ENTER("wsrep_innobase_kill_one_trx");
  THD *thd= (THD *) victim_trx->mysql_thd;
  /* Note that bf_trx might not exist here e.g. on MDL conflict
  case (test: galera_concurrent_ctas).*/
  trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd);

  /* Without a THD attached to the victim there is nothing to abort. */
  if (!thd)
  {
    WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
    DBUG_VOID_RETURN;
  }

  /* Here we need to lock THD::LOCK_thd_data to protect from
  concurrent usage or disconnect or delete. */
  DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
  wsrep_thd_LOCK(thd);
  DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");

  WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);

  /* Debug log of the aborting side; TRX_ID_MAX is printed when the BF
  thread has no InnoDB transaction. */
  WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
	      "trx_id: " TRX_ID_FMT " thread: %ld "
	      "seqno: %lld client_state: %s client_mode: %s "
	      "trx_state %s query: %s",
	      wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
	      bf_trx ? bf_trx->id : TRX_ID_MAX,
	      thd_get_thread_id(bf_thd),
	      wsrep_thd_trx_seqno(bf_thd),
	      wsrep_thd_client_state_str(bf_thd),
	      wsrep_thd_client_mode_str(bf_thd),
	      wsrep_thd_transaction_state_str(bf_thd),
	      wsrep_thd_query(bf_thd));

  /* Debug log of the victim side. */
  WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
	      "trx_id: " TRX_ID_FMT " thread: %ld "
	      "seqno: %lld client_state: %s client_mode: %s "
	      "trx_state %s query: %s",
	      wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
	      victim_trx->id,
	      thd_get_thread_id(thd),
	      wsrep_thd_trx_seqno(thd),
	      wsrep_thd_client_state_str(thd),
	      wsrep_thd_client_mode_str(thd),
	      wsrep_thd_transaction_state_str(thd),
	      wsrep_thd_query(thd));

  /* The actual abort; thd was locked by wsrep_thd_LOCK() above. */
  wsrep_kill_victim(bf_thd, thd, victim_trx, signal);
  DBUG_VOID_RETURN;
}
18798 
/** This function forces the victim transaction to abort. Aborting the
  transaction does NOT end it, it still has to be rolled back.

  If the victim THD has an InnoDB transaction, the abort goes through
  wsrep_kill_victim() under the lock mutex and the victim's trx mutex;
  otherwise only wsrep_thd_bf_abort() is called on the victim THD.

  @param bf_thd       brute force THD asking for the abort
  @param victim_thd   victim THD to be aborted
  @param signal       passed through to the abort call

  This function returns nothing.
*/
static
void
wsrep_abort_transaction(
	handlerton*,
	THD *bf_thd,
	THD *victim_thd,
	my_bool signal)
{
  /* Note that victim thd is protected with
  THD::LOCK_thd_data and THD::LOCK_thd_kill here. */
  trx_t* victim_trx= thd_to_trx(victim_thd);
  trx_t* bf_trx= thd_to_trx(bf_thd);
  /* Debug log of both sides; a trx id of 0 is printed when the THD has
  no InnoDB transaction. */
  WSREP_DEBUG("wsrep_abort_transaction: BF:"
	      " thread %ld client_state %s client_mode %s"
	      " trans_state %s query %s trx " TRX_ID_FMT,
	      thd_get_thread_id(bf_thd),
	      wsrep_thd_client_state_str(bf_thd),
	      wsrep_thd_client_mode_str(bf_thd),
	      wsrep_thd_transaction_state_str(bf_thd),
	      wsrep_thd_query(bf_thd),
	      bf_trx ? bf_trx->id : 0);

  WSREP_DEBUG("wsrep_abort_transaction: victim:"
	      " thread %ld client_state %s client_mode %s"
	      " trans_state %s query %s trx " TRX_ID_FMT,
	      thd_get_thread_id(victim_thd),
	      wsrep_thd_client_state_str(victim_thd),
	      wsrep_thd_client_mode_str(victim_thd),
	      wsrep_thd_transaction_state_str(victim_thd),
	      wsrep_thd_query(victim_thd),
	      victim_trx ? victim_trx->id : 0);

  if (victim_trx)
  {
    /* Take the lock mutex, then the trx mutex, around the kill. Note
    that they are released in the same order, not in reverse. */
    lock_mutex_enter();
    trx_mutex_enter(victim_trx);
    wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal);
    lock_mutex_exit();
    trx_mutex_exit(victim_trx);
    wsrep_srv_conc_cancel_wait(victim_trx);
  }
  else
  {
    /* No InnoDB transaction: abort at the THD level only. */
    wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
  }
}
18854 
18855 static
18856 int
innobase_wsrep_set_checkpoint(handlerton * hton,const XID * xid)18857 innobase_wsrep_set_checkpoint(
18858 /*==========================*/
18859 	handlerton* hton,
18860 	const XID* xid)
18861 {
18862 	DBUG_ASSERT(hton == innodb_hton_ptr);
18863 
18864 	if (wsrep_is_wsrep_xid(xid)) {
18865 
18866 		trx_rseg_update_wsrep_checkpoint(xid);
18867 		innobase_flush_logs(hton, false);
18868 		return 0;
18869 	} else {
18870 		return 1;
18871 	}
18872 }
18873 
18874 static
18875 int
innobase_wsrep_get_checkpoint(handlerton * hton,XID * xid)18876 innobase_wsrep_get_checkpoint(
18877 /*==========================*/
18878 	handlerton* hton,
18879 	XID* xid)
18880 {
18881 	DBUG_ASSERT(hton == innodb_hton_ptr);
18882         trx_rseg_read_wsrep_checkpoint(*xid);
18883         return 0;
18884 }
18885 #endif /* WITH_WSREP */
18886 
innodb_idle_flush_pct_update(THD * thd,st_mysql_sys_var * var,void *,const void * save)18887 static void innodb_idle_flush_pct_update(THD *thd, st_mysql_sys_var *var,
18888                                          void*, const void *save)
18889 {
18890   innodb_idle_flush_pct = *static_cast<const ulong*>(save);
18891   push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
18892                HA_ERR_WRONG_COMMAND, deprecated_idle_flush_pct);
18893 }
18894 
/* plugin options */

/* Each MYSQL_SYSVAR_* registration below lists, in order: variable name
(without the "innodb_" prefix), backing C variable, PLUGIN_VAR_* flags,
help text, check callback, update callback and default value. Numeric
variants additionally take minimum, maximum and block size; enum variants
take a typelib. */

/* innodb_checksum_algorithm: enum, defaults to CRC32; changes are applied
through innodb_checksum_algorithm_update. */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
  PLUGIN_VAR_RQCMDARG,
  "The algorithm InnoDB uses for page checksumming. Possible values are"
  " FULL_CRC32"
    " for new files, always use CRC-32C; for old, see CRC32 below;"
  " STRICT_FULL_CRC32"
    " for new files, always use CRC-32C; for old, see STRICT_CRC32 below;"
  " CRC32"
    " write crc32, allow any of the other checksums to match when reading;"
  " STRICT_CRC32"
    " write crc32, do not allow other algorithms to match when reading;"
  " INNODB"
    " write a software calculated checksum, allow any other checksums"
    " to match when reading;"
  " STRICT_INNODB"
    " write a software calculated checksum, do not allow other algorithms"
    " to match when reading;"
  " NONE"
    " write a constant magic number, do not do any checksum verification"
    " when reading (same as innodb_checksums=OFF);"
  " STRICT_NONE"
    " write a constant magic number, do not allow values other than that"
    " magic number when reading;"
  " Files updated when this option is set to crc32 or strict_crc32 will"
  " not be readable by MariaDB versions older than 10.0.4;"
  " new files created with full_crc32 are readable by MariaDB 10.4.3+",
  NULL, innodb_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_CRC32,
  &innodb_checksum_algorithm_typelib);

/* innodb_log_checksums: deprecated boolean, default TRUE; updates go
through innodb_log_checksums_update. */
static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. Whether to require checksums for InnoDB redo log blocks.",
  NULL, innodb_log_checksums_update, TRUE);

/* innodb_checksums: deprecated read-only boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting"
  " this to OFF."
  " Enable InnoDB checksums validation (enabled by default)."
  " Disable with --skip-innodb-checksums.",
  NULL, NULL, TRUE);

/* innodb_data_home_dir: read-only string, no default. */
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

/* innodb_doublewrite: read-only boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default)."
  " Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

/* innodb_use_atomic_writes: read-only boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable atomic writes, instead of using the doublewrite buffer, for files "
  "on devices that supports atomic writes. "
  "This option only works on Linux with either FusionIO cards using "
  "the directFS filesystem or with Shannon cards using any file system.",
  NULL, NULL, TRUE);

/* innodb_stats_include_delete_marked: boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
  srv_stats_include_delete_marked,
  PLUGIN_VAR_OPCMDARG,
  "Include delete marked records when calculating persistent statistics",
  NULL, NULL, FALSE);

/* innodb_instant_alter_column_allowed: enum; the default index 2 is
annotated in the code as add_drop_reorder. */
static MYSQL_SYSVAR_ENUM(instant_alter_column_allowed,
			 innodb_instant_alter_column_allowed,
  PLUGIN_VAR_RQCMDARG,
  "File format constraint for ALTER TABLE", NULL, NULL, 2/*add_drop_reorder*/,
  &innodb_instant_alter_column_allowed_typelib);

/* innodb_io_capacity: default 200, range 100..~0UL; updates go through
innodb_io_capacity_update. */
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Number of IOPs the server can do. Tunes the background IO rate",
  NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);

/* innodb_io_capacity_max: default SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT,
range 100..SRV_MAX_IO_CAPACITY_LIMIT; updates go through
innodb_io_capacity_max_update. */
static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
  PLUGIN_VAR_RQCMDARG,
  "Limit to which innodb_io_capacity can be inflated.",
  NULL, innodb_io_capacity_max_update,
  SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
  SRV_MAX_IO_CAPACITY_LIMIT, 0);

/* innodb_idle_flush_pct: deprecated, default 100, range 0..100; the
update callback innodb_idle_flush_pct_update pushes a warning. */
static MYSQL_SYSVAR_ULONG(idle_flush_pct, innodb_idle_flush_pct,
  PLUGIN_VAR_RQCMDARG,
  "DEPRECATED. This setting has no effect.",
  NULL, innodb_idle_flush_pct_update, 100, 0, 100, 0);
18986 
#ifdef UNIV_DEBUG
/* Debug-build-only variables. The three booleans act as triggers: the
work is done by their update callbacks. */

/* innodb_background_drop_list_empty: update callback
wait_background_drop_list_empty. */
static MYSQL_SYSVAR_BOOL(background_drop_list_empty,
  innodb_background_drop_list_empty,
  PLUGIN_VAR_OPCMDARG,
  "Wait for the background drop list to become empty",
  NULL, wait_background_drop_list_empty, FALSE);

/* innodb_log_checkpoint_now: update callback checkpoint_now_set. */
static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
  PLUGIN_VAR_OPCMDARG,
  "Force checkpoint now",
  NULL, checkpoint_now_set, FALSE);

/* innodb_buf_flush_list_now: update callback buf_flush_list_now_set. */
static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
  PLUGIN_VAR_OPCMDARG,
  "Force dirty page flush now",
  NULL, buf_flush_list_now_set, FALSE);

/* innodb_merge_threshold_set_all_debug: default
DICT_INDEX_MERGE_THRESHOLD_DEFAULT, range 1..50; update callback
innodb_merge_threshold_set_all_debug_update. */
static MYSQL_SYSVAR_UINT(merge_threshold_set_all_debug,
  innodb_merge_threshold_set_all_debug,
  PLUGIN_VAR_RQCMDARG,
  "Override current MERGE_THRESHOLD setting for all indexes at dictionary"
  " cache by the specified value dynamically, at the time.",
  NULL, innodb_merge_threshold_set_all_debug_update,
  DICT_INDEX_MERGE_THRESHOLD_DEFAULT, 1, 50, 0);
#endif /* UNIV_DEBUG */
19012 
/* innodb_purge_batch_size: default 300, range 1..5000. */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
  PLUGIN_VAR_OPCMDARG,
  "Number of UNDO log pages to purge in one batch from the history list.",
  NULL, NULL,
  300,			/* Default setting */
  1,			/* Minimum value */
  5000, 0);		/* Maximum value */

/* innodb_purge_threads: read-only, default 4, minimum 1.
NOTE(review): the help text says the maximum is 32 while the enforced
maximum is srv_max_purge_threads -- confirm that the two agree. */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Purge threads can be from 1 to 32. Default is 4.",
  NULL, NULL,
  4,			/* Default setting */
  1,			/* Minimum value */
  srv_max_purge_threads,/* Maximum value */
  0);

/* innodb_sync_array_size: read-only, default 1, range 1..1024. */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Size of the mutex/lock wait array.",
  NULL, NULL,
  1,			/* Default setting */
  1,			/* Minimum value */
  1024, 0);		/* Maximum value */

/* innodb_fast_shutdown: default 1, range 0..3; new values are checked by
fast_shutdown_validate. */
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible"
  " values are 0, 1 (faster), 2 (crash-like), 3 (fastest clean).",
  fast_shutdown_validate, NULL, 1, 0, 3, 0);

/* innodb_file_per_table: boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
  PLUGIN_VAR_NOCMDARG,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, TRUE);

/* innodb_ft_server_stopword_table: string with PLUGIN_VAR_MEMALLOC, no
default; new values are checked by innodb_stopword_table_validate. */
static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
  "The user supplied stopword table name.",
  innodb_stopword_table_validate,
  NULL,
  NULL);

/* innodb_flush_log_at_timeout: default 1, range 0..2700. */
static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
  PLUGIN_VAR_OPCMDARG,
  "Write and flush logs every (n) second.",
  NULL, NULL, 1, 0, 2700, 0);

/* innodb_flush_log_at_trx_commit: default 1, range 0..3. */
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
  "Controls the durability/speed trade-off for commits."
  " Set to 0 (write and flush redo log to disk only once per second),"
  " 1 (flush to disk at each commit),"
  " 2 (write to log at commit but flush to disk only once per second)"
  " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
  " 1 and 3 guarantees that after a crash, committed transactions will"
  " not be lost and will be consistent with the binlog and other transactional"
  " engines. 2 can get inconsistent and lose transactions if there is a"
  " power failure or kernel crash but not if mysqld crashes. 0 has no"
  " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
  NULL, NULL, 1, 0, 3, 0);

/* innodb_flush_method: read-only enum; the default is
SRV_ALL_O_DIRECT_FSYNC or SRV_FSYNC, selected by IF_WIN(). */
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.",
  NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
  &innodb_flush_method_typelib);

/* innodb_file_format: deprecated read-only string with no effect. */
static MYSQL_SYSVAR_STR(file_format, innodb_file_format,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_large_prefix: deprecated read-only string with no effect. */
static MYSQL_SYSVAR_STR(large_prefix, innodb_large_prefix,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Deprecated parameter with no effect.", NULL, NULL, NULL);

/* innodb_force_load_corrupted: read-only boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to load metadata of corrupted table.",
  NULL, NULL, FALSE);

/* innodb_locks_unsafe_for_binlog: deprecated read-only boolean, default
FALSE. */
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "DEPRECATED. This option may be removed in future releases."
  " Please use READ COMMITTED transaction isolation level instead."
  " Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

/* innodb_log_group_home_dir: read-only string, no default. */
static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);
19104 
19105 /** Update innodb_page_cleaners.
19106 @param[in]	save	the new value of innodb_page_cleaners */
19107 static
19108 void
innodb_page_cleaners_threads_update(THD *,struct st_mysql_sys_var *,void *,const void * save)19109 innodb_page_cleaners_threads_update(THD*, struct st_mysql_sys_var*, void*, const void *save)
19110 {
19111 	buf_flush_set_page_cleaner_thread_cnt(*static_cast<const ulong*>(save));
19112 }
19113 
19114 static MYSQL_SYSVAR_ULONG(page_cleaners, srv_n_page_cleaners,
19115   PLUGIN_VAR_RQCMDARG,
19116   "Page cleaner threads can be from 1 to 64. Default is 4.",
19117   NULL,
19118   innodb_page_cleaners_threads_update, 4, 1, 64, 0);
19119 
19120 static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
19121   PLUGIN_VAR_RQCMDARG,
19122   "Percentage of dirty pages allowed in bufferpool.",
19123   NULL, innodb_max_dirty_pages_pct_update, 75.0, 0, 99.999, 0);
19124 
19125 static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
19126   srv_max_dirty_pages_pct_lwm,
19127   PLUGIN_VAR_RQCMDARG,
19128   "Percentage of dirty pages at which flushing kicks in.",
19129   NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);
19130 
19131 static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
19132   srv_adaptive_flushing_lwm,
19133   PLUGIN_VAR_RQCMDARG,
19134   "Percentage of log capacity below which no adaptive flushing happens.",
19135   NULL, NULL, 10.0, 0.0, 70.0, 0);
19136 
19137 static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
19138   PLUGIN_VAR_NOCMDARG,
19139   "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
19140   NULL, NULL, TRUE);
19141 
19142 static MYSQL_SYSVAR_BOOL(flush_sync, srv_flush_sync,
19143   PLUGIN_VAR_NOCMDARG,
19144   "Allow IO bursts at the checkpoints ignoring io_capacity setting.",
19145   NULL, NULL, TRUE);
19146 
19147 static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
19148   srv_flushing_avg_loops,
19149   PLUGIN_VAR_RQCMDARG,
19150   "Number of iterations over which the background flushing is averaged.",
19151   NULL, NULL, 30, 1, 1000, 0);
19152 
19153 static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
19154   PLUGIN_VAR_RQCMDARG,
19155   "Desired maximum length of the purge queue (0 = no limit)",
19156   NULL, NULL, 0, 0, ~0UL, 0);
19157 
19158 static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
19159    PLUGIN_VAR_RQCMDARG,
19160    "Maximum delay of user threads in micro-seconds",
19161    NULL, NULL,
19162    0L,			/* Default seting */
19163    0L,			/* Minimum value */
19164    10000000UL, 0);	/* Maximum value */
19165 
19166 static MYSQL_SYSVAR_UINT(max_purge_lag_wait, innodb_max_purge_lag_wait,
19167   PLUGIN_VAR_RQCMDARG,
19168   "Wait until History list length is below the specified limit",
19169   NULL, innodb_max_purge_lag_wait_update, UINT_MAX, 0, UINT_MAX, 0);
19170 
/** innodb_rollback_on_timeout, backed by innobase_rollback_on_timeout:
boolean, default FALSE, declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
  NULL, NULL, FALSE);

/** innodb_status_file, backed by innobase_create_status_file:
boolean, default FALSE, declared with PLUGIN_VAR_NOSYSVAR. */
static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file",
  NULL, NULL, FALSE);
19180 
/** innodb_stats_on_metadata, backed by innobase_stats_on_metadata:
boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
  PLUGIN_VAR_OPCMDARG,
  "Enable statistics gathering for metadata commands such as"
  " SHOW TABLE STATUS for tables that use transient statistics (off by default)",
  NULL, NULL, FALSE);

/** innodb_stats_sample_pages: deprecated name that is bound to the same
storage (srv_stats_transient_sample_pages) as
innodb_stats_transient_sample_pages below, with the additional update
callback innodb_stats_sample_pages_update. Default 8, minimum 1. */
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "Deprecated, use innodb_stats_transient_sample_pages instead",
  NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0);

/** innodb_stats_transient_sample_pages, backed by
srv_stats_transient_sample_pages: default 8, minimum 1, maximum ~0ULL. */
static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
  srv_stats_transient_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating transient"
  " statistics (if persistent statistics are not used, default 8)",
  NULL, NULL, 8, 1, ~0ULL, 0);

/** innodb_stats_persistent, backed by srv_stats_persistent:
boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB persistent statistics enabled for all tables unless overridden"
  " at table level",
  NULL, NULL, TRUE);

/** innodb_stats_auto_recalc, backed by srv_stats_auto_recalc:
boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB automatic recalculation of persistent statistics enabled for all"
  " tables unless overridden at table level (automatic recalculation is only"
  " done when InnoDB decides that the table has changed too much and needs a"
  " new statistics)",
  NULL, NULL, TRUE);

/** innodb_stats_persistent_sample_pages, backed by
srv_stats_persistent_sample_pages: default 20, minimum 1, maximum ~0ULL. */
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
  srv_stats_persistent_sample_pages,
  PLUGIN_VAR_RQCMDARG,
  "The number of leaf index pages to sample when calculating persistent"
  " statistics (by ANALYZE, default 20)",
  NULL, NULL, 20, 1, ~0ULL, 0);

/** innodb_stats_modified_counter, backed by srv_stats_modified_counter:
default 0, minimum 0, maximum ~0ULL. */
static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
  PLUGIN_VAR_RQCMDARG,
  "The number of rows modified before we calculate new statistics (default 0 = current limits)",
  NULL, NULL, 0, 0, ~0ULL, 0);

/** innodb_stats_traditional, backed by srv_stats_sample_traditional:
boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
  PLUGIN_VAR_RQCMDARG,
  "Enable traditional statistic calculation based on number of configured pages (default true)",
  NULL, NULL, TRUE);
19229 
#ifdef BTR_CUR_HASH_ADAPT
/** innodb_adaptive_hash_index, backed by btr_search_enabled: boolean,
default true. Assignments go through innodb_adaptive_hash_index_update.
Only declared when BTR_CUR_HASH_ADAPT is defined. */
static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB adaptive hash index (enabled by default). "
  " Disable with --skip-innodb-adaptive-hash-index.",
  NULL, innodb_adaptive_hash_index_update, true);

/** Number of distinct partitions of AHI.
Each partition is protected by its own latch and so we have parts number
of latches protecting complete search system.
Backed by btr_ahi_parts: default 8, minimum 1, maximum 512; declared
with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(adaptive_hash_index_parts, btr_ahi_parts,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of InnoDB Adaptive Hash Index Partitions (default 8)",
  NULL, NULL, 8, 1, 512, 0);
#endif /* BTR_CUR_HASH_ADAPT */
19245 
/** innodb_replication_delay, backed by srv_replication_delay:
default 0, minimum 0, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
  PLUGIN_VAR_RQCMDARG,
  "Replication thread delay (ms) on the slave server if"
  " innodb_thread_concurrency is reached (0 by default)",
  NULL, NULL, 0, 0, ~0UL, 0);

/** innodb_compression_level, backed by page_zip_level:
default DEFAULT_COMPRESSION_LEVEL, minimum 0, maximum 9. */
static MYSQL_SYSVAR_UINT(compression_level, page_zip_level,
  PLUGIN_VAR_RQCMDARG,
  "Compression level used for zlib compression.  0 is no compression"
  ", 1 is fastest, 9 is best compression and default is 6.",
  NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);

/** innodb_log_compressed_pages, backed by page_zip_log_pages:
boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
       PLUGIN_VAR_OPCMDARG,
  "Enables/disables the logging of entire compressed page images."
  " InnoDB logs the compressed pages to prevent corruption if"
  " the zlib compression algorithm changes."
  " When turned OFF, InnoDB will assume that the zlib"
  " compression algorithm doesn't change.",
  NULL, NULL, TRUE);

/** innodb_log_optimize_ddl, backed by innodb_log_optimize_ddl:
boolean, default FALSE. The help text marks it as deprecated. */
static MYSQL_SYSVAR_BOOL(log_optimize_ddl, innodb_log_optimize_ddl,
  PLUGIN_VAR_OPCMDARG,
  "DEPRECATED. Ignored in MariaDB 10.5."
  " Reduce redo logging when natively creating indexes or rebuilding tables."
  " Enabling this may slow down backup and cause delay due to page flushing.",
  NULL, NULL, FALSE);

/** innodb_autoextend_increment, backed by
sys_tablespace_auto_extend_increment: default 64, minimum 1,
maximum 1000 (megabytes according to the help text). */
static MYSQL_SYSVAR_ULONG(autoextend_increment,
  sys_tablespace_auto_extend_increment,
  PLUGIN_VAR_RQCMDARG,
  "Data file autoextend increment in megabytes",
  NULL, NULL, 64L, 1L, 1000L, 0);
19279 
/** innodb_buffer_pool_chunk_size, backed by srv_buf_pool_chunk_unit:
default 128 * 1024 * 1024, minimum 1024 * 1024, maximum LONG_MAX;
declared with PLUGIN_VAR_READONLY.
NOTE(review): the help text says that 0 disables resizing, yet the
minimum passed here is 1024 * 1024 - confirm whether 0 is reachable. */
static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of a single memory chunk within each buffer pool instance"
  " for resizing buffer pool. Online buffer pool resizing happens"
  " at this granularity. 0 means disable resizing buffer pool.",
  NULL, NULL,
  128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024);
19287 
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
/** innodb_page_hash_locks, backed by srv_n_page_hash_locks: default 16,
minimum 1, maximum MAX_PAGE_HASH_LOCKS; declared with
PLUGIN_VAR_READONLY. Only present in UNIV_DEBUG or UNIV_PERF_DEBUG
builds. */
static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
  NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0);

/** innodb_doublewrite_batch_size, backed by srv_doublewrite_batch_size:
default 120, minimum 1, maximum 127; declared with PLUGIN_VAR_READONLY.
Only present in UNIV_DEBUG or UNIV_PERF_DEBUG builds. */
static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Number of pages reserved in doublewrite buffer for batch flushing",
  NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
19299 
/** innodb_lock_schedule_algorithm, backed by
innodb_lock_schedule_algorithm: enumeration whose values come from
innodb_lock_schedule_algorithm_typelib, default
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The algorithm Innodb uses for deciding which locks to grant next when"
  " a lock is released. Possible values are"
  " FCFS"
  " grant the locks in First-Come-First-Served order;"
  " VATS"
  " use the Variance-Aware-Transaction-Scheduling algorithm, which"
  " uses an Eldest-Transaction-First heuristic.",
  NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
  &innodb_lock_schedule_algorithm_typelib);
19311 
/** innodb_buffer_pool_instances, backed by srv_buf_pool_instances:
default srv_buf_pool_instances_default, minimum 0, maximum
MAX_BUFFER_POOLS; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
  NULL, NULL, srv_buf_pool_instances_default, 0, MAX_BUFFER_POOLS, 0);

/** innodb_buffer_pool_filename, backed by srv_buf_dump_filename: string
declared with PLUGIN_VAR_MEMALLOC, default SRV_BUF_DUMP_FILENAME_DEFAULT.
New values are checked by innodb_srv_buf_dump_filename_validate. */
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "Filename to/from which to dump/load the InnoDB buffer pool",
  innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);

/** innodb_buffer_pool_dump_now, backed by innodb_buffer_pool_dump_now:
boolean, default FALSE; assignments invoke buffer_pool_dump_now. */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_dump_now, FALSE);

/** innodb_buffer_pool_dump_at_shutdown, backed by
srv_buffer_pool_dump_at_shutdown: boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
  PLUGIN_VAR_RQCMDARG,
  "Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);

/** innodb_buffer_pool_dump_pct, backed by srv_buf_pool_dump_pct:
default 25, minimum 1, maximum 100. */
static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
  PLUGIN_VAR_RQCMDARG,
  "Dump only the hottest N% of each buffer pool, defaults to 25",
  NULL, NULL, 25, 1, 100, 0);

#ifdef UNIV_DEBUG
/* Added to test the innodb_buffer_pool_load_incomplete status variable.
Backed by srv_buf_pool_load_pages_abort: default LONG_MAX, minimum 1,
maximum LONG_MAX. */
static MYSQL_SYSVAR_ULONG(buffer_pool_load_pages_abort, srv_buf_pool_load_pages_abort,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages during a buffer pool load to process before signaling innodb_buffer_pool_load_abort=1",
  NULL, NULL, LONG_MAX, 1, LONG_MAX, 0);

/** innodb_buffer_pool_evict, backed by srv_buffer_pool_evict: string,
default ""; assignments invoke innodb_buffer_pool_evict_update.
UNIV_DEBUG builds only. */
static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
  PLUGIN_VAR_RQCMDARG,
  "Evict pages from the buffer pool",
  NULL, innodb_buffer_pool_evict_update, "");
#endif /* UNIV_DEBUG */

/** innodb_buffer_pool_load_now, backed by innodb_buffer_pool_load_now:
boolean, default FALSE; assignments invoke buffer_pool_load_now. */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
  PLUGIN_VAR_RQCMDARG,
  "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, buffer_pool_load_now, FALSE);

/** innodb_buffer_pool_load_abort, backed by
innodb_buffer_pool_load_abort: boolean, default FALSE; assignments
invoke buffer_pool_load_abort. */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
  PLUGIN_VAR_RQCMDARG,
  "Abort a currently running load of the buffer pool",
  NULL, buffer_pool_load_abort, FALSE);

/* there is no point in changing this during runtime, thus readonly.
Backed by srv_buffer_pool_load_at_startup: boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Load the buffer pool from a file named @@innodb_buffer_pool_filename",
  NULL, NULL, TRUE);
19365 
19366 static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
19367   PLUGIN_VAR_RQCMDARG,
19368   "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
19369   "defragmentation will be paused. And new defragmentation command will fail."
19370   "Paused defragmentation commands will resume when this variable is set to "
19371   "true again.",
19372   NULL, NULL, FALSE);
19373 
/** innodb_defragment_n_pages, backed by srv_defragment_n_pages:
default 7, minimum 2, maximum 32. */
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages considered at once when merging multiple pages to "
  "defragment",
  NULL, NULL, 7, 2, 32, 0);

/** innodb_defragment_stats_accuracy, backed by
srv_defragment_stats_accuracy: default 0, minimum 0, maximum ~0U. */
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
  srv_defragment_stats_accuracy,
  PLUGIN_VAR_RQCMDARG,
  "How many defragment stats changes there are before the stats "
  "are written to persistent storage. Set to 0 meaning disable "
  "defragment stats tracking.",
  NULL, NULL, 0, 0, ~0U, 0);

/** innodb_defragment_fill_factor_n_recs, backed by
srv_defragment_fill_factor_n_recs: default 20, minimum 1, maximum 100. */
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
  srv_defragment_fill_factor_n_recs,
  PLUGIN_VAR_RQCMDARG,
  "How many records of space defragmentation should leave on the page. "
  "This variable, together with innodb_defragment_fill_factor, is introduced "
  "so defragmentation won't pack the page too full and cause page split on "
  "the next insert on every page. The variable indicating more defragmentation"
  " gain is the one effective.",
  NULL, NULL, 20, 1, 100, 0);
19397 
19398 static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
19399   PLUGIN_VAR_RQCMDARG,
19400   "A number between [0.7, 1] that tells defragmentation how full it should "
19401   "fill a page. Default is 0.9. Number below 0.7 won't make much sense."
19402   "This variable, together with innodb_defragment_fill_factor_n_recs, is "
19403   "introduced so defragmentation won't pack the page too full and cause "
19404   "page split on the next insert on every page. The variable indicating more "
19405   "defragmentation gain is the one effective.",
19406   NULL, NULL, 0.9, 0.7, 1, 0);
19407 
19408 static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
19409   PLUGIN_VAR_RQCMDARG,
19410   "Do not defragment a single index more than this number of time per second."
19411   "This controls the number of time defragmentation thread can request X_LOCK "
19412   "on an index. Defragmentation thread will check whether "
19413   "1/defragment_frequency (s) has passed since it worked on this index last "
19414   "time, and put the index back to the queue if not enough time has passed. "
19415   "The actual frequency can only be lower than this given number.",
19416   NULL, innodb_defragment_frequency_update,
19417   SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
19418 
19419 
/** innodb_lru_scan_depth, backed by srv_LRU_scan_depth:
default 1024, minimum 100, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
  PLUGIN_VAR_RQCMDARG,
  "How deep to scan LRU to keep it clean",
  NULL, NULL, 1024, 100, ~0UL, 0);

/** innodb_flush_neighbors, backed by srv_flush_neighbors:
default 1, minimum 0, maximum 2 (meanings listed in the help text). */
static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
  PLUGIN_VAR_OPCMDARG,
  "Set to 0 (don't flush neighbors from buffer pool),"
  " 1 (flush contiguous neighbors from buffer pool)"
  " or 2 (flush neighbors from buffer pool),"
  " when flushing a block",
  NULL, NULL, 1, 0, 2, 0);

/** innodb_commit_concurrency, backed by innobase_commit_concurrency:
default 0, minimum 0, maximum 1000; new values are checked by
innobase_commit_concurrency_validate. */
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
  innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);

/** innodb_concurrency_tickets, backed by srv_n_free_tickets_to_enter:
default 5000, minimum 1, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
  PLUGIN_VAR_RQCMDARG,
  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
  NULL, NULL, 5000L, 1L, ~0UL, 0);

/** innodb_deadlock_detect, backed by innobase_deadlock_detect:
boolean, default TRUE. */
static MYSQL_SYSVAR_BOOL(deadlock_detect, innobase_deadlock_detect,
  PLUGIN_VAR_NOCMDARG,
  "Enable/disable InnoDB deadlock detector (default ON)."
  " if set to OFF, deadlock detection is skipped,"
  " and we rely on innodb_lock_wait_timeout in case of deadlock.",
  NULL, NULL, TRUE);

/** innodb_fill_factor, backed by innobase_fill_factor:
default 100, minimum 10, maximum 100. */
static MYSQL_SYSVAR_UINT(fill_factor, innobase_fill_factor,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of B-tree page filled during bulk insert",
  NULL, NULL, 100, 10, 100, 0);
19454 
/** innodb_ft_enable_diag_print, backed by fts_enable_diag_print:
boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
  PLUGIN_VAR_OPCMDARG,
  "Whether to enable additional FTS diagnostic printout ",
  NULL, NULL, FALSE);

/** innodb_disable_sort_file_cache, backed by
srv_disable_sort_file_cache: boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
  PLUGIN_VAR_OPCMDARG,
  "Whether to disable OS system file cache for sort I/O",
  NULL, NULL, FALSE);

/** innodb_ft_aux_table, backed by innodb_ft_aux_table: string declared
with PLUGIN_VAR_MEMALLOC, default NULL; new values are checked by
innodb_ft_aux_table_validate. */
static MYSQL_SYSVAR_STR(ft_aux_table, innodb_ft_aux_table,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
  "FTS internal auxiliary table to be checked",
  innodb_ft_aux_table_validate, NULL, NULL);

/** innodb_ft_cache_size, backed by fts_max_cache_size: default 8000000,
minimum 1600000, maximum 80000000; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search cache size in bytes",
  NULL, NULL, 8000000, 1600000, 80000000, 0);

/** innodb_ft_total_cache_size, backed by fts_max_total_cache_size:
default 640000000, minimum 32000000, maximum 1600000000; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Total memory allocated for InnoDB Fulltext Search cache",
  NULL, NULL, 640000000, 32000000, 1600000000, 0);

/** innodb_ft_result_cache_limit, backed by fts_result_cache_limit:
default 2000000000, minimum 1000000, maximum SIZE_T_MAX. */
static MYSQL_SYSVAR_SIZE_T(ft_result_cache_limit, fts_result_cache_limit,
  PLUGIN_VAR_RQCMDARG,
  "InnoDB Fulltext search query result cache limit in bytes",
  NULL, NULL, 2000000000L, 1000000L, SIZE_T_MAX, 0);

/** innodb_ft_min_token_size, backed by fts_min_token_size: default 3,
minimum 0, maximum 16; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search minimum token size in characters",
  NULL, NULL, 3, 0, 16, 0);

/** innodb_ft_max_token_size, backed by fts_max_token_size: default and
maximum FTS_MAX_WORD_LEN_IN_CHAR, minimum 10; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search maximum token size in characters",
  NULL, NULL, FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0);

/** innodb_ft_num_word_optimize, backed by fts_num_word_optimize:
default 2000, minimum 1000, maximum 10000. */
static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize,
  PLUGIN_VAR_OPCMDARG,
  "InnoDB Fulltext search number of words to optimize for each optimize table call ",
  NULL, NULL, 2000, 1000, 10000, 0);

/** innodb_ft_sort_pll_degree, backed by fts_sort_pll_degree: default 2,
minimum 1, maximum 16; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number",
  NULL, NULL, 2, 1, 16, 0);

/** innodb_sort_buffer_size, backed by srv_sort_buf_size: default
1048576, minimum 65536, maximum 64<<20; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Memory buffer size for index creation",
  NULL, NULL, 1048576, 65536, 64<<20, 0);

/** innodb_online_alter_log_max_size, backed by srv_online_max_size:
default 128<<20, minimum 65536, maximum ~0ULL. */
static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
  PLUGIN_VAR_RQCMDARG,
  "Maximum modification log file size for online index creation",
  NULL, NULL, 128<<20, 65536, ~0ULL, 0);

/** innodb_optimize_fulltext_only, backed by
innodb_optimize_fulltext_only: boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
  PLUGIN_VAR_NOCMDARG,
  "Only optimize the Fulltext index of the table",
  NULL, NULL, FALSE);
19519 
/** innodb_read_io_threads, backed by srv_n_read_io_threads: default 4,
minimum 1, maximum 64; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(read_io_threads, srv_n_read_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background read I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/** innodb_write_io_threads, backed by srv_n_write_io_threads: default 4,
minimum 1, maximum 64; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(write_io_threads, srv_n_write_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of background write I/O threads in InnoDB.",
  NULL, NULL, 4, 1, 64, 0);

/** innodb_force_recovery, backed by srv_force_recovery: default 0,
minimum 0, maximum 6; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Helps to save your data in case the disk image of the database becomes corrupt.",
  NULL, NULL, 0, 0, 6, 0);

/** innodb_page_size, backed by srv_page_size: default
UNIV_PAGE_SIZE_DEF, minimum UNIV_PAGE_SIZE_MIN, maximum
UNIV_PAGE_SIZE_MAX; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Page size to use for all InnoDB tablespaces.",
  NULL, NULL, UNIV_PAGE_SIZE_DEF,
  UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0);
19540 
/** innodb_log_buffer_size, backed by srv_log_buffer_size: default
16L << 20, minimum 256L << 10, maximum LONG_MAX; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(log_buffer_size, srv_log_buffer_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the buffer which InnoDB uses to write log to the log files on disk.",
  NULL, NULL, 16L << 20, 256L << 10, LONG_MAX, 1024);

/** innodb_log_file_size, backed by srv_log_file_size: default 48 << 20,
minimum 1 << 20, maximum log_group_max_size; declared with
PLUGIN_VAR_READONLY. The final argument is UNIV_PAGE_SIZE_MAX; the
comment after the declaration explains that choice. */
static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of each log file in a log group.",
  NULL, NULL, 48 << 20, 1 << 20, log_group_max_size, UNIV_PAGE_SIZE_MAX);
/* OS_FILE_LOG_BLOCK_SIZE would be more appropriate than UNIV_PAGE_SIZE_MAX,
but fil_space_t is being used for the redo log, and it uses data pages. */

/** innodb_log_files_in_group, backed by srv_n_log_files: default 2,
minimum 1, maximum SRV_N_LOG_FILES_MAX; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
  NULL, NULL, 2, 1, SRV_N_LOG_FILES_MAX, 0);

/** innodb_log_write_ahead_size, backed by srv_log_write_ahead_size:
default 8*1024, minimum OS_FILE_LOG_BLOCK_SIZE, maximum
UNIV_PAGE_SIZE_DEF; assignments invoke
innodb_log_write_ahead_size_update. */
static MYSQL_SYSVAR_ULONG(log_write_ahead_size, srv_log_write_ahead_size,
  PLUGIN_VAR_RQCMDARG,
  "Redo log write ahead unit size to avoid read-on-write,"
  " it should match the OS cache block IO size",
  NULL, innodb_log_write_ahead_size_update,
  8*1024L, OS_FILE_LOG_BLOCK_SIZE, UNIV_PAGE_SIZE_DEF, OS_FILE_LOG_BLOCK_SIZE);
19564 
/** innodb_old_blocks_pct, backed by innobase_old_blocks_pct: default
100 * 3 / 8, minimum 5, maximum 95; assignments invoke
innodb_old_blocks_pct_update. */
static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of the buffer pool to reserve for 'old' blocks.",
  NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);

/** innodb_old_blocks_time, backed by buf_LRU_old_threshold_ms:
default 1000, minimum 0, maximum UINT_MAX32. */
static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
  PLUGIN_VAR_RQCMDARG,
  "Move blocks to the 'new' end of the buffer pool if the first access"
  " was at least this many milliseconds ago."
  " The timeout is disabled if 0.",
  NULL, NULL, 1000, 0, UINT_MAX32, 0);

/** innodb_open_files, backed by innobase_open_files: default 0,
minimum 0, maximum LONG_MAX; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(open_files, innobase_open_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "How many files at the maximum InnoDB keeps open at the same time.",
  NULL, NULL, 0, 0, LONG_MAX, 0);
19581 
/** innodb_sync_spin_loops, backed by srv_n_spin_wait_rounds:
default 30, minimum 0, maximum ~0UL. */
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
  PLUGIN_VAR_RQCMDARG,
  "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
  NULL, NULL, 30L, 0L, ~0UL, 0);

/** innodb_spin_wait_delay, backed by srv_spin_wait_delay:
default 4, minimum 0, maximum 6000. */
static MYSQL_SYSVAR_UINT(spin_wait_delay, srv_spin_wait_delay,
  PLUGIN_VAR_OPCMDARG,
  "Maximum delay between polling for a spin lock (4 by default)",
  NULL, NULL, 4, 0, 6000, 0);

/** innodb_thread_concurrency, backed by srv_thread_concurrency:
default 0, minimum 0, maximum 1000. */
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
  NULL, NULL, 0, 0, 1000, 0);

/** innodb_adaptive_max_sleep_delay, backed by
srv_adaptive_max_sleep_delay; the limits are annotated on the argument
lines below. */
static MYSQL_SYSVAR_ULONG(
  adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "The upper limit of the sleep delay in usec. Value of 0 disables it.",
  NULL, NULL,
  150000,			/* Default setting */
  0,				/* Minimum value */
  1000000, 0);			/* Maximum value */

/** innodb_prefix_index_cluster_optimization, backed by
srv_prefix_index_cluster_optimization: boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization,
  srv_prefix_index_cluster_optimization,
  PLUGIN_VAR_OPCMDARG,
  "Enable prefix optimization to sometimes avoid cluster index lookups.",
  NULL, NULL, FALSE);

/** innodb_thread_sleep_delay, backed by srv_thread_sleep_delay:
default 10000, minimum 0, maximum 1000000 (usec per the help text). */
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "Time of innodb thread sleeping before joining InnoDB queue (usec)."
  " Value 0 disable a sleep",
  NULL, NULL,
  10000L,
  0L,
  1000000L, 0);
19620 
/** innodb_data_file_path, backed by innobase_data_file_path: string,
default "ibdata1:12M:autoextend"; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to individual files and their sizes.",
  NULL, NULL, "ibdata1:12M:autoextend");

/** innodb_temp_data_file_path, backed by innobase_temp_data_file_path:
string, default "ibtmp1:12M:autoextend"; declared with
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_STR(temp_data_file_path, innobase_temp_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to files and their sizes making temp-tablespace.",
  NULL, NULL, "ibtmp1:12M:autoextend");

/** innodb_undo_directory, backed by srv_undo_dir: string, default NULL;
declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Directory where undo tablespace files live, this path can be absolute.",
  NULL, NULL, NULL);
19635 
/** innodb_undo_tablespaces, backed by srv_undo_tablespaces; declared
with PLUGIN_VAR_READONLY. The limits are annotated on the argument
lines below. */
static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of undo tablespaces to use.",
  NULL, NULL,
  0L,			/* Default setting */
  0L,			/* Minimum value */
  TRX_SYS_MAX_UNDO_SPACES, 0); /* Maximum value */

/** innodb_undo_logs, backed by srv_undo_logs. The same storage is also
exposed as innodb_rollback_segments further down. */
static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
  PLUGIN_VAR_OPCMDARG,
  "Number of undo logs to use.",
  NULL, NULL,
  TRX_SYS_N_RSEGS,	/* Default setting */
  1,			/* Minimum value */
  TRX_SYS_N_RSEGS, 0);	/* Maximum value */

/** innodb_max_undo_log_size, backed by srv_max_undo_log_size: default
and minimum 10 << 20, maximum
1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX). */
static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
  PLUGIN_VAR_OPCMDARG,
  "Desired maximum UNDO tablespace size in bytes",
  NULL, NULL,
  10 << 20, 10 << 20,
  1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);

/** innodb_purge_rseg_truncate_frequency, backed by
srv_purge_rseg_truncate_frequency: default 128, minimum 1, maximum 128. */
static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
  srv_purge_rseg_truncate_frequency,
  PLUGIN_VAR_OPCMDARG,
  "Dictates rate at which UNDO records are purged. Value N means"
  " purge rollback segment(s) on every Nth iteration of purge invocation",
  NULL, NULL, 128, 1, 128, 0);

/** innodb_undo_log_truncate, backed by srv_undo_log_truncate:
boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(undo_log_truncate, srv_undo_log_truncate,
  PLUGIN_VAR_OPCMDARG,
  "Enable or Disable Truncate of UNDO tablespace.",
  NULL, NULL, FALSE);

/* Alias for innodb_undo_logs, this config variable is deprecated.
It is bound to the same storage (srv_undo_logs) with the same limits. */
static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs,
  PLUGIN_VAR_OPCMDARG,
  "Number of undo logs to use (deprecated).",
  NULL, NULL,
  TRX_SYS_N_RSEGS,	/* Default setting */
  1,			/* Minimum value */
  TRX_SYS_N_RSEGS, 0);	/* Maximum value */
19679 
/** innodb_autoinc_lock_mode, backed by innobase_autoinc_lock_mode;
declared with PLUGIN_VAR_READONLY. The limits are the AUTOINC_*
constants annotated on the argument lines below. */
static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The AUTOINC lock modes supported by InnoDB:"
  " 0 => Old style AUTOINC locking (for backward compatibility);"
  " 1 => New style AUTOINC locking;"
  " 2 => No AUTOINC locking (unsafe for SBR)",
  NULL, NULL,
  AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
  AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
  AUTOINC_NO_LOCKING, 0);	/* Maximum value */

/** innodb_version, backed by innodb_version_str: string with default
INNODB_VERSION_STR, declared with PLUGIN_VAR_NOCMDOPT and
PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_STR(version, innodb_version_str,
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "InnoDB version", NULL, NULL, INNODB_VERSION_STR);

/** innodb_use_native_aio, backed by srv_use_native_aio: boolean,
default TRUE; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Use native AIO if supported on this platform.",
  NULL, NULL, TRUE);

#ifdef HAVE_LIBNUMA
/** innodb_numa_interleave, backed by srv_numa_interleave: boolean,
default FALSE; declared with PLUGIN_VAR_READONLY. Only declared when
HAVE_LIBNUMA is defined. */
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
  NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
19706 
/** innodb_change_buffering, backed by innodb_change_buffering:
enumeration whose values come from innodb_change_buffering_typelib,
default IBUF_USE_ALL. */
static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
  PLUGIN_VAR_RQCMDARG,
  "Buffer changes to secondary indexes.",
  NULL, NULL, IBUF_USE_ALL, &innodb_change_buffering_typelib);

/** innodb_change_buffer_max_size, backed by srv_change_buffer_max_size:
default CHANGE_BUFFER_DEFAULT_SIZE, minimum 0, maximum 50; assignments
invoke innodb_change_buffer_max_size_update. */
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
  srv_change_buffer_max_size,
  PLUGIN_VAR_RQCMDARG,
  "Maximum on-disk size of change buffer in terms of percentage"
  " of the buffer pool.",
  NULL, innodb_change_buffer_max_size_update,
  CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0);

/** innodb_stats_method, backed by srv_innodb_stats_method: enumeration
whose values come from innodb_stats_method_typelib, default
SRV_STATS_NULLS_EQUAL. */
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
   PLUGIN_VAR_RQCMDARG,
  "Specifies how InnoDB index statistics collection code should"
  " treat NULLs. Possible values are NULLS_EQUAL (default),"
  " NULLS_UNEQUAL and NULLS_IGNORED",
   NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
19726 
19727 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** innodb_change_buffer_dump, backed by ibuf_dump: boolean, default
FALSE; declared with PLUGIN_VAR_READONLY. Debug builds only (see the
enclosing #if). */
static MYSQL_SYSVAR_BOOL(change_buffer_dump, ibuf_dump,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Dump the change buffer at startup.",
  NULL, NULL, FALSE);

/** innodb_change_buffering_debug, backed by ibuf_debug: default 0,
minimum 0, maximum 1. Debug builds only (see the enclosing #if). */
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
  PLUGIN_VAR_RQCMDARG,
  "Debug flags for InnoDB change buffering (0=none, 1=try to buffer)",
  NULL, NULL, 0, 0, 1, 0);
19737 
19738 static MYSQL_SYSVAR_BOOL(disable_background_merge,
19739   srv_ibuf_disable_background_merge,
19740   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
19741   "Disable change buffering merges by the master thread",
19742   NULL, NULL, FALSE);
19743 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
19744 
19745 static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
19746   PLUGIN_VAR_RQCMDARG,
19747   "A number between [0, 100] that tells how oftern buffer pool dump status "
19748   "in percentages should be printed. E.g. 10 means that buffer pool dump "
19749   "status is printed when every 10% of number of buffer pool pages are "
19750   "dumped. Default is 0 (only start and end status is printed).",
19751   NULL, NULL, 0, 0, 100, 0);
19752 
19753 #ifdef WITH_INNODB_DISALLOW_WRITES
19754 /*******************************************************
19755  *    innobase_disallow_writes variable definition     *
19756  *******************************************************/
19757 
/* Storage for the innodb_disallow_writes system variable.
Must always init to FALSE. */
static my_bool	innobase_disallow_writes	= FALSE;
19760 
19761 /**************************************************************************
19762 An "update" method for innobase_disallow_writes variable. */
19763 static
19764 void
innobase_disallow_writes_update(THD *,st_mysql_sys_var *,void * var_ptr,const void * save)19765 innobase_disallow_writes_update(THD*, st_mysql_sys_var*,
19766 				void* var_ptr, const void* save)
19767 {
19768 	const my_bool val = *static_cast<const my_bool*>(save);
19769 	*static_cast<my_bool*>(var_ptr) = val;
19770 	ut_a(srv_allow_writes_event);
19771 	mysql_mutex_unlock(&LOCK_global_system_variables);
19772 	if (val) {
19773 		os_event_reset(srv_allow_writes_event);
19774 	} else {
19775 		os_event_set(srv_allow_writes_event);
19776 	}
19777 	mysql_mutex_lock(&LOCK_global_system_variables);
19778 }
19779 
/** innodb_disallow_writes, backed by innobase_disallow_writes: boolean,
default FALSE, declared with PLUGIN_VAR_NOCMDOPT; assignments invoke
innobase_disallow_writes_update. */
static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
  PLUGIN_VAR_NOCMDOPT,
  "Tell InnoDB to stop any writes to disk",
  NULL, innobase_disallow_writes_update, FALSE);
19784 #endif /* WITH_INNODB_DISALLOW_WRITES */
19785 
/** innodb_random_read_ahead, backed by srv_random_read_ahead:
boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
  PLUGIN_VAR_NOCMDARG,
  "Whether to use read ahead for random access within an extent.",
  NULL, NULL, FALSE);

/** innodb_read_ahead_threshold, backed by srv_read_ahead_threshold:
default 56, minimum 0, maximum 64. */
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
  PLUGIN_VAR_RQCMDARG,
  "Number of pages that must be accessed sequentially for InnoDB to"
  " trigger a readahead.",
  NULL, NULL, 56, 0, 64, 0);
19796 
/* The four innodb_monitor_* string variables below all use
innodb_monitor_validate as check callback and default to NULL; each one
has its own update callback. */

/** innodb_monitor_enable, backed by innobase_enable_monitor_counter;
assignments invoke innodb_enable_monitor_update. */
static MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn on a monitor counter",
  innodb_monitor_validate,
  innodb_enable_monitor_update, NULL);

/** innodb_monitor_disable, backed by innobase_disable_monitor_counter;
assignments invoke innodb_disable_monitor_update. */
static MYSQL_SYSVAR_STR(monitor_disable, innobase_disable_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Turn off a monitor counter",
  innodb_monitor_validate,
  innodb_disable_monitor_update, NULL);

/** innodb_monitor_reset, backed by innobase_reset_monitor_counter;
assignments invoke innodb_reset_monitor_update. */
static MYSQL_SYSVAR_STR(monitor_reset, innobase_reset_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset a monitor counter",
  innodb_monitor_validate,
  innodb_reset_monitor_update, NULL);

/** innodb_monitor_reset_all, backed by
innobase_reset_all_monitor_counter; assignments invoke
innodb_reset_all_monitor_update. */
static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter,
  PLUGIN_VAR_RQCMDARG,
  "Reset all values for a monitor counter",
  innodb_monitor_validate,
  innodb_reset_all_monitor_update, NULL);
19820 
/** innodb_status_output, backed by srv_print_innodb_monitor: boolean,
default FALSE; assignments invoke innodb_status_output_update. */
static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_status_output_locks, backed by srv_print_innodb_lock_monitor:
boolean, default FALSE; shares the update callback
innodb_status_output_update with innodb_status_output. */
static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor,
  PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log."
  " Requires innodb_status_output=ON.",
  NULL, innodb_status_output_update, FALSE);

/** innodb_print_all_deadlocks, backed by srv_print_all_deadlocks:
boolean, default FALSE. */
static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
  PLUGIN_VAR_OPCMDARG,
  "Print all deadlocks to MariaDB error log (off by default)",
  NULL, NULL, FALSE);
19834 
/** innodb_compression_failure_threshold_pct, backed by
zip_failure_threshold_pct: default 5, minimum 0, maximum 100. */
static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
  zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
  "If the compression failure rate of a table is greater than this number"
  " more padding is added to the pages to reduce the failures. A value of"
  " zero implies no padding",
  NULL, NULL, 5, 0, 100, 0);

/** innodb_compression_pad_pct_max, backed by zip_pad_max:
default 50, minimum 0, maximum 75. */
static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
  zip_pad_max, PLUGIN_VAR_OPCMDARG,
  "Percentage of empty space on a data page that can be reserved"
  " to make the page compressible.",
  NULL, NULL, 50, 0, 75, 0);
19847 
/** innodb_read_only, backed by srv_read_only_mode: boolean, default
FALSE; declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Start InnoDB in read only mode (off by default)",
  NULL, NULL, FALSE);

/** innodb_cmp_per_index_enabled, backed by srv_cmp_per_index_enabled:
boolean, default FALSE; assignments invoke innodb_cmp_per_index_update. */
static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
  PLUGIN_VAR_OPCMDARG,
  "Enable INFORMATION_SCHEMA.innodb_cmp_per_index,"
  " may have negative impact on performance (off by default)",
  NULL, innodb_cmp_per_index_update, FALSE);

/** innodb_default_row_format, backed by innodb_default_row_format:
enumeration whose values come from innodb_default_row_format_typelib,
default DEFAULT_ROW_FORMAT_DYNAMIC. */
static MYSQL_SYSVAR_ENUM(default_row_format, innodb_default_row_format,
  PLUGIN_VAR_RQCMDARG,
  "The default ROW FORMAT for all innodb tables created without explicit"
  " ROW_FORMAT. Possible values are REDUNDANT, COMPACT, and DYNAMIC."
  " The ROW_FORMAT value COMPRESSED is not allowed",
  NULL, NULL, DEFAULT_ROW_FORMAT_DYNAMIC,
  &innodb_default_row_format_typelib);
19866 
/* Debug-only system variables: everything up to the matching #endif
is compiled only when UNIV_DEBUG is defined. */
#ifdef UNIV_DEBUG
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
  "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
  NULL, NULL, 0, 0, 1024, 0);

static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
  btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
  "Artificially limit the number of records per B-tree page (0=unlimited).",
  NULL, NULL, 0, 0, UINT_MAX32, 0);

static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
  srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
  "Pause actual purging any delete-marked records, but merely update the purge view."
  " It is to create artificially the situation the purge view have been updated"
  " but the each purges were not done yet.",
  NULL, NULL, FALSE);

static MYSQL_SYSVAR_BOOL(evict_tables_on_commit_debug,
  innodb_evict_tables_on_commit_debug, PLUGIN_VAR_OPCMDARG,
  "On transaction commit, try to evict tables from the data dictionary cache.",
  NULL, NULL, FALSE);

/* Declared with PLUGIN_VAR_READONLY; the upper bound is 256U << 20. */
static MYSQL_SYSVAR_UINT(data_file_size_debug,
  srv_sys_space_size_debug,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "InnoDB system tablespace size to be set in recovery.",
  NULL, NULL, 0, 0, 256U << 20, 0);

/* The two variables below each install an update callback
(innodb_make_page_dirty, innodb_save_page_no). */
static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
  srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG,
  "Make the first page of the given tablespace dirty.",
  NULL, innodb_make_page_dirty, 0, 0, UINT_MAX32, 0);

static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
  srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG,
  "An InnoDB page number.",
  NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);

/* Unlike the other booleans in this group, the last argument here
is TRUE. */
static MYSQL_SYSVAR_BOOL(disable_resize_buffer_pool_debug,
  buf_disable_resize_buffer_pool_debug, PLUGIN_VAR_NOCMDARG,
  "Disable resizing buffer pool to make assertion code not expensive.",
  NULL, NULL, TRUE);

static MYSQL_SYSVAR_BOOL(page_cleaner_disabled_debug,
  innodb_page_cleaner_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable page cleaner",
  NULL, buf_flush_page_cleaner_disabled_debug_update, FALSE);

static MYSQL_SYSVAR_BOOL(sync_debug, srv_sync_debug,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable the sync debug checks",
  NULL, NULL, FALSE);

static MYSQL_SYSVAR_BOOL(dict_stats_disabled_debug,
  innodb_dict_stats_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable dict_stats thread",
  NULL, dict_stats_disabled_debug_update, FALSE);

static MYSQL_SYSVAR_BOOL(master_thread_disabled_debug,
  srv_master_thread_disabled_debug,
  PLUGIN_VAR_OPCMDARG,
  "Disable master thread",
  NULL, srv_master_thread_disabled_debug_update, FALSE);
#endif /* UNIV_DEBUG */
19934 
static MYSQL_SYSVAR_BOOL(force_primary_key,
  srv_force_primary_key,
  PLUGIN_VAR_OPCMDARG,
  "Do not allow to create table without primary key (off by default)",
  NULL, NULL, FALSE);

/* Names accepted by innodb_compression_algorithm. The array is
0-terminated; the typelib count excludes that terminator. */
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
static TYPELIB page_compression_algorithms_typelib=
{
  array_elements(page_compression_algorithms) - 1, 0,
  page_compression_algorithms, 0
};
static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
  PLUGIN_VAR_OPCMDARG,
  "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
  innodb_compression_algorithm_validate, NULL,
  /* We use here the largest number of supported compression method to
  enable all those methods that are available. Availability of compression
  method is verified on innodb_compression_algorithm_validate function. */
  /* NOTE(review): the value actually passed below is PAGE_ZLIB_ALGORITHM,
  which does not look like "the largest number of supported compression
  method"; the comment above may be stale -- confirm. */
  PAGE_ZLIB_ALGORITHM,
  &page_compression_algorithms_typelib);

/* Declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Maximum number of seconds that semaphore times out in InnoDB.",
  NULL, NULL,
  DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, /* Default setting */
  1, /* Minimum setting */
  UINT_MAX32, /* Maximum setting */
  0);
19965 
/* Names accepted by innodb_encrypt_tables. The array is 0-terminated;
the typelib count excludes that terminator. */
static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 };
static TYPELIB srv_encrypt_tables_typelib = {
	array_elements(srv_encrypt_tables_names)-1, 0, srv_encrypt_tables_names,
	NULL
};
/* Uses both a validate and an update callback. */
static MYSQL_SYSVAR_ENUM(encrypt_tables, srv_encrypt_tables,
			 PLUGIN_VAR_OPCMDARG,
			 "Enable encryption for tables. "
			 "Don't forget to enable --innodb-encrypt-log too",
			 innodb_encrypt_tables_validate,
			 innodb_encrypt_tables_update,
			 0,
			 &srv_encrypt_tables_typelib);

/* The three encryption_* variables below each install their own
update callback. */
static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads,
			 PLUGIN_VAR_RQCMDARG,
			 "Number of threads performing background key rotation and "
			 "scrubbing",
			 NULL,
			 innodb_encryption_threads_update,
			 0, 0, 255, 0);

static MYSQL_SYSVAR_UINT(encryption_rotate_key_age,
			 srv_fil_crypt_rotate_key_age,
			 PLUGIN_VAR_RQCMDARG,
			 "Key rotation - re-encrypt in background "
                         "all pages that were encrypted with a key that "
                         "many (or more) versions behind. Value 0 indicates "
			 "that key rotation is disabled.",
			 NULL,
			 innodb_encryption_rotate_key_age_update,
			 1, 0, UINT_MAX32, 0);

/* NOTE(review): the first numeric argument is the bound variable
srv_n_fil_crypt_iops itself rather than a literal, so its value depends
on that variable's own initializer -- confirm this is intended. */
static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops,
			 PLUGIN_VAR_RQCMDARG,
			 "Use this many iops for background key rotation",
			 NULL,
			 innodb_encryption_rotation_iops_update,
			 srv_n_fil_crypt_iops, 0, UINT_MAX32, 0);

/* Redo log scrubbing; scrub_log is declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing",
  0, 0, 0);

static MYSQL_SYSVAR_ULONGLONG(scrub_log_speed, innodb_scrub_log_speed,
  PLUGIN_VAR_OPCMDARG,
  "Background redo log scrubbing speed in bytes/sec",
  NULL, NULL,
  256,              /* 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */
  1,                /* min */
  50000, 0);        /* 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */

/* Declared with PLUGIN_VAR_READONLY. */
static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Enable redo log encryption",
  NULL, NULL, FALSE);
20023 
/* Data scrubbing variables. All of them pass 0 as the option flags
and install no callbacks. */
static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed,
			 srv_immediate_scrub_data_uncompressed,
			 0,
			 "Enable scrubbing of data",
			 NULL, NULL, FALSE);

static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed,
			 srv_background_scrub_data_uncompressed,
			 0,
			 "Enable scrubbing of uncompressed data by "
			 "background threads (same as encryption_threads)",
			 NULL, NULL, FALSE);

static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed,
			 srv_background_scrub_data_compressed,
			 0,
			 "Enable scrubbing of compressed data by "
			 "background threads (same as encryption_threads)",
			 NULL, NULL, FALSE);

/* For the two interval variables the first numeric argument is the
bound variable itself, not a literal; the next two are 1 and
UINT_MAX32. */
static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval,
			 srv_background_scrub_data_check_interval,
			 0,
			 "check if spaces needs scrubbing every "
			 "innodb_background_scrub_data_check_interval "
			 "seconds",
			 NULL, NULL,
			 srv_background_scrub_data_check_interval,
			 1,
			 UINT_MAX32, 0);

static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
			 srv_background_scrub_data_interval,
			 0,
			 "scrub spaces that were last scrubbed longer than "
			 " innodb_background_scrub_data_interval seconds ago",
			 NULL, NULL,
			 srv_background_scrub_data_interval,
			 1,
			 UINT_MAX32, 0);

/* Debug-only variables, compiled only when UNIV_DEBUG is defined. */
#ifdef UNIV_DEBUG
static MYSQL_SYSVAR_BOOL(debug_force_scrubbing,
			 srv_scrub_force_testing,
			 0,
			 "Perform extra scrubbing to increase test exposure",
			 NULL, NULL, FALSE);

/* Storage bound to innodb_debug_sync; note that it has external
linkage (no "static"). */
char *innobase_debug_sync;
static MYSQL_SYSVAR_STR(debug_sync, innobase_debug_sync,
			PLUGIN_VAR_NOCMDARG,
			"debug_sync for innodb purge threads. "
			"Use it to set up sync points for all purge threads "
			"at once. The commands will be applied sequentially at"
			" the beginning of purging the next undo record.",
			NULL,
			innobase_debug_sync_set, NULL);
#endif /* UNIV_DEBUG */
20082 
20083 static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tables,
20084   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
20085   "Enrypt the temporary table data.",
20086   NULL, NULL, false);
20087 
/* NULL-terminated list of the InnoDB system variables defined with the
MYSQL_SYSVAR_* macros in this file. Entries inside #ifdef/#if sections
are listed only when the corresponding symbol is defined. */
static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(buffer_pool_chunk_size),
  MYSQL_SYSVAR(buffer_pool_instances),
  MYSQL_SYSVAR(buffer_pool_filename),
  MYSQL_SYSVAR(buffer_pool_dump_now),
  MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
  MYSQL_SYSVAR(buffer_pool_dump_pct),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_evict),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_now),
  MYSQL_SYSVAR(buffer_pool_load_abort),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(buffer_pool_load_pages_abort),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buffer_pool_load_at_startup),
  MYSQL_SYSVAR(defragment),
  MYSQL_SYSVAR(defragment_n_pages),
  MYSQL_SYSVAR(defragment_stats_accuracy),
  MYSQL_SYSVAR(defragment_fill_factor),
  MYSQL_SYSVAR(defragment_fill_factor_n_recs),
  MYSQL_SYSVAR(defragment_frequency),
  MYSQL_SYSVAR(lru_scan_depth),
  MYSQL_SYSVAR(flush_neighbors),
  MYSQL_SYSVAR(checksum_algorithm),
  MYSQL_SYSVAR(log_checksums),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(commit_concurrency),
  MYSQL_SYSVAR(concurrency_tickets),
  MYSQL_SYSVAR(compression_level),
  MYSQL_SYSVAR(data_file_path),
  MYSQL_SYSVAR(temp_data_file_path),
  MYSQL_SYSVAR(data_home_dir),
  MYSQL_SYSVAR(doublewrite),
  MYSQL_SYSVAR(stats_include_delete_marked),
  MYSQL_SYSVAR(use_atomic_writes),
  MYSQL_SYSVAR(fast_shutdown),
  MYSQL_SYSVAR(read_io_threads),
  MYSQL_SYSVAR(write_io_threads),
  MYSQL_SYSVAR(file_per_table),
  MYSQL_SYSVAR(file_format), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(flush_log_at_timeout),
  MYSQL_SYSVAR(flush_log_at_trx_commit),
  MYSQL_SYSVAR(flush_method),
  MYSQL_SYSVAR(force_recovery),
  MYSQL_SYSVAR(fill_factor),
  MYSQL_SYSVAR(ft_cache_size),
  MYSQL_SYSVAR(ft_total_cache_size),
  MYSQL_SYSVAR(ft_result_cache_limit),
  MYSQL_SYSVAR(ft_enable_stopword),
  MYSQL_SYSVAR(ft_max_token_size),
  MYSQL_SYSVAR(ft_min_token_size),
  MYSQL_SYSVAR(ft_num_word_optimize),
  MYSQL_SYSVAR(ft_sort_pll_degree),
  MYSQL_SYSVAR(large_prefix), /* deprecated in MariaDB 10.2; no effect */
  MYSQL_SYSVAR(force_load_corrupted),
  MYSQL_SYSVAR(lock_schedule_algorithm),
  MYSQL_SYSVAR(locks_unsafe_for_binlog),
  MYSQL_SYSVAR(lock_wait_timeout),
  MYSQL_SYSVAR(deadlock_detect),
  MYSQL_SYSVAR(page_size),
  MYSQL_SYSVAR(log_buffer_size),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_files_in_group),
  MYSQL_SYSVAR(log_write_ahead_size),
  MYSQL_SYSVAR(log_group_home_dir),
  MYSQL_SYSVAR(log_compressed_pages),
  MYSQL_SYSVAR(log_optimize_ddl),
  MYSQL_SYSVAR(max_dirty_pages_pct),
  MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
  MYSQL_SYSVAR(adaptive_flushing_lwm),
  MYSQL_SYSVAR(adaptive_flushing),
  MYSQL_SYSVAR(flush_sync),
  MYSQL_SYSVAR(flushing_avg_loops),
  MYSQL_SYSVAR(max_purge_lag),
  MYSQL_SYSVAR(max_purge_lag_delay),
  MYSQL_SYSVAR(max_purge_lag_wait),
  MYSQL_SYSVAR(old_blocks_pct),
  MYSQL_SYSVAR(old_blocks_time),
  MYSQL_SYSVAR(open_files),
  MYSQL_SYSVAR(optimize_fulltext_only),
  MYSQL_SYSVAR(rollback_on_timeout),
  MYSQL_SYSVAR(ft_aux_table),
  MYSQL_SYSVAR(ft_enable_diag_print),
  MYSQL_SYSVAR(ft_server_stopword_table),
  MYSQL_SYSVAR(ft_user_stopword_table),
  MYSQL_SYSVAR(disable_sort_file_cache),
  MYSQL_SYSVAR(stats_on_metadata),
  MYSQL_SYSVAR(stats_sample_pages),
  MYSQL_SYSVAR(stats_transient_sample_pages),
  MYSQL_SYSVAR(stats_persistent),
  MYSQL_SYSVAR(stats_persistent_sample_pages),
  MYSQL_SYSVAR(stats_auto_recalc),
  MYSQL_SYSVAR(stats_modified_counter),
  MYSQL_SYSVAR(stats_traditional),
#ifdef BTR_CUR_HASH_ADAPT
  MYSQL_SYSVAR(adaptive_hash_index),
  MYSQL_SYSVAR(adaptive_hash_index_parts),
#endif /* BTR_CUR_HASH_ADAPT */
  MYSQL_SYSVAR(stats_method),
  MYSQL_SYSVAR(replication_delay),
  MYSQL_SYSVAR(status_file),
  MYSQL_SYSVAR(strict_mode),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(online_alter_log_max_size),
  MYSQL_SYSVAR(sync_spin_loops),
  MYSQL_SYSVAR(spin_wait_delay),
  MYSQL_SYSVAR(table_locks),
  MYSQL_SYSVAR(thread_concurrency),
  MYSQL_SYSVAR(adaptive_max_sleep_delay),
  MYSQL_SYSVAR(prefix_index_cluster_optimization),
  MYSQL_SYSVAR(thread_sleep_delay),
  MYSQL_SYSVAR(tmpdir),
  MYSQL_SYSVAR(autoinc_lock_mode),
  MYSQL_SYSVAR(version),
  MYSQL_SYSVAR(use_native_aio),
#ifdef HAVE_LIBNUMA
  MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */
  MYSQL_SYSVAR(change_buffering),
  MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
  MYSQL_SYSVAR(change_buffer_dump),
  MYSQL_SYSVAR(change_buffering_debug),
  MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef WITH_INNODB_DISALLOW_WRITES
  MYSQL_SYSVAR(disallow_writes),
#endif /* WITH_INNODB_DISALLOW_WRITES */
  MYSQL_SYSVAR(random_read_ahead),
  MYSQL_SYSVAR(read_ahead_threshold),
  MYSQL_SYSVAR(read_only),
  MYSQL_SYSVAR(instant_alter_column_allowed),
  MYSQL_SYSVAR(io_capacity),
  MYSQL_SYSVAR(io_capacity_max),
  MYSQL_SYSVAR(page_cleaners),
  MYSQL_SYSVAR(idle_flush_pct),
  MYSQL_SYSVAR(monitor_enable),
  MYSQL_SYSVAR(monitor_disable),
  MYSQL_SYSVAR(monitor_reset),
  MYSQL_SYSVAR(monitor_reset_all),
  MYSQL_SYSVAR(purge_threads),
  MYSQL_SYSVAR(purge_batch_size),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(background_drop_list_empty),
  MYSQL_SYSVAR(log_checkpoint_now),
  MYSQL_SYSVAR(buf_flush_list_now),
  MYSQL_SYSVAR(merge_threshold_set_all_debug),
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
  MYSQL_SYSVAR(page_hash_locks),
  MYSQL_SYSVAR(doublewrite_batch_size),
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
  MYSQL_SYSVAR(status_output),
  MYSQL_SYSVAR(status_output_locks),
  MYSQL_SYSVAR(print_all_deadlocks),
  MYSQL_SYSVAR(cmp_per_index_enabled),
  MYSQL_SYSVAR(undo_logs),
  MYSQL_SYSVAR(max_undo_log_size),
  MYSQL_SYSVAR(purge_rseg_truncate_frequency),
  MYSQL_SYSVAR(undo_log_truncate),
  MYSQL_SYSVAR(rollback_segments),
  MYSQL_SYSVAR(undo_directory),
  MYSQL_SYSVAR(undo_tablespaces),
  MYSQL_SYSVAR(sync_array_size),
  MYSQL_SYSVAR(compression_failure_threshold_pct),
  MYSQL_SYSVAR(compression_pad_pct_max),
  MYSQL_SYSVAR(default_row_format),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(trx_rseg_n_slots_debug),
  MYSQL_SYSVAR(limit_optimistic_insert_debug),
  MYSQL_SYSVAR(trx_purge_view_update_only_debug),
  MYSQL_SYSVAR(evict_tables_on_commit_debug),
  MYSQL_SYSVAR(data_file_size_debug),
  MYSQL_SYSVAR(fil_make_page_dirty_debug),
  MYSQL_SYSVAR(saved_page_number_debug),
  MYSQL_SYSVAR(disable_resize_buffer_pool_debug),
  MYSQL_SYSVAR(page_cleaner_disabled_debug),
  MYSQL_SYSVAR(dict_stats_disabled_debug),
  MYSQL_SYSVAR(master_thread_disabled_debug),
  MYSQL_SYSVAR(sync_debug),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(force_primary_key),
  MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
  /* Table page compression feature */
  MYSQL_SYSVAR(compression_default),
  MYSQL_SYSVAR(compression_algorithm),
  /* Encryption feature */
  MYSQL_SYSVAR(encrypt_tables),
  MYSQL_SYSVAR(encryption_threads),
  MYSQL_SYSVAR(encryption_rotate_key_age),
  MYSQL_SYSVAR(encryption_rotation_iops),
  MYSQL_SYSVAR(scrub_log),
  MYSQL_SYSVAR(scrub_log_speed),
  MYSQL_SYSVAR(encrypt_log),
  MYSQL_SYSVAR(default_encryption_key_id),
  /* Scrubbing feature */
  MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_uncompressed),
  MYSQL_SYSVAR(background_scrub_data_compressed),
  MYSQL_SYSVAR(background_scrub_data_interval),
  MYSQL_SYSVAR(background_scrub_data_check_interval),
#ifdef UNIV_DEBUG
  MYSQL_SYSVAR(debug_force_scrubbing),
  MYSQL_SYSVAR(debug_sync),
#endif /* UNIV_DEBUG */
  MYSQL_SYSVAR(buf_dump_status_frequency),
  MYSQL_SYSVAR(background_thread),
  MYSQL_SYSVAR(encrypt_temporary_tables),

  /* list terminator */
  NULL
};
20302 
/* Plugin declaration for InnoDB: the storage engine descriptor itself,
followed by the i_s_innodb_* entries (presumably the INFORMATION_SCHEMA
table plugins, defined elsewhere -- confirm). */
maria_declare_plugin(innobase)
{
  MYSQL_STORAGE_ENGINE_PLUGIN, /* plugin type */
  &innobase_storage_engine,
  innobase_hton_name,
  plugin_author,
  "Supports transactions, row-level locking, foreign keys and encryption for tables",
  PLUGIN_LICENSE_GPL,
  innodb_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  INNODB_VERSION_SHORT,
  innodb_status_variables_export,/* status variables             */
  innobase_system_variables, /* system variables */
  INNODB_VERSION_STR,         /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
},
i_s_innodb_trx,
i_s_innodb_locks,
i_s_innodb_lock_waits,
i_s_innodb_cmp,
i_s_innodb_cmp_reset,
i_s_innodb_cmpmem,
i_s_innodb_cmpmem_reset,
i_s_innodb_cmp_per_index,
i_s_innodb_cmp_per_index_reset,
i_s_innodb_buffer_page,
i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
i_s_innodb_metrics,
i_s_innodb_ft_default_stopword,
i_s_innodb_ft_deleted,
i_s_innodb_ft_being_deleted,
i_s_innodb_ft_config,
i_s_innodb_ft_index_cache,
i_s_innodb_ft_index_table,
i_s_innodb_sys_tables,
i_s_innodb_sys_tablestats,
i_s_innodb_sys_indexes,
i_s_innodb_sys_columns,
i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
i_s_innodb_sys_datafiles,
i_s_innodb_sys_virtual,
i_s_innodb_mutexes,
i_s_innodb_sys_semaphore_waits,
i_s_innodb_tablespaces_encryption,
i_s_innodb_tablespaces_scrubbing
maria_declare_plugin_end;
20353 
20354 /** @brief Initialize the default value of innodb_commit_concurrency.
20355 
20356 Once InnoDB is running, the innodb_commit_concurrency must not change
20357 from zero to nonzero. (Bug #42101)
20358 
20359 The initial default value is 0, and without this extra initialization,
20360 SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
20361 to 0, even if it was initially set to nonzero at the command line
20362 or configuration file. */
20363 static
20364 void
innobase_commit_concurrency_init_default()20365 innobase_commit_concurrency_init_default()
20366 /*======================================*/
20367 {
20368 	MYSQL_SYSVAR_NAME(commit_concurrency).def_val
20369 		= innobase_commit_concurrency;
20370 }
20371 
20372 /** @brief Adjust some InnoDB startup parameters based on file contents
20373 or innodb_page_size. */
20374 static
20375 void
innodb_params_adjust()20376 innodb_params_adjust()
20377 {
20378 	/* The default value and the max value of
20379 	innodb_undo_logs must be equal to the available undo logs. */
20380 	MYSQL_SYSVAR_NAME(undo_logs).max_val
20381 		= MYSQL_SYSVAR_NAME(undo_logs).def_val
20382 		= srv_available_undo_logs;
20383 	MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20384 		= 1ULL << (32U + srv_page_size_shift);
20385 	MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
20386 		= MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
20387 		= ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
20388 		<< srv_page_size_shift;
20389 	MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
20390 		= 1ULL << (32U + srv_page_size_shift);
20391 }
20392 
20393 /****************************************************************************
20394  * DS-MRR implementation
20395  ***************************************************************************/
20396 
20397 /**
20398 Multi Range Read interface, DS-MRR calls */
20399 int
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)20400 ha_innobase::multi_range_read_init(
20401 	RANGE_SEQ_IF*	seq,
20402 	void*		seq_init_param,
20403 	uint		n_ranges,
20404 	uint		mode,
20405 	HANDLER_BUFFER*	buf)
20406 {
20407 	return(m_ds_mrr.dsmrr_init(this, seq, seq_init_param,
20408 				 n_ranges, mode, buf));
20409 }
20410 
20411 int
multi_range_read_next(range_id_t * range_info)20412 ha_innobase::multi_range_read_next(
20413 	range_id_t*		range_info)
20414 {
20415 	return(m_ds_mrr.dsmrr_next(range_info));
20416 }
20417 
20418 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)20419 ha_innobase::multi_range_read_info_const(
20420 	uint		keyno,
20421 	RANGE_SEQ_IF*	seq,
20422 	void*		seq_init_param,
20423 	uint		n_ranges,
20424 	uint*		bufsz,
20425 	uint*		flags,
20426 	Cost_estimate*	cost)
20427 {
20428 	/* See comments in ha_myisam::multi_range_read_info_const */
20429 	m_ds_mrr.init(this, table);
20430 
20431 	if (m_prebuilt->select_lock_type != LOCK_NONE) {
20432 		*flags |= HA_MRR_USE_DEFAULT_IMPL;
20433 	}
20434 
20435 	ha_rows res= m_ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
20436 			bufsz, flags, cost);
20437 	return res;
20438 }
20439 
20440 ha_rows
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)20441 ha_innobase::multi_range_read_info(
20442 	uint		keyno,
20443 	uint		n_ranges,
20444 	uint		keys,
20445 	uint		key_parts,
20446 	uint*		bufsz,
20447 	uint*		flags,
20448 	Cost_estimate*	cost)
20449 {
20450 	m_ds_mrr.init(this, table);
20451 	ha_rows res= m_ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
20452 					flags, cost);
20453 	return res;
20454 }
20455 
20456 int
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)20457 ha_innobase::multi_range_read_explain_info(
20458 	uint mrr_mode,
20459 	char *str,
20460 	size_t size)
20461 {
20462 	return m_ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
20463 }
20464 
20465 /** Parse the table file name into table name and database name.
20466 @param[in]	tbl_name	InnoDB table name
20467 @param[out]	dbname		database name buffer (NAME_LEN + 1 bytes)
20468 @param[out]	tblname		table name buffer (NAME_LEN + 1 bytes)
20469 @param[out]	dbnamelen	database name length
20470 @param[out]	tblnamelen	table name length
20471 @return true if the table name is parsed properly. */
table_name_parse(const table_name_t & tbl_name,char * dbname,char * tblname,ulint & dbnamelen,ulint & tblnamelen)20472 static bool table_name_parse(
20473 	const table_name_t&	tbl_name,
20474 	char*			dbname,
20475 	char*			tblname,
20476 	ulint&			dbnamelen,
20477 	ulint&			tblnamelen)
20478 {
20479 	dbnamelen = dict_get_db_name_len(tbl_name.m_name);
20480 	char db_buf[MAX_DATABASE_NAME_LEN  + 1];
20481 	char tbl_buf[MAX_TABLE_NAME_LEN + 1];
20482 
20483 	ut_ad(dbnamelen > 0);
20484 	ut_ad(dbnamelen <= MAX_DATABASE_NAME_LEN);
20485 
20486 	memcpy(db_buf, tbl_name.m_name, dbnamelen);
20487 	db_buf[dbnamelen] = 0;
20488 
20489 	tblnamelen = strlen(tbl_name.m_name + dbnamelen + 1);
20490 	memcpy(tbl_buf, tbl_name.m_name + dbnamelen + 1, tblnamelen);
20491 	tbl_buf[tblnamelen] = 0;
20492 
20493 	dbnamelen = filename_to_tablename(db_buf, dbname, MAX_DATABASE_NAME_LEN + 1, true);
20494 
20495 	if (tblnamelen > TEMP_FILE_PREFIX_LENGTH
20496 	    && !strncmp(tbl_buf, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH)) {
20497 		return false;
20498 	}
20499 
20500 	if (char *is_part = strchr(tbl_buf, '#')) {
20501 		*is_part = '\0';
20502 		tblnamelen = is_part - tbl_buf;
20503 	}
20504 
20505 	tblnamelen = filename_to_tablename(tbl_buf, tblname, MAX_TABLE_NAME_LEN + 1, true);
20506 	return true;
20507 }
20508 
20509 
/** Acquire metadata lock and MariaDB table handle for an InnoDB table.
table->release() is called on the passed-in table on every path. The
table is then looked up again by its id; on the failure paths that new
reference is released as well, while on the success path this function
does not release it.
If the table name found after the lookup differs from the one that was
used for opening the MariaDB table (or the open failed), the attempt is
repeated with the current name.
@param[in,out]	thd	thread handle
@param[in,out]	table	InnoDB table
@return MariaDB table handle
@retval NULL if the table does not exist, is unaccessible or corrupted. */
static TABLE* innodb_acquire_mdl(THD* thd, dict_table_t* table)
{
	char	db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
	char	tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
	ulint	db_buf_len, db_buf1_len;
	ulint	tbl_buf_len, tbl_buf1_len;

	/* table_name_parse() returns false when the table part of the
	name starts with TEMP_FILE_PREFIX. */
	if (!table_name_parse(table->name, db_buf, tbl_buf,
			      db_buf_len, tbl_buf_len)) {
		table->release();
		return NULL;
	}

	DEBUG_SYNC(thd, "ib_purge_virtual_latch_released");

	/* Remember the id so that the table can be looked up again after
	the reference has been released. */
	const table_id_t table_id = table->id;
retry_mdl:
	/* Read the state before releasing the table object. */
	const bool unaccessible = !table->is_readable() || table->corrupted;
	table->release();

	if (unaccessible) {
		return NULL;
	}

	TABLE*	mariadb_table = open_purge_table(thd, db_buf, db_buf_len,
						 tbl_buf, tbl_buf_len);
	/* A failed open is not reported; the error state of thd is
	cleared instead. */
	if (!mariadb_table)
		thd_clear_error(thd);

	DEBUG_SYNC(thd, "ib_purge_virtual_got_no_such_table");

	table = dict_table_open_on_id(table_id, false, DICT_TABLE_OP_NORMAL);

	if (table == NULL) {
		/* Table is dropped. */
		goto fail;
	}

	if (!fil_table_accessible(table)) {
release_fail:
		/* Drop the reference obtained by dict_table_open_on_id(). */
		table->release();
fail:
		if (mariadb_table) {
			close_thread_tables(thd);
		}

		return NULL;
	}

	/* Parse the name again: it may differ from the one used above. */
	if (!table_name_parse(table->name, db_buf1, tbl_buf1,
			      db_buf1_len, tbl_buf1_len)) {
		goto release_fail;
	}

	if (!mariadb_table) {
		/* Nothing was opened; fall through and retry. */
	} else if (!strcmp(db_buf, db_buf1) && !strcmp(tbl_buf, tbl_buf1)) {
		return mariadb_table;
	} else {
		/* Table is renamed. So release MDL for old name and try
		to acquire the MDL for new table name. */
		close_thread_tables(thd);
	}

	/* Retry with the name that was just parsed; retry_mdl releases
	the reference obtained by dict_table_open_on_id(). */
	strcpy(tbl_buf, tbl_buf1);
	strcpy(db_buf, db_buf1);
	tbl_buf_len = tbl_buf1_len;
	db_buf_len = db_buf1_len;
	goto retry_mdl;
}
20584 
20585 /** Find or open a table handle for the virtual column template
20586 @param[in]	thd	thread handle
20587 @param[in,out]	table	InnoDB table whose virtual column template
20588 			is to be updated
20589 @return table handle
20590 @retval NULL if the table is dropped, unaccessible or corrupted
20591 for purge thread */
innodb_find_table_for_vc(THD * thd,dict_table_t * table)20592 static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table)
20593 {
20594 	DBUG_EXECUTE_IF(
20595 		"ib_purge_virtual_mdev_16222_1",
20596 		DBUG_ASSERT(!debug_sync_set_action(
20597 			    thd,
20598 			    STRING_WITH_LEN("ib_purge_virtual_latch_released "
20599 					    "SIGNAL latch_released "
20600 					    "WAIT_FOR drop_started"))););
20601 	DBUG_EXECUTE_IF(
20602 		"ib_purge_virtual_mdev_16222_2",
20603 		DBUG_ASSERT(!debug_sync_set_action(
20604 			    thd,
20605 			    STRING_WITH_LEN("ib_purge_virtual_got_no_such_table "
20606 					    "SIGNAL got_no_such_table"))););
20607 
20608 	if (THDVAR(thd, background_thread)) {
20609 		/* Purge thread acquires dict_sys.latch while
20610 		processing undo log record. Release it
20611 		before acquiring MDL on the table. */
20612 		rw_lock_s_unlock(&dict_sys.latch);
20613 		return innodb_acquire_mdl(thd, table);
20614 	} else {
20615 		if (table->vc_templ->mysql_table_query_id
20616 		    == thd_get_query_id(thd)) {
20617 			return table->vc_templ->mysql_table;
20618 		}
20619 	}
20620 
20621 	char	db_buf[NAME_LEN + 1];
20622 	char	tbl_buf[NAME_LEN + 1];
20623 	ulint	db_buf_len, tbl_buf_len;
20624 
20625 	if (!table_name_parse(table->name, db_buf, tbl_buf,
20626 			      db_buf_len, tbl_buf_len)) {
20627 		ut_ad(!"invalid table name");
20628 		return NULL;
20629 	}
20630 
20631 	TABLE* mysql_table = find_fk_open_table(thd, db_buf, db_buf_len,
20632 						tbl_buf, tbl_buf_len);
20633 
20634 	table->vc_templ->mysql_table = mysql_table;
20635 	table->vc_templ->mysql_table_query_id = thd_get_query_id(thd);
20636 	return mysql_table;
20637 }
20638 
/** Allocate and build the virtual column template of a table.
Nothing is done if table->vc_templ is already set.
@param[in,out]	table		table whose virtual column
				template to be built
@return the MariaDB table handle that the template was built from
@retval NULL if table->vc_templ was already set, or if no MariaDB
table handle could be found */
TABLE* innobase_init_vc_templ(dict_table_t* table)
{
	if (table->vc_templ != NULL) {
		return NULL;
	}
	DBUG_ENTER("innobase_init_vc_templ");

	/* The template object is allocated before the lookup below:
	innodb_find_table_for_vc() reads and writes table->vc_templ
	on its non-background path. */
	table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t());

	TABLE	*mysql_table= innodb_find_table_for_vc(current_thd, table);

	ut_ad(mysql_table);
	if (!mysql_table) {
		/* NOTE(review): table->vc_templ stays allocated here
		without innobase_build_v_templ() having been called, so
		a later call returns NULL at the first check above --
		confirm that this is intended. */
		DBUG_RETURN(NULL);
	}

	/* The template is built while holding dict_sys.mutex. */
	mutex_enter(&dict_sys.mutex);
	innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true);
	mutex_exit(&dict_sys.mutex);
	DBUG_RETURN(mysql_table);
}
20663 
20664 /** Change dbname and table name in table->vc_templ.
20665 @param[in,out]	table	the table whose virtual column template
20666 dbname and tbname to be renamed. */
20667 void
innobase_rename_vc_templ(dict_table_t * table)20668 innobase_rename_vc_templ(
20669 	dict_table_t*	table)
20670 {
20671 	char	dbname[MAX_DATABASE_NAME_LEN + 1];
20672 	char	tbname[MAX_DATABASE_NAME_LEN + 1];
20673 	char*	name = table->name.m_name;
20674 	ulint	dbnamelen = dict_get_db_name_len(name);
20675 	ulint	tbnamelen = strlen(name) - dbnamelen - 1;
20676 	char	t_dbname[MAX_DATABASE_NAME_LEN + 1];
20677 	char	t_tbname[MAX_TABLE_NAME_LEN + 1];
20678 
20679 	strncpy(dbname, name, dbnamelen);
20680 	dbname[dbnamelen] = 0;
20681 	strncpy(tbname, name + dbnamelen + 1, tbnamelen);
20682 	tbname[tbnamelen] =0;
20683 
20684 	/* For partition table, remove the partition name and use the
20685 	"main" table name to build the template */
20686 	char*	is_part = is_partition(tbname);
20687 
20688 	if (is_part != NULL) {
20689 		*is_part = '\0';
20690 		tbnamelen = ulint(is_part - tbname);
20691 	}
20692 
20693 	dbnamelen = filename_to_tablename(dbname, t_dbname,
20694 					  MAX_DATABASE_NAME_LEN + 1);
20695 	tbnamelen = filename_to_tablename(tbname, t_tbname,
20696 					  MAX_TABLE_NAME_LEN + 1);
20697 
20698 	table->vc_templ->db_name = t_dbname;
20699 	table->vc_templ->tb_name = t_tbname;
20700 }
20701 
20702 
20703 /**
20704    Allocate a heap and record for calculating virtual fields
20705    Used mainly for virtual fields in indexes
20706 
20707 @param[in]      thd             MariaDB THD
20708 @param[in]      index           Index in use
20709 @param[out]     heap            Heap that holds temporary row
20710 @param[in,out]  table           MariaDB table
20711 @param[out]     record	        Pointer to allocated MariaDB record
20712 @param[out]     storage	        Internal storage for blobs etc
20713 
20714 @retval		true on success
20715 @retval		false on malloc failure or failed to open the maria table
20716 		for purge thread.
20717 */
20718 
innobase_allocate_row_for_vcol(THD * thd,dict_index_t * index,mem_heap_t ** heap,TABLE ** table,VCOL_STORAGE * storage)20719 bool innobase_allocate_row_for_vcol(THD *thd, dict_index_t *index,
20720                                     mem_heap_t **heap, TABLE **table,
20721                                     VCOL_STORAGE *storage)
20722 {
20723   TABLE *maria_table;
20724   String *blob_value_storage;
20725   if (!*table)
20726     *table = innodb_find_table_for_vc(thd, index->table);
20727 
20728   /* For purge thread, there is a possiblity that table could have
20729      dropped, corrupted or unaccessible. */
20730   if (!*table)
20731     return false;
20732   maria_table = *table;
20733   if (!*heap && !(*heap = mem_heap_create(srv_page_size)))
20734     return false;
20735 
20736   uchar *record = static_cast<byte *>(mem_heap_alloc(*heap,
20737                                                     maria_table->s->reclength));
20738 
20739   size_t len = maria_table->s->virtual_not_stored_blob_fields * sizeof(String);
20740   blob_value_storage = static_cast<String *>(mem_heap_alloc(*heap, len));
20741 
20742   if (!record || !blob_value_storage)
20743     return false;
20744 
20745   storage->maria_table = maria_table;
20746   storage->innobase_record = record;
20747   storage->maria_record = maria_table->field[0]->record_ptr();
20748   storage->blob_value_storage = blob_value_storage;
20749 
20750   maria_table->move_fields(maria_table->field, record, storage->maria_record);
20751   maria_table->remember_blob_values(blob_value_storage);
20752 
20753   return true;
20754 }
20755 
20756 
20757 /** Free memory allocated by innobase_allocate_row_for_vcol() */
20758 
innobase_free_row_for_vcol(VCOL_STORAGE * storage)20759 void innobase_free_row_for_vcol(VCOL_STORAGE *storage)
20760 {
20761 	TABLE *maria_table= storage->maria_table;
20762 	maria_table->move_fields(maria_table->field, storage->maria_record,
20763                                  storage->innobase_record);
20764         maria_table->restore_blob_values(storage->blob_value_storage);
20765 }
20766 
20767 
innobase_report_computed_value_failed(dtuple_t * row)20768 void innobase_report_computed_value_failed(dtuple_t *row)
20769 {
20770   ib::error() << "Compute virtual column values failed for "
20771               << rec_printer(row).str();
20772 }
20773 
20774 
/** Get the computed value by supplying the base column values.
The base columns of the virtual column are converted into MariaDB format
in mysql_rec, TABLE::update_virtual_field() is invoked for the column, and
the result is converted back into InnoDB format in the virtual field of row.
@param[in,out]	row		the data row
@param[in]	col		virtual column
@param[in]	index		index
@param[in,out]	local_heap	heap memory for processing large data etc.
				(created here on demand if *local_heap is NULL)
@param[in,out]	heap		memory heap that copies the actual index row,
				or NULL if the computed value is not to be
				duplicated
@param[in]	ifield		index field
@param[in]	thd		MySQL thread handle
@param[in,out]	mysql_table	mysql table object
@param[in,out]	mysql_rec	MariaDB record buffer
@param[in]	old_table	during ALTER TABLE, this is the old table
				or NULL.
@param[in]	update		update vector for the row, if any
@return the virtual column field of row, filled with the computed value
@retval	NULL	if TABLE::update_virtual_field() reported a failure */
dfield_t*
innobase_get_computed_value(
	dtuple_t*		row,
	const dict_v_col_t*	col,
	const dict_index_t*	index,
	mem_heap_t**		local_heap,
	mem_heap_t*		heap,
	const dict_field_t*	ifield,
	THD*			thd,
	TABLE*			mysql_table,
	byte*			mysql_rec,
	const dict_table_t*	old_table,
	const upd_t*		update)
{
	byte		rec_buf2[REC_VERSION_56_MAX_INDEX_COL_LEN];
	byte*		buf;
	dfield_t*	field;
	ulint		len;

	/* The zip size is passed on when fetching externally stored
	columns; with old_table it is taken from that table's tablespace,
	otherwise it is derived from the flags of index->table. */
	const ulint zip_size = old_table
		? old_table->space->zip_size()
		: dict_tf_get_zip_size(index->table->flags);

	ulint		ret = 0;

	dict_index_t *clust_index= dict_table_get_first_index(index->table);

	ut_ad(index->table->vc_templ);
	ut_ad(thd != NULL);
	ut_ad(mysql_table);

	DBUG_ENTER("innobase_get_computed_value");
	/* Template of the virtual column itself: virtual column entries
	are stored in vtempl[] after the first n_col entries. */
	const mysql_row_templ_t*
			vctempl =  index->table->vc_templ->vtempl[
				index->table->vc_templ->n_col + col->v_pos];

	/* Choose the buffer that receives the value in InnoDB format.
	The on-stack rec_buf2 is used only when a heap was passed (the
	value is duplicated into it by dfield_dup() at the end) and
	rec_len is below the size of rec_buf2; otherwise the buffer is
	allocated from *local_heap. */
	if (!heap || index->table->vc_templ->rec_len
		     >= REC_VERSION_56_MAX_INDEX_COL_LEN) {
		if (*local_heap == NULL) {
			*local_heap = mem_heap_create(srv_page_size);
		}

		buf = static_cast<byte*>(mem_heap_alloc(
				*local_heap, index->table->vc_templ->rec_len));
	} else {
		buf = rec_buf2;
	}

	/* Store every base column of the virtual column into mysql_rec
	in MariaDB format. */
	for (ulint i = 0; i < unsigned{col->num_base}; i++) {
		dict_col_t*			base_col = col->base_col[i];
		const dfield_t*			row_field = NULL;
		ulint				col_no = base_col->ind;
		const mysql_row_templ_t*	templ
			= index->table->vc_templ->vtempl[col_no];
		const byte*			data;

		/* A value present in the update vector takes precedence
		over the value stored in row. */
		if (update) {
			ulint clust_no = dict_col_get_clust_pos(base_col,
								clust_index);
			if (const upd_field_t *uf = upd_get_field_by_field_no(
				    update, clust_no, false)) {
				row_field = &uf->new_val;
			}
		}

		if (!row_field) {
			row_field = dtuple_get_nth_field(row, col_no);
		}

		data = static_cast<const byte*>(row_field->data);
		len = row_field->len;

		/* Externally stored column: fetch a copy of the value
		into *local_heap; len is updated by the call. */
		if (row_field->ext) {
			if (*local_heap == NULL) {
				*local_heap = mem_heap_create(srv_page_size);
			}

			data = btr_copy_externally_stored_field(
				&len, data, zip_size,
				dfield_get_len(row_field), *local_heap);
		}

		if (len == UNIV_SQL_NULL) {
			/* SQL NULL: set the NULL bit and fill the column
			bytes from the template's default record. */
                        mysql_rec[templ->mysql_null_byte_offset]
                                |= (byte) templ->mysql_null_bit_mask;
                        memcpy(mysql_rec + templ->mysql_col_offset,
                               static_cast<const byte*>(
					index->table->vc_templ->default_rec
					+ templ->mysql_col_offset),
                               templ->mysql_col_len);
                } else {

			row_sel_field_store_in_mysql_format(
				mysql_rec + templ->mysql_col_offset,
				templ, index, templ->clust_rec_field_no,
				(const byte*)data, len);

			if (templ->mysql_null_bit_mask) {
				/* It is a nullable column with a
				non-NULL value */
				mysql_rec[templ->mysql_null_byte_offset]
					&= ~(byte) templ->mysql_null_bit_mask;
			}
		}
	}

	field = dtuple_get_nth_v_field(row, col->v_pos);

	/* Evaluate the virtual column. The read and write column maps are
	switched with dbug_tmp_use_all_columns() around the call and
	restored afterwards. */
	MY_BITMAP *old_write_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->write_set);
	MY_BITMAP *old_read_set = dbug_tmp_use_all_columns(mysql_table, &mysql_table->read_set);
	ret = mysql_table->update_virtual_field(mysql_table->field[col->m_col.ind]);
	dbug_tmp_restore_column_map(&mysql_table->read_set, old_read_set);
	dbug_tmp_restore_column_map(&mysql_table->write_set, old_write_set);

	if (ret != 0) {
		/* Computing the value failed. */
		DBUG_RETURN(NULL);
	}

	/* The computed value is SQL NULL if the column is nullable and
	its NULL bit is set in mysql_rec. */
	if (vctempl->mysql_null_bit_mask
	    && (mysql_rec[vctempl->mysql_null_byte_offset]
	        & vctempl->mysql_null_bit_mask)) {
		dfield_set_null(field);
		field->type.prtype |= DATA_VIRTUAL;
		DBUG_RETURN(field);
	}

	/* Convert the computed value from mysql_rec into InnoDB format,
	using buf as the conversion buffer. */
	row_mysql_store_col_in_innobase_format(
		field, buf,
		TRUE, mysql_rec + vctempl->mysql_col_offset,
		vctempl->mysql_col_len, dict_table_is_comp(index->table));
	field->type.prtype |= DATA_VIRTUAL;

	ulint	max_prefix = col->m_col.max_prefix;

	/* If the column has a max_prefix and an index field is given
	whose prefix_len is 0 or larger than that max_prefix, use the
	index field's prefix_len instead. A resulting 0 means that the
	value is not shortened below. */
	if (max_prefix && ifield
	    && (ifield->prefix_len == 0
	        || ifield->prefix_len > col->m_col.max_prefix)) {
		max_prefix = ifield->prefix_len;
	}

	/* If this is a prefix index, we only need a portion of the field */
	if (max_prefix) {
		len = dtype_get_at_most_n_mbchars(
			col->m_col.prtype,
			col->m_col.mbminlen, col->m_col.mbmaxlen,
			max_prefix,
			field->len,
			static_cast<char*>(dfield_get_data(field)));
		dfield_set_len(field, len);
	}

	/* Duplicate the value into the caller's heap, if one was given;
	in that case buf may be the on-stack rec_buf2. */
	if (heap) {
		dfield_dup(field, heap);
	}

	DBUG_RETURN(field);
}
20948 
20949 
20950 /** Attempt to push down an index condition.
20951 @param[in] keyno MySQL key number
20952 @param[in] idx_cond Index condition to be checked
20953 @return Part of idx_cond which the handler will not evaluate */
20954 
20955 class Item*
idx_cond_push(uint keyno,class Item * idx_cond)20956 ha_innobase::idx_cond_push(
20957 	uint		keyno,
20958 	class Item*	idx_cond)
20959 {
20960 	DBUG_ENTER("ha_innobase::idx_cond_push");
20961 	DBUG_ASSERT(keyno != MAX_KEY);
20962 	DBUG_ASSERT(idx_cond != NULL);
20963 
20964 	/* We can only evaluate the condition if all columns are stored.*/
20965 	dict_index_t* idx  = innobase_get_index(keyno);
20966 	if (idx && dict_index_has_virtual(idx)) {
20967 		DBUG_RETURN(idx_cond);
20968 	}
20969 
20970 	pushed_idx_cond = idx_cond;
20971 	pushed_idx_cond_keyno = keyno;
20972 	in_range_check_pushed_down = TRUE;
20973 	/* We will evaluate the condition entirely */
20974 	DBUG_RETURN(NULL);
20975 }
20976 
20977 
20978 /** Push a primary key filter.
20979 @param[in]	pk_filter	filter against which primary keys
20980 				are to be checked
20981 @retval	false if pushed (always) */
rowid_filter_push(Rowid_filter * pk_filter)20982 bool ha_innobase::rowid_filter_push(Rowid_filter* pk_filter)
20983 {
20984 	DBUG_ENTER("ha_innobase::rowid_filter_push");
20985 	DBUG_ASSERT(pk_filter != NULL);
20986 	pushed_rowid_filter= pk_filter;
20987 	DBUG_RETURN(false);
20988 }
20989 
is_part_of_a_key_prefix(const Field_longstr * field)20990 static bool is_part_of_a_key_prefix(const Field_longstr *field)
20991 {
20992   const TABLE_SHARE *s= field->table->s;
20993 
20994   for (uint i= 0; i < s->keys; i++)
20995   {
20996     const KEY &key= s->key_info[i];
20997     for (uint j= 0; j < key.user_defined_key_parts; j++)
20998     {
20999       const KEY_PART_INFO &info= key.key_part[j];
21000       // When field is a part of some key, a key part and field will have the
21001       // same length. And their length will be different when only some prefix
21002       // of a field is used as a key part. That's what we're looking for here.
21003       if (info.field->field_index == field->field_index &&
21004           info.length != field->field_length)
21005       {
21006         DBUG_ASSERT(info.length < field->field_length);
21007         return true;
21008       }
21009     }
21010   }
21011 
21012   return false;
21013 }
21014 
21015 static bool
is_part_of_a_primary_key(const Field * field)21016 is_part_of_a_primary_key(const Field* field)
21017 {
21018 	const TABLE_SHARE* s = field->table->s;
21019 
21020 	return s->primary_key != MAX_KEY
21021 	       && field->part_of_key.is_set(s->primary_key);
21022 }
21023 
can_convert_string(const Field_string * field,const Column_definition & new_type) const21024 bool ha_innobase::can_convert_string(const Field_string *field,
21025                                      const Column_definition &new_type) const
21026 {
21027   DBUG_ASSERT(!field->compression_method());
21028   if (new_type.type_handler() != field->type_handler())
21029     return false;
21030 
21031   if (new_type.char_length != field->char_length())
21032     return false;
21033 
21034   const Charset field_cs(field->charset());
21035 
21036   if (new_type.length != field->max_display_length() &&
21037       (!m_prebuilt->table->not_redundant() ||
21038        field_cs.mbminlen() == field_cs.mbmaxlen()))
21039     return false;
21040 
21041   if (new_type.charset != field->charset())
21042   {
21043     if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21044       return false;
21045 
21046     if (!field_cs.eq_collation_specific_names(new_type.charset))
21047       return !is_part_of_a_primary_key(field);
21048 
21049     // Fully indexed case works instantly like
21050     // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21051     if (is_part_of_a_key_prefix(field))
21052 	    return false;
21053 
21054     return true;
21055   }
21056 
21057   return true;
21058 }
21059 
21060 static bool
supports_enlarging(const dict_table_t * table,const Field_varstring * field,const Column_definition & new_type)21061 supports_enlarging(const dict_table_t* table, const Field_varstring* field,
21062 		   const Column_definition& new_type)
21063 {
21064 	return field->field_length <= 127 || new_type.length <= 255
21065 	       || field->field_length > 255 || !table->not_redundant();
21066 }
21067 
can_convert_varstring(const Field_varstring * field,const Column_definition & new_type) const21068 bool ha_innobase::can_convert_varstring(
21069     const Field_varstring *field, const Column_definition &new_type) const
21070 {
21071   if (new_type.length < field->field_length)
21072     return false;
21073 
21074   if (new_type.char_length < field->char_length())
21075     return false;
21076 
21077   if (!new_type.compression_method() != !field->compression_method())
21078     return false;
21079 
21080   if (new_type.type_handler() != field->type_handler())
21081     return false;
21082 
21083   if (new_type.charset != field->charset())
21084   {
21085     if (!supports_enlarging(m_prebuilt->table, field, new_type))
21086       return false;
21087 
21088     Charset field_cs(field->charset());
21089     if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21090       return false;
21091 
21092     if (!field_cs.eq_collation_specific_names(new_type.charset))
21093       return !is_part_of_a_primary_key(field);
21094 
21095     // Fully indexed case works instantly like
21096     // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21097     if (is_part_of_a_key_prefix(field))
21098       return false;
21099 
21100     return true;
21101   }
21102 
21103   if (new_type.length != field->field_length)
21104   {
21105     if (!supports_enlarging(m_prebuilt->table, field, new_type))
21106       return false;
21107 
21108     return true;
21109   }
21110 
21111   return true;
21112 }
21113 
is_part_of_a_key(const Field_blob * field)21114 static bool is_part_of_a_key(const Field_blob *field)
21115 {
21116   const TABLE_SHARE *s= field->table->s;
21117 
21118   for (uint i= 0; i < s->keys; i++)
21119   {
21120     const KEY &key= s->key_info[i];
21121     for (uint j= 0; j < key.user_defined_key_parts; j++)
21122     {
21123       const KEY_PART_INFO &info= key.key_part[j];
21124       if (info.field->field_index == field->field_index)
21125         return true;
21126     }
21127   }
21128 
21129   return false;
21130 }
21131 
can_convert_blob(const Field_blob * field,const Column_definition & new_type) const21132 bool ha_innobase::can_convert_blob(const Field_blob *field,
21133                                    const Column_definition &new_type) const
21134 {
21135   if (new_type.type_handler() != field->type_handler())
21136     return false;
21137 
21138   if (!new_type.compression_method() != !field->compression_method())
21139     return false;
21140 
21141   if (new_type.pack_length != field->pack_length())
21142     return false;
21143 
21144   if (new_type.charset != field->charset())
21145   {
21146     Charset field_cs(field->charset());
21147     if (!field_cs.encoding_allows_reinterpret_as(new_type.charset))
21148       return false;
21149 
21150     if (!field_cs.eq_collation_specific_names(new_type.charset))
21151       return !is_part_of_a_key(field);
21152 
21153     // Fully indexed case works instantly like
21154     // Compare_keys::EqualButKeyPartLength. But prefix case isn't implemented.
21155     if (is_part_of_a_key_prefix(field))
21156       return false;
21157 
21158     return true;
21159   }
21160 
21161   return true;
21162 }
21163 
compare_key_parts(const Field & old_field,const Column_definition & new_field,const KEY_PART_INFO & old_part,const KEY_PART_INFO & new_part) const21164 Compare_keys ha_innobase::compare_key_parts(
21165     const Field &old_field, const Column_definition &new_field,
21166     const KEY_PART_INFO &old_part, const KEY_PART_INFO &new_part) const
21167 {
21168   const bool is_equal= old_field.is_equal(new_field);
21169   const CHARSET_INFO *old_cs= old_field.charset();
21170   const CHARSET_INFO *new_cs= new_field.charset;
21171 
21172   if (!is_equal)
21173   {
21174     if (!old_field.can_be_converted_by_engine(new_field))
21175       return Compare_keys::NotEqual;
21176 
21177     if (!Charset(old_cs).eq_collation_specific_names(new_cs))
21178       return Compare_keys::NotEqual;
21179   }
21180 
21181   if (old_part.length / old_cs->mbmaxlen != new_part.length / new_cs->mbmaxlen)
21182   {
21183     if (old_part.length != old_field.field_length)
21184       return Compare_keys::NotEqual;
21185 
21186     if (old_part.length >= new_part.length)
21187       return Compare_keys::NotEqual;
21188 
21189     return Compare_keys::EqualButKeyPartLength;
21190   }
21191 
21192   return Compare_keys::Equal;
21193 }
21194 
21195 /******************************************************************//**
21196 Use this when the args are passed to the format string from
21197 errmsg-utf8.txt directly as is.
21198 
21199 Push a warning message to the client, it is a wrapper around:
21200 
21201 void push_warning_printf(
21202 	THD *thd, Sql_condition::enum_condition_level level,
21203 	uint code, const char *format, ...);
21204 */
21205 void
ib_senderrf(THD * thd,ib_log_level_t level,ib_uint32_t code,...)21206 ib_senderrf(
21207 /*========*/
21208 	THD*		thd,		/*!< in/out: session */
21209 	ib_log_level_t	level,		/*!< in: warning level */
21210 	ib_uint32_t	code,		/*!< MySQL error code */
21211 	...)				/*!< Args */
21212 {
21213 	va_list		args;
21214 	const char*	format = my_get_err_msg(code);
21215 
21216 	/* If the caller wants to push a message to the client then
21217 	the caller must pass a valid session handle. */
21218 
21219 	ut_a(thd != 0);
21220 
21221 	/* The error code must exist in the errmsg-utf8.txt file. */
21222 	ut_a(format != 0);
21223 
21224 	va_start(args, code);
21225 
21226 	myf l;
21227 
21228 	switch (level) {
21229 	case IB_LOG_LEVEL_INFO:
21230 		l = ME_NOTE;
21231 		break;
21232 	case IB_LOG_LEVEL_WARN:
21233 		l = ME_WARNING;
21234 		break;
21235 	default:
21236 		l = 0;
21237 		break;
21238 	}
21239 
21240 	my_printv_error(code, format, MYF(l), args);
21241 
21242 	va_end(args);
21243 
21244 	if (level == IB_LOG_LEVEL_FATAL) {
21245 		ut_error;
21246 	}
21247 }
21248 
21249 /******************************************************************//**
21250 Use this when the args are first converted to a formatted string and then
21251 passed to the format string from errmsg-utf8.txt. The error message format
21252 must be: "Some string ... %s".
21253 
21254 Push a warning message to the client, it is a wrapper around:
21255 
21256 void push_warning_printf(
21257 	THD *thd, Sql_condition::enum_condition_level level,
21258 	uint code, const char *format, ...);
21259 */
21260 void
ib_errf(THD * thd,ib_log_level_t level,ib_uint32_t code,const char * format,...)21261 ib_errf(
21262 /*====*/
21263 	THD*		thd,		/*!< in/out: session */
21264 	ib_log_level_t	level,		/*!< in: warning level */
21265 	ib_uint32_t	code,		/*!< MySQL error code */
21266 	const char*	format,		/*!< printf format */
21267 	...)				/*!< Args */
21268 {
21269 	char*		str = NULL;
21270 	va_list         args;
21271 
21272 	/* If the caller wants to push a message to the client then
21273 	the caller must pass a valid session handle. */
21274 
21275 	ut_a(thd != 0);
21276 	ut_a(format != 0);
21277 
21278 	va_start(args, format);
21279 
21280 #ifdef _WIN32
21281 	int		size = _vscprintf(format, args) + 1;
21282 	if (size > 0) {
21283 		str = static_cast<char*>(malloc(size));
21284 	}
21285 	if (str == NULL) {
21286 		va_end(args);
21287 		return;	/* Watch for Out-Of-Memory */
21288 	}
21289 	str[size - 1] = 0x0;
21290 	vsnprintf(str, size, format, args);
21291 #elif HAVE_VASPRINTF
21292 	if (vasprintf(&str, format, args) == -1) {
21293 		/* In case of failure use a fixed length string */
21294 		str = static_cast<char*>(malloc(BUFSIZ));
21295 		vsnprintf(str, BUFSIZ, format, args);
21296 	}
21297 #else
21298 	/* Use a fixed length string. */
21299 	str = static_cast<char*>(malloc(BUFSIZ));
21300 	if (str == NULL) {
21301 		va_end(args);
21302 		return;	/* Watch for Out-Of-Memory */
21303 	}
21304 	vsnprintf(str, BUFSIZ, format, args);
21305 #endif /* _WIN32 */
21306 
21307 	ib_senderrf(thd, level, code, str);
21308 
21309 	va_end(args);
21310 	free(str);
21311 }
21312 
/* Hints that are appended to InnoDB diagnostic messages. Each one points
to a page of the MariaDB Knowledge Base or to the bug tracker. */

/* Keep the first 16 characters as-is, since the url is sometimes used
as an offset from this. ("Please refer to " is exactly 16 characters
long, so the URL starts at offset 16.) */
const char*	TROUBLESHOOTING_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-troubleshooting/"
	" for how to resolve the issue.";

/* Hint for problems with the InnoDB data dictionary. */
const char*	TROUBLESHOOT_DATADICT_MSG =
	"Please refer to https://mariadb.com/kb/en/innodb-data-dictionary-troubleshooting/"
	" for how to resolve the issue.";

/* Request to file a bug report. */
const char*	BUG_REPORT_MSG =
	"Submit a detailed bug report to https://jira.mariadb.org/";

/* Hint about forcing recovery. */
const char*	FORCE_RECOVERY_MSG =
	"Please refer to "
	"https://mariadb.com/kb/en/library/innodb-recovery-modes/"
	" for information about forcing recovery.";

/* Hint about the meaning of operating system error numbers. */
const char*	OPERATING_SYSTEM_ERROR_MSG =
	"Some operating system error numbers are described at"
	" https://mariadb.com/kb/en/library/operating-system-error-codes/";

/* Hint about defining foreign key constraints correctly. */
const char*	FOREIGN_KEY_CONSTRAINTS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/foreign-keys/"
	" for correct foreign key definition.";

/* Pointer to the SET TRANSACTION documentation. */
const char*	SET_TRANSACTION_MSG =
	"Please refer to https://mariadb.com/kb/en/library/set-transaction/";

/* Pointer to the documentation of the InnoDB system variables. */
const char*	INNODB_PARAMETERS_MSG =
	"Please refer to https://mariadb.com/kb/en/library/innodb-system-variables/";
21344 
21345 /**********************************************************************
21346 Converts an identifier from my_charset_filename to UTF-8 charset.
21347 @return result string length, as returned by strconvert() */
21348 uint
innobase_convert_to_filename_charset(char * to,const char * from,ulint len)21349 innobase_convert_to_filename_charset(
21350 /*=================================*/
21351 	char*		to,	/* out: converted identifier */
21352 	const char*	from,	/* in: identifier to convert */
21353 	ulint		len)	/* in: length of 'to', in bytes */
21354 {
21355 	uint		errors;
21356 	CHARSET_INFO*	cs_to = &my_charset_filename;
21357 	CHARSET_INFO*	cs_from = system_charset_info;
21358 
21359 	return(static_cast<uint>(strconvert(
21360 				cs_from, from, uint(strlen(from)),
21361 				cs_to, to, static_cast<uint>(len), &errors)));
21362 }
21363 
21364 /**********************************************************************
21365 Converts an identifier from my_charset_filename to UTF-8 charset.
21366 @return result string length, as returned by strconvert() */
21367 uint
innobase_convert_to_system_charset(char * to,const char * from,ulint len,uint * errors)21368 innobase_convert_to_system_charset(
21369 /*===============================*/
21370 	char*		to,	/* out: converted identifier */
21371 	const char*	from,	/* in: identifier to convert */
21372 	ulint		len,	/* in: length of 'to', in bytes */
21373 	uint*		errors)	/* out: error return */
21374 {
21375 	CHARSET_INFO*	cs1 = &my_charset_filename;
21376 	CHARSET_INFO*	cs2 = system_charset_info;
21377 
21378 	return(static_cast<uint>(strconvert(
21379 				cs1, from, static_cast<uint>(strlen(from)),
21380 				cs2, to, static_cast<uint>(len), errors)));
21381 }
21382 
21383 /** Validate the requested buffer pool size.  Also, reserve the necessary
21384 memory needed for buffer pool resize.
21385 @param[in]	thd	thread handle
21386 @param[out]	save	immediate result for update function
21387 @param[in]	value	incoming string
21388 @return 0 on success, 1 on failure.
21389 */
21390 static
21391 int
innodb_buffer_pool_size_validate(THD * thd,st_mysql_sys_var *,void * save,struct st_mysql_value * value)21392 innodb_buffer_pool_size_validate(
21393 	THD*				thd,
21394 	st_mysql_sys_var*,
21395 	void*				save,
21396 	struct st_mysql_value*		value)
21397 {
21398 	longlong	intbuf;
21399 
21400 	value->val_int(value, &intbuf);
21401 
21402 	if (static_cast<ulonglong>(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) {
21403 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21404 				    ER_WRONG_ARGUMENTS,
21405 				    "innodb_buffer_pool_size must be at least"
21406 				    " %lld for innodb_page_size=%lu",
21407 				    MYSQL_SYSVAR_NAME(buffer_pool_size).min_val,
21408 				    srv_page_size);
21409 		return(1);
21410 	}
21411 
21412 	if (!srv_was_started) {
21413 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21414 				    ER_WRONG_ARGUMENTS,
21415 				    "Cannot update innodb_buffer_pool_size,"
21416 				    " because InnoDB is not started.");
21417 		return(1);
21418 	}
21419 
21420 #ifdef UNIV_DEBUG
21421 	if (buf_disable_resize_buffer_pool_debug == TRUE) {
21422 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21423 			ER_WRONG_ARGUMENTS,
21424 			"Cannot update innodb_buffer_pool_size,"
21425 			" because innodb_disable_resize_buffer_pool_debug"
21426 			" is set.");
21427 		ib::warn() << "Cannot update innodb_buffer_pool_size,"
21428 			" because innodb_disable_resize_buffer_pool_debug"
21429 			" is set.";
21430 		return(1);
21431 	}
21432 #endif /* UNIV_DEBUG */
21433 
21434 
21435 	buf_pool_mutex_enter_all();
21436 
21437 	if (srv_buf_pool_old_size != srv_buf_pool_size) {
21438 		buf_pool_mutex_exit_all();
21439 		my_printf_error(ER_WRONG_ARGUMENTS,
21440 			"Another buffer pool resize is already in progress.", MYF(0));
21441 		return(1);
21442 	}
21443 
21444 	if (srv_buf_pool_instances > 1 && intbuf < BUF_POOL_SIZE_THRESHOLD) {
21445 		buf_pool_mutex_exit_all();
21446 
21447 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21448 				    ER_WRONG_ARGUMENTS,
21449 				    "Cannot update innodb_buffer_pool_size"
21450 				    " to less than 1GB if"
21451 				    " innodb_buffer_pool_instances > 1.");
21452 		return(1);
21453 	}
21454 
21455 	ulint	requested_buf_pool_size = buf_pool_size_align(ulint(intbuf));
21456 
21457 	*static_cast<ulonglong*>(save) = requested_buf_pool_size;
21458 
21459 	if (srv_buf_pool_size == ulint(intbuf)) {
21460 		buf_pool_mutex_exit_all();
21461 		/* nothing to do */
21462 		return(0);
21463 	}
21464 
21465 	if (srv_buf_pool_size == requested_buf_pool_size) {
21466 		buf_pool_mutex_exit_all();
21467 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21468 				    ER_WRONG_ARGUMENTS,
21469 				    "innodb_buffer_pool_size must be at least"
21470 				    " innodb_buffer_pool_chunk_size=%lu",
21471 				    srv_buf_pool_chunk_unit);
21472 		/* nothing to do */
21473 		return(0);
21474 	}
21475 
21476 	srv_buf_pool_size = requested_buf_pool_size;
21477 	buf_pool_mutex_exit_all();
21478 
21479 	if (intbuf != static_cast<longlong>(requested_buf_pool_size)) {
21480 		char	buf[64];
21481 		int	len = 64;
21482 		value->val_str(value, buf, &len);
21483 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21484 				    ER_TRUNCATED_WRONG_VALUE,
21485 				    "Truncated incorrect %-.32s value: '%-.128s'",
21486 				    mysql_sysvar_buffer_pool_size.name,
21487 				    value->val_str(value, buf, &len));
21488 	}
21489 
21490 	return(0);
21491 }
21492 
21493 /*************************************************************//**
21494 Check for a valid value of innobase_compression_algorithm.
21495 @return	0 for valid innodb_compression_algorithm. */
21496 static
21497 int
innodb_compression_algorithm_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21498 innodb_compression_algorithm_validate(
21499 /*==================================*/
21500 	THD*				thd,	/*!< in: thread handle */
21501 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
21502 						variable */
21503 	void*				save,	/*!< out: immediate result
21504 						for update function */
21505 	struct st_mysql_value*		value)	/*!< in: incoming string */
21506 {
21507 	ulong		compression_algorithm;
21508 	DBUG_ENTER("innobase_compression_algorithm_validate");
21509 
21510 	if (check_sysvar_enum(thd, var, save, value)) {
21511 		DBUG_RETURN(1);
21512 	}
21513 
21514 	compression_algorithm = *reinterpret_cast<ulong*>(save);
21515 	(void)compression_algorithm;
21516 
21517 #ifndef HAVE_LZ4
21518 	if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
21519 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21520 				    HA_ERR_UNSUPPORTED,
21521 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21522 				    "InnoDB: liblz4 is not installed. \n",
21523 				    compression_algorithm);
21524 		DBUG_RETURN(1);
21525 	}
21526 #endif
21527 
21528 #ifndef HAVE_LZO
21529 	if (compression_algorithm == PAGE_LZO_ALGORITHM) {
21530 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21531 				    HA_ERR_UNSUPPORTED,
21532 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21533 				    "InnoDB: liblzo is not installed. \n",
21534 				    compression_algorithm);
21535 		DBUG_RETURN(1);
21536 	}
21537 #endif
21538 
21539 #ifndef HAVE_LZMA
21540 	if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
21541 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21542 				    HA_ERR_UNSUPPORTED,
21543 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21544 				    "InnoDB: liblzma is not installed. \n",
21545 				    compression_algorithm);
21546 		DBUG_RETURN(1);
21547 	}
21548 #endif
21549 
21550 #ifndef HAVE_BZIP2
21551 	if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
21552 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21553 				    HA_ERR_UNSUPPORTED,
21554 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21555 				    "InnoDB: libbz2 is not installed. \n",
21556 				    compression_algorithm);
21557 		DBUG_RETURN(1);
21558 	}
21559 #endif
21560 
21561 #ifndef HAVE_SNAPPY
21562 	if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
21563 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21564 				    HA_ERR_UNSUPPORTED,
21565 				    "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
21566 				    "InnoDB: libsnappy is not installed. \n",
21567 				    compression_algorithm);
21568 		DBUG_RETURN(1);
21569 	}
21570 #endif
21571 	DBUG_RETURN(0);
21572 }
21573 
21574 static
21575 int
innodb_encrypt_tables_validate(THD * thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)21576 innodb_encrypt_tables_validate(
21577 /*=================================*/
21578 	THD*				thd,	/*!< in: thread handle */
21579 	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
21580 						variable */
21581 	void*				save,	/*!< out: immediate result
21582 						for update function */
21583 	struct st_mysql_value*		value)	/*!< in: incoming string */
21584 {
21585 	if (check_sysvar_enum(thd, var, save, value)) {
21586 		return 1;
21587 	}
21588 
21589 	ulong encrypt_tables = *(ulong*)save;
21590 
21591 	if (encrypt_tables
21592 	    && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
21593 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21594 				    HA_ERR_UNSUPPORTED,
21595 				    "InnoDB: cannot enable encryption, "
21596 		                    "encryption plugin is not available");
21597 		return 1;
21598 	}
21599 
21600 	return 0;
21601 }
21602 
innodb_remember_check_sysvar_funcs()21603 static void innodb_remember_check_sysvar_funcs()
21604 {
21605 	/* remember build-in sysvar check functions */
21606 	ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
21607 	check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
21608 
21609 	ut_ad((MYSQL_SYSVAR_NAME(flush_log_at_timeout).flags & 15) == PLUGIN_VAR_INT);
21610 	check_sysvar_int = MYSQL_SYSVAR_NAME(flush_log_at_timeout).check;
21611 }
21612 
21613 /********************************************************************//**
21614 Helper function to push warnings from InnoDB internals to SQL-layer. */
21615 UNIV_INTERN
21616 void
ib_push_warning(trx_t * trx,dberr_t error,const char * format,...)21617 ib_push_warning(
21618 	trx_t*		trx,	/*!< in: trx */
21619 	dberr_t		error,	/*!< in: error code to push as warning */
21620 	const char	*format,/*!< in: warning message */
21621 	...)
21622 {
21623 	if (trx && trx->mysql_thd) {
21624 		THD *thd = (THD *)trx->mysql_thd;
21625 		va_list args;
21626 		char *buf;
21627 #define MAX_BUF_SIZE 4*1024
21628 
21629 		va_start(args, format);
21630 		buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21631 		buf[MAX_BUF_SIZE - 1] = 0;
21632 		vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21633 		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21634 				    uint(convert_error_code_to_mysql(error, 0,
21635 								     thd)),
21636 				    buf);
21637 		my_free(buf);
21638 		va_end(args);
21639 	}
21640 }
21641 
21642 /********************************************************************//**
21643 Helper function to push warnings from InnoDB internals to SQL-layer. */
21644 UNIV_INTERN
21645 void
ib_push_warning(void * ithd,dberr_t error,const char * format,...)21646 ib_push_warning(
21647 	void*		ithd,	/*!< in: thd */
21648 	dberr_t		error,	/*!< in: error code to push as warning */
21649 	const char	*format,/*!< in: warning message */
21650 	...)
21651 {
21652 	va_list args;
21653 	THD *thd = (THD *)ithd;
21654 	char *buf;
21655 #define MAX_BUF_SIZE 4*1024
21656 
21657 	if (ithd == NULL) {
21658 		thd = current_thd;
21659 	}
21660 
21661 	if (thd) {
21662 		va_start(args, format);
21663 		buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
21664 		buf[MAX_BUF_SIZE - 1] = 0;
21665 		vsnprintf(buf, MAX_BUF_SIZE - 1, format, args);
21666 
21667 		push_warning_printf(
21668 			thd, Sql_condition::WARN_LEVEL_WARN,
21669 			uint(convert_error_code_to_mysql(error, 0, thd)), buf);
21670 		my_free(buf);
21671 		va_end(args);
21672 	}
21673 }
21674 
21675 /********************************************************************//**
21676 Helper function to push frm mismatch error to error log and
21677 if needed to sql-layer. */
21678 UNIV_INTERN
21679 void
ib_push_frm_error(THD * thd,dict_table_t * ib_table,TABLE * table,ulint n_keys,bool push_warning)21680 ib_push_frm_error(
21681 /*==============*/
21682 	THD*		thd,		/*!< in: MySQL thd */
21683 	dict_table_t*	ib_table,	/*!< in: InnoDB table */
21684 	TABLE*		table,		/*!< in: MySQL table */
21685 	ulint		n_keys,		/*!< in: InnoDB #keys */
21686 	bool		push_warning)	/*!< in: print warning ? */
21687 {
21688 	switch (ib_table->dict_frm_mismatch) {
21689 	case DICT_FRM_NO_PK:
21690 		sql_print_error("Table %s has a primary key in "
21691 			"InnoDB data dictionary, but not "
21692 			"in MariaDB!"
21693 			" Have you mixed up "
21694 			".frm files from different "
21695 			"installations? See "
21696 			"https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21697 			ib_table->name.m_name);
21698 
21699 		if (push_warning) {
21700 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21701 				ER_NO_SUCH_INDEX,
21702 				"InnoDB: Table %s has a "
21703 				"primary key in InnoDB data "
21704 				"dictionary, but not in "
21705 				"MariaDB!", ib_table->name.m_name);
21706 		}
21707 		break;
21708 	case DICT_NO_PK_FRM_HAS:
21709 		sql_print_error(
21710 				"Table %s has no primary key in InnoDB data "
21711 				"dictionary, but has one in MariaDB! If you "
21712 				"created the table with a MariaDB version < "
21713 				"3.23.54 and did not define a primary key, "
21714 				"but defined a unique key with all non-NULL "
21715 				"columns, then MariaDB internally treats that "
21716 				"key as the primary key. You can fix this "
21717 				"error by dump + DROP + CREATE + reimport "
21718 				"of the table.", ib_table->name.m_name);
21719 
21720 		if (push_warning) {
21721 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21722 				ER_NO_SUCH_INDEX,
21723 				"InnoDB: Table %s has no "
21724 				"primary key in InnoDB data "
21725 				"dictionary, but has one in "
21726 				"MariaDB!",
21727 				ib_table->name.m_name);
21728 		}
21729 		break;
21730 
21731 	case DICT_FRM_INCONSISTENT_KEYS:
21732 		sql_print_error("InnoDB: Table %s contains " ULINTPF " "
21733 			"indexes inside InnoDB, which "
21734 			"is different from the number of "
21735 			"indexes %u defined in the MariaDB "
21736 			" Have you mixed up "
21737 			".frm files from different "
21738 			"installations? See "
21739 			"https://mariadb.com/kb/en/innodb-troubleshooting/\n",
21740 			ib_table->name.m_name, n_keys,
21741 			table->s->keys);
21742 
21743 		if (push_warning) {
21744 			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
21745 				ER_NO_SUCH_INDEX,
21746 				"InnoDB: Table %s contains " ULINTPF " "
21747 				"indexes inside InnoDB, which "
21748 				"is different from the number of "
21749 				"indexes %u defined in the MariaDB ",
21750                                 ib_table->name.m_name, n_keys,
21751 				table->s->keys);
21752 		}
21753 		break;
21754 
21755 	case DICT_FRM_CONSISTENT:
21756 	default:
21757 		sql_print_error("InnoDB: Table %s is consistent "
21758 			"on InnoDB data dictionary and MariaDB "
21759 			" FRM file.",
21760 			ib_table->name.m_name);
21761 		ut_error;
21762 		break;
21763 	}
21764 }
21765 
21766 /** Writes 8 bytes to nth tuple field
21767 @param[in]	tuple	where to write
21768 @param[in]	nth	index in tuple
21769 @param[in]	data	what to write
21770 @param[in]	buf	field data buffer */
set_tuple_col_8(dtuple_t * tuple,int col,uint64_t data,byte * buf)21771 static void set_tuple_col_8(dtuple_t *tuple, int col, uint64_t data, byte *buf)
21772 {
21773   dfield_t *dfield= dtuple_get_nth_field(tuple, col);
21774   ut_ad(dfield->type.len == 8);
21775   if (dfield->len == UNIV_SQL_NULL)
21776   {
21777     dfield_set_data(dfield, buf, 8);
21778   }
21779   ut_ad(dfield->len == dfield->type.len && dfield->data);
21780   mach_write_to_8(dfield->data, data);
21781 }
21782 
vers_update_end(row_prebuilt_t * prebuilt,bool history_row)21783 void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row)
21784 {
21785   ut_ad(prebuilt->ins_node == this);
21786   trx_t *trx= prebuilt->trx;
21787 #ifndef DBUG_OFF
21788   ut_ad(table->vers_start != table->vers_end);
21789   const mysql_row_templ_t *t= prebuilt->get_template_by_col(table->vers_end);
21790   ut_ad(t);
21791   ut_ad(t->mysql_col_len == 8);
21792 #endif
21793 
21794   if (history_row)
21795   {
21796     set_tuple_col_8(row, table->vers_end, trx->id, vers_end_buf);
21797   }
21798   else /* ROW_INS_VERSIONED */
21799   {
21800     set_tuple_col_8(row, table->vers_end, TRX_ID_MAX, vers_end_buf);
21801 #ifndef DBUG_OFF
21802     t= prebuilt->get_template_by_col(table->vers_start);
21803     ut_ad(t);
21804     ut_ad(t->mysql_col_len == 8);
21805 #endif
21806     set_tuple_col_8(row, table->vers_start, trx->id, vers_start_buf);
21807   }
21808   dict_index_t *clust_index= dict_table_get_first_index(table);
21809   THD *thd= trx->mysql_thd;
21810   TABLE *mysql_table= prebuilt->m_mysql_table;
21811   mem_heap_t *local_heap= NULL;
21812   for (ulint col_no= 0; col_no < dict_table_get_n_v_cols(table); col_no++)
21813   {
21814     const dict_v_col_t *v_col= dict_table_get_nth_v_col(table, col_no);
21815     for (ulint i= 0; i < unsigned(v_col->num_base); i++)
21816       if (v_col->base_col[i]->ind == table->vers_end)
21817         innobase_get_computed_value(row, v_col, clust_index, &local_heap,
21818                                     table->heap, NULL, thd, mysql_table,
21819                                     mysql_table->record[0], NULL, NULL);
21820   }
21821   if (UNIV_LIKELY_NULL(local_heap))
21822     mem_heap_free(local_heap);
21823 }
21824 
21825 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
21826 if needed.
21827 @param[in]	size	size in bytes
21828 @return	aligned size */
21829 ulint
buf_pool_size_align(ulint size)21830 buf_pool_size_align(
21831 	ulint	size)
21832 {
21833   const ib_uint64_t	m = ((ib_uint64_t)srv_buf_pool_instances) * srv_buf_pool_chunk_unit;
21834   size = ut_max((size_t) size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val);
21835 
21836   if (size % m == 0) {
21837     return(size);
21838   } else {
21839     return (ulint)((size / m + 1) * m);
21840   }
21841 }
21842