1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, 2016, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /**************************************************//**
43 @file srv/srv0srv.cc
44 The database server main program
45 
46 Created 10/8/1995 Heikki Tuuri
47 *******************************************************/
48 
49 #include "my_global.h"
50 #include "my_thread.h"
51 
52 #include "mysql/psi/mysql_stage.h"
53 #include "mysql/psi/psi.h"
54 #include "sql_thd_internal_api.h"
55 
56 #include "ha_prototypes.h"
57 
58 #include "btr0sea.h"
59 #include "buf0flu.h"
60 #include "buf0lru.h"
61 #include "btr0scrub.h"
62 #include "dict0boot.h"
63 #include "dict0load.h"
64 #include "dict0stats_bg.h"
65 #include "fsp0sysspace.h"
66 #include "ibuf0ibuf.h"
67 #include "lock0lock.h"
68 #include "log0online.h"
69 #include "log0recv.h"
70 #include "mem0mem.h"
71 #include "os0proc.h"
72 #include "pars0pars.h"
73 #include "que0que.h"
74 #include "row0mysql.h"
75 #include "row0trunc.h"
76 #include "row0log.h"
77 #include "srv0mon.h"
78 #include "srv0srv.h"
79 #include "srv0start.h"
80 #include "sync0sync.h"
81 #include "trx0i_s.h"
82 #include "trx0purge.h"
83 #include "trx0rseg.h"
84 #include "usr0sess.h"
85 #include "ut0crc32.h"
86 #include "ut0mem.h"
87 #include "handler.h"
88 #include "ha_innodb.h"
89 #include "fil0crypt.h"
90 #include "system_key.h"
91 
92 
/* When UNIV_PFS_THREAD is not defined, redirect every create_thd() call so
that the key argument supplied by the caller (PFS_KEY) is discarded and
PFS_NOT_INSTRUMENTED.m_value is passed in its place. The first three
arguments are forwarded unchanged. */
#ifndef UNIV_PFS_THREAD
#define create_thd(x,y,z,PFS_KEY)	create_thd(x,y,z,PFS_NOT_INSTRUMENTED.m_value)
#endif /* UNIV_PFS_THREAD */
96 
97 /* The following is the maximum allowed duration of a lock wait. */
98 ulong	srv_fatal_semaphore_wait_threshold = 600;
99 
100 lint	srv_kill_idle_transaction = 0;
101 
102 /* How much data manipulation language (DML) statements need to be delayed,
103 in microseconds, in order to reduce the lagging of the purge thread. */
104 ulint	srv_dml_needed_delay = 0;
105 
106 ibool	srv_monitor_active = FALSE;
107 ibool	srv_error_monitor_active = FALSE;
108 
109 ibool	srv_buf_dump_thread_active = FALSE;
110 
111 bool	srv_buf_resize_thread_active = false;
112 
113 ibool	srv_dict_stats_thread_active = FALSE;
114 
115 my_bool srv_scrub_log;
116 
117 const char*	srv_main_thread_op_info = "";
118 
119 /** Prefix used by MySQL to indicate pre-5.1 table name encoding */
120 const char		srv_mysql50_table_name_prefix[10] = "#mysql50#";
121 
122 /* Server parameters which are read from the initfile */
123 
124 /* The following three are dir paths which are catenated before file
125 names, where the file name itself may also contain a path */
126 
127 char*	srv_data_home	= NULL;
128 
129 /** Rollback files directory, can be absolute. */
130 char*	srv_undo_dir = NULL;
131 
132 /** The number of tablespaces to use for rollback segments. */
133 ulong	srv_undo_tablespaces = 0;
134 
135 /** The number of UNDO tablespaces that are open and ready to use. */
136 ulint	srv_undo_tablespaces_open = 0;
137 
/** The number of UNDO tablespaces that are active (hosting some rollback
segment). It is quite possible that some of the tablespaces do not host
any rollback segment, depending on the configuration used. */
141 ulint	srv_undo_tablespaces_active = 0;
142 
143 /* The number of rollback segments to use */
144 ulong	srv_rollback_segments = 1;
145 
146 /* Used for the deprecated setting innodb_undo_logs. This will still get
147 put into srv_rollback_segments if it is set to a non-default value. */
148 ulong	srv_undo_logs = 0;
149 const char* deprecated_undo_logs =
150 	"The parameter innodb_undo_logs is deprecated"
151 	" and may be removed in future releases."
152 	" Please use innodb_rollback_segments instead."
153 	" See " REFMAN "innodb-undo-logs.html";
154 
155 
156 /** Rate at which UNDO records should be purged. */
157 ulong	srv_purge_rseg_truncate_frequency = 128;
158 
159 /** Enable or Disable Truncate of UNDO tablespace.
160 Note: If enabled then UNDO tablespace will be selected for truncate.
161 While Server waits for undo-tablespace to truncate if user disables
162 it, truncate action is completed but no new tablespace is marked
163 for truncate (action is never aborted). */
164 my_bool	srv_undo_log_truncate = FALSE;
165 
166 /** Maximum size of undo tablespace. */
167 unsigned long long	srv_max_undo_log_size;
168 
/** Enable or disable encryption of UNDO tablespaces. */
170 my_bool	srv_undo_log_encrypt = 0;
171 
172 /** UNDO logs that are not redo logged.
173 These logs reside in the temp tablespace.*/
174 const ulong		srv_tmp_undo_logs = 32;
175 
176 /** Enable or disable encryption of temporary tablespace.*/
177 my_bool	srv_tmp_tablespace_encrypt;
178 
179 /** Option to enable encryption of system tablespace. */
180 my_bool	srv_sys_tablespace_encrypt;
181 
182 /** Enable or disable encryption of pages in parallel doublewrite buffer
183 file */
184 my_bool	srv_parallel_dblwr_encrypt;
185 
186 /** Default undo tablespace size in UNIV_PAGEs count (10MB). */
187 const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
188 	((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
189 
190 /** Set if InnoDB must operate in read-only mode. We don't do any
191 recovery and open all tables in RO mode instead of RW mode. We don't
192 sync the max trx id to disk either. */
193 my_bool	srv_read_only_mode;
/** Store each table created by a user in its own file; data
dictionary tables are in the system tablespace 0 */
196 my_bool	srv_file_per_table;
197 /** The file format to use on new *.ibd files. */
198 ulint	srv_file_format = 0;
199 /** Whether to check file format during startup.  A value of
200 UNIV_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
201 set it to the highest format we support. */
202 ulint	srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
203 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
204 is greater than SRV_FORCE_NO_TRX_UNDO. */
205 my_bool	high_level_read_only;
206 
207 #if UNIV_FORMAT_A
208 # error "UNIV_FORMAT_A must be 0!"
209 #endif
210 
211 /** Place locks to records only i.e. do not use next-key locking except
212 on duplicate key checking and foreign key checking */
213 ibool	srv_locks_unsafe_for_binlog = FALSE;
214 /** Sort buffer size in index creation */
215 ulong	srv_sort_buf_size = 1048576;
216 /** Maximum modification log file size for online index creation */
217 unsigned long long	srv_online_max_size;
218 
219 /* If this flag is TRUE, then we will use the native aio of the
220 OS (provided we compiled Innobase with it in), otherwise we will
221 use simulated aio we build below with threads.
222 Currently we support native aio on windows and linux */
223 my_bool	srv_use_native_aio = TRUE;
224 
225 /** Whether the redo log tracking is currently enabled. Note that it is
226 possible for the log tracker thread to be running and the tracking to be
227 disabled */
228 my_bool	srv_track_changed_pages = FALSE;
229 
230 ulonglong	srv_max_bitmap_file_size = 100 * 1024 * 1024;
231 
232 ulonglong	srv_max_changed_pages = 0;
233 #ifdef UNIV_DEBUG
234 /** Force all user tables to use page compression. */
235 ulong	srv_debug_compress;
236 /** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
237 my_bool	srv_master_thread_disabled_debug;
238 /** Event used to inform that master thread is disabled. */
239 static os_event_t	srv_master_thread_disabled_event;
240 /** Debug variable to find if any background threads are adding
241 to purge during slow shutdown. */
242 extern bool		trx_commit_disallowed;
243 #endif /* UNIV_DEBUG */
244 
245 /*------------------------- LOG FILES ------------------------ */
246 char*	srv_log_group_home_dir	= NULL;
247 
248 /** Enable or disable Encrypt of REDO tablespace. */
249 ulong	srv_redo_log_encrypt = REDO_LOG_ENCRYPT_OFF;
250 
251 ulong	srv_n_log_files		= SRV_N_LOG_FILES_MAX;
252 /** At startup, this is the current redo log file size.
253 During startup, if this is different from srv_log_file_size_requested
254 (innodb_log_file_size), the redo log will be rebuilt and this size
255 will be initialized to srv_log_file_size_requested.
256 When upgrading from a previous redo log format, this will be set to 0,
257 and writing to the redo log is not allowed.
258 
259 During startup, this is in bytes, and later converted to pages. */
260 ib_uint64_t	srv_log_file_size;
261 /** The value of the startup parameter innodb_log_file_size */
262 ib_uint64_t	srv_log_file_size_requested;
263 /* size in database pages */
264 ulint		srv_log_buffer_size = ULINT_MAX;
265 uint		srv_flush_log_at_timeout = 1;
266 ulong		srv_page_size = UNIV_PAGE_SIZE_DEF;
267 ulong		srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
268 ulong		srv_log_write_ahead_size = 0;
269 
270 page_size_t	univ_page_size(0, 0, false);
271 
272 char	srv_use_global_flush_log_at_trx_commit	= TRUE;
273 
274 /* Try to flush dirty pages so as to avoid IO bursts at
275 the checkpoints. */
276 char	srv_adaptive_flushing	= TRUE;
277 
278 ulint	srv_show_locks_held	= 10;
279 ulint	srv_show_verbose_locks	= 0;
280 
281 /* Allow IO bursts at the checkpoints ignoring io_capacity setting. */
282 my_bool	srv_flush_sync		= TRUE;
283 
/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT	20

/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex.
The macro evaluates to true while mutex_skipped is still below
MAX_MUTEX_NOWAIT (a further nonblocking attempt is allowed), and
to false once that limit has been reached. */
#define MUTEX_NOWAIT(mutex_skipped)	((mutex_skipped) < MAX_MUTEX_NOWAIT)
293 
294 /** Requested size in bytes */
295 ulint	srv_buf_pool_size	= ULINT_MAX;
296 /** Minimum pool size in bytes */
297 const ulint	srv_buf_pool_min_size	= 5 * 1024 * 1024;
298 /** Default pool size in bytes */
299 const ulint	srv_buf_pool_def_size	= 128 * 1024 * 1024;
300 /** Requested buffer pool chunk size. Each buffer pool instance consists
301 of one or more chunks. */
302 ulonglong	srv_buf_pool_chunk_unit;
303 /** Requested number of buffer pool instances */
304 ulong	srv_buf_pool_instances;
305 /** Default number of buffer pool instances */
306 const ulong	srv_buf_pool_instances_default = 0;
307 /** Number of locks to protect buf_pool->page_hash */
308 ulong	srv_n_page_hash_locks = 16;
309 
310 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
311 ulong	srv_LRU_scan_depth	= 1024;
312 /** Whether or not to flush neighbors of a block */
313 ulong	srv_flush_neighbors	= 1;
314 /** Previously requested size. Accesses protected by memory barriers. */
315 ulint	srv_buf_pool_old_size	= 0;
316 /** Current size as scaling factor for the other components */
317 ulint	srv_buf_pool_base_size	= 0;
318 /** Current size in bytes */
319 ulint	srv_buf_pool_curr_size	= 0;
320 /** Dump this % of each buffer pool during BP dump */
321 ulong	srv_buf_pool_dump_pct;
322 /** Lock table size in bytes */
323 ulint	srv_lock_table_size	= ULINT_MAX;
324 
325 /** The maximum time limit for a single LRU tail flush iteration by the page
326 cleaner thread */
327 ulint	srv_cleaner_max_lru_time = 1000;
328 
329 /** The maximum time limit for a single flush list flush iteration by the page
330 cleaner thread */
331 ulint	srv_cleaner_max_flush_time = 1000;
332 
333 /** Page cleaner LSN age factor formula option */
334 ulong	srv_cleaner_lsn_age_factor
335 	= SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT;
336 
337 /** Empty free list for a query thread handling algorithm option  */
338 ulong	srv_empty_free_list_algorithm = SRV_EMPTY_FREE_LIST_BACKOFF;
339 
340 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
341 instead. */
342 ulint	srv_n_read_io_threads	= ULINT_MAX;
343 ulint	srv_n_write_io_threads	= ULINT_MAX;
344 
345 /* Switch to enable random read ahead. */
346 my_bool	srv_random_read_ahead	= FALSE;
347 /* User settable value of the number of pages that must be present
348 in the buffer cache and accessed sequentially for InnoDB to trigger a
349 readahead request. */
350 ulong	srv_read_ahead_threshold	= 56;
351 
352 /** Maximum on-disk size of change buffer in terms of percentage
353 of the buffer pool. */
354 uint	srv_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
355 
356 /* This parameter is used to throttle the number of insert buffers that are
357 merged in a batch. By increasing this parameter on a faster disk you can
358 possibly reduce the number of I/O operations performed to complete the
359 merge operation. The value of this parameter is used as is by the
360 background loop when the system is idle (low load), on a busy system
361 the parameter is scaled down by a factor of 4, this is to avoid putting
362 a heavier load on the I/O sub system. */
363 
364 ulong	srv_insert_buffer_batch_size = 20;
365 
366 char*	srv_file_flush_method_str = NULL;
367 #ifndef _WIN32
368 enum srv_unix_flush_t	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
369 #else
370 enum srv_win_flush_t	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
371 #endif /* _WIN32 */
372 
373 ulint	srv_max_n_open_files	  = 300;
374 
375 /* Number of IO operations per second the server can do */
376 ulong	srv_io_capacity         = 200;
377 ulong	srv_max_io_capacity     = 400;
378 
379 /* The number of page cleaner threads to use.*/
380 ulong	srv_n_page_cleaners = 4;
381 
382 /* The InnoDB main thread tries to keep the ratio of modified pages
383 in the buffer pool to all database pages in the buffer pool smaller than
384 the following number. But it is not guaranteed that the value stays below
385 that during a time of heavy update/insert activity. */
386 
387 double	srv_max_buf_pool_modified_pct	= 75.0;
388 double	srv_max_dirty_pages_pct_lwm	= 0.0;
389 
390 /* This is the percentage of log capacity at which adaptive flushing,
391 if enabled, will kick in. */
392 ulong	srv_adaptive_flushing_lwm	= 10;
393 
394 /* Number of iterations over which adaptive flushing is averaged. */
395 ulong	srv_flushing_avg_loops		= 30;
396 
397 /* The tids of the purge threads */
398 os_tid_t	srv_purge_tids[SRV_MAX_N_PURGE_THREADS];
399 
400 /* The tids of the I/O threads */
401 os_tid_t	srv_io_tids[SRV_MAX_N_IO_THREADS];
402 
403 /* The tid of the master thread */
404 os_tid_t	srv_master_tid;
405 
406 /* The relative scheduling priority of the purge threads */
407 ulint	srv_sched_priority_purge	= 19;
408 
409 /* The relative scheduling priority of the I/O threads */
410 ulint	srv_sched_priority_io		= 19;
411 
412 /* The relative scheduling priority of the master thread */
413 ulint	srv_sched_priority_master	= 19;
414 
415 /* The relative priority of the current thread.  If 0, low priority; if 1, high
416 priority.  */
417 UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0;
418 
419 /* The relative priority of the purge coordinator and worker threads.  */
420 my_bool	srv_purge_thread_priority	= FALSE;
421 
422 /* The relative priority of the master thread.  */
423 my_bool	srv_master_thread_priority	= FALSE;
424 
425 /* The number of purge threads to use.*/
426 ulong	srv_n_purge_threads = 4;
427 
428 /* the number of pages to purge in one batch */
429 ulong	srv_purge_batch_size = 20;
430 
431 ulong srv_encrypt_tables = 0;
432 
433 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
434 NULL value when collecting statistics. By default, it is set to
435 SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
436 ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
437 
438 srv_stats_t	srv_stats;
439 
440 /* structure to pass status variables to MySQL */
441 export_var_t export_vars;
442 
443 /** Normally 0. When nonzero, skip some phases of crash recovery,
444 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
445 by SELECT or mysqldump. When this is nonzero, we do not allow any user
446 modifications to the data. */
447 ulong	srv_force_recovery;
448 #ifndef NDEBUG
449 /** Inject a crash at different steps of the recovery process.
450 This is for testing and debugging only. */
451 ulong	srv_force_recovery_crash;
452 #endif /* !NDEBUG */
453 
454 /** Print all user-level transactions deadlocks to mysqld stderr */
455 
456 my_bool	srv_print_all_deadlocks = FALSE;
457 
458 /** Print lock wait timeout info to mysqld stderr */
459 
460 my_bool	srv_print_lock_wait_timeout_info = FALSE;
461 
462 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
463 my_bool	srv_cmp_per_index_enabled = FALSE;
464 
465 /* If the following is set to 1 then we do not run purge and insert buffer
466 merge to completion before shutdown. If it is set to 2, do not even flush the
467 buffer pool to data files at the shutdown: we effectively 'crash'
468 InnoDB (but lose no committed transactions). */
469 ulint	srv_fast_shutdown	= 0;
470 
471 /* Generate a innodb_status.<pid> file */
472 ibool	srv_innodb_status	= FALSE;
473 
474 /* When estimating number of different key values in an index, sample
475 this many index pages, there are 2 ways to calculate statistics:
476 * persistent stats that are calculated by ANALYZE TABLE and saved
477   in the innodb database.
478 * quick transient stats, that are used if persistent stats for the given
479   table/index are not found in the innodb database */
480 unsigned long long	srv_stats_transient_sample_pages = 8;
481 my_bool		srv_stats_persistent = TRUE;
482 my_bool		srv_stats_include_delete_marked = FALSE;
483 unsigned long long	srv_stats_persistent_sample_pages = 20;
484 my_bool		srv_stats_auto_recalc = TRUE;
485 
486 ibool	srv_use_doublewrite_buf	= TRUE;
487 
/** The doublewrite buffer is 2MB in size, i.e. it can hold 128 16K pages.
489 The following parameter is the size of the buffer that is used for
490 batch flushing i.e.: LRU flushing and flush_list flushing. The rest
491 of the pages are used for single page flushing. */
492 ulong	srv_doublewrite_batch_size	= 120;
493 
494 ulong	srv_replication_delay		= 0;
495 
496 ulint	srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
497 
498 /*-------------------------------------------*/
499 ulong	srv_n_spin_wait_rounds	= 30;
500 ulong	srv_spin_wait_delay	= 6;
501 ibool	srv_priority_boost	= TRUE;
502 
503 static ulint		srv_n_rows_inserted_old		= 0;
504 static ulint		srv_n_rows_updated_old		= 0;
505 static ulint		srv_n_rows_deleted_old		= 0;
506 static ulint		srv_n_rows_read_old		= 0;
507 
508 ulint	srv_truncated_status_writes	= 0;
509 ulint	srv_available_undo_logs         = 0;
510 
511 /* Set the following to 0 if you want InnoDB to write messages on
512 stderr on startup/shutdown. */
513 ibool	srv_print_verbose_log		= TRUE;
514 my_bool	srv_print_innodb_monitor	= FALSE;
515 my_bool	srv_print_innodb_lock_monitor	= FALSE;
516 
517 /* Array of English strings describing the current state of an
518 i/o handler thread */
519 
520 const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
521 const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
522 
523 ib_time_monotonic_t	srv_last_monitor_time;
524 
525 ib_mutex_t	srv_innodb_monitor_mutex;
526 
527 /** Mutex protecting page_zip_stat_per_index */
528 ib_mutex_t	page_zip_stat_per_index_mutex;
529 
530 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
531 ib_mutex_t	srv_monitor_file_mutex;
532 
533 /** Temporary file for innodb monitor output */
534 FILE*	srv_monitor_file;
535 /** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
536 This mutex has a very high rank; threads reserving it should not
537 be holding any InnoDB latches. */
538 ib_mutex_t	srv_dict_tmpfile_mutex;
539 /** Temporary file for output from the data dictionary */
540 FILE*	srv_dict_tmpfile;
541 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
542 This mutex has a very low rank; threads reserving it should not
543 acquire any further latches or sleep before releasing this one. */
544 ib_mutex_t	srv_misc_tmpfile_mutex;
/** Temporary file for miscellaneous diagnostic output */
546 FILE*	srv_misc_tmpfile;
547 
548 ulint	srv_main_thread_process_no	= 0;
549 ulint	srv_main_thread_id		= 0;
550 
551 /* The following counts are used by the srv_master_thread. */
552 
553 /** Iterations of the loop bounded by 'srv_active' label. */
554 static ulint		srv_main_active_loops		= 0;
555 /** Iterations of the loop bounded by the 'srv_idle' label. */
556 static ulint		srv_main_idle_loops		= 0;
557 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
558 static ulint		srv_main_shutdown_loops		= 0;
559 /** Log writes involving flush. */
560 static ulint		srv_log_writes_and_flush	= 0;
561 
562 /** Number of times secondary index lookup triggered cluster lookup */
563 ulint	srv_sec_rec_cluster_reads		= 0;
564 
565 /** Number of times prefix optimization avoided triggering cluster lookup */
566 ulint	srv_sec_rec_cluster_reads_avoided	= 0;
567 
568 /* This is only ever touched by the master thread. It records the
569 time when the last flush of log file has happened. The master
570 thread ensures that we flush the log files at least once per
571 second. */
572 static ib_time_monotonic_t	srv_last_log_flush_time;
573 
/* Intervals in seconds at which various tasks are performed by the
master thread when the server is active. In order to balance the workload,
we should try to keep the intervals such that they are not multiples of
each other. For example, if we have intervals for various tasks
defined as 5, 10, 15, 60 then all tasks will be performed when
current_time % 60 == 0 and no tasks will be performed when
current_time % 5 != 0. */

/* Interval, in seconds, for the master thread's checkpoint task. */
# define	SRV_MASTER_CHECKPOINT_INTERVAL		(7)
/* Interval, in seconds, for the master thread's purge task. */
# define	SRV_MASTER_PURGE_INTERVAL		(10)
/* Interval, in seconds, for the master thread's dictionary LRU task. */
# define	SRV_MASTER_DICT_LRU_INTERVAL		(47)
585 
/** Acquire the system mutex (srv_sys->mutex). Blocks in mutex_enter()
until the mutex is obtained. */
#define srv_sys_mutex_enter() do {			\
	mutex_enter(&srv_sys->mutex);			\
} while (0)

/** Test if the system mutex (srv_sys->mutex) is owned. Note that the
expression is also false whenever srv_read_only_mode is set, regardless
of what mutex_own() reports. */
#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex)	\
			     && !srv_read_only_mode)

/** Release the system mutex (srv_sys->mutex). */
#define srv_sys_mutex_exit() do {			\
	mutex_exit(&srv_sys->mutex);			\
} while (0)
599 
/** Lock wait timeout that applies to a transaction.
Evaluates to thd_lock_wait_timeout() of the transaction's mysql_thd when
trx->lock.allowed_to_wait is set, and to 0 otherwise.
NOTE: the trx argument is expanded twice, so it must not have side
effects. */
#define fetch_lock_wait_timeout(trx)			\
	((trx)->lock.allowed_to_wait			\
	 ? thd_lock_wait_timeout((trx)->mysql_thd)	\
	 : 0)
604 
605 /*
606 	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
607 	=========================================
608 
609 There is the following analogue between this database
610 server and an operating system kernel:
611 
612 DB concept			equivalent OS concept
613 ----------			---------------------
614 transaction		--	process;
615 
616 query thread		--	thread;
617 
618 lock			--	semaphore;
619 
620 kernel			--	kernel;
621 
622 query thread execution:
623 (a) without lock mutex
624 reserved		--	process executing in user mode;
625 (b) with lock mutex reserved
626 			--	process executing in kernel mode;
627 
The server has several background threads all running at the same
priority as user threads. It periodically checks if there is anything
630 happening in the server which requires intervention of the master
631 thread. Such situations may be, for example, when flushing of dirty
632 blocks is needed in the buffer pool or old version of database rows
633 have to be cleaned away (purged). The user can configure a separate
634 dedicated purge thread(s) too, in which case the master thread does not
635 do any purging.
636 
637 The threads which we call user threads serve the queries of the MySQL
638 server. They run at normal priority.
639 
640 When there is no activity in the system, also the master thread
641 suspends itself to wait for an event making the server totally silent.
642 
643 There is still one complication in our server design. If a
644 background utility thread obtains a resource (e.g., mutex) needed by a user
645 thread, and there is also some other user activity in the system,
646 the user thread may have to wait indefinitely long for the
647 resource, as the OS does not schedule a background thread if
648 there is some other runnable user thread. This problem is called
649 priority inversion in real-time programming.
650 
651 One solution to the priority inversion problem would be to keep record
652 of which thread owns which resource and in the above case boost the
653 priority of the background thread so that it will be scheduled and it
654 can release the resource.  This solution is called priority inheritance
655 in real-time programming.  A drawback of this solution is that the overhead
656 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
657 MHz Pentium, because the thread has to call os_thread_get_curr_id.  This may
658 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
that the thread cannot store the information in the resource, say a mutex,
660 itself, because competing threads could wipe out the information if it is
661 stored before acquiring the mutex, and if it stored afterwards, the
662 information is outdated for the time of one machine instruction, at least.
663 (To be precise, the information could be stored to lock_word in mutex if
664 the machine supports atomic swap.)
665 
666 The above solution with priority inheritance may become actual in the
667 future, currently we do not implement any priority twiddling solution.
668 Our general aim is to reduce the contention of all mutexes by making
669 them more fine grained.
670 
671 The thread table contains information of the current status of each
672 thread existing in the system, and also the event semaphores used in
673 suspending the master thread and utility threads when they have nothing
674 to do.  The thread table can be seen as an analogue to the process table
675 in a traditional Unix implementation. */
676 
677 /** The server system struct */
678 struct srv_sys_t{
679 	ib_mutex_t	tasks_mutex;		/*!< variable protecting the
680 						tasks queue */
681 	UT_LIST_BASE_NODE_T(que_thr_t)
682 			tasks;			/*!< task queue */
683 
684 	ib_mutex_t	mutex;			/*!< variable protecting the
685 						fields below. */
686 	ulint		n_sys_threads;		/*!< size of the sys_threads
687 						array */
688 
689 	srv_slot_t*	sys_threads;		/*!< server thread table */
690 
691 	ulint		n_threads_active[SRV_MASTER + 1];
692 						/*!< number of threads active
693 						in a thread class */
694 
695 	srv_stats_t::ulint_ctr_1_t
696 			activity_count;		/*!< For tracking server
697 						activity */
698 	srv_stats_t::ulint_ctr_1_t
699 			ibuf_merge_activity_count;/*!< For tracking change
700 						buffer merge activity, a subset
701 						of overall server activity */
702 };
703 
704 static srv_sys_t*	srv_sys	= NULL;
705 
706 /** Event to signal the monitor thread. */
707 os_event_t	srv_monitor_event;
708 
709 /** Event to signal the error thread */
710 os_event_t	srv_error_event;
711 
712 /** Event to signal the buffer pool dump/load thread */
713 os_event_t	srv_buf_dump_event;
714 
715 /** Event to signal the buffer pool resize thread */
716 os_event_t	srv_buf_resize_event;
717 
718 /** The buffer pool dump/load file name */
719 char*	srv_buf_dump_filename;
720 
721 /** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
722 and/or load it during startup. */
723 char	srv_buffer_pool_dump_at_shutdown = TRUE;
724 char	srv_buffer_pool_load_at_startup = TRUE;
725 
726 /** Path to the parallel doublewrite buffer */
727 char*	srv_parallel_doublewrite_path;
728 
729 /** Slot index in the srv_sys->sys_threads array for the purge thread. */
730 static const ulint	SRV_PURGE_SLOT	= 1;
731 
732 /** Slot index in the srv_sys->sys_threads array for the master thread. */
733 static const ulint	SRV_MASTER_SLOT = 0;
734 
735 os_event_t	srv_checkpoint_completed_event;
736 
737 os_event_t	srv_redo_log_tracked_event;
738 
739 /** Whether the redo log tracker thread has been started. Does not take into
740 account whether the tracking is currently enabled (see srv_track_changed_pages
741 for that) */
742 bool	srv_redo_log_thread_started = false;
743 
744 #ifdef HAVE_PSI_STAGE_INTERFACE
745 /** Performance schema stage event for monitoring ALTER TABLE progress
746 everything after flush log_make_checkpoint_at(). */
747 PSI_stage_info	srv_stage_alter_table_end
748 	= {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
749 
750 /** Performance schema stage event for monitoring ALTER TABLE progress
751 log_make_checkpoint_at(). */
752 PSI_stage_info	srv_stage_alter_table_flush
753 	= {0, "alter table (flush)", PSI_FLAG_STAGE_PROGRESS};
754 
755 /** Performance schema stage event for monitoring ALTER TABLE progress
756 row_merge_insert_index_tuples(). */
757 PSI_stage_info	srv_stage_alter_table_insert
758 	= {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS};
759 
760 /** Performance schema stage event for monitoring ALTER TABLE progress
761 row_log_apply(). */
762 PSI_stage_info	srv_stage_alter_table_log_index
763 	= {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS};
764 
765 /** Performance schema stage event for monitoring ALTER TABLE progress
766 row_log_table_apply(). */
767 PSI_stage_info	srv_stage_alter_table_log_table
768 	= {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS};
769 
770 /** Performance schema stage event for monitoring ALTER TABLE progress
771 row_merge_sort(). */
772 PSI_stage_info	srv_stage_alter_table_merge_sort
773 	= {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS};
774 
775 /** Performance schema stage event for monitoring ALTER TABLE progress
776 row_merge_read_clustered_index(). */
777 PSI_stage_info	srv_stage_alter_table_read_pk_internal_sort
778 	= {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS};
779 
780 /** Performance schema stage event for monitoring buffer pool load progress. */
781 PSI_stage_info	srv_stage_buffer_pool_load
782 	= {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS};
783 #endif /* HAVE_PSI_STAGE_INTERFACE */
784 
/** Forward declaration of a file-local helper. Its definition is not part
of the code visible here; NOTE(review): judging by the name it enables
undo log encryption when the corresponding option is set - confirm against
the definition. */
static
void
srv_enable_undo_encryption_if_set();
788 
789 /*********************************************************************//**
790 Prints counters for work done by srv_master_thread. */
791 static
792 void
srv_print_master_thread_info(FILE * file)793 srv_print_master_thread_info(
794 /*=========================*/
795 	FILE  *file)    /* in: output stream */
796 {
797 	fprintf(file,
798 		"srv_master_thread loops: "
799 		ULINTPF " srv_active, "
800 		ULINTPF " srv_shutdown, "
801 		ULINTPF " srv_idle\n",
802 		srv_main_active_loops,
803 		srv_main_shutdown_loops,
804 		srv_main_idle_loops);
805 	fprintf(file,
806 		"srv_master_thread log flush and writes: " ULINTPF "\n",
807 		srv_log_writes_and_flush);
808 }
809 
810 /*********************************************************************//**
811 Sets the info describing an i/o thread current state. */
812 void
srv_set_io_thread_op_info(ulint i,const char * str)813 srv_set_io_thread_op_info(
814 /*======================*/
815 	ulint		i,	/*!< in: the 'segment' of the i/o thread */
816 	const char*	str)	/*!< in: constant char string describing the
817 				state */
818 {
819 	ut_a(i < SRV_MAX_N_IO_THREADS);
820 
821 	srv_io_thread_op_info[i] = str;
822 }
823 
824 /*********************************************************************//**
825 Resets the info describing an i/o thread current state. */
826 void
srv_reset_io_thread_op_info()827 srv_reset_io_thread_op_info()
828 /*=========================*/
829 {
830 	for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
831 		srv_io_thread_op_info[i] = "not started yet";
832 	}
833 }
834 
#ifdef UNIV_DEBUG
/** Validates the type of a thread table slot (debug builds only).
SRV_MASTER, SRV_PURGE and SRV_WORKER are accepted; SRV_NONE, or a value
that matches no enumerator, reaches ut_error.
@param[in]	type	thread type
@return TRUE if ok */
static
ibool
srv_thread_type_validate(
	srv_thread_type	type)
{
	switch (type) {
	case SRV_MASTER:
	case SRV_PURGE:
	case SRV_WORKER:
		return(TRUE);

	case SRV_NONE:
		/* Not a real thread type: fall out to the error below. */
		break;
	}

	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
857 
858 /*********************************************************************//**
859 Gets the type of a thread table slot.
860 @return thread type */
861 static
862 srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)863 srv_slot_get_type(
864 /*==============*/
865 	const srv_slot_t*	slot)	/*!< in: thread slot */
866 {
867 	srv_thread_type	type = slot->type;
868 	ut_ad(srv_thread_type_validate(type));
869 	return(type);
870 }
871 
872 /*********************************************************************//**
873 Reserves a slot in the thread table for the current thread.
874 @return reserved slot */
875 static
876 srv_slot_t*
srv_reserve_slot(srv_thread_type type)877 srv_reserve_slot(
878 /*=============*/
879 	srv_thread_type	type)	/*!< in: type of the thread */
880 {
881 	srv_slot_t*	slot = 0;
882 
883 	srv_sys_mutex_enter();
884 
885 	ut_ad(srv_thread_type_validate(type));
886 
887 	switch (type) {
888 	case SRV_MASTER:
889 		slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
890 		break;
891 
892 	case SRV_PURGE:
893 		slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
894 		break;
895 
896 	case SRV_WORKER:
897 		/* Find an empty slot, skip the master and purge slots. */
898 		for (slot = &srv_sys->sys_threads[2];
899 		     slot->in_use;
900 		     ++slot) {
901 
902 			ut_a(slot < &srv_sys->sys_threads[
903 			     srv_sys->n_sys_threads]);
904 		}
905 		break;
906 
907 	case SRV_NONE:
908 		ut_error;
909 	}
910 
911 	ut_a(!slot->in_use);
912 
913 	slot->in_use = TRUE;
914 	slot->suspended = FALSE;
915 	slot->type = type;
916 
917 	ut_ad(srv_slot_get_type(slot) == type);
918 
919 	++srv_sys->n_threads_active[type];
920 
921 	srv_sys_mutex_exit();
922 
923 	return(slot);
924 }
925 
926 /*********************************************************************//**
927 Suspends the calling thread to wait for the event in its thread slot.
928 @return the current signal count of the event. */
929 static
930 int64_t
srv_suspend_thread_low(srv_slot_t * slot)931 srv_suspend_thread_low(
932 /*===================*/
933 	srv_slot_t*	slot)	/*!< in/out: thread slot */
934 {
935 
936 	ut_ad(!srv_read_only_mode);
937 	ut_ad(srv_sys_mutex_own());
938 
939 	ut_ad(slot->in_use);
940 
941 	srv_thread_type	type = srv_slot_get_type(slot);
942 
943 	switch (type) {
944 	case SRV_NONE:
945 		ut_error;
946 
947 	case SRV_MASTER:
948 		/* We have only one master thread and it
949 		should be the first entry always. */
950 		ut_a(srv_sys->n_threads_active[type] == 1);
951 		break;
952 
953 	case SRV_PURGE:
954 		/* We have only one purge coordinator thread
955 		and it should be the second entry always. */
956 		ut_a(srv_sys->n_threads_active[type] == 1);
957 		break;
958 
959 	case SRV_WORKER:
960 		ut_a(srv_n_purge_threads > 1);
961 		ut_a(srv_sys->n_threads_active[type] > 0);
962 		break;
963 	}
964 
965 	ut_a(!slot->suspended);
966 	slot->suspended = TRUE;
967 
968 	ut_a(srv_sys->n_threads_active[type] > 0);
969 
970 	srv_sys->n_threads_active[type]--;
971 
972 	return(os_event_reset(slot->event));
973 }
974 
975 /*********************************************************************//**
976 Suspends the calling thread to wait for the event in its thread slot.
977 @return the current signal count of the event. */
978 static
979 int64_t
srv_suspend_thread(srv_slot_t * slot)980 srv_suspend_thread(
981 /*===============*/
982 	srv_slot_t*	slot)	/*!< in/out: thread slot */
983 {
984 	srv_sys_mutex_enter();
985 
986 	int64_t		sig_count = srv_suspend_thread_low(slot);
987 
988 	srv_sys_mutex_exit();
989 
990 	return(sig_count);
991 }
992 
993 /*********************************************************************//**
994 Releases threads of the type given from suspension in the thread table.
995 NOTE! The server mutex has to be reserved by the caller!
996 @return number of threads released: this may be less than n if not
997         enough threads were suspended at the moment. */
998 ulint
srv_release_threads(srv_thread_type type,ulint n)999 srv_release_threads(
1000 /*================*/
1001 	srv_thread_type	type,	/*!< in: thread type */
1002 	ulint		n)	/*!< in: number of threads to release */
1003 {
1004 	ulint		i;
1005 	ulint		count	= 0;
1006 
1007 	ut_ad(srv_thread_type_validate(type));
1008 	ut_ad(n > 0);
1009 
1010 	srv_sys_mutex_enter();
1011 
1012 	for (i = 0; i < srv_sys->n_sys_threads; i++) {
1013 		srv_slot_t*	slot;
1014 
1015 		slot = &srv_sys->sys_threads[i];
1016 
1017 		if (slot->in_use
1018 		    && srv_slot_get_type(slot) == type
1019 		    && slot->suspended) {
1020 
1021 			switch (type) {
1022 			case SRV_NONE:
1023 				ut_error;
1024 
1025 			case SRV_MASTER:
1026 				/* We have only one master thread and it
1027 				should be the first entry always. */
1028 				ut_a(n == 1);
1029 				ut_a(i == SRV_MASTER_SLOT);
1030 				ut_a(srv_sys->n_threads_active[type] == 0);
1031 				break;
1032 
1033 			case SRV_PURGE:
1034 				/* We have only one purge coordinator thread
1035 				and it should be the second entry always. */
1036 				ut_a(n == 1);
1037 				ut_a(i == SRV_PURGE_SLOT);
1038 				ut_a(srv_n_purge_threads > 0);
1039 				ut_a(srv_sys->n_threads_active[type] == 0);
1040 				break;
1041 
1042 			case SRV_WORKER:
1043 				ut_a(srv_n_purge_threads > 1);
1044 				ut_a(srv_sys->n_threads_active[type]
1045 				     < srv_n_purge_threads - 1);
1046 				break;
1047 			}
1048 
1049 			slot->suspended = FALSE;
1050 
1051 			++srv_sys->n_threads_active[type];
1052 
1053 			os_event_set(slot->event);
1054 
1055 			if (++count == n) {
1056 				break;
1057 			}
1058 		}
1059 	}
1060 
1061 	srv_sys_mutex_exit();
1062 
1063 	return(count);
1064 }
1065 
1066 /*********************************************************************//**
1067 Release a thread's slot. */
1068 static
1069 void
srv_free_slot(srv_slot_t * slot)1070 srv_free_slot(
1071 /*==========*/
1072 	srv_slot_t*	slot)	/*!< in/out: thread slot */
1073 {
1074 	srv_sys_mutex_enter();
1075 
1076 	if (!slot->suspended) {
1077 		/* Mark the thread as inactive. */
1078 		srv_suspend_thread_low(slot);
1079 	}
1080 
1081 	/* Free the slot for reuse. */
1082 	ut_ad(slot->in_use);
1083 	slot->in_use = FALSE;
1084 
1085 	srv_sys_mutex_exit();
1086 }
1087 
1088 /*********************************************************************//**
1089 Initializes the server. */
1090 void
srv_init(void)1091 srv_init(void)
1092 /*==========*/
1093 {
1094 	ulint	n_sys_threads = 0;
1095 	ulint	srv_sys_sz = sizeof(*srv_sys);
1096 
1097 	mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
1098 
1099 	if (!srv_read_only_mode) {
1100 
1101 		/* Number of purge threads + master thread */
1102 		n_sys_threads = srv_n_purge_threads + 1;
1103 
1104 		srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
1105 	}
1106 
1107 	srv_sys = static_cast<srv_sys_t*>(ut_zalloc_nokey(srv_sys_sz));
1108 
1109 	srv_sys->n_sys_threads = n_sys_threads;
1110 
1111 	/* Even in read-only mode we flush pages related to intrinsic table
1112 	and so mutex creation is needed. */
1113 	{
1114 
1115 		mutex_create(LATCH_ID_SRV_SYS, &srv_sys->mutex);
1116 
1117 		mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys->tasks_mutex);
1118 
1119 		srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
1120 
1121 		for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1122 			srv_slot_t*	slot = &srv_sys->sys_threads[i];
1123 
1124 			slot->event = os_event_create(0);
1125 
1126 			ut_a(slot->event);
1127 		}
1128 
1129 		srv_error_event = os_event_create(0);
1130 
1131 		srv_monitor_event = os_event_create(0);
1132 
1133 		srv_buf_dump_event = os_event_create(0);
1134 
1135 		buf_flush_event = os_event_create("buf_flush_event");
1136 
1137 		UT_LIST_INIT(srv_sys->tasks, &que_thr_t::queue);
1138 
1139 		srv_checkpoint_completed_event = os_event_create(0);
1140 
1141 		srv_redo_log_tracked_event = os_event_create(0);
1142 		os_event_set(srv_redo_log_tracked_event);
1143 	}
1144 
1145 	srv_buf_resize_event = os_event_create(0);
1146 
1147 	ut_d(srv_master_thread_disabled_event = os_event_create(0));
1148 
1149 	/* page_zip_stat_per_index_mutex is acquired from:
1150 	1. page_zip_compress() (after SYNC_FSP)
1151 	2. page_zip_decompress()
1152 	3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
1153 	4. innodb_cmp_per_index_update(), no other latches
1154 	since we do not acquire any other latches while holding this mutex,
1155 	it can have very low level. We pick SYNC_ANY_LATCH for it. */
1156 	mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
1157 		     &page_zip_stat_per_index_mutex);
1158 
1159 	/* Create dummy indexes for infimum and supremum records */
1160 
1161 	dict_ind_init();
1162 
1163 	/* Initialize some INFORMATION SCHEMA internal structures */
1164 	trx_i_s_cache_init(trx_i_s_cache);
1165 
1166 	ut_crc32_init();
1167 
1168 	dict_mem_init();
1169 }
1170 
1171 /*********************************************************************//**
1172 Frees the data structures created in srv_init(). */
1173 void
srv_free(void)1174 srv_free(void)
1175 /*==========*/
1176 {
1177 	mutex_free(&srv_innodb_monitor_mutex);
1178 	mutex_free(&page_zip_stat_per_index_mutex);
1179 
1180 	{
1181 		mutex_free(&srv_sys->mutex);
1182 		mutex_free(&srv_sys->tasks_mutex);
1183 
1184 		for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1185 			srv_slot_t*	slot = &srv_sys->sys_threads[i];
1186 
1187 			os_event_destroy(slot->event);
1188 		}
1189 
1190 		os_event_destroy(srv_error_event);
1191 		os_event_destroy(srv_monitor_event);
1192 		os_event_destroy(srv_buf_dump_event);
1193 		os_event_destroy(buf_flush_event);
1194 		os_event_destroy(srv_checkpoint_completed_event);
1195 		os_event_destroy(srv_redo_log_tracked_event);
1196 	}
1197 
1198 	os_event_destroy(srv_buf_resize_event);
1199 
1200 #ifdef UNIV_DEBUG
1201 	os_event_destroy(srv_master_thread_disabled_event);
1202 	srv_master_thread_disabled_event = NULL;
1203 #endif /* UNIV_DEBUG */
1204 
1205 	trx_i_s_cache_free(trx_i_s_cache);
1206 
1207 	ut_free(srv_sys);
1208 
1209 	srv_sys = 0;
1210 }
1211 
/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage.
Calls, in this order: sync_check_init(), recv_sys_var_init(),
os_thread_init(), trx_pool_init(), que_init() and row_mysql_init(). */
void
srv_general_init(void)
/*==================*/
{
	sync_check_init();
	/* Reset the system variables in the recovery module. */
	recv_sys_var_init();
	os_thread_init();
	trx_pool_init();
	que_init();
	row_mysql_init();
}
1227 
1228 /*********************************************************************//**
1229 Normalizes init parameter values to use units we use inside InnoDB. */
1230 static
1231 void
srv_normalize_init_values(void)1232 srv_normalize_init_values(void)
1233 /*===========================*/
1234 {
1235 	srv_sys_space.normalize();
1236 
1237 	srv_tmp_space.normalize();
1238 
1239 	srv_log_file_size /= UNIV_PAGE_SIZE;
1240 
1241 	srv_log_buffer_size /= UNIV_PAGE_SIZE;
1242 
1243 	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1244 }
1245 
/*********************************************************************//**
Boots the InnoDB server.
Runs three steps in order: srv_normalize_init_values(),
srv_general_init() and srv_init(). */
void
srv_boot(void)
/*==========*/
{
	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	srv_normalize_init_values();

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();
}
1266 
/******************************************************************//**
Refreshes the values used to calculate per-second averages.
Under srv_innodb_monitor_mutex, records the current monotonic time as the
last monitor time, asks the AIO, log and buffer pool modules to refresh
their statistics, and snapshots the adaptive hash search counters and the
row operation counters into their "_old" counterparts. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	/* Start of the next averaging interval. */
	srv_last_monitor_time = ut_time_monotonic();

	os_aio_refresh_stats();

	/* Snapshot the search counters. */
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats_all();

	/* Snapshot the row operation counters. */
	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
	srv_n_rows_updated_old = srv_stats.n_rows_updated;
	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
	srv_n_rows_read_old = srv_stats.n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}
1294 
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
The report is written while holding srv_innodb_monitor_mutex and consists
of these sections, in order: BACKGROUND THREAD, SEMAPHORES, LATEST FOREIGN
KEY ERROR (only if one was recorded), the lock summary and transaction
list, FILE I/O, INSERT BUFFER AND ADAPTIVE HASH INDEX, LOG, BUFFER POOL AND
MEMORY, and ROW OPERATIONS. Several sections are skipped while
recv_recovery_on is set. As a side effect the function updates
srv_last_monitor_time and the "_old" counters that the per-second averages
are computed from.
@return FALSE if not all information printed
due to failure to obtain necessary mutex (also FALSE while
recv_recovery_on is set, because the lock section is skipped then) */
ibool
srv_printf_innodb_monitor(
/*======================*/
	FILE*	file,		/*!< in: output stream */
	ibool	nowait,		/*!< in: whether to wait for the
				lock_sys_t:: mutex */
	ulint*	trx_start_pos,	/*!< out: file position of the start of
				the list of active transactions; may be
				NULL; set to ULINT_UNDEFINED if ftell()
				fails; left untouched when the lock
				section is not printed */
	ulint*	trx_end)	/*!< out: file position of the end of
				the list of active transactions; same
				conventions as trx_start_pos */
{
	double 			time_elapsed;
	ib_time_monotonic_t	current_time;
	ulint	n_reserved;
	ibool	ret;

	ulong	btr_search_sys_constant;
	ulong	btr_search_sys_variable;
	ulint	lock_sys_subtotal;
	ulint	recv_sys_subtotal;
	size_t	dict_sys_hash_size;
	ulint	dict_sys_size;

	trx_t*	trx;

	mutex_enter(&srv_innodb_monitor_mutex);

	current_time = ut_time_monotonic();

	/* We add 0.001 seconds to time_elapsed to prevent division
	by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
	same time */

	time_elapsed = current_time - srv_last_monitor_time + 0.001;

	/* NOTE(review): the clock is sampled a second time here instead of
	reusing current_time. */
	srv_last_monitor_time = ut_time_monotonic();

	fputs("\n=====================================\n", file);

	ut_print_timestamp(file);
	fprintf(file,
		" INNODB MONITOR OUTPUT\n"
		"=====================================\n"
		"Per second averages calculated from the last %lu seconds\n",
		(ulong) time_elapsed);

	fputs("-----------------\n"
	      "BACKGROUND THREAD\n"
	      "-----------------\n", file);
	srv_print_master_thread_info(file);

	fputs("----------\n"
	      "SEMAPHORES\n"
	      "----------\n", file);

	sync_print(file);

	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
	order level in sync0sync.h, while dict_foreign_err_mutex has a very
	low level 135. Therefore we can reserve the latter mutex here without
	a danger of a deadlock of threads. */

	if (!recv_recovery_on) {

		mutex_enter(&dict_foreign_err_mutex);

		/* A non-zero position in the error file means that
		something has been written to it. */
		if (!srv_read_only_mode
		    && ftell(dict_foreign_err_file) != 0L) {
			fputs("------------------------\n"
			      "LATEST FOREIGN KEY ERROR\n"
			      "------------------------\n", file);
			ut_copy_file(file, dict_foreign_err_file);
		}

		mutex_exit(&dict_foreign_err_mutex);
	}

	/* Only if lock_print_info_summary proceeds correctly,
	before we call the lock_print_info_all_transactions
	to print all the lock information. IMPORTANT NOTE: This
	function acquires the lock mutex on success. */
	ret = recv_recovery_on ? FALSE : lock_print_info_summary(file, nowait);

	if (ret) {
		if (trx_start_pos) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_start_pos = ULINT_UNDEFINED;
			} else {
				*trx_start_pos = (ulint) t;
			}
		}

		/* NOTE: If we get here then we have the lock mutex. This
		function will release the lock mutex that we acquired when
		we called the lock_print_info_summary() function earlier. */

		lock_print_info_all_transactions(file);

		if (trx_end) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_end = ULINT_UNDEFINED;
			} else {
				*trx_end = (ulint) t;
			}
		}
	}

	fputs("--------\n"
	      "FILE I/O\n"
	      "--------\n", file);
	os_aio_print(file);

	if (!recv_recovery_on) {

		fputs("-------------------------------------\n"
		      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
		      "-------------------------------------\n", file);
		ibuf_print(file);
	}

	/* Print each adaptive hash index partition under its own
	shared latch. */
	for (ulint i = 0; i < btr_ahi_parts; ++i) {
		rw_lock_s_lock(btr_search_latches[i]);
		ha_print_info(file, btr_search_sys->hash_tables[i]);
		rw_lock_s_unlock(btr_search_latches[i]);
	}

	fprintf(file,
		"%.2f hash searches/s, %.2f non-hash searches/s\n",
		(btr_cur_n_sea - btr_cur_n_sea_old)
		/ time_elapsed,
		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
		/ time_elapsed);
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	if (!recv_recovery_on) {

		fputs("---\n"
		      "LOG\n"
		      "---\n", file);
		log_print(file);
	}

	fputs("----------------------\n"
	      "BUFFER POOL AND MEMORY\n"
	      "----------------------\n", file);
	fprintf(file,
		"Total large memory allocated " ULINTPF "\n"
		"Dictionary memory allocated " ULINTPF "\n",
		os_total_large_mem_allocated, dict_sys ? dict_sys->size : 0UL);

	/* Calculate AHI constant and variable memory allocations */

	btr_search_sys_constant = btr_search_sys_constant_mem;
	os_rmb;
	btr_search_sys_variable = btr_search_sys_variable_mem;

	/* Sum the lock heap sizes of all transactions on
	trx_sys->mysql_trx_list, under trx_sys->mutex. */
	lock_sys_subtotal = 0;
	if (trx_sys) {
		mutex_enter(&trx_sys->mutex);
		trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
		while (trx) {
			lock_sys_subtotal
				+= ((trx->lock.lock_heap)
				    ? mem_heap_get_size(trx->lock.lock_heap)
				    : 0);
			trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
		}
		mutex_exit(&trx_sys->mutex);
	}

	recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
			? mem_heap_get_size(recv_sys->heap) : 0);

	dict_sys_hash_size = dict_sys ? dict_sys->hash_size : 0;
	dict_sys_size = dict_sys ? dict_sys->size : 0;

	/* NOTE(review): in the call below, the third value of the
	"Adaptive hash index" and "Dictionary cache" rows is a ulong but is
	printed with ULINTPF - confirm that ULINTPF matches ulong on every
	supported platform. */
	fprintf(file,
			"Internal hash tables (constant factor + variable factor)\n"
			"    Adaptive hash index %lu \t(%lu + " ULINTPF ")\n"
			"    Page hash           %lu (buffer pool 0 only)\n"
			"    Dictionary cache    %lu \t(%lu + " ULINTPF ")\n"
			"    File system         %lu \t(%lu + " ULINTPF ")\n"
			"    Lock system         %lu \t(%lu + " ULINTPF ")\n"
			"    Recovery system     %lu \t(%lu + " ULINTPF ")\n",

			/* Adaptive hash index: total, constant, variable */
			btr_search_sys_constant + btr_search_sys_variable,
			btr_search_sys_constant,
			btr_search_sys_variable,

			/* Page hash of buffer pool instance 0 */
			(ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),

			/* Dictionary cache: total, hash, other */
			(ulong) (dict_sys_hash_size + dict_sys_size),
			(ulong) (dict_sys_hash_size),
			(ulong) (dict_sys_size),

			/* File system: total, hash cells, hash nodes */
			(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
					+ fil_system_hash_nodes()),
			(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
			fil_system_hash_nodes(),

			/* Lock system: total, record hash, lock heaps */
			(ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
					+ lock_sys_subtotal),
			(ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
			lock_sys_subtotal,

			/* Recovery system: total, address hash, heap */
			(ulong) (((recv_sys && recv_sys->addr_hash)
						? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
					+ recv_sys_subtotal),
			(ulong) ((recv_sys && recv_sys->addr_hash)
					? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
			recv_sys_subtotal);

	buf_print_io(file);

	fputs("--------------\n"
	      "ROW OPERATIONS\n"
	      "--------------\n", file);
	fprintf(file,
		ULINTPF " queries inside InnoDB, "
		ULINTPF " queries in queue\n",
		srv_conc_get_active_threads(),
		srv_conc_get_waiting_threads());

	/* This is a dirty read, without holding trx_sys->mutex. */
	/* NOTE(review): trx_sys is dereferenced unconditionally from here
	on, although it is checked for NULL when the lock heaps are summed
	above - confirm it cannot be NULL at this point. */
	fprintf(file,
		ULINTPF " read views open inside InnoDB\n",
		trx_sys->mvcc->size());

	mutex_enter(&trx_sys->mutex);

	/* NOTE(review): printed with %lu while the neighbouring counters
	use ULINTPF - confirm the type of UT_LIST_GET_LEN(). */
	fprintf(file, "%lu RW transactions active inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->rw_trx_list));

	ReadView*	oldest_view = trx_sys->mvcc->get_oldest_view();
	if (oldest_view) {

		fprintf(file, "---OLDEST VIEW---\n");
		oldest_view->print(file);
		fprintf(file, "-----------------\n");
	}

	mutex_exit(&trx_sys->mutex);

	n_reserved = fil_space_get_n_reserved_extents(0);
	if (n_reserved > 0) {
		fprintf(file,
			ULINTPF " tablespace extents now reserved for"
			" B-tree split operations\n",
			n_reserved);
	}

	fprintf(file,
		"Process ID=" ULINTPF
		", Main thread ID=" ULINTPF
		", state: %s\n",
		srv_main_thread_process_no,
		srv_main_thread_id,
		srv_main_thread_op_info);
	fprintf(file,
		"Number of rows inserted " ULINTPF
		", updated " ULINTPF
		", deleted " ULINTPF
		", read " ULINTPF "\n",
		(ulint) srv_stats.n_rows_inserted,
		(ulint) srv_stats.n_rows_updated,
		(ulint) srv_stats.n_rows_deleted,
		(ulint) srv_stats.n_rows_read);
	/* Per-second rates since the previous snapshot of the counters. */
	fprintf(file,
		"%.2f inserts/s, %.2f updates/s,"
		" %.2f deletes/s, %.2f reads/s\n",
		((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
		/ time_elapsed);

	/* Take a new snapshot for the next call. */
	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
	srv_n_rows_updated_old = srv_stats.n_rows_updated;
	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
	srv_n_rows_read_old = srv_stats.n_rows_read;

	fputs("----------------------------\n"
	      "END OF INNODB MONITOR OUTPUT\n"
	      "============================\n", file);
	mutex_exit(&srv_innodb_monitor_mutex);
	fflush(file);

#ifndef NDEBUG
	srv_debug_monitor_printed = true;
#endif

	return(ret);
}
1598 
1599 /******************************************************************//**
1600 Function to pass InnoDB status variables to MySQL */
1601 void
srv_export_innodb_status(void)1602 srv_export_innodb_status(void)
1603 /*==========================*/
1604 {
1605 	buf_pool_stat_t		stat;
1606 	buf_pools_list_size_t	buf_pools_list_size;
1607 	ulint			LRU_len;
1608 	ulint			free_len;
1609 	ulint			flush_list_len;
1610 	fil_crypt_stat_t	crypt_stat;
1611 	btr_scrub_stat_t	scrub_stat;
1612 	ulint			mem_adaptive_hash, mem_dictionary;
1613 	ReadView*		oldest_view;
1614 	ulint			i;
1615 
1616 	buf_get_total_stat(&stat);
1617 	buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1618 	buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1619 	if (!srv_read_only_mode) {
1620 		fil_crypt_total_stat(&crypt_stat);
1621 		btr_scrub_total_stat(&scrub_stat);
1622 	}
1623 
1624 	os_rmb;
1625 	mem_adaptive_hash
1626 		= btr_search_sys_constant_mem + btr_search_sys_variable_mem;
1627 
1628 	mem_dictionary = dict_sys->hash_size + dict_sys->size;
1629 
1630 	mutex_enter(&srv_innodb_monitor_mutex);
1631 
1632 	export_vars.innodb_data_pending_reads =
1633 		os_n_pending_reads;
1634 
1635 	export_vars.innodb_data_pending_writes =
1636 		os_n_pending_writes;
1637 
1638 	export_vars.innodb_data_pending_fsyncs =
1639 		fil_n_pending_log_flushes
1640 		+ fil_n_pending_tablespace_flushes;
1641 	export_vars.innodb_adaptive_hash_hash_searches
1642 		= btr_cur_n_sea;
1643 	export_vars.innodb_adaptive_hash_non_hash_searches
1644 		= btr_cur_n_non_sea;
1645 	export_vars.innodb_background_log_sync
1646 		= srv_log_writes_and_flush;
1647 
1648 	export_vars.innodb_data_fsyncs = os_n_fsyncs;
1649 
1650 	export_vars.innodb_data_read = srv_stats.data_read;
1651 
1652 	export_vars.innodb_data_reads = os_n_file_reads;
1653 
1654 	export_vars.innodb_data_writes = os_n_file_writes;
1655 
1656 	export_vars.innodb_data_written = srv_stats.data_written;
1657 
1658 	export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
1659 
1660 	export_vars.innodb_buffer_pool_write_requests =
1661 		srv_stats.buf_pool_write_requests;
1662 
1663 	export_vars.innodb_buffer_pool_wait_free =
1664 		srv_stats.buf_pool_wait_free;
1665 
1666 	export_vars.innodb_buffer_pool_pages_flushed =
1667 		srv_stats.buf_pool_flushed;
1668 
1669 	export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1670 
1671 	export_vars.innodb_buffer_pool_read_ahead_rnd =
1672 		stat.n_ra_pages_read_rnd;
1673 
1674 	export_vars.innodb_buffer_pool_read_ahead =
1675 		stat.n_ra_pages_read;
1676 
1677 	export_vars.innodb_buffer_pool_read_ahead_evicted =
1678 		stat.n_ra_pages_evicted;
1679 
1680 	export_vars.innodb_buffer_pool_pages_LRU_flushed =
1681 		stat.buf_lru_flush_page_count;
1682 
1683 	export_vars.innodb_buffer_pool_pages_data = LRU_len;
1684 
1685 	export_vars.innodb_buffer_pool_bytes_data =
1686 		buf_pools_list_size.LRU_bytes
1687 		+ buf_pools_list_size.unzip_LRU_bytes;
1688 
1689 	export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1690 
1691 	export_vars.innodb_buffer_pool_bytes_dirty =
1692 		buf_pools_list_size.flush_list_bytes;
1693 
1694 	export_vars.innodb_buffer_pool_pages_free = free_len;
1695 
1696 #ifdef UNIV_DEBUG
1697 	export_vars.innodb_buffer_pool_pages_latched =
1698 		buf_get_latched_pages_number();
1699 #endif /* UNIV_DEBUG */
1700 	export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1701 
1702 	export_vars.innodb_buffer_pool_pages_misc =
1703 		buf_pool_get_n_pages() - LRU_len - free_len;
1704 
1705 	export_vars.innodb_buffer_pool_pages_made_young
1706 		= stat.n_pages_made_young;
1707 	export_vars.innodb_buffer_pool_pages_made_not_young
1708 		= stat.n_pages_not_made_young;
1709 	export_vars.innodb_buffer_pool_pages_old = 0;
1710 	for (i = 0; i < srv_buf_pool_instances; i++) {
1711 		buf_pool_t*	buf_pool = buf_pool_from_array(i);
1712 		export_vars.innodb_buffer_pool_pages_old
1713 			+= buf_pool->LRU_old_len;
1714 	}
1715 	export_vars.innodb_checkpoint_age
1716 		= (log_sys->lsn - log_sys->last_checkpoint_lsn);
1717 	export_vars.innodb_checkpoint_max_age
1718 		= log_sys->max_checkpoint_age;
1719 	ibuf_export_ibuf_status(
1720 			&export_vars.innodb_ibuf_free_list,
1721 			&export_vars.innodb_ibuf_segment_size);
1722 	export_vars.innodb_lsn_current
1723 		= log_sys->lsn;
1724 	export_vars.innodb_lsn_flushed
1725 		= log_sys->flushed_to_disk_lsn;
1726 	export_vars.innodb_lsn_last_checkpoint
1727 		= log_sys->last_checkpoint_lsn;
1728 	export_vars.innodb_master_thread_active_loops
1729 		= srv_main_active_loops;
1730 	export_vars.innodb_master_thread_idle_loops
1731 		= srv_main_idle_loops;
1732 	export_vars.innodb_max_trx_id
1733 		= trx_sys->max_trx_id;
1734 	export_vars.innodb_mem_adaptive_hash
1735 		= mem_adaptive_hash;
1736 	export_vars.innodb_mem_dictionary
1737 		= mem_dictionary;
1738 
1739 	mutex_enter(&trx_sys->mutex);
1740 	oldest_view = trx_sys->mvcc->get_oldest_view();
1741 	mutex_exit(&trx_sys->mutex);
1742 	export_vars.innodb_oldest_view_low_limit_trx_id
1743 		= oldest_view ? oldest_view->low_limit_id() : 0;
1744 
1745 	export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no;
1746 	export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no;
1747 
1748 	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1749 
1750 	export_vars.innodb_log_waits = srv_stats.log_waits;
1751 
1752 	export_vars.innodb_os_log_written = srv_stats.os_log_written;
1753 
1754 	export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1755 
1756 	export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1757 
1758 	export_vars.innodb_os_log_pending_writes =
1759 		srv_stats.os_log_pending_writes;
1760 
1761 	export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1762 
1763 	export_vars.innodb_log_writes = srv_stats.log_writes;
1764 
1765 	export_vars.innodb_dblwr_pages_written =
1766 		srv_stats.dblwr_pages_written;
1767 
1768 	export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1769 
1770 	export_vars.innodb_pages_created = stat.n_pages_created;
1771 
1772 	export_vars.innodb_pages_read = stat.n_pages_read;
1773 	export_vars.innodb_page0_read = srv_stats.page0_read;
1774 
1775 	export_vars.innodb_pages_written = stat.n_pages_written;
1776 
1777 	export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1778 
1779 	export_vars.innodb_row_lock_current_waits =
1780 		srv_stats.n_lock_wait_current_count;
1781 
1782 	export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1783 
1784 	if (srv_stats.n_lock_wait_count > 0) {
1785 
1786 		export_vars.innodb_row_lock_time_avg = (ulint)
1787 			(srv_stats.n_lock_wait_time
1788 			 / 1000 / srv_stats.n_lock_wait_count);
1789 
1790 	} else {
1791 		export_vars.innodb_row_lock_time_avg = 0;
1792 	}
1793 
1794 	export_vars.innodb_row_lock_time_max =
1795 		lock_sys->n_lock_max_wait_time / 1000;
1796 
1797 	export_vars.innodb_rows_read = srv_stats.n_rows_read;
1798 
1799 	export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1800 
1801 	export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1802 
1803 	export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1804 
1805 	export_vars.innodb_num_open_files = fil_n_file_opened;
1806 
1807 	export_vars.innodb_truncated_status_writes =
1808 		srv_truncated_status_writes;
1809 
1810 	export_vars.innodb_available_undo_logs = srv_available_undo_logs;
1811 
1812 	export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
1813 	export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted;
1814 
1815 	export_vars.innodb_n_merge_blocks_encrypted =
1816 		srv_stats.n_merge_blocks_encrypted;
1817 
1818 	export_vars.innodb_n_merge_blocks_decrypted =
1819 		srv_stats.n_merge_blocks_decrypted;
1820 
1821 	export_vars.innodb_n_rowlog_blocks_encrypted =
1822 		srv_stats.n_rowlog_blocks_encrypted;
1823 
1824 	export_vars.innodb_n_rowlog_blocks_decrypted =
1825 		srv_stats.n_rowlog_blocks_decrypted;
1826 
1827 #ifdef UNIV_DEBUG
1828 	rw_lock_s_lock(&purge_sys->latch);
1829 	trx_id_t	up_limit_id;
1830 	trx_id_t	done_trx_no	= purge_sys->done.trx_no;
1831 
1832 	up_limit_id	= purge_sys->view_active
1833 		? purge_sys->view.up_limit_id() : 0;
1834 
1835 	rw_lock_s_unlock(&purge_sys->latch);
1836 
1837 	mutex_enter(&trx_sys->mutex);
1838 	trx_id_t	max_trx_id	= trx_sys->rw_max_trx_id;
1839 	mutex_exit(&trx_sys->mutex);
1840 
1841 	if (!done_trx_no || max_trx_id < done_trx_no - 1) {
1842 		export_vars.innodb_purge_trx_id_age = 0;
1843 	} else {
1844 		export_vars.innodb_purge_trx_id_age =
1845 			(ulint) (max_trx_id - done_trx_no + 1);
1846 	}
1847 
1848 	if (!up_limit_id
1849 	    || max_trx_id < up_limit_id) {
1850 		export_vars.innodb_purge_view_trx_id_age = 0;
1851 	} else {
1852 		export_vars.innodb_purge_view_trx_id_age =
1853 			(ulint) (max_trx_id - up_limit_id);
1854 	}
1855 #endif /* UNIV_DEBUG */
1856 
1857 	os_rmb;
1858 	export_vars.innodb_sec_rec_cluster_reads =
1859 		srv_sec_rec_cluster_reads;
1860 	export_vars.innodb_sec_rec_cluster_reads_avoided =
1861 		srv_sec_rec_cluster_reads_avoided;
1862 
1863 	export_vars.innodb_buffered_aio_submitted =
1864 		srv_stats.n_aio_submitted;
1865 
1866 	thd_get_fragmentation_stats(current_thd,
1867 		&export_vars.innodb_fragmentation_stats);
1868 
1869 	if (!srv_read_only_mode) {
1870 	export_vars.innodb_encryption_rotation_pages_read_from_cache =
1871 		crypt_stat.pages_read_from_cache;
1872 	export_vars.innodb_encryption_rotation_pages_read_from_disk =
1873 		crypt_stat.pages_read_from_disk;
1874 	export_vars.innodb_encryption_rotation_pages_modified =
1875 		crypt_stat.pages_modified;
1876 	export_vars.innodb_encryption_rotation_pages_flushed =
1877 		crypt_stat.pages_flushed;
1878 	export_vars.innodb_encryption_rotation_estimated_iops =
1879 		crypt_stat.estimated_iops;
1880 	export_vars.innodb_encryption_key_requests =
1881 		srv_stats.n_key_requests;
1882 	export_vars.innodb_key_rotation_list_length =
1883 		srv_stats.key_rotation_list_length;
1884 
1885 	export_vars.innodb_scrub_page_reorganizations =
1886 		scrub_stat.page_reorganizations;
1887 	export_vars.innodb_scrub_page_splits =
1888 		scrub_stat.page_splits;
1889 	export_vars.innodb_scrub_page_split_failures_underflow =
1890 		scrub_stat.page_split_failures_underflow;
1891 	export_vars.innodb_scrub_page_split_failures_out_of_filespace =
1892 		scrub_stat.page_split_failures_out_of_filespace;
1893 	export_vars.innodb_scrub_page_split_failures_missing_index =
1894 		scrub_stat.page_split_failures_missing_index;
1895 	export_vars.innodb_scrub_page_split_failures_unknown =
1896 		scrub_stat.page_split_failures_unknown;
1897 	export_vars.innodb_scrub_log = srv_stats.n_log_scrubs;
1898 
1899 	export_vars.innodb_redo_key_version
1900 		= srv_redo_log_key_version;
1901         }
1902 
1903 	mutex_exit(&srv_innodb_monitor_mutex);
1904 }
1905 
#ifndef NDEBUG
/** Stays false until the InnoDB monitor output has been printed for the
first time, and is true from then on. Only defined when NDEBUG is not
defined. */
bool	srv_debug_monitor_printed = false;
#endif /* !NDEBUG */
1911 
1912 /*********************************************************************//**
1913 A thread which prints the info output by various InnoDB monitors.
1914 @return a dummy parameter */
1915 extern "C"
1916 os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)1917 DECLARE_THREAD(srv_monitor_thread)(
1918 /*===============================*/
1919 	void*	arg MY_ATTRIBUTE((unused)))
1920 			/*!< in: a dummy parameter required by
1921 			os_thread_create */
1922 {
1923 	int64_t		sig_count;
1924 	ib_time_monotonic_t		time_elapsed;
1925 	ib_time_monotonic_t		current_time;
1926 	ib_time_monotonic_t		last_monitor_time;
1927 	ulint		mutex_skipped;
1928 	ibool		last_srv_print_monitor;
1929 
1930 	ut_ad(!srv_read_only_mode);
1931 
1932 #ifdef UNIV_DEBUG_THREAD_CREATION
1933 	ib::info() << "Lock timeout thread starts, id "
1934 		<< os_thread_pf(os_thread_get_curr_id());
1935 #endif /* UNIV_DEBUG_THREAD_CREATION */
1936 
1937 #ifdef UNIV_PFS_THREAD
1938 	pfs_register_thread(srv_monitor_thread_key);
1939 #endif /* UNIV_PFS_THREAD */
1940 	srv_monitor_active = TRUE;
1941 
1942 	UT_NOT_USED(arg);
1943 	srv_last_monitor_time = last_monitor_time = ut_time_monotonic();
1944 	mutex_skipped = 0;
1945 	last_srv_print_monitor = srv_print_innodb_monitor;
1946 loop:
1947 	/* Wake up every 5 seconds to see if we need to print
1948 	monitor information or if signalled at shutdown. */
1949 
1950 	sig_count = os_event_reset(srv_monitor_event);
1951 
1952 	os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1953 
1954 	current_time = ut_time_monotonic();
1955 
1956 	time_elapsed = current_time - last_monitor_time;
1957 
1958 	if (time_elapsed > 15) {
1959 		last_monitor_time = ut_time_monotonic();
1960 
1961 		if (srv_print_innodb_monitor) {
1962 			/* Reset mutex_skipped counter everytime
1963 			srv_print_innodb_monitor changes. This is to
1964 			ensure we will not be blocked by lock_sys->mutex
1965 			for short duration information printing,
1966 			such as requested by sync_array_print_long_waits() */
1967 			if (!last_srv_print_monitor) {
1968 				mutex_skipped = 0;
1969 				last_srv_print_monitor = TRUE;
1970 			}
1971 
1972 			if (!srv_printf_innodb_monitor(stderr,
1973 						MUTEX_NOWAIT(mutex_skipped),
1974 						NULL, NULL)) {
1975 				mutex_skipped++;
1976 			} else {
1977 				/* Reset the counter */
1978 				mutex_skipped = 0;
1979 			}
1980 		} else {
1981 			last_srv_print_monitor = FALSE;
1982 		}
1983 
1984 
1985 		/* We don't create the temp files or associated
1986 		mutexes in read-only-mode */
1987 
1988 		if (!srv_read_only_mode && srv_innodb_status) {
1989 			mutex_enter(&srv_monitor_file_mutex);
1990 			rewind(srv_monitor_file);
1991 			if (!srv_printf_innodb_monitor(srv_monitor_file,
1992 						MUTEX_NOWAIT(mutex_skipped),
1993 						NULL, NULL)) {
1994 				mutex_skipped++;
1995 			} else {
1996 				mutex_skipped = 0;
1997 			}
1998 
1999 			os_file_set_eof(srv_monitor_file);
2000 			mutex_exit(&srv_monitor_file_mutex);
2001 		}
2002 	}
2003 
2004 	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2005 		goto exit_func;
2006 	}
2007 
2008 	if (srv_print_innodb_monitor || srv_print_innodb_lock_monitor) {
2009 		goto loop;
2010 	}
2011 
2012 	goto loop;
2013 
2014 exit_func:
2015 	srv_monitor_active = FALSE;
2016 
2017 	/* We count the number of threads in os_thread_exit(). A created
2018 	thread should always use that to exit and not use return() to exit. */
2019 
2020 	os_thread_exit();
2021 
2022 	OS_THREAD_DUMMY_RETURN;
2023 }
2024 
2025 /*********************************************************************//**
2026 A thread which prints warnings about semaphore waits which have lasted
2027 too long. These can be used to track bugs which cause hangs.
2028 @return a dummy parameter */
2029 extern "C"
2030 os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)2031 DECLARE_THREAD(srv_error_monitor_thread)(
2032 /*=====================================*/
2033 	void*	arg MY_ATTRIBUTE((unused)))
2034 			/*!< in: a dummy parameter required by
2035 			os_thread_create */
2036 {
2037 	/* number of successive fatal timeouts observed */
2038 	ulint		fatal_cnt	= 0;
2039 	lsn_t		old_lsn;
2040 	lsn_t		new_lsn;
2041 	int64_t		sig_count;
2042 	/* longest waiting thread for a semaphore */
2043 	os_thread_id_t	waiter		= os_thread_get_curr_id();
2044 	os_thread_id_t	old_waiter	= waiter;
2045 	/* the semaphore that is being waited for */
2046 	const void*	sema		= NULL;
2047 	const void*	old_sema	= NULL;
2048 
2049 	ut_ad(!srv_read_only_mode);
2050 
2051 	old_lsn = srv_start_lsn;
2052 
2053 #ifdef UNIV_DEBUG_THREAD_CREATION
2054 	ib::info() << "Error monitor thread starts, id "
2055 		<< os_thread_pf(os_thread_get_curr_id());
2056 #endif /* UNIV_DEBUG_THREAD_CREATION */
2057 
2058 #ifdef UNIV_PFS_THREAD
2059 	pfs_register_thread(srv_error_monitor_thread_key);
2060 #endif /* UNIV_PFS_THREAD */
2061 	srv_error_monitor_active = TRUE;
2062 
2063 loop:
2064 	/* Try to track a strange bug reported by Harald Fuchs and others,
2065 	where the lsn seems to decrease at times */
2066 
2067 	new_lsn = log_get_lsn();
2068 
2069 	if (new_lsn < old_lsn) {
2070 		ib::error() << "Old log sequence number " << old_lsn << " was"
2071 			<< " greater than the new log sequence number "
2072 			<< new_lsn << ". Please submit a bug report to"
2073 			" http://bugs.mysql.com";
2074 		ut_ad(0);
2075 	}
2076 
2077 	old_lsn = new_lsn;
2078 
2079 	if (ut_difftime(ut_time_monotonic(), srv_last_monitor_time) > 60) {
2080 		/* We referesh InnoDB Monitor values so that averages are
2081 		printed from at most 60 last seconds */
2082 
2083 		srv_refresh_innodb_monitor_stats();
2084 	}
2085 
2086 	/* Update the statistics collected for deciding LRU
2087 	eviction policy. */
2088 	buf_LRU_stat_update();
2089 
2090 	if (sync_array_print_long_waits(&waiter, &sema)
2091 	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2092 		fatal_cnt++;
2093 		if (fatal_cnt > 10) {
2094 			ib::fatal() << "Semaphore wait has lasted > "
2095 				<< srv_fatal_semaphore_wait_threshold
2096 				<< " seconds. We intentionally crash the"
2097 				" server because it appears to be hung.";
2098 		}
2099 	} else {
2100 		fatal_cnt = 0;
2101 		old_waiter = waiter;
2102 		old_sema = sema;
2103 	}
2104 
2105 	/* Flush stderr so that a database user gets the output
2106 	to possible MySQL error file */
2107 
2108 	fflush(stderr);
2109 
2110 	sig_count = os_event_reset(srv_error_event);
2111 
2112 	os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2113 
2114 	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2115 
2116 		goto loop;
2117 	}
2118 
2119 	srv_error_monitor_active = FALSE;
2120 
2121 	/* We count the number of threads in os_thread_exit(). A created
2122 	thread should always use that to exit and not use return() to exit. */
2123 
2124 	os_thread_exit();
2125 
2126 	OS_THREAD_DUMMY_RETURN;
2127 }
2128 
2129 /******************************************************************//**
2130 Increment the server activity count. */
2131 void
srv_inc_activity_count(bool ibuf_merge_activity)2132 srv_inc_activity_count(
2133 /*===================*/
2134 	bool ibuf_merge_activity)	/*!< whether this activity bump
2135 					is caused by the background
2136 					change buffer merge */
2137 {
2138 	srv_sys->activity_count.inc();
2139 	if (ibuf_merge_activity)
2140 		srv_sys->ibuf_merge_activity_count.inc();
2141 }
2142 
2143 /**********************************************************************//**
2144 Check whether any background thread is active. If so return the thread
2145 type.
2146 @return SRV_NONE if all are suspended or have exited, thread
2147 type if any are still active. */
2148 srv_thread_type
srv_get_active_thread_type(void)2149 srv_get_active_thread_type(void)
2150 /*============================*/
2151 {
2152 	srv_thread_type ret = SRV_NONE;
2153 
2154 	if (srv_read_only_mode) {
2155 		return(SRV_NONE);
2156 	}
2157 
2158 	srv_sys_mutex_enter();
2159 
2160 	for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
2161 		if (srv_sys->n_threads_active[i] != 0) {
2162 			ret = static_cast<srv_thread_type>(i);
2163 			break;
2164 		}
2165 	}
2166 
2167 	srv_sys_mutex_exit();
2168 
2169 	/* Check only on shutdown. */
2170 	if (ret == SRV_NONE
2171 	    && srv_shutdown_state != SRV_SHUTDOWN_NONE
2172 	    && trx_purge_state() != PURGE_STATE_DISABLED
2173 	    && trx_purge_state() != PURGE_STATE_EXIT) {
2174 
2175 		ret = SRV_PURGE;
2176 	}
2177 
2178 	return(ret);
2179 }
2180 
2181 /**********************************************************************//**
2182 Check whether any background thread are active. If so print which thread
2183 is active. Send the threads wakeup signal.
2184 @return name of thread that is active or NULL */
2185 const char*
srv_any_background_threads_are_active(void)2186 srv_any_background_threads_are_active(void)
2187 /*=======================================*/
2188 {
2189 	const char*	thread_active = NULL;
2190 
2191 	if (srv_read_only_mode) {
2192 		if (srv_buf_resize_thread_active) {
2193 			thread_active = "buf_resize_thread";
2194 		}
2195 		os_event_set(srv_buf_resize_event);
2196 		return(thread_active);
2197 	} else if (srv_error_monitor_active) {
2198 		thread_active = "srv_error_monitor_thread";
2199 	} else if (lock_sys->timeout_thread_active) {
2200 		thread_active = "srv_lock_timeout thread";
2201 	} else if (srv_monitor_active) {
2202 		thread_active = "srv_monitor_thread";
2203 	} else if (srv_buf_dump_thread_active) {
2204 		thread_active = "buf_dump_thread";
2205 	} else if (srv_buf_resize_thread_active) {
2206 		thread_active = "buf_resize_thread";
2207 	} else if (srv_dict_stats_thread_active) {
2208 		thread_active = "dict_stats_thread";
2209 	} else if (srv_n_fil_crypt_threads_started) {
2210 		thread_active = "fil_crypt_thread";
2211 	}
2212 
2213 	os_event_set(srv_error_event);
2214 	os_event_set(srv_monitor_event);
2215 	os_event_set(srv_buf_dump_event);
2216 	os_event_set(lock_sys->timeout_event);
2217 	os_event_set(dict_stats_event);
2218 	os_event_set(srv_buf_resize_event);
2219 	os_event_set(fil_crypt_threads_event);
2220 
2221 	return(thread_active);
2222 }
2223 
2224 /******************************************************************//**
2225 A thread which follows the redo log and outputs the changed page bitmap.
2226 @return a dummy value */
2227 extern "C"
2228 os_thread_ret_t
DECLARE_THREAD(srv_redo_log_follow_thread)2229 DECLARE_THREAD(srv_redo_log_follow_thread)(
2230 /*=======================================*/
2231 	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
2232 						     required by
2233 						     os_thread_create */
2234 {
2235 	ut_ad(!srv_read_only_mode);
2236 
2237 #ifdef UNIV_DEBUG_THREAD_CREATION
2238 	ib::info() << "Redo log follower thread starts, id "
2239 		   << os_thread_pf(os_thread_get_curr_id());
2240 #endif
2241 
2242 #ifdef UNIV_PFS_THREAD
2243 	pfs_register_thread(srv_log_tracking_thread_key);
2244 #endif
2245 
2246 	my_thread_init();
2247 	srv_redo_log_thread_started = true;
2248 
2249 	do {
2250 		os_event_wait(srv_checkpoint_completed_event);
2251 		os_event_reset(srv_checkpoint_completed_event);
2252 
2253 		if (srv_track_changed_pages
2254 		    && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
2255 			if (!log_online_follow_redo_log()) {
2256 				/* TODO: sync with I_S log tracking status? */
2257 				ib::error() << "Log tracking bitmap write "
2258 					"failed, stopping log tracking thread!";
2259 				break;
2260 			}
2261 			os_event_set(srv_redo_log_tracked_event);
2262 		}
2263 
2264 	} while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
2265 
2266 	log_online_read_shutdown();
2267 	os_event_set(srv_redo_log_tracked_event);
2268 
2269 	my_thread_end();
2270 	os_thread_exit();
2271 
2272 	OS_THREAD_DUMMY_RETURN;
2273 }
2274 
2275 /*******************************************************************//**
2276 Tells the InnoDB server that there has been activity in the database
2277 and wakes up the master thread if it is suspended (not sleeping). Used
2278 in the MySQL interface. Note that there is a small chance that the master
2279 thread stays suspended (we do not protect our operation with the
2280 srv_sys_t->mutex, for performance reasons). */
2281 void
srv_active_wake_master_thread_low()2282 srv_active_wake_master_thread_low()
2283 /*===============================*/
2284 {
2285 	ut_ad(!srv_read_only_mode);
2286 	ut_ad(!srv_sys_mutex_own());
2287 
2288 	srv_inc_activity_count();
2289 
2290 	if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
2291 		srv_slot_t*	slot;
2292 
2293 		srv_sys_mutex_enter();
2294 
2295 		slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
2296 
2297 		/* Only if the master thread has been started. */
2298 
2299 		if (slot->in_use) {
2300 			ut_a(srv_slot_get_type(slot) == SRV_MASTER);
2301 
2302 			if (slot->suspended) {
2303 
2304 				slot->suspended = FALSE;
2305 
2306 				++srv_sys->n_threads_active[SRV_MASTER];
2307 
2308 				os_event_set(slot->event);
2309 			}
2310 		}
2311 
2312 		srv_sys_mutex_exit();
2313 	}
2314 }
2315 
2316 /*******************************************************************//**
2317 Tells the purge thread that there has been activity in the database
2318 and wakes up the purge thread if it is suspended (not sleeping).  Note
2319 that there is a small chance that the purge thread stays suspended
2320 (we do not protect our check with the srv_sys_t:mutex and the
2321 purge_sys->latch, for performance reasons). */
2322 void
srv_wake_purge_thread_if_not_active(void)2323 srv_wake_purge_thread_if_not_active(void)
2324 /*=====================================*/
2325 {
2326 	ut_ad(!srv_sys_mutex_own());
2327 
2328 	if (purge_sys->state == PURGE_STATE_RUN
2329 	    && srv_sys->n_threads_active[SRV_PURGE] == 0) {
2330 
2331 		srv_release_threads(SRV_PURGE, 1);
2332 	}
2333 }
2334 
2335 /*******************************************************************//**
2336 Wakes up the master thread if it is suspended or being suspended. */
2337 void
srv_wake_master_thread(void)2338 srv_wake_master_thread(void)
2339 /*========================*/
2340 {
2341 	ut_ad(!srv_sys_mutex_own());
2342 
2343 	srv_inc_activity_count();
2344 
2345 	srv_release_threads(SRV_MASTER, 1);
2346 }
2347 
2348 /*******************************************************************//**
2349 Get current server activity count. We don't hold srv_sys::mutex while
2350 reading this value as it is only used in heuristics.
2351 @return activity count. */
2352 ulint
srv_get_activity_count(void)2353 srv_get_activity_count(void)
2354 /*========================*/
2355 {
2356 	return(srv_sys->activity_count);
2357 }
2358 
2359 /** Get current server ibuf merge activity count.
2360 @return ibuf merge activity count */
2361 static
2362 ulint
srv_get_ibuf_merge_activity_count(void)2363 srv_get_ibuf_merge_activity_count(void)
2364 {
2365 	return(srv_sys->ibuf_merge_activity_count);
2366 }
2367 
2368 /*******************************************************************//**
2369 Check if there has been any activity. Considers background change buffer
2370 merge as regular server activity unless a non-default
2371 old_ibuf_merge_activity_count value is passed, in which case the merge will be
2372 treated as keeping server idle.
2373 @return FALSE if no change in activity counter. */
2374 ibool
srv_check_activity(ulint old_activity_count,ulint old_ibuf_merge_activity_count)2375 srv_check_activity(
2376 /*===============*/
2377 	ulint		old_activity_count,	/*!< in: old activity count */
2378 						/*!< old change buffer merge
2379 						activity count, or
2380 						ULINT_UNDEFINED */
2381 	ulint		old_ibuf_merge_activity_count)
2382 {
2383 	ulint	new_activity_count = srv_sys->activity_count;
2384 	if (old_ibuf_merge_activity_count == ULINT_UNDEFINED)
2385 		return(new_activity_count != old_activity_count);
2386 
2387 	/* If we care about ibuf merge activity, then the server is considered
2388 	idle if all activity, if any, was due to ibuf merge. */
2389 	ulint	new_ibuf_merge_activity_count
2390 		= srv_sys->ibuf_merge_activity_count;
2391 
2392 	ut_ad(new_ibuf_merge_activity_count <= new_activity_count);
2393 	ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count);
2394 	ut_ad(new_activity_count >= old_activity_count);
2395 
2396 	ulint	ibuf_merge_activity_delta =
2397 		new_ibuf_merge_activity_count - old_ibuf_merge_activity_count;
2398 	ulint	activity_delta = new_activity_count - old_activity_count;
2399 
2400 	return (activity_delta > ibuf_merge_activity_delta);
2401 }
2402 
2403 /********************************************************************//**
2404 The master thread is tasked to ensure that flush of log file happens
2405 once every second in the background. This is to ensure that not more
2406 than one second of trxs are lost in case of crash when
2407 innodb_flush_logs_at_trx_commit != 1 */
2408 static
2409 void
srv_sync_log_buffer_in_background(void)2410 srv_sync_log_buffer_in_background(void)
2411 /*===================================*/
2412 {
2413 	ib_time_monotonic_t	current_time = ut_time_monotonic();
2414 
2415 	srv_main_thread_op_info = "flushing log";
2416 	if ((current_time - srv_last_log_flush_time)
2417 			>= srv_flush_log_at_timeout) {
2418 		log_buffer_sync_in_background(true);
2419 		srv_last_log_flush_time = current_time;
2420 		srv_log_writes_and_flush++;
2421 	}
2422 }
2423 
2424 /********************************************************************//**
2425 Make room in the table cache by evicting an unused table.
2426 @return number of tables evicted. */
2427 static
2428 ulint
srv_master_evict_from_table_cache(ulint pct_check)2429 srv_master_evict_from_table_cache(
2430 /*==============================*/
2431 	ulint	pct_check)	/*!< in: max percent to check */
2432 {
2433 	ulint	n_tables_evicted = 0;
2434 
2435 	rw_lock_x_lock(dict_operation_lock);
2436 
2437 	dict_mutex_enter_for_mysql();
2438 
2439 	n_tables_evicted = dict_make_room_in_cache(
2440 		innobase_get_table_cache_size(), pct_check);
2441 
2442 	dict_mutex_exit_for_mysql();
2443 
2444 	rw_lock_x_unlock(dict_operation_lock);
2445 
2446 	return(n_tables_evicted);
2447 }
2448 
2449 /*********************************************************************//**
2450 This function prints progress message every 60 seconds during server
2451 shutdown, for any activities that master thread is pending on. */
2452 static
2453 void
srv_shutdown_print_master_pending(ib_time_monotonic_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)2454 srv_shutdown_print_master_pending(
2455 /*==============================*/
2456 	ib_time_monotonic_t*	last_print_time, /*!< last time the function
2457 						 print the message */
2458 	ulint		n_tables_to_drop,	 /*!< number of tables to
2459 						 be dropped */
2460 	ulint		n_bytes_merged)		 /*!< number of change buffer
2461 						 just merged */
2462 {
2463 	ib_time_monotonic_t	current_time;
2464 	ib_time_monotonic_t	time_elapsed;
2465 
2466 	current_time = ut_time_monotonic();
2467 	time_elapsed = current_time - *last_print_time;
2468 
2469 	if (time_elapsed > 60) {
2470 		*last_print_time = ut_time_monotonic();
2471 
2472 		if (n_tables_to_drop) {
2473 			ib::info() << "Waiting for " << n_tables_to_drop
2474 				<< " table(s) to be dropped";
2475 		}
2476 
2477 		/* Check change buffer merge, we only wait for change buffer
2478 		merge if it is a slow shutdown */
2479 		if (!srv_fast_shutdown && n_bytes_merged) {
2480 			ib::info() << "Waiting for change buffer merge to"
2481 				" complete number of bytes of change buffer"
2482 				" just merged: " << n_bytes_merged;
2483 		}
2484 	}
2485 }
2486 
2487 #ifdef UNIV_DEBUG
2488 /** Waits in loop as long as master thread is disabled (debug) */
2489 static
2490 void
srv_master_do_disabled_loop(void)2491 srv_master_do_disabled_loop(void)
2492 {
2493 	if (!srv_master_thread_disabled_debug) {
2494 		/* We return here to avoid changing op_info. */
2495 		return;
2496 	}
2497 
2498 	srv_main_thread_op_info = "disabled";
2499 
2500 	while (srv_master_thread_disabled_debug) {
2501 		os_event_set(srv_master_thread_disabled_event);
2502 		if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
2503 			break;
2504 		}
2505 		os_thread_sleep(100000);
2506 	}
2507 
2508 	srv_main_thread_op_info = "";
2509 }
2510 
2511 /** Disables master thread. It's used by:
2512 	SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
2513 @param[in]	thd		thread handle
2514 @param[in]	var		pointer to system variable
2515 @param[out]	var_ptr		where the formal string goes
2516 @param[in]	save		immediate result from check function */
2517 void
srv_master_thread_disabled_debug_update(THD * thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)2518 srv_master_thread_disabled_debug_update(
2519 	THD*				thd,
2520 	struct st_mysql_sys_var*	var,
2521 	void*				var_ptr,
2522 	const void*			save)
2523 {
2524 	/* This method is protected by mutex, as every SET GLOBAL .. */
2525 	ut_ad(srv_master_thread_disabled_event != NULL);
2526 
2527 	const bool disable = *static_cast<const my_bool*>(save);
2528 
2529 	const int64_t sig_count = os_event_reset(
2530 		srv_master_thread_disabled_event);
2531 
2532 	srv_master_thread_disabled_debug = disable;
2533 
2534 	if (disable) {
2535 		os_event_wait_low(
2536 			srv_master_thread_disabled_event, sig_count);
2537 	}
2538 }
2539 #endif /* UNIV_DEBUG */
2540 
2541 /*********************************************************************//**
2542 Perform the tasks that the master thread is supposed to do when the
2543 server is active. There are two types of tasks. The first category is
2544 of such tasks which are performed at each inovcation of this function.
2545 We assume that this function is called roughly every second when the
2546 server is active. The second category is of such tasks which are
2547 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
2548 static
2549 void
srv_master_do_active_tasks(void)2550 srv_master_do_active_tasks(void)
2551 /*============================*/
2552 {
2553 	ib_time_monotonic_t	cur_time     = ut_time_monotonic();
2554 	ib_time_monotonic_us_t	counter_time = ut_time_monotonic_us();
2555 
2556 	/* First do the tasks that we are suppose to do at each
2557 	invocation of this function. */
2558 
2559 	++srv_main_active_loops;
2560 
2561 	MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2562 
2563 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2564 	can drop tables lazily after there no longer are SELECT
2565 	queries to them. */
2566 	srv_main_thread_op_info = "doing background drop tables";
2567 	row_drop_tables_for_mysql_in_background();
2568 	MONITOR_INC_TIME_IN_MICRO_SECS(
2569 		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
2570 
2571 	ut_d(srv_master_do_disabled_loop());
2572 
2573 	if (srv_shutdown_state > 0) {
2574 		return;
2575 	}
2576 
2577 	/* make sure that there is enough reusable space in the redo
2578 	log files */
2579 	srv_main_thread_op_info = "checking free log space";
2580 	log_free_check();
2581 
2582 	/* Do an ibuf merge */
2583 	srv_main_thread_op_info = "doing insert buffer merge";
2584 	counter_time = ut_time_monotonic_us();
2585 	ibuf_merge_in_background(false);
2586 	MONITOR_INC_TIME_IN_MICRO_SECS(
2587 		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2588 
2589 	/* Flush logs if needed */
2590 	srv_main_thread_op_info = "flushing log";
2591 	srv_sync_log_buffer_in_background();
2592 	MONITOR_INC_TIME_IN_MICRO_SECS(
2593 		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2594 
2595 	/* Now see if various tasks that are performed at defined
2596 	intervals need to be performed. */
2597 
2598 	if (srv_shutdown_state > 0) {
2599 		return;
2600 	}
2601 
2602 	if (srv_shutdown_state > 0) {
2603 		return;
2604 	}
2605 
2606 	if (trx_sys->rseg_history_len > 0) {
2607 		srv_wake_purge_thread_if_not_active();
2608 	}
2609 
2610 	if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2611 		srv_main_thread_op_info = "enforcing dict cache limit";
2612 		ulint	n_evicted = srv_master_evict_from_table_cache(50);
2613 		if (n_evicted != 0) {
2614 			MONITOR_INC_VALUE(
2615 				MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2616 		}
2617 		MONITOR_INC_TIME_IN_MICRO_SECS(
2618 			MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2619 	}
2620 
2621 	if (srv_shutdown_state > 0) {
2622 		return;
2623 	}
2624 
2625 	/* Make a new checkpoint */
2626 	if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
2627 		srv_main_thread_op_info = "making checkpoint";
2628 		log_checkpoint(TRUE, FALSE);
2629 		MONITOR_INC_TIME_IN_MICRO_SECS(
2630 			MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
2631 	}
2632 }
2633 
2634 /*********************************************************************//**
2635 Perform the tasks that the master thread is supposed to do whenever the
2636 server is idle. We do check for the server state during this function
2637 and if the server has entered the shutdown phase we may return from
2638 the function without completing the required tasks.
2639 Note that the server can move to active state when we are executing this
2640 function but we don't check for that as we are suppose to perform more
2641 or less same tasks when server is active. */
2642 static
2643 void
srv_master_do_idle_tasks(void)2644 srv_master_do_idle_tasks(void)
2645 /*==========================*/
2646 {
2647 	ib_time_monotonic_t	counter_time;
2648 
2649 	++srv_main_idle_loops;
2650 
2651 	MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2652 
2653 
2654 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2655 	can drop tables lazily after there no longer are SELECT
2656 	queries to them. */
2657 	counter_time = ut_time_monotonic_us();
2658 	srv_main_thread_op_info = "doing background drop tables";
2659 	row_drop_tables_for_mysql_in_background();
2660 	MONITOR_INC_TIME_IN_MICRO_SECS(
2661 		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2662 			 counter_time);
2663 
2664 	ut_d(srv_master_do_disabled_loop());
2665 
2666 	if (srv_shutdown_state > 0) {
2667 		return;
2668 	}
2669 
2670 	/* make sure that there is enough reusable space in the redo
2671 	log files */
2672 	srv_main_thread_op_info = "checking free log space";
2673 	log_free_check();
2674 
2675 	/* Do an ibuf merge */
2676 	counter_time = ut_time_monotonic_us();
2677 	srv_main_thread_op_info = "doing insert buffer merge";
2678 	ibuf_merge_in_background(true);
2679 	MONITOR_INC_TIME_IN_MICRO_SECS(
2680 		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2681 
2682 	if (srv_shutdown_state > 0) {
2683 		return;
2684 	}
2685 
2686 	if (trx_sys->rseg_history_len > 0) {
2687 		srv_wake_purge_thread_if_not_active();
2688 	}
2689 
2690 	srv_main_thread_op_info = "enforcing dict cache limit";
2691 	ulint	n_evicted = srv_master_evict_from_table_cache(100);
2692 	if (n_evicted != 0) {
2693 		MONITOR_INC_VALUE(
2694 			MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2695 	}
2696 	MONITOR_INC_TIME_IN_MICRO_SECS(
2697 		MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2698 
2699 	/* Flush logs if needed */
2700 	srv_sync_log_buffer_in_background();
2701 	MONITOR_INC_TIME_IN_MICRO_SECS(
2702 		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2703 
2704 	if (srv_shutdown_state > 0) {
2705 		return;
2706 	}
2707 
2708 	/* Make a new checkpoint */
2709 	srv_main_thread_op_info = "making checkpoint";
2710 	log_checkpoint(TRUE, FALSE);
2711 	MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
2712 				       counter_time);
2713 }
2714 
2715 /*********************************************************************//**
2716 Perform the tasks during shutdown. The tasks that we do at shutdown
2717 depend on srv_fast_shutdown:
2718 2 => very fast shutdown => do no book keeping
2719 1 => normal shutdown => clear drop table queue and make checkpoint
2720 0 => slow shutdown => in addition to above do complete purge and ibuf
2721 merge
2722 @return TRUE if some work was done. FALSE otherwise */
2723 static
2724 ibool
srv_master_do_shutdown_tasks(ib_time_monotonic_t * last_print_time)2725 srv_master_do_shutdown_tasks(
2726 /*=========================*/
2727 	ib_time_monotonic_t*	last_print_time)/*!< last time the function
2728 					print the message */
2729 {
2730 	ulint		n_bytes_merged = 0;
2731 	ulint		n_tables_to_drop = 0;
2732 
2733 	ut_ad(!srv_read_only_mode);
2734 
2735 	++srv_main_shutdown_loops;
2736 
2737 	ut_a(srv_shutdown_state > 0);
2738 
2739 	/* In very fast shutdown none of the following is necessary */
2740 	if (srv_fast_shutdown == 2) {
2741 		return(FALSE);
2742 	}
2743 
2744 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2745 	can drop tables lazily after there no longer are SELECT
2746 	queries to them. */
2747 	srv_main_thread_op_info = "doing background drop tables";
2748 	n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2749 
2750 	/* make sure that there is enough reusable space in the redo
2751 	log files */
2752 	srv_main_thread_op_info = "checking free log space";
2753 	log_free_check();
2754 
2755 	/* In case of normal shutdown we don't do ibuf merge or purge */
2756 	if (srv_fast_shutdown == 1) {
2757 		goto func_exit;
2758 	}
2759 
2760 	/* Do an ibuf merge */
2761 	srv_main_thread_op_info = "doing insert buffer merge";
2762 	n_bytes_merged = ibuf_merge_in_background(true);
2763 
2764 	/* Flush logs if needed */
2765 	srv_sync_log_buffer_in_background();
2766 
2767 func_exit:
2768 	/* Make a new checkpoint about once in 10 seconds */
2769 	srv_main_thread_op_info = "making checkpoint";
2770 	log_checkpoint(TRUE, FALSE);
2771 
2772 	/* Print progress message every 60 seconds during shutdown */
2773 	if (srv_shutdown_state > 0 && srv_print_verbose_log) {
2774 		srv_shutdown_print_master_pending(
2775 			last_print_time, n_tables_to_drop, n_bytes_merged);
2776 	}
2777 
2778 	return(n_bytes_merged || n_tables_to_drop);
2779 }
2780 
2781 /** Set temporary tablespace to be encrypted if global variable
2782 innodb_temp_tablespace_encrypt is TRUE
2783 @param[in]	enable	true to enable encryption, false to disable
2784 @return DB_SUCCESS on success, DB_ERROR on failure */
2785 dberr_t
srv_temp_encryption_update(bool enable)2786 srv_temp_encryption_update(bool enable)
2787 {
2788 	ut_ad(!srv_read_only_mode);
2789 
2790 	fil_space_t*	space = fil_space_get(srv_tmp_space.space_id());
2791 	bool		is_encrypted = FSP_FLAGS_GET_ENCRYPTION(space->flags);
2792 
2793 	ut_ad(fsp_is_system_temporary(space->id));
2794 
2795 	if (enable != is_encrypted) {
2796 		/* Toggle encryption */
2797 		dberr_t err = fil_temp_update_encryption(space, enable);
2798 		if (err == DB_SUCCESS) {
2799 			srv_tmp_space.set_flags(space->flags);
2800 		}
2801 		return (err);
2802 	}
2803 	return (DB_SUCCESS);
2804 }
2805 
2806 /*********************************************************************//**
2807 Puts master thread to sleep. At this point we are using polling to
2808 service various activities. Master thread sleeps for one second before
2809 checking the state of the server again */
2810 static
2811 void
srv_master_sleep(void)2812 srv_master_sleep(void)
2813 /*==================*/
2814 {
2815 	srv_main_thread_op_info = "sleeping";
2816 	os_thread_sleep(1000000);
2817 	srv_main_thread_op_info = "";
2818 }
2819 
/*********************************************************************//**
The master thread controlling the server.
After registering itself and reserving the SRV_MASTER slot the thread
cycles through three phases:
1) while no shutdown is in progress it sleeps via srv_master_sleep() and
then runs either the active or the idle task set, depending on what
srv_check_activity() reports;
2) once shutdown has started it repeats srv_master_do_shutdown_tasks()
until that reports no more work or the state becomes
SRV_SHUTDOWN_EXIT_THREADS;
3) it suspends itself and waits on its slot event; when woken it starts
over unless the state is SRV_SHUTDOWN_EXIT_THREADS, in which case it exits.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_master_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	my_thread_init();
	DBUG_ENTER("srv_master_thread");

	srv_slot_t*	slot;
	/* Snapshots of the activity counters; compared against the
	current values after every sleep to choose active vs. idle work. */
	ulint		old_activity_count = srv_get_activity_count();
	ulint		old_ibuf_merge_activity_count
		= srv_get_ibuf_merge_activity_count();
	/* Passed to the shutdown tasks, which hand it on to the
	progress-message printer. */
	ib_time_monotonic_t	last_print_time;

	ut_ad(!srv_read_only_mode);

	/* Record the OS thread id and apply the configured scheduling
	priority for the master thread. */
	srv_master_tid = os_thread_get_tid();

	os_thread_set_priority(srv_master_tid, srv_sched_priority_master);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Master thread starts, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_master_thread_key);
#endif /* UNIV_PFS_THREAD */

	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	/* The master thread is asserted to own the first slot of
	srv_sys->sys_threads. */
	slot = srv_reserve_slot(SRV_MASTER);
	ut_a(slot == srv_sys->sys_threads);

	last_print_time = ut_time_monotonic();
loop:
	/* With forced recovery at SRV_FORCE_NO_BACKGROUND or above no
	background work is done: go straight to suspension. */
	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
		goto suspend_thread;
	}

	/* Phase 1: normal operation, one iteration per sleep interval. */
	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {

		srv_master_sleep();

		MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);

		srv_current_thread_priority = srv_master_thread_priority;

		if (srv_check_activity(old_activity_count,
				       old_ibuf_merge_activity_count)) {

			/* Activity detected: refresh both snapshots
			before doing the active task set. */
			old_activity_count = srv_get_activity_count();
			old_ibuf_merge_activity_count
				= srv_get_ibuf_merge_activity_count();
			srv_master_do_active_tasks();
		} else {
			srv_master_do_idle_tasks();
		}

		srv_enable_undo_encryption_if_set();

		log_check_new_key_version();
	}

	/* Phase 2: shutdown. Keep running the shutdown tasks for as long
	as they report that some work was done. */
	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS
	       && srv_master_do_shutdown_tasks(&last_print_time)) {

		/* Shouldn't loop here in case of very fast shutdown */
		ut_ad(srv_fast_shutdown < 2);
	}

suspend_thread:
	/* Phase 3: suspend and wait to be woken through the slot event. */
	srv_main_thread_op_info = "suspending";

	srv_suspend_thread(slot);

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	os_event_wait(slot->event);

	/* Woken up: resume the cycle unless the threads were told to
	exit. */
	if (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
		goto loop;
	}

	my_thread_end();
	os_thread_exit();
	DBUG_RETURN(0);
}
2919 
2920 /**
2921 Check if purge should stop.
2922 @return true if it should shutdown. */
2923 static
2924 bool
srv_purge_should_exit(ulint n_purged)2925 srv_purge_should_exit(
2926 	ulint		n_purged)	/*!< in: pages purged in last batch */
2927 {
2928 	switch (srv_shutdown_state) {
2929 	case SRV_SHUTDOWN_NONE:
2930 		/* Normal operation. */
2931 		break;
2932 
2933 	case SRV_SHUTDOWN_CLEANUP:
2934 	case SRV_SHUTDOWN_EXIT_THREADS:
2935 		/* Exit unless slow shutdown requested or all done. */
2936 		return(srv_fast_shutdown != 0 || n_purged == 0);
2937 
2938 	case SRV_SHUTDOWN_LAST_PHASE:
2939 	case SRV_SHUTDOWN_FLUSH_PHASE:
2940 		ut_error;
2941 	}
2942 
2943 	return(false);
2944 }
2945 
2946 /*********************************************************************//**
2947 Fetch and execute a task from the work queue.
2948 @return true if a task was executed */
2949 static
2950 bool
srv_task_execute(void)2951 srv_task_execute(void)
2952 /*==================*/
2953 {
2954 	que_thr_t*	thr = NULL;
2955 
2956 	ut_ad(!srv_read_only_mode);
2957 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2958 
2959 	mutex_enter(&srv_sys->tasks_mutex);
2960 
2961 	if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2962 
2963 		thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2964 
2965 		ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2966 
2967 		UT_LIST_REMOVE(srv_sys->tasks, thr);
2968 	}
2969 
2970 	mutex_exit(&srv_sys->tasks_mutex);
2971 
2972 	if (thr != NULL) {
2973 
2974 		que_run_threads(thr);
2975 
2976 		os_atomic_inc_ulint(
2977 			&purge_sys->pq_mutex, &purge_sys->n_completed, 1);
2978 	}
2979 
2980 	return(thr != NULL);
2981 }
2982 
/* Counter atomically incremented by every starting worker thread; the
value obtained is that worker's index into srv_purge_tids[]. */
static ulint purge_tid_i = 0;

/*********************************************************************//**
Worker thread that reads tasks from the work queue and executes them.
The thread reserves an SRV_WORKER slot and then repeatedly suspends
itself, waits on its slot event, and tries to execute one queued task
after each wakeup. It leaves the loop once purge_sys->state is
PURGE_STATE_EXIT, frees its slot and exits.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	srv_slot_t*	slot;
	/* Index of this worker in srv_purge_tids[]. NOTE(review): index 0
	is written by the purge coordinator thread, so this presumably
	yields values starting at 1 - confirm against
	os_atomic_increment_ulint(). */
	ulint		tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);

	ut_ad(tid_i < srv_n_purge_threads);
	ut_ad(!srv_read_only_mode);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
	my_thread_init();
	THD *thd= create_thd(false, true, true, srv_worker_thread_key.m_value);

	/* Record the OS thread id and apply the configured purge
	scheduling priority. */
	srv_purge_tids[tid_i] = os_thread_get_tid();
	os_thread_set_priority(srv_purge_tids[tid_i],
			       srv_sched_priority_purge);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Worker thread starting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_WORKER);

	/* Workers are only started when more than one purge thread is
	configured. */
	ut_a(srv_n_purge_threads > 1);

	srv_sys_mutex_enter();

	ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);

	srv_sys_mutex_exit();

	/* We need to ensure that the worker threads exit after the
	purge coordinator thread. Otherwise the purge coordinator can
	end up waiting forever in trx_purge_wait_for_workers_to_complete() */

	do {
		/* Sleep until somebody releases this worker. */
		srv_suspend_thread(slot);

		os_event_wait(slot->event);

		srv_current_thread_priority = srv_purge_thread_priority;

		if (srv_task_execute()) {

			/* If there are tasks in the queue, wakeup
			the purge coordinator thread. */

			srv_wake_purge_thread_if_not_active();
		}

		/* Note: we are checking the state without holding the
		purge_sys->latch here. */
	} while (purge_sys->state != PURGE_STATE_EXIT);

	srv_free_slot(slot);

	/* Re-check the exit conditions, this time under the purge latch. */
	rw_lock_x_lock(&purge_sys->latch);

	ut_a(!purge_sys->running);
	ut_a(purge_sys->state == PURGE_STATE_EXIT);
	ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge worker thread exiting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	/* Release the session object created at thread start. */
	thd_free_innodb_session(thd);
	destroy_thd(thd);
        my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
3070 
3071 /*********************************************************************//**
3072 Do the actual purge operation.
3073 @return length of history list before the last purge batch. */
3074 static
3075 ulint
srv_do_purge(ulint n_threads,ulint * n_total_purged)3076 srv_do_purge(
3077 /*=========*/
3078 	ulint		n_threads,	/*!< in: number of threads to use */
3079 	ulint*		n_total_purged)	/*!< in/out: total pages purged */
3080 {
3081 	ulint		n_pages_purged;
3082 
3083 	static ulint	count = 0;
3084 	static ulint	n_use_threads = 0;
3085 	static ulint	rseg_history_len = 0;
3086 	ulint		old_activity_count = srv_get_activity_count();
3087 
3088 	ut_a(n_threads > 0);
3089 	ut_ad(!srv_read_only_mode);
3090 
3091 	/* Purge until there are no more records to purge and there is
3092 	no change in configuration or server state. If the user has
3093 	configured more than one purge thread then we treat that as a
3094 	pool of threads and only use the extra threads if purge can't
3095 	keep up with updates. */
3096 
3097 	if (n_use_threads == 0) {
3098 		n_use_threads = n_threads;
3099 	}
3100 
3101 	do {
3102 		srv_current_thread_priority = srv_purge_thread_priority;
3103 
3104 		if (trx_sys->rseg_history_len > rseg_history_len
3105 		    || (srv_max_purge_lag > 0
3106 			&& rseg_history_len > srv_max_purge_lag)) {
3107 
3108 			/* History length is now longer than what it was
3109 			when we took the last snapshot. Use more threads. */
3110 
3111 			if (n_use_threads < n_threads) {
3112 				++n_use_threads;
3113 			}
3114 
3115 		} else if (srv_check_activity(old_activity_count)
3116 			   && n_use_threads > 1) {
3117 
3118 			/* History length same or smaller since last snapshot,
3119 			use fewer threads. */
3120 
3121 			--n_use_threads;
3122 
3123 			old_activity_count = srv_get_activity_count();
3124 		}
3125 
3126 		/* Ensure that the purge threads are less than what
3127 		was configured. */
3128 
3129 		ut_a(n_use_threads > 0);
3130 		ut_a(n_use_threads <= n_threads);
3131 
3132 		/* Take a snapshot of the history list before purge. */
3133 		if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
3134 			break;
3135 		}
3136 
3137 		ulint	undo_trunc_freq =
3138 			purge_sys->undo_trunc.get_rseg_truncate_frequency();
3139 
3140 		ulint	rseg_truncate_frequency = ut_min(
3141 			static_cast<ulint>(srv_purge_rseg_truncate_frequency),
3142 			undo_trunc_freq);
3143 
3144 		n_pages_purged = trx_purge(
3145 			n_use_threads, srv_purge_batch_size,
3146 			(++count % rseg_truncate_frequency) == 0);
3147 
3148 		*n_total_purged += n_pages_purged;
3149 
3150 	} while (!srv_purge_should_exit(n_pages_purged)
3151 		 && n_pages_purged > 0
3152 		 && purge_sys->state == PURGE_STATE_RUN);
3153 
3154 	return(rseg_history_len);
3155 }
3156 
/*********************************************************************//**
Suspend the purge coordinator thread.
The thread marks purge as not running and waits on its slot event: with
a timeout of SRV_PURGE_MAX_TIMEOUT while the history list has not shrunk
below the given length, or without a timeout once purge has been stopped
or nothing new arrived during a timed wait. The loop ends when neither of
those stop conditions holds; on return the slot is marked as not
suspended again. */
static
void
srv_purge_coordinator_suspend(
/*==========================*/
	srv_slot_t*	slot,			/*!< in/out: Purge coordinator
						thread slot */
	ulint		rseg_history_len)	/*!< in: history list length
						before last purge */
{
	ut_ad(!srv_read_only_mode);
	ut_a(slot->type == SRV_PURGE);

	/* True when the next wait should be the untimed one. */
	bool		stop = false;

	/** Maximum wait time on the purge event, in micro-seconds. */
	static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;

	/* Value returned by srv_suspend_thread(), handed to the wait
	calls below. */
	int64_t		sig_count = srv_suspend_thread(slot);

	do {
		/* Result of the wait; 0 when no timed wait was done. */
		ulint		ret;

		rw_lock_x_lock(&purge_sys->latch);

		purge_sys->running = false;

		rw_lock_x_unlock(&purge_sys->latch);

		/* We don't wait right away on the non-timed wait because
		we want to signal the thread that wants to suspend purge. */

		if (stop) {
			os_event_wait_low(slot->event, sig_count);
			ret = 0;
		} else if (rseg_history_len <= trx_sys->rseg_history_len) {
			ret = os_event_wait_time_low(
				slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
		} else {
			/* We don't want to waste time waiting, if the
			history list increased by the time we got here,
			unless purge has been stopped. */
			ret = 0;
		}

		srv_sys_mutex_enter();

		/* The thread can be in state !suspended after the timeout
		but before this check if another thread sent a wakeup signal. */

		if (slot->suspended) {
			slot->suspended = FALSE;
			++srv_sys->n_threads_active[slot->type];
			ut_a(srv_sys->n_threads_active[slot->type] == 1);
		}

		srv_sys_mutex_exit();

		/* Suspend again right away so that sig_count is fresh
		for a possible next wait. */
		sig_count = srv_suspend_thread(slot);

		rw_lock_x_lock(&purge_sys->latch);

		/* Keep waiting only if purge was explicitly stopped and
		the server is not shutting down. */
		stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
			&& purge_sys->state == PURGE_STATE_STOP);

		if (!stop) {
			ut_a(purge_sys->n_stop == 0);
			purge_sys->running = true;
		} else {
			ut_a(purge_sys->n_stop > 0);

			/* Signal that we are suspended. */
			os_event_set(purge_sys->event);
		}

		rw_lock_x_unlock(&purge_sys->latch);

		if (ret == OS_SYNC_TIME_EXCEEDED) {

			/* No new records added since wait started then simply
			wait for new records. The magic number 5000 is an
			approximation for the case where we have cached UNDO
			log records which prevent truncate of the UNDO
			segments. */

			if (rseg_history_len == trx_sys->rseg_history_len
			    && trx_sys->rseg_history_len < 5000) {

				stop = true;
			}
		}

	} while (stop);

	/* The last srv_suspend_thread() call in the loop may have left
	the slot marked as suspended; undo that before returning. */
	srv_sys_mutex_enter();

	if (slot->suspended) {
		slot->suspended = FALSE;
		++srv_sys->n_threads_active[slot->type];
		ut_a(srv_sys->n_threads_active[slot->type] == 1);
	}

	srv_sys_mutex_exit();
}
3262 
/*********************************************************************//**
Purge coordinator thread that schedules the purge tasks.
The thread moves purge from PURGE_STATE_INIT to PURGE_STATE_RUN, reserves
the SRV_PURGE slot and then alternates between suspending itself (when
there is nothing to purge or purge has been stopped) and calling
srv_do_purge(). When srv_purge_should_exit() reports true it performs the
final purge passes, sets PURGE_STATE_EXIT, releases the worker threads so
that they can exit, and terminates.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)(
/*=========================================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	my_thread_init();
	THD *thd= create_thd(false, true, true, srv_purge_thread_key.m_value);
	srv_slot_t*	slot;
	/* Pages purged by the last srv_do_purge() call. NOTE(review): the
	initial value is presumably non-zero so that the first loop
	iteration does not suspend - confirm ULINT_UNDEFINED. */
	ulint           n_total_purged = ULINT_UNDEFINED;

	ut_ad(!srv_read_only_mode);
	ut_a(srv_n_purge_threads >= 1);
	ut_a(trx_purge_state() == PURGE_STATE_INIT);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);

	/* The coordinator uses entry 0 of srv_purge_tids[]. */
	srv_purge_tids[0] = os_thread_get_tid();
	os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);

	/* Mark purge as running, under the purge latch. */
	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->running = true;
	purge_sys->state = PURGE_STATE_RUN;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge coordinator thread created, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_PURGE);

	/* History list length as seen before the most recent purge;
	passed to the suspend routine. */
	ulint	rseg_history_len = trx_sys->rseg_history_len;

	do {
		/* If there are no records to purge or the last
		purge didn't purge any records then wait for activity. */

		if (srv_shutdown_state == SRV_SHUTDOWN_NONE
		    && (purge_sys->state == PURGE_STATE_STOP
			|| n_total_purged == 0)) {

			srv_purge_coordinator_suspend(slot, rseg_history_len);
		}

		/* Shutdown may have started while we were suspended. */
		if (srv_purge_should_exit(n_total_purged)) {
			ut_a(!slot->suspended);
			break;
		}

		n_total_purged = 0;

		srv_current_thread_priority = srv_purge_thread_priority;

		rseg_history_len = srv_do_purge(
			srv_n_purge_threads, &n_total_purged);

		/* Purging counts as server activity. */
		if (n_total_purged != 0) {
			srv_inc_activity_count();
		}

	} while (!srv_purge_should_exit(n_total_purged));

	/* Ensure that we don't jump out of the loop unless the
	exit condition is satisfied. */

	ut_a(srv_purge_should_exit(n_total_purged));

	/* Non-zero start value so that the loop below runs at least once
	on a slow shutdown. */
	ulint	n_pages_purged = ULINT_MAX;

	/* Ensure that all records are purged if it is not a fast shutdown.
	This covers the case where a record can be added after we exit the
	loop above. */
	while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
		n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
	}

#ifdef UNIV_DEBUG
	if (srv_fast_shutdown == 0) {
		trx_commit_disallowed = true;
	}
#endif /* UNIV_DEBUG */

	/* This trx_purge is called to remove any undo records (added by
	background threads) after completion of the above loop. When
	srv_fast_shutdown != 0, a large batch size can cause significant
	delay in shutdown, so reducing the batch size to magic number 20
	(which was default in 5.5), which we hope will be sufficient to
	remove all the undo records */
	const	uint temp_batch_size = 20;

	/* Batch size is the smaller of the configured size and 20; the
	last argument requests a truncate. */
	n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
				      ? srv_purge_batch_size : temp_batch_size,
				   true);
	ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);

	/* The task queue should always be empty, independent of fast
	shutdown state. */
	ut_a(srv_get_task_queue_length() == 0);

	srv_free_slot(slot);

	/* Note that we are shutting down. */
	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->state = PURGE_STATE_EXIT;

	/* If there are any pending undo-tablespace truncate then clear
	it off as we plan to shutdown the purge thread. */
	purge_sys->undo_trunc.clear();

	purge_sys->running = false;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge coordinator exiting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	/* Ensure that all the worker threads quit. */
	if (srv_n_purge_threads > 1) {
		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
	}

	/* Release the session object created at thread start. */
	thd_free_innodb_session(thd);
	destroy_thd(thd);
	my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
3402 
3403 /**********************************************************************//**
3404 Enqueues a task to server task queue and releases a worker thread, if there
3405 is a suspended one. */
3406 void
srv_que_task_enqueue_low(que_thr_t * thr)3407 srv_que_task_enqueue_low(
3408 /*=====================*/
3409 	que_thr_t*	thr)	/*!< in: query thread */
3410 {
3411 	ut_ad(!srv_read_only_mode);
3412 	mutex_enter(&srv_sys->tasks_mutex);
3413 
3414 	UT_LIST_ADD_LAST(srv_sys->tasks, thr);
3415 
3416 	mutex_exit(&srv_sys->tasks_mutex);
3417 
3418 	srv_release_threads(SRV_WORKER, 1);
3419 }
3420 
3421 /**********************************************************************//**
3422 Get count of tasks in the queue.
3423 @return number of tasks in queue */
3424 ulint
srv_get_task_queue_length(void)3425 srv_get_task_queue_length(void)
3426 /*===========================*/
3427 {
3428 	ulint	n_tasks;
3429 
3430 	ut_ad(!srv_read_only_mode);
3431 
3432 	mutex_enter(&srv_sys->tasks_mutex);
3433 
3434 	n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
3435 
3436 	mutex_exit(&srv_sys->tasks_mutex);
3437 
3438 	return(n_tasks);
3439 }
3440 
3441 /**********************************************************************//**
3442 Wakeup the purge threads. */
3443 void
srv_purge_wakeup(void)3444 srv_purge_wakeup(void)
3445 /*==================*/
3446 {
3447 	ut_ad(!srv_read_only_mode);
3448 
3449 	if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
3450 
3451 		srv_release_threads(SRV_PURGE, 1);
3452 
3453 		if (srv_n_purge_threads > 1) {
3454 			ulint	n_workers = srv_n_purge_threads - 1;
3455 
3456 			srv_release_threads(SRV_WORKER, n_workers);
3457 		}
3458 	}
3459 }
3460 
3461 /** Check if tablespace is being truncated.
3462 (Ignore system-tablespace as we don't re-create the tablespace
3463 and so some of the action that are suppressed by this function
3464 for independent tablespace are not applicable to system-tablespace).
3465 @param	space_id	space_id to check for truncate action
3466 @return true		if being truncated, false if not being
3467 			truncated or tablespace is system-tablespace. */
3468 bool
srv_is_tablespace_truncated(ulint space_id)3469 srv_is_tablespace_truncated(ulint space_id)
3470 {
3471 	if (is_system_tablespace(space_id)) {
3472 		return(false);
3473 	}
3474 
3475 	return(truncate_t::is_tablespace_truncated(space_id)
3476 	       || undo::Truncate::is_tablespace_truncated(space_id));
3477 
3478 }
3479 
3480 /** Check if tablespace was truncated.
3481 @param[in]	space	space object to check for truncate action
3482 @return true if tablespace was truncated and we still have an active
3483 MLOG_TRUNCATE REDO log record. */
3484 bool
srv_was_tablespace_truncated(const fil_space_t * space)3485 srv_was_tablespace_truncated(const fil_space_t* space)
3486 {
3487 	if (space == NULL) {
3488 		ut_ad(0);
3489 		return(false);
3490 	}
3491 
3492 	bool	has_shared_space = FSP_FLAGS_GET_SHARED(space->flags);
3493 
3494 	if (is_system_tablespace(space->id) || has_shared_space) {
3495 		return(false);
3496 	}
3497 
3498 	return(truncate_t::was_tablespace_truncated(space->id));
3499 }
3500 
/** Terminate the server after an unrecoverable error: log a final error
message, flush stderr, shut down all background threads and call exit(3).
This function does not return. */
void
srv_fatal_error()
{

	ib::error() << "Cannot continue operation.";

	/* Push out anything buffered on stderr before the process ends. */
	fflush(stderr);

	/* Debug builds only: record that the exit below is deliberate. */
	ut_d(innodb_calling_exit = true);

	srv_shutdown_all_bg_threads();

	exit(3);
}
3516 
3517 /** Check whether given space id is undo tablespace id
3518 @param[in]	space_id	space id to check
3519 @return true if it is undo tablespace else false. */
3520 bool
srv_is_undo_tablespace(ulint space_id)3521 srv_is_undo_tablespace(
3522 	ulint	space_id)
3523 {
3524 	if (srv_undo_space_id_start == 0) {
3525 		return(false);
3526 	}
3527 
3528 	return(space_id >= srv_undo_space_id_start
3529 	       && space_id < (srv_undo_space_id_start
3530 			      + srv_undo_tablespaces_open));
3531 }
3532 
3533 bool
srv_enable_redo_encryption(THD * thd)3534 srv_enable_redo_encryption(THD* thd)
3535 {
3536 	if (srv_redo_log_encrypt == REDO_LOG_ENCRYPT_MK) {
3537 		return srv_enable_redo_encryption_mk(thd);
3538 	}
3539 
3540 	if (srv_redo_log_encrypt == REDO_LOG_ENCRYPT_RK) {
3541 		return srv_enable_redo_encryption_rk(thd);
3542 	}
3543 
3544 	return false;
3545 }
3546 
3547 bool
srv_enable_redo_encryption_mk(THD * thd)3548 srv_enable_redo_encryption_mk(THD* thd)
3549 {
3550 	switch (existing_redo_encryption_mode) {
3551 	case REDO_LOG_ENCRYPT_RK:
3552                 ib::warn() <<
3553                         "Redo log encryption mode"
3554                         " can't be switched without stopping the server and"
3555                         " recreating the redo logs. Current mode is "
3556                         << log_encrypt_name(existing_redo_encryption_mode)
3557                         << ", requested master_key.";
3558 		if (thd != NULL) {
3559 			ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_CANT_BE_CHANGED,
3560 				    "master_key",
3561 				    log_encrypt_name(existing_redo_encryption_mode));
3562 		}
3563 
3564 		return true;
3565 	case REDO_LOG_ENCRYPT_OFF:
3566 	case REDO_LOG_ENCRYPT_MK:
3567 		break;
3568 	}
3569 
3570 	fil_space_t* space = fil_space_get(dict_sys_t::s_log_space_first_id);
3571 	if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
3572 		return false;
3573 	}
3574 	byte key[ENCRYPTION_KEY_LEN];
3575 	byte iv[ENCRYPTION_KEY_LEN];
3576 
3577 	Encryption::random_value(iv);
3578 	Encryption::random_value(key);
3579 
3580 	if (!log_write_encryption(key, iv, REDO_LOG_ENCRYPT_MK)) {
3581 
3582 		ib::error() << "Can't set redo log tablespace to be encrypted.";
3583 		if (thd != NULL) {
3584 			ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3585 				    "Can't set redo log tablespace to be"
3586 				    " encrypted.");
3587 		}
3588 		return true;
3589 	}
3590 
3591 	space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
3592 
3593 	const dberr_t err = fil_set_encryption(space->id, Encryption::AES, key, iv);
3594 	if (err != DB_SUCCESS) {
3595 		ib::error() << "Can't set redo log tablespace to be encrypted.";
3596 		if (thd != NULL) {
3597 			ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3598 				    "Can't set redo log tablespace to be"
3599 				    " encrypted.");
3600 		}
3601 		return true;
3602 	}
3603 
3604 	ib::info() << "Redo log encryption is enabled.";
3605 
3606 	return false;
3607 }
3608 
3609 
3610 bool
srv_enable_redo_encryption_rk(THD * thd)3611 srv_enable_redo_encryption_rk(THD* thd)
3612 {
3613 	switch (existing_redo_encryption_mode) {
3614 	case REDO_LOG_ENCRYPT_MK:
3615                 ib::error() <<
3616                         "Redo log encryption mode"
3617                         " can't be switched without stopping the server and"
3618                         " recreating the redo logs. Current mode is "
3619                         << log_encrypt_name(existing_redo_encryption_mode)
3620                         << ", requested keyring_key.";
3621 		if (thd != NULL) {
3622 			ib_senderrf(thd, IB_LOG_LEVEL_WARN,
3623 				    ER_REDO_ENCRYPTION_CANT_BE_CHANGED,
3624 				    "keyring_key",
3625 				    log_encrypt_name(existing_redo_encryption_mode));
3626 		}
3627 		return true;
3628 	case REDO_LOG_ENCRYPT_OFF:
3629 	case REDO_LOG_ENCRYPT_RK:
3630 		break;
3631 	}
3632 
3633 	fil_space_t* space = fil_space_get(dict_sys_t::s_log_space_first_id);
3634 	if (FSP_FLAGS_GET_ENCRYPTION(space->flags))
3635 	{
3636 		return(false);
3637 	}
3638 
3639 	byte key[ENCRYPTION_KEY_LEN];
3640         byte iv[ENCRYPTION_KEY_LEN];
3641 	uint version;
3642 
3643 	Encryption::random_value(iv);
3644 
3645 	// load latest key & write version
3646 
3647         redo_log_key* mkey = redo_log_key_mgr.load_latest_key(thd, true);
3648 	if (mkey == NULL) {
3649 		return(true);
3650 	}
3651 	version = mkey->version;
3652 	srv_redo_log_key_version = version;
3653 	memcpy(key, mkey->key, ENCRYPTION_KEY_LEN);
3654 
3655 #ifdef UNIV_ENCRYPT_DEBUG
3656 	fprintf(stderr, "Fetched redo key: %s.\n", key);
3657 #endif
3658 
3659 	if (!log_write_encryption(key, iv, REDO_LOG_ENCRYPT_RK)) {
3660 		ib::error() << "Can't set redo log tablespace to be"
3661 			" encrypted.";
3662 		if (thd != NULL) {
3663 			ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3664 				    "Can't set redo log tablespace to be"
3665 				    " encrypted.");
3666 		}
3667 		return(true);
3668 	}
3669 
3670 	space->encryption_redo_key = mkey;
3671 	space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
3672 	space->encryption_key_version = version;
3673 	dberr_t err = fil_set_encryption(
3674 			space->id, Encryption::KEYRING,
3675 			key, iv);
3676 
3677 	if(err != DB_SUCCESS) {
3678 		ib::error() << "Can't set redo log tablespace to be encrypted.";
3679 		if (thd != NULL) {
3680 			ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3681 				    "Can't set redo log tablespace to be"
3682 				    " encrypted.");
3683 		}
3684 		return(true);
3685 	}
3686 
3687 	ib::info() << "Redo log encryption is enabled.";
3688 
3689 	return(false);
3690 }
3691 
3692 
3693 /** Enable the undo log encryption if it is set.
3694 It will try to enable the undo log encryption and write the metadata to
3695 undo log file header, if innodb_undo_log_encrypt is ON. */
3696 static
3697 void
srv_enable_undo_encryption_if_set()3698 srv_enable_undo_encryption_if_set()
3699 {
3700 	fil_space_t*	space;
3701 	const char*	cant_set_undo_tablespace = "Can't set undo tablespace";
3702 	const char*	to_be_encrypted = " to be encrypted";
3703 	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
3704 		return;
3705 	}
3706 
3707 	/* Check if encryption for undo log is enabled or not. If it's
3708 	   enabled, we will store the encryption metadata to the space header
3709 	   and start to encrypt the undo log block from now on. */
3710 	if (srv_undo_log_encrypt) {
3711 		if (srv_undo_tablespaces == 0) {
3712 			srv_undo_log_encrypt = false;
3713 			ib::error() << cant_set_undo_tablespace << "s"
3714 				<< to_be_encrypted
3715 				<< ", since innodb_undo_tablespaces=0.";
3716 			return;
3717 		}
3718 		if (srv_read_only_mode) {
3719 			srv_undo_log_encrypt = false;
3720 			ib::error() << cant_set_undo_tablespace << "s"
3721 				<< to_be_encrypted
3722 				<< " in read-only mode.";
3723 			return;
3724 		}
3725 		ulint undo_spaces[TRX_SYS_N_RSEGS + 1];
3726 		const ulint undo_spaces_no = trx_rseg_get_n_undo_tablespaces(undo_spaces);
3727 		for (ulint undo_idx = 0; undo_idx < undo_spaces_no; ++undo_idx)
3728 		{
3729 			/* Skip system tablespace, since it's also shared
3730 			   tablespace. */
3731 			const ulint space_id = undo_spaces[undo_idx];
3732 			if (space_id == TRX_SYS_SPACE) {
3733 				continue;
3734 			}
3735 			space = fil_space_get(space_id);
3736 			ut_ad(fsp_is_undo_tablespace(space_id));
3737 			/* This flag will be written to the header
3738 			   later, by calling the fsp_header_write_encryption()
3739 function: */
3740 			ulint	new_flags =
3741 				space->flags | FSP_FLAGS_MASK_ENCRYPTION;
3742 			/* We need the server_uuid initialized, otherwise,
3743 			   the keyname will not contains server uuid. */
3744 			if (FSP_FLAGS_GET_ENCRYPTION(space->flags)
3745 					|| strlen(server_uuid) == 0) {
3746 				continue;
3747 			}
3748 			dberr_t err;
3749 			mtr_t	mtr;
3750 			byte	encrypt_info[ENCRYPTION_INFO_SIZE_V2];
3751 			byte	key[ENCRYPTION_KEY_LEN];
3752 			byte	iv[ENCRYPTION_KEY_LEN];
3753 			Encryption::random_value(key);
3754 			Encryption::random_value(iv);
3755 			mtr_start(&mtr);
3756 			mtr_x_lock_space(space->id, &mtr);
3757 			memset(encrypt_info, 0,
3758 					ENCRYPTION_INFO_SIZE_V2);
3759 			if (!Encryption::fill_encryption_info(
3760 						key, iv,
3761 						encrypt_info)) {
3762 				srv_undo_log_encrypt = false;
3763 				ib::error() << cant_set_undo_tablespace
3764 					<< " number " << space_id
3765 					<< to_be_encrypted << ".";
3766 				mtr_commit(&mtr);
3767 				return;
3768 			} else {
3769 				if (!fsp_header_write_encryption(
3770 							space->id,
3771 							new_flags,
3772 							encrypt_info,
3773 							true,
3774 							&mtr)) {
3775 					srv_undo_log_encrypt = false;
3776 					ib::error() << cant_set_undo_tablespace
3777 						<< " number "
3778 						<< space_id
3779 						<< to_be_encrypted
3780 						<< ". Failed to write header"
3781 						<< " page.";
3782 					mtr_commit(&mtr);
3783 					return;
3784 				}
3785 				space->flags |=
3786 					FSP_FLAGS_MASK_ENCRYPTION;
3787 				err = fil_set_encryption(
3788 						space->id, Encryption::AES,
3789 						key, iv);
3790 				if (err != DB_SUCCESS) {
3791 					srv_undo_log_encrypt = false;
3792 					ib::error() << cant_set_undo_tablespace
3793 						<< " number "
3794 						<< space_id
3795 						<< to_be_encrypted
3796 						<< ". Error=" << err << ".";
3797 					mtr_commit(&mtr);
3798 					return;
3799 				} else {
3800 					ib::info() << "Encryption is enabled"
3801 						" for undo tablespace number "
3802 						<< space_id << ".";
3803 #ifdef UNIV_ENCRYPT_DEBUG
3804 					ut_print_buf(stderr, key, 32);
3805 					ut_print_buf(stderr, iv, 32);
3806 #endif
3807 				}
3808 			}
3809 			mtr_commit(&mtr);
3810 		}
3811 		//undo::spaces->s_unlock();
3812 		return;
3813 	}
3814 	/* If the undo log space is using default key, rotate
3815 	   it. We need the server_uuid initialized, otherwise,
3816 	   the keyname will not contains server uuid. */
3817 	if (Encryption::master_key_id != 0
3818 			|| srv_read_only_mode
3819 			|| strlen(server_uuid) == 0) {
3820 		return;
3821 	}
3822 	ulint undo_spaces[TRX_SYS_N_RSEGS + 1];
3823 	const ulint undo_spaces_no = trx_rseg_get_n_undo_tablespaces(undo_spaces);
3824 	for (ulint undo_idx = 0; undo_idx < undo_spaces_no; ++undo_idx)
3825 	{
3826 		const ulint space_id = undo_spaces[undo_idx];
3827 		ut_ad(fsp_is_undo_tablespace(space_id));
3828 		space = fil_space_get(space_id);
3829 		ut_ad(space);
3830 		if (space->encryption_type == Encryption::NONE) {
3831 			continue;
3832 		}
3833 		byte	encrypt_info[ENCRYPTION_INFO_SIZE_V2];
3834 		mtr_t	mtr;
3835 		ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
3836 		mtr_start(&mtr);
3837 		mtr_x_lock_space(space->id, &mtr);
3838 		memset(encrypt_info, 0,
3839 				ENCRYPTION_INFO_SIZE_V2);
3840 		if (!fsp_header_rotate_encryption(
3841 					space,
3842 					encrypt_info,
3843 					&mtr)) {
3844 			ib::error() << "Can't rotate encryption on undo"
3845 				" tablespace number "
3846 				<< space_id << ".";
3847 		} else {
3848 			ib::info() << "Encryption is enabled"
3849 				" for undo tablespace number "
3850 				<< space_id << ".";
3851 		}
3852 		mtr_commit(&mtr);
3853 	}
3854 }
3855