1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License, version 2.0, as published by the
22 Free Software Foundation.
23 
24 This program is also distributed with certain software (including but not
25 limited to OpenSSL) that is licensed under separate terms, as designated in a
26 particular file or component or in included license documentation. The authors
27 of MySQL hereby grant you an additional permission to link the program and
28 your derivative works with the separately licensed software that they have
29 included with MySQL.
30 
31 This program is distributed in the hope that it will be useful, but WITHOUT
32 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
34 for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
39 
40 *****************************************************************************/
41 
42 /** @file srv/srv0srv.cc
43  The database server main program
44 
45  Created 10/8/1995 Heikki Tuuri
46  *******************************************************/
47 
48 #ifndef UNIV_HOTBACKUP
49 #include <mysqld.h>
50 #include <sys/types.h>
51 #include <time.h>
52 
53 #include <chrono>
54 
55 #include "btr0sea.h"
56 #include "buf0flu.h"
57 #include "buf0lru.h"
58 #include "clone0api.h"
59 #include "dict0boot.h"
60 #include "dict0load.h"
61 #include "dict0stats_bg.h"
62 #include "fsp0sysspace.h"
63 #include "ha_prototypes.h"
64 #endif /* !UNIV_HOTBACKUP */
65 #include "ibuf0ibuf.h"
66 #ifndef UNIV_HOTBACKUP
67 #include "lock0lock.h"
68 #include "log0recv.h"
69 #include "mem0mem.h"
70 #include "os0proc.h"
71 #include "os0thread-create.h"
72 #include "pars0pars.h"
73 #include "que0que.h"
74 #include "row0mysql.h"
75 #include "sql_thd_internal_api.h"
76 #include "srv0mon.h"
77 
78 #include "my_dbug.h"
79 #include "my_psi_config.h"
80 
81 #endif /* !UNIV_HOTBACKUP */
82 #include "srv0srv.h"
83 #include "srv0start.h"
84 #include "sync0sync.h"
85 #ifndef UNIV_HOTBACKUP
86 #include "trx0i_s.h"
87 #include "trx0purge.h"
88 #include "usr0sess.h"
89 #include "ut0crc32.h"
90 #endif /* !UNIV_HOTBACKUP */
91 #include "ut0mem.h"
92 
93 #ifdef UNIV_HOTBACKUP
94 #include "page0size.h"
95 #else
/** Structure with state of srv background threads.
Defined only when UNIV_HOTBACKUP is not defined (this is the #else branch
of the surrounding conditional). */
Srv_threads srv_threads;

/** Structure with cpu usage information.
Like srv_threads, it is only defined when UNIV_HOTBACKUP is not defined. */
Srv_cpu_usage srv_cpu_usage;
101 #endif /* UNIV_HOTBACKUP */
102 
#ifdef INNODB_DD_TABLE
/** True when upgrading. This flag and the two below are compiled only when
INNODB_DD_TABLE is defined, and all three start out false. */
bool srv_is_upgrade_mode = false;
/* NOTE(review): the next two flags carry no description in this file.
Judging by their names only, they concern downgrading logs and old undo
found during an upgrade - confirm against their users. */
bool srv_downgrade_logs = false;
bool srv_upgrade_old_undo_found = false;
#endif /* INNODB_DD_TABLE */

/** Revert to old partition file name if upgrade fails. */
bool srv_downgrade_partition_files = false;

/** The following is the maximum allowed duration of a lock wait.
NOTE(review): the variable name speaks of a semaphore wait and the unit is
not stated here (presumably seconds) - confirm. */
ulong srv_fatal_semaphore_wait_threshold = 600;

/** How long data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread.
Starts out at 0. */
ulint srv_dml_needed_delay = 0;

/** NOTE(review): undocumented in this file; by its name, a description of
what the main thread is currently doing. Starts out as the empty string. */
const char *srv_main_thread_op_info = "";
121 
/* Server parameters which are read from the initfile */

/* The following are directory paths which are concatenated before file
names, where the file name itself may also contain a path. Every one of
them starts out as nullptr. */

char *srv_data_home = nullptr;

/** Separate directory for doublewrite files, if it is not nullptr */
char *srv_doublewrite_dir = nullptr;

/** The innodb_directories variable value. This is a list of directories
delimited by ';', i.e. the FIL_PATH_SEPARATOR. */
char *srv_innodb_directories = nullptr;

/** Undo tablespace directories.  This can be multiple paths
separated by ';' and can also be absolute paths. */
char *srv_undo_dir = nullptr;
139 
/** The number of implicit undo tablespaces to use for rollback
segments. Starts out at FSP_IMPLICIT_UNDO_TABLESPACES. */
ulong srv_undo_tablespaces = FSP_IMPLICIT_UNDO_TABLESPACES;

#ifndef UNIV_HOTBACKUP
/** The number of rollback segments per tablespace.
Starts out at TRX_SYS_N_RSEGS. */
ulong srv_rollback_segments = TRX_SYS_N_RSEGS;

/** Used for the deprecated setting innodb_undo_logs. This will still get
put into srv_rollback_segments if it is set to a non-default value. */
ulong srv_undo_logs = 0;
/** Deprecation message for innodb_undo_logs. It directs the user to
innodb_rollback_segments and to the manual page located under REFMAN. */
const char *deprecated_undo_logs =
    "The parameter innodb_undo_logs is deprecated"
    " and may be removed in future releases."
    " Please use innodb_rollback_segments instead."
    " See " REFMAN "innodb-undo-logs.html";

/** Rate at which UNDO records should be purged.
Starts out at undo::TRUNCATE_FREQUENCY, converted to ulong. */
ulong srv_purge_rseg_truncate_frequency =
    static_cast<ulong>(undo::TRUNCATE_FREQUENCY);
#endif /* !UNIV_HOTBACKUP */
161 
/** Enable or Disable Truncate of UNDO tablespace.
Note: If enabled then UNDO tablespace will be selected for truncate.
While Server waits for undo-tablespace to truncate if user disables
it, truncate action is completed but no new tablespace is marked
for truncate (action is never aborted). Disabled by default here. */
bool srv_undo_log_truncate = false;

/** Enable or disable Encrypt of UNDO tablespace. Disabled by default here. */
bool srv_undo_log_encrypt = false;
171 
/** Maximum size of undo tablespace.
No initializer is given, so as a namespace-scope object it starts out
zero-initialized. */
unsigned long long srv_max_undo_tablespace_size;

/** Default undo tablespace size in UNIV_PAGEs count (10MB), i.e.
10 * 1024 * 1024 bytes divided by UNIV_PAGE_SIZE_DEF. */
const page_no_t SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
    ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;

/** Maximum number of recently truncated undo tablespace IDs for
the same undo number: one eighth of dict_sys_t::undo_space_id_range. */
const size_t CONCURRENT_UNDO_TRUNCATE_LIMIT =
    dict_sys_t::undo_space_id_range / 8;
183 
/** Set if InnoDB must operate in read-only mode. We don't do any
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
bool srv_read_only_mode;

/** Store each table created by a user in its own file; data
dictionary tables are in the system tablespace 0 */
bool srv_file_per_table;

/** Sort buffer size in index creation. Starts out at 1048576
(1024 * 1024). */
ulong srv_sort_buf_size = 1048576;
/** Maximum modification log file size for online index creation */
unsigned long long srv_online_max_size;
/** Set if InnoDB operates in read-only mode or innodb-force-recovery
is greater than SRV_FORCE_NO_TRX_UNDO. */
bool high_level_read_only;

/** Number of threads to use for parallel reads. */
ulong srv_parallel_read_threads;

/** If this flag is true, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads. Starts out false. */
bool srv_use_native_aio = false;

/** NOTE(review): undocumented in this file; by its name it toggles NUMA
interleaving - confirm against its users. Starts out false. */
bool srv_numa_interleave = false;
210 
/* Debug-only variables: everything in this group exists only when
UNIV_DEBUG is defined. None has an explicit initializer. */
#ifdef UNIV_DEBUG
/** Force all user tables to use page compression. */
ulong srv_debug_compress;
/** Set when InnoDB has invoked exit(). */
bool innodb_calling_exit;
/** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
bool srv_master_thread_disabled_debug;
#ifndef UNIV_HOTBACKUP
/** Event used to inform that master thread is disabled.
File-local (static) and additionally left out when UNIV_HOTBACKUP is
defined. */
static os_event_t srv_master_thread_disabled_event;
#endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG */
223 
/*------------------------- LOG FILES ------------------------ */
/** NOTE(review): undocumented in this file; by its name, the home directory
of the redo log group - confirm. Starts out as nullptr. */
char *srv_log_group_home_dir = nullptr;

/** Enable or disable Encrypt of REDO tablespace. Starts out false. */
bool srv_redo_log_encrypt = false;

/** NOTE(review): presumably the number of redo log files - confirm.
Starts out at SRV_N_LOG_FILES_MAX. */
ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;

#ifdef UNIV_DEBUG_DEDICATED
/** Compiled only when UNIV_DEBUG_DEDICATED is defined.
NOTE(review): by its name, a debug override of the detected system memory
size - confirm against its users. */
ulong srv_debug_system_mem_size;
#endif /* UNIV_DEBUG_DEDICATED */

/** At startup, this is the current redo log file size.
During startup, if this is different from srv_log_file_size_requested
(innodb_log_file_size), the redo log will be rebuilt and this size
will be initialized to srv_log_file_size_requested.
When upgrading from a previous redo log format, this will be set to 0,
and writing to the redo log is not allowed. Expressed in bytes. */
ulonglong srv_log_file_size;

/** The value of the startup parameter innodb_log_file_size. */
ulonglong srv_log_file_size_requested;

/** Space for log buffer, expressed in bytes. Note, that log buffer
will use only the largest power of two, which is not greater than
the assigned space. */
ulong srv_log_buffer_size;

/** Size of block, used for writing ahead to avoid read-on-write. */
ulong srv_log_write_ahead_size;

/** Minimum absolute value of cpu time for which spin-delay is used. */
uint srv_log_spin_cpu_abs_lwm;

/** Maximum percentage of cpu time for which spin-delay is used. */
uint srv_log_spin_cpu_pct_hwm;

/** Maximum value of average log flush time for which spin-delay is used.
When flushing takes longer, user threads no longer spin when waiting for
flushed redo. Expressed in microseconds. */
ulong srv_log_wait_for_flush_spin_hwm;
265 
/* EXPERIMENTAL sys vars below - we need defaults set explicitly here.
Each one is initialized from its INNODB_LOG_*_DEFAULT macro. */

/** When log writer follows links in the log recent written buffer,
it stops when it has reached at least that many bytes to write,
limiting how many bytes can be written in single call. */
ulong srv_log_write_max_size = INNODB_LOG_WRITE_MAX_SIZE_DEFAULT;

/** Number of events used for notifications about redo write. */
ulong srv_log_write_events = INNODB_LOG_EVENTS_DEFAULT;

/** Number of events used for notifications about redo flush. */
ulong srv_log_flush_events = INNODB_LOG_EVENTS_DEFAULT;

/** Number of slots in a small buffer, which is used to allow concurrent
writes to log buffer. The slots are addressed by LSN values modulo number
of the slots. */
ulong srv_log_recent_written_size = INNODB_LOG_RECENT_WRITTEN_SIZE_DEFAULT;

/** Number of slots in a small buffer, which is used to break requirement
for total order of dirty pages, when they are added to flush lists.
The slots are addressed by LSN values modulo number of the slots. */
ulong srv_log_recent_closed_size = INNODB_LOG_RECENT_CLOSED_SIZE_DEFAULT;

/** Number of spin iterations, when spinning and waiting for log buffer
written up to given LSN, before we fallback to loop with sleeps.
This is not used when user thread has to wait for log flushed to disk. */
ulong srv_log_wait_for_write_spin_delay =
    INNODB_LOG_WAIT_FOR_WRITE_SPIN_DELAY_DEFAULT;

/** Timeout used when waiting for redo write (microseconds). */
ulong srv_log_wait_for_write_timeout =
    INNODB_LOG_WAIT_FOR_WRITE_TIMEOUT_DEFAULT;

/** Number of spin iterations, when spinning and waiting for log flushed. */
ulong srv_log_wait_for_flush_spin_delay =
    INNODB_LOG_WAIT_FOR_FLUSH_SPIN_DELAY_DEFAULT;

/** Timeout used when waiting for redo flush (microseconds). */
ulong srv_log_wait_for_flush_timeout =
    INNODB_LOG_WAIT_FOR_FLUSH_TIMEOUT_DEFAULT;

/** Number of spin iterations, for which log writer thread is waiting
for new data to write or flush without sleeping. */
ulong srv_log_writer_spin_delay = INNODB_LOG_WRITER_SPIN_DELAY_DEFAULT;

/** Initial timeout used to wait on writer_event. */
ulong srv_log_writer_timeout = INNODB_LOG_WRITER_TIMEOUT_DEFAULT;

/** Number of milliseconds every which a periodical checkpoint is written
by the log checkpointer thread (unless periodical checkpoints are disabled,
which is a case during initial phase of startup). */
ulong srv_log_checkpoint_every = INNODB_LOG_CHECKPOINT_EVERY_DEFAULT;

/** Number of spin iterations, for which log flusher thread is waiting
for new data to flush, without sleeping. */
ulong srv_log_flusher_spin_delay = INNODB_LOG_FLUSHER_SPIN_DELAY_DEFAULT;

/** Initial timeout used to wait on flusher_event. */
ulong srv_log_flusher_timeout = INNODB_LOG_FLUSHER_TIMEOUT_DEFAULT;

/** Number of spin iterations, for which log write notifier thread is waiting
for advanced flushed_to_disk_lsn without sleeping.
NOTE(review): this wording is identical to the flush notifier's description
further down; confirm which LSN the write notifier actually waits on. */
ulong srv_log_write_notifier_spin_delay =
    INNODB_LOG_WRITE_NOTIFIER_SPIN_DELAY_DEFAULT;

/** Initial timeout used to wait on write_notifier_event. */
ulong srv_log_write_notifier_timeout =
    INNODB_LOG_WRITE_NOTIFIER_TIMEOUT_DEFAULT;

/** Number of spin iterations, for which log flush notifier thread is waiting
for advanced flushed_to_disk_lsn without sleeping. */
ulong srv_log_flush_notifier_spin_delay =
    INNODB_LOG_FLUSH_NOTIFIER_SPIN_DELAY_DEFAULT;

/** Initial timeout used to wait on flush_notifier_event. */
ulong srv_log_flush_notifier_timeout =
    INNODB_LOG_FLUSH_NOTIFIER_TIMEOUT_DEFAULT;

/** Number of spin iterations, for which log closer thread is waiting
for a reachable untraversed link in recent_closed. */
ulong srv_log_closer_spin_delay = INNODB_LOG_CLOSER_SPIN_DELAY_DEFAULT;

/** Initial sleep used in log closer after spin delay is finished. */
ulong srv_log_closer_timeout = INNODB_LOG_CLOSER_TIMEOUT_DEFAULT;

/* End of EXPERIMENTAL sys vars */
352 
/** Whether to generate and require checksums on the redo log pages. */
bool srv_log_checksums;

#ifdef UNIV_DEBUG

/** Debug-only flag, starts out false.
NOTE(review): by its name it disables checkpointing - confirm. */
bool srv_checkpoint_disabled = false;

/** Debug-only flag, starts out false.
NOTE(review): by its name it simulates having too many concurrent
transactions - confirm. */
bool srv_inject_too_many_concurrent_trxs = false;

#endif /* UNIV_DEBUG */

/* NOTE(review): the two settings below are undocumented in this file; their
names match the innodb_flush_log_at_trx_commit and
innodb_flush_log_at_timeout options - confirm. Both start out at 1. */
ulong srv_flush_log_at_trx_commit = 1;
uint srv_flush_log_at_timeout = 1;
/** Page size; starts out at UNIV_PAGE_SIZE_DEF. */
ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
/** Shift matching the page size; starts out at UNIV_PAGE_SIZE_SHIFT_DEF. */
ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;

/** Constructed here with the placeholder arguments (0, 0, false).
NOTE(review): presumably set to the real page size during startup, which is
not visible in this part of the file - confirm. */
page_size_t univ_page_size(0, 0, false);
370 
/** Try to flush dirty pages so as to avoid IO bursts at
the checkpoints. Enabled by default here. */
bool srv_adaptive_flushing = true;

/** Allow IO bursts at the checkpoints ignoring io_capacity setting.
Enabled by default here. */
bool srv_flush_sync = true;
377 
/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20

/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex.
@param mutex_skipped number of failed nonblocking attempts so far
@return true while mutex_skipped is still below MAX_MUTEX_NOWAIT, i.e.
while the maximum has not been reached yet */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
387 
/** Dedicated server setting. Starts out true in this file. */
bool srv_dedicated_server = true;
/** Requested size in bytes. Starts out at ULINT_MAX. */
ulint srv_buf_pool_size = ULINT_MAX;
/** Minimum pool size in bytes (5 * 1024 * 1024) */
const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
/** Default pool size in bytes (128 * 1024 * 1024) */
const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
/** Maximum pool size in bytes (LLONG_MAX) */
const longlong srv_buf_pool_max_size = LLONG_MAX;
/** Requested buffer pool chunk size. Each buffer pool instance consists
of one or more chunks. */
ulonglong srv_buf_pool_chunk_unit;
/** Minimum buffer pool chunk size (1024 * 1024). */
const ulonglong srv_buf_pool_chunk_unit_min = (1024 * 1024);
/** The buffer pool chunk size must be a multiple of this number. */
const ulonglong srv_buf_pool_chunk_unit_blk_sz = (1024 * 1024);
/** Maximum buffer pool chunk size: the maximum pool size divided by
MAX_BUFFER_POOLS. */
const ulonglong srv_buf_pool_chunk_unit_max =
    srv_buf_pool_max_size / MAX_BUFFER_POOLS;
/** Requested number of buffer pool instances */
ulong srv_buf_pool_instances;
/** Default number of buffer pool instances */
const ulong srv_buf_pool_instances_default = 0;
/** Number of locks to protect buf_pool->page_hash */
ulong srv_n_page_hash_locks = 16;
/** Whether to validate InnoDB tablespace paths on startup */
bool srv_validate_tablespace_paths = true;
/** Scan depth for LRU flush batch, i.e. the number of blocks scanned */
ulong srv_LRU_scan_depth = 1024;
/** Whether or not to flush neighbors of a block */
ulong srv_flush_neighbors = 1;
/** Previously requested size. Accesses protected by memory barriers. */
ulint srv_buf_pool_old_size = 0;
/** Current size as scaling factor for the other components */
ulint srv_buf_pool_base_size = 0;
/** Current size in bytes */
long long srv_buf_pool_curr_size = 0;
/** Dump this % of each buffer pool during BP dump */
ulong srv_buf_pool_dump_pct;
/** Lock table size in bytes. Starts out at ULINT_MAX. */
ulint srv_lock_table_size = ULINT_MAX;
430 
/** Default value of srv_idle_flush_pct. */
const ulong srv_idle_flush_pct_default = 100;
/** Starts out at srv_idle_flush_pct_default.
NOTE(review): by its name, a flushing percentage applied when the server is
idle - confirm against its users. */
ulong srv_idle_flush_pct = srv_idle_flush_pct_default;

/* Historical note kept from the original comment: a former single parameter
was deprecated in favour of the two variables below. That comment spelled
them srv_n_io_[read|write]_threads; the names actually defined here are
srv_n_read_io_threads and srv_n_write_io_threads. */
ulong srv_n_read_io_threads;
ulong srv_n_write_io_threads;
438 
/** Switch to enable random read ahead. Disabled by default here. */
bool srv_random_read_ahead = false;
/** User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
ulong srv_read_ahead_threshold = 56;

/** Maximum on-disk size of change buffer in terms of percentage
of the buffer pool. Starts out at CHANGE_BUFFER_DEFAULT_SIZE. */
uint srv_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;

/* File flush method: exactly one of the two variables below is defined,
depending on whether _WIN32 is defined. */
#ifndef _WIN32
enum srv_unix_flush_t srv_unix_file_flush_method = SRV_UNIX_FSYNC;
#else
enum srv_win_flush_t srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
#endif /* _WIN32 */

/** NOTE(review): undocumented in this file; by its name, the maximum number
of files kept open - confirm. Starts out at 300. */
ulint srv_max_n_open_files = 300;

/** Number of IO operations per second the server can do */
ulong srv_io_capacity = 200;
/** NOTE(review): presumably the upper bound paired with srv_io_capacity -
confirm. Starts out at 400. */
ulong srv_max_io_capacity = 400;

/** The number of page cleaner threads to use. */
ulong srv_n_page_cleaners = 4;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

double srv_max_buf_pool_modified_pct = 75.0;
/** NOTE(review): by its name, a low water mark for the dirty pages
percentage - confirm. Starts out at 0.0. */
double srv_max_dirty_pages_pct_lwm = 0.0;

/** This is the percentage of log capacity at which adaptive flushing,
if enabled, will kick in. */
ulong srv_adaptive_flushing_lwm = 10;

/** Number of iterations over which adaptive flushing is averaged. */
ulong srv_flushing_avg_loops = 30;

/** The number of purge threads to use. */
ulong srv_n_purge_threads = 4;

/** The number of pages to purge in one batch */
ulong srv_purge_batch_size = 20;

/** Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
490 
#ifndef UNIV_HOTBACKUP
/** NOTE(review): undocumented in this file; by its type name, the server's
statistics counters - confirm. Not defined when UNIV_HOTBACKUP is defined. */
srv_stats_t srv_stats;
#endif /* !UNIV_HOTBACKUP */

/** Structure to pass status variables to MySQL */
export_var_t export_vars;

/** Normally 0. When nonzero, skip some phases of crash recovery,
starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
by SELECT or mysqldump. When this is nonzero, we do not allow any user
modifications to the data. */
ulong srv_force_recovery;
#ifdef UNIV_DEBUG
/** Inject a crash at different steps of the recovery process.
This is for testing and debugging only (compiled only when UNIV_DEBUG is
defined). */
ulong srv_force_recovery_crash;
#endif /* UNIV_DEBUG */
508 
/** Print all user-level transactions deadlocks to mysqld stderr.
Disabled by default here. */
bool srv_print_all_deadlocks = false;

/** Print all DDL logs to mysqld stderr. Disabled by default here. */
bool srv_print_ddl_logs = false;

/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index.
Disabled by default here. */
bool srv_cmp_per_index_enabled = false;

/** If innodb redo logging is enabled. Enabled by default here. */
bool srv_redo_log = true;
520 
/** The value of the configuration parameter innodb_fast_shutdown,
controlling the InnoDB shutdown.

If innodb_fast_shutdown=0, InnoDB shutdown will purge all undo log
records (except XA PREPARE transactions) and complete the merge of the
entire change buffer, and then shut down the redo log.

If innodb_fast_shutdown=1, InnoDB shutdown will only flush the buffer
pool to data files, cleanly shutting down the redo log.

If innodb_fast_shutdown=2, shutdown will effectively 'crash' InnoDB
(but lose no committed transactions). */
ulong srv_fast_shutdown;

/** Generate an innodb_status.<pid> file. Declared as ibool rather than
bool, hence the FALSE initializer. */
ibool srv_innodb_status = FALSE;
537 
/* When estimating number of different key values in an index, sample
this many index pages, there are 2 ways to calculate statistics:
* persistent stats that are calculated by ANALYZE TABLE and saved
  in the innodb database.
* quick transient stats, that are used if persistent stats for the given
  table/index are not found in the innodb database */

/** Number of index pages sampled for transient stats; starts out at 8. */
unsigned long long srv_stats_transient_sample_pages = 8;
/** NOTE(review): by its name, whether persistent stats are used - confirm.
Enabled by default here. */
bool srv_stats_persistent = true;
/** NOTE(review): by its name, whether delete-marked records are included
when computing stats - confirm. Disabled by default here. */
bool srv_stats_include_delete_marked = false;
/** Number of index pages sampled for persistent stats; starts out at 20. */
unsigned long long srv_stats_persistent_sample_pages = 20;
/** NOTE(review): by its name, whether stats are recalculated
automatically - confirm. Enabled by default here. */
bool srv_stats_auto_recalc = true;
549 
/** NOTE(review): undocumented in this file; starts out at 0. */
ulong srv_replication_delay = 0;

/* NOTE(review): the four flags below are undocumented in this file. Their
names suggest backup-tool behaviour (apply the log only, backup mode, close
files, roll back prepared transactions) - confirm against their users.
Only srv_close_files starts out true. */
bool srv_apply_log_only = false;

bool srv_backup_mode = false;
bool srv_close_files = true;
bool srv_rollback_prepared_trx = false;

/*-------------------------------------------*/
/* NOTE(review): spin-wait settings, undocumented in this file; units and
exact meaning should be confirmed where they are used. */
ulong srv_n_spin_wait_rounds = 30;
ulong srv_spin_wait_delay = 6;
/* Declared as ibool rather than bool, hence the TRUE initializer. */
ibool srv_priority_boost = TRUE;
562 
#ifndef UNIV_HOTBACKUP
/* File-local row counters, all starting at 0 and not defined when
UNIV_HOTBACKUP is defined.
NOTE(review): the "_old" suffix suggests they hold the values seen at the
previous sampling so that deltas can be computed - confirm where they are
used. */
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
static ulint srv_n_rows_read_old = 0;

/* The same four counters for system rows. */
static ulint srv_n_system_rows_inserted_old = 0;
static ulint srv_n_system_rows_updated_old = 0;
static ulint srv_n_system_rows_deleted_old = 0;
static ulint srv_n_system_rows_read_old = 0;
#endif /* !UNIV_HOTBACKUP */

/** NOTE(review): undocumented in this file; by its name, a count of status
writes that were truncated - confirm. Starts out at 0. */
ulint srv_truncated_status_writes = 0;
576 
/** Whether InnoDB monitor output is requested. Off by default here.
NOTE(review): meaning taken from the variable name - confirm. */
bool srv_print_innodb_monitor = false;
/** Whether InnoDB lock monitor output is requested. Off by default here.
NOTE(review): meaning taken from the variable name - confirm. */
bool srv_print_innodb_lock_monitor = false;
579 
/* Arrays of English strings describing the current state of an
i/o handler thread. Both hold SRV_MAX_N_IO_THREADS entries. */

const char *srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
const char *srv_io_thread_function[SRV_MAX_N_IO_THREADS];

#ifndef UNIV_HOTBACKUP
/** File-local; not defined when UNIV_HOTBACKUP is defined.
NOTE(review): by its name, the monotonic time of the last monitor
output - confirm. */
static ib_time_monotonic_t srv_last_monitor_time;
#endif /* !UNIV_HOTBACKUP */

/** File-local mutex.
NOTE(review): what exactly it protects is not stated in this file. */
static ib_mutex_t srv_innodb_monitor_mutex;

/** Mutex protecting page_zip_stat_per_index */
ib_mutex_t page_zip_stat_per_index_mutex;

/** Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
ib_mutex_t srv_monitor_file_mutex;

/** Temporary file for innodb monitor output */
FILE *srv_monitor_file;
/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
ib_mutex_t srv_misc_tmpfile_mutex;
/** Temporary file for miscellaneous diagnostic output */
FILE *srv_misc_tmpfile;
606 
#ifndef UNIV_HOTBACKUP
/* File-local bookkeeping for the main thread; everything in this group
starts at 0 and is not defined when UNIV_HOTBACKUP is defined.
NOTE(review): the first two are undocumented; by their names they record
the process number and thread id of the main thread - confirm. */
static ulint srv_main_thread_process_no = 0;
static os_thread_id_t srv_main_thread_id = 0;

/* The following counts are used by the srv_master_thread. */

/** Iterations of the loop bounded by 'srv_active' label. */
static ulint srv_main_active_loops = 0;
/** Iterations of the loop bounded by the 'srv_idle' label. */
static ulint srv_main_idle_loops = 0;
/** Iterations of the loop bounded by the 'srv_shutdown' label. */
static ulint srv_main_shutdown_loops = 0;
/** Log writes involving flush. */
static ulint srv_log_writes_and_flush = 0;

#endif /* !UNIV_HOTBACKUP */
623 
/* Interval in seconds at which various tasks are performed by the
master thread when server is active. In order to balance the workload,
we should try to keep intervals such that they are not multiples of
each other. For example, if we have intervals for various tasks
defined as 5, 10, 15, 60 then all tasks will be performed when
current_time % 60 == 0 and no tasks will be performed when
current_time % 5 != 0. */

#define SRV_MASTER_DICT_LRU_INTERVAL (47)

/** Acquire the system_mutex, i.e. srv_sys->mutex.
Wrapped in do { } while (0) so that the macro is used as one statement. */
#define srv_sys_mutex_enter()     \
  do {                            \
    mutex_enter(&srv_sys->mutex); \
  } while (0)

/** Test if the system mutex is owned.
Evaluates to false whenever srv_read_only_mode is set, regardless of what
mutex_own() reports. */
#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) && !srv_read_only_mode)

/** Release the system mutex, i.e. srv_sys->mutex. */
#define srv_sys_mutex_exit()     \
  do {                           \
    mutex_exit(&srv_sys->mutex); \
  } while (0)
648 
649 #ifndef UNIV_HOTBACKUP
650 /*
651         IMPLEMENTATION OF THE SERVER MAIN PROGRAM
652         =========================================
653 
654 There is the following analogue between this database
655 server and an operating system kernel:
656 
657 DB concept			equivalent OS concept
658 ----------			---------------------
659 transaction		--	process;
660 
661 query thread		--	thread;
662 
663 lock			--	semaphore;
664 
665 kernel			--	kernel;
666 
667 query thread execution:
668 (a) without lock mutex
669 reserved		--	process executing in user mode;
670 (b) with lock mutex reserved
671                         --	process executing in kernel mode;
672 
The server has several background threads all running at the same
priority as user threads. It periodically checks if there is anything
675 happening in the server which requires intervention of the master
676 thread. Such situations may be, for example, when flushing of dirty
677 blocks is needed in the buffer pool or old version of database rows
678 have to be cleaned away (purged). The user can configure a separate
679 dedicated purge thread(s) too, in which case the master thread does not
680 do any purging.
681 
682 The threads which we call user threads serve the queries of the MySQL
683 server. They run at normal priority.
684 
685 When there is no activity in the system, also the master thread
686 suspends itself to wait for an event making the server totally silent.
687 
688 There is still one complication in our server design. If a
689 background utility thread obtains a resource (e.g., mutex) needed by a user
690 thread, and there is also some other user activity in the system,
691 the user thread may have to wait indefinitely long for the
692 resource, as the OS does not schedule a background thread if
693 there is some other runnable user thread. This problem is called
694 priority inversion in real-time programming.
695 
696 One solution to the priority inversion problem would be to keep record
697 of which thread owns which resource and in the above case boost the
698 priority of the background thread so that it will be scheduled and it
699 can release the resource.  This solution is called priority inheritance
700 in real-time programming.  A drawback of this solution is that the overhead
701 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
702 MHz Pentium, because the thread has to call os_thread_get_curr_id.  This may
703 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
that the thread cannot store the information in the resource, say mutex,
705 itself, because competing threads could wipe out the information if it is
706 stored before acquiring the mutex, and if it stored afterwards, the
707 information is outdated for the time of one machine instruction, at least.
708 (To be precise, the information could be stored to lock_word in mutex if
709 the machine supports atomic swap.)
710 
711 The above solution with priority inheritance may become actual in the
712 future, currently we do not implement any priority twiddling solution.
713 Our general aim is to reduce the contention of all mutexes by making
714 them more fine grained.
715 
716 The thread table contains information of the current status of each
717 thread existing in the system, and also the event semaphores used in
718 suspending the master thread and utility threads when they have nothing
719 to do.  The thread table can be seen as an analogue to the process table
720 in a traditional Unix implementation. */
721 
722 /** The server system struct */
723 struct srv_sys_t {
724   ib_mutex_t tasks_mutex; /*!< variable protecting the
725                           tasks queue */
726   UT_LIST_BASE_NODE_T(que_thr_t)
727   tasks; /*!< task queue */
728 
729   ib_mutex_t mutex;    /*!< variable protecting the
730                        fields below. */
731   ulint n_sys_threads; /*!< size of the sys_threads
732                        array */
733 
734   srv_slot_t *sys_threads; /*!< server thread table */
735 
736   ulint n_threads_active[SRV_MASTER + 1];
737   /*!< number of threads active
738   in a thread class */
739 
740   srv_stats_t::ulint_ctr_1_t activity_count; /*!< For tracking server
741                                              activity */
742 };
743 
/** The single, file-local server system object. It is nullptr until it is
set elsewhere (the allocation is not visible in this part of the file). */
static srv_sys_t *srv_sys = nullptr;

/** Event to signal the monitor thread. */
os_event_t srv_monitor_event;

/** Event to signal the error thread */
os_event_t srv_error_event;

/** Event to signal the buffer pool dump/load thread */
os_event_t srv_buf_dump_event;

/** Event to signal the buffer pool resize thread */
os_event_t srv_buf_resize_event;

/** The buffer pool dump/load file name */
char *srv_buf_dump_filename;

/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
and/or load it during startup. Both start out true. */
bool srv_buffer_pool_dump_at_shutdown = true;
bool srv_buffer_pool_load_at_startup = true;

/** Slot index in the srv_sys->sys_threads array for the purge thread. */
static const ulint SRV_PURGE_SLOT = 1;

/** Slot index in the srv_sys->sys_threads array for the master thread. */
static const ulint SRV_MASTER_SLOT = 0;
771 
/* Performance schema stage descriptors, compiled only when
HAVE_PSI_STAGE_INTERFACE is defined. Every descriptor below is initialized
the same way: 0, the stage name string, PSI_FLAG_STAGE_PROGRESS and
PSI_DOCUMENT_ME. */
#ifdef HAVE_PSI_STAGE_INTERFACE
/** Performance schema stage event for monitoring ALTER TABLE progress:
everything after the flush, log_make_latest_checkpoint(). */
PSI_stage_info srv_stage_alter_table_end = {
    0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
log_make_latest_checkpoint(). */
PSI_stage_info srv_stage_alter_table_flush = {
    0, "alter table (flush)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
row_merge_insert_index_tuples(). */
PSI_stage_info srv_stage_alter_table_insert = {
    0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
row_log_apply(). */
PSI_stage_info srv_stage_alter_table_log_index = {
    0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
row_log_table_apply(). */
PSI_stage_info srv_stage_alter_table_log_table = {
    0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
row_merge_sort(). */
PSI_stage_info srv_stage_alter_table_merge_sort = {
    0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress:
row_merge_read_clustered_index(). */
PSI_stage_info srv_stage_alter_table_read_pk_internal_sort = {
    0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLESPACE
ENCRYPTION progress. */
PSI_stage_info srv_stage_alter_tablespace_encryption = {
    0, "alter tablespace (encryption)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring buffer pool load progress. */
PSI_stage_info srv_stage_buffer_pool_load = {
    0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
#endif /* HAVE_PSI_STAGE_INTERFACE */
821 
822 /** Performance schema stage event for monitoring clone file copy progress. */
823 PSI_stage_info srv_stage_clone_file_copy = {
824     0, "clone (file copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
825 
826 /** Performance schema stage event for monitoring clone redo copy progress. */
827 PSI_stage_info srv_stage_clone_redo_copy = {
828     0, "clone (redo copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
829 
830 /** Performance schema stage event for monitoring clone page copy progress. */
831 PSI_stage_info srv_stage_clone_page_copy = {
832     0, "clone (page copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
833 
834 /** Prints counters for work done by srv_master_thread. */
srv_print_master_thread_info(FILE * file)835 static void srv_print_master_thread_info(FILE *file) /* in: output stream */
836 {
837   fprintf(file,
838           "srv_master_thread loops: " ULINTPF " srv_active, " ULINTPF
839           " srv_shutdown, " ULINTPF " srv_idle\n",
840           srv_main_active_loops, srv_main_shutdown_loops, srv_main_idle_loops);
841   fprintf(file, "srv_master_thread log flush and writes: " ULINTPF "\n",
842           srv_log_writes_and_flush);
843 }
844 #endif /* !UNIV_HOTBACKUP */
845 
846 /** Sets the info describing an i/o thread current state. */
srv_set_io_thread_op_info(ulint i,const char * str)847 void srv_set_io_thread_op_info(
848     ulint i,         /*!< in: the 'segment' of the i/o thread */
849     const char *str) /*!< in: constant char string describing the
850                      state */
851 {
852   ut_a(i < SRV_MAX_N_IO_THREADS);
853 
854   srv_io_thread_op_info[i] = str;
855 }
856 
857 /** Resets the info describing an i/o thread current state. */
srv_reset_io_thread_op_info()858 void srv_reset_io_thread_op_info() {
859   for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
860     srv_io_thread_op_info[i] = "not started yet";
861   }
862 }
863 
864 #ifndef UNIV_HOTBACKUP
865 #ifdef UNIV_DEBUG
866 /** Validates the type of a thread table slot.
867  @return true if ok */
srv_thread_type_validate(srv_thread_type type)868 static ibool srv_thread_type_validate(
869     srv_thread_type type) /*!< in: thread type */
870 {
871   switch (type) {
872     case SRV_NONE:
873       break;
874     case SRV_WORKER:
875     case SRV_PURGE:
876     case SRV_MASTER:
877       return (TRUE);
878   }
879   ut_error;
880 }
881 #endif /* UNIV_DEBUG */
882 
883 /** Gets the type of a thread table slot.
884  @return thread type */
srv_slot_get_type(const srv_slot_t * slot)885 static srv_thread_type srv_slot_get_type(
886     const srv_slot_t *slot) /*!< in: thread slot */
887 {
888   srv_thread_type type = slot->type;
889   ut_ad(srv_thread_type_validate(type));
890   return (type);
891 }
892 
893 /** Reserves a slot in the thread table for the current thread.
894  @return reserved slot */
srv_reserve_slot(srv_thread_type type)895 static srv_slot_t *srv_reserve_slot(
896     srv_thread_type type) /*!< in: type of the thread */
897 {
898   srv_slot_t *slot = nullptr;
899 
900   srv_sys_mutex_enter();
901 
902   ut_ad(srv_thread_type_validate(type));
903 
904   switch (type) {
905     case SRV_MASTER:
906       slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
907       break;
908 
909     case SRV_PURGE:
910       slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
911       break;
912 
913     case SRV_WORKER:
914       /* Find an empty slot, skip the master and purge slots. */
915       for (slot = &srv_sys->sys_threads[2]; slot->in_use; ++slot) {
916         ut_a(slot < &srv_sys->sys_threads[srv_sys->n_sys_threads]);
917       }
918       break;
919 
920     case SRV_NONE:
921       ut_error;
922   }
923 
924   ut_a(!slot->in_use);
925 
926   slot->in_use = TRUE;
927   slot->suspended = FALSE;
928   slot->type = type;
929 
930   ut_ad(srv_slot_get_type(slot) == type);
931 
932   ++srv_sys->n_threads_active[type];
933 
934   srv_sys_mutex_exit();
935 
936   return (slot);
937 }
938 
939 /** Suspends the calling thread to wait for the event in its thread slot.
940  @return the current signal count of the event. */
srv_suspend_thread_low(srv_slot_t * slot)941 static int64_t srv_suspend_thread_low(
942     srv_slot_t *slot) /*!< in/out: thread slot */
943 {
944   ut_ad(!srv_read_only_mode);
945   ut_ad(srv_sys_mutex_own());
946 
947   ut_ad(slot->in_use);
948 
949   srv_thread_type type = srv_slot_get_type(slot);
950 
951   switch (type) {
952     case SRV_NONE:
953       ut_error;
954 
955     case SRV_MASTER:
956       /* We have only one master thread and it
957       should be the first entry always. */
958       ut_a(srv_sys->n_threads_active[type] == 1);
959       break;
960 
961     case SRV_PURGE:
962       /* We have only one purge coordinator thread
963       and it should be the second entry always. */
964       ut_a(srv_sys->n_threads_active[type] == 1);
965       break;
966 
967     case SRV_WORKER:
968       ut_a(srv_n_purge_threads > 1);
969       ut_a(srv_sys->n_threads_active[type] > 0);
970       break;
971   }
972 
973   ut_a(!slot->suspended);
974   slot->suspended = TRUE;
975 
976   ut_a(srv_sys->n_threads_active[type] > 0);
977 
978   srv_sys->n_threads_active[type]--;
979 
980   return (os_event_reset(slot->event));
981 }
982 
983 /** Suspends the calling thread to wait for the event in its thread slot.
984  @return the current signal count of the event. */
srv_suspend_thread(srv_slot_t * slot)985 static int64_t srv_suspend_thread(srv_slot_t *slot) /*!< in/out: thread slot */
986 {
987   srv_sys_mutex_enter();
988 
989   int64_t sig_count = srv_suspend_thread_low(slot);
990 
991   srv_sys_mutex_exit();
992 
993   return (sig_count);
994 }
995 
996 /** Releases threads of the type given from suspension in the thread table.
997  NOTE! The server mutex has to be reserved by the caller!
998  @return number of threads released: this may be less than n if not
999          enough threads were suspended at the moment. */
srv_release_threads(srv_thread_type type,ulint n)1000 ulint srv_release_threads(srv_thread_type type, /*!< in: thread type */
1001                           ulint n) /*!< in: number of threads to release */
1002 {
1003   ulint i;
1004   ulint count = 0;
1005 
1006   ut_ad(srv_thread_type_validate(type));
1007   ut_ad(n > 0);
1008 
1009   srv_sys_mutex_enter();
1010 
1011   for (i = 0; i < srv_sys->n_sys_threads; i++) {
1012     srv_slot_t *slot;
1013 
1014     slot = &srv_sys->sys_threads[i];
1015 
1016     if (slot->in_use && srv_slot_get_type(slot) == type && slot->suspended) {
1017       switch (type) {
1018         case SRV_NONE:
1019           ut_error;
1020 
1021         case SRV_MASTER:
1022           /* We have only one master thread and it
1023           should be the first entry always. */
1024           ut_a(n == 1);
1025           ut_a(i == SRV_MASTER_SLOT);
1026           ut_a(srv_sys->n_threads_active[type] == 0);
1027           break;
1028 
1029         case SRV_PURGE:
1030           /* We have only one purge coordinator thread
1031           and it should be the second entry always. */
1032           ut_a(n == 1);
1033           ut_a(i == SRV_PURGE_SLOT);
1034           ut_a(srv_n_purge_threads > 0);
1035           ut_a(srv_sys->n_threads_active[type] == 0);
1036           break;
1037 
1038         case SRV_WORKER:
1039           ut_a(srv_n_purge_threads > 1);
1040           ut_a(srv_sys->n_threads_active[type] < srv_n_purge_threads - 1);
1041           break;
1042       }
1043 
1044       slot->suspended = FALSE;
1045 
1046       ++srv_sys->n_threads_active[type];
1047 
1048       os_event_set(slot->event);
1049 
1050       if (++count == n) {
1051         break;
1052       }
1053     }
1054   }
1055 
1056   srv_sys_mutex_exit();
1057 
1058   return (count);
1059 }
1060 
1061 /** Release a thread's slot. */
srv_free_slot(srv_slot_t * slot)1062 static void srv_free_slot(srv_slot_t *slot) /*!< in/out: thread slot */
1063 {
1064   srv_sys_mutex_enter();
1065 
1066   if (!slot->suspended) {
1067     /* Mark the thread as inactive. */
1068     srv_suspend_thread_low(slot);
1069   }
1070 
1071   /* Free the slot for reuse. */
1072   ut_ad(slot->in_use);
1073   slot->in_use = FALSE;
1074 
1075   srv_sys_mutex_exit();
1076 }
1077 
1078 /** Initializes the server. */
srv_init(void)1079 static void srv_init(void) {
1080   ulint n_sys_threads = 0;
1081   ulint srv_sys_sz = sizeof(*srv_sys);
1082 
1083   mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
1084 
1085   ut_d(srv_threads.m_shutdown_cleanup_dbg = os_event_create());
1086 
1087   srv_threads.m_master_ready_for_dd_shutdown = os_event_create();
1088 
1089   srv_threads.m_purge_coordinator = {};
1090 
1091   srv_threads.m_purge_workers_n = srv_n_purge_threads;
1092 
1093   srv_threads.m_purge_workers =
1094       UT_NEW_ARRAY_NOKEY(IB_thread, srv_threads.m_purge_workers_n);
1095 
1096   if (!srv_read_only_mode) {
1097     /* Number of purge threads + master thread */
1098     n_sys_threads = srv_n_purge_threads + 1;
1099 
1100     srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
1101   }
1102 
1103   srv_threads.m_page_cleaner_coordinator = {};
1104 
1105   srv_threads.m_page_cleaner_workers_n = srv_n_page_cleaners;
1106 
1107   srv_threads.m_page_cleaner_workers =
1108       UT_NEW_ARRAY_NOKEY(IB_thread, srv_threads.m_page_cleaner_workers_n);
1109 
1110   srv_sys = static_cast<srv_sys_t *>(ut_zalloc_nokey(srv_sys_sz));
1111 
1112   srv_sys->n_sys_threads = n_sys_threads;
1113 
1114   /* Even in read-only mode we flush pages related to intrinsic table
1115   and so mutex creation is needed. */
1116   {
1117     mutex_create(LATCH_ID_SRV_SYS, &srv_sys->mutex);
1118 
1119     mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys->tasks_mutex);
1120 
1121     srv_sys->sys_threads = (srv_slot_t *)&srv_sys[1];
1122 
1123     for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1124       srv_slot_t *slot = &srv_sys->sys_threads[i];
1125 
1126       slot->event = os_event_create();
1127 
1128       slot->in_use = false;
1129 
1130       ut_a(slot->event);
1131     }
1132 
1133     srv_error_event = os_event_create();
1134 
1135     srv_monitor_event = os_event_create();
1136 
1137     srv_buf_dump_event = os_event_create();
1138 
1139     buf_flush_event = os_event_create();
1140 
1141     UT_LIST_INIT(srv_sys->tasks, &que_thr_t::queue);
1142   }
1143 
1144   srv_buf_resize_event = os_event_create();
1145 
1146   ut_d(srv_master_thread_disabled_event = os_event_create());
1147 
1148   /* page_zip_stat_per_index_mutex is acquired from:
1149   1. page_zip_compress() (after SYNC_FSP)
1150   2. page_zip_decompress()
1151   3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
1152   4. innodb_cmp_per_index_update(), no other latches
1153   since we do not acquire any other latches while holding this mutex,
1154   it can have very low level. We pick SYNC_ANY_LATCH for it. */
1155   mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
1156                &page_zip_stat_per_index_mutex);
1157 
1158   /* Create dummy indexes for infimum and supremum records */
1159 
1160   dict_ind_init();
1161 
1162   /* Initialize some INFORMATION SCHEMA internal structures */
1163   trx_i_s_cache_init(trx_i_s_cache);
1164 
1165   ut_crc32_init();
1166 
1167   dict_mem_init();
1168 }
1169 
1170 /** Frees the data structures created in srv_init(). */
srv_free(void)1171 void srv_free(void) {
1172   mutex_free(&srv_innodb_monitor_mutex);
1173   mutex_free(&page_zip_stat_per_index_mutex);
1174 
1175   {
1176     mutex_free(&srv_sys->mutex);
1177     mutex_free(&srv_sys->tasks_mutex);
1178 
1179     for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1180       srv_slot_t *slot = &srv_sys->sys_threads[i];
1181 
1182       os_event_destroy(slot->event);
1183     }
1184 
1185     os_event_destroy(srv_error_event);
1186     os_event_destroy(srv_monitor_event);
1187     os_event_destroy(srv_buf_dump_event);
1188     os_event_destroy(buf_flush_event);
1189   }
1190 
1191   os_event_destroy(srv_buf_resize_event);
1192 
1193 #ifdef UNIV_DEBUG
1194   os_event_destroy(srv_master_thread_disabled_event);
1195   srv_master_thread_disabled_event = nullptr;
1196 #endif /* UNIV_DEBUG */
1197 
1198   trx_i_s_cache_free(trx_i_s_cache);
1199 
1200   ut_free(srv_sys);
1201 
1202   srv_sys = nullptr;
1203 
1204   if (srv_threads.m_page_cleaner_workers != nullptr) {
1205     for (size_t i = 0; i < srv_threads.m_page_cleaner_workers_n; ++i) {
1206       srv_threads.m_page_cleaner_workers[i] = {};
1207     }
1208     ut_free(srv_threads.m_page_cleaner_workers);
1209     srv_threads.m_page_cleaner_workers = nullptr;
1210   }
1211 
1212   if (srv_threads.m_purge_workers != nullptr) {
1213     for (size_t i = 0; i < srv_threads.m_purge_workers_n; ++i) {
1214       srv_threads.m_purge_workers[i] = {};
1215     }
1216     ut_free(srv_threads.m_purge_workers);
1217     srv_threads.m_purge_workers = nullptr;
1218   }
1219 
1220   os_event_destroy(srv_threads.m_master_ready_for_dd_shutdown);
1221 
1222   ut_d(os_event_destroy(srv_threads.m_shutdown_cleanup_dbg));
1223 
1224   srv_threads = {};
1225 }
1226 
1227 /** Initializes the synchronization primitives, memory system, and the thread
1228  local storage. */
srv_general_init()1229 void srv_general_init() {
1230   sync_check_init(srv_max_n_threads);
1231   /* Reset the system variables in the recovery module. */
1232   recv_sys_var_init();
1233   os_thread_open();
1234   trx_pool_init();
1235   que_init();
1236   row_mysql_init();
1237   undo_spaces_init();
1238 }
1239 
1240 /** Boots the InnoDB server. */
srv_boot(void)1241 void srv_boot(void) {
1242   /* Initialize synchronization primitives, memory management, and thread
1243   local storage */
1244 
1245   srv_general_init();
1246 
1247   /* Initialize this module */
1248 
1249   srv_init();
1250 }
1251 
1252 /** Refreshes the values used to calculate per-second averages. */
srv_refresh_innodb_monitor_stats(void)1253 static void srv_refresh_innodb_monitor_stats(void) {
1254   mutex_enter(&srv_innodb_monitor_mutex);
1255 
1256   srv_last_monitor_time = ut_time_monotonic();
1257 
1258   os_aio_refresh_stats();
1259 
1260   btr_cur_n_sea_old = btr_cur_n_sea;
1261   btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1262 
1263   log_refresh_stats(*log_sys);
1264 
1265   buf_refresh_io_stats_all();
1266 
1267   srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1268   srv_n_rows_updated_old = srv_stats.n_rows_updated;
1269   srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1270   srv_n_rows_read_old = srv_stats.n_rows_read;
1271 
1272   srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
1273   srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
1274   srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
1275   srv_n_system_rows_read_old = srv_stats.n_system_rows_read;
1276 
1277   mutex_exit(&srv_innodb_monitor_mutex);
1278 }
1279 
1280 /**
1281 Prints info summary and info about all transactions to the file, recording the
1282 position where the part about transactions starts.
1283 @param[in]    file            output stream
1284 @param[out]   trx_start_pos   file position of the start of the list of active
1285                               transactions
1286 */
srv_printf_locks_and_transactions(FILE * file,ulint * trx_start_pos)1287 static void srv_printf_locks_and_transactions(FILE *file,
1288                                               ulint *trx_start_pos) {
1289   ut_ad(locksys::owns_exclusive_global_latch());
1290   lock_print_info_summary(file);
1291   if (trx_start_pos) {
1292     long t = ftell(file);
1293     if (t < 0) {
1294       *trx_start_pos = ULINT_UNDEFINED;
1295     } else {
1296       *trx_start_pos = (ulint)t;
1297     }
1298   }
1299   lock_print_info_all_transactions(file);
1300 }
1301 
srv_printf_innodb_monitor(FILE * file,bool nowait,ulint * trx_start_pos,ulint * trx_end)1302 bool srv_printf_innodb_monitor(FILE *file, bool nowait, ulint *trx_start_pos,
1303                                ulint *trx_end) {
1304   ulint n_reserved;
1305   ibool ret;
1306 
1307   mutex_enter(&srv_innodb_monitor_mutex);
1308 
1309   const auto current_time = ut_time_monotonic();
1310 
1311   /* We add 0.001 seconds to time_elapsed to prevent division
1312   by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
1313   same time */
1314 
1315   const auto time_elapsed = current_time - srv_last_monitor_time + 0.001;
1316 
1317   srv_last_monitor_time = ut_time_monotonic();
1318 
1319   fputs("\n=====================================\n", file);
1320 
1321   ut_print_timestamp(file);
1322   fprintf(file,
1323           " INNODB MONITOR OUTPUT\n"
1324           "=====================================\n"
1325           "Per second averages calculated from the last %lu seconds\n",
1326           (ulong)time_elapsed);
1327 
1328   fputs(
1329       "-----------------\n"
1330       "BACKGROUND THREAD\n"
1331       "-----------------\n",
1332       file);
1333   srv_print_master_thread_info(file);
1334 
1335   fputs(
1336       "----------\n"
1337       "SEMAPHORES\n"
1338       "----------\n",
1339       file);
1340 
1341   sync_print(file);
1342 
1343   /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1344   order level in sync0sync.h, while dict_foreign_err_mutex has a very
1345   low level 135. Therefore we can reserve the latter mutex here without
1346   a danger of a deadlock of threads. */
1347 
1348   mutex_enter(&dict_foreign_err_mutex);
1349 
1350   if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
1351     fputs(
1352         "------------------------\n"
1353         "LATEST FOREIGN KEY ERROR\n"
1354         "------------------------\n",
1355         file);
1356     ut_copy_file(file, dict_foreign_err_file);
1357   }
1358 
1359   mutex_exit(&dict_foreign_err_mutex);
1360 
1361   ret = true;
1362   if (nowait) {
1363     locksys::Global_exclusive_try_latch guard{};
1364     if (guard.owns_lock()) {
1365       srv_printf_locks_and_transactions(file, trx_start_pos);
1366     } else {
1367       fputs("FAIL TO OBTAIN LOCK MUTEX, SKIP LOCK INFO PRINTING\n", file);
1368       ret = false;
1369     }
1370   } else {
1371     locksys::Global_exclusive_latch_guard guard{};
1372     srv_printf_locks_and_transactions(file, trx_start_pos);
1373   }
1374 
1375   if (ret) {
1376     ut_ad(lock_validate());
1377 
1378     if (trx_end) {
1379       long t = ftell(file);
1380       if (t < 0) {
1381         *trx_end = ULINT_UNDEFINED;
1382       } else {
1383         *trx_end = (ulint)t;
1384       }
1385     }
1386   }
1387 
1388   fputs(
1389       "--------\n"
1390       "FILE I/O\n"
1391       "--------\n",
1392       file);
1393   os_aio_print(file);
1394 
1395   fputs(
1396       "-------------------------------------\n"
1397       "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1398       "-------------------------------------\n",
1399       file);
1400   ibuf_print(file);
1401 
1402   for (ulint i = 0; i < btr_ahi_parts; ++i) {
1403     rw_lock_s_lock(btr_search_latches[i]);
1404     ha_print_info(file, btr_search_sys->hash_tables[i]);
1405     rw_lock_s_unlock(btr_search_latches[i]);
1406   }
1407 
1408   fprintf(file, "%.2f hash searches/s, %.2f non-hash searches/s\n",
1409           (btr_cur_n_sea - btr_cur_n_sea_old) / time_elapsed,
1410           (btr_cur_n_non_sea - btr_cur_n_non_sea_old) / time_elapsed);
1411   btr_cur_n_sea_old = btr_cur_n_sea;
1412   btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1413 
1414   fputs(
1415       "---\n"
1416       "LOG\n"
1417       "---\n",
1418       file);
1419   log_print(*log_sys, file);
1420 
1421   fputs(
1422       "----------------------\n"
1423       "BUFFER POOL AND MEMORY\n"
1424       "----------------------\n",
1425       file);
1426   fprintf(file,
1427           "Total large memory allocated " ULINTPF
1428           "\n"
1429           "Dictionary memory allocated " ULINTPF "\n",
1430           os_total_large_mem_allocated, dict_sys->size);
1431 
1432   buf_print_io(file);
1433 
1434   fputs(
1435       "--------------\n"
1436       "ROW OPERATIONS\n"
1437       "--------------\n",
1438       file);
1439   fprintf(file,
1440           ULINTPF " queries inside InnoDB, " ULINTPF " queries in queue\n",
1441           srv_conc_get_active_threads(), srv_conc_get_waiting_threads());
1442 
1443   /* This is a dirty read, without holding trx_sys->mutex. */
1444   fprintf(file, ULINTPF " read views open inside InnoDB\n",
1445           trx_sys->mvcc->size());
1446 
1447   n_reserved = fil_space_get_n_reserved_extents(0);
1448   if (n_reserved > 0) {
1449     fprintf(file,
1450             ULINTPF
1451             " tablespace extents now reserved for"
1452             " B-tree split operations\n",
1453             n_reserved);
1454   }
1455 
1456   std::ostringstream msg;
1457 
1458   msg << "Process ID=" << srv_main_thread_process_no
1459       << ", Main thread ID=" << srv_main_thread_id
1460       << " , state=" << srv_main_thread_op_info;
1461 
1462   fprintf(file, "%s\n", msg.str().c_str());
1463 
1464   fprintf(file,
1465           "Number of rows inserted " ULINTPF ", updated " ULINTPF
1466           ", deleted " ULINTPF ", read " ULINTPF "\n",
1467           (ulint)srv_stats.n_rows_inserted, (ulint)srv_stats.n_rows_updated,
1468           (ulint)srv_stats.n_rows_deleted, (ulint)srv_stats.n_rows_read);
1469   fprintf(
1470       file,
1471       "%.2f inserts/s, %.2f updates/s,"
1472       " %.2f deletes/s, %.2f reads/s\n",
1473       ((ulint)srv_stats.n_rows_inserted - srv_n_rows_inserted_old) /
1474           time_elapsed,
1475       ((ulint)srv_stats.n_rows_updated - srv_n_rows_updated_old) / time_elapsed,
1476       ((ulint)srv_stats.n_rows_deleted - srv_n_rows_deleted_old) / time_elapsed,
1477       ((ulint)srv_stats.n_rows_read - srv_n_rows_read_old) / time_elapsed);
1478 
1479   fprintf(file,
1480           "Number of system rows inserted " ULINTPF ", updated " ULINTPF
1481           ", deleted " ULINTPF ", read " ULINTPF "\n",
1482           (ulint)srv_stats.n_system_rows_inserted,
1483           (ulint)srv_stats.n_system_rows_updated,
1484           (ulint)srv_stats.n_system_rows_deleted,
1485           (ulint)srv_stats.n_system_rows_read);
1486   fprintf(
1487       file,
1488       "%.2f inserts/s, %.2f updates/s,"
1489       " %.2f deletes/s, %.2f reads/s\n",
1490       ((ulint)srv_stats.n_system_rows_inserted -
1491        srv_n_system_rows_inserted_old) /
1492           time_elapsed,
1493       ((ulint)srv_stats.n_system_rows_updated - srv_n_system_rows_updated_old) /
1494           time_elapsed,
1495       ((ulint)srv_stats.n_system_rows_deleted - srv_n_system_rows_deleted_old) /
1496           time_elapsed,
1497       ((ulint)srv_stats.n_system_rows_read - srv_n_system_rows_read_old) /
1498           time_elapsed);
1499 
1500   srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1501   srv_n_rows_updated_old = srv_stats.n_rows_updated;
1502   srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1503   srv_n_rows_read_old = srv_stats.n_rows_read;
1504 
1505   srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
1506   srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
1507   srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
1508   srv_n_system_rows_read_old = srv_stats.n_system_rows_read;
1509 
1510   fputs(
1511       "----------------------------\n"
1512       "END OF INNODB MONITOR OUTPUT\n"
1513       "============================\n",
1514       file);
1515   mutex_exit(&srv_innodb_monitor_mutex);
1516   fflush(file);
1517 
1518   return (ret);
1519 }
1520 
1521 /** Function to pass InnoDB status variables to MySQL */
srv_export_innodb_status(void)1522 void srv_export_innodb_status(void) {
1523   buf_pool_stat_t stat;
1524   buf_pools_list_size_t buf_pools_list_size;
1525   ulint LRU_len;
1526   ulint free_len;
1527   ulint flush_list_len;
1528 
1529   buf_get_total_stat(&stat);
1530   buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1531   buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1532 
1533   mutex_enter(&srv_innodb_monitor_mutex);
1534 
1535   export_vars.innodb_data_pending_reads = os_n_pending_reads;
1536 
1537   export_vars.innodb_data_pending_writes = os_n_pending_writes;
1538 
1539   export_vars.innodb_data_pending_fsyncs =
1540       fil_n_pending_log_flushes + fil_n_pending_tablespace_flushes;
1541 
1542   export_vars.innodb_data_fsyncs = os_n_fsyncs;
1543 
1544   export_vars.innodb_data_read = srv_stats.data_read;
1545 
1546   export_vars.innodb_data_reads = os_n_file_reads;
1547 
1548   export_vars.innodb_data_writes = os_n_file_writes;
1549 
1550   export_vars.innodb_data_written = srv_stats.data_written;
1551 
1552   export_vars.innodb_buffer_pool_read_requests =
1553       Counter::total(stat.m_n_page_gets);
1554 
1555   export_vars.innodb_buffer_pool_write_requests =
1556       srv_stats.buf_pool_write_requests;
1557 
1558   export_vars.innodb_buffer_pool_wait_free = srv_stats.buf_pool_wait_free;
1559 
1560   export_vars.innodb_buffer_pool_pages_flushed = srv_stats.buf_pool_flushed;
1561 
1562   export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1563 
1564   export_vars.innodb_buffer_pool_read_ahead_rnd = stat.n_ra_pages_read_rnd;
1565 
1566   export_vars.innodb_buffer_pool_read_ahead = stat.n_ra_pages_read;
1567 
1568   export_vars.innodb_buffer_pool_read_ahead_evicted = stat.n_ra_pages_evicted;
1569 
1570   export_vars.innodb_buffer_pool_pages_data = LRU_len;
1571 
1572   export_vars.innodb_buffer_pool_bytes_data =
1573       buf_pools_list_size.LRU_bytes + buf_pools_list_size.unzip_LRU_bytes;
1574 
1575   export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1576 
1577   export_vars.innodb_buffer_pool_bytes_dirty =
1578       buf_pools_list_size.flush_list_bytes;
1579 
1580   export_vars.innodb_buffer_pool_pages_free = free_len;
1581 
1582 #ifdef UNIV_DEBUG
1583   export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number();
1584 #endif /* UNIV_DEBUG */
1585   export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1586 
1587   export_vars.innodb_buffer_pool_pages_misc =
1588       buf_pool_get_n_pages() - LRU_len - free_len;
1589 
1590   export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1591 
1592   export_vars.innodb_log_waits = srv_stats.log_waits;
1593 
1594   export_vars.innodb_os_log_written = srv_stats.os_log_written;
1595 
1596   export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1597 
1598   export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1599 
1600   export_vars.innodb_os_log_pending_writes = srv_stats.os_log_pending_writes;
1601 
1602   export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1603 
1604   export_vars.innodb_log_writes = srv_stats.log_writes;
1605 
1606   export_vars.innodb_dblwr_pages_written = srv_stats.dblwr_pages_written;
1607 
1608   export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1609 
1610   export_vars.innodb_pages_created = stat.n_pages_created;
1611 
1612   export_vars.innodb_pages_read = stat.n_pages_read;
1613 
1614   export_vars.innodb_pages_written = stat.n_pages_written;
1615 
1616   export_vars.innodb_redo_log_enabled = srv_redo_log;
1617 
1618   export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1619 
1620   export_vars.innodb_row_lock_current_waits =
1621       srv_stats.n_lock_wait_current_count;
1622 
1623   export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1624 
1625   if (srv_stats.n_lock_wait_count > 0) {
1626     export_vars.innodb_row_lock_time_avg = (ulint)(
1627         srv_stats.n_lock_wait_time / 1000 / srv_stats.n_lock_wait_count);
1628 
1629   } else {
1630     export_vars.innodb_row_lock_time_avg = 0;
1631   }
1632 
1633   export_vars.innodb_row_lock_time_max = lock_sys->n_lock_max_wait_time / 1000;
1634 
1635   export_vars.innodb_rows_read = srv_stats.n_rows_read;
1636 
1637   export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1638 
1639   export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1640 
1641   export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1642 
1643   export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read;
1644 
1645   export_vars.innodb_system_rows_inserted = srv_stats.n_system_rows_inserted;
1646 
1647   export_vars.innodb_system_rows_updated = srv_stats.n_system_rows_updated;
1648 
1649   export_vars.innodb_system_rows_deleted = srv_stats.n_system_rows_deleted;
1650 
1651   export_vars.innodb_sampled_pages_read = srv_stats.n_sampled_pages_read;
1652 
1653   export_vars.innodb_sampled_pages_skipped = srv_stats.n_sampled_pages_skipped;
1654 
1655   export_vars.innodb_num_open_files = fil_n_file_opened;
1656 
1657   export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
1658 
1659   export_vars.innodb_undo_tablespaces_implicit = FSP_IMPLICIT_UNDO_TABLESPACES;
1660 
1661   undo::spaces->s_lock();
1662 
1663   export_vars.innodb_undo_tablespaces_total = undo::spaces->size();
1664 
1665   export_vars.innodb_undo_tablespaces_explicit =
1666       export_vars.innodb_undo_tablespaces_total - FSP_IMPLICIT_UNDO_TABLESPACES;
1667 
1668   export_vars.innodb_undo_tablespaces_active = 0;
1669 
1670   for (auto undo_space : undo::spaces->m_spaces) {
1671     if (undo_space->is_active()) {
1672       export_vars.innodb_undo_tablespaces_active++;
1673     }
1674   }
1675   undo::spaces->s_unlock();
1676 
1677 #ifdef UNIV_DEBUG
1678   rw_lock_s_lock(&purge_sys->latch);
1679   trx_id_t done_trx_no = purge_sys->done.trx_no;
1680 
1681   /* Purge always deals with transaction end points represented by
1682   transaction number. We are allowed to purge transactions with number
1683   below the low limit. */
1684   ReadView oldest_view;
1685   trx_sys->mvcc->clone_oldest_view(&oldest_view);
1686   trx_id_t low_limit_no = oldest_view.view_low_limit_no();
1687 
1688   rw_lock_s_unlock(&purge_sys->latch);
1689 
1690   mutex_enter(&trx_sys->mutex);
1691   /* Maximum transaction number added to history list for purge. */
1692   trx_id_t max_trx_no = trx_sys->rw_max_trx_no;
1693   mutex_exit(&trx_sys->mutex);
1694 
1695   if (done_trx_no == 0 || max_trx_no < done_trx_no) {
1696     export_vars.innodb_purge_trx_id_age = 0;
1697   } else {
1698     /* Add 1 as done_trx_no always points to the next transaction ID. */
1699     export_vars.innodb_purge_trx_id_age = (ulint)(max_trx_no - done_trx_no + 1);
1700   }
1701 
1702   if (low_limit_no == 0 || max_trx_no < low_limit_no) {
1703     export_vars.innodb_purge_view_trx_id_age = 0;
1704   } else {
1705     /* Add 1 as low_limit_no always points to the next transaction ID. */
1706     export_vars.innodb_purge_view_trx_id_age =
1707         (ulint)(max_trx_no - low_limit_no + 1);
1708   }
1709 #endif /* UNIV_DEBUG */
1710 
1711   mutex_exit(&srv_innodb_monitor_mutex);
1712 }
1713 
1714 /** A thread which prints the info output by various InnoDB monitors. */
srv_monitor_thread()1715 void srv_monitor_thread() {
1716   int64_t sig_count;
1717   ib_time_monotonic_t current_time;
1718   ib_time_monotonic_t time_elapsed;
1719   ulint mutex_skipped;
1720   bool last_srv_print_monitor = srv_print_innodb_monitor;
1721 
1722   ut_ad(!srv_read_only_mode);
1723 
1724   auto last_monitor_time = ut_time_monotonic();
1725   srv_last_monitor_time = last_monitor_time;
1726 
1727   mutex_skipped = 0;
1728 
1729 loop:
1730   /* Wake up every 5 seconds to see if we need to print
1731   monitor information or if signaled at shutdown. */
1732 
1733   sig_count = os_event_reset(srv_monitor_event);
1734 
1735   os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1736 
1737   current_time = ut_time_monotonic();
1738 
1739   time_elapsed = current_time - last_monitor_time;
1740 
1741   if (time_elapsed > 15) {
1742     last_monitor_time = ut_time_monotonic();
1743 
1744     if (srv_print_innodb_monitor) {
1745       /* Reset mutex_skipped counter every time srv_print_innodb_monitor
1746       changes. This is to ensure we will not be blocked by lock_sys global latch
1747       for short duration information printing, such as requested by
1748       sync_array_print_long_waits() */
1749       if (!last_srv_print_monitor) {
1750         mutex_skipped = 0;
1751         last_srv_print_monitor = true;
1752       }
1753 
1754       if (!srv_printf_innodb_monitor(stderr, MUTEX_NOWAIT(mutex_skipped),
1755                                      nullptr, nullptr)) {
1756         mutex_skipped++;
1757       } else {
1758         /* Reset the counter */
1759         mutex_skipped = 0;
1760       }
1761     } else {
1762       last_srv_print_monitor = false;
1763     }
1764 
1765     /* We don't create the temp files or associated
1766     mutexes in read-only-mode */
1767 
1768     if (!srv_read_only_mode && srv_innodb_status) {
1769       mutex_enter(&srv_monitor_file_mutex);
1770       rewind(srv_monitor_file);
1771       if (!srv_printf_innodb_monitor(srv_monitor_file,
1772                                      MUTEX_NOWAIT(mutex_skipped), nullptr,
1773                                      nullptr)) {
1774         mutex_skipped++;
1775       } else {
1776         mutex_skipped = 0;
1777       }
1778 
1779       os_file_set_eof(srv_monitor_file);
1780       mutex_exit(&srv_monitor_file_mutex);
1781     }
1782   }
1783 
1784   if (srv_shutdown_state.load() < SRV_SHUTDOWN_CLEANUP) {
1785     goto loop;
1786   }
1787 }
1788 
1789 /** A thread which prints warnings about semaphore waits which have lasted
1790 too long. These can be used to track bugs which cause hangs. */
srv_error_monitor_thread()1791 void srv_error_monitor_thread() {
1792   /* number of successive fatal timeouts observed */
1793   ulint fatal_cnt = 0;
1794   lsn_t old_lsn;
1795   lsn_t new_lsn;
1796   int64_t sig_count;
1797   /* longest waiting thread for a semaphore */
1798   os_thread_id_t waiter = os_thread_get_curr_id();
1799   os_thread_id_t old_waiter = waiter;
1800   /* the semaphore that is being waited for */
1801   const void *sema = nullptr;
1802   const void *old_sema = nullptr;
1803 
1804   ut_ad(!srv_read_only_mode);
1805 
1806   old_lsn = log_get_lsn(*log_sys);
1807 
1808 loop:
1809   /* Try to track a strange bug reported by Harald Fuchs and others,
1810   where the lsn seems to decrease at times */
1811 
1812   new_lsn = log_get_lsn(*log_sys);
1813 
1814   if (new_lsn < old_lsn) {
1815     ib::error(ER_IB_MSG_1046, ulonglong{old_lsn}, ulonglong{new_lsn});
1816     ut_ad(0);
1817   }
1818 
1819   old_lsn = new_lsn;
1820 
1821   if (ut_difftime(ut_time_monotonic(), srv_last_monitor_time) > 60) {
1822     /* We referesh InnoDB Monitor values so that averages are
1823     printed from at most 60 last seconds */
1824 
1825     srv_refresh_innodb_monitor_stats();
1826   }
1827 
1828   /* Update the statistics collected for deciding LRU
1829   eviction policy. */
1830   buf_LRU_stat_update();
1831 
1832   /* In case mutex_exit is not a memory barrier, it is
1833   theoretically possible some threads are left waiting though
1834   the semaphore is already released. Wake up those threads: */
1835 
1836   sync_arr_wake_threads_if_sema_free();
1837 
1838   if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema &&
1839       os_thread_eq(waiter, old_waiter)) {
1840     fatal_cnt++;
1841     if (fatal_cnt > 10) {
1842       ib::fatal(ER_IB_MSG_1047, ulonglong{srv_fatal_semaphore_wait_threshold});
1843     }
1844   } else {
1845     fatal_cnt = 0;
1846     old_waiter = waiter;
1847     old_sema = sema;
1848   }
1849 
1850   /* Flush stderr so that a database user gets the output
1851   to possible MySQL error file */
1852 
1853   fflush(stderr);
1854 
1855   sig_count = os_event_reset(srv_error_event);
1856 
1857   os_event_wait_time_low(srv_error_event, 1000000, sig_count);
1858 
1859   if (srv_shutdown_state.load() < SRV_SHUTDOWN_CLEANUP) {
1860     goto loop;
1861   }
1862 }
1863 
1864 /** Increment the server activity count. */
srv_inc_activity_count(void)1865 void srv_inc_activity_count(void) { srv_sys->activity_count.inc(); }
1866 
1867 /** Check whether the master thread is active.
1868 This is polled during the final phase of shutdown.
1869 The first phase of server shutdown must have already been executed
1870 (or the server must not have been fully started up).
1871 @see srv_pre_dd_shutdown()
1872 @retval true	if any thread is active
1873 @retval false	if no thread is active */
srv_master_thread_is_active()1874 bool srv_master_thread_is_active() {
1875   return (srv_thread_is_active(srv_threads.m_master));
1876 }
1877 
1878 /** Tells the InnoDB server that there has been activity in the database
1879  and wakes up the master thread if it is suspended (not sleeping). Used
1880  in the MySQL interface. Note that there is a small chance that the master
1881  thread stays suspended (we do not protect our operation with the
1882  srv_sys_t->mutex, for performance reasons). */
srv_active_wake_master_thread_low()1883 void srv_active_wake_master_thread_low() {
1884   ut_ad(!srv_read_only_mode);
1885   ut_ad(!srv_sys_mutex_own());
1886 
1887   srv_inc_activity_count();
1888 
1889   if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
1890     srv_slot_t *slot;
1891 
1892     srv_sys_mutex_enter();
1893 
1894     slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
1895 
1896     /* Only if the master thread has been started. */
1897 
1898     if (slot->in_use) {
1899       ut_a(srv_slot_get_type(slot) == SRV_MASTER);
1900 
1901       if (slot->suspended) {
1902         slot->suspended = FALSE;
1903 
1904         ++srv_sys->n_threads_active[SRV_MASTER];
1905 
1906         os_event_set(slot->event);
1907       }
1908     }
1909 
1910     srv_sys_mutex_exit();
1911   }
1912 }
1913 
1914 /** Tells the purge thread that there has been activity in the database
1915  and wakes up the purge thread if it is suspended (not sleeping).  Note
1916  that there is a small chance that the purge thread stays suspended
1917  (we do not protect our check with the srv_sys_t:mutex and the
1918  purge_sys->latch, for performance reasons). */
srv_wake_purge_thread_if_not_active(void)1919 void srv_wake_purge_thread_if_not_active(void) {
1920   ut_ad(!srv_sys_mutex_own());
1921 
1922   if (purge_sys->state == PURGE_STATE_RUN &&
1923       srv_sys->n_threads_active[SRV_PURGE] == 0) {
1924     srv_release_threads(SRV_PURGE, 1);
1925   }
1926 }
1927 
1928 /** Wakes up the master thread if it is suspended or being suspended. */
srv_wake_master_thread(void)1929 void srv_wake_master_thread(void) {
1930   ut_ad(!srv_sys_mutex_own());
1931 
1932   srv_inc_activity_count();
1933 
1934   srv_release_threads(SRV_MASTER, 1);
1935 }
1936 
1937 /** Get current server activity count. We don't hold srv_sys::mutex while
1938  reading this value as it is only used in heuristics.
1939  @return activity count. */
srv_get_activity_count(void)1940 ulint srv_get_activity_count(void) { return (srv_sys->activity_count); }
1941 
1942 /** Check if there has been any activity.
1943  @return false if no change in activity counter. */
srv_check_activity(ulint old_activity_count)1944 ibool srv_check_activity(
1945     ulint old_activity_count) /*!< in: old activity count */
1946 {
1947   return (srv_sys->activity_count != old_activity_count);
1948 }
1949 
1950 /** Make room in the table cache by evicting an unused table.
1951  @return number of tables evicted. */
srv_master_evict_from_table_cache(ulint pct_check)1952 static ulint srv_master_evict_from_table_cache(
1953     ulint pct_check) /*!< in: max percent to check */
1954 {
1955   ulint n_tables_evicted = 0;
1956 
1957   rw_lock_x_lock(dict_operation_lock);
1958 
1959   dict_mutex_enter_for_mysql();
1960 
1961   n_tables_evicted =
1962       dict_make_room_in_cache(innobase_get_table_cache_size(), pct_check);
1963 
1964   dict_mutex_exit_for_mysql();
1965 
1966   rw_lock_x_unlock(dict_operation_lock);
1967 
1968   return (n_tables_evicted);
1969 }
1970 
1971 /** This function prints progress message every 60 seconds during server
1972  shutdown, for any activities that master thread is pending on. */
srv_shutdown_print_master_pending(ib_time_monotonic_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)1973 static void srv_shutdown_print_master_pending(
1974     ib_time_monotonic_t *last_print_time, /*!< last time the function
1975                                           print the message */
1976     ulint n_tables_to_drop,               /*!< number of tables to
1977                                           be dropped */
1978     ulint n_bytes_merged)                 /*!< number of change buffer
1979                                           just merged */
1980 {
1981   const auto current_time = ut_time_monotonic();
1982 
1983   const auto time_elapsed = current_time - *last_print_time;
1984 
1985   if (time_elapsed > 60) {
1986     *last_print_time = ut_time_monotonic();
1987 
1988     if (n_tables_to_drop) {
1989       ib::info(ER_IB_MSG_1048, ulonglong{n_tables_to_drop});
1990     }
1991 
1992     /* Check change buffer merge, we only wait for change buffer
1993     merge if it is a slow shutdown */
1994     if (!srv_fast_shutdown && n_bytes_merged) {
1995       ib::info(ER_IB_MSG_1049, ulonglong{n_bytes_merged});
1996     }
1997   }
1998 }
1999 
2000 #ifdef UNIV_DEBUG
2001 /** Waits in loop as long as master thread is disabled (debug) */
srv_master_do_disabled_loop(void)2002 static void srv_master_do_disabled_loop(void) {
2003   if (!srv_master_thread_disabled_debug) {
2004     /* We return here to avoid changing op_info. */
2005     return;
2006   }
2007 
2008   srv_main_thread_op_info = "disabled";
2009 
2010   while (srv_master_thread_disabled_debug) {
2011     os_event_set(srv_master_thread_disabled_event);
2012     if (srv_shutdown_state.load() >=
2013         SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2014       break;
2015     }
2016     os_thread_sleep(100000);
2017   }
2018 
2019   srv_main_thread_op_info = "";
2020 }
2021 
2022 /** Disables master thread. It's used by:
2023         SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
2024 @param[in]	thd		thread handle
2025 @param[in]	var		pointer to system variable
2026 @param[out]	var_ptr		where the formal string goes
2027 @param[in]	save		immediate result from check function */
srv_master_thread_disabled_debug_update(THD * thd,SYS_VAR * var,void * var_ptr,const void * save)2028 void srv_master_thread_disabled_debug_update(THD *thd, SYS_VAR *var,
2029                                              void *var_ptr, const void *save) {
2030   /* This method is protected by mutex, as every SET GLOBAL .. */
2031   ut_ad(srv_master_thread_disabled_event != nullptr);
2032 
2033   const bool disable = *static_cast<const bool *>(save);
2034 
2035   const int64_t sig_count = os_event_reset(srv_master_thread_disabled_event);
2036 
2037   srv_master_thread_disabled_debug = disable;
2038 
2039   if (disable) {
2040     os_event_wait_low(srv_master_thread_disabled_event, sig_count);
2041   }
2042 }
2043 #endif /* UNIV_DEBUG */
2044 
/** Calculates difference between two timeval values.
The result is negative when a is earlier than b.
@param[in]	a	later timeval
@param[in]	b	earlier timeval
@return a - b; number of microseconds between b and a */
MY_ATTRIBUTE((unused))
static int64_t timeval_diff_us(timeval a, timeval b) {
  /* Whole-second part of the difference, scaled below to microseconds. */
  const auto seconds_apart = a.tv_sec - b.tv_sec;

  return (seconds_apart * 1000000LL + a.tv_usec - b.tv_usec);
}
2053 
2054 #ifdef UNIV_LINUX
2055 
2056 /** Updates statistics about current CPU usage. */
srv_update_cpu_usage()2057 static void srv_update_cpu_usage() {
2058   using Clock = std::chrono::high_resolution_clock;
2059   using Clock_point = std::chrono::time_point<Clock>;
2060 
2061   static Clock_point last_time = Clock::now();
2062 
2063   static timeval last_cpu_utime;
2064   static timeval last_cpu_stime;
2065   static bool last_cpu_times_set = false;
2066 
2067   Clock_point cur_time = Clock::now();
2068 
2069   const auto time_diff = std::chrono::duration_cast<std::chrono::microseconds>(
2070                              cur_time - last_time)
2071                              .count();
2072 
2073   if (time_diff < 100 * 1000LL) {
2074     return;
2075   }
2076   last_time = cur_time;
2077 
2078   rusage usage;
2079   if (getrusage(RUSAGE_SELF, &usage) != 0) {
2080     return;
2081   }
2082 
2083   if (!last_cpu_times_set) {
2084     last_cpu_utime = usage.ru_utime;
2085     last_cpu_stime = usage.ru_stime;
2086     last_cpu_times_set = true;
2087     return;
2088   }
2089 
2090   const auto cpu_utime_diff = timeval_diff_us(usage.ru_utime, last_cpu_utime);
2091   last_cpu_utime = usage.ru_utime;
2092 
2093   const auto cpu_stime_diff = timeval_diff_us(usage.ru_stime, last_cpu_stime);
2094   last_cpu_stime = usage.ru_stime;
2095 
2096   /* Calculate absolute. */
2097 
2098   double cpu_utime = cpu_utime_diff * 100.0 / time_diff;
2099   MONITOR_SET(MONITOR_CPU_UTIME_ABS, int64_t(cpu_utime));
2100   srv_cpu_usage.utime_abs = cpu_utime;
2101 
2102   double cpu_stime = cpu_stime_diff * 100.0 / time_diff;
2103   MONITOR_SET(MONITOR_CPU_STIME_ABS, int64_t(cpu_stime));
2104   srv_cpu_usage.stime_abs = cpu_stime;
2105 
2106   /* Calculate relative. */
2107 
2108   cpu_set_t cs;
2109   CPU_ZERO(&cs);
2110   if (sched_getaffinity(0, sizeof(cs), &cs) != 0) {
2111     return;
2112   }
2113 
2114   int n_cpu = 0;
2115   constexpr int MAX_CPU_N = 128;
2116   for (int i = 0; i < MAX_CPU_N; ++i) {
2117     if (CPU_ISSET(i, &cs)) {
2118       ++n_cpu;
2119     }
2120   }
2121 
2122   srv_cpu_usage.n_cpu = n_cpu;
2123   MONITOR_SET(MONITOR_CPU_N, int64_t(n_cpu));
2124 
2125   if (n_cpu == 0) {
2126     return;
2127   }
2128 
2129   cpu_utime /= n_cpu;
2130   MONITOR_SET(MONITOR_CPU_UTIME_PCT, int64_t(cpu_utime));
2131   srv_cpu_usage.utime_pct = cpu_utime;
2132 
2133   cpu_stime /= n_cpu;
2134   MONITOR_SET(MONITOR_CPU_STIME_PCT, int64_t(cpu_stime));
2135   srv_cpu_usage.stime_pct = cpu_stime;
2136 }
2137 #else /* !UNIV_LINUX */
2138 #ifdef _WIN32
2139 /** Convert a FILETIME to microseconds.
2140 Do not cast a pointer to a FILETIME structure to either a ULARGE_INTEGER* or
2141 __int64* value because it can cause alignment faults on 64-bit Windows.
2142 */
FILETIME_to_microseconds(const FILETIME & ft)2143 static uint64 FILETIME_to_microseconds(const FILETIME &ft) {
2144   ULARGE_INTEGER ulg;
2145   ulg.HighPart = ft.dwHighDateTime;
2146   ulg.LowPart = ft.dwLowDateTime;
2147   return ulg.QuadPart / 10;
2148 }
2149 
2150 /** Updates statistics about current CPU usage. */
srv_update_cpu_usage()2151 static void srv_update_cpu_usage() {
2152   using Clock = std::chrono::high_resolution_clock;
2153   using Clock_point = std::chrono::time_point<Clock>;
2154 
2155   static Clock_point last_time = Clock::now();
2156 
2157   static uint64 last_cpu_utime;
2158   static uint64 last_cpu_stime;
2159   static bool last_cpu_times_set = false;
2160 
2161   Clock_point cur_time = Clock::now();
2162 
2163   const auto time_diff = std::chrono::duration_cast<std::chrono::microseconds>(
2164                              cur_time - last_time)
2165                              .count();
2166 
2167   if (time_diff < 100 * 1000LL) {
2168     return;
2169   }
2170   last_time = cur_time;
2171 
2172   FILETIME process_creation_time;
2173   FILETIME process_exit_time;
2174   FILETIME process_kernel_time;
2175   FILETIME process_user_time;
2176 
2177   if (!GetProcessTimes(GetCurrentProcess(), &process_creation_time,
2178                        &process_exit_time, &process_kernel_time,
2179                        &process_user_time)) {
2180     return;
2181   }
2182 
2183   uint64 cur_cpu_utime = FILETIME_to_microseconds(process_user_time);
2184   uint64 cur_cpu_stime = FILETIME_to_microseconds(process_kernel_time);
2185   if (!last_cpu_times_set) {
2186     last_cpu_utime = cur_cpu_utime;
2187     last_cpu_stime = cur_cpu_stime;
2188     last_cpu_times_set = true;
2189     return;
2190   }
2191 
2192   const auto cpu_utime_diff = cur_cpu_utime - last_cpu_utime;
2193   last_cpu_utime = cur_cpu_utime;
2194 
2195   const auto cpu_stime_diff = cur_cpu_stime - last_cpu_stime;
2196   last_cpu_stime = cur_cpu_stime;
2197 
2198   /* Calculate absolute. */
2199 
2200   double cpu_utime = cpu_utime_diff * 100.0 / time_diff;
2201   MONITOR_SET(MONITOR_CPU_UTIME_ABS, int64_t(cpu_utime));
2202   srv_cpu_usage.utime_abs = cpu_utime;
2203 
2204   double cpu_stime = cpu_stime_diff * 100.0 / time_diff;
2205   MONITOR_SET(MONITOR_CPU_STIME_ABS, int64_t(cpu_stime));
2206   srv_cpu_usage.stime_abs = cpu_stime;
2207 
2208   /* Calculate relative. */
2209 
2210   DWORD_PTR process_affinity_mask;
2211   DWORD_PTR system_affinity_mask;
2212   if (!GetProcessAffinityMask(GetCurrentProcess(), &process_affinity_mask,
2213                               &system_affinity_mask)) {
2214     return;
2215   }
2216 
2217   /* If the system has more than 64 processors and the current process
2218      contains threads in multiple groups, GetProcessAffinityMask returns
2219      zero for both affinity masks.
2220   */
2221   if ((process_affinity_mask == 0) && (system_affinity_mask == 0)) {
2222     return;
2223   }
2224 
2225   int n_cpu = 0;
2226   constexpr int MAX_CPU_N = 64;
2227   uint64 j = 1;
2228   for (int i = 0; i < MAX_CPU_N; ++i) {
2229     if (j & process_affinity_mask) {
2230       ++n_cpu;
2231     }
2232     j = j << 1;
2233   }
2234 
2235   srv_cpu_usage.n_cpu = n_cpu;
2236   MONITOR_SET(MONITOR_CPU_N, int64_t(n_cpu));
2237 
2238   if (n_cpu == 0) {
2239     return;
2240   }
2241 
2242   cpu_utime /= n_cpu;
2243   MONITOR_SET(MONITOR_CPU_UTIME_PCT, int64_t(cpu_utime));
2244   srv_cpu_usage.utime_pct = cpu_utime;
2245 
2246   cpu_stime /= n_cpu;
2247   MONITOR_SET(MONITOR_CPU_STIME_PCT, int64_t(cpu_stime));
2248   srv_cpu_usage.stime_pct = cpu_stime;
2249 }
2250 #else
srv_update_cpu_usage()2251 static void srv_update_cpu_usage() {
2252   srv_cpu_usage.utime_pct = 0;
2253   srv_cpu_usage.utime_abs = 0;
2254   srv_cpu_usage.stime_pct = 0;
2255   srv_cpu_usage.stime_abs = 0;
2256   srv_cpu_usage.n_cpu = 1;
2257 }
2258 #endif
2259 
2260 #endif /* UNIV_LINUX || WIN32 */
2261 
2262 /** Perform the tasks that the master thread is supposed to do when the
2263  server is active. There are two types of tasks. The first category is
2264  of such tasks which are performed at each inovcation of this function.
2265  We assume that this function is called roughly every second when the
2266  server is active. The second category is of such tasks which are
2267  performed at some interval e.g.: purge, dict_LRU cleanup etc. */
srv_master_do_active_tasks(void)2268 static void srv_master_do_active_tasks(void) {
2269   const auto cur_time = ut_time_monotonic();
2270   auto counter_time = ut_time_monotonic_us();
2271 
2272   /* First do the tasks that we are suppose to do at each
2273   invocation of this function. */
2274 
2275   ++srv_main_active_loops;
2276 
2277   MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2278 
2279   /* ALTER TABLE in MySQL requires on Unix that the table handler
2280   can drop tables lazily after there no longer are SELECT
2281   queries to them. */
2282   srv_main_thread_op_info = "doing background drop tables";
2283   row_drop_tables_for_mysql_in_background();
2284   MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2285                                  counter_time);
2286 
2287   ut_d(srv_master_do_disabled_loop());
2288 
2289   if (srv_shutdown_state.load() >=
2290       SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2291     return;
2292   }
2293 
2294   /* Do an ibuf merge */
2295   srv_main_thread_op_info = "doing insert buffer merge";
2296   counter_time = ut_time_monotonic_us();
2297   ibuf_merge_in_background(false);
2298   MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_IBUF_MERGE_MICROSECOND,
2299                                  counter_time);
2300 
2301   /* Now see if various tasks that are performed at defined
2302   intervals need to be performed. */
2303 
2304   if (srv_shutdown_state.load() >=
2305       SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2306     return;
2307   }
2308 
2309   srv_update_cpu_usage();
2310 
2311   if (trx_sys->rseg_history_len > 0) {
2312     srv_wake_purge_thread_if_not_active();
2313   }
2314 
2315   if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2316     srv_main_thread_op_info = "enforcing dict cache limit";
2317     ulint n_evicted = srv_master_evict_from_table_cache(50);
2318     if (n_evicted != 0) {
2319       MONITOR_INC_VALUE(MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2320     }
2321     MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_DICT_LRU_MICROSECOND,
2322                                    counter_time);
2323   }
2324 }
2325 
2326 /** Perform the tasks that the master thread is supposed to do whenever the
2327  server is idle. We do check for the server state during this function
2328  and if the server has entered the shutdown phase we may return from
2329  the function without completing the required tasks.
2330  Note that the server can move to active state when we are executing this
2331  function but we don't check for that as we are suppose to perform more
2332  or less same tasks when server is active. */
srv_master_do_idle_tasks(void)2333 static void srv_master_do_idle_tasks(void) {
2334   uintmax_t counter_time;
2335 
2336   ++srv_main_idle_loops;
2337 
2338   MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2339 
2340   /* ALTER TABLE in MySQL requires on Unix that the table handler
2341   can drop tables lazily after there no longer are SELECT
2342   queries to them. */
2343   counter_time = ut_time_monotonic_us();
2344   srv_main_thread_op_info = "doing background drop tables";
2345   row_drop_tables_for_mysql_in_background();
2346   MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2347                                  counter_time);
2348 
2349   ut_d(srv_master_do_disabled_loop());
2350 
2351   if (srv_shutdown_state.load() >=
2352       SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2353     return;
2354   }
2355 
2356   /* Do an ibuf merge */
2357   counter_time = ut_time_monotonic_us();
2358   srv_main_thread_op_info = "doing insert buffer merge";
2359   ibuf_merge_in_background(true);
2360   MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_IBUF_MERGE_MICROSECOND,
2361                                  counter_time);
2362 
2363   if (srv_shutdown_state.load() >=
2364       SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2365     return;
2366   }
2367 
2368   srv_update_cpu_usage();
2369 
2370   if (trx_sys->rseg_history_len > 0) {
2371     srv_wake_purge_thread_if_not_active();
2372   }
2373 
2374   srv_main_thread_op_info = "enforcing dict cache limit";
2375   ulint n_evicted = srv_master_evict_from_table_cache(100);
2376   if (n_evicted != 0) {
2377     MONITOR_INC_VALUE(MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2378   }
2379   MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_DICT_LRU_MICROSECOND,
2380                                  counter_time);
2381 }
2382 
2383 /** Perform the tasks during pre_dd_shutdown phase. The tasks that we do
2384  depend on srv_fast_shutdown:
2385  2 => very fast shutdown => do no book keeping
2386  0, 1 => normal or slow shutdown => clear drop table queue
2387  @param[in,out]   last_print_time       last time log message (about pending
2388                                         operations of shutdown) was printed
2389  @return true if there might be some work left to be done, false otherwise */
srv_master_do_pre_dd_shutdown_tasks(ib_time_monotonic_t * last_print_time)2390 static bool srv_master_do_pre_dd_shutdown_tasks(
2391     ib_time_monotonic_t *last_print_time) /*!< last time the function
2392                                           print the message */
2393 {
2394   ulint n_tables_to_drop = 0;
2395 
2396   ut_ad(!srv_read_only_mode);
2397 
2398   ++srv_main_shutdown_loops;
2399 
2400   ut_a(srv_shutdown_state_matches([](auto state) {
2401     return state == SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS ||
2402            state == SRV_SHUTDOWN_EXIT_THREADS;
2403   }));
2404 
2405   /* In very fast shutdown none of the following is necessary */
2406   if (srv_fast_shutdown == 2) {
2407     return (false);
2408   }
2409 
2410   /* ALTER TABLE in MySQL requires on Unix that the table handler
2411   can drop tables lazily after there no longer are SELECT
2412   queries to them. */
2413   if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2414     srv_main_thread_op_info = "doing background drop tables";
2415     n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2416   }
2417 
2418   /* Print progress message every 60 seconds during shutdown */
2419   srv_shutdown_print_master_pending(last_print_time, n_tables_to_drop, 0);
2420 
2421   return (n_tables_to_drop != 0);
2422 }
2423 
2424 /** Perform the tasks during shutdown. The tasks that we do at shutdown
2425  depend on srv_fast_shutdown:
2426  1, 2 => very fast shutdown => do no book keeping
2427  0 => slow shutdown => do ibuf merge
2428  @param[in,out]   last_print_time       last time log message (about pending
2429                                         operations of shutdown) was printed
2430  @return true if there might be some work left to be done, false otherwise */
srv_master_do_shutdown_tasks(ib_time_monotonic_t * last_print_time)2431 static bool srv_master_do_shutdown_tasks(
2432     ib_time_monotonic_t *last_print_time) /*!< last time the function
2433                                           print the message */
2434 {
2435   ulint n_bytes_merged = 0;
2436 
2437   ut_ad(!srv_read_only_mode);
2438 
2439   ++srv_main_shutdown_loops;
2440 
2441   ut_a(srv_shutdown_state_matches([](auto state) {
2442     return state == SRV_SHUTDOWN_MASTER_STOP ||
2443            state == SRV_SHUTDOWN_EXIT_THREADS;
2444   }));
2445 
2446   /* In very fast shutdown none of the following is necessary */
2447   if (srv_fast_shutdown >= 1) {
2448     return (false);
2449   }
2450 
2451   /* In case of slow shutdown we do ibuf merge (unless innodb_force_recovery
2452   is greater or equal to SRV_FORCE_NO_IBUF_MERGE). */
2453   if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2454     srv_main_thread_op_info = "doing insert buffer merge";
2455     n_bytes_merged = ibuf_merge_in_background(true);
2456   }
2457 
2458   /* Print progress message every 60 seconds during shutdown */
2459   srv_shutdown_print_master_pending(last_print_time, 0, n_bytes_merged);
2460 
2461   return (n_bytes_merged != 0);
2462 }
2463 
undo_rotate_default_master_key()2464 void undo_rotate_default_master_key() {
2465   fil_space_t *space;
2466 
2467   if (srv_shutdown_state.load() >= SRV_SHUTDOWN_CLEANUP) {
2468     return;
2469   }
2470 
2471   /* If the undo log space is using default key, rotate
2472   it. We need the server_uuid initialized, otherwise,
2473   the keyname will not contains server uuid. */
2474   if (Encryption::get_master_key_id() != 0 || srv_read_only_mode ||
2475       strlen(server_uuid) == 0) {
2476     return;
2477   }
2478 
2479   DBUG_EXECUTE_IF("skip_rotating_default_master_key", return;);
2480 
2481   undo::spaces->s_lock();
2482   for (auto undo_space : undo::spaces->m_spaces) {
2483     ut_ad(fsp_is_undo_tablespace(undo_space->id()));
2484 
2485     space = fil_space_get(undo_space->id());
2486 
2487     if (space == nullptr || space->encryption_type == Encryption::NONE) {
2488       continue;
2489     }
2490 
2491     byte encrypt_info[Encryption::INFO_SIZE];
2492     mtr_t mtr;
2493 
2494     ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2495 
2496     /* Make sure that there is enough reusable
2497     space in the redo log files. */
2498     log_free_check();
2499 
2500     mtr_start(&mtr);
2501 
2502     mtr_x_lock_space(space, &mtr);
2503 
2504     memset(encrypt_info, 0, Encryption::INFO_SIZE);
2505 
2506     if (!fsp_header_rotate_encryption(space, encrypt_info, &mtr)) {
2507       ib::error(ER_IB_MSG_1056, undo_space->space_name());
2508     } else {
2509       ib::info(ER_IB_MSG_1057, undo_space->space_name());
2510     }
2511     mtr_commit(&mtr);
2512   }
2513   undo::spaces->s_unlock();
2514 }
2515 
2516 /* Enable REDO tablespace encryption */
srv_enable_redo_encryption(bool is_boot)2517 bool srv_enable_redo_encryption(bool is_boot) {
2518   /* Start to encrypt the redo log block from now on. */
2519   fil_space_t *space = fil_space_get(dict_sys_t::s_log_space_first_id);
2520 
2521   /* While enabling encryption, make sure not to overwrite the tablespace
2522   key. */
2523   if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2524     return false;
2525   }
2526 
2527   dberr_t err;
2528   byte key[Encryption::KEY_LEN];
2529   byte iv[Encryption::KEY_LEN];
2530 
2531   Encryption::random_value(key);
2532   Encryption::random_value(iv);
2533 
2534   if (!log_write_encryption(key, iv, is_boot)) {
2535     ib::error(ER_IB_MSG_1243);
2536     return true;
2537   }
2538 
2539   fsp_flags_set_encryption(space->flags);
2540   err = fil_set_encryption(space->id, Encryption::AES, key, iv);
2541   if (err != DB_SUCCESS) {
2542     ib::warn(ER_IB_MSG_1244);
2543     return true;
2544   }
2545 
2546   /* Announce encryption is successfully enabled for the redo log. */
2547   ib::info(ER_IB_MSG_1245);
2548   return false;
2549 }
2550 
2551 /* Set encryption for UNDO tablespace with given space id. */
set_undo_tablespace_encryption(space_id_t space_id,mtr_t * mtr,bool is_boot)2552 bool set_undo_tablespace_encryption(space_id_t space_id, mtr_t *mtr,
2553                                     bool is_boot) {
2554   ut_ad(fsp_is_undo_tablespace(space_id));
2555   fil_space_t *space = fil_space_get(space_id);
2556 
2557   dberr_t err;
2558   byte encrypt_info[Encryption::INFO_SIZE];
2559   byte key[Encryption::KEY_LEN];
2560   byte iv[Encryption::KEY_LEN];
2561 
2562   Encryption::random_value(key);
2563   Encryption::random_value(iv);
2564 
2565   /* 0 fill encryption info */
2566   memset(encrypt_info, 0, Encryption::INFO_SIZE);
2567 
2568   /* Fill up encryption info to be set */
2569   if (!Encryption::fill_encryption_info(key, iv, encrypt_info, is_boot, true)) {
2570     ib::error(ER_IB_MSG_1052, space->name);
2571     return true;
2572   }
2573 
2574   uint32_t new_flags = space->flags | FSP_FLAGS_MASK_ENCRYPTION;
2575 
2576   /* Write encryption info on tablespace header page */
2577   if (!fsp_header_write_encryption(space->id, new_flags, encrypt_info, true,
2578                                    false, mtr)) {
2579     ib::error(ER_IB_MSG_1053, space->name);
2580     return true;
2581   }
2582 
2583   /* Update In-Mem encryption information for UNDO tablespace */
2584   fsp_flags_set_encryption(space->flags);
2585   err = fil_set_encryption(space->id, Encryption::AES, key, iv);
2586   if (err != DB_SUCCESS) {
2587     ib::error(ER_IB_MSG_1054, space->name, int{err}, ut_strerr(err));
2588     return true;
2589   }
2590 
2591   return false;
2592 }
2593 
2594 /* Enable UNDO tablespace encryption */
srv_enable_undo_encryption(bool is_boot)2595 bool srv_enable_undo_encryption(bool is_boot) {
2596   /* Make sure undo::ddl_mutex is owned. */
2597   ut_ad(mutex_own(&undo::ddl_mutex));
2598 
2599   /* Traverse over all UNDO tablespaces and mark them encrypted. */
2600   undo::spaces->s_lock();
2601   for (auto undo_space : undo::spaces->m_spaces) {
2602     /* Skip system tablespace. */
2603     if (undo_space->id() == TRX_SYS_SPACE) {
2604       continue;
2605     }
2606 
2607     fil_space_t *space = fil_space_get(undo_space->id());
2608     ut_ad(fsp_is_undo_tablespace(undo_space->id()));
2609 
2610     /* While enabling encryption, make sure not to overwrite the tablespace key.
2611     Otherwise, pages encrypted with the old tablespace key can't be read. */
2612     if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2613       continue;
2614     }
2615 
2616     undo_space->rsegs()->s_lock();
2617 
2618     /* Make sure that there is enough reusable space in the redo log files. */
2619     log_free_check();
2620 
2621     mtr_t mtr;
2622     mtr_start(&mtr);
2623     mtr_x_lock_space(space, &mtr);
2624 
2625     if (set_undo_tablespace_encryption(undo_space->id(), &mtr, is_boot)) {
2626       mtr_commit(&mtr);
2627       undo_space->rsegs()->s_unlock();
2628       undo::spaces->s_unlock();
2629       return true;
2630     }
2631 
2632     mtr_commit(&mtr);
2633     undo_space->rsegs()->s_unlock();
2634 
2635     /* Announce encryption is successfully enabled for the undo tablespace. */
2636     ib::info(ER_IB_MSG_1055, undo_space->space_name());
2637   }
2638   undo::spaces->s_unlock();
2639 
2640   return false;
2641 }
2642 
2643 /** Puts master thread to sleep. At this point we are using polling to
2644  service various activities. Master thread sleeps for one second before
2645  checking the state of the server again */
srv_master_sleep(void)2646 static void srv_master_sleep(void) {
2647   srv_main_thread_op_info = "sleeping";
2648   os_thread_sleep(1000000);
2649   srv_main_thread_op_info = "";
2650 }
2651 
2652 /** Check redo and undo log encryption and rotate default master key. */
srv_sys_check_set_encryption()2653 static void srv_sys_check_set_encryption() {
2654   /* Rotate default master key for redo log encryption if it is set */
2655   if (srv_redo_log_encrypt) {
2656     fil_space_t *space = fil_space_get(dict_sys_t::s_log_space_first_id);
2657     ut_a(space);
2658 
2659     /* Encryption for redo tablespace must already have been set. This is
2660     safeguard to encrypt it if not done earlier. */
2661     ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2662 
2663     if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2664       ib::warn(ER_IB_MSG_1285, space->name, "srv_redo_log_encrypt");
2665       srv_enable_redo_encryption(false);
2666     }
2667     redo_rotate_default_master_key();
2668   }
2669 
2670   if (!srv_undo_log_encrypt) {
2671     return;
2672   }
2673 
2674   /* Rotate default master key for undo log encryption if it is set */
2675   ut_ad(!undo::spaces->empty());
2676 
2677   mutex_enter(&undo::ddl_mutex);
2678 
2679   bool encrypt_undo = false;
2680   undo::spaces->s_lock();
2681   for (auto &undo_ts : undo::spaces->m_spaces) {
2682     fil_space_t *space = fil_space_get(undo_ts->id());
2683     ut_ad(space != nullptr);
2684 
2685     /* Encryption for undo tablespace must already have been set. This is
2686     safeguard to encrypt it if not done earlier. */
2687     ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2688     if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2689       ib::warn(ER_IB_MSG_1285, space->name, "srv_undo_log_encrypt");
2690       /* No need to loop further as srv_enable_undo_encryption() would
2691       loop through all UNDO tablespaces and encrypt. */
2692       encrypt_undo = true;
2693       break;
2694     }
2695   }
2696   undo::spaces->s_unlock();
2697 
2698   if (encrypt_undo) {
2699     ut_d(bool ret =) srv_enable_undo_encryption(false);
2700     ut_ad(!ret);
2701   }
2702   undo_rotate_default_master_key();
2703   mutex_exit(&undo::ddl_mutex);
2704 }
2705 
/** Suspends the master thread and waits on the event of the provided slot.
srv_main_thread_op_info is set to "suspending" before the slot is suspended
and to "waiting for server activity" before the wait on the event starts.
@param[in]   slot     slot reserved as SRV_MASTER */
static void srv_master_wait(srv_slot_t *slot) {
  srv_main_thread_op_info = "suspending";

  /* The value returned by srv_suspend_thread() is not used here. */
  srv_suspend_thread(slot);

  /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
  waits for database activity to die down when converting < 4.1.x
  databases, and relies on this string being exactly as it is. InnoDB
  manual also mentions this string in several places. */
  srv_main_thread_op_info = "waiting for server activity";

  /* Returns once the slot's event has been signalled. */
  os_event_wait(slot->event);
}
2721 
2722 /** Executes the main loop of the master thread.
2723 @param[in]   slot     slot reserved as SRV_MASTER */
srv_master_main_loop(srv_slot_t * slot)2724 static void srv_master_main_loop(srv_slot_t *slot) {
2725   if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2726     /* When innodb_force_recovery is at least SRV_FORCE_NO_BACKGROUND,
2727     we avoid performing active/idle master's tasks. However, we still
2728     need to ensure that:
2729       srv_shutdown_state >= SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS,
2730     after we exited srv_master_main_loop(). Keep waiting until that
2731     is satisfied and then exit. */
2732     while (srv_shutdown_state.load() <
2733            SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2734       srv_master_wait(slot);
2735     }
2736     return;
2737   }
2738 
2739   ulint old_activity_count = srv_get_activity_count();
2740 
2741   while (srv_shutdown_state.load() <
2742          SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2743     srv_master_sleep();
2744 
2745     MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
2746 
2747     /* Just in case - if there is not much free space in redo,
2748     try to avoid asking for troubles because of extra work
2749     performed in such background thread. */
2750     srv_main_thread_op_info = "checking free log space";
2751     log_free_check();
2752 
2753     if (srv_check_activity(old_activity_count)) {
2754       old_activity_count = srv_get_activity_count();
2755       srv_master_do_active_tasks();
2756     } else {
2757       srv_master_do_idle_tasks();
2758     }
2759 
2760     /* Make sure that early encryption processing of UNDO/REDO log is done. */
2761     if (!is_early_redo_undo_encryption_done()) {
2762       continue;
2763     }
2764 
2765     /* Let clone wait when redo/undo log encryption is set. If clone is already
2766     in progress we skip the check and come back later. */
2767     if (!clone_mark_wait()) {
2768       continue;
2769     }
2770 
2771     /* Check encryption property for system tablespaces. */
2772     srv_sys_check_set_encryption();
2773 
2774     /* Allow any blocking clone to progress. */
2775     clone_mark_free();
2776   }
2777 }
2778 
2779 /** Executes pre_dd_shutdown tasks in the master thread. */
srv_master_pre_dd_shutdown_loop()2780 static void srv_master_pre_dd_shutdown_loop() {
2781   ut_a(srv_shutdown_state_matches([](auto state) {
2782     return state == SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS ||
2783            state == SRV_SHUTDOWN_EXIT_THREADS;
2784   }));
2785   auto last_print_time = ut_time_monotonic();
2786   while (srv_shutdown_state.load() < SRV_SHUTDOWN_EXIT_THREADS &&
2787          srv_master_do_pre_dd_shutdown_tasks(&last_print_time)) {
2788     /* Shouldn't loop here in case of very fast shutdown */
2789     ut_ad(srv_fast_shutdown < 2);
2790   }
2791 }
2792 
2793 /** Executes shutdown tasks in the master thread. */
srv_master_shutdown_loop()2794 static void srv_master_shutdown_loop() {
2795   ut_a(srv_shutdown_state_matches([](auto state) {
2796     return state == SRV_SHUTDOWN_MASTER_STOP ||
2797            state == SRV_SHUTDOWN_EXIT_THREADS;
2798   }));
2799   auto last_print_time = ut_time_monotonic();
2800   while (srv_shutdown_state.load() < SRV_SHUTDOWN_EXIT_THREADS &&
2801          srv_master_do_shutdown_tasks(&last_print_time)) {
2802     /* Shouldn't loop here in case of very fast shutdown */
2803     ut_ad(srv_fast_shutdown < 2);
2804   }
2805 }
2806 
2807 /** The master thread controlling the server. */
srv_master_thread()2808 void srv_master_thread() {
2809   DBUG_TRACE;
2810 
2811   srv_slot_t *slot;
2812 
2813   THD *thd = create_thd(false, true, true, 0);
2814 
2815   ut_ad(!srv_read_only_mode);
2816 
2817   srv_main_thread_process_no = os_proc_get_number();
2818   srv_main_thread_id = os_thread_get_curr_id();
2819 
2820   slot = srv_reserve_slot(SRV_MASTER);
2821   ut_a(slot == srv_sys->sys_threads);
2822 
2823   srv_master_main_loop(slot);
2824 
2825   srv_master_pre_dd_shutdown_loop();
2826 
2827   os_event_set(srv_threads.m_master_ready_for_dd_shutdown);
2828 
2829   /* This is just for test scenarios. */
2830   srv_thread_delay_cleanup_if_needed(true);
2831 
2832   while (srv_shutdown_state.load() < SRV_SHUTDOWN_MASTER_STOP) {
2833     srv_master_wait(slot);
2834   }
2835 
2836   srv_master_shutdown_loop();
2837 
2838   srv_main_thread_op_info = "exiting";
2839   destroy_thd(thd);
2840 }
2841 
2842 /**
2843 Check if purge should stop.
2844 @return true if it should shutdown. */
srv_purge_should_exit(ulint n_purged)2845 static bool srv_purge_should_exit(
2846     ulint n_purged) /*!< in: pages purged in last batch */
2847 {
2848   switch (srv_shutdown_state.load()) {
2849     case SRV_SHUTDOWN_NONE:
2850     case SRV_SHUTDOWN_RECOVERY_ROLLBACK:
2851     case SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS:
2852       /* Normal operation. */
2853       break;
2854 
2855     case SRV_SHUTDOWN_PURGE:
2856       /* Exit unless slow shutdown requested or all done. */
2857       return (srv_fast_shutdown != 0 || n_purged == 0);
2858 
2859     case SRV_SHUTDOWN_EXIT_THREADS:
2860       return (true);
2861 
2862     case SRV_SHUTDOWN_LAST_PHASE:
2863     case SRV_SHUTDOWN_FLUSH_PHASE:
2864     case SRV_SHUTDOWN_MASTER_STOP:
2865     case SRV_SHUTDOWN_CLEANUP:
2866     case SRV_SHUTDOWN_DD:
2867       ut_error;
2868   }
2869 
2870   return (false);
2871 }
2872 
2873 /** Fetch and execute a task from the work queue.
2874  @return true if a task was executed */
srv_task_execute(void)2875 static bool srv_task_execute(void) {
2876   que_thr_t *thr = nullptr;
2877 
2878   ut_ad(!srv_read_only_mode);
2879   ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2880 
2881   mutex_enter(&srv_sys->tasks_mutex);
2882 
2883   if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2884     thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2885 
2886     ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2887 
2888     UT_LIST_REMOVE(srv_sys->tasks, thr);
2889   }
2890 
2891   mutex_exit(&srv_sys->tasks_mutex);
2892 
2893   if (thr != nullptr) {
2894     que_run_threads(thr);
2895 
2896     os_atomic_inc_ulint(&purge_sys->pq_mutex, &purge_sys->n_completed, 1);
2897   }
2898 
2899   return (thr != nullptr);
2900 }
2901 
2902 /** Worker thread that reads tasks from the work queue and executes them. */
srv_worker_thread()2903 void srv_worker_thread() {
2904   srv_slot_t *slot;
2905 
2906   ut_ad(!srv_read_only_mode);
2907   ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2908 
2909 #ifdef UNIV_PFS_THREAD
2910   THD *thd = create_thd(false, true, true, srv_worker_thread_key.m_value);
2911 #else
2912   THD *thd = create_thd(false, true, true, 0);
2913 #endif
2914   slot = srv_reserve_slot(SRV_WORKER);
2915 
2916   ut_a(srv_n_purge_threads > 1);
2917 
2918   srv_sys_mutex_enter();
2919 
2920   ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
2921 
2922   srv_sys_mutex_exit();
2923 
2924   /* We need to ensure that the worker threads exit after the
2925   purge coordinator thread. Otherwise the purge coordinaor can
2926   end up waiting forever in trx_purge_wait_for_workers_to_complete() */
2927 
2928   do {
2929     srv_suspend_thread(slot);
2930 
2931     os_event_wait(slot->event);
2932 
2933     if (srv_task_execute()) {
2934       /* If there are tasks in the queue, wakeup
2935       the purge coordinator thread. */
2936 
2937       srv_wake_purge_thread_if_not_active();
2938     }
2939 
2940     /* Note: we are checking the state without holding the
2941     purge_sys->latch here. */
2942   } while (purge_sys->state != PURGE_STATE_EXIT);
2943 
2944   srv_free_slot(slot);
2945 
2946   rw_lock_x_lock(&purge_sys->latch);
2947 
2948   ut_a(!purge_sys->running);
2949   ut_a(purge_sys->state == PURGE_STATE_EXIT);
2950   ut_a(srv_shutdown_state.load() >= SRV_SHUTDOWN_PURGE);
2951 
2952   rw_lock_x_unlock(&purge_sys->latch);
2953 
2954   destroy_thd(thd);
2955 }
2956 
2957 /** Do the actual purge operation.
2958  @return length of history list before the last purge batch. */
srv_do_purge(ulint * n_total_purged)2959 static ulint srv_do_purge(
2960     ulint *n_total_purged) /*!< in/out: total pages purged */
2961 {
2962   ulint n_pages_purged;
2963 
2964   static ulint count = 0;
2965   static ulint n_use_threads = 0;
2966   static ulint rseg_history_len = 0;
2967   ulint old_activity_count = srv_get_activity_count();
2968 
2969   const auto n_threads = srv_threads.m_purge_workers_n;
2970 
2971   ut_a(n_threads > 0);
2972   ut_ad(!srv_read_only_mode);
2973 
2974   /* Purge until there are no more records to purge and there is
2975   no change in configuration or server state. If the user has
2976   configured more than one purge thread then we treat that as a
2977   pool of threads and only use the extra threads if purge can't
2978   keep up with updates. */
2979 
2980   if (n_use_threads == 0) {
2981     n_use_threads = n_threads;
2982   }
2983 
2984   do {
2985     if (trx_sys->rseg_history_len > rseg_history_len ||
2986         (srv_max_purge_lag > 0 && rseg_history_len > srv_max_purge_lag)) {
2987       /* History length is now longer than what it was
2988       when we took the last snapshot. Use more threads. */
2989 
2990       if (n_use_threads < n_threads) {
2991         ++n_use_threads;
2992       }
2993 
2994     } else if (srv_check_activity(old_activity_count) && n_use_threads > 1) {
2995       /* History length same or smaller since last snapshot,
2996       use fewer threads. */
2997 
2998       --n_use_threads;
2999 
3000       old_activity_count = srv_get_activity_count();
3001     }
3002 
3003     /* Ensure that the purge threads are less than what
3004     was configured. */
3005 
3006     ut_a(n_use_threads > 0);
3007     ut_a(n_use_threads <= n_threads);
3008 
3009     /* Take a snapshot of the history list before purge. */
3010     if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
3011       break;
3012     }
3013 
3014     ulint undo_trunc_freq = purge_sys->undo_trunc.get_rseg_truncate_frequency();
3015 
3016     ulint rseg_truncate_frequency = ut_min(
3017         static_cast<ulint>(srv_purge_rseg_truncate_frequency), undo_trunc_freq);
3018 
3019     n_pages_purged = trx_purge(n_use_threads, srv_purge_batch_size,
3020                                (++count % rseg_truncate_frequency) == 0);
3021 
3022     *n_total_purged += n_pages_purged;
3023 
3024   } while (!srv_purge_should_exit(n_pages_purged) && n_pages_purged > 0 &&
3025            purge_sys->state == PURGE_STATE_RUN);
3026 
3027   return (rseg_history_len);
3028 }
3029 
3030 /** Suspend the purge coordinator thread. */
srv_purge_coordinator_suspend(srv_slot_t * slot,ulint rseg_history_len)3031 static void srv_purge_coordinator_suspend(
3032     srv_slot_t *slot,       /*!< in/out: Purge coordinator
3033                             thread slot */
3034     ulint rseg_history_len) /*!< in: history list length
3035                             before last purge */
3036 {
3037   ut_ad(!srv_read_only_mode);
3038   ut_a(slot->type == SRV_PURGE);
3039 
3040   bool stop = false;
3041 
3042   /** Maximum wait time on the purge event, in micro-seconds. */
3043   static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
3044 
3045   int64_t sig_count = srv_suspend_thread(slot);
3046 
3047   do {
3048     ulint ret;
3049 
3050     rw_lock_x_lock(&purge_sys->latch);
3051 
3052     purge_sys->running = false;
3053 
3054     rw_lock_x_unlock(&purge_sys->latch);
3055 
3056     /* We don't wait right away on the the non-timed wait because
3057     we want to signal the thread that wants to suspend purge. */
3058 
3059     if (stop) {
3060       os_event_wait_low(slot->event, sig_count);
3061       ret = 0;
3062     } else if (rseg_history_len <= trx_sys->rseg_history_len) {
3063       ret =
3064           os_event_wait_time_low(slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
3065     } else {
3066       /* We don't want to waste time waiting, if the
3067       history list increased by the time we got here,
3068       unless purge has been stopped. */
3069       ret = 0;
3070     }
3071 
3072     srv_sys_mutex_enter();
3073 
3074     /* The thread can be in state !suspended after the timeout
3075     but before this check if another thread sent a wakeup signal. */
3076 
3077     if (slot->suspended) {
3078       slot->suspended = FALSE;
3079       ++srv_sys->n_threads_active[slot->type];
3080       ut_a(srv_sys->n_threads_active[slot->type] == 1);
3081     }
3082 
3083     srv_sys_mutex_exit();
3084 
3085     sig_count = srv_suspend_thread(slot);
3086 
3087     rw_lock_x_lock(&purge_sys->latch);
3088 
3089     stop = (srv_shutdown_state.load() < SRV_SHUTDOWN_PURGE &&
3090             purge_sys->state == PURGE_STATE_STOP);
3091 
3092     if (!stop) {
3093       bool check = true;
3094       DBUG_EXECUTE_IF(
3095           "skip_purge_check_shutdown",
3096           if (srv_shutdown_state.load() >= SRV_SHUTDOWN_PURGE &&
3097               purge_sys->state == PURGE_STATE_STOP &&
3098               srv_fast_shutdown != 0) { check = false; };);
3099 
3100       if (check) {
3101         ut_a(purge_sys->n_stop == 0);
3102       }
3103       purge_sys->running = true;
3104     } else {
3105       ut_a(purge_sys->n_stop > 0);
3106 
3107       /* Signal that we are suspended. */
3108       os_event_set(purge_sys->event);
3109     }
3110 
3111     rw_lock_x_unlock(&purge_sys->latch);
3112 
3113     if (ret == OS_SYNC_TIME_EXCEEDED) {
3114       /* No new records added since wait started then simply
3115       wait for new records. The magic number 5000 is an
3116       approximation for the case where we have cached UNDO
3117       log records which prevent truncate of the UNDO
3118       segments. */
3119 
3120       if (rseg_history_len == trx_sys->rseg_history_len &&
3121           trx_sys->rseg_history_len < 5000) {
3122         stop = true;
3123       }
3124     }
3125 
3126   } while (stop);
3127 
3128   srv_sys_mutex_enter();
3129 
3130   if (slot->suspended) {
3131     slot->suspended = FALSE;
3132     ++srv_sys->n_threads_active[slot->type];
3133     ut_a(srv_sys->n_threads_active[slot->type] == 1);
3134   }
3135 
3136   srv_sys_mutex_exit();
3137 }
3138 
/** Purge coordinator thread that schedules the purge tasks.
It switches the purge system to PURGE_STATE_RUN, reserves the SRV_PURGE slot
and then alternates between suspending (when purge is stopped or the last
round purged nothing) and calling srv_do_purge(), until
srv_purge_should_exit() tells it to stop. On exit it runs the remaining
purge batches, sets PURGE_STATE_EXIT and releases the worker threads. */
void srv_purge_coordinator_thread() {
  srv_slot_t *slot;

#ifdef UNIV_PFS_THREAD
  THD *thd = create_thd(false, true, true, srv_purge_thread_key.m_value);
#else
  THD *thd = create_thd(false, true, true, 0);
#endif

  /* NOTE(review): presumably ULINT_UNDEFINED is non-zero, so that the very
  first loop iteration is not treated as "nothing was purged" - confirm. */
  ulint n_total_purged = ULINT_UNDEFINED;

  ut_ad(!srv_read_only_mode);
  ut_a(srv_n_purge_threads >= 1);
  ut_a(trx_purge_state() == PURGE_STATE_INIT);
  ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);

  /* Mark purge as running; both fields are changed under the exclusive
  purge_sys latch. */
  rw_lock_x_lock(&purge_sys->latch);

  purge_sys->running = true;
  purge_sys->state = PURGE_STATE_RUN;

  rw_lock_x_unlock(&purge_sys->latch);

  slot = srv_reserve_slot(SRV_PURGE);

  /* History list length handed to srv_purge_coordinator_suspend(); it is
  refreshed from the return value of every srv_do_purge() call. */
  ulint rseg_history_len = trx_sys->rseg_history_len;

  do {
    /* If there are no records to purge or the last
    purge didn't purge any records then wait for activity. */

    if (srv_shutdown_state.load() < SRV_SHUTDOWN_PURGE &&
        (purge_sys->state == PURGE_STATE_STOP || n_total_purged == 0)) {
      srv_purge_coordinator_suspend(slot, rseg_history_len);
    }

    /* The shutdown state may have changed while suspended, so check again
    before starting a new purge round. */
    if (srv_purge_should_exit(n_total_purged)) {
      ut_a(!slot->suspended);
      break;
    }

    n_total_purged = 0;

    rseg_history_len = srv_do_purge(&n_total_purged);

  } while (!srv_purge_should_exit(n_total_purged));

  /* This is just for test scenarios. Do not pass thd here,
  because it would lead to wait on event then, and we would
  never exit the srv_pre_dd_shutdown() which waits for this
  thread to exit. That's because the signal for which we
  would wait is signalled in srv_shutdown which happens
  after the srv_pre_dd_shutdown is ended. */
  srv_thread_delay_cleanup_if_needed(false);

  /* Ensure that we don't jump out of the loop unless the
  exit condition is satisfied. */

  ut_a(srv_purge_should_exit(n_total_purged));

  /* Starts at ULINT_MAX so that the loop below is entered whenever
  srv_fast_shutdown == 0. */
  ulint n_pages_purged = ULINT_MAX;

  /* Ensure that all records are purged if it is not a fast shutdown.
  This covers the case where a record can be added after we exit the
  loop above. */
  while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
    n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
  }

  /* This trx_purge is called to remove any undo records (added by
  background threads) after completion of the above loop. When
  srv_fast_shutdown != 0, a large batch size can cause significant
  delay in shutdown, so reducing the batch size to magic number 20
  (which was default in 5.5), which we hope will be sufficient to
  remove all the undo records */
  const uint temp_batch_size = 20;

  /* The batch size used is the smaller of srv_purge_batch_size and
  temp_batch_size. */
  n_pages_purged =
      trx_purge(1,
                srv_purge_batch_size <= temp_batch_size ? srv_purge_batch_size
                                                        : temp_batch_size,
                true);
  ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);

  /* The task queue should always be empty, independent of fast
  shutdown state. */
  ut_a(srv_get_task_queue_length() == 0);

  srv_free_slot(slot);

  /* Note that we are shutting down. */
  rw_lock_x_lock(&purge_sys->latch);

  purge_sys->state = PURGE_STATE_EXIT;

  /* Clear out any pending undo-tablespaces to truncate and reset
  the list as we plan to shutdown the purge thread. */
  purge_sys->undo_trunc.reset();

  purge_sys->running = false;

  rw_lock_x_unlock(&purge_sys->latch);

  /* Ensure that all the worker threads quit. */
  if (srv_n_purge_threads > 1) {
    srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
  }

  /* This is just for test scenarios. Do not pass thd here.
  For explanation look at comment for similar usage above. */
  srv_thread_delay_cleanup_if_needed(false);

  destroy_thd(thd);
}
3254 
3255 /** Enqueues a task to server task queue and releases a worker thread, if there
3256  is a suspended one. */
srv_que_task_enqueue_low(que_thr_t * thr)3257 void srv_que_task_enqueue_low(que_thr_t *thr) /*!< in: query thread */
3258 {
3259   ut_ad(!srv_read_only_mode);
3260   mutex_enter(&srv_sys->tasks_mutex);
3261 
3262   UT_LIST_ADD_LAST(srv_sys->tasks, thr);
3263 
3264   mutex_exit(&srv_sys->tasks_mutex);
3265 
3266   srv_release_threads(SRV_WORKER, 1);
3267 }
3268 
3269 /** Get count of tasks in the queue.
3270  @return number of tasks in queue */
srv_get_task_queue_length(void)3271 ulint srv_get_task_queue_length(void) {
3272   ulint n_tasks;
3273 
3274   ut_ad(!srv_read_only_mode);
3275 
3276   mutex_enter(&srv_sys->tasks_mutex);
3277 
3278   n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
3279 
3280   mutex_exit(&srv_sys->tasks_mutex);
3281 
3282   return (n_tasks);
3283 }
3284 
3285 /** Wakeup the purge threads. */
srv_purge_wakeup(void)3286 void srv_purge_wakeup(void) {
3287   ut_ad(!srv_read_only_mode);
3288 
3289   if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
3290     srv_release_threads(SRV_PURGE, 1);
3291 
3292     if (srv_threads.m_purge_workers_n > 1) {
3293       /* SRV_PURGE is not counted here. */
3294       ulint n_workers = srv_threads.m_purge_workers_n - 1;
3295 
3296       srv_release_threads(SRV_WORKER, n_workers);
3297     }
3298   }
3299 }
3300 
3301 /** Check if the purge threads are active, both coordinator and worker threads
3302 @return true if any thread is active, false if no thread is active */
srv_purge_threads_active()3303 bool srv_purge_threads_active() {
3304   if (srv_threads.m_purge_workers == nullptr) {
3305 #ifdef UNIV_DEBUG
3306     ut_a(srv_read_only_mode);
3307 #endif /* UNIV_DEBUG */
3308     ut_ad(!srv_thread_is_active(srv_threads.m_purge_coordinator));
3309     return (false);
3310   }
3311 
3312   for (size_t i = 0; i < srv_threads.m_purge_workers_n; ++i) {
3313     if (srv_thread_is_active(srv_threads.m_purge_workers[i])) {
3314       ut_ad(!srv_read_only_mode);
3315       return (true);
3316     }
3317   }
3318 
3319   ut_ad(!srv_thread_is_active(srv_threads.m_purge_coordinator));
3320 
3321   return (false);
3322 }
3323 
srv_thread_is_active(const IB_thread & thread)3324 bool srv_thread_is_active(const IB_thread &thread) {
3325   return (thread_is_active(thread));
3326 }
3327 
3328 #endif /* !UNIV_HOTBACKUP */
3329 
srv_get_server_errmsgs(int errcode)3330 const char *srv_get_server_errmsgs(int errcode) {
3331   return (error_message_for_error_log(errcode));
3332 }
3333 
/** Sets the srv_redo_log flag. The flag is written while
srv_innodb_monitor_mutex is held.
@param[in]  enable  new value of srv_redo_log */
void set_srv_redo_log(bool enable) {
  mutex_enter(&srv_innodb_monitor_mutex);
  srv_redo_log = enable;
  mutex_exit(&srv_innodb_monitor_mutex);
}
3339