1 /*****************************************************************************
2
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License, version 2.0, as published by the
22 Free Software Foundation.
23
24 This program is also distributed with certain software (including but not
25 limited to OpenSSL) that is licensed under separate terms, as designated in a
26 particular file or component or in included license documentation. The authors
27 of MySQL hereby grant you an additional permission to link the program and
28 your derivative works with the separately licensed software that they have
29 included with MySQL.
30
31 This program is distributed in the hope that it will be useful, but WITHOUT
32 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
34 for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39
40 *****************************************************************************/
41
42 /** @file srv/srv0srv.cc
43 The database server main program
44
45 Created 10/8/1995 Heikki Tuuri
46 *******************************************************/
47
48 #ifndef UNIV_HOTBACKUP
49 #include <mysqld.h>
50 #include <sys/types.h>
51 #include <time.h>
52
53 #include <chrono>
54
55 #include "btr0sea.h"
56 #include "buf0flu.h"
57 #include "buf0lru.h"
58 #include "clone0api.h"
59 #include "dict0boot.h"
60 #include "dict0load.h"
61 #include "dict0stats_bg.h"
62 #include "fsp0sysspace.h"
63 #include "ha_prototypes.h"
64 #endif /* !UNIV_HOTBACKUP */
65 #include "ibuf0ibuf.h"
66 #ifndef UNIV_HOTBACKUP
67 #include "lock0lock.h"
68 #include "log0recv.h"
69 #include "mem0mem.h"
70 #include "os0proc.h"
71 #include "os0thread-create.h"
72 #include "pars0pars.h"
73 #include "que0que.h"
74 #include "row0mysql.h"
75 #include "sql_thd_internal_api.h"
76 #include "srv0mon.h"
77
78 #include "my_dbug.h"
79 #include "my_psi_config.h"
80
81 #endif /* !UNIV_HOTBACKUP */
82 #include "srv0srv.h"
83 #include "srv0start.h"
84 #include "sync0sync.h"
85 #ifndef UNIV_HOTBACKUP
86 #include "trx0i_s.h"
87 #include "trx0purge.h"
88 #include "usr0sess.h"
89 #include "ut0crc32.h"
90 #endif /* !UNIV_HOTBACKUP */
91 #include "ut0mem.h"
92
93 #ifdef UNIV_HOTBACKUP
94 #include "page0size.h"
95 #else
96 /** Structure with state of srv background threads. */
97 Srv_threads srv_threads;
98
99 /** Structure with cpu usage information. */
100 Srv_cpu_usage srv_cpu_usage;
101 #endif /* UNIV_HOTBACKUP */
102
103 #ifdef INNODB_DD_TABLE
104 /* true when upgrading. */
105 bool srv_is_upgrade_mode = false;
106 bool srv_downgrade_logs = false;
107 bool srv_upgrade_old_undo_found = false;
108 #endif /* INNODB_DD_TABLE */
109
110 /* Revert to old partition file name if upgrade fails. */
111 bool srv_downgrade_partition_files = false;
112
113 /* The following is the maximum allowed duration of a lock wait. */
114 ulong srv_fatal_semaphore_wait_threshold = 600;
115
116 /* How much data manipulation language (DML) statements need to be delayed,
117 in microseconds, in order to reduce the lagging of the purge thread. */
118 ulint srv_dml_needed_delay = 0;
119
120 const char *srv_main_thread_op_info = "";
121
122 /* Server parameters which are read from the initfile */
123
124 /* The following three are dir paths which are catenated before file
125 names, where the file name itself may also contain a path */
126
127 char *srv_data_home = nullptr;
128
129 /** Separate directory for doublewrite files, if it is not nullptr */
130 char *srv_doublewrite_dir = nullptr;
131
132 /** The innodb_directories variable value. This is a list of directories
133 delimited by ';', i.e. the FIL_PATH_SEPARATOR. */
134 char *srv_innodb_directories = nullptr;
135
136 /** Undo tablespace directories. This can be multiple paths
137 separated by ';' and can also be absolute paths. */
138 char *srv_undo_dir = nullptr;
139
140 /** The number of implicit undo tablespaces to use for rollback
141 segments. */
142 ulong srv_undo_tablespaces = FSP_IMPLICIT_UNDO_TABLESPACES;
143
144 #ifndef UNIV_HOTBACKUP
145 /* The number of rollback segments per tablespace */
146 ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
147
148 /* Used for the deprecated setting innodb_undo_logs. This will still get
149 put into srv_rollback_segments if it is set to a non-default value. */
150 ulong srv_undo_logs = 0;
151 const char *deprecated_undo_logs =
152 "The parameter innodb_undo_logs is deprecated"
153 " and may be removed in future releases."
154 " Please use innodb_rollback_segments instead."
155 " See " REFMAN "innodb-undo-logs.html";
156
157 /** Rate at which UNDO records should be purged. */
158 ulong srv_purge_rseg_truncate_frequency =
159 static_cast<ulong>(undo::TRUNCATE_FREQUENCY);
160 #endif /* !UNIV_HOTBACKUP */
161
162 /** Enable or Disable Truncate of UNDO tablespace.
163 Note: If enabled then UNDO tablespace will be selected for truncate.
164 If the user disables it while the server is waiting for an undo tablespace
165 to be truncated, the truncate action is still completed but no new tablespace
166 is marked for truncate (the action is never aborted). */
167 bool srv_undo_log_truncate = FALSE;
168
169 /** Enable or disable Encrypt of UNDO tablespace. */
170 bool srv_undo_log_encrypt = FALSE;
171
172 /** Maximum size of undo tablespace. */
173 unsigned long long srv_max_undo_tablespace_size;
174
175 /** Default undo tablespace size in UNIV_PAGEs count (10MB). */
176 const page_no_t SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
177 ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
178
179 /** Maximum number of recently truncated undo tablespace IDs for
180 the same undo number. */
181 const size_t CONCURRENT_UNDO_TRUNCATE_LIMIT =
182 dict_sys_t::undo_space_id_range / 8;
183
184 /** Set if InnoDB must operate in read-only mode. We don't do any
185 recovery and open all tables in RO mode instead of RW mode. We don't
186 sync the max trx id to disk either. */
187 bool srv_read_only_mode;
188
189 /** Store each table created by a user in its own file; data
190 dictionary tables are in the system tablespace 0 */
191 bool srv_file_per_table;
192
193 /** Sort buffer size in index creation */
194 ulong srv_sort_buf_size = 1048576;
195 /** Maximum modification log file size for online index creation */
196 unsigned long long srv_online_max_size;
197 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
198 is greater than SRV_FORCE_NO_TRX_UNDO. */
199 bool high_level_read_only;
200
201 /** Number of threads to use for parallel reads. */
202 ulong srv_parallel_read_threads;
203
204 /** If this flag is true, then we will use the native aio of the
205 OS (provided we compiled Innobase with it in), otherwise we will
206 use simulated aio we build below with threads. */
207 bool srv_use_native_aio = false;
208
209 bool srv_numa_interleave = false;
210
211 #ifdef UNIV_DEBUG
212 /** Force all user tables to use page compression. */
213 ulong srv_debug_compress;
214 /** Set when InnoDB has invoked exit(). */
215 bool innodb_calling_exit;
216 /** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
217 bool srv_master_thread_disabled_debug;
218 #ifndef UNIV_HOTBACKUP
219 /** Event used to inform that master thread is disabled. */
220 static os_event_t srv_master_thread_disabled_event;
221 #endif /* !UNIV_HOTBACKUP */
222 #endif /* UNIV_DEBUG */
223
224 /*------------------------- LOG FILES ------------------------ */
225 char *srv_log_group_home_dir = nullptr;
226
227 /** Enable or disable Encrypt of REDO tablespace. */
228 bool srv_redo_log_encrypt = false;
229
230 ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
231
232 #ifdef UNIV_DEBUG_DEDICATED
233 ulong srv_debug_system_mem_size;
234 #endif /* UNIV_DEBUG_DEDICATED */
235
236 /** At startup, this is the current redo log file size.
237 During startup, if this is different from srv_log_file_size_requested
238 (innodb_log_file_size), the redo log will be rebuilt and this size
239 will be initialized to srv_log_file_size_requested.
240 When upgrading from a previous redo log format, this will be set to 0,
241 and writing to the redo log is not allowed. Expressed in bytes. */
242 ulonglong srv_log_file_size;
243
244 /** The value of the startup parameter innodb_log_file_size. */
245 ulonglong srv_log_file_size_requested;
246
247 /** Space for log buffer, expressed in bytes. Note, that log buffer
248 will use only the largest power of two, which is not greater than
249 the assigned space. */
250 ulong srv_log_buffer_size;
251
252 /** Size of block, used for writing ahead to avoid read-on-write. */
253 ulong srv_log_write_ahead_size;
254
255 /** Minimum absolute value of cpu time for which spin-delay is used. */
256 uint srv_log_spin_cpu_abs_lwm;
257
258 /** Maximum percentage of cpu time for which spin-delay is used. */
259 uint srv_log_spin_cpu_pct_hwm;
260
261 /** Maximum value of average log flush time for which spin-delay is used.
262 When flushing takes longer, user threads no longer spin when waiting for
263 flushed redo. Expressed in microseconds. */
264 ulong srv_log_wait_for_flush_spin_hwm;
265
266 /* EXPERIMENTAL sys vars below - we need defaults set explicitly here. */
267
268 /** When log writer follows links in the log recent written buffer,
269 it stops when it has reached at least that many bytes to write,
270 limiting how many bytes can be written in single call. */
271 ulong srv_log_write_max_size = INNODB_LOG_WRITE_MAX_SIZE_DEFAULT;
272
273 /** Number of events used for notifications about redo write. */
274 ulong srv_log_write_events = INNODB_LOG_EVENTS_DEFAULT;
275
276 /** Number of events used for notifications about redo flush. */
277 ulong srv_log_flush_events = INNODB_LOG_EVENTS_DEFAULT;
278
279 /** Number of slots in a small buffer, which is used to allow concurrent
280 writes to log buffer. The slots are addressed by LSN values modulo number
281 of the slots. */
282 ulong srv_log_recent_written_size = INNODB_LOG_RECENT_WRITTEN_SIZE_DEFAULT;
283
284 /** Number of slots in a small buffer, which is used to break requirement
285 for total order of dirty pages, when they are added to flush lists.
286 The slots are addressed by LSN values modulo number of the slots. */
287 ulong srv_log_recent_closed_size = INNODB_LOG_RECENT_CLOSED_SIZE_DEFAULT;
288
289 /** Number of spin iterations, when spinning and waiting for log buffer
290 written up to given LSN, before we fallback to loop with sleeps.
291 This is not used when user thread has to wait for log flushed to disk. */
292 ulong srv_log_wait_for_write_spin_delay =
293 INNODB_LOG_WAIT_FOR_WRITE_SPIN_DELAY_DEFAULT;
294
295 /** Timeout used when waiting for redo write (microseconds). */
296 ulong srv_log_wait_for_write_timeout =
297 INNODB_LOG_WAIT_FOR_WRITE_TIMEOUT_DEFAULT;
298
299 /** Number of spin iterations, when spinning and waiting for log flushed. */
300 ulong srv_log_wait_for_flush_spin_delay =
301 INNODB_LOG_WAIT_FOR_FLUSH_SPIN_DELAY_DEFAULT;
302
303 /** Timeout used when waiting for redo flush (microseconds). */
304 ulong srv_log_wait_for_flush_timeout =
305 INNODB_LOG_WAIT_FOR_FLUSH_TIMEOUT_DEFAULT;
306
307 /** Number of spin iterations, for which log writer thread is waiting
308 for new data to write or flush without sleeping. */
309 ulong srv_log_writer_spin_delay = INNODB_LOG_WRITER_SPIN_DELAY_DEFAULT;
310
311 /** Initial timeout used to wait on writer_event. */
312 ulong srv_log_writer_timeout = INNODB_LOG_WRITER_TIMEOUT_DEFAULT;
313
314 /** Interval, in milliseconds, at which a periodical checkpoint is written
315 by the log checkpointer thread (unless periodical checkpoints are disabled,
316 which is the case during the initial phase of startup). */
317 ulong srv_log_checkpoint_every = INNODB_LOG_CHECKPOINT_EVERY_DEFAULT;
318
319 /** Number of spin iterations, for which log flusher thread is waiting
320 for new data to flush, without sleeping. */
321 ulong srv_log_flusher_spin_delay = INNODB_LOG_FLUSHER_SPIN_DELAY_DEFAULT;
322
323 /** Initial timeout used to wait on flusher_event. */
324 ulong srv_log_flusher_timeout = INNODB_LOG_FLUSHER_TIMEOUT_DEFAULT;
325
326 /** Number of spin iterations, for which log write notifier thread is waiting
327 for advanced flushed_to_disk_lsn without sleeping. */
328 ulong srv_log_write_notifier_spin_delay =
329 INNODB_LOG_WRITE_NOTIFIER_SPIN_DELAY_DEFAULT;
330
331 /** Initial timeout used to wait on write_notifier_event. */
332 ulong srv_log_write_notifier_timeout =
333 INNODB_LOG_WRITE_NOTIFIER_TIMEOUT_DEFAULT;
334
335 /** Number of spin iterations, for which log flush notifier thread is waiting
336 for advanced flushed_to_disk_lsn without sleeping. */
337 ulong srv_log_flush_notifier_spin_delay =
338 INNODB_LOG_FLUSH_NOTIFIER_SPIN_DELAY_DEFAULT;
339
340 /** Initial timeout used to wait on flush_notifier_event. */
341 ulong srv_log_flush_notifier_timeout =
342 INNODB_LOG_FLUSH_NOTIFIER_TIMEOUT_DEFAULT;
343
344 /** Number of spin iterations, for which log closer thread is waiting
345 for a reachable untraversed link in recent_closed. */
346 ulong srv_log_closer_spin_delay = INNODB_LOG_CLOSER_SPIN_DELAY_DEFAULT;
347
348 /** Initial sleep used in log closer after spin delay is finished. */
349 ulong srv_log_closer_timeout = INNODB_LOG_CLOSER_TIMEOUT_DEFAULT;
350
351 /* End of EXPERIMENTAL sys vars */
352
353 /** Whether to generate and require checksums on the redo log pages. */
354 bool srv_log_checksums;
355
356 #ifdef UNIV_DEBUG
357
358 bool srv_checkpoint_disabled = false;
359
360 bool srv_inject_too_many_concurrent_trxs = false;
361
362 #endif /* UNIV_DEBUG */
363
364 ulong srv_flush_log_at_trx_commit = 1;
365 uint srv_flush_log_at_timeout = 1;
366 ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
367 ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
368
369 page_size_t univ_page_size(0, 0, false);
370
371 /* Try to flush dirty pages so as to avoid IO bursts at
372 the checkpoints. */
373 bool srv_adaptive_flushing = TRUE;
374
375 /* Allow IO bursts at the checkpoints ignoring io_capacity setting. */
376 bool srv_flush_sync = TRUE;
377
378 /** Maximum number of failed conditional (non-blocking) mutex acquisition
379 attempts tolerated before switching to a blocking wait on the mutex. */
380 #define MAX_MUTEX_NOWAIT 20
381
382 /** Evaluates to true while mutex_skipped, the number of failed nonblocking
383 mutex acquisition attempts, is still below MAX_MUTEX_NOWAIT. Once it is
384 false, srv_printf_innodb_monitor() is expected to request the mutex
385 with mutex_enter(), which will wait until it gets the mutex. */
386 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
387
388 /** Dedicated server setting */
389 bool srv_dedicated_server = true;
390 /** Requested size in bytes */
391 ulint srv_buf_pool_size = ULINT_MAX;
392 /** Minimum pool size in bytes */
393 const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
394 /** Default pool size in bytes */
395 const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
396 /** Maximum pool size in bytes */
397 const longlong srv_buf_pool_max_size = LLONG_MAX;
398 /** Requested buffer pool chunk size. Each buffer pool instance consists
399 of one or more chunks. */
400 ulonglong srv_buf_pool_chunk_unit;
401 /** Minimum buffer pool chunk size. */
402 const ulonglong srv_buf_pool_chunk_unit_min = (1024 * 1024);
403 /** The buffer pool chunk size must be a multiple of this number. */
404 const ulonglong srv_buf_pool_chunk_unit_blk_sz = (1024 * 1024);
405 /** Maximum buffer pool chunk size. */
406 const ulonglong srv_buf_pool_chunk_unit_max =
407 srv_buf_pool_max_size / MAX_BUFFER_POOLS;
408 /** Requested number of buffer pool instances */
409 ulong srv_buf_pool_instances;
410 /** Default number of buffer pool instances */
411 const ulong srv_buf_pool_instances_default = 0;
412 /** Number of locks to protect buf_pool->page_hash */
413 ulong srv_n_page_hash_locks = 16;
414 /** Whether to validate InnoDB tablespace paths on startup */
415 bool srv_validate_tablespace_paths = true;
416 /** Scan depth for LRU flush batch, i.e. the number of blocks scanned */
417 ulong srv_LRU_scan_depth = 1024;
418 /** Whether or not to flush neighbors of a block */
419 ulong srv_flush_neighbors = 1;
420 /** Previously requested size. Accesses protected by memory barriers. */
421 ulint srv_buf_pool_old_size = 0;
422 /** Current size as scaling factor for the other components */
423 ulint srv_buf_pool_base_size = 0;
424 /** Current size in bytes */
425 long long srv_buf_pool_curr_size = 0;
426 /** Dump this % of each buffer pool during BP dump */
427 ulong srv_buf_pool_dump_pct;
428 /** Lock table size in bytes */
429 ulint srv_lock_table_size = ULINT_MAX;
430
431 const ulong srv_idle_flush_pct_default = 100;
432 ulong srv_idle_flush_pct = srv_idle_flush_pct_default;
433
434 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
435 instead. */
436 ulong srv_n_read_io_threads;
437 ulong srv_n_write_io_threads;
438
439 /* Switch to enable random read ahead. */
440 bool srv_random_read_ahead = FALSE;
441 /* User settable value of the number of pages that must be present
442 in the buffer cache and accessed sequentially for InnoDB to trigger a
443 readahead request. */
444 ulong srv_read_ahead_threshold = 56;
445
446 /** Maximum on-disk size of change buffer in terms of percentage
447 of the buffer pool. */
448 uint srv_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
449
450 #ifndef _WIN32
451 enum srv_unix_flush_t srv_unix_file_flush_method = SRV_UNIX_FSYNC;
452 #else
453 enum srv_win_flush_t srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
454 #endif /* _WIN32 */
455
456 ulint srv_max_n_open_files = 300;
457
458 /* Number of IO operations per second the server can do */
459 ulong srv_io_capacity = 200;
460 ulong srv_max_io_capacity = 400;
461
462 /* The number of page cleaner threads to use.*/
463 ulong srv_n_page_cleaners = 4;
464
465 /* The InnoDB main thread tries to keep the ratio of modified pages
466 in the buffer pool to all database pages in the buffer pool smaller than
467 the following number. But it is not guaranteed that the value stays below
468 that during a time of heavy update/insert activity. */
469
470 double srv_max_buf_pool_modified_pct = 75.0;
471 double srv_max_dirty_pages_pct_lwm = 0.0;
472
473 /* This is the percentage of log capacity at which adaptive flushing,
474 if enabled, will kick in. */
475 ulong srv_adaptive_flushing_lwm = 10;
476
477 /* Number of iterations over which adaptive flushing is averaged. */
478 ulong srv_flushing_avg_loops = 30;
479
480 /* The number of purge threads to use.*/
481 ulong srv_n_purge_threads = 4;
482
483 /* the number of pages to purge in one batch */
484 ulong srv_purge_batch_size = 20;
485
486 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
487 NULL value when collecting statistics. By default, it is set to
488 SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
489 ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
490
491 #ifndef UNIV_HOTBACKUP
492 srv_stats_t srv_stats;
493 #endif /* !UNIV_HOTBACKUP */
494
495 /* structure to pass status variables to MySQL */
496 export_var_t export_vars;
497
498 /** Normally 0. When nonzero, skip some phases of crash recovery,
499 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
500 by SELECT or mysqldump. When this is nonzero, we do not allow any user
501 modifications to the data. */
502 ulong srv_force_recovery;
503 #ifdef UNIV_DEBUG
504 /** Inject a crash at different steps of the recovery process.
505 This is for testing and debugging only. */
506 ulong srv_force_recovery_crash;
507 #endif /* UNIV_DEBUG */
508
509 /** Print all user-level transactions deadlocks to mysqld stderr */
510 bool srv_print_all_deadlocks = FALSE;
511
512 /** Print all DDL logs to mysqld stderr */
513 bool srv_print_ddl_logs = false;
514
515 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
516 bool srv_cmp_per_index_enabled = FALSE;
517
518 /** If innodb redo logging is enabled. */
519 bool srv_redo_log = true;
520
521 /** The value of the configuration parameter innodb_fast_shutdown,
522 controlling the InnoDB shutdown.
523
524 If innodb_fast_shutdown=0, InnoDB shutdown will purge all undo log
525 records (except XA PREPARE transactions) and complete the merge of the
526 entire change buffer, and then shut down the redo log.
527
528 If innodb_fast_shutdown=1, InnoDB shutdown will only flush the buffer
529 pool to data files, cleanly shutting down the redo log.
530
531 If innodb_fast_shutdown=2, shutdown will effectively 'crash' InnoDB
532 (but lose no committed transactions). */
533 ulong srv_fast_shutdown;
534
535 /* Generate an innodb_status.<pid> file */
536 ibool srv_innodb_status = FALSE;
537
538 /* When estimating number of different key values in an index, sample
539 this many index pages, there are 2 ways to calculate statistics:
540 * persistent stats that are calculated by ANALYZE TABLE and saved
541 in the innodb database.
542 * quick transient stats, that are used if persistent stats for the given
543 table/index are not found in the innodb database */
544 unsigned long long srv_stats_transient_sample_pages = 8;
545 bool srv_stats_persistent = TRUE;
546 bool srv_stats_include_delete_marked = FALSE;
547 unsigned long long srv_stats_persistent_sample_pages = 20;
548 bool srv_stats_auto_recalc = TRUE;
549
550 ulong srv_replication_delay = 0;
551
552 bool srv_apply_log_only = false;
553
554 bool srv_backup_mode = false;
555 bool srv_close_files = true;
556 bool srv_rollback_prepared_trx = false;
557
558 /*-------------------------------------------*/
559 ulong srv_n_spin_wait_rounds = 30;
560 ulong srv_spin_wait_delay = 6;
561 ibool srv_priority_boost = TRUE;
562
563 #ifndef UNIV_HOTBACKUP
564 static ulint srv_n_rows_inserted_old = 0;
565 static ulint srv_n_rows_updated_old = 0;
566 static ulint srv_n_rows_deleted_old = 0;
567 static ulint srv_n_rows_read_old = 0;
568
569 static ulint srv_n_system_rows_inserted_old = 0;
570 static ulint srv_n_system_rows_updated_old = 0;
571 static ulint srv_n_system_rows_deleted_old = 0;
572 static ulint srv_n_system_rows_read_old = 0;
573 #endif /* !UNIV_HOTBACKUP */
574
575 ulint srv_truncated_status_writes = 0;
576
577 bool srv_print_innodb_monitor = FALSE;
578 bool srv_print_innodb_lock_monitor = FALSE;
579
580 /* Array of English strings describing the current state of an
581 i/o handler thread */
582
583 const char *srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
584 const char *srv_io_thread_function[SRV_MAX_N_IO_THREADS];
585
586 #ifndef UNIV_HOTBACKUP
587 static ib_time_monotonic_t srv_last_monitor_time;
588 #endif /* !UNIV_HOTBACKUP */
589
590 static ib_mutex_t srv_innodb_monitor_mutex;
591
592 /** Mutex protecting page_zip_stat_per_index */
593 ib_mutex_t page_zip_stat_per_index_mutex;
594
595 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
596 ib_mutex_t srv_monitor_file_mutex;
597
598 /** Temporary file for innodb monitor output */
599 FILE *srv_monitor_file;
600 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
601 This mutex has a very low rank; threads reserving it should not
602 acquire any further latches or sleep before releasing this one. */
603 ib_mutex_t srv_misc_tmpfile_mutex;
604 /** Temporary file for miscellaneous diagnostic output */
605 FILE *srv_misc_tmpfile;
606
607 #ifndef UNIV_HOTBACKUP
608 static ulint srv_main_thread_process_no = 0;
609 static os_thread_id_t srv_main_thread_id = 0;
610
611 /* The following counts are used by the srv_master_thread. */
612
613 /** Iterations of the loop bounded by 'srv_active' label. */
614 static ulint srv_main_active_loops = 0;
615 /** Iterations of the loop bounded by the 'srv_idle' label. */
616 static ulint srv_main_idle_loops = 0;
617 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
618 static ulint srv_main_shutdown_loops = 0;
619 /** Log writes involving flush. */
620 static ulint srv_log_writes_and_flush = 0;
621
622 #endif /* !UNIV_HOTBACKUP */
623
624 /* Interval in seconds at which various tasks are performed by the
625 master thread when server is active. In order to balance the workload,
626 we should try to keep intervals such that they are not multiple of
627 each other. For example, if we have intervals for various tasks
628 defined as 5, 10, 15, 60 then all tasks will be performed when
629 current_time % 60 == 0 and no tasks will be performed when
630 current_time % 5 != 0. */
631
632 #define SRV_MASTER_DICT_LRU_INTERVAL (47)
633
634 /** Acquire srv_sys->mutex via mutex_enter(). */
635 #define srv_sys_mutex_enter() \
636 do { \
637 mutex_enter(&srv_sys->mutex); \
638 } while (0)
639
640 /** Evaluates mutex_own(&srv_sys->mutex); always false in read-only mode. */
641 #define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) && !srv_read_only_mode)
642
643 /** Release srv_sys->mutex via mutex_exit(). */
644 #define srv_sys_mutex_exit() \
645 do { \
646 mutex_exit(&srv_sys->mutex); \
647 } while (0)
648
649 #ifndef UNIV_HOTBACKUP
650 /*
651 IMPLEMENTATION OF THE SERVER MAIN PROGRAM
652 =========================================
653
654 There is the following analogue between this database
655 server and an operating system kernel:
656
657 DB concept equivalent OS concept
658 ---------- ---------------------
659 transaction -- process;
660
661 query thread -- thread;
662
663 lock -- semaphore;
664
665 kernel -- kernel;
666
667 query thread execution:
668 (a) without lock mutex
669 reserved -- process executing in user mode;
670 (b) with lock mutex reserved
671 -- process executing in kernel mode;
672
673 The server has several background threads all running at the same
674 priority as user threads. It periodically checks if there is anything
675 happening in the server which requires intervention of the master
676 thread. Such situations may be, for example, when flushing of dirty
677 blocks is needed in the buffer pool or old version of database rows
678 have to be cleaned away (purged). The user can configure a separate
679 dedicated purge thread(s) too, in which case the master thread does not
680 do any purging.
681
682 The threads which we call user threads serve the queries of the MySQL
683 server. They run at normal priority.
684
685 When there is no activity in the system, also the master thread
686 suspends itself to wait for an event making the server totally silent.
687
688 There is still one complication in our server design. If a
689 background utility thread obtains a resource (e.g., mutex) needed by a user
690 thread, and there is also some other user activity in the system,
691 the user thread may have to wait indefinitely long for the
692 resource, as the OS does not schedule a background thread if
693 there is some other runnable user thread. This problem is called
694 priority inversion in real-time programming.
695
696 One solution to the priority inversion problem would be to keep record
697 of which thread owns which resource and in the above case boost the
698 priority of the background thread so that it will be scheduled and it
699 can release the resource. This solution is called priority inheritance
700 in real-time programming. A drawback of this solution is that the overhead
701 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
702 MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
703 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
704 that the thread cannot store the information in the resource, say mutex,
705 itself, because competing threads could wipe out the information if it is
706 stored before acquiring the mutex, and if it stored afterwards, the
707 information is outdated for the time of one machine instruction, at least.
708 (To be precise, the information could be stored to lock_word in mutex if
709 the machine supports atomic swap.)
710
711 The above solution with priority inheritance may become relevant in the
712 future; currently we do not implement any priority twiddling solution.
713 Our general aim is to reduce the contention of all mutexes by making
714 them more fine grained.
715
716 The thread table contains information of the current status of each
717 thread existing in the system, and also the event semaphores used in
718 suspending the master thread and utility threads when they have nothing
719 to do. The thread table can be seen as an analogue to the process table
720 in a traditional Unix implementation. */
721
722 /** The server system struct: task queue plus the table of server threads. */
723 struct srv_sys_t {
724 ib_mutex_t tasks_mutex; /*!< mutex protecting the
725 tasks queue */
726 UT_LIST_BASE_NODE_T(que_thr_t)
727 tasks; /*!< task queue of que_thr_t nodes */
728
729 ib_mutex_t mutex; /*!< mutex protecting the
730 fields below */
731 ulint n_sys_threads; /*!< number of slots in the
732 sys_threads array */
733
734 srv_slot_t *sys_threads; /*!< server thread table */
735
736 ulint n_threads_active[SRV_MASTER + 1];
737 /*!< number of threads active in each thread class;
738 SRV_MASTER is the highest valid index */
739
740 srv_stats_t::ulint_ctr_1_t activity_count; /*!< counter for tracking
741 server activity */
742 };
743
744 static srv_sys_t *srv_sys = nullptr;
745
746 /** Event to signal the monitor thread. */
747 os_event_t srv_monitor_event;
748
749 /** Event to signal the error thread */
750 os_event_t srv_error_event;
751
752 /** Event to signal the buffer pool dump/load thread */
753 os_event_t srv_buf_dump_event;
754
755 /** Event to signal the buffer pool resize thread */
756 os_event_t srv_buf_resize_event;
757
758 /** The buffer pool dump/load file name */
759 char *srv_buf_dump_filename;
760
761 /** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
762 and/or load it during startup. */
763 bool srv_buffer_pool_dump_at_shutdown = true;
764 bool srv_buffer_pool_load_at_startup = true;
765
766 /** Slot index in the srv_sys->sys_threads array for the purge thread. */
767 static const ulint SRV_PURGE_SLOT = 1;
768
769 /** Slot index in the srv_sys->sys_threads array for the master thread. */
770 static const ulint SRV_MASTER_SLOT = 0;
771
/* The ALTER TABLE, ALTER TABLESPACE and buffer pool load stage events below
are only defined when HAVE_PSI_STAGE_INTERFACE is set. The clone stage events
that follow the #endif are defined unconditionally. */
#ifdef HAVE_PSI_STAGE_INTERFACE
/** Performance schema stage event for monitoring ALTER TABLE progress
everything after flush log_make_latest_checkpoint(). */
PSI_stage_info srv_stage_alter_table_end = {
    0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
log_make_latest_checkpoint(). */
PSI_stage_info srv_stage_alter_table_flush = {
    0, "alter table (flush)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
row_merge_insert_index_tuples(). */
PSI_stage_info srv_stage_alter_table_insert = {
    0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
row_log_apply(). */
PSI_stage_info srv_stage_alter_table_log_index = {
    0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
row_log_table_apply(). */
PSI_stage_info srv_stage_alter_table_log_table = {
    0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
row_merge_sort(). */
PSI_stage_info srv_stage_alter_table_merge_sort = {
    0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLE progress
row_merge_read_clustered_index(). */
PSI_stage_info srv_stage_alter_table_read_pk_internal_sort = {
    0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring ALTER TABLESPACE
ENCRYPTION progress. */
PSI_stage_info srv_stage_alter_tablespace_encryption = {
    0, "alter tablespace (encryption)", PSI_FLAG_STAGE_PROGRESS,
    PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring buffer pool load progress. */
PSI_stage_info srv_stage_buffer_pool_load = {
    0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
#endif /* HAVE_PSI_STAGE_INTERFACE */

/** Performance schema stage event for monitoring clone file copy progress. */
PSI_stage_info srv_stage_clone_file_copy = {
    0, "clone (file copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring clone redo copy progress. */
PSI_stage_info srv_stage_clone_redo_copy = {
    0, "clone (redo copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};

/** Performance schema stage event for monitoring clone page copy progress. */
PSI_stage_info srv_stage_clone_page_copy = {
    0, "clone (page copy)", PSI_FLAG_STAGE_PROGRESS, PSI_DOCUMENT_ME};
833
834 /** Prints counters for work done by srv_master_thread. */
srv_print_master_thread_info(FILE * file)835 static void srv_print_master_thread_info(FILE *file) /* in: output stream */
836 {
837 fprintf(file,
838 "srv_master_thread loops: " ULINTPF " srv_active, " ULINTPF
839 " srv_shutdown, " ULINTPF " srv_idle\n",
840 srv_main_active_loops, srv_main_shutdown_loops, srv_main_idle_loops);
841 fprintf(file, "srv_master_thread log flush and writes: " ULINTPF "\n",
842 srv_log_writes_and_flush);
843 }
844 #endif /* !UNIV_HOTBACKUP */
845
846 /** Sets the info describing an i/o thread current state. */
srv_set_io_thread_op_info(ulint i,const char * str)847 void srv_set_io_thread_op_info(
848 ulint i, /*!< in: the 'segment' of the i/o thread */
849 const char *str) /*!< in: constant char string describing the
850 state */
851 {
852 ut_a(i < SRV_MAX_N_IO_THREADS);
853
854 srv_io_thread_op_info[i] = str;
855 }
856
857 /** Resets the info describing an i/o thread current state. */
srv_reset_io_thread_op_info()858 void srv_reset_io_thread_op_info() {
859 for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
860 srv_io_thread_op_info[i] = "not started yet";
861 }
862 }
863
864 #ifndef UNIV_HOTBACKUP
865 #ifdef UNIV_DEBUG
866 /** Validates the type of a thread table slot.
867 @return true if ok */
srv_thread_type_validate(srv_thread_type type)868 static ibool srv_thread_type_validate(
869 srv_thread_type type) /*!< in: thread type */
870 {
871 switch (type) {
872 case SRV_NONE:
873 break;
874 case SRV_WORKER:
875 case SRV_PURGE:
876 case SRV_MASTER:
877 return (TRUE);
878 }
879 ut_error;
880 }
881 #endif /* UNIV_DEBUG */
882
883 /** Gets the type of a thread table slot.
884 @return thread type */
srv_slot_get_type(const srv_slot_t * slot)885 static srv_thread_type srv_slot_get_type(
886 const srv_slot_t *slot) /*!< in: thread slot */
887 {
888 srv_thread_type type = slot->type;
889 ut_ad(srv_thread_type_validate(type));
890 return (type);
891 }
892
893 /** Reserves a slot in the thread table for the current thread.
894 @return reserved slot */
srv_reserve_slot(srv_thread_type type)895 static srv_slot_t *srv_reserve_slot(
896 srv_thread_type type) /*!< in: type of the thread */
897 {
898 srv_slot_t *slot = nullptr;
899
900 srv_sys_mutex_enter();
901
902 ut_ad(srv_thread_type_validate(type));
903
904 switch (type) {
905 case SRV_MASTER:
906 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
907 break;
908
909 case SRV_PURGE:
910 slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
911 break;
912
913 case SRV_WORKER:
914 /* Find an empty slot, skip the master and purge slots. */
915 for (slot = &srv_sys->sys_threads[2]; slot->in_use; ++slot) {
916 ut_a(slot < &srv_sys->sys_threads[srv_sys->n_sys_threads]);
917 }
918 break;
919
920 case SRV_NONE:
921 ut_error;
922 }
923
924 ut_a(!slot->in_use);
925
926 slot->in_use = TRUE;
927 slot->suspended = FALSE;
928 slot->type = type;
929
930 ut_ad(srv_slot_get_type(slot) == type);
931
932 ++srv_sys->n_threads_active[type];
933
934 srv_sys_mutex_exit();
935
936 return (slot);
937 }
938
939 /** Suspends the calling thread to wait for the event in its thread slot.
940 @return the current signal count of the event. */
srv_suspend_thread_low(srv_slot_t * slot)941 static int64_t srv_suspend_thread_low(
942 srv_slot_t *slot) /*!< in/out: thread slot */
943 {
944 ut_ad(!srv_read_only_mode);
945 ut_ad(srv_sys_mutex_own());
946
947 ut_ad(slot->in_use);
948
949 srv_thread_type type = srv_slot_get_type(slot);
950
951 switch (type) {
952 case SRV_NONE:
953 ut_error;
954
955 case SRV_MASTER:
956 /* We have only one master thread and it
957 should be the first entry always. */
958 ut_a(srv_sys->n_threads_active[type] == 1);
959 break;
960
961 case SRV_PURGE:
962 /* We have only one purge coordinator thread
963 and it should be the second entry always. */
964 ut_a(srv_sys->n_threads_active[type] == 1);
965 break;
966
967 case SRV_WORKER:
968 ut_a(srv_n_purge_threads > 1);
969 ut_a(srv_sys->n_threads_active[type] > 0);
970 break;
971 }
972
973 ut_a(!slot->suspended);
974 slot->suspended = TRUE;
975
976 ut_a(srv_sys->n_threads_active[type] > 0);
977
978 srv_sys->n_threads_active[type]--;
979
980 return (os_event_reset(slot->event));
981 }
982
983 /** Suspends the calling thread to wait for the event in its thread slot.
984 @return the current signal count of the event. */
srv_suspend_thread(srv_slot_t * slot)985 static int64_t srv_suspend_thread(srv_slot_t *slot) /*!< in/out: thread slot */
986 {
987 srv_sys_mutex_enter();
988
989 int64_t sig_count = srv_suspend_thread_low(slot);
990
991 srv_sys_mutex_exit();
992
993 return (sig_count);
994 }
995
996 /** Releases threads of the type given from suspension in the thread table.
997 NOTE! The server mutex has to be reserved by the caller!
998 @return number of threads released: this may be less than n if not
999 enough threads were suspended at the moment. */
srv_release_threads(srv_thread_type type,ulint n)1000 ulint srv_release_threads(srv_thread_type type, /*!< in: thread type */
1001 ulint n) /*!< in: number of threads to release */
1002 {
1003 ulint i;
1004 ulint count = 0;
1005
1006 ut_ad(srv_thread_type_validate(type));
1007 ut_ad(n > 0);
1008
1009 srv_sys_mutex_enter();
1010
1011 for (i = 0; i < srv_sys->n_sys_threads; i++) {
1012 srv_slot_t *slot;
1013
1014 slot = &srv_sys->sys_threads[i];
1015
1016 if (slot->in_use && srv_slot_get_type(slot) == type && slot->suspended) {
1017 switch (type) {
1018 case SRV_NONE:
1019 ut_error;
1020
1021 case SRV_MASTER:
1022 /* We have only one master thread and it
1023 should be the first entry always. */
1024 ut_a(n == 1);
1025 ut_a(i == SRV_MASTER_SLOT);
1026 ut_a(srv_sys->n_threads_active[type] == 0);
1027 break;
1028
1029 case SRV_PURGE:
1030 /* We have only one purge coordinator thread
1031 and it should be the second entry always. */
1032 ut_a(n == 1);
1033 ut_a(i == SRV_PURGE_SLOT);
1034 ut_a(srv_n_purge_threads > 0);
1035 ut_a(srv_sys->n_threads_active[type] == 0);
1036 break;
1037
1038 case SRV_WORKER:
1039 ut_a(srv_n_purge_threads > 1);
1040 ut_a(srv_sys->n_threads_active[type] < srv_n_purge_threads - 1);
1041 break;
1042 }
1043
1044 slot->suspended = FALSE;
1045
1046 ++srv_sys->n_threads_active[type];
1047
1048 os_event_set(slot->event);
1049
1050 if (++count == n) {
1051 break;
1052 }
1053 }
1054 }
1055
1056 srv_sys_mutex_exit();
1057
1058 return (count);
1059 }
1060
1061 /** Release a thread's slot. */
srv_free_slot(srv_slot_t * slot)1062 static void srv_free_slot(srv_slot_t *slot) /*!< in/out: thread slot */
1063 {
1064 srv_sys_mutex_enter();
1065
1066 if (!slot->suspended) {
1067 /* Mark the thread as inactive. */
1068 srv_suspend_thread_low(slot);
1069 }
1070
1071 /* Free the slot for reuse. */
1072 ut_ad(slot->in_use);
1073 slot->in_use = FALSE;
1074
1075 srv_sys_mutex_exit();
1076 }
1077
1078 /** Initializes the server. */
srv_init(void)1079 static void srv_init(void) {
1080 ulint n_sys_threads = 0;
1081 ulint srv_sys_sz = sizeof(*srv_sys);
1082
1083 mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
1084
1085 ut_d(srv_threads.m_shutdown_cleanup_dbg = os_event_create());
1086
1087 srv_threads.m_master_ready_for_dd_shutdown = os_event_create();
1088
1089 srv_threads.m_purge_coordinator = {};
1090
1091 srv_threads.m_purge_workers_n = srv_n_purge_threads;
1092
1093 srv_threads.m_purge_workers =
1094 UT_NEW_ARRAY_NOKEY(IB_thread, srv_threads.m_purge_workers_n);
1095
1096 if (!srv_read_only_mode) {
1097 /* Number of purge threads + master thread */
1098 n_sys_threads = srv_n_purge_threads + 1;
1099
1100 srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
1101 }
1102
1103 srv_threads.m_page_cleaner_coordinator = {};
1104
1105 srv_threads.m_page_cleaner_workers_n = srv_n_page_cleaners;
1106
1107 srv_threads.m_page_cleaner_workers =
1108 UT_NEW_ARRAY_NOKEY(IB_thread, srv_threads.m_page_cleaner_workers_n);
1109
1110 srv_sys = static_cast<srv_sys_t *>(ut_zalloc_nokey(srv_sys_sz));
1111
1112 srv_sys->n_sys_threads = n_sys_threads;
1113
1114 /* Even in read-only mode we flush pages related to intrinsic table
1115 and so mutex creation is needed. */
1116 {
1117 mutex_create(LATCH_ID_SRV_SYS, &srv_sys->mutex);
1118
1119 mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys->tasks_mutex);
1120
1121 srv_sys->sys_threads = (srv_slot_t *)&srv_sys[1];
1122
1123 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1124 srv_slot_t *slot = &srv_sys->sys_threads[i];
1125
1126 slot->event = os_event_create();
1127
1128 slot->in_use = false;
1129
1130 ut_a(slot->event);
1131 }
1132
1133 srv_error_event = os_event_create();
1134
1135 srv_monitor_event = os_event_create();
1136
1137 srv_buf_dump_event = os_event_create();
1138
1139 buf_flush_event = os_event_create();
1140
1141 UT_LIST_INIT(srv_sys->tasks, &que_thr_t::queue);
1142 }
1143
1144 srv_buf_resize_event = os_event_create();
1145
1146 ut_d(srv_master_thread_disabled_event = os_event_create());
1147
1148 /* page_zip_stat_per_index_mutex is acquired from:
1149 1. page_zip_compress() (after SYNC_FSP)
1150 2. page_zip_decompress()
1151 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
1152 4. innodb_cmp_per_index_update(), no other latches
1153 since we do not acquire any other latches while holding this mutex,
1154 it can have very low level. We pick SYNC_ANY_LATCH for it. */
1155 mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
1156 &page_zip_stat_per_index_mutex);
1157
1158 /* Create dummy indexes for infimum and supremum records */
1159
1160 dict_ind_init();
1161
1162 /* Initialize some INFORMATION SCHEMA internal structures */
1163 trx_i_s_cache_init(trx_i_s_cache);
1164
1165 ut_crc32_init();
1166
1167 dict_mem_init();
1168 }
1169
1170 /** Frees the data structures created in srv_init(). */
srv_free(void)1171 void srv_free(void) {
1172 mutex_free(&srv_innodb_monitor_mutex);
1173 mutex_free(&page_zip_stat_per_index_mutex);
1174
1175 {
1176 mutex_free(&srv_sys->mutex);
1177 mutex_free(&srv_sys->tasks_mutex);
1178
1179 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1180 srv_slot_t *slot = &srv_sys->sys_threads[i];
1181
1182 os_event_destroy(slot->event);
1183 }
1184
1185 os_event_destroy(srv_error_event);
1186 os_event_destroy(srv_monitor_event);
1187 os_event_destroy(srv_buf_dump_event);
1188 os_event_destroy(buf_flush_event);
1189 }
1190
1191 os_event_destroy(srv_buf_resize_event);
1192
1193 #ifdef UNIV_DEBUG
1194 os_event_destroy(srv_master_thread_disabled_event);
1195 srv_master_thread_disabled_event = nullptr;
1196 #endif /* UNIV_DEBUG */
1197
1198 trx_i_s_cache_free(trx_i_s_cache);
1199
1200 ut_free(srv_sys);
1201
1202 srv_sys = nullptr;
1203
1204 if (srv_threads.m_page_cleaner_workers != nullptr) {
1205 for (size_t i = 0; i < srv_threads.m_page_cleaner_workers_n; ++i) {
1206 srv_threads.m_page_cleaner_workers[i] = {};
1207 }
1208 ut_free(srv_threads.m_page_cleaner_workers);
1209 srv_threads.m_page_cleaner_workers = nullptr;
1210 }
1211
1212 if (srv_threads.m_purge_workers != nullptr) {
1213 for (size_t i = 0; i < srv_threads.m_purge_workers_n; ++i) {
1214 srv_threads.m_purge_workers[i] = {};
1215 }
1216 ut_free(srv_threads.m_purge_workers);
1217 srv_threads.m_purge_workers = nullptr;
1218 }
1219
1220 os_event_destroy(srv_threads.m_master_ready_for_dd_shutdown);
1221
1222 ut_d(os_event_destroy(srv_threads.m_shutdown_cleanup_dbg));
1223
1224 srv_threads = {};
1225 }
1226
/** Initializes the synchronization primitives, memory system, and the thread
local storage. The sub-system initializers below are called in a fixed
order; srv_boot() calls this function before srv_init(). */
void srv_general_init() {
  /* The sync checker is sized by the configured maximum thread count. */
  sync_check_init(srv_max_n_threads);
  /* Reset the system variables in the recovery module. */
  recv_sys_var_init();
  os_thread_open();
  trx_pool_init();
  que_init();
  row_mysql_init();
  undo_spaces_init();
}
1239
/** Boots the InnoDB server: runs the general initialization first and then
the initialization of this module. */
void srv_boot(void) {
  /* Initialize synchronization primitives, memory management, and thread
  local storage */

  srv_general_init();

  /* Initialize this module */

  srv_init();
}
1251
/** Refreshes the values used to calculate per-second averages. Everything is
done while holding srv_innodb_monitor_mutex: the last monitor time is set to
the current monotonic time, the statistics of the other modules are
refreshed, and the current counter values are saved as the new "old"
baseline values. */
static void srv_refresh_innodb_monitor_stats(void) {
  mutex_enter(&srv_innodb_monitor_mutex);

  srv_last_monitor_time = ut_time_monotonic();

  os_aio_refresh_stats();

  /* Baseline for the hash / non-hash searches per second figures. */
  btr_cur_n_sea_old = btr_cur_n_sea;
  btr_cur_n_non_sea_old = btr_cur_n_non_sea;

  log_refresh_stats(*log_sys);

  buf_refresh_io_stats_all();

  /* Baseline for the row operations per second figures. */
  srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
  srv_n_rows_updated_old = srv_stats.n_rows_updated;
  srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
  srv_n_rows_read_old = srv_stats.n_rows_read;

  /* Baseline for the system row operations per second figures. */
  srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
  srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
  srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
  srv_n_system_rows_read_old = srv_stats.n_system_rows_read;

  mutex_exit(&srv_innodb_monitor_mutex);
}
1279
1280 /**
1281 Prints info summary and info about all transactions to the file, recording the
1282 position where the part about transactions starts.
1283 @param[in] file output stream
1284 @param[out] trx_start_pos file position of the start of the list of active
1285 transactions
1286 */
srv_printf_locks_and_transactions(FILE * file,ulint * trx_start_pos)1287 static void srv_printf_locks_and_transactions(FILE *file,
1288 ulint *trx_start_pos) {
1289 ut_ad(locksys::owns_exclusive_global_latch());
1290 lock_print_info_summary(file);
1291 if (trx_start_pos) {
1292 long t = ftell(file);
1293 if (t < 0) {
1294 *trx_start_pos = ULINT_UNDEFINED;
1295 } else {
1296 *trx_start_pos = (ulint)t;
1297 }
1298 }
1299 lock_print_info_all_transactions(file);
1300 }
1301
/** Writes the InnoDB Monitor report to a stream. The whole report is
produced while holding srv_innodb_monitor_mutex, and the "old" counter values
used for the per-second averages are updated as a side effect.
@param[in,out]  file           output stream
@param[in]      nowait         if true, the exclusive global lock-sys latch is
                               only tried; when it cannot be obtained, a
                               notice is printed instead of the lock and
                               transaction section
@param[out]     trx_start_pos  if not nullptr, forwarded to
                               srv_printf_locks_and_transactions(), which
                               stores the file position of the start of the
                               transaction list
@param[out]     trx_end        if not nullptr and the lock section was
                               printed, receives the file position right
                               after that section, or ULINT_UNDEFINED when
                               ftell() fails
@return false if nowait was set and the latch could not be obtained, true
otherwise */
bool srv_printf_innodb_monitor(FILE *file, bool nowait, ulint *trx_start_pos,
                               ulint *trx_end) {
  ulint n_reserved;
  ibool ret;

  mutex_enter(&srv_innodb_monitor_mutex);

  const auto current_time = ut_time_monotonic();

  /* We add 0.001 seconds to time_elapsed to prevent division
  by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
  same time */

  const auto time_elapsed = current_time - srv_last_monitor_time + 0.001;

  srv_last_monitor_time = ut_time_monotonic();

  fputs("\n=====================================\n", file);

  ut_print_timestamp(file);
  fprintf(file,
          " INNODB MONITOR OUTPUT\n"
          "=====================================\n"
          "Per second averages calculated from the last %lu seconds\n",
          (ulong)time_elapsed);

  fputs(
      "-----------------\n"
      "BACKGROUND THREAD\n"
      "-----------------\n",
      file);
  srv_print_master_thread_info(file);

  fputs(
      "----------\n"
      "SEMAPHORES\n"
      "----------\n",
      file);

  sync_print(file);

  /* Conceptually, srv_innodb_monitor_mutex has a very high latching
  order level in sync0sync.h, while dict_foreign_err_mutex has a very
  low level 135. Therefore we can reserve the latter mutex here without
  a danger of a deadlock of threads. */

  mutex_enter(&dict_foreign_err_mutex);

  /* The foreign key error section is only printed when the error file is
  not at position 0. */
  if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
    fputs(
        "------------------------\n"
        "LATEST FOREIGN KEY ERROR\n"
        "------------------------\n",
        file);
    ut_copy_file(file, dict_foreign_err_file);
  }

  mutex_exit(&dict_foreign_err_mutex);

  /* Each latch guard below lives only inside its own branch, so the latch
  is not held any more once the if/else statement has been left. */
  ret = true;
  if (nowait) {
    locksys::Global_exclusive_try_latch guard{};
    if (guard.owns_lock()) {
      srv_printf_locks_and_transactions(file, trx_start_pos);
    } else {
      fputs("FAIL TO OBTAIN LOCK MUTEX, SKIP LOCK INFO PRINTING\n", file);
      ret = false;
    }
  } else {
    locksys::Global_exclusive_latch_guard guard{};
    srv_printf_locks_and_transactions(file, trx_start_pos);
  }

  /* The end position of the transaction section is reported only when the
  section was actually printed. */
  if (ret) {
    ut_ad(lock_validate());

    if (trx_end) {
      long t = ftell(file);
      if (t < 0) {
        *trx_end = ULINT_UNDEFINED;
      } else {
        *trx_end = (ulint)t;
      }
    }
  }

  fputs(
      "--------\n"
      "FILE I/O\n"
      "--------\n",
      file);
  os_aio_print(file);

  fputs(
      "-------------------------------------\n"
      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
      "-------------------------------------\n",
      file);
  ibuf_print(file);

  /* Each adaptive hash index part is printed under its own S-latch. */
  for (ulint i = 0; i < btr_ahi_parts; ++i) {
    rw_lock_s_lock(btr_search_latches[i]);
    ha_print_info(file, btr_search_sys->hash_tables[i]);
    rw_lock_s_unlock(btr_search_latches[i]);
  }

  fprintf(file, "%.2f hash searches/s, %.2f non-hash searches/s\n",
          (btr_cur_n_sea - btr_cur_n_sea_old) / time_elapsed,
          (btr_cur_n_non_sea - btr_cur_n_non_sea_old) / time_elapsed);
  btr_cur_n_sea_old = btr_cur_n_sea;
  btr_cur_n_non_sea_old = btr_cur_n_non_sea;

  fputs(
      "---\n"
      "LOG\n"
      "---\n",
      file);
  log_print(*log_sys, file);

  fputs(
      "----------------------\n"
      "BUFFER POOL AND MEMORY\n"
      "----------------------\n",
      file);
  fprintf(file,
          "Total large memory allocated " ULINTPF
          "\n"
          "Dictionary memory allocated " ULINTPF "\n",
          os_total_large_mem_allocated, dict_sys->size);

  buf_print_io(file);

  fputs(
      "--------------\n"
      "ROW OPERATIONS\n"
      "--------------\n",
      file);
  fprintf(file,
          ULINTPF " queries inside InnoDB, " ULINTPF " queries in queue\n",
          srv_conc_get_active_threads(), srv_conc_get_waiting_threads());

  /* This is a dirty read, without holding trx_sys->mutex. */
  fprintf(file, ULINTPF " read views open inside InnoDB\n",
          trx_sys->mvcc->size());

  n_reserved = fil_space_get_n_reserved_extents(0);
  if (n_reserved > 0) {
    fprintf(file,
            ULINTPF
            " tablespace extents now reserved for"
            " B-tree split operations\n",
            n_reserved);
  }

  std::ostringstream msg;

  msg << "Process ID=" << srv_main_thread_process_no
      << ", Main thread ID=" << srv_main_thread_id
      << " , state=" << srv_main_thread_op_info;

  fprintf(file, "%s\n", msg.str().c_str());

  fprintf(file,
          "Number of rows inserted " ULINTPF ", updated " ULINTPF
          ", deleted " ULINTPF ", read " ULINTPF "\n",
          (ulint)srv_stats.n_rows_inserted, (ulint)srv_stats.n_rows_updated,
          (ulint)srv_stats.n_rows_deleted, (ulint)srv_stats.n_rows_read);
  fprintf(
      file,
      "%.2f inserts/s, %.2f updates/s,"
      " %.2f deletes/s, %.2f reads/s\n",
      ((ulint)srv_stats.n_rows_inserted - srv_n_rows_inserted_old) /
          time_elapsed,
      ((ulint)srv_stats.n_rows_updated - srv_n_rows_updated_old) / time_elapsed,
      ((ulint)srv_stats.n_rows_deleted - srv_n_rows_deleted_old) / time_elapsed,
      ((ulint)srv_stats.n_rows_read - srv_n_rows_read_old) / time_elapsed);

  fprintf(file,
          "Number of system rows inserted " ULINTPF ", updated " ULINTPF
          ", deleted " ULINTPF ", read " ULINTPF "\n",
          (ulint)srv_stats.n_system_rows_inserted,
          (ulint)srv_stats.n_system_rows_updated,
          (ulint)srv_stats.n_system_rows_deleted,
          (ulint)srv_stats.n_system_rows_read);
  fprintf(
      file,
      "%.2f inserts/s, %.2f updates/s,"
      " %.2f deletes/s, %.2f reads/s\n",
      ((ulint)srv_stats.n_system_rows_inserted -
       srv_n_system_rows_inserted_old) /
          time_elapsed,
      ((ulint)srv_stats.n_system_rows_updated - srv_n_system_rows_updated_old) /
          time_elapsed,
      ((ulint)srv_stats.n_system_rows_deleted - srv_n_system_rows_deleted_old) /
          time_elapsed,
      ((ulint)srv_stats.n_system_rows_read - srv_n_system_rows_read_old) /
          time_elapsed);

  /* Save the current counters as the baseline for the next report. */
  srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
  srv_n_rows_updated_old = srv_stats.n_rows_updated;
  srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
  srv_n_rows_read_old = srv_stats.n_rows_read;

  srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
  srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
  srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
  srv_n_system_rows_read_old = srv_stats.n_system_rows_read;

  fputs(
      "----------------------------\n"
      "END OF INNODB MONITOR OUTPUT\n"
      "============================\n",
      file);
  mutex_exit(&srv_innodb_monitor_mutex);
  fflush(file);

  return (ret);
}
1520
1521 /** Function to pass InnoDB status variables to MySQL */
srv_export_innodb_status(void)1522 void srv_export_innodb_status(void) {
1523 buf_pool_stat_t stat;
1524 buf_pools_list_size_t buf_pools_list_size;
1525 ulint LRU_len;
1526 ulint free_len;
1527 ulint flush_list_len;
1528
1529 buf_get_total_stat(&stat);
1530 buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1531 buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1532
1533 mutex_enter(&srv_innodb_monitor_mutex);
1534
1535 export_vars.innodb_data_pending_reads = os_n_pending_reads;
1536
1537 export_vars.innodb_data_pending_writes = os_n_pending_writes;
1538
1539 export_vars.innodb_data_pending_fsyncs =
1540 fil_n_pending_log_flushes + fil_n_pending_tablespace_flushes;
1541
1542 export_vars.innodb_data_fsyncs = os_n_fsyncs;
1543
1544 export_vars.innodb_data_read = srv_stats.data_read;
1545
1546 export_vars.innodb_data_reads = os_n_file_reads;
1547
1548 export_vars.innodb_data_writes = os_n_file_writes;
1549
1550 export_vars.innodb_data_written = srv_stats.data_written;
1551
1552 export_vars.innodb_buffer_pool_read_requests =
1553 Counter::total(stat.m_n_page_gets);
1554
1555 export_vars.innodb_buffer_pool_write_requests =
1556 srv_stats.buf_pool_write_requests;
1557
1558 export_vars.innodb_buffer_pool_wait_free = srv_stats.buf_pool_wait_free;
1559
1560 export_vars.innodb_buffer_pool_pages_flushed = srv_stats.buf_pool_flushed;
1561
1562 export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1563
1564 export_vars.innodb_buffer_pool_read_ahead_rnd = stat.n_ra_pages_read_rnd;
1565
1566 export_vars.innodb_buffer_pool_read_ahead = stat.n_ra_pages_read;
1567
1568 export_vars.innodb_buffer_pool_read_ahead_evicted = stat.n_ra_pages_evicted;
1569
1570 export_vars.innodb_buffer_pool_pages_data = LRU_len;
1571
1572 export_vars.innodb_buffer_pool_bytes_data =
1573 buf_pools_list_size.LRU_bytes + buf_pools_list_size.unzip_LRU_bytes;
1574
1575 export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1576
1577 export_vars.innodb_buffer_pool_bytes_dirty =
1578 buf_pools_list_size.flush_list_bytes;
1579
1580 export_vars.innodb_buffer_pool_pages_free = free_len;
1581
1582 #ifdef UNIV_DEBUG
1583 export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number();
1584 #endif /* UNIV_DEBUG */
1585 export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1586
1587 export_vars.innodb_buffer_pool_pages_misc =
1588 buf_pool_get_n_pages() - LRU_len - free_len;
1589
1590 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1591
1592 export_vars.innodb_log_waits = srv_stats.log_waits;
1593
1594 export_vars.innodb_os_log_written = srv_stats.os_log_written;
1595
1596 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1597
1598 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1599
1600 export_vars.innodb_os_log_pending_writes = srv_stats.os_log_pending_writes;
1601
1602 export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1603
1604 export_vars.innodb_log_writes = srv_stats.log_writes;
1605
1606 export_vars.innodb_dblwr_pages_written = srv_stats.dblwr_pages_written;
1607
1608 export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1609
1610 export_vars.innodb_pages_created = stat.n_pages_created;
1611
1612 export_vars.innodb_pages_read = stat.n_pages_read;
1613
1614 export_vars.innodb_pages_written = stat.n_pages_written;
1615
1616 export_vars.innodb_redo_log_enabled = srv_redo_log;
1617
1618 export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1619
1620 export_vars.innodb_row_lock_current_waits =
1621 srv_stats.n_lock_wait_current_count;
1622
1623 export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1624
1625 if (srv_stats.n_lock_wait_count > 0) {
1626 export_vars.innodb_row_lock_time_avg = (ulint)(
1627 srv_stats.n_lock_wait_time / 1000 / srv_stats.n_lock_wait_count);
1628
1629 } else {
1630 export_vars.innodb_row_lock_time_avg = 0;
1631 }
1632
1633 export_vars.innodb_row_lock_time_max = lock_sys->n_lock_max_wait_time / 1000;
1634
1635 export_vars.innodb_rows_read = srv_stats.n_rows_read;
1636
1637 export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1638
1639 export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1640
1641 export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1642
1643 export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read;
1644
1645 export_vars.innodb_system_rows_inserted = srv_stats.n_system_rows_inserted;
1646
1647 export_vars.innodb_system_rows_updated = srv_stats.n_system_rows_updated;
1648
1649 export_vars.innodb_system_rows_deleted = srv_stats.n_system_rows_deleted;
1650
1651 export_vars.innodb_sampled_pages_read = srv_stats.n_sampled_pages_read;
1652
1653 export_vars.innodb_sampled_pages_skipped = srv_stats.n_sampled_pages_skipped;
1654
1655 export_vars.innodb_num_open_files = fil_n_file_opened;
1656
1657 export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
1658
1659 export_vars.innodb_undo_tablespaces_implicit = FSP_IMPLICIT_UNDO_TABLESPACES;
1660
1661 undo::spaces->s_lock();
1662
1663 export_vars.innodb_undo_tablespaces_total = undo::spaces->size();
1664
1665 export_vars.innodb_undo_tablespaces_explicit =
1666 export_vars.innodb_undo_tablespaces_total - FSP_IMPLICIT_UNDO_TABLESPACES;
1667
1668 export_vars.innodb_undo_tablespaces_active = 0;
1669
1670 for (auto undo_space : undo::spaces->m_spaces) {
1671 if (undo_space->is_active()) {
1672 export_vars.innodb_undo_tablespaces_active++;
1673 }
1674 }
1675 undo::spaces->s_unlock();
1676
1677 #ifdef UNIV_DEBUG
1678 rw_lock_s_lock(&purge_sys->latch);
1679 trx_id_t done_trx_no = purge_sys->done.trx_no;
1680
1681 /* Purge always deals with transaction end points represented by
1682 transaction number. We are allowed to purge transactions with number
1683 below the low limit. */
1684 ReadView oldest_view;
1685 trx_sys->mvcc->clone_oldest_view(&oldest_view);
1686 trx_id_t low_limit_no = oldest_view.view_low_limit_no();
1687
1688 rw_lock_s_unlock(&purge_sys->latch);
1689
1690 mutex_enter(&trx_sys->mutex);
1691 /* Maximum transaction number added to history list for purge. */
1692 trx_id_t max_trx_no = trx_sys->rw_max_trx_no;
1693 mutex_exit(&trx_sys->mutex);
1694
1695 if (done_trx_no == 0 || max_trx_no < done_trx_no) {
1696 export_vars.innodb_purge_trx_id_age = 0;
1697 } else {
1698 /* Add 1 as done_trx_no always points to the next transaction ID. */
1699 export_vars.innodb_purge_trx_id_age = (ulint)(max_trx_no - done_trx_no + 1);
1700 }
1701
1702 if (low_limit_no == 0 || max_trx_no < low_limit_no) {
1703 export_vars.innodb_purge_view_trx_id_age = 0;
1704 } else {
1705 /* Add 1 as low_limit_no always points to the next transaction ID. */
1706 export_vars.innodb_purge_view_trx_id_age =
1707 (ulint)(max_trx_no - low_limit_no + 1);
1708 }
1709 #endif /* UNIV_DEBUG */
1710
1711 mutex_exit(&srv_innodb_monitor_mutex);
1712 }
1713
1714 /** A thread which prints the info output by various InnoDB monitors. */
srv_monitor_thread()1715 void srv_monitor_thread() {
1716 int64_t sig_count;
1717 ib_time_monotonic_t current_time;
1718 ib_time_monotonic_t time_elapsed;
1719 ulint mutex_skipped;
1720 bool last_srv_print_monitor = srv_print_innodb_monitor;
1721
1722 ut_ad(!srv_read_only_mode);
1723
1724 auto last_monitor_time = ut_time_monotonic();
1725 srv_last_monitor_time = last_monitor_time;
1726
1727 mutex_skipped = 0;
1728
1729 loop:
1730 /* Wake up every 5 seconds to see if we need to print
1731 monitor information or if signaled at shutdown. */
1732
1733 sig_count = os_event_reset(srv_monitor_event);
1734
1735 os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1736
1737 current_time = ut_time_monotonic();
1738
1739 time_elapsed = current_time - last_monitor_time;
1740
1741 if (time_elapsed > 15) {
1742 last_monitor_time = ut_time_monotonic();
1743
1744 if (srv_print_innodb_monitor) {
1745 /* Reset mutex_skipped counter every time srv_print_innodb_monitor
1746 changes. This is to ensure we will not be blocked by lock_sys global latch
1747 for short duration information printing, such as requested by
1748 sync_array_print_long_waits() */
1749 if (!last_srv_print_monitor) {
1750 mutex_skipped = 0;
1751 last_srv_print_monitor = true;
1752 }
1753
1754 if (!srv_printf_innodb_monitor(stderr, MUTEX_NOWAIT(mutex_skipped),
1755 nullptr, nullptr)) {
1756 mutex_skipped++;
1757 } else {
1758 /* Reset the counter */
1759 mutex_skipped = 0;
1760 }
1761 } else {
1762 last_srv_print_monitor = false;
1763 }
1764
1765 /* We don't create the temp files or associated
1766 mutexes in read-only-mode */
1767
1768 if (!srv_read_only_mode && srv_innodb_status) {
1769 mutex_enter(&srv_monitor_file_mutex);
1770 rewind(srv_monitor_file);
1771 if (!srv_printf_innodb_monitor(srv_monitor_file,
1772 MUTEX_NOWAIT(mutex_skipped), nullptr,
1773 nullptr)) {
1774 mutex_skipped++;
1775 } else {
1776 mutex_skipped = 0;
1777 }
1778
1779 os_file_set_eof(srv_monitor_file);
1780 mutex_exit(&srv_monitor_file_mutex);
1781 }
1782 }
1783
1784 if (srv_shutdown_state.load() < SRV_SHUTDOWN_CLEANUP) {
1785 goto loop;
1786 }
1787 }
1788
1789 /** A thread which prints warnings about semaphore waits which have lasted
1790 too long. These can be used to track bugs which cause hangs. */
srv_error_monitor_thread()1791 void srv_error_monitor_thread() {
1792 /* number of successive fatal timeouts observed */
1793 ulint fatal_cnt = 0;
1794 lsn_t old_lsn;
1795 lsn_t new_lsn;
1796 int64_t sig_count;
1797 /* longest waiting thread for a semaphore */
1798 os_thread_id_t waiter = os_thread_get_curr_id();
1799 os_thread_id_t old_waiter = waiter;
1800 /* the semaphore that is being waited for */
1801 const void *sema = nullptr;
1802 const void *old_sema = nullptr;
1803
1804 ut_ad(!srv_read_only_mode);
1805
1806 old_lsn = log_get_lsn(*log_sys);
1807
1808 loop:
1809 /* Try to track a strange bug reported by Harald Fuchs and others,
1810 where the lsn seems to decrease at times */
1811
1812 new_lsn = log_get_lsn(*log_sys);
1813
1814 if (new_lsn < old_lsn) {
1815 ib::error(ER_IB_MSG_1046, ulonglong{old_lsn}, ulonglong{new_lsn});
1816 ut_ad(0);
1817 }
1818
1819 old_lsn = new_lsn;
1820
1821 if (ut_difftime(ut_time_monotonic(), srv_last_monitor_time) > 60) {
1822 /* We referesh InnoDB Monitor values so that averages are
1823 printed from at most 60 last seconds */
1824
1825 srv_refresh_innodb_monitor_stats();
1826 }
1827
1828 /* Update the statistics collected for deciding LRU
1829 eviction policy. */
1830 buf_LRU_stat_update();
1831
1832 /* In case mutex_exit is not a memory barrier, it is
1833 theoretically possible some threads are left waiting though
1834 the semaphore is already released. Wake up those threads: */
1835
1836 sync_arr_wake_threads_if_sema_free();
1837
1838 if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema &&
1839 os_thread_eq(waiter, old_waiter)) {
1840 fatal_cnt++;
1841 if (fatal_cnt > 10) {
1842 ib::fatal(ER_IB_MSG_1047, ulonglong{srv_fatal_semaphore_wait_threshold});
1843 }
1844 } else {
1845 fatal_cnt = 0;
1846 old_waiter = waiter;
1847 old_sema = sema;
1848 }
1849
1850 /* Flush stderr so that a database user gets the output
1851 to possible MySQL error file */
1852
1853 fflush(stderr);
1854
1855 sig_count = os_event_reset(srv_error_event);
1856
1857 os_event_wait_time_low(srv_error_event, 1000000, sig_count);
1858
1859 if (srv_shutdown_state.load() < SRV_SHUTDOWN_CLEANUP) {
1860 goto loop;
1861 }
1862 }
1863
1864 /** Increment the server activity count. */
srv_inc_activity_count(void)1865 void srv_inc_activity_count(void) { srv_sys->activity_count.inc(); }
1866
1867 /** Check whether the master thread is active.
1868 This is polled during the final phase of shutdown.
1869 The first phase of server shutdown must have already been executed
1870 (or the server must not have been fully started up).
1871 @see srv_pre_dd_shutdown()
1872 @retval true if any thread is active
1873 @retval false if no thread is active */
srv_master_thread_is_active()1874 bool srv_master_thread_is_active() {
1875 return (srv_thread_is_active(srv_threads.m_master));
1876 }
1877
1878 /** Tells the InnoDB server that there has been activity in the database
1879 and wakes up the master thread if it is suspended (not sleeping). Used
1880 in the MySQL interface. Note that there is a small chance that the master
1881 thread stays suspended (we do not protect our operation with the
1882 srv_sys_t->mutex, for performance reasons). */
srv_active_wake_master_thread_low()1883 void srv_active_wake_master_thread_low() {
1884 ut_ad(!srv_read_only_mode);
1885 ut_ad(!srv_sys_mutex_own());
1886
1887 srv_inc_activity_count();
1888
1889 if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
1890 srv_slot_t *slot;
1891
1892 srv_sys_mutex_enter();
1893
1894 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
1895
1896 /* Only if the master thread has been started. */
1897
1898 if (slot->in_use) {
1899 ut_a(srv_slot_get_type(slot) == SRV_MASTER);
1900
1901 if (slot->suspended) {
1902 slot->suspended = FALSE;
1903
1904 ++srv_sys->n_threads_active[SRV_MASTER];
1905
1906 os_event_set(slot->event);
1907 }
1908 }
1909
1910 srv_sys_mutex_exit();
1911 }
1912 }
1913
1914 /** Tells the purge thread that there has been activity in the database
1915 and wakes up the purge thread if it is suspended (not sleeping). Note
1916 that there is a small chance that the purge thread stays suspended
1917 (we do not protect our check with the srv_sys_t:mutex and the
1918 purge_sys->latch, for performance reasons). */
srv_wake_purge_thread_if_not_active(void)1919 void srv_wake_purge_thread_if_not_active(void) {
1920 ut_ad(!srv_sys_mutex_own());
1921
1922 if (purge_sys->state == PURGE_STATE_RUN &&
1923 srv_sys->n_threads_active[SRV_PURGE] == 0) {
1924 srv_release_threads(SRV_PURGE, 1);
1925 }
1926 }
1927
1928 /** Wakes up the master thread if it is suspended or being suspended. */
srv_wake_master_thread(void)1929 void srv_wake_master_thread(void) {
1930 ut_ad(!srv_sys_mutex_own());
1931
1932 srv_inc_activity_count();
1933
1934 srv_release_threads(SRV_MASTER, 1);
1935 }
1936
1937 /** Get current server activity count. We don't hold srv_sys::mutex while
1938 reading this value as it is only used in heuristics.
1939 @return activity count. */
srv_get_activity_count(void)1940 ulint srv_get_activity_count(void) { return (srv_sys->activity_count); }
1941
1942 /** Check if there has been any activity.
1943 @return false if no change in activity counter. */
srv_check_activity(ulint old_activity_count)1944 ibool srv_check_activity(
1945 ulint old_activity_count) /*!< in: old activity count */
1946 {
1947 return (srv_sys->activity_count != old_activity_count);
1948 }
1949
1950 /** Make room in the table cache by evicting an unused table.
1951 @return number of tables evicted. */
srv_master_evict_from_table_cache(ulint pct_check)1952 static ulint srv_master_evict_from_table_cache(
1953 ulint pct_check) /*!< in: max percent to check */
1954 {
1955 ulint n_tables_evicted = 0;
1956
1957 rw_lock_x_lock(dict_operation_lock);
1958
1959 dict_mutex_enter_for_mysql();
1960
1961 n_tables_evicted =
1962 dict_make_room_in_cache(innobase_get_table_cache_size(), pct_check);
1963
1964 dict_mutex_exit_for_mysql();
1965
1966 rw_lock_x_unlock(dict_operation_lock);
1967
1968 return (n_tables_evicted);
1969 }
1970
1971 /** This function prints progress message every 60 seconds during server
1972 shutdown, for any activities that master thread is pending on. */
srv_shutdown_print_master_pending(ib_time_monotonic_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)1973 static void srv_shutdown_print_master_pending(
1974 ib_time_monotonic_t *last_print_time, /*!< last time the function
1975 print the message */
1976 ulint n_tables_to_drop, /*!< number of tables to
1977 be dropped */
1978 ulint n_bytes_merged) /*!< number of change buffer
1979 just merged */
1980 {
1981 const auto current_time = ut_time_monotonic();
1982
1983 const auto time_elapsed = current_time - *last_print_time;
1984
1985 if (time_elapsed > 60) {
1986 *last_print_time = ut_time_monotonic();
1987
1988 if (n_tables_to_drop) {
1989 ib::info(ER_IB_MSG_1048, ulonglong{n_tables_to_drop});
1990 }
1991
1992 /* Check change buffer merge, we only wait for change buffer
1993 merge if it is a slow shutdown */
1994 if (!srv_fast_shutdown && n_bytes_merged) {
1995 ib::info(ER_IB_MSG_1049, ulonglong{n_bytes_merged});
1996 }
1997 }
1998 }
1999
2000 #ifdef UNIV_DEBUG
2001 /** Waits in loop as long as master thread is disabled (debug) */
srv_master_do_disabled_loop(void)2002 static void srv_master_do_disabled_loop(void) {
2003 if (!srv_master_thread_disabled_debug) {
2004 /* We return here to avoid changing op_info. */
2005 return;
2006 }
2007
2008 srv_main_thread_op_info = "disabled";
2009
2010 while (srv_master_thread_disabled_debug) {
2011 os_event_set(srv_master_thread_disabled_event);
2012 if (srv_shutdown_state.load() >=
2013 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2014 break;
2015 }
2016 os_thread_sleep(100000);
2017 }
2018
2019 srv_main_thread_op_info = "";
2020 }
2021
2022 /** Disables master thread. It's used by:
2023 SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
2024 @param[in] thd thread handle
2025 @param[in] var pointer to system variable
2026 @param[out] var_ptr where the formal string goes
2027 @param[in] save immediate result from check function */
srv_master_thread_disabled_debug_update(THD * thd,SYS_VAR * var,void * var_ptr,const void * save)2028 void srv_master_thread_disabled_debug_update(THD *thd, SYS_VAR *var,
2029 void *var_ptr, const void *save) {
2030 /* This method is protected by mutex, as every SET GLOBAL .. */
2031 ut_ad(srv_master_thread_disabled_event != nullptr);
2032
2033 const bool disable = *static_cast<const bool *>(save);
2034
2035 const int64_t sig_count = os_event_reset(srv_master_thread_disabled_event);
2036
2037 srv_master_thread_disabled_debug = disable;
2038
2039 if (disable) {
2040 os_event_wait_low(srv_master_thread_disabled_event, sig_count);
2041 }
2042 }
2043 #endif /* UNIV_DEBUG */
2044
/** Computes the distance between two timeval values in microseconds.
@param[in] a later timeval
@param[in] b earlier timeval
@return a - b; number of microseconds between b and a (negative when a is
earlier than b) */
MY_ATTRIBUTE((unused))
static int64_t timeval_diff_us(timeval a, timeval b) {
  const auto whole_seconds = a.tv_sec - b.tv_sec;

  /* Scale the seconds part in 64-bit arithmetic, then apply the
  sub-second parts. */
  int64_t micros = whole_seconds * 1000000LL;
  micros += a.tv_usec;
  micros -= b.tv_usec;

  return micros;
}
2053
2054 #ifdef UNIV_LINUX
2055
2056 /** Updates statistics about current CPU usage. */
srv_update_cpu_usage()2057 static void srv_update_cpu_usage() {
2058 using Clock = std::chrono::high_resolution_clock;
2059 using Clock_point = std::chrono::time_point<Clock>;
2060
2061 static Clock_point last_time = Clock::now();
2062
2063 static timeval last_cpu_utime;
2064 static timeval last_cpu_stime;
2065 static bool last_cpu_times_set = false;
2066
2067 Clock_point cur_time = Clock::now();
2068
2069 const auto time_diff = std::chrono::duration_cast<std::chrono::microseconds>(
2070 cur_time - last_time)
2071 .count();
2072
2073 if (time_diff < 100 * 1000LL) {
2074 return;
2075 }
2076 last_time = cur_time;
2077
2078 rusage usage;
2079 if (getrusage(RUSAGE_SELF, &usage) != 0) {
2080 return;
2081 }
2082
2083 if (!last_cpu_times_set) {
2084 last_cpu_utime = usage.ru_utime;
2085 last_cpu_stime = usage.ru_stime;
2086 last_cpu_times_set = true;
2087 return;
2088 }
2089
2090 const auto cpu_utime_diff = timeval_diff_us(usage.ru_utime, last_cpu_utime);
2091 last_cpu_utime = usage.ru_utime;
2092
2093 const auto cpu_stime_diff = timeval_diff_us(usage.ru_stime, last_cpu_stime);
2094 last_cpu_stime = usage.ru_stime;
2095
2096 /* Calculate absolute. */
2097
2098 double cpu_utime = cpu_utime_diff * 100.0 / time_diff;
2099 MONITOR_SET(MONITOR_CPU_UTIME_ABS, int64_t(cpu_utime));
2100 srv_cpu_usage.utime_abs = cpu_utime;
2101
2102 double cpu_stime = cpu_stime_diff * 100.0 / time_diff;
2103 MONITOR_SET(MONITOR_CPU_STIME_ABS, int64_t(cpu_stime));
2104 srv_cpu_usage.stime_abs = cpu_stime;
2105
2106 /* Calculate relative. */
2107
2108 cpu_set_t cs;
2109 CPU_ZERO(&cs);
2110 if (sched_getaffinity(0, sizeof(cs), &cs) != 0) {
2111 return;
2112 }
2113
2114 int n_cpu = 0;
2115 constexpr int MAX_CPU_N = 128;
2116 for (int i = 0; i < MAX_CPU_N; ++i) {
2117 if (CPU_ISSET(i, &cs)) {
2118 ++n_cpu;
2119 }
2120 }
2121
2122 srv_cpu_usage.n_cpu = n_cpu;
2123 MONITOR_SET(MONITOR_CPU_N, int64_t(n_cpu));
2124
2125 if (n_cpu == 0) {
2126 return;
2127 }
2128
2129 cpu_utime /= n_cpu;
2130 MONITOR_SET(MONITOR_CPU_UTIME_PCT, int64_t(cpu_utime));
2131 srv_cpu_usage.utime_pct = cpu_utime;
2132
2133 cpu_stime /= n_cpu;
2134 MONITOR_SET(MONITOR_CPU_STIME_PCT, int64_t(cpu_stime));
2135 srv_cpu_usage.stime_pct = cpu_stime;
2136 }
2137 #else /* !UNIV_LINUX */
2138 #ifdef _WIN32
2139 /** Convert a FILETIME to microseconds.
2140 Do not cast a pointer to a FILETIME structure to either a ULARGE_INTEGER* or
2141 __int64* value because it can cause alignment faults on 64-bit Windows.
2142 */
FILETIME_to_microseconds(const FILETIME & ft)2143 static uint64 FILETIME_to_microseconds(const FILETIME &ft) {
2144 ULARGE_INTEGER ulg;
2145 ulg.HighPart = ft.dwHighDateTime;
2146 ulg.LowPart = ft.dwLowDateTime;
2147 return ulg.QuadPart / 10;
2148 }
2149
2150 /** Updates statistics about current CPU usage. */
srv_update_cpu_usage()2151 static void srv_update_cpu_usage() {
2152 using Clock = std::chrono::high_resolution_clock;
2153 using Clock_point = std::chrono::time_point<Clock>;
2154
2155 static Clock_point last_time = Clock::now();
2156
2157 static uint64 last_cpu_utime;
2158 static uint64 last_cpu_stime;
2159 static bool last_cpu_times_set = false;
2160
2161 Clock_point cur_time = Clock::now();
2162
2163 const auto time_diff = std::chrono::duration_cast<std::chrono::microseconds>(
2164 cur_time - last_time)
2165 .count();
2166
2167 if (time_diff < 100 * 1000LL) {
2168 return;
2169 }
2170 last_time = cur_time;
2171
2172 FILETIME process_creation_time;
2173 FILETIME process_exit_time;
2174 FILETIME process_kernel_time;
2175 FILETIME process_user_time;
2176
2177 if (!GetProcessTimes(GetCurrentProcess(), &process_creation_time,
2178 &process_exit_time, &process_kernel_time,
2179 &process_user_time)) {
2180 return;
2181 }
2182
2183 uint64 cur_cpu_utime = FILETIME_to_microseconds(process_user_time);
2184 uint64 cur_cpu_stime = FILETIME_to_microseconds(process_kernel_time);
2185 if (!last_cpu_times_set) {
2186 last_cpu_utime = cur_cpu_utime;
2187 last_cpu_stime = cur_cpu_stime;
2188 last_cpu_times_set = true;
2189 return;
2190 }
2191
2192 const auto cpu_utime_diff = cur_cpu_utime - last_cpu_utime;
2193 last_cpu_utime = cur_cpu_utime;
2194
2195 const auto cpu_stime_diff = cur_cpu_stime - last_cpu_stime;
2196 last_cpu_stime = cur_cpu_stime;
2197
2198 /* Calculate absolute. */
2199
2200 double cpu_utime = cpu_utime_diff * 100.0 / time_diff;
2201 MONITOR_SET(MONITOR_CPU_UTIME_ABS, int64_t(cpu_utime));
2202 srv_cpu_usage.utime_abs = cpu_utime;
2203
2204 double cpu_stime = cpu_stime_diff * 100.0 / time_diff;
2205 MONITOR_SET(MONITOR_CPU_STIME_ABS, int64_t(cpu_stime));
2206 srv_cpu_usage.stime_abs = cpu_stime;
2207
2208 /* Calculate relative. */
2209
2210 DWORD_PTR process_affinity_mask;
2211 DWORD_PTR system_affinity_mask;
2212 if (!GetProcessAffinityMask(GetCurrentProcess(), &process_affinity_mask,
2213 &system_affinity_mask)) {
2214 return;
2215 }
2216
2217 /* If the system has more than 64 processors and the current process
2218 contains threads in multiple groups, GetProcessAffinityMask returns
2219 zero for both affinity masks.
2220 */
2221 if ((process_affinity_mask == 0) && (system_affinity_mask == 0)) {
2222 return;
2223 }
2224
2225 int n_cpu = 0;
2226 constexpr int MAX_CPU_N = 64;
2227 uint64 j = 1;
2228 for (int i = 0; i < MAX_CPU_N; ++i) {
2229 if (j & process_affinity_mask) {
2230 ++n_cpu;
2231 }
2232 j = j << 1;
2233 }
2234
2235 srv_cpu_usage.n_cpu = n_cpu;
2236 MONITOR_SET(MONITOR_CPU_N, int64_t(n_cpu));
2237
2238 if (n_cpu == 0) {
2239 return;
2240 }
2241
2242 cpu_utime /= n_cpu;
2243 MONITOR_SET(MONITOR_CPU_UTIME_PCT, int64_t(cpu_utime));
2244 srv_cpu_usage.utime_pct = cpu_utime;
2245
2246 cpu_stime /= n_cpu;
2247 MONITOR_SET(MONITOR_CPU_STIME_PCT, int64_t(cpu_stime));
2248 srv_cpu_usage.stime_pct = cpu_stime;
2249 }
2250 #else
srv_update_cpu_usage()2251 static void srv_update_cpu_usage() {
2252 srv_cpu_usage.utime_pct = 0;
2253 srv_cpu_usage.utime_abs = 0;
2254 srv_cpu_usage.stime_pct = 0;
2255 srv_cpu_usage.stime_abs = 0;
2256 srv_cpu_usage.n_cpu = 1;
2257 }
2258 #endif
2259
2260 #endif /* UNIV_LINUX || WIN32 */
2261
2262 /** Perform the tasks that the master thread is supposed to do when the
2263 server is active. There are two types of tasks. The first category is
2264 of such tasks which are performed at each inovcation of this function.
2265 We assume that this function is called roughly every second when the
2266 server is active. The second category is of such tasks which are
2267 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
srv_master_do_active_tasks(void)2268 static void srv_master_do_active_tasks(void) {
2269 const auto cur_time = ut_time_monotonic();
2270 auto counter_time = ut_time_monotonic_us();
2271
2272 /* First do the tasks that we are suppose to do at each
2273 invocation of this function. */
2274
2275 ++srv_main_active_loops;
2276
2277 MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2278
2279 /* ALTER TABLE in MySQL requires on Unix that the table handler
2280 can drop tables lazily after there no longer are SELECT
2281 queries to them. */
2282 srv_main_thread_op_info = "doing background drop tables";
2283 row_drop_tables_for_mysql_in_background();
2284 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2285 counter_time);
2286
2287 ut_d(srv_master_do_disabled_loop());
2288
2289 if (srv_shutdown_state.load() >=
2290 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2291 return;
2292 }
2293
2294 /* Do an ibuf merge */
2295 srv_main_thread_op_info = "doing insert buffer merge";
2296 counter_time = ut_time_monotonic_us();
2297 ibuf_merge_in_background(false);
2298 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_IBUF_MERGE_MICROSECOND,
2299 counter_time);
2300
2301 /* Now see if various tasks that are performed at defined
2302 intervals need to be performed. */
2303
2304 if (srv_shutdown_state.load() >=
2305 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2306 return;
2307 }
2308
2309 srv_update_cpu_usage();
2310
2311 if (trx_sys->rseg_history_len > 0) {
2312 srv_wake_purge_thread_if_not_active();
2313 }
2314
2315 if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2316 srv_main_thread_op_info = "enforcing dict cache limit";
2317 ulint n_evicted = srv_master_evict_from_table_cache(50);
2318 if (n_evicted != 0) {
2319 MONITOR_INC_VALUE(MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2320 }
2321 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_DICT_LRU_MICROSECOND,
2322 counter_time);
2323 }
2324 }
2325
2326 /** Perform the tasks that the master thread is supposed to do whenever the
2327 server is idle. We do check for the server state during this function
2328 and if the server has entered the shutdown phase we may return from
2329 the function without completing the required tasks.
2330 Note that the server can move to active state when we are executing this
2331 function but we don't check for that as we are suppose to perform more
2332 or less same tasks when server is active. */
srv_master_do_idle_tasks(void)2333 static void srv_master_do_idle_tasks(void) {
2334 uintmax_t counter_time;
2335
2336 ++srv_main_idle_loops;
2337
2338 MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2339
2340 /* ALTER TABLE in MySQL requires on Unix that the table handler
2341 can drop tables lazily after there no longer are SELECT
2342 queries to them. */
2343 counter_time = ut_time_monotonic_us();
2344 srv_main_thread_op_info = "doing background drop tables";
2345 row_drop_tables_for_mysql_in_background();
2346 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2347 counter_time);
2348
2349 ut_d(srv_master_do_disabled_loop());
2350
2351 if (srv_shutdown_state.load() >=
2352 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2353 return;
2354 }
2355
2356 /* Do an ibuf merge */
2357 counter_time = ut_time_monotonic_us();
2358 srv_main_thread_op_info = "doing insert buffer merge";
2359 ibuf_merge_in_background(true);
2360 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_IBUF_MERGE_MICROSECOND,
2361 counter_time);
2362
2363 if (srv_shutdown_state.load() >=
2364 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2365 return;
2366 }
2367
2368 srv_update_cpu_usage();
2369
2370 if (trx_sys->rseg_history_len > 0) {
2371 srv_wake_purge_thread_if_not_active();
2372 }
2373
2374 srv_main_thread_op_info = "enforcing dict cache limit";
2375 ulint n_evicted = srv_master_evict_from_table_cache(100);
2376 if (n_evicted != 0) {
2377 MONITOR_INC_VALUE(MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2378 }
2379 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_DICT_LRU_MICROSECOND,
2380 counter_time);
2381 }
2382
2383 /** Perform the tasks during pre_dd_shutdown phase. The tasks that we do
2384 depend on srv_fast_shutdown:
2385 2 => very fast shutdown => do no book keeping
2386 0, 1 => normal or slow shutdown => clear drop table queue
2387 @param[in,out] last_print_time last time log message (about pending
2388 operations of shutdown) was printed
2389 @return true if there might be some work left to be done, false otherwise */
srv_master_do_pre_dd_shutdown_tasks(ib_time_monotonic_t * last_print_time)2390 static bool srv_master_do_pre_dd_shutdown_tasks(
2391 ib_time_monotonic_t *last_print_time) /*!< last time the function
2392 print the message */
2393 {
2394 ulint n_tables_to_drop = 0;
2395
2396 ut_ad(!srv_read_only_mode);
2397
2398 ++srv_main_shutdown_loops;
2399
2400 ut_a(srv_shutdown_state_matches([](auto state) {
2401 return state == SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS ||
2402 state == SRV_SHUTDOWN_EXIT_THREADS;
2403 }));
2404
2405 /* In very fast shutdown none of the following is necessary */
2406 if (srv_fast_shutdown == 2) {
2407 return (false);
2408 }
2409
2410 /* ALTER TABLE in MySQL requires on Unix that the table handler
2411 can drop tables lazily after there no longer are SELECT
2412 queries to them. */
2413 if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2414 srv_main_thread_op_info = "doing background drop tables";
2415 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2416 }
2417
2418 /* Print progress message every 60 seconds during shutdown */
2419 srv_shutdown_print_master_pending(last_print_time, n_tables_to_drop, 0);
2420
2421 return (n_tables_to_drop != 0);
2422 }
2423
2424 /** Perform the tasks during shutdown. The tasks that we do at shutdown
2425 depend on srv_fast_shutdown:
2426 1, 2 => very fast shutdown => do no book keeping
2427 0 => slow shutdown => do ibuf merge
2428 @param[in,out] last_print_time last time log message (about pending
2429 operations of shutdown) was printed
2430 @return true if there might be some work left to be done, false otherwise */
srv_master_do_shutdown_tasks(ib_time_monotonic_t * last_print_time)2431 static bool srv_master_do_shutdown_tasks(
2432 ib_time_monotonic_t *last_print_time) /*!< last time the function
2433 print the message */
2434 {
2435 ulint n_bytes_merged = 0;
2436
2437 ut_ad(!srv_read_only_mode);
2438
2439 ++srv_main_shutdown_loops;
2440
2441 ut_a(srv_shutdown_state_matches([](auto state) {
2442 return state == SRV_SHUTDOWN_MASTER_STOP ||
2443 state == SRV_SHUTDOWN_EXIT_THREADS;
2444 }));
2445
2446 /* In very fast shutdown none of the following is necessary */
2447 if (srv_fast_shutdown >= 1) {
2448 return (false);
2449 }
2450
2451 /* In case of slow shutdown we do ibuf merge (unless innodb_force_recovery
2452 is greater or equal to SRV_FORCE_NO_IBUF_MERGE). */
2453 if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2454 srv_main_thread_op_info = "doing insert buffer merge";
2455 n_bytes_merged = ibuf_merge_in_background(true);
2456 }
2457
2458 /* Print progress message every 60 seconds during shutdown */
2459 srv_shutdown_print_master_pending(last_print_time, 0, n_bytes_merged);
2460
2461 return (n_bytes_merged != 0);
2462 }
2463
undo_rotate_default_master_key()2464 void undo_rotate_default_master_key() {
2465 fil_space_t *space;
2466
2467 if (srv_shutdown_state.load() >= SRV_SHUTDOWN_CLEANUP) {
2468 return;
2469 }
2470
2471 /* If the undo log space is using default key, rotate
2472 it. We need the server_uuid initialized, otherwise,
2473 the keyname will not contains server uuid. */
2474 if (Encryption::get_master_key_id() != 0 || srv_read_only_mode ||
2475 strlen(server_uuid) == 0) {
2476 return;
2477 }
2478
2479 DBUG_EXECUTE_IF("skip_rotating_default_master_key", return;);
2480
2481 undo::spaces->s_lock();
2482 for (auto undo_space : undo::spaces->m_spaces) {
2483 ut_ad(fsp_is_undo_tablespace(undo_space->id()));
2484
2485 space = fil_space_get(undo_space->id());
2486
2487 if (space == nullptr || space->encryption_type == Encryption::NONE) {
2488 continue;
2489 }
2490
2491 byte encrypt_info[Encryption::INFO_SIZE];
2492 mtr_t mtr;
2493
2494 ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2495
2496 /* Make sure that there is enough reusable
2497 space in the redo log files. */
2498 log_free_check();
2499
2500 mtr_start(&mtr);
2501
2502 mtr_x_lock_space(space, &mtr);
2503
2504 memset(encrypt_info, 0, Encryption::INFO_SIZE);
2505
2506 if (!fsp_header_rotate_encryption(space, encrypt_info, &mtr)) {
2507 ib::error(ER_IB_MSG_1056, undo_space->space_name());
2508 } else {
2509 ib::info(ER_IB_MSG_1057, undo_space->space_name());
2510 }
2511 mtr_commit(&mtr);
2512 }
2513 undo::spaces->s_unlock();
2514 }
2515
2516 /* Enable REDO tablespace encryption */
srv_enable_redo_encryption(bool is_boot)2517 bool srv_enable_redo_encryption(bool is_boot) {
2518 /* Start to encrypt the redo log block from now on. */
2519 fil_space_t *space = fil_space_get(dict_sys_t::s_log_space_first_id);
2520
2521 /* While enabling encryption, make sure not to overwrite the tablespace
2522 key. */
2523 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2524 return false;
2525 }
2526
2527 dberr_t err;
2528 byte key[Encryption::KEY_LEN];
2529 byte iv[Encryption::KEY_LEN];
2530
2531 Encryption::random_value(key);
2532 Encryption::random_value(iv);
2533
2534 if (!log_write_encryption(key, iv, is_boot)) {
2535 ib::error(ER_IB_MSG_1243);
2536 return true;
2537 }
2538
2539 fsp_flags_set_encryption(space->flags);
2540 err = fil_set_encryption(space->id, Encryption::AES, key, iv);
2541 if (err != DB_SUCCESS) {
2542 ib::warn(ER_IB_MSG_1244);
2543 return true;
2544 }
2545
2546 /* Announce encryption is successfully enabled for the redo log. */
2547 ib::info(ER_IB_MSG_1245);
2548 return false;
2549 }
2550
2551 /* Set encryption for UNDO tablespace with given space id. */
set_undo_tablespace_encryption(space_id_t space_id,mtr_t * mtr,bool is_boot)2552 bool set_undo_tablespace_encryption(space_id_t space_id, mtr_t *mtr,
2553 bool is_boot) {
2554 ut_ad(fsp_is_undo_tablespace(space_id));
2555 fil_space_t *space = fil_space_get(space_id);
2556
2557 dberr_t err;
2558 byte encrypt_info[Encryption::INFO_SIZE];
2559 byte key[Encryption::KEY_LEN];
2560 byte iv[Encryption::KEY_LEN];
2561
2562 Encryption::random_value(key);
2563 Encryption::random_value(iv);
2564
2565 /* 0 fill encryption info */
2566 memset(encrypt_info, 0, Encryption::INFO_SIZE);
2567
2568 /* Fill up encryption info to be set */
2569 if (!Encryption::fill_encryption_info(key, iv, encrypt_info, is_boot, true)) {
2570 ib::error(ER_IB_MSG_1052, space->name);
2571 return true;
2572 }
2573
2574 uint32_t new_flags = space->flags | FSP_FLAGS_MASK_ENCRYPTION;
2575
2576 /* Write encryption info on tablespace header page */
2577 if (!fsp_header_write_encryption(space->id, new_flags, encrypt_info, true,
2578 false, mtr)) {
2579 ib::error(ER_IB_MSG_1053, space->name);
2580 return true;
2581 }
2582
2583 /* Update In-Mem encryption information for UNDO tablespace */
2584 fsp_flags_set_encryption(space->flags);
2585 err = fil_set_encryption(space->id, Encryption::AES, key, iv);
2586 if (err != DB_SUCCESS) {
2587 ib::error(ER_IB_MSG_1054, space->name, int{err}, ut_strerr(err));
2588 return true;
2589 }
2590
2591 return false;
2592 }
2593
2594 /* Enable UNDO tablespace encryption */
srv_enable_undo_encryption(bool is_boot)2595 bool srv_enable_undo_encryption(bool is_boot) {
2596 /* Make sure undo::ddl_mutex is owned. */
2597 ut_ad(mutex_own(&undo::ddl_mutex));
2598
2599 /* Traverse over all UNDO tablespaces and mark them encrypted. */
2600 undo::spaces->s_lock();
2601 for (auto undo_space : undo::spaces->m_spaces) {
2602 /* Skip system tablespace. */
2603 if (undo_space->id() == TRX_SYS_SPACE) {
2604 continue;
2605 }
2606
2607 fil_space_t *space = fil_space_get(undo_space->id());
2608 ut_ad(fsp_is_undo_tablespace(undo_space->id()));
2609
2610 /* While enabling encryption, make sure not to overwrite the tablespace key.
2611 Otherwise, pages encrypted with the old tablespace key can't be read. */
2612 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2613 continue;
2614 }
2615
2616 undo_space->rsegs()->s_lock();
2617
2618 /* Make sure that there is enough reusable space in the redo log files. */
2619 log_free_check();
2620
2621 mtr_t mtr;
2622 mtr_start(&mtr);
2623 mtr_x_lock_space(space, &mtr);
2624
2625 if (set_undo_tablespace_encryption(undo_space->id(), &mtr, is_boot)) {
2626 mtr_commit(&mtr);
2627 undo_space->rsegs()->s_unlock();
2628 undo::spaces->s_unlock();
2629 return true;
2630 }
2631
2632 mtr_commit(&mtr);
2633 undo_space->rsegs()->s_unlock();
2634
2635 /* Announce encryption is successfully enabled for the undo tablespace. */
2636 ib::info(ER_IB_MSG_1055, undo_space->space_name());
2637 }
2638 undo::spaces->s_unlock();
2639
2640 return false;
2641 }
2642
2643 /** Puts master thread to sleep. At this point we are using polling to
2644 service various activities. Master thread sleeps for one second before
2645 checking the state of the server again */
srv_master_sleep(void)2646 static void srv_master_sleep(void) {
2647 srv_main_thread_op_info = "sleeping";
2648 os_thread_sleep(1000000);
2649 srv_main_thread_op_info = "";
2650 }
2651
2652 /** Check redo and undo log encryption and rotate default master key. */
srv_sys_check_set_encryption()2653 static void srv_sys_check_set_encryption() {
2654 /* Rotate default master key for redo log encryption if it is set */
2655 if (srv_redo_log_encrypt) {
2656 fil_space_t *space = fil_space_get(dict_sys_t::s_log_space_first_id);
2657 ut_a(space);
2658
2659 /* Encryption for redo tablespace must already have been set. This is
2660 safeguard to encrypt it if not done earlier. */
2661 ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2662
2663 if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2664 ib::warn(ER_IB_MSG_1285, space->name, "srv_redo_log_encrypt");
2665 srv_enable_redo_encryption(false);
2666 }
2667 redo_rotate_default_master_key();
2668 }
2669
2670 if (!srv_undo_log_encrypt) {
2671 return;
2672 }
2673
2674 /* Rotate default master key for undo log encryption if it is set */
2675 ut_ad(!undo::spaces->empty());
2676
2677 mutex_enter(&undo::ddl_mutex);
2678
2679 bool encrypt_undo = false;
2680 undo::spaces->s_lock();
2681 for (auto &undo_ts : undo::spaces->m_spaces) {
2682 fil_space_t *space = fil_space_get(undo_ts->id());
2683 ut_ad(space != nullptr);
2684
2685 /* Encryption for undo tablespace must already have been set. This is
2686 safeguard to encrypt it if not done earlier. */
2687 ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
2688 if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
2689 ib::warn(ER_IB_MSG_1285, space->name, "srv_undo_log_encrypt");
2690 /* No need to loop further as srv_enable_undo_encryption() would
2691 loop through all UNDO tablespaces and encrypt. */
2692 encrypt_undo = true;
2693 break;
2694 }
2695 }
2696 undo::spaces->s_unlock();
2697
2698 if (encrypt_undo) {
2699 ut_d(bool ret =) srv_enable_undo_encryption(false);
2700 ut_ad(!ret);
2701 }
2702 undo_rotate_default_master_key();
2703 mutex_exit(&undo::ddl_mutex);
2704 }
2705
2706 /** Waits on event in provided slot.
2707 @param[in] slot slot reserved as SRV_MASTER */
srv_master_wait(srv_slot_t * slot)2708 static void srv_master_wait(srv_slot_t *slot) {
2709 srv_main_thread_op_info = "suspending";
2710
2711 srv_suspend_thread(slot);
2712
2713 /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
2714 waits for database activity to die down when converting < 4.1.x
2715 databases, and relies on this string being exactly as it is. InnoDB
2716 manual also mentions this string in several places. */
2717 srv_main_thread_op_info = "waiting for server activity";
2718
2719 os_event_wait(slot->event);
2720 }
2721
2722 /** Executes the main loop of the master thread.
2723 @param[in] slot slot reserved as SRV_MASTER */
srv_master_main_loop(srv_slot_t * slot)2724 static void srv_master_main_loop(srv_slot_t *slot) {
2725 if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2726 /* When innodb_force_recovery is at least SRV_FORCE_NO_BACKGROUND,
2727 we avoid performing active/idle master's tasks. However, we still
2728 need to ensure that:
2729 srv_shutdown_state >= SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS,
2730 after we exited srv_master_main_loop(). Keep waiting until that
2731 is satisfied and then exit. */
2732 while (srv_shutdown_state.load() <
2733 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2734 srv_master_wait(slot);
2735 }
2736 return;
2737 }
2738
2739 ulint old_activity_count = srv_get_activity_count();
2740
2741 while (srv_shutdown_state.load() <
2742 SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS) {
2743 srv_master_sleep();
2744
2745 MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
2746
2747 /* Just in case - if there is not much free space in redo,
2748 try to avoid asking for troubles because of extra work
2749 performed in such background thread. */
2750 srv_main_thread_op_info = "checking free log space";
2751 log_free_check();
2752
2753 if (srv_check_activity(old_activity_count)) {
2754 old_activity_count = srv_get_activity_count();
2755 srv_master_do_active_tasks();
2756 } else {
2757 srv_master_do_idle_tasks();
2758 }
2759
2760 /* Make sure that early encryption processing of UNDO/REDO log is done. */
2761 if (!is_early_redo_undo_encryption_done()) {
2762 continue;
2763 }
2764
2765 /* Let clone wait when redo/undo log encryption is set. If clone is already
2766 in progress we skip the check and come back later. */
2767 if (!clone_mark_wait()) {
2768 continue;
2769 }
2770
2771 /* Check encryption property for system tablespaces. */
2772 srv_sys_check_set_encryption();
2773
2774 /* Allow any blocking clone to progress. */
2775 clone_mark_free();
2776 }
2777 }
2778
2779 /** Executes pre_dd_shutdown tasks in the master thread. */
srv_master_pre_dd_shutdown_loop()2780 static void srv_master_pre_dd_shutdown_loop() {
2781 ut_a(srv_shutdown_state_matches([](auto state) {
2782 return state == SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS ||
2783 state == SRV_SHUTDOWN_EXIT_THREADS;
2784 }));
2785 auto last_print_time = ut_time_monotonic();
2786 while (srv_shutdown_state.load() < SRV_SHUTDOWN_EXIT_THREADS &&
2787 srv_master_do_pre_dd_shutdown_tasks(&last_print_time)) {
2788 /* Shouldn't loop here in case of very fast shutdown */
2789 ut_ad(srv_fast_shutdown < 2);
2790 }
2791 }
2792
2793 /** Executes shutdown tasks in the master thread. */
srv_master_shutdown_loop()2794 static void srv_master_shutdown_loop() {
2795 ut_a(srv_shutdown_state_matches([](auto state) {
2796 return state == SRV_SHUTDOWN_MASTER_STOP ||
2797 state == SRV_SHUTDOWN_EXIT_THREADS;
2798 }));
2799 auto last_print_time = ut_time_monotonic();
2800 while (srv_shutdown_state.load() < SRV_SHUTDOWN_EXIT_THREADS &&
2801 srv_master_do_shutdown_tasks(&last_print_time)) {
2802 /* Shouldn't loop here in case of very fast shutdown */
2803 ut_ad(srv_fast_shutdown < 2);
2804 }
2805 }
2806
2807 /** The master thread controlling the server. */
srv_master_thread()2808 void srv_master_thread() {
2809 DBUG_TRACE;
2810
2811 srv_slot_t *slot;
2812
2813 THD *thd = create_thd(false, true, true, 0);
2814
2815 ut_ad(!srv_read_only_mode);
2816
2817 srv_main_thread_process_no = os_proc_get_number();
2818 srv_main_thread_id = os_thread_get_curr_id();
2819
2820 slot = srv_reserve_slot(SRV_MASTER);
2821 ut_a(slot == srv_sys->sys_threads);
2822
2823 srv_master_main_loop(slot);
2824
2825 srv_master_pre_dd_shutdown_loop();
2826
2827 os_event_set(srv_threads.m_master_ready_for_dd_shutdown);
2828
2829 /* This is just for test scenarios. */
2830 srv_thread_delay_cleanup_if_needed(true);
2831
2832 while (srv_shutdown_state.load() < SRV_SHUTDOWN_MASTER_STOP) {
2833 srv_master_wait(slot);
2834 }
2835
2836 srv_master_shutdown_loop();
2837
2838 srv_main_thread_op_info = "exiting";
2839 destroy_thd(thd);
2840 }
2841
2842 /**
2843 Check if purge should stop.
2844 @return true if it should shutdown. */
srv_purge_should_exit(ulint n_purged)2845 static bool srv_purge_should_exit(
2846 ulint n_purged) /*!< in: pages purged in last batch */
2847 {
2848 switch (srv_shutdown_state.load()) {
2849 case SRV_SHUTDOWN_NONE:
2850 case SRV_SHUTDOWN_RECOVERY_ROLLBACK:
2851 case SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS:
2852 /* Normal operation. */
2853 break;
2854
2855 case SRV_SHUTDOWN_PURGE:
2856 /* Exit unless slow shutdown requested or all done. */
2857 return (srv_fast_shutdown != 0 || n_purged == 0);
2858
2859 case SRV_SHUTDOWN_EXIT_THREADS:
2860 return (true);
2861
2862 case SRV_SHUTDOWN_LAST_PHASE:
2863 case SRV_SHUTDOWN_FLUSH_PHASE:
2864 case SRV_SHUTDOWN_MASTER_STOP:
2865 case SRV_SHUTDOWN_CLEANUP:
2866 case SRV_SHUTDOWN_DD:
2867 ut_error;
2868 }
2869
2870 return (false);
2871 }
2872
2873 /** Fetch and execute a task from the work queue.
2874 @return true if a task was executed */
srv_task_execute(void)2875 static bool srv_task_execute(void) {
2876 que_thr_t *thr = nullptr;
2877
2878 ut_ad(!srv_read_only_mode);
2879 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2880
2881 mutex_enter(&srv_sys->tasks_mutex);
2882
2883 if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2884 thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2885
2886 ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2887
2888 UT_LIST_REMOVE(srv_sys->tasks, thr);
2889 }
2890
2891 mutex_exit(&srv_sys->tasks_mutex);
2892
2893 if (thr != nullptr) {
2894 que_run_threads(thr);
2895
2896 os_atomic_inc_ulint(&purge_sys->pq_mutex, &purge_sys->n_completed, 1);
2897 }
2898
2899 return (thr != nullptr);
2900 }
2901
2902 /** Worker thread that reads tasks from the work queue and executes them. */
srv_worker_thread()2903 void srv_worker_thread() {
2904 srv_slot_t *slot;
2905
2906 ut_ad(!srv_read_only_mode);
2907 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2908
2909 #ifdef UNIV_PFS_THREAD
2910 THD *thd = create_thd(false, true, true, srv_worker_thread_key.m_value);
2911 #else
2912 THD *thd = create_thd(false, true, true, 0);
2913 #endif
2914 slot = srv_reserve_slot(SRV_WORKER);
2915
2916 ut_a(srv_n_purge_threads > 1);
2917
2918 srv_sys_mutex_enter();
2919
2920 ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
2921
2922 srv_sys_mutex_exit();
2923
2924 /* We need to ensure that the worker threads exit after the
2925 purge coordinator thread. Otherwise the purge coordinaor can
2926 end up waiting forever in trx_purge_wait_for_workers_to_complete() */
2927
2928 do {
2929 srv_suspend_thread(slot);
2930
2931 os_event_wait(slot->event);
2932
2933 if (srv_task_execute()) {
2934 /* If there are tasks in the queue, wakeup
2935 the purge coordinator thread. */
2936
2937 srv_wake_purge_thread_if_not_active();
2938 }
2939
2940 /* Note: we are checking the state without holding the
2941 purge_sys->latch here. */
2942 } while (purge_sys->state != PURGE_STATE_EXIT);
2943
2944 srv_free_slot(slot);
2945
2946 rw_lock_x_lock(&purge_sys->latch);
2947
2948 ut_a(!purge_sys->running);
2949 ut_a(purge_sys->state == PURGE_STATE_EXIT);
2950 ut_a(srv_shutdown_state.load() >= SRV_SHUTDOWN_PURGE);
2951
2952 rw_lock_x_unlock(&purge_sys->latch);
2953
2954 destroy_thd(thd);
2955 }
2956
2957 /** Do the actual purge operation.
2958 @return length of history list before the last purge batch. */
srv_do_purge(ulint * n_total_purged)2959 static ulint srv_do_purge(
2960 ulint *n_total_purged) /*!< in/out: total pages purged */
2961 {
2962 ulint n_pages_purged;
2963
2964 static ulint count = 0;
2965 static ulint n_use_threads = 0;
2966 static ulint rseg_history_len = 0;
2967 ulint old_activity_count = srv_get_activity_count();
2968
2969 const auto n_threads = srv_threads.m_purge_workers_n;
2970
2971 ut_a(n_threads > 0);
2972 ut_ad(!srv_read_only_mode);
2973
2974 /* Purge until there are no more records to purge and there is
2975 no change in configuration or server state. If the user has
2976 configured more than one purge thread then we treat that as a
2977 pool of threads and only use the extra threads if purge can't
2978 keep up with updates. */
2979
2980 if (n_use_threads == 0) {
2981 n_use_threads = n_threads;
2982 }
2983
2984 do {
2985 if (trx_sys->rseg_history_len > rseg_history_len ||
2986 (srv_max_purge_lag > 0 && rseg_history_len > srv_max_purge_lag)) {
2987 /* History length is now longer than what it was
2988 when we took the last snapshot. Use more threads. */
2989
2990 if (n_use_threads < n_threads) {
2991 ++n_use_threads;
2992 }
2993
2994 } else if (srv_check_activity(old_activity_count) && n_use_threads > 1) {
2995 /* History length same or smaller since last snapshot,
2996 use fewer threads. */
2997
2998 --n_use_threads;
2999
3000 old_activity_count = srv_get_activity_count();
3001 }
3002
3003 /* Ensure that the purge threads are less than what
3004 was configured. */
3005
3006 ut_a(n_use_threads > 0);
3007 ut_a(n_use_threads <= n_threads);
3008
3009 /* Take a snapshot of the history list before purge. */
3010 if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
3011 break;
3012 }
3013
3014 ulint undo_trunc_freq = purge_sys->undo_trunc.get_rseg_truncate_frequency();
3015
3016 ulint rseg_truncate_frequency = ut_min(
3017 static_cast<ulint>(srv_purge_rseg_truncate_frequency), undo_trunc_freq);
3018
3019 n_pages_purged = trx_purge(n_use_threads, srv_purge_batch_size,
3020 (++count % rseg_truncate_frequency) == 0);
3021
3022 *n_total_purged += n_pages_purged;
3023
3024 } while (!srv_purge_should_exit(n_pages_purged) && n_pages_purged > 0 &&
3025 purge_sys->state == PURGE_STATE_RUN);
3026
3027 return (rseg_history_len);
3028 }
3029
3030 /** Suspend the purge coordinator thread. */
srv_purge_coordinator_suspend(srv_slot_t * slot,ulint rseg_history_len)3031 static void srv_purge_coordinator_suspend(
3032 srv_slot_t *slot, /*!< in/out: Purge coordinator
3033 thread slot */
3034 ulint rseg_history_len) /*!< in: history list length
3035 before last purge */
3036 {
3037 ut_ad(!srv_read_only_mode);
3038 ut_a(slot->type == SRV_PURGE);
3039
3040 bool stop = false;
3041
3042 /** Maximum wait time on the purge event, in micro-seconds. */
3043 static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
3044
3045 int64_t sig_count = srv_suspend_thread(slot);
3046
3047 do {
3048 ulint ret;
3049
3050 rw_lock_x_lock(&purge_sys->latch);
3051
3052 purge_sys->running = false;
3053
3054 rw_lock_x_unlock(&purge_sys->latch);
3055
3056 /* We don't wait right away on the the non-timed wait because
3057 we want to signal the thread that wants to suspend purge. */
3058
3059 if (stop) {
3060 os_event_wait_low(slot->event, sig_count);
3061 ret = 0;
3062 } else if (rseg_history_len <= trx_sys->rseg_history_len) {
3063 ret =
3064 os_event_wait_time_low(slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
3065 } else {
3066 /* We don't want to waste time waiting, if the
3067 history list increased by the time we got here,
3068 unless purge has been stopped. */
3069 ret = 0;
3070 }
3071
3072 srv_sys_mutex_enter();
3073
3074 /* The thread can be in state !suspended after the timeout
3075 but before this check if another thread sent a wakeup signal. */
3076
3077 if (slot->suspended) {
3078 slot->suspended = FALSE;
3079 ++srv_sys->n_threads_active[slot->type];
3080 ut_a(srv_sys->n_threads_active[slot->type] == 1);
3081 }
3082
3083 srv_sys_mutex_exit();
3084
3085 sig_count = srv_suspend_thread(slot);
3086
3087 rw_lock_x_lock(&purge_sys->latch);
3088
3089 stop = (srv_shutdown_state.load() < SRV_SHUTDOWN_PURGE &&
3090 purge_sys->state == PURGE_STATE_STOP);
3091
3092 if (!stop) {
3093 bool check = true;
3094 DBUG_EXECUTE_IF(
3095 "skip_purge_check_shutdown",
3096 if (srv_shutdown_state.load() >= SRV_SHUTDOWN_PURGE &&
3097 purge_sys->state == PURGE_STATE_STOP &&
3098 srv_fast_shutdown != 0) { check = false; };);
3099
3100 if (check) {
3101 ut_a(purge_sys->n_stop == 0);
3102 }
3103 purge_sys->running = true;
3104 } else {
3105 ut_a(purge_sys->n_stop > 0);
3106
3107 /* Signal that we are suspended. */
3108 os_event_set(purge_sys->event);
3109 }
3110
3111 rw_lock_x_unlock(&purge_sys->latch);
3112
3113 if (ret == OS_SYNC_TIME_EXCEEDED) {
3114 /* No new records added since wait started then simply
3115 wait for new records. The magic number 5000 is an
3116 approximation for the case where we have cached UNDO
3117 log records which prevent truncate of the UNDO
3118 segments. */
3119
3120 if (rseg_history_len == trx_sys->rseg_history_len &&
3121 trx_sys->rseg_history_len < 5000) {
3122 stop = true;
3123 }
3124 }
3125
3126 } while (stop);
3127
3128 srv_sys_mutex_enter();
3129
3130 if (slot->suspended) {
3131 slot->suspended = FALSE;
3132 ++srv_sys->n_threads_active[slot->type];
3133 ut_a(srv_sys->n_threads_active[slot->type] == 1);
3134 }
3135
3136 srv_sys_mutex_exit();
3137 }
3138
3139 /** Purge coordinator thread that schedules the purge tasks. */
srv_purge_coordinator_thread()3140 void srv_purge_coordinator_thread() {
3141 srv_slot_t *slot;
3142
3143 #ifdef UNIV_PFS_THREAD
3144 THD *thd = create_thd(false, true, true, srv_purge_thread_key.m_value);
3145 #else
3146 THD *thd = create_thd(false, true, true, 0);
3147 #endif
3148
3149 ulint n_total_purged = ULINT_UNDEFINED;
3150
3151 ut_ad(!srv_read_only_mode);
3152 ut_a(srv_n_purge_threads >= 1);
3153 ut_a(trx_purge_state() == PURGE_STATE_INIT);
3154 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
3155
3156 rw_lock_x_lock(&purge_sys->latch);
3157
3158 purge_sys->running = true;
3159 purge_sys->state = PURGE_STATE_RUN;
3160
3161 rw_lock_x_unlock(&purge_sys->latch);
3162
3163 slot = srv_reserve_slot(SRV_PURGE);
3164
3165 ulint rseg_history_len = trx_sys->rseg_history_len;
3166
3167 do {
3168 /* If there are no records to purge or the last
3169 purge didn't purge any records then wait for activity. */
3170
3171 if (srv_shutdown_state.load() < SRV_SHUTDOWN_PURGE &&
3172 (purge_sys->state == PURGE_STATE_STOP || n_total_purged == 0)) {
3173 srv_purge_coordinator_suspend(slot, rseg_history_len);
3174 }
3175
3176 if (srv_purge_should_exit(n_total_purged)) {
3177 ut_a(!slot->suspended);
3178 break;
3179 }
3180
3181 n_total_purged = 0;
3182
3183 rseg_history_len = srv_do_purge(&n_total_purged);
3184
3185 } while (!srv_purge_should_exit(n_total_purged));
3186
3187 /* This is just for test scenarios. Do not pass thd here,
3188 because it would lead to wait on event then, and we would
3189 never exit the srv_pre_dd_shutdown() which waits for this
3190 thread to exit. That's because the signal for which we
3191 would wait is signalled in srv_shutdown which happens
3192 after the srv_pre_dd_shutdown is ended. */
3193 srv_thread_delay_cleanup_if_needed(false);
3194
3195 /* Ensure that we don't jump out of the loop unless the
3196 exit condition is satisfied. */
3197
3198 ut_a(srv_purge_should_exit(n_total_purged));
3199
3200 ulint n_pages_purged = ULINT_MAX;
3201
3202 /* Ensure that all records are purged if it is not a fast shutdown.
3203 This covers the case where a record can be added after we exit the
3204 loop above. */
3205 while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
3206 n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
3207 }
3208
3209 /* This trx_purge is called to remove any undo records (added by
3210 background threads) after completion of the above loop. When
3211 srv_fast_shutdown != 0, a large batch size can cause significant
3212 delay in shutdown ,so reducing the batch size to magic number 20
3213 (which was default in 5.5), which we hope will be sufficient to
3214 remove all the undo records */
3215 const uint temp_batch_size = 20;
3216
3217 n_pages_purged =
3218 trx_purge(1,
3219 srv_purge_batch_size <= temp_batch_size ? srv_purge_batch_size
3220 : temp_batch_size,
3221 true);
3222 ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
3223
3224 /* The task queue should always be empty, independent of fast
3225 shutdown state. */
3226 ut_a(srv_get_task_queue_length() == 0);
3227
3228 srv_free_slot(slot);
3229
3230 /* Note that we are shutting down. */
3231 rw_lock_x_lock(&purge_sys->latch);
3232
3233 purge_sys->state = PURGE_STATE_EXIT;
3234
3235 /* Clear out any pending undo-tablespaces to truncate and reset
3236 the list as we plan to shutdown the purge thread. */
3237 purge_sys->undo_trunc.reset();
3238
3239 purge_sys->running = false;
3240
3241 rw_lock_x_unlock(&purge_sys->latch);
3242
3243 /* Ensure that all the worker threads quit. */
3244 if (srv_n_purge_threads > 1) {
3245 srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
3246 }
3247
3248 /* This is just for test scenarios. Do not pass thd here.
3249 For explanation look at comment for similar usage above. */
3250 srv_thread_delay_cleanup_if_needed(false);
3251
3252 destroy_thd(thd);
3253 }
3254
3255 /** Enqueues a task to server task queue and releases a worker thread, if there
3256 is a suspended one. */
srv_que_task_enqueue_low(que_thr_t * thr)3257 void srv_que_task_enqueue_low(que_thr_t *thr) /*!< in: query thread */
3258 {
3259 ut_ad(!srv_read_only_mode);
3260 mutex_enter(&srv_sys->tasks_mutex);
3261
3262 UT_LIST_ADD_LAST(srv_sys->tasks, thr);
3263
3264 mutex_exit(&srv_sys->tasks_mutex);
3265
3266 srv_release_threads(SRV_WORKER, 1);
3267 }
3268
3269 /** Get count of tasks in the queue.
3270 @return number of tasks in queue */
srv_get_task_queue_length(void)3271 ulint srv_get_task_queue_length(void) {
3272 ulint n_tasks;
3273
3274 ut_ad(!srv_read_only_mode);
3275
3276 mutex_enter(&srv_sys->tasks_mutex);
3277
3278 n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
3279
3280 mutex_exit(&srv_sys->tasks_mutex);
3281
3282 return (n_tasks);
3283 }
3284
3285 /** Wakeup the purge threads. */
srv_purge_wakeup(void)3286 void srv_purge_wakeup(void) {
3287 ut_ad(!srv_read_only_mode);
3288
3289 if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
3290 srv_release_threads(SRV_PURGE, 1);
3291
3292 if (srv_threads.m_purge_workers_n > 1) {
3293 /* SRV_PURGE is not counted here. */
3294 ulint n_workers = srv_threads.m_purge_workers_n - 1;
3295
3296 srv_release_threads(SRV_WORKER, n_workers);
3297 }
3298 }
3299 }
3300
3301 /** Check if the purge threads are active, both coordinator and worker threads
3302 @return true if any thread is active, false if no thread is active */
srv_purge_threads_active()3303 bool srv_purge_threads_active() {
3304 if (srv_threads.m_purge_workers == nullptr) {
3305 #ifdef UNIV_DEBUG
3306 ut_a(srv_read_only_mode);
3307 #endif /* UNIV_DEBUG */
3308 ut_ad(!srv_thread_is_active(srv_threads.m_purge_coordinator));
3309 return (false);
3310 }
3311
3312 for (size_t i = 0; i < srv_threads.m_purge_workers_n; ++i) {
3313 if (srv_thread_is_active(srv_threads.m_purge_workers[i])) {
3314 ut_ad(!srv_read_only_mode);
3315 return (true);
3316 }
3317 }
3318
3319 ut_ad(!srv_thread_is_active(srv_threads.m_purge_coordinator));
3320
3321 return (false);
3322 }
3323
srv_thread_is_active(const IB_thread & thread)3324 bool srv_thread_is_active(const IB_thread &thread) {
3325 return (thread_is_active(thread));
3326 }
3327
3328 #endif /* !UNIV_HOTBACKUP */
3329
srv_get_server_errmsgs(int errcode)3330 const char *srv_get_server_errmsgs(int errcode) {
3331 return (error_message_for_error_log(errcode));
3332 }
3333
set_srv_redo_log(bool enable)3334 void set_srv_redo_log(bool enable) {
3335 mutex_enter(&srv_innodb_monitor_mutex);
3336 srv_redo_log = enable;
3337 mutex_exit(&srv_innodb_monitor_mutex);
3338 }
3339