1 /*****************************************************************************
2
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, 2016, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation. The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License, version 2.0, for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39
40 *****************************************************************************/
41
42 /**************************************************//**
43 @file srv/srv0srv.cc
44 The database server main program
45
46 Created 10/8/1995 Heikki Tuuri
47 *******************************************************/
48
49 #include "my_global.h"
50 #include "my_thread.h"
51
52 #include "mysql/psi/mysql_stage.h"
53 #include "mysql/psi/psi.h"
54 #include "sql_thd_internal_api.h"
55
56 #include "ha_prototypes.h"
57
58 #include "btr0sea.h"
59 #include "buf0flu.h"
60 #include "buf0lru.h"
61 #include "btr0scrub.h"
62 #include "dict0boot.h"
63 #include "dict0load.h"
64 #include "dict0stats_bg.h"
65 #include "fsp0sysspace.h"
66 #include "ibuf0ibuf.h"
67 #include "lock0lock.h"
68 #include "log0online.h"
69 #include "log0recv.h"
70 #include "mem0mem.h"
71 #include "os0proc.h"
72 #include "pars0pars.h"
73 #include "que0que.h"
74 #include "row0mysql.h"
75 #include "row0trunc.h"
76 #include "row0log.h"
77 #include "srv0mon.h"
78 #include "srv0srv.h"
79 #include "srv0start.h"
80 #include "sync0sync.h"
81 #include "trx0i_s.h"
82 #include "trx0purge.h"
83 #include "trx0rseg.h"
84 #include "usr0sess.h"
85 #include "ut0crc32.h"
86 #include "ut0mem.h"
87 #include "handler.h"
88 #include "ha_innodb.h"
89 #include "fil0crypt.h"
90 #include "system_key.h"
91
92
93 #ifndef UNIV_PFS_THREAD
94 #define create_thd(x,y,z,PFS_KEY) create_thd(x,y,z,PFS_NOT_INSTRUMENTED.m_value)
95 #endif /* UNIV_PFS_THREAD */
96
97 /* The following is the maximum allowed duration of a lock wait. */
98 ulong srv_fatal_semaphore_wait_threshold = 600;
99
100 lint srv_kill_idle_transaction = 0;
101
102 /* How much data manipulation language (DML) statements need to be delayed,
103 in microseconds, in order to reduce the lagging of the purge thread. */
104 ulint srv_dml_needed_delay = 0;
105
106 ibool srv_monitor_active = FALSE;
107 ibool srv_error_monitor_active = FALSE;
108
109 ibool srv_buf_dump_thread_active = FALSE;
110
111 bool srv_buf_resize_thread_active = false;
112
113 ibool srv_dict_stats_thread_active = FALSE;
114
115 my_bool srv_scrub_log;
116
117 const char* srv_main_thread_op_info = "";
118
119 /** Prefix used by MySQL to indicate pre-5.1 table name encoding */
120 const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
121
122 /* Server parameters which are read from the initfile */
123
124 /* The following three are dir paths which are catenated before file
125 names, where the file name itself may also contain a path */
126
127 char* srv_data_home = NULL;
128
129 /** Rollback files directory, can be absolute. */
130 char* srv_undo_dir = NULL;
131
132 /** The number of tablespaces to use for rollback segments. */
133 ulong srv_undo_tablespaces = 0;
134
135 /** The number of UNDO tablespaces that are open and ready to use. */
136 ulint srv_undo_tablespaces_open = 0;
137
138 /** The number of UNDO tablespaces that are active (hosting some rollback
139 segment). It is quite possible that some of the tablespaces do not host
140 any rollback segment, depending on the configuration used. */
141 ulint srv_undo_tablespaces_active = 0;
142
143 /* The number of rollback segments to use */
144 ulong srv_rollback_segments = 1;
145
146 /* Used for the deprecated setting innodb_undo_logs. This will still get
147 put into srv_rollback_segments if it is set to a non-default value. */
148 ulong srv_undo_logs = 0;
149 const char* deprecated_undo_logs =
150 "The parameter innodb_undo_logs is deprecated"
151 " and may be removed in future releases."
152 " Please use innodb_rollback_segments instead."
153 " See " REFMAN "innodb-undo-logs.html";
154
155
156 /** Rate at which UNDO records should be purged. */
157 ulong srv_purge_rseg_truncate_frequency = 128;
158
159 /** Enable or Disable Truncate of UNDO tablespace.
160 Note: If enabled then UNDO tablespace will be selected for truncate.
161 While Server waits for undo-tablespace to truncate if user disables
162 it, truncate action is completed but no new tablespace is marked
163 for truncate (action is never aborted). */
164 my_bool srv_undo_log_truncate = FALSE;
165
166 /** Maximum size of undo tablespace. */
167 unsigned long long srv_max_undo_log_size;
168
169 /** Enable or disable encryption of UNDO tablespaces. */
170 my_bool srv_undo_log_encrypt = 0;
171
172 /** UNDO logs that are not redo logged.
173 These logs reside in the temp tablespace.*/
174 const ulong srv_tmp_undo_logs = 32;
175
176 /** Enable or disable encryption of temporary tablespace.*/
177 my_bool srv_tmp_tablespace_encrypt;
178
179 /** Option to enable encryption of system tablespace. */
180 my_bool srv_sys_tablespace_encrypt;
181
182 /** Enable or disable encryption of pages in parallel doublewrite buffer
183 file */
184 my_bool srv_parallel_dblwr_encrypt;
185
186 /** Default undo tablespace size in UNIV_PAGEs count (10MB). */
187 const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
188 ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
189
190 /** Set if InnoDB must operate in read-only mode. We don't do any
191 recovery and open all tables in RO mode instead of RW mode. We don't
192 sync the max trx id to disk either. */
193 my_bool srv_read_only_mode;
194 /** Store each table created by a user in its own file; data
195 dictionary tables are in the system tablespace 0 */
196 my_bool srv_file_per_table;
197 /** The file format to use on new *.ibd files. */
198 ulint srv_file_format = 0;
199 /** Whether to check file format during startup. A value of
200 UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
201 set it to the highest format we support. */
202 ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
203 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
204 is greater than SRV_FORCE_NO_TRX_UNDO. */
205 my_bool high_level_read_only;
206
207 #if UNIV_FORMAT_A
208 # error "UNIV_FORMAT_A must be 0!"
209 #endif
210
211 /** Place locks to records only i.e. do not use next-key locking except
212 on duplicate key checking and foreign key checking */
213 ibool srv_locks_unsafe_for_binlog = FALSE;
214 /** Sort buffer size in index creation */
215 ulong srv_sort_buf_size = 1048576;
216 /** Maximum modification log file size for online index creation */
217 unsigned long long srv_online_max_size;
218
219 /* If this flag is TRUE, then we will use the native aio of the
220 OS (provided we compiled Innobase with it in), otherwise we will
221 use simulated aio we build below with threads.
222 Currently we support native aio on windows and linux */
223 my_bool srv_use_native_aio = TRUE;
224
225 /** Whether the redo log tracking is currently enabled. Note that it is
226 possible for the log tracker thread to be running and the tracking to be
227 disabled */
228 my_bool srv_track_changed_pages = FALSE;
229
230 ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024;
231
232 ulonglong srv_max_changed_pages = 0;
233 #ifdef UNIV_DEBUG
234 /** Force all user tables to use page compression. */
235 ulong srv_debug_compress;
236 /** Used by SET GLOBAL innodb_master_thread_disabled_debug = X. */
237 my_bool srv_master_thread_disabled_debug;
238 /** Event used to inform that master thread is disabled. */
239 static os_event_t srv_master_thread_disabled_event;
240 /** Debug variable to find if any background threads are adding
241 to purge during slow shutdown. */
242 extern bool trx_commit_disallowed;
243 #endif /* UNIV_DEBUG */
244
245 /*------------------------- LOG FILES ------------------------ */
246 char* srv_log_group_home_dir = NULL;
247
248 /** Redo log encryption mode; defaults to REDO_LOG_ENCRYPT_OFF. */
249 ulong srv_redo_log_encrypt = REDO_LOG_ENCRYPT_OFF;
250
251 ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
252 /** At startup, this is the current redo log file size.
253 During startup, if this is different from srv_log_file_size_requested
254 (innodb_log_file_size), the redo log will be rebuilt and this size
255 will be initialized to srv_log_file_size_requested.
256 When upgrading from a previous redo log format, this will be set to 0,
257 and writing to the redo log is not allowed.
258
259 During startup, this is in bytes, and later converted to pages. */
260 ib_uint64_t srv_log_file_size;
261 /** The value of the startup parameter innodb_log_file_size */
262 ib_uint64_t srv_log_file_size_requested;
263 /* size in database pages */
264 ulint srv_log_buffer_size = ULINT_MAX;
265 uint srv_flush_log_at_timeout = 1;
266 ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
267 ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
268 ulong srv_log_write_ahead_size = 0;
269
270 page_size_t univ_page_size(0, 0, false);
271
272 char srv_use_global_flush_log_at_trx_commit = TRUE;
273
274 /* Try to flush dirty pages so as to avoid IO bursts at
275 the checkpoints. */
276 char srv_adaptive_flushing = TRUE;
277
278 ulint srv_show_locks_held = 10;
279 ulint srv_show_verbose_locks = 0;
280
281 /* Allow IO bursts at the checkpoints ignoring io_capacity setting. */
282 my_bool srv_flush_sync = TRUE;
283
284 /** Maximum number of times allowed to conditionally acquire
285 mutex before switching to blocking wait on the mutex */
286 #define MAX_MUTEX_NOWAIT 20
287
288 /** Check whether the number of failed nonblocking mutex
289 acquisition attempts exceeds maximum allowed value. If so,
290 srv_printf_innodb_monitor() will request mutex acquisition
291 with mutex_enter(), which will wait until it gets the mutex. */
292 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
293
294 /** Requested size in bytes */
295 ulint srv_buf_pool_size = ULINT_MAX;
296 /** Minimum pool size in bytes */
297 const ulint srv_buf_pool_min_size = 5 * 1024 * 1024;
298 /** Default pool size in bytes */
299 const ulint srv_buf_pool_def_size = 128 * 1024 * 1024;
300 /** Requested buffer pool chunk size. Each buffer pool instance consists
301 of one or more chunks. */
302 ulonglong srv_buf_pool_chunk_unit;
303 /** Requested number of buffer pool instances */
304 ulong srv_buf_pool_instances;
305 /** Default number of buffer pool instances */
306 const ulong srv_buf_pool_instances_default = 0;
307 /** Number of locks to protect buf_pool->page_hash */
308 ulong srv_n_page_hash_locks = 16;
309
310 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
311 ulong srv_LRU_scan_depth = 1024;
312 /** Whether or not to flush neighbors of a block */
313 ulong srv_flush_neighbors = 1;
314 /** Previously requested size. Accesses protected by memory barriers. */
315 ulint srv_buf_pool_old_size = 0;
316 /** Current size as scaling factor for the other components */
317 ulint srv_buf_pool_base_size = 0;
318 /** Current size in bytes */
319 ulint srv_buf_pool_curr_size = 0;
320 /** Dump this % of each buffer pool during BP dump */
321 ulong srv_buf_pool_dump_pct;
322 /** Lock table size in bytes */
323 ulint srv_lock_table_size = ULINT_MAX;
324
325 /** The maximum time limit for a single LRU tail flush iteration by the page
326 cleaner thread */
327 ulint srv_cleaner_max_lru_time = 1000;
328
329 /** The maximum time limit for a single flush list flush iteration by the page
330 cleaner thread */
331 ulint srv_cleaner_max_flush_time = 1000;
332
333 /** Page cleaner LSN age factor formula option */
334 ulong srv_cleaner_lsn_age_factor
335 = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT;
336
337 /** Empty free list for a query thread handling algorithm option */
338 ulong srv_empty_free_list_algorithm = SRV_EMPTY_FREE_LIST_BACKOFF;
339
340 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
341 instead. */
342 ulint srv_n_read_io_threads = ULINT_MAX;
343 ulint srv_n_write_io_threads = ULINT_MAX;
344
345 /* Switch to enable random read ahead. */
346 my_bool srv_random_read_ahead = FALSE;
347 /* User settable value of the number of pages that must be present
348 in the buffer cache and accessed sequentially for InnoDB to trigger a
349 readahead request. */
350 ulong srv_read_ahead_threshold = 56;
351
352 /** Maximum on-disk size of change buffer in terms of percentage
353 of the buffer pool. */
354 uint srv_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
355
356 /* This parameter is used to throttle the number of insert buffers that are
357 merged in a batch. By increasing this parameter on a faster disk you can
358 possibly reduce the number of I/O operations performed to complete the
359 merge operation. The value of this parameter is used as is by the
360 background loop when the system is idle (low load), on a busy system
361 the parameter is scaled down by a factor of 4, this is to avoid putting
362 a heavier load on the I/O sub system. */
363
364 ulong srv_insert_buffer_batch_size = 20;
365
366 char* srv_file_flush_method_str = NULL;
367 #ifndef _WIN32
368 enum srv_unix_flush_t srv_unix_file_flush_method = SRV_UNIX_FSYNC;
369 #else
370 enum srv_win_flush_t srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
371 #endif /* _WIN32 */
372
373 ulint srv_max_n_open_files = 300;
374
375 /* Number of IO operations per second the server can do */
376 ulong srv_io_capacity = 200;
377 ulong srv_max_io_capacity = 400;
378
379 /* The number of page cleaner threads to use.*/
380 ulong srv_n_page_cleaners = 4;
381
382 /* The InnoDB main thread tries to keep the ratio of modified pages
383 in the buffer pool to all database pages in the buffer pool smaller than
384 the following number. But it is not guaranteed that the value stays below
385 that during a time of heavy update/insert activity. */
386
387 double srv_max_buf_pool_modified_pct = 75.0;
388 double srv_max_dirty_pages_pct_lwm = 0.0;
389
390 /* This is the percentage of log capacity at which adaptive flushing,
391 if enabled, will kick in. */
392 ulong srv_adaptive_flushing_lwm = 10;
393
394 /* Number of iterations over which adaptive flushing is averaged. */
395 ulong srv_flushing_avg_loops = 30;
396
397 /* The tids of the purge threads */
398 os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS];
399
400 /* The tids of the I/O threads */
401 os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS];
402
403 /* The tid of the master thread */
404 os_tid_t srv_master_tid;
405
406 /* The relative scheduling priority of the purge threads */
407 ulint srv_sched_priority_purge = 19;
408
409 /* The relative scheduling priority of the I/O threads */
410 ulint srv_sched_priority_io = 19;
411
412 /* The relative scheduling priority of the master thread */
413 ulint srv_sched_priority_master = 19;
414
415 /* The relative priority of the current thread. If 0, low priority; if 1, high
416 priority. */
417 UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0;
418
419 /* The relative priority of the purge coordinator and worker threads. */
420 my_bool srv_purge_thread_priority = FALSE;
421
422 /* The relative priority of the master thread. */
423 my_bool srv_master_thread_priority = FALSE;
424
425 /* The number of purge threads to use.*/
426 ulong srv_n_purge_threads = 4;
427
428 /* the number of pages to purge in one batch */
429 ulong srv_purge_batch_size = 20;
430
431 ulong srv_encrypt_tables = 0;
432
433 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
434 NULL value when collecting statistics. By default, it is set to
435 SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
436 ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
437
438 srv_stats_t srv_stats;
439
440 /* structure to pass status variables to MySQL */
441 export_var_t export_vars;
442
443 /** Normally 0. When nonzero, skip some phases of crash recovery,
444 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
445 by SELECT or mysqldump. When this is nonzero, we do not allow any user
446 modifications to the data. */
447 ulong srv_force_recovery;
448 #ifndef NDEBUG
449 /** Inject a crash at different steps of the recovery process.
450 This is for testing and debugging only. */
451 ulong srv_force_recovery_crash;
452 #endif /* !NDEBUG */
453
454 /** Print all user-level transactions deadlocks to mysqld stderr */
455
456 my_bool srv_print_all_deadlocks = FALSE;
457
458 /** Print lock wait timeout info to mysqld stderr */
459
460 my_bool srv_print_lock_wait_timeout_info = FALSE;
461
462 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
463 my_bool srv_cmp_per_index_enabled = FALSE;
464
465 /* If the following is set to 1 then we do not run purge and insert buffer
466 merge to completion before shutdown. If it is set to 2, do not even flush the
467 buffer pool to data files at the shutdown: we effectively 'crash'
468 InnoDB (but lose no committed transactions). */
469 ulint srv_fast_shutdown = 0;
470
471 /* Generate an innodb_status.<pid> file */
472 ibool srv_innodb_status = FALSE;
473
474 /* When estimating number of different key values in an index, sample
475 this many index pages, there are 2 ways to calculate statistics:
476 * persistent stats that are calculated by ANALYZE TABLE and saved
477 in the innodb database.
478 * quick transient stats, that are used if persistent stats for the given
479 table/index are not found in the innodb database */
480 unsigned long long srv_stats_transient_sample_pages = 8;
481 my_bool srv_stats_persistent = TRUE;
482 my_bool srv_stats_include_delete_marked = FALSE;
483 unsigned long long srv_stats_persistent_sample_pages = 20;
484 my_bool srv_stats_auto_recalc = TRUE;
485
486 ibool srv_use_doublewrite_buf = TRUE;
487
488 /** The doublewrite buffer is 2MB in size, i.e. it can hold 128 16K pages.
489 The following parameter is the size of the buffer that is used for
490 batch flushing i.e.: LRU flushing and flush_list flushing. The rest
491 of the pages are used for single page flushing. */
492 ulong srv_doublewrite_batch_size = 120;
493
494 ulong srv_replication_delay = 0;
495
496 ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
497
498 /*-------------------------------------------*/
499 ulong srv_n_spin_wait_rounds = 30;
500 ulong srv_spin_wait_delay = 6;
501 ibool srv_priority_boost = TRUE;
502
503 static ulint srv_n_rows_inserted_old = 0;
504 static ulint srv_n_rows_updated_old = 0;
505 static ulint srv_n_rows_deleted_old = 0;
506 static ulint srv_n_rows_read_old = 0;
507
508 ulint srv_truncated_status_writes = 0;
509 ulint srv_available_undo_logs = 0;
510
511 /* Set the following to 0 if you want InnoDB to write messages on
512 stderr on startup/shutdown. */
513 ibool srv_print_verbose_log = TRUE;
514 my_bool srv_print_innodb_monitor = FALSE;
515 my_bool srv_print_innodb_lock_monitor = FALSE;
516
517 /* Array of English strings describing the current state of an
518 i/o handler thread */
519
520 const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
521 const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
522
523 ib_time_monotonic_t srv_last_monitor_time;
524
525 ib_mutex_t srv_innodb_monitor_mutex;
526
527 /** Mutex protecting page_zip_stat_per_index */
528 ib_mutex_t page_zip_stat_per_index_mutex;
529
530 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
531 ib_mutex_t srv_monitor_file_mutex;
532
533 /** Temporary file for innodb monitor output */
534 FILE* srv_monitor_file;
535 /** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
536 This mutex has a very high rank; threads reserving it should not
537 be holding any InnoDB latches. */
538 ib_mutex_t srv_dict_tmpfile_mutex;
539 /** Temporary file for output from the data dictionary */
540 FILE* srv_dict_tmpfile;
541 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
542 This mutex has a very low rank; threads reserving it should not
543 acquire any further latches or sleep before releasing this one. */
544 ib_mutex_t srv_misc_tmpfile_mutex;
545 /** Temporary file for miscellaneous diagnostic output */
546 FILE* srv_misc_tmpfile;
547
548 ulint srv_main_thread_process_no = 0;
549 ulint srv_main_thread_id = 0;
550
551 /* The following counts are used by the srv_master_thread. */
552
553 /** Iterations of the loop bounded by 'srv_active' label. */
554 static ulint srv_main_active_loops = 0;
555 /** Iterations of the loop bounded by the 'srv_idle' label. */
556 static ulint srv_main_idle_loops = 0;
557 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
558 static ulint srv_main_shutdown_loops = 0;
559 /** Log writes involving flush. */
560 static ulint srv_log_writes_and_flush = 0;
561
562 /** Number of times secondary index lookup triggered cluster lookup */
563 ulint srv_sec_rec_cluster_reads = 0;
564
565 /** Number of times prefix optimization avoided triggering cluster lookup */
566 ulint srv_sec_rec_cluster_reads_avoided = 0;
567
568 /* This is only ever touched by the master thread. It records the
569 time when the last flush of log file has happened. The master
570 thread ensures that we flush the log files at least once per
571 second. */
572 static ib_time_monotonic_t srv_last_log_flush_time;
573
574 /* Interval in seconds at which various tasks are performed by the
575 master thread when server is active. In order to balance the workload,
576 we should try to keep intervals such that they are not multiple of
577 each other. For example, if we have intervals for various tasks
578 defined as 5, 10, 15, 60 then all tasks will be performed when
579 current_time % 60 == 0 and no tasks will be performed when
580 current_time % 5 != 0. */
581
582 # define SRV_MASTER_CHECKPOINT_INTERVAL (7)
583 # define SRV_MASTER_PURGE_INTERVAL (10)
584 # define SRV_MASTER_DICT_LRU_INTERVAL (47)
585
/** Acquire srv_sys->mutex. */
#define srv_sys_mutex_enter()	do { mutex_enter(&srv_sys->mutex); } while (0)

/** Test whether srv_sys->mutex is owned, as reported by mutex_own().
Evaluates to false whenever srv_read_only_mode is set. */
#define srv_sys_mutex_own()	\
	(mutex_own(&srv_sys->mutex) && !srv_read_only_mode)

/** Release srv_sys->mutex. */
#define srv_sys_mutex_exit()	do { mutex_exit(&srv_sys->mutex); } while (0)

/** Lock wait timeout to apply to a transaction: the value returned by
thd_lock_wait_timeout() for the transaction's mysql_thd when
trx->lock.allowed_to_wait is set, otherwise 0.
@param trx	transaction to inspect */
#define fetch_lock_wait_timeout(trx)	\
	((trx)->lock.allowed_to_wait ? thd_lock_wait_timeout((trx)->mysql_thd) : 0)
604
605 /*
606 IMPLEMENTATION OF THE SERVER MAIN PROGRAM
607 =========================================
608
609 There is the following analogue between this database
610 server and an operating system kernel:
611
612 DB concept equivalent OS concept
613 ---------- ---------------------
614 transaction -- process;
615
616 query thread -- thread;
617
618 lock -- semaphore;
619
620 kernel -- kernel;
621
622 query thread execution:
623 (a) without lock mutex
624 reserved -- process executing in user mode;
625 (b) with lock mutex reserved
626 -- process executing in kernel mode;
627
628 The server has several background threads all running at the same
629 priority as user threads. It periodically checks if there is anything
630 happening in the server which requires intervention of the master
631 thread. Such situations may be, for example, when flushing of dirty
632 blocks is needed in the buffer pool or old version of database rows
633 have to be cleaned away (purged). The user can configure a separate
634 dedicated purge thread(s) too, in which case the master thread does not
635 do any purging.
636
637 The threads which we call user threads serve the queries of the MySQL
638 server. They run at normal priority.
639
640 When there is no activity in the system, also the master thread
641 suspends itself to wait for an event making the server totally silent.
642
643 There is still one complication in our server design. If a
644 background utility thread obtains a resource (e.g., mutex) needed by a user
645 thread, and there is also some other user activity in the system,
646 the user thread may have to wait indefinitely long for the
647 resource, as the OS does not schedule a background thread if
648 there is some other runnable user thread. This problem is called
649 priority inversion in real-time programming.
650
651 One solution to the priority inversion problem would be to keep record
652 of which thread owns which resource and in the above case boost the
653 priority of the background thread so that it will be scheduled and it
654 can release the resource. This solution is called priority inheritance
655 in real-time programming. A drawback of this solution is that the overhead
656 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
657 MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
658 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
659 that the thread cannot store the information in the resource, say a mutex,
660 itself, because competing threads could wipe out the information if it is
661 stored before acquiring the mutex, and if it is stored afterwards, the
662 information is outdated for the time of one machine instruction, at least.
663 (To be precise, the information could be stored to lock_word in mutex if
664 the machine supports atomic swap.)
665
666 The above solution with priority inheritance may become relevant in the
667 future; currently we do not implement any priority twiddling solution.
668 Our general aim is to reduce the contention of all mutexes by making
669 them more fine grained.
670
671 The thread table contains information of the current status of each
672 thread existing in the system, and also the event semaphores used in
673 suspending the master thread and utility threads when they have nothing
674 to do. The thread table can be seen as an analogue to the process table
675 in a traditional Unix implementation. */
676
677 /** The server system struct */
678 struct srv_sys_t{
679 ib_mutex_t tasks_mutex; /*!< variable protecting the
680 tasks queue */
681 UT_LIST_BASE_NODE_T(que_thr_t)
682 tasks; /*!< task queue */
683
684 ib_mutex_t mutex; /*!< variable protecting the
685 fields below. */
686 ulint n_sys_threads; /*!< size of the sys_threads
687 array */
688
689 srv_slot_t* sys_threads; /*!< server thread table */
690
691 ulint n_threads_active[SRV_MASTER + 1];
692 /*!< number of threads active
693 in a thread class */
694
695 srv_stats_t::ulint_ctr_1_t
696 activity_count; /*!< For tracking server
697 activity */
698 srv_stats_t::ulint_ctr_1_t
699 ibuf_merge_activity_count;/*!< For tracking change
700 buffer merge activity, a subset
701 of overall server activity */
702 };
703
704 static srv_sys_t* srv_sys = NULL;
705
706 /** Event to signal the monitor thread. */
707 os_event_t srv_monitor_event;
708
709 /** Event to signal the error thread */
710 os_event_t srv_error_event;
711
712 /** Event to signal the buffer pool dump/load thread */
713 os_event_t srv_buf_dump_event;
714
715 /** Event to signal the buffer pool resize thread */
716 os_event_t srv_buf_resize_event;
717
718 /** The buffer pool dump/load file name */
719 char* srv_buf_dump_filename;
720
721 /** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
722 and/or load it during startup. */
723 char srv_buffer_pool_dump_at_shutdown = TRUE;
724 char srv_buffer_pool_load_at_startup = TRUE;
725
726 /** Path to the parallel doublewrite buffer */
727 char* srv_parallel_doublewrite_path;
728
729 /** Slot index in the srv_sys->sys_threads array for the purge thread. */
730 static const ulint SRV_PURGE_SLOT = 1;
731
732 /** Slot index in the srv_sys->sys_threads array for the master thread. */
733 static const ulint SRV_MASTER_SLOT = 0;
734
735 os_event_t srv_checkpoint_completed_event;
736
737 os_event_t srv_redo_log_tracked_event;
738
739 /** Whether the redo log tracker thread has been started. Does not take into
740 account whether the tracking is currently enabled (see srv_track_changed_pages
741 for that) */
742 bool srv_redo_log_thread_started = false;
743
744 #ifdef HAVE_PSI_STAGE_INTERFACE
745 /** Performance schema stage event for monitoring ALTER TABLE progress
746 everything after flush log_make_checkpoint_at(). */
747 PSI_stage_info srv_stage_alter_table_end
748 = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
749
750 /** Performance schema stage event for monitoring ALTER TABLE progress
751 log_make_checkpoint_at(). */
752 PSI_stage_info srv_stage_alter_table_flush
753 = {0, "alter table (flush)", PSI_FLAG_STAGE_PROGRESS};
754
755 /** Performance schema stage event for monitoring ALTER TABLE progress
756 row_merge_insert_index_tuples(). */
757 PSI_stage_info srv_stage_alter_table_insert
758 = {0, "alter table (insert)", PSI_FLAG_STAGE_PROGRESS};
759
760 /** Performance schema stage event for monitoring ALTER TABLE progress
761 row_log_apply(). */
762 PSI_stage_info srv_stage_alter_table_log_index
763 = {0, "alter table (log apply index)", PSI_FLAG_STAGE_PROGRESS};
764
765 /** Performance schema stage event for monitoring ALTER TABLE progress
766 row_log_table_apply(). */
767 PSI_stage_info srv_stage_alter_table_log_table
768 = {0, "alter table (log apply table)", PSI_FLAG_STAGE_PROGRESS};
769
770 /** Performance schema stage event for monitoring ALTER TABLE progress
771 row_merge_sort(). */
772 PSI_stage_info srv_stage_alter_table_merge_sort
773 = {0, "alter table (merge sort)", PSI_FLAG_STAGE_PROGRESS};
774
775 /** Performance schema stage event for monitoring ALTER TABLE progress
776 row_merge_read_clustered_index(). */
777 PSI_stage_info srv_stage_alter_table_read_pk_internal_sort
778 = {0, "alter table (read PK and internal sort)", PSI_FLAG_STAGE_PROGRESS};
779
780 /** Performance schema stage event for monitoring buffer pool load progress. */
781 PSI_stage_info srv_stage_buffer_pool_load
782 = {0, "buffer pool load", PSI_FLAG_STAGE_PROGRESS};
783 #endif /* HAVE_PSI_STAGE_INTERFACE */
784
785 static
786 void
787 srv_enable_undo_encryption_if_set();
788
789 /*********************************************************************//**
790 Prints counters for work done by srv_master_thread. */
791 static
792 void
srv_print_master_thread_info(FILE * file)793 srv_print_master_thread_info(
794 /*=========================*/
795 FILE *file) /* in: output stream */
796 {
797 fprintf(file,
798 "srv_master_thread loops: "
799 ULINTPF " srv_active, "
800 ULINTPF " srv_shutdown, "
801 ULINTPF " srv_idle\n",
802 srv_main_active_loops,
803 srv_main_shutdown_loops,
804 srv_main_idle_loops);
805 fprintf(file,
806 "srv_master_thread log flush and writes: " ULINTPF "\n",
807 srv_log_writes_and_flush);
808 }
809
810 /*********************************************************************//**
811 Sets the info describing an i/o thread current state. */
812 void
srv_set_io_thread_op_info(ulint i,const char * str)813 srv_set_io_thread_op_info(
814 /*======================*/
815 ulint i, /*!< in: the 'segment' of the i/o thread */
816 const char* str) /*!< in: constant char string describing the
817 state */
818 {
819 ut_a(i < SRV_MAX_N_IO_THREADS);
820
821 srv_io_thread_op_info[i] = str;
822 }
823
824 /*********************************************************************//**
825 Resets the info describing an i/o thread current state. */
826 void
srv_reset_io_thread_op_info()827 srv_reset_io_thread_op_info()
828 /*=========================*/
829 {
830 for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
831 srv_io_thread_op_info[i] = "not started yet";
832 }
833 }
834
#ifdef UNIV_DEBUG
/** Checks that a thread type is one of the types a reserved thread
table slot may carry (SRV_WORKER, SRV_PURGE or SRV_MASTER). For
SRV_NONE, or any value not listed in the switch, ut_error is raised.
@param[in]	type	thread type
@return TRUE if ok */
static
ibool
srv_thread_type_validate(srv_thread_type type)
{
	switch (type) {
	case SRV_MASTER:
	case SRV_PURGE:
	case SRV_WORKER:
		return(TRUE);

	case SRV_NONE:
		/* Not a valid type for a thread: handled below. */
		break;
	}

	ut_error;

	return(FALSE);
}
#endif /* UNIV_DEBUG */
857
858 /*********************************************************************//**
859 Gets the type of a thread table slot.
860 @return thread type */
861 static
862 srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)863 srv_slot_get_type(
864 /*==============*/
865 const srv_slot_t* slot) /*!< in: thread slot */
866 {
867 srv_thread_type type = slot->type;
868 ut_ad(srv_thread_type_validate(type));
869 return(type);
870 }
871
872 /*********************************************************************//**
873 Reserves a slot in the thread table for the current thread.
874 @return reserved slot */
875 static
876 srv_slot_t*
srv_reserve_slot(srv_thread_type type)877 srv_reserve_slot(
878 /*=============*/
879 srv_thread_type type) /*!< in: type of the thread */
880 {
881 srv_slot_t* slot = 0;
882
883 srv_sys_mutex_enter();
884
885 ut_ad(srv_thread_type_validate(type));
886
887 switch (type) {
888 case SRV_MASTER:
889 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
890 break;
891
892 case SRV_PURGE:
893 slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
894 break;
895
896 case SRV_WORKER:
897 /* Find an empty slot, skip the master and purge slots. */
898 for (slot = &srv_sys->sys_threads[2];
899 slot->in_use;
900 ++slot) {
901
902 ut_a(slot < &srv_sys->sys_threads[
903 srv_sys->n_sys_threads]);
904 }
905 break;
906
907 case SRV_NONE:
908 ut_error;
909 }
910
911 ut_a(!slot->in_use);
912
913 slot->in_use = TRUE;
914 slot->suspended = FALSE;
915 slot->type = type;
916
917 ut_ad(srv_slot_get_type(slot) == type);
918
919 ++srv_sys->n_threads_active[type];
920
921 srv_sys_mutex_exit();
922
923 return(slot);
924 }
925
926 /*********************************************************************//**
927 Suspends the calling thread to wait for the event in its thread slot.
928 @return the current signal count of the event. */
929 static
930 int64_t
srv_suspend_thread_low(srv_slot_t * slot)931 srv_suspend_thread_low(
932 /*===================*/
933 srv_slot_t* slot) /*!< in/out: thread slot */
934 {
935
936 ut_ad(!srv_read_only_mode);
937 ut_ad(srv_sys_mutex_own());
938
939 ut_ad(slot->in_use);
940
941 srv_thread_type type = srv_slot_get_type(slot);
942
943 switch (type) {
944 case SRV_NONE:
945 ut_error;
946
947 case SRV_MASTER:
948 /* We have only one master thread and it
949 should be the first entry always. */
950 ut_a(srv_sys->n_threads_active[type] == 1);
951 break;
952
953 case SRV_PURGE:
954 /* We have only one purge coordinator thread
955 and it should be the second entry always. */
956 ut_a(srv_sys->n_threads_active[type] == 1);
957 break;
958
959 case SRV_WORKER:
960 ut_a(srv_n_purge_threads > 1);
961 ut_a(srv_sys->n_threads_active[type] > 0);
962 break;
963 }
964
965 ut_a(!slot->suspended);
966 slot->suspended = TRUE;
967
968 ut_a(srv_sys->n_threads_active[type] > 0);
969
970 srv_sys->n_threads_active[type]--;
971
972 return(os_event_reset(slot->event));
973 }
974
975 /*********************************************************************//**
976 Suspends the calling thread to wait for the event in its thread slot.
977 @return the current signal count of the event. */
978 static
979 int64_t
srv_suspend_thread(srv_slot_t * slot)980 srv_suspend_thread(
981 /*===============*/
982 srv_slot_t* slot) /*!< in/out: thread slot */
983 {
984 srv_sys_mutex_enter();
985
986 int64_t sig_count = srv_suspend_thread_low(slot);
987
988 srv_sys_mutex_exit();
989
990 return(sig_count);
991 }
992
993 /*********************************************************************//**
994 Releases threads of the type given from suspension in the thread table.
995 NOTE! The server mutex has to be reserved by the caller!
996 @return number of threads released: this may be less than n if not
997 enough threads were suspended at the moment. */
998 ulint
srv_release_threads(srv_thread_type type,ulint n)999 srv_release_threads(
1000 /*================*/
1001 srv_thread_type type, /*!< in: thread type */
1002 ulint n) /*!< in: number of threads to release */
1003 {
1004 ulint i;
1005 ulint count = 0;
1006
1007 ut_ad(srv_thread_type_validate(type));
1008 ut_ad(n > 0);
1009
1010 srv_sys_mutex_enter();
1011
1012 for (i = 0; i < srv_sys->n_sys_threads; i++) {
1013 srv_slot_t* slot;
1014
1015 slot = &srv_sys->sys_threads[i];
1016
1017 if (slot->in_use
1018 && srv_slot_get_type(slot) == type
1019 && slot->suspended) {
1020
1021 switch (type) {
1022 case SRV_NONE:
1023 ut_error;
1024
1025 case SRV_MASTER:
1026 /* We have only one master thread and it
1027 should be the first entry always. */
1028 ut_a(n == 1);
1029 ut_a(i == SRV_MASTER_SLOT);
1030 ut_a(srv_sys->n_threads_active[type] == 0);
1031 break;
1032
1033 case SRV_PURGE:
1034 /* We have only one purge coordinator thread
1035 and it should be the second entry always. */
1036 ut_a(n == 1);
1037 ut_a(i == SRV_PURGE_SLOT);
1038 ut_a(srv_n_purge_threads > 0);
1039 ut_a(srv_sys->n_threads_active[type] == 0);
1040 break;
1041
1042 case SRV_WORKER:
1043 ut_a(srv_n_purge_threads > 1);
1044 ut_a(srv_sys->n_threads_active[type]
1045 < srv_n_purge_threads - 1);
1046 break;
1047 }
1048
1049 slot->suspended = FALSE;
1050
1051 ++srv_sys->n_threads_active[type];
1052
1053 os_event_set(slot->event);
1054
1055 if (++count == n) {
1056 break;
1057 }
1058 }
1059 }
1060
1061 srv_sys_mutex_exit();
1062
1063 return(count);
1064 }
1065
1066 /*********************************************************************//**
1067 Release a thread's slot. */
1068 static
1069 void
srv_free_slot(srv_slot_t * slot)1070 srv_free_slot(
1071 /*==========*/
1072 srv_slot_t* slot) /*!< in/out: thread slot */
1073 {
1074 srv_sys_mutex_enter();
1075
1076 if (!slot->suspended) {
1077 /* Mark the thread as inactive. */
1078 srv_suspend_thread_low(slot);
1079 }
1080
1081 /* Free the slot for reuse. */
1082 ut_ad(slot->in_use);
1083 slot->in_use = FALSE;
1084
1085 srv_sys_mutex_exit();
1086 }
1087
1088 /*********************************************************************//**
1089 Initializes the server. */
1090 void
srv_init(void)1091 srv_init(void)
1092 /*==========*/
1093 {
1094 ulint n_sys_threads = 0;
1095 ulint srv_sys_sz = sizeof(*srv_sys);
1096
1097 mutex_create(LATCH_ID_SRV_INNODB_MONITOR, &srv_innodb_monitor_mutex);
1098
1099 if (!srv_read_only_mode) {
1100
1101 /* Number of purge threads + master thread */
1102 n_sys_threads = srv_n_purge_threads + 1;
1103
1104 srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
1105 }
1106
1107 srv_sys = static_cast<srv_sys_t*>(ut_zalloc_nokey(srv_sys_sz));
1108
1109 srv_sys->n_sys_threads = n_sys_threads;
1110
1111 /* Even in read-only mode we flush pages related to intrinsic table
1112 and so mutex creation is needed. */
1113 {
1114
1115 mutex_create(LATCH_ID_SRV_SYS, &srv_sys->mutex);
1116
1117 mutex_create(LATCH_ID_SRV_SYS_TASKS, &srv_sys->tasks_mutex);
1118
1119 srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
1120
1121 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1122 srv_slot_t* slot = &srv_sys->sys_threads[i];
1123
1124 slot->event = os_event_create(0);
1125
1126 ut_a(slot->event);
1127 }
1128
1129 srv_error_event = os_event_create(0);
1130
1131 srv_monitor_event = os_event_create(0);
1132
1133 srv_buf_dump_event = os_event_create(0);
1134
1135 buf_flush_event = os_event_create("buf_flush_event");
1136
1137 UT_LIST_INIT(srv_sys->tasks, &que_thr_t::queue);
1138
1139 srv_checkpoint_completed_event = os_event_create(0);
1140
1141 srv_redo_log_tracked_event = os_event_create(0);
1142 os_event_set(srv_redo_log_tracked_event);
1143 }
1144
1145 srv_buf_resize_event = os_event_create(0);
1146
1147 ut_d(srv_master_thread_disabled_event = os_event_create(0));
1148
1149 /* page_zip_stat_per_index_mutex is acquired from:
1150 1. page_zip_compress() (after SYNC_FSP)
1151 2. page_zip_decompress()
1152 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
1153 4. innodb_cmp_per_index_update(), no other latches
1154 since we do not acquire any other latches while holding this mutex,
1155 it can have very low level. We pick SYNC_ANY_LATCH for it. */
1156 mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX,
1157 &page_zip_stat_per_index_mutex);
1158
1159 /* Create dummy indexes for infimum and supremum records */
1160
1161 dict_ind_init();
1162
1163 /* Initialize some INFORMATION SCHEMA internal structures */
1164 trx_i_s_cache_init(trx_i_s_cache);
1165
1166 ut_crc32_init();
1167
1168 dict_mem_init();
1169 }
1170
1171 /*********************************************************************//**
1172 Frees the data structures created in srv_init(). */
1173 void
srv_free(void)1174 srv_free(void)
1175 /*==========*/
1176 {
1177 mutex_free(&srv_innodb_monitor_mutex);
1178 mutex_free(&page_zip_stat_per_index_mutex);
1179
1180 {
1181 mutex_free(&srv_sys->mutex);
1182 mutex_free(&srv_sys->tasks_mutex);
1183
1184 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1185 srv_slot_t* slot = &srv_sys->sys_threads[i];
1186
1187 os_event_destroy(slot->event);
1188 }
1189
1190 os_event_destroy(srv_error_event);
1191 os_event_destroy(srv_monitor_event);
1192 os_event_destroy(srv_buf_dump_event);
1193 os_event_destroy(buf_flush_event);
1194 os_event_destroy(srv_checkpoint_completed_event);
1195 os_event_destroy(srv_redo_log_tracked_event);
1196 }
1197
1198 os_event_destroy(srv_buf_resize_event);
1199
1200 #ifdef UNIV_DEBUG
1201 os_event_destroy(srv_master_thread_disabled_event);
1202 srv_master_thread_disabled_event = NULL;
1203 #endif /* UNIV_DEBUG */
1204
1205 trx_i_s_cache_free(trx_i_s_cache);
1206
1207 ut_free(srv_sys);
1208
1209 srv_sys = 0;
1210 }
1211
1212 /*********************************************************************//**
1213 Initializes the synchronization primitives, memory system, and the thread
1214 local storage. */
1215 void
srv_general_init(void)1216 srv_general_init(void)
1217 /*==================*/
1218 {
1219 sync_check_init();
1220 /* Reset the system variables in the recovery module. */
1221 recv_sys_var_init();
1222 os_thread_init();
1223 trx_pool_init();
1224 que_init();
1225 row_mysql_init();
1226 }
1227
1228 /*********************************************************************//**
1229 Normalizes init parameter values to use units we use inside InnoDB. */
1230 static
1231 void
srv_normalize_init_values(void)1232 srv_normalize_init_values(void)
1233 /*===========================*/
1234 {
1235 srv_sys_space.normalize();
1236
1237 srv_tmp_space.normalize();
1238
1239 srv_log_file_size /= UNIV_PAGE_SIZE;
1240
1241 srv_log_buffer_size /= UNIV_PAGE_SIZE;
1242
1243 srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1244 }
1245
1246 /*********************************************************************//**
1247 Boots the InnoDB server. */
1248 void
srv_boot(void)1249 srv_boot(void)
1250 /*==========*/
1251 {
1252 /* Transform the init parameter values given by MySQL to
1253 use units we use inside InnoDB: */
1254
1255 srv_normalize_init_values();
1256
1257 /* Initialize synchronization primitives, memory management, and thread
1258 local storage */
1259
1260 srv_general_init();
1261
1262 /* Initialize this module */
1263
1264 srv_init();
1265 }
1266
1267 /******************************************************************//**
1268 Refreshes the values used to calculate per-second averages. */
1269 static
1270 void
srv_refresh_innodb_monitor_stats(void)1271 srv_refresh_innodb_monitor_stats(void)
1272 /*==================================*/
1273 {
1274 mutex_enter(&srv_innodb_monitor_mutex);
1275
1276 srv_last_monitor_time = ut_time_monotonic();
1277
1278 os_aio_refresh_stats();
1279
1280 btr_cur_n_sea_old = btr_cur_n_sea;
1281 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1282
1283 log_refresh_stats();
1284
1285 buf_refresh_io_stats_all();
1286
1287 srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1288 srv_n_rows_updated_old = srv_stats.n_rows_updated;
1289 srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1290 srv_n_rows_read_old = srv_stats.n_rows_read;
1291
1292 mutex_exit(&srv_innodb_monitor_mutex);
1293 }
1294
1295 /******************************************************************//**
1296 Outputs to a file the output of the InnoDB Monitor.
1297 @return FALSE if not all information printed
1298 due to failure to obtain necessary mutex */
1299 ibool
srv_printf_innodb_monitor(FILE * file,ibool nowait,ulint * trx_start_pos,ulint * trx_end)1300 srv_printf_innodb_monitor(
1301 /*======================*/
1302 FILE* file, /*!< in: output stream */
1303 ibool nowait, /*!< in: whether to wait for the
1304 lock_sys_t:: mutex */
1305 ulint* trx_start_pos, /*!< out: file position of the start of
1306 the list of active transactions */
1307 ulint* trx_end) /*!< out: file position of the end of
1308 the list of active transactions */
1309 {
1310 double time_elapsed;
1311 ib_time_monotonic_t current_time;
1312 ulint n_reserved;
1313 ibool ret;
1314
1315 ulong btr_search_sys_constant;
1316 ulong btr_search_sys_variable;
1317 ulint lock_sys_subtotal;
1318 ulint recv_sys_subtotal;
1319 size_t dict_sys_hash_size;
1320 ulint dict_sys_size;
1321
1322 trx_t* trx;
1323
1324 mutex_enter(&srv_innodb_monitor_mutex);
1325
1326 current_time = ut_time_monotonic();
1327
1328 /* We add 0.001 seconds to time_elapsed to prevent division
1329 by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
1330 same time */
1331
1332 time_elapsed = current_time - srv_last_monitor_time + 0.001;
1333
1334 srv_last_monitor_time = ut_time_monotonic();
1335
1336 fputs("\n=====================================\n", file);
1337
1338 ut_print_timestamp(file);
1339 fprintf(file,
1340 " INNODB MONITOR OUTPUT\n"
1341 "=====================================\n"
1342 "Per second averages calculated from the last %lu seconds\n",
1343 (ulong) time_elapsed);
1344
1345 fputs("-----------------\n"
1346 "BACKGROUND THREAD\n"
1347 "-----------------\n", file);
1348 srv_print_master_thread_info(file);
1349
1350 fputs("----------\n"
1351 "SEMAPHORES\n"
1352 "----------\n", file);
1353
1354 sync_print(file);
1355
1356 /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1357 order level in sync0sync.h, while dict_foreign_err_mutex has a very
1358 low level 135. Therefore we can reserve the latter mutex here without
1359 a danger of a deadlock of threads. */
1360
1361 if (!recv_recovery_on) {
1362
1363 mutex_enter(&dict_foreign_err_mutex);
1364
1365 if (!srv_read_only_mode
1366 && ftell(dict_foreign_err_file) != 0L) {
1367 fputs("------------------------\n"
1368 "LATEST FOREIGN KEY ERROR\n"
1369 "------------------------\n", file);
1370 ut_copy_file(file, dict_foreign_err_file);
1371 }
1372
1373 mutex_exit(&dict_foreign_err_mutex);
1374 }
1375
1376 /* Only if lock_print_info_summary proceeds correctly,
1377 before we call the lock_print_info_all_transactions
1378 to print all the lock information. IMPORTANT NOTE: This
1379 function acquires the lock mutex on success. */
1380 ret = recv_recovery_on ? FALSE : lock_print_info_summary(file, nowait);
1381
1382 if (ret) {
1383 if (trx_start_pos) {
1384 long t = ftell(file);
1385 if (t < 0) {
1386 *trx_start_pos = ULINT_UNDEFINED;
1387 } else {
1388 *trx_start_pos = (ulint) t;
1389 }
1390 }
1391
1392 /* NOTE: If we get here then we have the lock mutex. This
1393 function will release the lock mutex that we acquired when
1394 we called the lock_print_info_summary() function earlier. */
1395
1396 lock_print_info_all_transactions(file);
1397
1398 if (trx_end) {
1399 long t = ftell(file);
1400 if (t < 0) {
1401 *trx_end = ULINT_UNDEFINED;
1402 } else {
1403 *trx_end = (ulint) t;
1404 }
1405 }
1406 }
1407
1408 fputs("--------\n"
1409 "FILE I/O\n"
1410 "--------\n", file);
1411 os_aio_print(file);
1412
1413 if (!recv_recovery_on) {
1414
1415 fputs("-------------------------------------\n"
1416 "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1417 "-------------------------------------\n", file);
1418 ibuf_print(file);
1419 }
1420
1421 for (ulint i = 0; i < btr_ahi_parts; ++i) {
1422 rw_lock_s_lock(btr_search_latches[i]);
1423 ha_print_info(file, btr_search_sys->hash_tables[i]);
1424 rw_lock_s_unlock(btr_search_latches[i]);
1425 }
1426
1427 fprintf(file,
1428 "%.2f hash searches/s, %.2f non-hash searches/s\n",
1429 (btr_cur_n_sea - btr_cur_n_sea_old)
1430 / time_elapsed,
1431 (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1432 / time_elapsed);
1433 btr_cur_n_sea_old = btr_cur_n_sea;
1434 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1435
1436 if (!recv_recovery_on) {
1437
1438 fputs("---\n"
1439 "LOG\n"
1440 "---\n", file);
1441 log_print(file);
1442 }
1443
1444 fputs("----------------------\n"
1445 "BUFFER POOL AND MEMORY\n"
1446 "----------------------\n", file);
1447 fprintf(file,
1448 "Total large memory allocated " ULINTPF "\n"
1449 "Dictionary memory allocated " ULINTPF "\n",
1450 os_total_large_mem_allocated, dict_sys ? dict_sys->size : 0UL);
1451
1452 /* Calculate AHI constant and variable memory allocations */
1453
1454 btr_search_sys_constant = btr_search_sys_constant_mem;
1455 os_rmb;
1456 btr_search_sys_variable = btr_search_sys_variable_mem;
1457
1458 lock_sys_subtotal = 0;
1459 if (trx_sys) {
1460 mutex_enter(&trx_sys->mutex);
1461 trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
1462 while (trx) {
1463 lock_sys_subtotal
1464 += ((trx->lock.lock_heap)
1465 ? mem_heap_get_size(trx->lock.lock_heap)
1466 : 0);
1467 trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
1468 }
1469 mutex_exit(&trx_sys->mutex);
1470 }
1471
1472 recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
1473 ? mem_heap_get_size(recv_sys->heap) : 0);
1474
1475 dict_sys_hash_size = dict_sys ? dict_sys->hash_size : 0;
1476 dict_sys_size = dict_sys ? dict_sys->size : 0;
1477
1478 fprintf(file,
1479 "Internal hash tables (constant factor + variable factor)\n"
1480 " Adaptive hash index %lu \t(%lu + " ULINTPF ")\n"
1481 " Page hash %lu (buffer pool 0 only)\n"
1482 " Dictionary cache %lu \t(%lu + " ULINTPF ")\n"
1483 " File system %lu \t(%lu + " ULINTPF ")\n"
1484 " Lock system %lu \t(%lu + " ULINTPF ")\n"
1485 " Recovery system %lu \t(%lu + " ULINTPF ")\n",
1486
1487 btr_search_sys_constant + btr_search_sys_variable,
1488 btr_search_sys_constant,
1489 btr_search_sys_variable,
1490
1491 (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
1492
1493 (ulong) (dict_sys_hash_size + dict_sys_size),
1494 (ulong) (dict_sys_hash_size),
1495 (ulong) (dict_sys_size),
1496
1497 (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
1498 + fil_system_hash_nodes()),
1499 (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
1500 fil_system_hash_nodes(),
1501
1502 (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
1503 + lock_sys_subtotal),
1504 (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
1505 lock_sys_subtotal,
1506
1507 (ulong) (((recv_sys && recv_sys->addr_hash)
1508 ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
1509 + recv_sys_subtotal),
1510 (ulong) ((recv_sys && recv_sys->addr_hash)
1511 ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
1512 recv_sys_subtotal);
1513
1514 buf_print_io(file);
1515
1516 fputs("--------------\n"
1517 "ROW OPERATIONS\n"
1518 "--------------\n", file);
1519 fprintf(file,
1520 ULINTPF " queries inside InnoDB, "
1521 ULINTPF " queries in queue\n",
1522 srv_conc_get_active_threads(),
1523 srv_conc_get_waiting_threads());
1524
1525 /* This is a dirty read, without holding trx_sys->mutex. */
1526 fprintf(file,
1527 ULINTPF " read views open inside InnoDB\n",
1528 trx_sys->mvcc->size());
1529
1530 mutex_enter(&trx_sys->mutex);
1531
1532 fprintf(file, "%lu RW transactions active inside InnoDB\n",
1533 UT_LIST_GET_LEN(trx_sys->rw_trx_list));
1534
1535 ReadView* oldest_view = trx_sys->mvcc->get_oldest_view();
1536 if (oldest_view) {
1537
1538 fprintf(file, "---OLDEST VIEW---\n");
1539 oldest_view->print(file);
1540 fprintf(file, "-----------------\n");
1541 }
1542
1543 mutex_exit(&trx_sys->mutex);
1544
1545 n_reserved = fil_space_get_n_reserved_extents(0);
1546 if (n_reserved > 0) {
1547 fprintf(file,
1548 ULINTPF " tablespace extents now reserved for"
1549 " B-tree split operations\n",
1550 n_reserved);
1551 }
1552
1553 fprintf(file,
1554 "Process ID=" ULINTPF
1555 ", Main thread ID=" ULINTPF
1556 ", state: %s\n",
1557 srv_main_thread_process_no,
1558 srv_main_thread_id,
1559 srv_main_thread_op_info);
1560 fprintf(file,
1561 "Number of rows inserted " ULINTPF
1562 ", updated " ULINTPF
1563 ", deleted " ULINTPF
1564 ", read " ULINTPF "\n",
1565 (ulint) srv_stats.n_rows_inserted,
1566 (ulint) srv_stats.n_rows_updated,
1567 (ulint) srv_stats.n_rows_deleted,
1568 (ulint) srv_stats.n_rows_read);
1569 fprintf(file,
1570 "%.2f inserts/s, %.2f updates/s,"
1571 " %.2f deletes/s, %.2f reads/s\n",
1572 ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
1573 / time_elapsed,
1574 ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
1575 / time_elapsed,
1576 ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
1577 / time_elapsed,
1578 ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
1579 / time_elapsed);
1580
1581 srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1582 srv_n_rows_updated_old = srv_stats.n_rows_updated;
1583 srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1584 srv_n_rows_read_old = srv_stats.n_rows_read;
1585
1586 fputs("----------------------------\n"
1587 "END OF INNODB MONITOR OUTPUT\n"
1588 "============================\n", file);
1589 mutex_exit(&srv_innodb_monitor_mutex);
1590 fflush(file);
1591
1592 #ifndef NDEBUG
1593 srv_debug_monitor_printed = true;
1594 #endif
1595
1596 return(ret);
1597 }
1598
1599 /******************************************************************//**
1600 Function to pass InnoDB status variables to MySQL */
1601 void
srv_export_innodb_status(void)1602 srv_export_innodb_status(void)
1603 /*==========================*/
1604 {
1605 buf_pool_stat_t stat;
1606 buf_pools_list_size_t buf_pools_list_size;
1607 ulint LRU_len;
1608 ulint free_len;
1609 ulint flush_list_len;
1610 fil_crypt_stat_t crypt_stat;
1611 btr_scrub_stat_t scrub_stat;
1612 ulint mem_adaptive_hash, mem_dictionary;
1613 ReadView* oldest_view;
1614 ulint i;
1615
1616 buf_get_total_stat(&stat);
1617 buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1618 buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1619 if (!srv_read_only_mode) {
1620 fil_crypt_total_stat(&crypt_stat);
1621 btr_scrub_total_stat(&scrub_stat);
1622 }
1623
1624 os_rmb;
1625 mem_adaptive_hash
1626 = btr_search_sys_constant_mem + btr_search_sys_variable_mem;
1627
1628 mem_dictionary = dict_sys->hash_size + dict_sys->size;
1629
1630 mutex_enter(&srv_innodb_monitor_mutex);
1631
1632 export_vars.innodb_data_pending_reads =
1633 os_n_pending_reads;
1634
1635 export_vars.innodb_data_pending_writes =
1636 os_n_pending_writes;
1637
1638 export_vars.innodb_data_pending_fsyncs =
1639 fil_n_pending_log_flushes
1640 + fil_n_pending_tablespace_flushes;
1641 export_vars.innodb_adaptive_hash_hash_searches
1642 = btr_cur_n_sea;
1643 export_vars.innodb_adaptive_hash_non_hash_searches
1644 = btr_cur_n_non_sea;
1645 export_vars.innodb_background_log_sync
1646 = srv_log_writes_and_flush;
1647
1648 export_vars.innodb_data_fsyncs = os_n_fsyncs;
1649
1650 export_vars.innodb_data_read = srv_stats.data_read;
1651
1652 export_vars.innodb_data_reads = os_n_file_reads;
1653
1654 export_vars.innodb_data_writes = os_n_file_writes;
1655
1656 export_vars.innodb_data_written = srv_stats.data_written;
1657
1658 export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
1659
1660 export_vars.innodb_buffer_pool_write_requests =
1661 srv_stats.buf_pool_write_requests;
1662
1663 export_vars.innodb_buffer_pool_wait_free =
1664 srv_stats.buf_pool_wait_free;
1665
1666 export_vars.innodb_buffer_pool_pages_flushed =
1667 srv_stats.buf_pool_flushed;
1668
1669 export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1670
1671 export_vars.innodb_buffer_pool_read_ahead_rnd =
1672 stat.n_ra_pages_read_rnd;
1673
1674 export_vars.innodb_buffer_pool_read_ahead =
1675 stat.n_ra_pages_read;
1676
1677 export_vars.innodb_buffer_pool_read_ahead_evicted =
1678 stat.n_ra_pages_evicted;
1679
1680 export_vars.innodb_buffer_pool_pages_LRU_flushed =
1681 stat.buf_lru_flush_page_count;
1682
1683 export_vars.innodb_buffer_pool_pages_data = LRU_len;
1684
1685 export_vars.innodb_buffer_pool_bytes_data =
1686 buf_pools_list_size.LRU_bytes
1687 + buf_pools_list_size.unzip_LRU_bytes;
1688
1689 export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1690
1691 export_vars.innodb_buffer_pool_bytes_dirty =
1692 buf_pools_list_size.flush_list_bytes;
1693
1694 export_vars.innodb_buffer_pool_pages_free = free_len;
1695
1696 #ifdef UNIV_DEBUG
1697 export_vars.innodb_buffer_pool_pages_latched =
1698 buf_get_latched_pages_number();
1699 #endif /* UNIV_DEBUG */
1700 export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1701
1702 export_vars.innodb_buffer_pool_pages_misc =
1703 buf_pool_get_n_pages() - LRU_len - free_len;
1704
1705 export_vars.innodb_buffer_pool_pages_made_young
1706 = stat.n_pages_made_young;
1707 export_vars.innodb_buffer_pool_pages_made_not_young
1708 = stat.n_pages_not_made_young;
1709 export_vars.innodb_buffer_pool_pages_old = 0;
1710 for (i = 0; i < srv_buf_pool_instances; i++) {
1711 buf_pool_t* buf_pool = buf_pool_from_array(i);
1712 export_vars.innodb_buffer_pool_pages_old
1713 += buf_pool->LRU_old_len;
1714 }
1715 export_vars.innodb_checkpoint_age
1716 = (log_sys->lsn - log_sys->last_checkpoint_lsn);
1717 export_vars.innodb_checkpoint_max_age
1718 = log_sys->max_checkpoint_age;
1719 ibuf_export_ibuf_status(
1720 &export_vars.innodb_ibuf_free_list,
1721 &export_vars.innodb_ibuf_segment_size);
1722 export_vars.innodb_lsn_current
1723 = log_sys->lsn;
1724 export_vars.innodb_lsn_flushed
1725 = log_sys->flushed_to_disk_lsn;
1726 export_vars.innodb_lsn_last_checkpoint
1727 = log_sys->last_checkpoint_lsn;
1728 export_vars.innodb_master_thread_active_loops
1729 = srv_main_active_loops;
1730 export_vars.innodb_master_thread_idle_loops
1731 = srv_main_idle_loops;
1732 export_vars.innodb_max_trx_id
1733 = trx_sys->max_trx_id;
1734 export_vars.innodb_mem_adaptive_hash
1735 = mem_adaptive_hash;
1736 export_vars.innodb_mem_dictionary
1737 = mem_dictionary;
1738
1739 mutex_enter(&trx_sys->mutex);
1740 oldest_view = trx_sys->mvcc->get_oldest_view();
1741 mutex_exit(&trx_sys->mutex);
1742 export_vars.innodb_oldest_view_low_limit_trx_id
1743 = oldest_view ? oldest_view->low_limit_id() : 0;
1744
1745 export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no;
1746 export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no;
1747
1748 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1749
1750 export_vars.innodb_log_waits = srv_stats.log_waits;
1751
1752 export_vars.innodb_os_log_written = srv_stats.os_log_written;
1753
1754 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1755
1756 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1757
1758 export_vars.innodb_os_log_pending_writes =
1759 srv_stats.os_log_pending_writes;
1760
1761 export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1762
1763 export_vars.innodb_log_writes = srv_stats.log_writes;
1764
1765 export_vars.innodb_dblwr_pages_written =
1766 srv_stats.dblwr_pages_written;
1767
1768 export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1769
1770 export_vars.innodb_pages_created = stat.n_pages_created;
1771
1772 export_vars.innodb_pages_read = stat.n_pages_read;
1773 export_vars.innodb_page0_read = srv_stats.page0_read;
1774
1775 export_vars.innodb_pages_written = stat.n_pages_written;
1776
1777 export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1778
1779 export_vars.innodb_row_lock_current_waits =
1780 srv_stats.n_lock_wait_current_count;
1781
1782 export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1783
1784 if (srv_stats.n_lock_wait_count > 0) {
1785
1786 export_vars.innodb_row_lock_time_avg = (ulint)
1787 (srv_stats.n_lock_wait_time
1788 / 1000 / srv_stats.n_lock_wait_count);
1789
1790 } else {
1791 export_vars.innodb_row_lock_time_avg = 0;
1792 }
1793
1794 export_vars.innodb_row_lock_time_max =
1795 lock_sys->n_lock_max_wait_time / 1000;
1796
1797 export_vars.innodb_rows_read = srv_stats.n_rows_read;
1798
1799 export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1800
1801 export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1802
1803 export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1804
1805 export_vars.innodb_num_open_files = fil_n_file_opened;
1806
1807 export_vars.innodb_truncated_status_writes =
1808 srv_truncated_status_writes;
1809
1810 export_vars.innodb_available_undo_logs = srv_available_undo_logs;
1811
1812 export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
1813 export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted;
1814
1815 export_vars.innodb_n_merge_blocks_encrypted =
1816 srv_stats.n_merge_blocks_encrypted;
1817
1818 export_vars.innodb_n_merge_blocks_decrypted =
1819 srv_stats.n_merge_blocks_decrypted;
1820
1821 export_vars.innodb_n_rowlog_blocks_encrypted =
1822 srv_stats.n_rowlog_blocks_encrypted;
1823
1824 export_vars.innodb_n_rowlog_blocks_decrypted =
1825 srv_stats.n_rowlog_blocks_decrypted;
1826
1827 #ifdef UNIV_DEBUG
1828 rw_lock_s_lock(&purge_sys->latch);
1829 trx_id_t up_limit_id;
1830 trx_id_t done_trx_no = purge_sys->done.trx_no;
1831
1832 up_limit_id = purge_sys->view_active
1833 ? purge_sys->view.up_limit_id() : 0;
1834
1835 rw_lock_s_unlock(&purge_sys->latch);
1836
1837 mutex_enter(&trx_sys->mutex);
1838 trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
1839 mutex_exit(&trx_sys->mutex);
1840
1841 if (!done_trx_no || max_trx_id < done_trx_no - 1) {
1842 export_vars.innodb_purge_trx_id_age = 0;
1843 } else {
1844 export_vars.innodb_purge_trx_id_age =
1845 (ulint) (max_trx_id - done_trx_no + 1);
1846 }
1847
1848 if (!up_limit_id
1849 || max_trx_id < up_limit_id) {
1850 export_vars.innodb_purge_view_trx_id_age = 0;
1851 } else {
1852 export_vars.innodb_purge_view_trx_id_age =
1853 (ulint) (max_trx_id - up_limit_id);
1854 }
1855 #endif /* UNIV_DEBUG */
1856
1857 os_rmb;
1858 export_vars.innodb_sec_rec_cluster_reads =
1859 srv_sec_rec_cluster_reads;
1860 export_vars.innodb_sec_rec_cluster_reads_avoided =
1861 srv_sec_rec_cluster_reads_avoided;
1862
1863 export_vars.innodb_buffered_aio_submitted =
1864 srv_stats.n_aio_submitted;
1865
1866 thd_get_fragmentation_stats(current_thd,
1867 &export_vars.innodb_fragmentation_stats);
1868
1869 if (!srv_read_only_mode) {
1870 export_vars.innodb_encryption_rotation_pages_read_from_cache =
1871 crypt_stat.pages_read_from_cache;
1872 export_vars.innodb_encryption_rotation_pages_read_from_disk =
1873 crypt_stat.pages_read_from_disk;
1874 export_vars.innodb_encryption_rotation_pages_modified =
1875 crypt_stat.pages_modified;
1876 export_vars.innodb_encryption_rotation_pages_flushed =
1877 crypt_stat.pages_flushed;
1878 export_vars.innodb_encryption_rotation_estimated_iops =
1879 crypt_stat.estimated_iops;
1880 export_vars.innodb_encryption_key_requests =
1881 srv_stats.n_key_requests;
1882 export_vars.innodb_key_rotation_list_length =
1883 srv_stats.key_rotation_list_length;
1884
1885 export_vars.innodb_scrub_page_reorganizations =
1886 scrub_stat.page_reorganizations;
1887 export_vars.innodb_scrub_page_splits =
1888 scrub_stat.page_splits;
1889 export_vars.innodb_scrub_page_split_failures_underflow =
1890 scrub_stat.page_split_failures_underflow;
1891 export_vars.innodb_scrub_page_split_failures_out_of_filespace =
1892 scrub_stat.page_split_failures_out_of_filespace;
1893 export_vars.innodb_scrub_page_split_failures_missing_index =
1894 scrub_stat.page_split_failures_missing_index;
1895 export_vars.innodb_scrub_page_split_failures_unknown =
1896 scrub_stat.page_split_failures_unknown;
1897 export_vars.innodb_scrub_log = srv_stats.n_log_scrubs;
1898
1899 export_vars.innodb_redo_key_version
1900 = srv_redo_log_key_version;
1901 }
1902
1903 mutex_exit(&srv_innodb_monitor_mutex);
1904 }
1905
#ifndef NDEBUG
/** Debug-build flag: stays false until the InnoDB monitor output has been
printed at least once, and is true from then on. */
bool	srv_debug_monitor_printed(false);
#endif /* !NDEBUG */
1911
1912 /*********************************************************************//**
1913 A thread which prints the info output by various InnoDB monitors.
1914 @return a dummy parameter */
1915 extern "C"
1916 os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)1917 DECLARE_THREAD(srv_monitor_thread)(
1918 /*===============================*/
1919 void* arg MY_ATTRIBUTE((unused)))
1920 /*!< in: a dummy parameter required by
1921 os_thread_create */
1922 {
1923 int64_t sig_count;
1924 ib_time_monotonic_t time_elapsed;
1925 ib_time_monotonic_t current_time;
1926 ib_time_monotonic_t last_monitor_time;
1927 ulint mutex_skipped;
1928 ibool last_srv_print_monitor;
1929
1930 ut_ad(!srv_read_only_mode);
1931
1932 #ifdef UNIV_DEBUG_THREAD_CREATION
1933 ib::info() << "Lock timeout thread starts, id "
1934 << os_thread_pf(os_thread_get_curr_id());
1935 #endif /* UNIV_DEBUG_THREAD_CREATION */
1936
1937 #ifdef UNIV_PFS_THREAD
1938 pfs_register_thread(srv_monitor_thread_key);
1939 #endif /* UNIV_PFS_THREAD */
1940 srv_monitor_active = TRUE;
1941
1942 UT_NOT_USED(arg);
1943 srv_last_monitor_time = last_monitor_time = ut_time_monotonic();
1944 mutex_skipped = 0;
1945 last_srv_print_monitor = srv_print_innodb_monitor;
1946 loop:
1947 /* Wake up every 5 seconds to see if we need to print
1948 monitor information or if signalled at shutdown. */
1949
1950 sig_count = os_event_reset(srv_monitor_event);
1951
1952 os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1953
1954 current_time = ut_time_monotonic();
1955
1956 time_elapsed = current_time - last_monitor_time;
1957
1958 if (time_elapsed > 15) {
1959 last_monitor_time = ut_time_monotonic();
1960
1961 if (srv_print_innodb_monitor) {
1962 /* Reset mutex_skipped counter everytime
1963 srv_print_innodb_monitor changes. This is to
1964 ensure we will not be blocked by lock_sys->mutex
1965 for short duration information printing,
1966 such as requested by sync_array_print_long_waits() */
1967 if (!last_srv_print_monitor) {
1968 mutex_skipped = 0;
1969 last_srv_print_monitor = TRUE;
1970 }
1971
1972 if (!srv_printf_innodb_monitor(stderr,
1973 MUTEX_NOWAIT(mutex_skipped),
1974 NULL, NULL)) {
1975 mutex_skipped++;
1976 } else {
1977 /* Reset the counter */
1978 mutex_skipped = 0;
1979 }
1980 } else {
1981 last_srv_print_monitor = FALSE;
1982 }
1983
1984
1985 /* We don't create the temp files or associated
1986 mutexes in read-only-mode */
1987
1988 if (!srv_read_only_mode && srv_innodb_status) {
1989 mutex_enter(&srv_monitor_file_mutex);
1990 rewind(srv_monitor_file);
1991 if (!srv_printf_innodb_monitor(srv_monitor_file,
1992 MUTEX_NOWAIT(mutex_skipped),
1993 NULL, NULL)) {
1994 mutex_skipped++;
1995 } else {
1996 mutex_skipped = 0;
1997 }
1998
1999 os_file_set_eof(srv_monitor_file);
2000 mutex_exit(&srv_monitor_file_mutex);
2001 }
2002 }
2003
2004 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2005 goto exit_func;
2006 }
2007
2008 if (srv_print_innodb_monitor || srv_print_innodb_lock_monitor) {
2009 goto loop;
2010 }
2011
2012 goto loop;
2013
2014 exit_func:
2015 srv_monitor_active = FALSE;
2016
2017 /* We count the number of threads in os_thread_exit(). A created
2018 thread should always use that to exit and not use return() to exit. */
2019
2020 os_thread_exit();
2021
2022 OS_THREAD_DUMMY_RETURN;
2023 }
2024
2025 /*********************************************************************//**
2026 A thread which prints warnings about semaphore waits which have lasted
2027 too long. These can be used to track bugs which cause hangs.
2028 @return a dummy parameter */
2029 extern "C"
2030 os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)2031 DECLARE_THREAD(srv_error_monitor_thread)(
2032 /*=====================================*/
2033 void* arg MY_ATTRIBUTE((unused)))
2034 /*!< in: a dummy parameter required by
2035 os_thread_create */
2036 {
2037 /* number of successive fatal timeouts observed */
2038 ulint fatal_cnt = 0;
2039 lsn_t old_lsn;
2040 lsn_t new_lsn;
2041 int64_t sig_count;
2042 /* longest waiting thread for a semaphore */
2043 os_thread_id_t waiter = os_thread_get_curr_id();
2044 os_thread_id_t old_waiter = waiter;
2045 /* the semaphore that is being waited for */
2046 const void* sema = NULL;
2047 const void* old_sema = NULL;
2048
2049 ut_ad(!srv_read_only_mode);
2050
2051 old_lsn = srv_start_lsn;
2052
2053 #ifdef UNIV_DEBUG_THREAD_CREATION
2054 ib::info() << "Error monitor thread starts, id "
2055 << os_thread_pf(os_thread_get_curr_id());
2056 #endif /* UNIV_DEBUG_THREAD_CREATION */
2057
2058 #ifdef UNIV_PFS_THREAD
2059 pfs_register_thread(srv_error_monitor_thread_key);
2060 #endif /* UNIV_PFS_THREAD */
2061 srv_error_monitor_active = TRUE;
2062
2063 loop:
2064 /* Try to track a strange bug reported by Harald Fuchs and others,
2065 where the lsn seems to decrease at times */
2066
2067 new_lsn = log_get_lsn();
2068
2069 if (new_lsn < old_lsn) {
2070 ib::error() << "Old log sequence number " << old_lsn << " was"
2071 << " greater than the new log sequence number "
2072 << new_lsn << ". Please submit a bug report to"
2073 " http://bugs.mysql.com";
2074 ut_ad(0);
2075 }
2076
2077 old_lsn = new_lsn;
2078
2079 if (ut_difftime(ut_time_monotonic(), srv_last_monitor_time) > 60) {
2080 /* We referesh InnoDB Monitor values so that averages are
2081 printed from at most 60 last seconds */
2082
2083 srv_refresh_innodb_monitor_stats();
2084 }
2085
2086 /* Update the statistics collected for deciding LRU
2087 eviction policy. */
2088 buf_LRU_stat_update();
2089
2090 if (sync_array_print_long_waits(&waiter, &sema)
2091 && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2092 fatal_cnt++;
2093 if (fatal_cnt > 10) {
2094 ib::fatal() << "Semaphore wait has lasted > "
2095 << srv_fatal_semaphore_wait_threshold
2096 << " seconds. We intentionally crash the"
2097 " server because it appears to be hung.";
2098 }
2099 } else {
2100 fatal_cnt = 0;
2101 old_waiter = waiter;
2102 old_sema = sema;
2103 }
2104
2105 /* Flush stderr so that a database user gets the output
2106 to possible MySQL error file */
2107
2108 fflush(stderr);
2109
2110 sig_count = os_event_reset(srv_error_event);
2111
2112 os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2113
2114 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2115
2116 goto loop;
2117 }
2118
2119 srv_error_monitor_active = FALSE;
2120
2121 /* We count the number of threads in os_thread_exit(). A created
2122 thread should always use that to exit and not use return() to exit. */
2123
2124 os_thread_exit();
2125
2126 OS_THREAD_DUMMY_RETURN;
2127 }
2128
2129 /******************************************************************//**
2130 Increment the server activity count. */
2131 void
srv_inc_activity_count(bool ibuf_merge_activity)2132 srv_inc_activity_count(
2133 /*===================*/
2134 bool ibuf_merge_activity) /*!< whether this activity bump
2135 is caused by the background
2136 change buffer merge */
2137 {
2138 srv_sys->activity_count.inc();
2139 if (ibuf_merge_activity)
2140 srv_sys->ibuf_merge_activity_count.inc();
2141 }
2142
2143 /**********************************************************************//**
2144 Check whether any background thread is active. If so return the thread
2145 type.
2146 @return SRV_NONE if all are suspended or have exited, thread
2147 type if any are still active. */
2148 srv_thread_type
srv_get_active_thread_type(void)2149 srv_get_active_thread_type(void)
2150 /*============================*/
2151 {
2152 srv_thread_type ret = SRV_NONE;
2153
2154 if (srv_read_only_mode) {
2155 return(SRV_NONE);
2156 }
2157
2158 srv_sys_mutex_enter();
2159
2160 for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
2161 if (srv_sys->n_threads_active[i] != 0) {
2162 ret = static_cast<srv_thread_type>(i);
2163 break;
2164 }
2165 }
2166
2167 srv_sys_mutex_exit();
2168
2169 /* Check only on shutdown. */
2170 if (ret == SRV_NONE
2171 && srv_shutdown_state != SRV_SHUTDOWN_NONE
2172 && trx_purge_state() != PURGE_STATE_DISABLED
2173 && trx_purge_state() != PURGE_STATE_EXIT) {
2174
2175 ret = SRV_PURGE;
2176 }
2177
2178 return(ret);
2179 }
2180
2181 /**********************************************************************//**
2182 Check whether any background thread are active. If so print which thread
2183 is active. Send the threads wakeup signal.
2184 @return name of thread that is active or NULL */
2185 const char*
srv_any_background_threads_are_active(void)2186 srv_any_background_threads_are_active(void)
2187 /*=======================================*/
2188 {
2189 const char* thread_active = NULL;
2190
2191 if (srv_read_only_mode) {
2192 if (srv_buf_resize_thread_active) {
2193 thread_active = "buf_resize_thread";
2194 }
2195 os_event_set(srv_buf_resize_event);
2196 return(thread_active);
2197 } else if (srv_error_monitor_active) {
2198 thread_active = "srv_error_monitor_thread";
2199 } else if (lock_sys->timeout_thread_active) {
2200 thread_active = "srv_lock_timeout thread";
2201 } else if (srv_monitor_active) {
2202 thread_active = "srv_monitor_thread";
2203 } else if (srv_buf_dump_thread_active) {
2204 thread_active = "buf_dump_thread";
2205 } else if (srv_buf_resize_thread_active) {
2206 thread_active = "buf_resize_thread";
2207 } else if (srv_dict_stats_thread_active) {
2208 thread_active = "dict_stats_thread";
2209 } else if (srv_n_fil_crypt_threads_started) {
2210 thread_active = "fil_crypt_thread";
2211 }
2212
2213 os_event_set(srv_error_event);
2214 os_event_set(srv_monitor_event);
2215 os_event_set(srv_buf_dump_event);
2216 os_event_set(lock_sys->timeout_event);
2217 os_event_set(dict_stats_event);
2218 os_event_set(srv_buf_resize_event);
2219 os_event_set(fil_crypt_threads_event);
2220
2221 return(thread_active);
2222 }
2223
2224 /******************************************************************//**
2225 A thread which follows the redo log and outputs the changed page bitmap.
2226 @return a dummy value */
2227 extern "C"
2228 os_thread_ret_t
DECLARE_THREAD(srv_redo_log_follow_thread)2229 DECLARE_THREAD(srv_redo_log_follow_thread)(
2230 /*=======================================*/
2231 void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
2232 required by
2233 os_thread_create */
2234 {
2235 ut_ad(!srv_read_only_mode);
2236
2237 #ifdef UNIV_DEBUG_THREAD_CREATION
2238 ib::info() << "Redo log follower thread starts, id "
2239 << os_thread_pf(os_thread_get_curr_id());
2240 #endif
2241
2242 #ifdef UNIV_PFS_THREAD
2243 pfs_register_thread(srv_log_tracking_thread_key);
2244 #endif
2245
2246 my_thread_init();
2247 srv_redo_log_thread_started = true;
2248
2249 do {
2250 os_event_wait(srv_checkpoint_completed_event);
2251 os_event_reset(srv_checkpoint_completed_event);
2252
2253 if (srv_track_changed_pages
2254 && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
2255 if (!log_online_follow_redo_log()) {
2256 /* TODO: sync with I_S log tracking status? */
2257 ib::error() << "Log tracking bitmap write "
2258 "failed, stopping log tracking thread!";
2259 break;
2260 }
2261 os_event_set(srv_redo_log_tracked_event);
2262 }
2263
2264 } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
2265
2266 log_online_read_shutdown();
2267 os_event_set(srv_redo_log_tracked_event);
2268
2269 my_thread_end();
2270 os_thread_exit();
2271
2272 OS_THREAD_DUMMY_RETURN;
2273 }
2274
2275 /*******************************************************************//**
2276 Tells the InnoDB server that there has been activity in the database
2277 and wakes up the master thread if it is suspended (not sleeping). Used
2278 in the MySQL interface. Note that there is a small chance that the master
2279 thread stays suspended (we do not protect our operation with the
2280 srv_sys_t->mutex, for performance reasons). */
2281 void
srv_active_wake_master_thread_low()2282 srv_active_wake_master_thread_low()
2283 /*===============================*/
2284 {
2285 ut_ad(!srv_read_only_mode);
2286 ut_ad(!srv_sys_mutex_own());
2287
2288 srv_inc_activity_count();
2289
2290 if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
2291 srv_slot_t* slot;
2292
2293 srv_sys_mutex_enter();
2294
2295 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
2296
2297 /* Only if the master thread has been started. */
2298
2299 if (slot->in_use) {
2300 ut_a(srv_slot_get_type(slot) == SRV_MASTER);
2301
2302 if (slot->suspended) {
2303
2304 slot->suspended = FALSE;
2305
2306 ++srv_sys->n_threads_active[SRV_MASTER];
2307
2308 os_event_set(slot->event);
2309 }
2310 }
2311
2312 srv_sys_mutex_exit();
2313 }
2314 }
2315
2316 /*******************************************************************//**
2317 Tells the purge thread that there has been activity in the database
2318 and wakes up the purge thread if it is suspended (not sleeping). Note
2319 that there is a small chance that the purge thread stays suspended
2320 (we do not protect our check with the srv_sys_t:mutex and the
2321 purge_sys->latch, for performance reasons). */
2322 void
srv_wake_purge_thread_if_not_active(void)2323 srv_wake_purge_thread_if_not_active(void)
2324 /*=====================================*/
2325 {
2326 ut_ad(!srv_sys_mutex_own());
2327
2328 if (purge_sys->state == PURGE_STATE_RUN
2329 && srv_sys->n_threads_active[SRV_PURGE] == 0) {
2330
2331 srv_release_threads(SRV_PURGE, 1);
2332 }
2333 }
2334
2335 /*******************************************************************//**
2336 Wakes up the master thread if it is suspended or being suspended. */
2337 void
srv_wake_master_thread(void)2338 srv_wake_master_thread(void)
2339 /*========================*/
2340 {
2341 ut_ad(!srv_sys_mutex_own());
2342
2343 srv_inc_activity_count();
2344
2345 srv_release_threads(SRV_MASTER, 1);
2346 }
2347
2348 /*******************************************************************//**
2349 Get current server activity count. We don't hold srv_sys::mutex while
2350 reading this value as it is only used in heuristics.
2351 @return activity count. */
2352 ulint
srv_get_activity_count(void)2353 srv_get_activity_count(void)
2354 /*========================*/
2355 {
2356 return(srv_sys->activity_count);
2357 }
2358
2359 /** Get current server ibuf merge activity count.
2360 @return ibuf merge activity count */
2361 static
2362 ulint
srv_get_ibuf_merge_activity_count(void)2363 srv_get_ibuf_merge_activity_count(void)
2364 {
2365 return(srv_sys->ibuf_merge_activity_count);
2366 }
2367
2368 /*******************************************************************//**
2369 Check if there has been any activity. Considers background change buffer
2370 merge as regular server activity unless a non-default
2371 old_ibuf_merge_activity_count value is passed, in which case the merge will be
2372 treated as keeping server idle.
2373 @return FALSE if no change in activity counter. */
2374 ibool
srv_check_activity(ulint old_activity_count,ulint old_ibuf_merge_activity_count)2375 srv_check_activity(
2376 /*===============*/
2377 ulint old_activity_count, /*!< in: old activity count */
2378 /*!< old change buffer merge
2379 activity count, or
2380 ULINT_UNDEFINED */
2381 ulint old_ibuf_merge_activity_count)
2382 {
2383 ulint new_activity_count = srv_sys->activity_count;
2384 if (old_ibuf_merge_activity_count == ULINT_UNDEFINED)
2385 return(new_activity_count != old_activity_count);
2386
2387 /* If we care about ibuf merge activity, then the server is considered
2388 idle if all activity, if any, was due to ibuf merge. */
2389 ulint new_ibuf_merge_activity_count
2390 = srv_sys->ibuf_merge_activity_count;
2391
2392 ut_ad(new_ibuf_merge_activity_count <= new_activity_count);
2393 ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count);
2394 ut_ad(new_activity_count >= old_activity_count);
2395
2396 ulint ibuf_merge_activity_delta =
2397 new_ibuf_merge_activity_count - old_ibuf_merge_activity_count;
2398 ulint activity_delta = new_activity_count - old_activity_count;
2399
2400 return (activity_delta > ibuf_merge_activity_delta);
2401 }
2402
2403 /********************************************************************//**
2404 The master thread is tasked to ensure that flush of log file happens
2405 once every second in the background. This is to ensure that not more
2406 than one second of trxs are lost in case of crash when
2407 innodb_flush_logs_at_trx_commit != 1 */
2408 static
2409 void
srv_sync_log_buffer_in_background(void)2410 srv_sync_log_buffer_in_background(void)
2411 /*===================================*/
2412 {
2413 ib_time_monotonic_t current_time = ut_time_monotonic();
2414
2415 srv_main_thread_op_info = "flushing log";
2416 if ((current_time - srv_last_log_flush_time)
2417 >= srv_flush_log_at_timeout) {
2418 log_buffer_sync_in_background(true);
2419 srv_last_log_flush_time = current_time;
2420 srv_log_writes_and_flush++;
2421 }
2422 }
2423
2424 /********************************************************************//**
2425 Make room in the table cache by evicting an unused table.
2426 @return number of tables evicted. */
2427 static
2428 ulint
srv_master_evict_from_table_cache(ulint pct_check)2429 srv_master_evict_from_table_cache(
2430 /*==============================*/
2431 ulint pct_check) /*!< in: max percent to check */
2432 {
2433 ulint n_tables_evicted = 0;
2434
2435 rw_lock_x_lock(dict_operation_lock);
2436
2437 dict_mutex_enter_for_mysql();
2438
2439 n_tables_evicted = dict_make_room_in_cache(
2440 innobase_get_table_cache_size(), pct_check);
2441
2442 dict_mutex_exit_for_mysql();
2443
2444 rw_lock_x_unlock(dict_operation_lock);
2445
2446 return(n_tables_evicted);
2447 }
2448
2449 /*********************************************************************//**
2450 This function prints progress message every 60 seconds during server
2451 shutdown, for any activities that master thread is pending on. */
2452 static
2453 void
srv_shutdown_print_master_pending(ib_time_monotonic_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)2454 srv_shutdown_print_master_pending(
2455 /*==============================*/
2456 ib_time_monotonic_t* last_print_time, /*!< last time the function
2457 print the message */
2458 ulint n_tables_to_drop, /*!< number of tables to
2459 be dropped */
2460 ulint n_bytes_merged) /*!< number of change buffer
2461 just merged */
2462 {
2463 ib_time_monotonic_t current_time;
2464 ib_time_monotonic_t time_elapsed;
2465
2466 current_time = ut_time_monotonic();
2467 time_elapsed = current_time - *last_print_time;
2468
2469 if (time_elapsed > 60) {
2470 *last_print_time = ut_time_monotonic();
2471
2472 if (n_tables_to_drop) {
2473 ib::info() << "Waiting for " << n_tables_to_drop
2474 << " table(s) to be dropped";
2475 }
2476
2477 /* Check change buffer merge, we only wait for change buffer
2478 merge if it is a slow shutdown */
2479 if (!srv_fast_shutdown && n_bytes_merged) {
2480 ib::info() << "Waiting for change buffer merge to"
2481 " complete number of bytes of change buffer"
2482 " just merged: " << n_bytes_merged;
2483 }
2484 }
2485 }
2486
2487 #ifdef UNIV_DEBUG
2488 /** Waits in loop as long as master thread is disabled (debug) */
2489 static
2490 void
srv_master_do_disabled_loop(void)2491 srv_master_do_disabled_loop(void)
2492 {
2493 if (!srv_master_thread_disabled_debug) {
2494 /* We return here to avoid changing op_info. */
2495 return;
2496 }
2497
2498 srv_main_thread_op_info = "disabled";
2499
2500 while (srv_master_thread_disabled_debug) {
2501 os_event_set(srv_master_thread_disabled_event);
2502 if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
2503 break;
2504 }
2505 os_thread_sleep(100000);
2506 }
2507
2508 srv_main_thread_op_info = "";
2509 }
2510
2511 /** Disables master thread. It's used by:
2512 SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
2513 @param[in] thd thread handle
2514 @param[in] var pointer to system variable
2515 @param[out] var_ptr where the formal string goes
2516 @param[in] save immediate result from check function */
2517 void
srv_master_thread_disabled_debug_update(THD * thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)2518 srv_master_thread_disabled_debug_update(
2519 THD* thd,
2520 struct st_mysql_sys_var* var,
2521 void* var_ptr,
2522 const void* save)
2523 {
2524 /* This method is protected by mutex, as every SET GLOBAL .. */
2525 ut_ad(srv_master_thread_disabled_event != NULL);
2526
2527 const bool disable = *static_cast<const my_bool*>(save);
2528
2529 const int64_t sig_count = os_event_reset(
2530 srv_master_thread_disabled_event);
2531
2532 srv_master_thread_disabled_debug = disable;
2533
2534 if (disable) {
2535 os_event_wait_low(
2536 srv_master_thread_disabled_event, sig_count);
2537 }
2538 }
2539 #endif /* UNIV_DEBUG */
2540
2541 /*********************************************************************//**
2542 Perform the tasks that the master thread is supposed to do when the
2543 server is active. There are two types of tasks. The first category is
2544 of such tasks which are performed at each inovcation of this function.
2545 We assume that this function is called roughly every second when the
2546 server is active. The second category is of such tasks which are
2547 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
2548 static
2549 void
srv_master_do_active_tasks(void)2550 srv_master_do_active_tasks(void)
2551 /*============================*/
2552 {
2553 ib_time_monotonic_t cur_time = ut_time_monotonic();
2554 ib_time_monotonic_us_t counter_time = ut_time_monotonic_us();
2555
2556 /* First do the tasks that we are suppose to do at each
2557 invocation of this function. */
2558
2559 ++srv_main_active_loops;
2560
2561 MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2562
2563 /* ALTER TABLE in MySQL requires on Unix that the table handler
2564 can drop tables lazily after there no longer are SELECT
2565 queries to them. */
2566 srv_main_thread_op_info = "doing background drop tables";
2567 row_drop_tables_for_mysql_in_background();
2568 MONITOR_INC_TIME_IN_MICRO_SECS(
2569 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
2570
2571 ut_d(srv_master_do_disabled_loop());
2572
2573 if (srv_shutdown_state > 0) {
2574 return;
2575 }
2576
2577 /* make sure that there is enough reusable space in the redo
2578 log files */
2579 srv_main_thread_op_info = "checking free log space";
2580 log_free_check();
2581
2582 /* Do an ibuf merge */
2583 srv_main_thread_op_info = "doing insert buffer merge";
2584 counter_time = ut_time_monotonic_us();
2585 ibuf_merge_in_background(false);
2586 MONITOR_INC_TIME_IN_MICRO_SECS(
2587 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2588
2589 /* Flush logs if needed */
2590 srv_main_thread_op_info = "flushing log";
2591 srv_sync_log_buffer_in_background();
2592 MONITOR_INC_TIME_IN_MICRO_SECS(
2593 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2594
2595 /* Now see if various tasks that are performed at defined
2596 intervals need to be performed. */
2597
2598 if (srv_shutdown_state > 0) {
2599 return;
2600 }
2601
2602 if (srv_shutdown_state > 0) {
2603 return;
2604 }
2605
2606 if (trx_sys->rseg_history_len > 0) {
2607 srv_wake_purge_thread_if_not_active();
2608 }
2609
2610 if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2611 srv_main_thread_op_info = "enforcing dict cache limit";
2612 ulint n_evicted = srv_master_evict_from_table_cache(50);
2613 if (n_evicted != 0) {
2614 MONITOR_INC_VALUE(
2615 MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2616 }
2617 MONITOR_INC_TIME_IN_MICRO_SECS(
2618 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2619 }
2620
2621 if (srv_shutdown_state > 0) {
2622 return;
2623 }
2624
2625 /* Make a new checkpoint */
2626 if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
2627 srv_main_thread_op_info = "making checkpoint";
2628 log_checkpoint(TRUE, FALSE);
2629 MONITOR_INC_TIME_IN_MICRO_SECS(
2630 MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
2631 }
2632 }
2633
2634 /*********************************************************************//**
2635 Perform the tasks that the master thread is supposed to do whenever the
2636 server is idle. We do check for the server state during this function
2637 and if the server has entered the shutdown phase we may return from
2638 the function without completing the required tasks.
2639 Note that the server can move to active state when we are executing this
2640 function but we don't check for that as we are suppose to perform more
2641 or less same tasks when server is active. */
2642 static
2643 void
srv_master_do_idle_tasks(void)2644 srv_master_do_idle_tasks(void)
2645 /*==========================*/
2646 {
2647 ib_time_monotonic_t counter_time;
2648
2649 ++srv_main_idle_loops;
2650
2651 MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2652
2653
2654 /* ALTER TABLE in MySQL requires on Unix that the table handler
2655 can drop tables lazily after there no longer are SELECT
2656 queries to them. */
2657 counter_time = ut_time_monotonic_us();
2658 srv_main_thread_op_info = "doing background drop tables";
2659 row_drop_tables_for_mysql_in_background();
2660 MONITOR_INC_TIME_IN_MICRO_SECS(
2661 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2662 counter_time);
2663
2664 ut_d(srv_master_do_disabled_loop());
2665
2666 if (srv_shutdown_state > 0) {
2667 return;
2668 }
2669
2670 /* make sure that there is enough reusable space in the redo
2671 log files */
2672 srv_main_thread_op_info = "checking free log space";
2673 log_free_check();
2674
2675 /* Do an ibuf merge */
2676 counter_time = ut_time_monotonic_us();
2677 srv_main_thread_op_info = "doing insert buffer merge";
2678 ibuf_merge_in_background(true);
2679 MONITOR_INC_TIME_IN_MICRO_SECS(
2680 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2681
2682 if (srv_shutdown_state > 0) {
2683 return;
2684 }
2685
2686 if (trx_sys->rseg_history_len > 0) {
2687 srv_wake_purge_thread_if_not_active();
2688 }
2689
2690 srv_main_thread_op_info = "enforcing dict cache limit";
2691 ulint n_evicted = srv_master_evict_from_table_cache(100);
2692 if (n_evicted != 0) {
2693 MONITOR_INC_VALUE(
2694 MONITOR_SRV_DICT_LRU_EVICT_COUNT, n_evicted);
2695 }
2696 MONITOR_INC_TIME_IN_MICRO_SECS(
2697 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2698
2699 /* Flush logs if needed */
2700 srv_sync_log_buffer_in_background();
2701 MONITOR_INC_TIME_IN_MICRO_SECS(
2702 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2703
2704 if (srv_shutdown_state > 0) {
2705 return;
2706 }
2707
2708 /* Make a new checkpoint */
2709 srv_main_thread_op_info = "making checkpoint";
2710 log_checkpoint(TRUE, FALSE);
2711 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
2712 counter_time);
2713 }
2714
2715 /*********************************************************************//**
2716 Perform the tasks during shutdown. The tasks that we do at shutdown
2717 depend on srv_fast_shutdown:
2718 2 => very fast shutdown => do no book keeping
2719 1 => normal shutdown => clear drop table queue and make checkpoint
2720 0 => slow shutdown => in addition to above do complete purge and ibuf
2721 merge
2722 @return TRUE if some work was done. FALSE otherwise */
2723 static
2724 ibool
srv_master_do_shutdown_tasks(ib_time_monotonic_t * last_print_time)2725 srv_master_do_shutdown_tasks(
2726 /*=========================*/
2727 ib_time_monotonic_t* last_print_time)/*!< last time the function
2728 print the message */
2729 {
2730 ulint n_bytes_merged = 0;
2731 ulint n_tables_to_drop = 0;
2732
2733 ut_ad(!srv_read_only_mode);
2734
2735 ++srv_main_shutdown_loops;
2736
2737 ut_a(srv_shutdown_state > 0);
2738
2739 /* In very fast shutdown none of the following is necessary */
2740 if (srv_fast_shutdown == 2) {
2741 return(FALSE);
2742 }
2743
2744 /* ALTER TABLE in MySQL requires on Unix that the table handler
2745 can drop tables lazily after there no longer are SELECT
2746 queries to them. */
2747 srv_main_thread_op_info = "doing background drop tables";
2748 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2749
2750 /* make sure that there is enough reusable space in the redo
2751 log files */
2752 srv_main_thread_op_info = "checking free log space";
2753 log_free_check();
2754
2755 /* In case of normal shutdown we don't do ibuf merge or purge */
2756 if (srv_fast_shutdown == 1) {
2757 goto func_exit;
2758 }
2759
2760 /* Do an ibuf merge */
2761 srv_main_thread_op_info = "doing insert buffer merge";
2762 n_bytes_merged = ibuf_merge_in_background(true);
2763
2764 /* Flush logs if needed */
2765 srv_sync_log_buffer_in_background();
2766
2767 func_exit:
2768 /* Make a new checkpoint about once in 10 seconds */
2769 srv_main_thread_op_info = "making checkpoint";
2770 log_checkpoint(TRUE, FALSE);
2771
2772 /* Print progress message every 60 seconds during shutdown */
2773 if (srv_shutdown_state > 0 && srv_print_verbose_log) {
2774 srv_shutdown_print_master_pending(
2775 last_print_time, n_tables_to_drop, n_bytes_merged);
2776 }
2777
2778 return(n_bytes_merged || n_tables_to_drop);
2779 }
2780
2781 /** Set temporary tablespace to be encrypted if global variable
2782 innodb_temp_tablespace_encrypt is TRUE
2783 @param[in] enable true to enable encryption, false to disable
2784 @return DB_SUCCESS on success, DB_ERROR on failure */
2785 dberr_t
srv_temp_encryption_update(bool enable)2786 srv_temp_encryption_update(bool enable)
2787 {
2788 ut_ad(!srv_read_only_mode);
2789
2790 fil_space_t* space = fil_space_get(srv_tmp_space.space_id());
2791 bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(space->flags);
2792
2793 ut_ad(fsp_is_system_temporary(space->id));
2794
2795 if (enable != is_encrypted) {
2796 /* Toggle encryption */
2797 dberr_t err = fil_temp_update_encryption(space, enable);
2798 if (err == DB_SUCCESS) {
2799 srv_tmp_space.set_flags(space->flags);
2800 }
2801 return (err);
2802 }
2803 return (DB_SUCCESS);
2804 }
2805
2806 /*********************************************************************//**
2807 Puts master thread to sleep. At this point we are using polling to
2808 service various activities. Master thread sleeps for one second before
2809 checking the state of the server again */
2810 static
2811 void
srv_master_sleep(void)2812 srv_master_sleep(void)
2813 /*==================*/
2814 {
2815 srv_main_thread_op_info = "sleeping";
2816 os_thread_sleep(1000000);
2817 srv_main_thread_op_info = "";
2818 }
2819
/*********************************************************************//**
The master thread controlling the server.
While the server is running it wakes up once per srv_master_sleep()
interval and runs either the "active" or the "idle" task set, depending
on whether srv_check_activity() reports activity since the previous
iteration. Once shutdown starts it runs srv_master_do_shutdown_tasks()
until that returns false, then suspends itself until it is woken up.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_master_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	my_thread_init();
	DBUG_ENTER("srv_master_thread");

	srv_slot_t*	slot;
	/* Snapshots of the activity counters; compared against the live
	counters on each iteration to decide between active/idle tasks. */
	ulint		old_activity_count = srv_get_activity_count();
	ulint		old_ibuf_merge_activity_count
		= srv_get_ibuf_merge_activity_count();
	/* Passed to srv_master_do_shutdown_tasks() during shutdown. */
	ib_time_monotonic_t	last_print_time;

	ut_ad(!srv_read_only_mode);

	srv_master_tid = os_thread_get_tid();

	os_thread_set_priority(srv_master_tid, srv_sched_priority_master);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Master thread starts, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_master_thread_key);
#endif /* UNIV_PFS_THREAD */

	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	/* The master thread must have been given the first entry of
	srv_sys->sys_threads. */
	slot = srv_reserve_slot(SRV_MASTER);
	ut_a(slot == srv_sys->sys_threads);

	last_print_time = ut_time_monotonic();
loop:
	/* With background activity disabled by forced recovery, skip
	all work and go straight to the suspended state. */
	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
		goto suspend_thread;
	}

	/* Normal operation: poll until shutdown begins. */
	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {

		srv_master_sleep();

		MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);

		srv_current_thread_priority = srv_master_thread_priority;

		if (srv_check_activity(old_activity_count,
				       old_ibuf_merge_activity_count)) {

			/* Refresh the snapshots before doing the work so
			the next iteration compares against this point. */
			old_activity_count = srv_get_activity_count();
			old_ibuf_merge_activity_count
				= srv_get_ibuf_merge_activity_count();
			srv_master_do_active_tasks();
		} else {
			srv_master_do_idle_tasks();
		}

		srv_enable_undo_encryption_if_set();

		log_check_new_key_version();
	}

	/* Shutdown in progress: keep running the shutdown tasks for as
	long as they report outstanding work. */
	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS
	       && srv_master_do_shutdown_tasks(&last_print_time)) {

		/* Shouldn't loop here in case of very fast shutdown */
		ut_ad(srv_fast_shutdown < 2);
	}

suspend_thread:
	srv_main_thread_op_info = "suspending";

	srv_suspend_thread(slot);

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	os_event_wait(slot->event);

	/* Woken up: unless threads were told to exit, resume polling. */
	if (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
		goto loop;
	}

	my_thread_end();
	os_thread_exit();
	DBUG_RETURN(0);
}
2919
2920 /**
2921 Check if purge should stop.
2922 @return true if it should shutdown. */
2923 static
2924 bool
srv_purge_should_exit(ulint n_purged)2925 srv_purge_should_exit(
2926 ulint n_purged) /*!< in: pages purged in last batch */
2927 {
2928 switch (srv_shutdown_state) {
2929 case SRV_SHUTDOWN_NONE:
2930 /* Normal operation. */
2931 break;
2932
2933 case SRV_SHUTDOWN_CLEANUP:
2934 case SRV_SHUTDOWN_EXIT_THREADS:
2935 /* Exit unless slow shutdown requested or all done. */
2936 return(srv_fast_shutdown != 0 || n_purged == 0);
2937
2938 case SRV_SHUTDOWN_LAST_PHASE:
2939 case SRV_SHUTDOWN_FLUSH_PHASE:
2940 ut_error;
2941 }
2942
2943 return(false);
2944 }
2945
2946 /*********************************************************************//**
2947 Fetch and execute a task from the work queue.
2948 @return true if a task was executed */
2949 static
2950 bool
srv_task_execute(void)2951 srv_task_execute(void)
2952 /*==================*/
2953 {
2954 que_thr_t* thr = NULL;
2955
2956 ut_ad(!srv_read_only_mode);
2957 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2958
2959 mutex_enter(&srv_sys->tasks_mutex);
2960
2961 if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2962
2963 thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2964
2965 ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2966
2967 UT_LIST_REMOVE(srv_sys->tasks, thr);
2968 }
2969
2970 mutex_exit(&srv_sys->tasks_mutex);
2971
2972 if (thr != NULL) {
2973
2974 que_run_threads(thr);
2975
2976 os_atomic_inc_ulint(
2977 &purge_sys->pq_mutex, &purge_sys->n_completed, 1);
2978 }
2979
2980 return(thr != NULL);
2981 }
2982
/** Counter atomically incremented by every purge worker thread when it
starts; the value obtained is used as that worker's index into
srv_purge_tids[] (index 0 is used by the purge coordinator). */
static ulint purge_tid_i = 0;

/*********************************************************************//**
Worker thread that reads tasks from the work queue and executes them.
The thread repeatedly suspends itself, waits to be woken, and then tries
to execute one task, until the purge system state becomes
PURGE_STATE_EXIT.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	srv_slot_t*	slot;
	/* Per-worker index into srv_purge_tids[]. */
	ulint		tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);

	ut_ad(tid_i < srv_n_purge_threads);
	ut_ad(!srv_read_only_mode);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
	my_thread_init();
	THD *thd= create_thd(false, true, true, srv_worker_thread_key.m_value);

	srv_purge_tids[tid_i] = os_thread_get_tid();
	os_thread_set_priority(srv_purge_tids[tid_i],
			       srv_sched_priority_purge);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Worker thread starting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_WORKER);

	/* Worker threads are only expected when more than one purge
	thread is configured. */
	ut_a(srv_n_purge_threads > 1);

	srv_sys_mutex_enter();

	ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);

	srv_sys_mutex_exit();

	/* We need to ensure that the worker threads exit after the
	purge coordinator thread. Otherwise the purge coordinator can
	end up waiting forever in trx_purge_wait_for_workers_to_complete() */

	do {
		srv_suspend_thread(slot);

		os_event_wait(slot->event);

		srv_current_thread_priority = srv_purge_thread_priority;

		if (srv_task_execute()) {

			/* If there are tasks in the queue, wakeup
			the purge coordinator thread. */

			srv_wake_purge_thread_if_not_active();
		}

		/* Note: we are checking the state without holding the
		purge_sys->latch here. */
	} while (purge_sys->state != PURGE_STATE_EXIT);

	srv_free_slot(slot);

	/* Sanity-check the purge system state under the latch: by now
	the coordinator must have stopped and shutdown must be underway. */
	rw_lock_x_lock(&purge_sys->latch);

	ut_a(!purge_sys->running);
	ut_a(purge_sys->state == PURGE_STATE_EXIT);
	ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge worker thread exiting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	thd_free_innodb_session(thd);
	destroy_thd(thd);
	my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
3070
/*********************************************************************//**
Do the actual purge operation.
Runs purge batches in a loop, adapting the number of threads used between
1 and n_threads, until a batch purges nothing, the history list is empty,
srv_purge_should_exit() says to stop, or the purge state is no longer
PURGE_STATE_RUN. Note that the batch counter, the number of threads in
use and the history-length snapshot are function-local statics and thus
persist across calls.
@return length of history list before the last purge batch. */
static
ulint
srv_do_purge(
/*=========*/
	ulint		n_threads,	/*!< in: number of threads to use */
	ulint*		n_total_purged)	/*!< in/out: total pages purged */
{
	ulint		n_pages_purged;

	/* Number of batches issued so far; drives how often a batch is
	asked to truncate (see the trx_purge() call below). */
	static ulint	count = 0;
	/* Number of purge threads currently in use; 0 means "not yet
	initialised". */
	static ulint	n_use_threads = 0;
	/* History list length snapshot taken before the previous batch. */
	static ulint	rseg_history_len = 0;
	ulint		old_activity_count = srv_get_activity_count();

	ut_a(n_threads > 0);
	ut_ad(!srv_read_only_mode);

	/* Purge until there are no more records to purge and there is
	no change in configuration or server state. If the user has
	configured more than one purge thread then we treat that as a
	pool of threads and only use the extra threads if purge can't
	keep up with updates. */

	if (n_use_threads == 0) {
		n_use_threads = n_threads;
	}

	do {
		srv_current_thread_priority = srv_purge_thread_priority;

		if (trx_sys->rseg_history_len > rseg_history_len
		    || (srv_max_purge_lag > 0
			&& rseg_history_len > srv_max_purge_lag)) {

			/* History length is now longer than what it was
			when we took the last snapshot (or the snapshot
			exceeds the configured maximum purge lag). Use
			more threads. */

			if (n_use_threads < n_threads) {
				++n_use_threads;
			}

		} else if (srv_check_activity(old_activity_count)
			   && n_use_threads > 1) {

			/* History length same or smaller since last snapshot,
			use fewer threads. */

			--n_use_threads;

			old_activity_count = srv_get_activity_count();
		}

		/* Ensure that the purge threads are less than what
		was configured. */

		ut_a(n_use_threads > 0);
		ut_a(n_use_threads <= n_threads);

		/* Take a snapshot of the history list before purge.
		An empty history list means there is nothing to do. */
		if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
			break;
		}

		ulint	undo_trunc_freq =
			purge_sys->undo_trunc.get_rseg_truncate_frequency();

		/* Use the smaller of the configured frequency and the
		one requested by the undo-truncate state. */
		ulint	rseg_truncate_frequency = ut_min(
			static_cast<ulint>(srv_purge_rseg_truncate_frequency),
			undo_trunc_freq);

		/* The last argument is true on every
		rseg_truncate_frequency-th batch. */
		n_pages_purged = trx_purge(
			n_use_threads, srv_purge_batch_size,
			(++count % rseg_truncate_frequency) == 0);

		*n_total_purged += n_pages_purged;

	} while (!srv_purge_should_exit(n_pages_purged)
		 && n_pages_purged > 0
		 && purge_sys->state == PURGE_STATE_RUN);

	return(rseg_history_len);
}
3156
/*********************************************************************//**
Suspend the purge coordinator thread.
Marks purge as not running and waits on the slot event: with a timeout
while purge is merely idle, and without a timeout once purge has been
explicitly stopped or the history list is small and unchanged. On return
the slot is marked as not suspended. */
static
void
srv_purge_coordinator_suspend(
/*==========================*/
	srv_slot_t*	slot,			/*!< in/out: Purge coordinator
						thread slot */
	ulint		rseg_history_len)	/*!< in: history list length
						before last purge */
{
	ut_ad(!srv_read_only_mode);
	ut_a(slot->type == SRV_PURGE);

	/* true once the next wait should be an untimed one. */
	bool		stop = false;

	/** Maximum wait time on the purge event, in micro-seconds. */
	static const ulint	SRV_PURGE_MAX_TIMEOUT = 10000;

	int64_t		sig_count = srv_suspend_thread(slot);

	do {
		ulint		ret;

		rw_lock_x_lock(&purge_sys->latch);

		purge_sys->running = false;

		rw_lock_x_unlock(&purge_sys->latch);

		/* We don't wait right away on the non-timed wait because
		we want to signal the thread that wants to suspend purge. */

		if (stop) {
			os_event_wait_low(slot->event, sig_count);
			ret = 0;
		} else if (rseg_history_len <= trx_sys->rseg_history_len) {
			ret = os_event_wait_time_low(
				slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
		} else {
			/* We don't want to waste time waiting, if the
			history list increased by the time we got here,
			unless purge has been stopped. */
			ret = 0;
		}

		srv_sys_mutex_enter();

		/* The thread can be in state !suspended after the timeout
		but before this check if another thread sent a wakeup signal. */

		if (slot->suspended) {
			slot->suspended = FALSE;
			++srv_sys->n_threads_active[slot->type];
			ut_a(srv_sys->n_threads_active[slot->type] == 1);
		}

		srv_sys_mutex_exit();

		/* Suspend again to obtain a fresh signal count for a
		possible further wait; undone after the loop if unused. */
		sig_count = srv_suspend_thread(slot);

		rw_lock_x_lock(&purge_sys->latch);

		/* An explicit stop request only counts while the server
		is not shutting down. */
		stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
			&& purge_sys->state == PURGE_STATE_STOP);

		if (!stop) {
			ut_a(purge_sys->n_stop == 0);
			purge_sys->running = true;
		} else {
			ut_a(purge_sys->n_stop > 0);

			/* Signal that we are suspended. */
			os_event_set(purge_sys->event);
		}

		rw_lock_x_unlock(&purge_sys->latch);

		if (ret == OS_SYNC_TIME_EXCEEDED) {

			/* No new records added since wait started then simply
			wait for new records. The magic number 5000 is an
			approximation for the case where we have cached UNDO
			log records which prevent truncate of the UNDO
			segments. */

			if (rseg_history_len == trx_sys->rseg_history_len
			    && trx_sys->rseg_history_len < 5000) {

				stop = true;
			}
		}

	} while (stop);

	srv_sys_mutex_enter();

	/* Leave with the slot marked active again. */
	if (slot->suspended) {
		slot->suspended = FALSE;
		++srv_sys->n_threads_active[slot->type];
		ut_a(srv_sys->n_threads_active[slot->type] == 1);
	}

	srv_sys_mutex_exit();
}
3262
/*********************************************************************//**
Purge coordinator thread that schedules the purge tasks.
Moves the purge system from PURGE_STATE_INIT to PURGE_STATE_RUN, then
alternates between srv_do_purge() and suspending itself until
srv_purge_should_exit() is satisfied. On the way out it runs the final
purge batches, sets PURGE_STATE_EXIT and releases the worker threads so
that they can exit too.
@return a dummy parameter */
extern "C"
os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)(
/*=========================================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	my_thread_init();
	THD *thd= create_thd(false, true, true, srv_purge_thread_key.m_value);
	srv_slot_t*	slot;
	/* Starts out as "undefined" rather than 0 so that the first
	loop iteration does not suspend on the n_total_purged == 0 test. */
	ulint           n_total_purged = ULINT_UNDEFINED;

	ut_ad(!srv_read_only_mode);
	ut_a(srv_n_purge_threads >= 1);
	ut_a(trx_purge_state() == PURGE_STATE_INIT);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);

	/* The coordinator uses index 0 of srv_purge_tids[]. */
	srv_purge_tids[0] = os_thread_get_tid();
	os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);

	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->running = true;
	purge_sys->state = PURGE_STATE_RUN;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge coordinator thread created, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_PURGE);

	ulint	rseg_history_len = trx_sys->rseg_history_len;

	do {
		/* If there are no records to purge or the last
		purge didn't purge any records then wait for activity. */

		if (srv_shutdown_state == SRV_SHUTDOWN_NONE
		    && (purge_sys->state == PURGE_STATE_STOP
			|| n_total_purged == 0)) {

			srv_purge_coordinator_suspend(slot, rseg_history_len);
		}

		/* Re-check after a possible suspend: shutdown may have
		started in the meantime. */
		if (srv_purge_should_exit(n_total_purged)) {
			ut_a(!slot->suspended);
			break;
		}

		n_total_purged = 0;

		srv_current_thread_priority = srv_purge_thread_priority;

		rseg_history_len = srv_do_purge(
			srv_n_purge_threads, &n_total_purged);

		/* Purging counts as server activity. */
		if (n_total_purged != 0) {
			srv_inc_activity_count();
		}

	} while (!srv_purge_should_exit(n_total_purged));

	/* Ensure that we don't jump out of the loop unless the
	exit condition is satisfied. */

	ut_a(srv_purge_should_exit(n_total_purged));

	ulint	n_pages_purged = ULINT_MAX;

	/* Ensure that all records are purged if it is not a fast shutdown.
	This covers the case where a record can be added after we exit the
	loop above. */
	while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
		n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
	}

#ifdef UNIV_DEBUG
	if (srv_fast_shutdown == 0) {
		trx_commit_disallowed = true;
	}
#endif /* UNIV_DEBUG */

	/* This trx_purge is called to remove any undo records (added by
	background threads) after completion of the above loop. When
	srv_fast_shutdown != 0, a large batch size can cause significant
	delay in shutdown, so reducing the batch size to magic number 20
	(which was default in 5.5), which we hope will be sufficient to
	remove all the undo records */
	const uint temp_batch_size = 20;

	/* Batch size is capped at temp_batch_size. */
	n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
				      ? srv_purge_batch_size : temp_batch_size,
				   true);
	ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);

	/* The task queue should always be empty, independent of fast
	shutdown state. */
	ut_a(srv_get_task_queue_length() == 0);

	srv_free_slot(slot);

	/* Note that we are shutting down. */
	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->state = PURGE_STATE_EXIT;

	/* If there are any pending undo-tablespace truncate then clear
	it off as we plan to shutdown the purge thread. */
	purge_sys->undo_trunc.clear();

	purge_sys->running = false;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ib::info() << "Purge coordinator exiting, id "
		<< os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */

	/* Ensure that all the worker threads quit: wake them so they
	can observe PURGE_STATE_EXIT. */
	if (srv_n_purge_threads > 1) {
		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
	}

	thd_free_innodb_session(thd);
	destroy_thd(thd);
	my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
3402
3403 /**********************************************************************//**
3404 Enqueues a task to server task queue and releases a worker thread, if there
3405 is a suspended one. */
3406 void
srv_que_task_enqueue_low(que_thr_t * thr)3407 srv_que_task_enqueue_low(
3408 /*=====================*/
3409 que_thr_t* thr) /*!< in: query thread */
3410 {
3411 ut_ad(!srv_read_only_mode);
3412 mutex_enter(&srv_sys->tasks_mutex);
3413
3414 UT_LIST_ADD_LAST(srv_sys->tasks, thr);
3415
3416 mutex_exit(&srv_sys->tasks_mutex);
3417
3418 srv_release_threads(SRV_WORKER, 1);
3419 }
3420
3421 /**********************************************************************//**
3422 Get count of tasks in the queue.
3423 @return number of tasks in queue */
3424 ulint
srv_get_task_queue_length(void)3425 srv_get_task_queue_length(void)
3426 /*===========================*/
3427 {
3428 ulint n_tasks;
3429
3430 ut_ad(!srv_read_only_mode);
3431
3432 mutex_enter(&srv_sys->tasks_mutex);
3433
3434 n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
3435
3436 mutex_exit(&srv_sys->tasks_mutex);
3437
3438 return(n_tasks);
3439 }
3440
3441 /**********************************************************************//**
3442 Wakeup the purge threads. */
3443 void
srv_purge_wakeup(void)3444 srv_purge_wakeup(void)
3445 /*==================*/
3446 {
3447 ut_ad(!srv_read_only_mode);
3448
3449 if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
3450
3451 srv_release_threads(SRV_PURGE, 1);
3452
3453 if (srv_n_purge_threads > 1) {
3454 ulint n_workers = srv_n_purge_threads - 1;
3455
3456 srv_release_threads(SRV_WORKER, n_workers);
3457 }
3458 }
3459 }
3460
3461 /** Check if tablespace is being truncated.
3462 (Ignore system-tablespace as we don't re-create the tablespace
3463 and so some of the action that are suppressed by this function
3464 for independent tablespace are not applicable to system-tablespace).
3465 @param space_id space_id to check for truncate action
3466 @return true if being truncated, false if not being
3467 truncated or tablespace is system-tablespace. */
3468 bool
srv_is_tablespace_truncated(ulint space_id)3469 srv_is_tablespace_truncated(ulint space_id)
3470 {
3471 if (is_system_tablespace(space_id)) {
3472 return(false);
3473 }
3474
3475 return(truncate_t::is_tablespace_truncated(space_id)
3476 || undo::Truncate::is_tablespace_truncated(space_id));
3477
3478 }
3479
3480 /** Check if tablespace was truncated.
3481 @param[in] space space object to check for truncate action
3482 @return true if tablespace was truncated and we still have an active
3483 MLOG_TRUNCATE REDO log record. */
3484 bool
srv_was_tablespace_truncated(const fil_space_t * space)3485 srv_was_tablespace_truncated(const fil_space_t* space)
3486 {
3487 if (space == NULL) {
3488 ut_ad(0);
3489 return(false);
3490 }
3491
3492 bool has_shared_space = FSP_FLAGS_GET_SHARED(space->flags);
3493
3494 if (is_system_tablespace(space->id) || has_shared_space) {
3495 return(false);
3496 }
3497
3498 return(truncate_t::was_tablespace_truncated(space->id));
3499 }
3500
3501 /** Call exit(3) */
3502 void
srv_fatal_error()3503 srv_fatal_error()
3504 {
3505
3506 ib::error() << "Cannot continue operation.";
3507
3508 fflush(stderr);
3509
3510 ut_d(innodb_calling_exit = true);
3511
3512 srv_shutdown_all_bg_threads();
3513
3514 exit(3);
3515 }
3516
3517 /** Check whether given space id is undo tablespace id
3518 @param[in] space_id space id to check
3519 @return true if it is undo tablespace else false. */
3520 bool
srv_is_undo_tablespace(ulint space_id)3521 srv_is_undo_tablespace(
3522 ulint space_id)
3523 {
3524 if (srv_undo_space_id_start == 0) {
3525 return(false);
3526 }
3527
3528 return(space_id >= srv_undo_space_id_start
3529 && space_id < (srv_undo_space_id_start
3530 + srv_undo_tablespaces_open));
3531 }
3532
3533 bool
srv_enable_redo_encryption(THD * thd)3534 srv_enable_redo_encryption(THD* thd)
3535 {
3536 if (srv_redo_log_encrypt == REDO_LOG_ENCRYPT_MK) {
3537 return srv_enable_redo_encryption_mk(thd);
3538 }
3539
3540 if (srv_redo_log_encrypt == REDO_LOG_ENCRYPT_RK) {
3541 return srv_enable_redo_encryption_rk(thd);
3542 }
3543
3544 return false;
3545 }
3546
3547 bool
srv_enable_redo_encryption_mk(THD * thd)3548 srv_enable_redo_encryption_mk(THD* thd)
3549 {
3550 switch (existing_redo_encryption_mode) {
3551 case REDO_LOG_ENCRYPT_RK:
3552 ib::warn() <<
3553 "Redo log encryption mode"
3554 " can't be switched without stopping the server and"
3555 " recreating the redo logs. Current mode is "
3556 << log_encrypt_name(existing_redo_encryption_mode)
3557 << ", requested master_key.";
3558 if (thd != NULL) {
3559 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_CANT_BE_CHANGED,
3560 "master_key",
3561 log_encrypt_name(existing_redo_encryption_mode));
3562 }
3563
3564 return true;
3565 case REDO_LOG_ENCRYPT_OFF:
3566 case REDO_LOG_ENCRYPT_MK:
3567 break;
3568 }
3569
3570 fil_space_t* space = fil_space_get(dict_sys_t::s_log_space_first_id);
3571 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
3572 return false;
3573 }
3574 byte key[ENCRYPTION_KEY_LEN];
3575 byte iv[ENCRYPTION_KEY_LEN];
3576
3577 Encryption::random_value(iv);
3578 Encryption::random_value(key);
3579
3580 if (!log_write_encryption(key, iv, REDO_LOG_ENCRYPT_MK)) {
3581
3582 ib::error() << "Can't set redo log tablespace to be encrypted.";
3583 if (thd != NULL) {
3584 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3585 "Can't set redo log tablespace to be"
3586 " encrypted.");
3587 }
3588 return true;
3589 }
3590
3591 space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
3592
3593 const dberr_t err = fil_set_encryption(space->id, Encryption::AES, key, iv);
3594 if (err != DB_SUCCESS) {
3595 ib::error() << "Can't set redo log tablespace to be encrypted.";
3596 if (thd != NULL) {
3597 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3598 "Can't set redo log tablespace to be"
3599 " encrypted.");
3600 }
3601 return true;
3602 }
3603
3604 ib::info() << "Redo log encryption is enabled.";
3605
3606 return false;
3607 }
3608
3609
3610 bool
srv_enable_redo_encryption_rk(THD * thd)3611 srv_enable_redo_encryption_rk(THD* thd)
3612 {
3613 switch (existing_redo_encryption_mode) {
3614 case REDO_LOG_ENCRYPT_MK:
3615 ib::error() <<
3616 "Redo log encryption mode"
3617 " can't be switched without stopping the server and"
3618 " recreating the redo logs. Current mode is "
3619 << log_encrypt_name(existing_redo_encryption_mode)
3620 << ", requested keyring_key.";
3621 if (thd != NULL) {
3622 ib_senderrf(thd, IB_LOG_LEVEL_WARN,
3623 ER_REDO_ENCRYPTION_CANT_BE_CHANGED,
3624 "keyring_key",
3625 log_encrypt_name(existing_redo_encryption_mode));
3626 }
3627 return true;
3628 case REDO_LOG_ENCRYPT_OFF:
3629 case REDO_LOG_ENCRYPT_RK:
3630 break;
3631 }
3632
3633 fil_space_t* space = fil_space_get(dict_sys_t::s_log_space_first_id);
3634 if (FSP_FLAGS_GET_ENCRYPTION(space->flags))
3635 {
3636 return(false);
3637 }
3638
3639 byte key[ENCRYPTION_KEY_LEN];
3640 byte iv[ENCRYPTION_KEY_LEN];
3641 uint version;
3642
3643 Encryption::random_value(iv);
3644
3645 // load latest key & write version
3646
3647 redo_log_key* mkey = redo_log_key_mgr.load_latest_key(thd, true);
3648 if (mkey == NULL) {
3649 return(true);
3650 }
3651 version = mkey->version;
3652 srv_redo_log_key_version = version;
3653 memcpy(key, mkey->key, ENCRYPTION_KEY_LEN);
3654
3655 #ifdef UNIV_ENCRYPT_DEBUG
3656 fprintf(stderr, "Fetched redo key: %s.\n", key);
3657 #endif
3658
3659 if (!log_write_encryption(key, iv, REDO_LOG_ENCRYPT_RK)) {
3660 ib::error() << "Can't set redo log tablespace to be"
3661 " encrypted.";
3662 if (thd != NULL) {
3663 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3664 "Can't set redo log tablespace to be"
3665 " encrypted.");
3666 }
3667 return(true);
3668 }
3669
3670 space->encryption_redo_key = mkey;
3671 space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
3672 space->encryption_key_version = version;
3673 dberr_t err = fil_set_encryption(
3674 space->id, Encryption::KEYRING,
3675 key, iv);
3676
3677 if(err != DB_SUCCESS) {
3678 ib::error() << "Can't set redo log tablespace to be encrypted.";
3679 if (thd != NULL) {
3680 ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_REDO_ENCRYPTION_ERROR,
3681 "Can't set redo log tablespace to be"
3682 " encrypted.");
3683 }
3684 return(true);
3685 }
3686
3687 ib::info() << "Redo log encryption is enabled.";
3688
3689 return(false);
3690 }
3691
3692
3693 /** Enable the undo log encryption if it is set.
3694 It will try to enable the undo log encryption and write the metadata to
3695 undo log file header, if innodb_undo_log_encrypt is ON. */
3696 static
3697 void
srv_enable_undo_encryption_if_set()3698 srv_enable_undo_encryption_if_set()
3699 {
3700 fil_space_t* space;
3701 const char* cant_set_undo_tablespace = "Can't set undo tablespace";
3702 const char* to_be_encrypted = " to be encrypted";
3703 if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
3704 return;
3705 }
3706
3707 /* Check if encryption for undo log is enabled or not. If it's
3708 enabled, we will store the encryption metadata to the space header
3709 and start to encrypt the undo log block from now on. */
3710 if (srv_undo_log_encrypt) {
3711 if (srv_undo_tablespaces == 0) {
3712 srv_undo_log_encrypt = false;
3713 ib::error() << cant_set_undo_tablespace << "s"
3714 << to_be_encrypted
3715 << ", since innodb_undo_tablespaces=0.";
3716 return;
3717 }
3718 if (srv_read_only_mode) {
3719 srv_undo_log_encrypt = false;
3720 ib::error() << cant_set_undo_tablespace << "s"
3721 << to_be_encrypted
3722 << " in read-only mode.";
3723 return;
3724 }
3725 ulint undo_spaces[TRX_SYS_N_RSEGS + 1];
3726 const ulint undo_spaces_no = trx_rseg_get_n_undo_tablespaces(undo_spaces);
3727 for (ulint undo_idx = 0; undo_idx < undo_spaces_no; ++undo_idx)
3728 {
3729 /* Skip system tablespace, since it's also shared
3730 tablespace. */
3731 const ulint space_id = undo_spaces[undo_idx];
3732 if (space_id == TRX_SYS_SPACE) {
3733 continue;
3734 }
3735 space = fil_space_get(space_id);
3736 ut_ad(fsp_is_undo_tablespace(space_id));
3737 /* This flag will be written to the header
3738 later, by calling the fsp_header_write_encryption()
3739 function: */
3740 ulint new_flags =
3741 space->flags | FSP_FLAGS_MASK_ENCRYPTION;
3742 /* We need the server_uuid initialized, otherwise,
3743 the keyname will not contains server uuid. */
3744 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)
3745 || strlen(server_uuid) == 0) {
3746 continue;
3747 }
3748 dberr_t err;
3749 mtr_t mtr;
3750 byte encrypt_info[ENCRYPTION_INFO_SIZE_V2];
3751 byte key[ENCRYPTION_KEY_LEN];
3752 byte iv[ENCRYPTION_KEY_LEN];
3753 Encryption::random_value(key);
3754 Encryption::random_value(iv);
3755 mtr_start(&mtr);
3756 mtr_x_lock_space(space->id, &mtr);
3757 memset(encrypt_info, 0,
3758 ENCRYPTION_INFO_SIZE_V2);
3759 if (!Encryption::fill_encryption_info(
3760 key, iv,
3761 encrypt_info)) {
3762 srv_undo_log_encrypt = false;
3763 ib::error() << cant_set_undo_tablespace
3764 << " number " << space_id
3765 << to_be_encrypted << ".";
3766 mtr_commit(&mtr);
3767 return;
3768 } else {
3769 if (!fsp_header_write_encryption(
3770 space->id,
3771 new_flags,
3772 encrypt_info,
3773 true,
3774 &mtr)) {
3775 srv_undo_log_encrypt = false;
3776 ib::error() << cant_set_undo_tablespace
3777 << " number "
3778 << space_id
3779 << to_be_encrypted
3780 << ". Failed to write header"
3781 << " page.";
3782 mtr_commit(&mtr);
3783 return;
3784 }
3785 space->flags |=
3786 FSP_FLAGS_MASK_ENCRYPTION;
3787 err = fil_set_encryption(
3788 space->id, Encryption::AES,
3789 key, iv);
3790 if (err != DB_SUCCESS) {
3791 srv_undo_log_encrypt = false;
3792 ib::error() << cant_set_undo_tablespace
3793 << " number "
3794 << space_id
3795 << to_be_encrypted
3796 << ". Error=" << err << ".";
3797 mtr_commit(&mtr);
3798 return;
3799 } else {
3800 ib::info() << "Encryption is enabled"
3801 " for undo tablespace number "
3802 << space_id << ".";
3803 #ifdef UNIV_ENCRYPT_DEBUG
3804 ut_print_buf(stderr, key, 32);
3805 ut_print_buf(stderr, iv, 32);
3806 #endif
3807 }
3808 }
3809 mtr_commit(&mtr);
3810 }
3811 //undo::spaces->s_unlock();
3812 return;
3813 }
3814 /* If the undo log space is using default key, rotate
3815 it. We need the server_uuid initialized, otherwise,
3816 the keyname will not contains server uuid. */
3817 if (Encryption::master_key_id != 0
3818 || srv_read_only_mode
3819 || strlen(server_uuid) == 0) {
3820 return;
3821 }
3822 ulint undo_spaces[TRX_SYS_N_RSEGS + 1];
3823 const ulint undo_spaces_no = trx_rseg_get_n_undo_tablespaces(undo_spaces);
3824 for (ulint undo_idx = 0; undo_idx < undo_spaces_no; ++undo_idx)
3825 {
3826 const ulint space_id = undo_spaces[undo_idx];
3827 ut_ad(fsp_is_undo_tablespace(space_id));
3828 space = fil_space_get(space_id);
3829 ut_ad(space);
3830 if (space->encryption_type == Encryption::NONE) {
3831 continue;
3832 }
3833 byte encrypt_info[ENCRYPTION_INFO_SIZE_V2];
3834 mtr_t mtr;
3835 ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
3836 mtr_start(&mtr);
3837 mtr_x_lock_space(space->id, &mtr);
3838 memset(encrypt_info, 0,
3839 ENCRYPTION_INFO_SIZE_V2);
3840 if (!fsp_header_rotate_encryption(
3841 space,
3842 encrypt_info,
3843 &mtr)) {
3844 ib::error() << "Can't rotate encryption on undo"
3845 " tablespace number "
3846 << space_id << ".";
3847 } else {
3848 ib::info() << "Encryption is enabled"
3849 " for undo tablespace number "
3850 << space_id << ".";
3851 }
3852 mtr_commit(&mtr);
3853 }
3854 }
3855