1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /**************************************************//**
43 @file srv/srv0srv.cc
44 The database server main program
45 
46 Created 10/8/1995 Heikki Tuuri
47 *******************************************************/
48 
49 /* Dummy comment */
50 #include "srv0srv.h"
51 
52 #include "ut0mem.h"
53 #include "ut0ut.h"
54 #include "os0proc.h"
55 #include "mem0mem.h"
56 #include "mem0pool.h"
57 #include "sync0sync.h"
58 #include "que0que.h"
59 #include "log0recv.h"
60 #include "pars0pars.h"
61 #include "usr0sess.h"
62 #include "lock0lock.h"
63 #include "trx0purge.h"
64 #include "ibuf0ibuf.h"
65 #include "buf0flu.h"
66 #include "buf0lru.h"
67 #include "btr0sea.h"
68 #include "dict0load.h"
69 #include "dict0boot.h"
70 #include "dict0stats_bg.h" /* dict_stats_event */
71 #include "srv0start.h"
72 #include "row0mysql.h"
73 #include "ha_prototypes.h"
74 #include "trx0i_s.h"
75 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
76 #include "srv0mon.h"
77 #include "ut0crc32.h"
78 
79 #include "mysql/plugin.h"
80 #include "mysql/service_thd_wait.h"
81 
82 #ifdef WITH_WSREP
83 extern int wsrep_debug;
84 extern int wsrep_trx_is_aborting(void *thd_ptr);
85 #endif
86 /* The following is the maximum allowed duration of a lock wait. */
87 UNIV_INTERN ulint	srv_fatal_semaphore_wait_threshold = 600;
88 
89 /* How much data manipulation language (DML) statements need to be delayed,
90 in microseconds, in order to reduce the lagging of the purge thread. */
91 UNIV_INTERN ulint	srv_dml_needed_delay = 0;
92 
93 UNIV_INTERN ibool	srv_monitor_active = FALSE;
94 UNIV_INTERN ibool	srv_error_monitor_active = FALSE;
95 
96 UNIV_INTERN ibool	srv_buf_dump_thread_active = FALSE;
97 
98 UNIV_INTERN ibool	srv_dict_stats_thread_active = FALSE;
99 
100 UNIV_INTERN const char*	srv_main_thread_op_info = "";
101 
102 /** Prefix used by MySQL to indicate pre-5.1 table name encoding */
103 const char		srv_mysql50_table_name_prefix[10] = "#mysql50#";
104 
105 /* Server parameters which are read from the initfile */
106 
107 /* The following three are dir paths which are catenated before file
108 names, where the file name itself may also contain a path */
109 
110 UNIV_INTERN char*	srv_data_home	= NULL;
111 
112 /** Rollback files directory, can be absolute. */
113 UNIV_INTERN char*	srv_undo_dir = NULL;
114 
115 /** The number of tablespaces to use for rollback segments. */
116 UNIV_INTERN ulong	srv_undo_tablespaces = 8;
117 
118 /** The number of UNDO tablespaces that are open and ready to use. */
119 UNIV_INTERN ulint	srv_undo_tablespaces_open = 8;
120 
121 /* The number of rollback segments to use */
122 UNIV_INTERN ulong	srv_undo_logs = 1;
123 
124 #ifdef UNIV_LOG_ARCHIVE
125 UNIV_INTERN char*	srv_arch_dir	= NULL;
126 #endif /* UNIV_LOG_ARCHIVE */
127 
128 /** Set if InnoDB must operate in read-only mode. We don't do any
129 recovery and open all tables in RO mode instead of RW mode. We don't
130 sync the max trx id to disk either. */
131 UNIV_INTERN my_bool	srv_read_only_mode;
/** store to its own file each table created by a user; data
133 dictionary tables are in the system tablespace 0 */
134 UNIV_INTERN my_bool	srv_file_per_table;
135 /** The file format to use on new *.ibd files. */
136 UNIV_INTERN ulint	srv_file_format = 0;
137 /** Whether to check file format during startup.  A value of
138 UNIV_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
139 set it to the highest format we support. */
140 UNIV_INTERN ulint	srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
141 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
142 is greater than SRV_FORCE_NO_TRX_UNDO. */
143 UNIV_INTERN my_bool	high_level_read_only;
144 
145 #if UNIV_FORMAT_A
146 # error "UNIV_FORMAT_A must be 0!"
147 #endif
148 
149 /** Place locks to records only i.e. do not use next-key locking except
150 on duplicate key checking and foreign key checking */
151 UNIV_INTERN ibool	srv_locks_unsafe_for_binlog = FALSE;
152 /** Sort buffer size in index creation */
153 UNIV_INTERN ulong	srv_sort_buf_size = 1048576;
154 /** Maximum modification log file size for online index creation */
155 UNIV_INTERN unsigned long long	srv_online_max_size;
156 
157 /* If this flag is TRUE, then we will use the native aio of the
158 OS (provided we compiled Innobase with it in), otherwise we will
159 use simulated aio we build below with threads.
160 Currently we support native aio on windows and linux */
161 UNIV_INTERN my_bool	srv_use_native_aio = TRUE;
162 UNIV_INTERN my_bool	srv_numa_interleave = FALSE;
163 
164 #ifdef __WIN__
165 /* Windows native condition variables. We use runtime loading / function
166 pointers, because they are not available on Windows Server 2003 and
167 Windows XP/2000.
168 
169 We use condition for events on Windows if possible, even if os_event
170 resembles Windows kernel event object well API-wise. The reason is
171 performance, kernel objects are heavyweights and WaitForSingleObject() is a
172 performance killer causing calling thread to context switch. Besides, Innodb
173 is preallocating large number (often millions) of os_events. With kernel event
174 objects it takes a big chunk out of non-paged pool, which is better suited
175 for tasks like IO than for storing idle event objects. */
176 UNIV_INTERN ibool	srv_use_native_conditions = FALSE;
177 #endif /* __WIN__ */
178 
179 UNIV_INTERN ulint	srv_n_data_files = 0;
180 UNIV_INTERN char**	srv_data_file_names = NULL;
181 /* size in database pages */
182 UNIV_INTERN ulint*	srv_data_file_sizes = NULL;
183 
184 /* if TRUE, then we auto-extend the last data file */
185 UNIV_INTERN ibool	srv_auto_extend_last_data_file	= FALSE;
186 /* if != 0, this tells the max size auto-extending may increase the
187 last data file size */
188 UNIV_INTERN ulint	srv_last_file_size_max	= 0;
189 /* If the last data file is auto-extended, we add this
190 many pages to it at a time */
191 UNIV_INTERN ulong	srv_auto_extend_increment = 8;
192 UNIV_INTERN ulint*	srv_data_file_is_raw_partition = NULL;
193 
194 /* If the following is TRUE we do not allow inserts etc. This protects
195 the user from forgetting the 'newraw' keyword to my.cnf */
196 
197 UNIV_INTERN ibool	srv_created_new_raw	= FALSE;
198 
199 UNIV_INTERN char*	srv_log_group_home_dir	= NULL;
200 
201 UNIV_INTERN ulong	srv_n_log_files		= SRV_N_LOG_FILES_MAX;
202 /* size in database pages */
203 UNIV_INTERN ib_uint64_t	srv_log_file_size	= IB_UINT64_MAX;
204 UNIV_INTERN ib_uint64_t	srv_log_file_size_requested;
205 /* size in database pages */
206 UNIV_INTERN ulint	srv_log_buffer_size	= ULINT_MAX;
207 UNIV_INTERN ulong	srv_flush_log_at_trx_commit = 1;
208 UNIV_INTERN uint	srv_flush_log_at_timeout = 1;
209 UNIV_INTERN ulong	srv_page_size		= UNIV_PAGE_SIZE_DEF;
210 UNIV_INTERN ulong	srv_page_size_shift	= UNIV_PAGE_SIZE_SHIFT_DEF;
211 
212 /* Try to flush dirty pages so as to avoid IO bursts at
213 the checkpoints. */
214 UNIV_INTERN char	srv_adaptive_flushing	= TRUE;
215 
216 /** Maximum number of times allowed to conditionally acquire
217 mutex before switching to blocking wait on the mutex */
218 #define MAX_MUTEX_NOWAIT	20
219 
220 /** Check whether the number of failed nonblocking mutex
221 acquisition attempts exceeds maximum allowed value. If so,
222 srv_printf_innodb_monitor() will request mutex acquisition
223 with mutex_enter(), which will wait until it gets the mutex. */
224 #define MUTEX_NOWAIT(mutex_skipped)	((mutex_skipped) < MAX_MUTEX_NOWAIT)
225 
226 #ifdef WITH_INNODB_DISALLOW_WRITES
227 UNIV_INTERN os_event_t	srv_allow_writes_event;
228 #endif /* WITH_INNODB_DISALLOW_WRITES */
229 
230 /** The sort order table of the MySQL latin1_swedish_ci character set
231 collation */
232 UNIV_INTERN const byte*	srv_latin1_ordering;
233 
234 /* use os/external memory allocator */
235 UNIV_INTERN my_bool	srv_use_sys_malloc	= TRUE;
236 /* requested size in kilobytes */
237 UNIV_INTERN ulint	srv_buf_pool_size	= ULINT_MAX;
238 /* requested number of buffer pool instances */
239 UNIV_INTERN ulint       srv_buf_pool_instances  = 1;
240 /* number of locks to protect buf_pool->page_hash */
241 UNIV_INTERN ulong	srv_n_page_hash_locks = 16;
242 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
243 UNIV_INTERN ulong	srv_LRU_scan_depth	= 1024;
244 /** whether or not to flush neighbors of a block */
245 UNIV_INTERN ulong	srv_flush_neighbors	= 1;
246 /* previously requested size */
247 UNIV_INTERN ulint	srv_buf_pool_old_size;
248 /* current size in kilobytes */
249 UNIV_INTERN ulint	srv_buf_pool_curr_size	= 0;
250 /* size in bytes */
251 UNIV_INTERN ulint	srv_mem_pool_size	= ULINT_MAX;
252 UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;
253 
254 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
255 instead. */
256 UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
257 UNIV_INTERN ulint	srv_n_read_io_threads	= ULINT_MAX;
258 UNIV_INTERN ulint	srv_n_write_io_threads	= ULINT_MAX;
259 
260 /* Switch to enable random read ahead. */
261 UNIV_INTERN my_bool	srv_random_read_ahead	= FALSE;
262 /* User settable value of the number of pages that must be present
263 in the buffer cache and accessed sequentially for InnoDB to trigger a
264 readahead request. */
265 UNIV_INTERN ulong	srv_read_ahead_threshold	= 56;
266 
267 #ifdef UNIV_LOG_ARCHIVE
268 UNIV_INTERN ibool		srv_log_archive_on	= FALSE;
269 UNIV_INTERN ibool		srv_archive_recovery	= 0;
270 UNIV_INTERN ib_uint64_t	srv_archive_recovery_limit_lsn;
271 #endif /* UNIV_LOG_ARCHIVE */
272 
273 /* This parameter is used to throttle the number of insert buffers that are
274 merged in a batch. By increasing this parameter on a faster disk you can
275 possibly reduce the number of I/O operations performed to complete the
276 merge operation. The value of this parameter is used as is by the
277 background loop when the system is idle (low load), on a busy system
278 the parameter is scaled down by a factor of 4, this is to avoid putting
279 a heavier load on the I/O sub system. */
280 
281 UNIV_INTERN ulong	srv_insert_buffer_batch_size = 20;
282 
283 UNIV_INTERN char*	srv_file_flush_method_str = NULL;
284 UNIV_INTERN ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
285 UNIV_INTERN ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
286 
287 UNIV_INTERN ulint	srv_max_n_open_files	  = 300;
288 
289 /* Number of IO operations per second the server can do */
290 UNIV_INTERN ulong	srv_io_capacity         = 200;
291 UNIV_INTERN ulong	srv_max_io_capacity     = 400;
292 
293 /* The InnoDB main thread tries to keep the ratio of modified pages
294 in the buffer pool to all database pages in the buffer pool smaller than
295 the following number. But it is not guaranteed that the value stays below
296 that during a time of heavy update/insert activity. */
297 
298 UNIV_INTERN ulong	srv_max_buf_pool_modified_pct	= 75;
299 UNIV_INTERN ulong	srv_max_dirty_pages_pct_lwm	= 50;
300 
301 /* This is the percentage of log capacity at which adaptive flushing,
302 if enabled, will kick in. */
303 UNIV_INTERN ulong	srv_adaptive_flushing_lwm	= 10;
304 
305 /* Number of iterations over which adaptive flushing is averaged. */
306 UNIV_INTERN ulong	srv_flushing_avg_loops		= 30;
307 
308 /* The number of purge threads to use.*/
309 UNIV_INTERN ulong	srv_n_purge_threads = 1;
310 
311 /* the number of pages to purge in one batch */
312 UNIV_INTERN ulong	srv_purge_batch_size = 20;
313 
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL values when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
317 UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
318 
319 UNIV_INTERN srv_stats_t	srv_stats;
320 
321 /* structure to pass status variables to MySQL */
322 UNIV_INTERN export_var_t export_vars;
323 
324 /** Normally 0. When nonzero, skip some phases of crash recovery,
325 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
326 by SELECT or mysqldump. When this is nonzero, we do not allow any user
327 modifications to the data. */
328 UNIV_INTERN ulong	srv_force_recovery;
329 #ifndef DBUG_OFF
330 /** Inject a crash at different steps of the recovery process.
331 This is for testing and debugging only. */
332 UNIV_INTERN ulong	srv_force_recovery_crash;
333 #endif /* !DBUG_OFF */
334 
335 /** Print all user-level transactions deadlocks to mysqld stderr */
336 
337 UNIV_INTERN my_bool	srv_print_all_deadlocks = FALSE;
338 
339 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
340 UNIV_INTERN my_bool	srv_cmp_per_index_enabled = FALSE;
341 
342 /* If the following is set to 1 then we do not run purge and insert buffer
343 merge to completion before shutdown. If it is set to 2, do not even flush the
344 buffer pool to data files at the shutdown: we effectively 'crash'
345 InnoDB (but lose no committed transactions). */
346 UNIV_INTERN ulint	srv_fast_shutdown	= 0;
347 
/* Generate an innodb_status.<pid> file */
349 UNIV_INTERN ibool	srv_innodb_status	= FALSE;
350 
351 /* When estimating number of different key values in an index, sample
352 this many index pages, there are 2 ways to calculate statistics:
353 * persistent stats that are calculated by ANALYZE TABLE and saved
354   in the innodb database.
355 * quick transient stats, that are used if persistent stats for the given
356   table/index are not found in the innodb database */
357 UNIV_INTERN unsigned long long	srv_stats_transient_sample_pages = 8;
358 UNIV_INTERN my_bool		srv_stats_persistent = TRUE;
359 UNIV_INTERN my_bool		srv_stats_include_delete_marked = FALSE;
360 UNIV_INTERN unsigned long long	srv_stats_persistent_sample_pages = 20;
361 UNIV_INTERN my_bool		srv_stats_auto_recalc = TRUE;
362 
363 UNIV_INTERN ibool	srv_use_doublewrite_buf	= TRUE;
364 
/** doublewrite buffer is 2MB in size i.e.: it can hold 128 16K pages.
366 The following parameter is the size of the buffer that is used for
367 batch flushing i.e.: LRU flushing and flush_list flushing. The rest
368 of the pages are used for single page flushing. */
369 UNIV_INTERN ulong	srv_doublewrite_batch_size	= 120;
370 
371 UNIV_INTERN ulong	srv_replication_delay		= 0;
372 
373 /*-------------------------------------------*/
374 UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 30;
375 UNIV_INTERN ulong	srv_spin_wait_delay	= 6;
376 UNIV_INTERN ibool	srv_priority_boost	= TRUE;
377 
378 #ifdef UNIV_DEBUG
379 UNIV_INTERN ibool	srv_print_thread_releases	= FALSE;
380 UNIV_INTERN ibool	srv_print_lock_waits		= FALSE;
381 UNIV_INTERN ibool	srv_print_buf_io		= FALSE;
382 UNIV_INTERN ibool	srv_print_log_io		= FALSE;
383 UNIV_INTERN ibool	srv_print_latch_waits		= FALSE;
384 #endif /* UNIV_DEBUG */
385 
386 static ulint		srv_n_rows_inserted_old		= 0;
387 static ulint		srv_n_rows_updated_old		= 0;
388 static ulint		srv_n_rows_deleted_old		= 0;
389 static ulint		srv_n_rows_read_old		= 0;
390 
391 UNIV_INTERN ulint	srv_truncated_status_writes	= 0;
392 UNIV_INTERN ulint	srv_available_undo_logs         = 0;
393 
394 /* Set the following to 0 if you want InnoDB to write messages on
395 stderr on startup/shutdown. */
396 UNIV_INTERN ibool	srv_print_verbose_log		= TRUE;
397 UNIV_INTERN my_bool	srv_print_innodb_monitor	= FALSE;
398 UNIV_INTERN my_bool	srv_print_innodb_lock_monitor	= FALSE;
399 UNIV_INTERN ibool	srv_print_innodb_tablespace_monitor = FALSE;
400 UNIV_INTERN ibool	srv_print_innodb_table_monitor = FALSE;
401 
402 /* Array of English strings describing the current state of an
403 i/o handler thread */
404 
405 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
406 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
407 
408 UNIV_INTERN time_t	srv_last_monitor_time;
409 
410 UNIV_INTERN ib_mutex_t	srv_innodb_monitor_mutex;
411 
412 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
413 UNIV_INTERN ib_mutex_t	srv_monitor_file_mutex;
414 
415 #ifdef UNIV_PFS_MUTEX
416 # ifndef HAVE_ATOMIC_BUILTINS
417 /* Key to register server_mutex with performance schema */
418 UNIV_INTERN mysql_pfs_key_t	server_mutex_key;
419 # endif /* !HAVE_ATOMIC_BUILTINS */
420 /** Key to register srv_innodb_monitor_mutex with performance schema */
421 UNIV_INTERN mysql_pfs_key_t	srv_innodb_monitor_mutex_key;
422 /** Key to register srv_monitor_file_mutex with performance schema */
423 UNIV_INTERN mysql_pfs_key_t	srv_monitor_file_mutex_key;
424 /** Key to register srv_dict_tmpfile_mutex with performance schema */
425 UNIV_INTERN mysql_pfs_key_t	srv_dict_tmpfile_mutex_key;
426 /** Key to register the mutex with performance schema */
427 UNIV_INTERN mysql_pfs_key_t	srv_misc_tmpfile_mutex_key;
428 /** Key to register srv_sys_t::mutex with performance schema */
429 UNIV_INTERN mysql_pfs_key_t	srv_sys_mutex_key;
430 /** Key to register srv_sys_t::tasks_mutex with performance schema */
431 UNIV_INTERN mysql_pfs_key_t	srv_sys_tasks_mutex_key;
432 #endif /* UNIV_PFS_MUTEX */
433 
434 /** Temporary file for innodb monitor output */
435 UNIV_INTERN FILE*	srv_monitor_file;
436 /** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
437 This mutex has a very high rank; threads reserving it should not
438 be holding any InnoDB latches. */
439 UNIV_INTERN ib_mutex_t	srv_dict_tmpfile_mutex;
440 /** Temporary file for output from the data dictionary */
441 UNIV_INTERN FILE*	srv_dict_tmpfile;
442 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
443 This mutex has a very low rank; threads reserving it should not
444 acquire any further latches or sleep before releasing this one. */
445 UNIV_INTERN ib_mutex_t	srv_misc_tmpfile_mutex;
/** Temporary file for miscellaneous diagnostic output */
447 UNIV_INTERN FILE*	srv_misc_tmpfile;
448 
449 UNIV_INTERN ulint	srv_main_thread_process_no	= 0;
450 UNIV_INTERN ulint	srv_main_thread_id		= 0;
451 
452 /* The following counts are used by the srv_master_thread. */
453 
454 /** Iterations of the loop bounded by 'srv_active' label. */
455 static ulint		srv_main_active_loops		= 0;
456 /** Iterations of the loop bounded by the 'srv_idle' label. */
457 static ulint		srv_main_idle_loops		= 0;
458 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
459 static ulint		srv_main_shutdown_loops		= 0;
460 /** Log writes involving flush. */
461 static ulint		srv_log_writes_and_flush	= 0;
462 
463 /* This is only ever touched by the master thread. It records the
464 time when the last flush of log file has happened. The master
465 thread ensures that we flush the log files at least once per
466 second. */
467 static time_t	srv_last_log_flush_time;
468 
469 /* Interval in seconds at which various tasks are performed by the
470 master thread when server is active. In order to balance the workload,
471 we should try to keep intervals such that they are not multiple of
472 each other. For example, if we have intervals for various tasks
473 defined as 5, 10, 15, 60 then all tasks will be performed when
474 current_time % 60 == 0 and no tasks will be performed when
475 current_time % 5 != 0. */
476 
477 # define	SRV_MASTER_CHECKPOINT_INTERVAL		(7)
478 # define	SRV_MASTER_PURGE_INTERVAL		(10)
479 #ifdef MEM_PERIODIC_CHECK
480 # define	SRV_MASTER_MEM_VALIDATE_INTERVAL	(13)
481 #endif /* MEM_PERIODIC_CHECK */
482 # define	SRV_MASTER_DICT_LRU_INTERVAL		(47)
483 
484 /** Acquire the system_mutex. */
485 #define srv_sys_mutex_enter() do {			\
486 	mutex_enter(&srv_sys->mutex);			\
487 } while (0)
488 
489 /** Test if the system mutex is owned. */
490 #define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex)	\
491 			     && !srv_read_only_mode)
492 
493 /** Release the system mutex. */
494 #define srv_sys_mutex_exit() do {			\
495 	mutex_exit(&srv_sys->mutex);			\
496 } while (0)
497 
498 #define fetch_lock_wait_timeout(trx)			\
499 	((trx)->lock.allowed_to_wait			\
500 	 ? thd_lock_wait_timeout((trx)->mysql_thd)	\
501 	 : 0)
502 
503 /*
504 	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
505 	=========================================
506 
507 There is the following analogue between this database
508 server and an operating system kernel:
509 
510 DB concept			equivalent OS concept
511 ----------			---------------------
512 transaction		--	process;
513 
514 query thread		--	thread;
515 
516 lock			--	semaphore;
517 
518 kernel			--	kernel;
519 
520 query thread execution:
521 (a) without lock mutex
522 reserved		--	process executing in user mode;
523 (b) with lock mutex reserved
524 			--	process executing in kernel mode;
525 
The server has several background threads all running at the same
priority as user threads. It periodically checks if there is anything
528 happening in the server which requires intervention of the master
529 thread. Such situations may be, for example, when flushing of dirty
530 blocks is needed in the buffer pool or old version of database rows
531 have to be cleaned away (purged). The user can configure a separate
532 dedicated purge thread(s) too, in which case the master thread does not
533 do any purging.
534 
535 The threads which we call user threads serve the queries of the MySQL
536 server. They run at normal priority.
537 
538 When there is no activity in the system, also the master thread
539 suspends itself to wait for an event making the server totally silent.
540 
541 There is still one complication in our server design. If a
542 background utility thread obtains a resource (e.g., mutex) needed by a user
543 thread, and there is also some other user activity in the system,
544 the user thread may have to wait indefinitely long for the
545 resource, as the OS does not schedule a background thread if
546 there is some other runnable user thread. This problem is called
547 priority inversion in real-time programming.
548 
549 One solution to the priority inversion problem would be to keep record
550 of which thread owns which resource and in the above case boost the
551 priority of the background thread so that it will be scheduled and it
552 can release the resource.  This solution is called priority inheritance
553 in real-time programming.  A drawback of this solution is that the overhead
554 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
555 MHz Pentium, because the thread has to call os_thread_get_curr_id.  This may
556 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
that the thread cannot store the information in the resource, say mutex,
itself, because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards, the
560 information is outdated for the time of one machine instruction, at least.
561 (To be precise, the information could be stored to lock_word in mutex if
562 the machine supports atomic swap.)
563 
564 The above solution with priority inheritance may become actual in the
565 future, currently we do not implement any priority twiddling solution.
566 Our general aim is to reduce the contention of all mutexes by making
567 them more fine grained.
568 
569 The thread table contains information of the current status of each
570 thread existing in the system, and also the event semaphores used in
571 suspending the master thread and utility threads when they have nothing
572 to do.  The thread table can be seen as an analogue to the process table
573 in a traditional Unix implementation. */
574 
575 /** The server system struct */
576 struct srv_sys_t{
577 	ib_mutex_t	tasks_mutex;		/*!< variable protecting the
578 						tasks queue */
579 	UT_LIST_BASE_NODE_T(que_thr_t)
580 			tasks;			/*!< task queue */
581 
582 	ib_mutex_t	mutex;			/*!< variable protecting the
583 						fields below. */
584 	ulint		n_sys_threads;		/*!< size of the sys_threads
585 						array */
586 
587 	srv_slot_t*	sys_threads;		/*!< server thread table */
588 
589 	ulint		n_threads_active[SRV_MASTER + 1];
590 						/*!< number of threads active
591 						in a thread class */
592 
593 	srv_stats_t::ulint_ctr_1_t
594 			activity_count;		/*!< For tracking server
595 						activity */
596 };
597 
598 #ifndef HAVE_ATOMIC_BUILTINS
599 /** Mutex protecting some server global variables. */
600 UNIV_INTERN ib_mutex_t	server_mutex;
601 #endif /* !HAVE_ATOMIC_BUILTINS */
602 
603 static srv_sys_t*	srv_sys	= NULL;
604 
605 /** Event to signal the monitor thread. */
606 UNIV_INTERN os_event_t	srv_monitor_event;
607 
608 /** Event to signal the error thread */
609 UNIV_INTERN os_event_t	srv_error_event;
610 
611 /** Event to signal the buffer pool dump/load thread */
612 UNIV_INTERN os_event_t	srv_buf_dump_event;
613 
614 /** The buffer pool dump/load file name */
615 UNIV_INTERN char*	srv_buf_dump_filename;
616 
617 /** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
618 and/or load it during startup. */
619 UNIV_INTERN char	srv_buffer_pool_dump_at_shutdown = FALSE;
620 UNIV_INTERN char	srv_buffer_pool_load_at_startup = FALSE;
621 
622 /** Slot index in the srv_sys->sys_threads array for the purge thread. */
623 static const ulint	SRV_PURGE_SLOT	= 1;
624 
625 /** Slot index in the srv_sys->sys_threads array for the master thread. */
626 static const ulint	SRV_MASTER_SLOT = 0;
627 
628 /*********************************************************************//**
629 Prints counters for work done by srv_master_thread. */
630 static
631 void
srv_print_master_thread_info(FILE * file)632 srv_print_master_thread_info(
633 /*=========================*/
634 	FILE  *file)    /* in: output stream */
635 {
636 	fprintf(file, "srv_master_thread loops: %lu srv_active, "
637 		"%lu srv_shutdown, %lu srv_idle\n",
638 		srv_main_active_loops,
639 		srv_main_shutdown_loops,
640 		srv_main_idle_loops);
641 	fprintf(file, "srv_master_thread log flush and writes: %lu\n",
642 		srv_log_writes_and_flush);
643 }
644 
645 /*********************************************************************//**
646 Sets the info describing an i/o thread current state. */
647 UNIV_INTERN
648 void
srv_set_io_thread_op_info(ulint i,const char * str)649 srv_set_io_thread_op_info(
650 /*======================*/
651 	ulint		i,	/*!< in: the 'segment' of the i/o thread */
652 	const char*	str)	/*!< in: constant char string describing the
653 				state */
654 {
655 	ut_a(i < SRV_MAX_N_IO_THREADS);
656 
657 	srv_io_thread_op_info[i] = str;
658 }
659 
660 /*********************************************************************//**
661 Resets the info describing an i/o thread current state. */
662 UNIV_INTERN
663 void
srv_reset_io_thread_op_info()664 srv_reset_io_thread_op_info()
665 /*=========================*/
666 {
667 	for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
668 		srv_io_thread_op_info[i] = "not started yet";
669 	}
670 }
671 
672 #ifdef UNIV_DEBUG
673 /*********************************************************************//**
674 Validates the type of a thread table slot.
675 @return TRUE if ok */
676 static
677 ibool
srv_thread_type_validate(srv_thread_type type)678 srv_thread_type_validate(
679 /*=====================*/
680 	srv_thread_type	type)	/*!< in: thread type */
681 {
682 	switch (type) {
683 	case SRV_NONE:
684 		break;
685 	case SRV_WORKER:
686 	case SRV_PURGE:
687 	case SRV_MASTER:
688 		return(TRUE);
689 	}
690 	ut_error;
691 	return(FALSE);
692 }
693 #endif /* UNIV_DEBUG */
694 
695 /*********************************************************************//**
696 Gets the type of a thread table slot.
697 @return thread type */
698 static
699 srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)700 srv_slot_get_type(
701 /*==============*/
702 	const srv_slot_t*	slot)	/*!< in: thread slot */
703 {
704 	srv_thread_type	type = slot->type;
705 	ut_ad(srv_thread_type_validate(type));
706 	return(type);
707 }
708 
709 /*********************************************************************//**
710 Reserves a slot in the thread table for the current thread.
711 @return	reserved slot */
712 static
713 srv_slot_t*
srv_reserve_slot(srv_thread_type type)714 srv_reserve_slot(
715 /*=============*/
716 	srv_thread_type	type)	/*!< in: type of the thread */
717 {
718 	srv_slot_t*	slot = 0;
719 
720 	srv_sys_mutex_enter();
721 
722 	ut_ad(srv_thread_type_validate(type));
723 
724 	switch (type) {
725 	case SRV_MASTER:
726 		slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
727 		break;
728 
729 	case SRV_PURGE:
730 		slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
731 		break;
732 
733 	case SRV_WORKER:
734 		/* Find an empty slot, skip the master and purge slots. */
735 		for (slot = &srv_sys->sys_threads[2];
736 		     slot->in_use;
737 		     ++slot) {
738 
739 			ut_a(slot < &srv_sys->sys_threads[
740 			     srv_sys->n_sys_threads]);
741 		}
742 		break;
743 
744 	case SRV_NONE:
745 		ut_error;
746 	}
747 
748 	ut_a(!slot->in_use);
749 
750 	slot->in_use = TRUE;
751 	slot->suspended = FALSE;
752 	slot->type = type;
753 
754 	ut_ad(srv_slot_get_type(slot) == type);
755 
756 	++srv_sys->n_threads_active[type];
757 
758 	srv_sys_mutex_exit();
759 
760 	return(slot);
761 }
762 
763 /*********************************************************************//**
764 Suspends the calling thread to wait for the event in its thread slot.
765 @return the current signal count of the event. */
766 static
767 ib_int64_t
srv_suspend_thread_low(srv_slot_t * slot)768 srv_suspend_thread_low(
769 /*===================*/
770 	srv_slot_t*	slot)	/*!< in/out: thread slot */
771 {
772 
773 	ut_ad(!srv_read_only_mode);
774 	ut_ad(srv_sys_mutex_own());
775 
776 	ut_ad(slot->in_use);
777 
778 	srv_thread_type	type = srv_slot_get_type(slot);
779 
780 	switch (type) {
781 	case SRV_NONE:
782 		ut_error;
783 
784 	case SRV_MASTER:
785 		/* We have only one master thread and it
786 		should be the first entry always. */
787 		ut_a(srv_sys->n_threads_active[type] == 1);
788 		break;
789 
790 	case SRV_PURGE:
791 		/* We have only one purge coordinator thread
792 		and it should be the second entry always. */
793 		ut_a(srv_sys->n_threads_active[type] == 1);
794 		break;
795 
796 	case SRV_WORKER:
797 		ut_a(srv_n_purge_threads > 1);
798 		ut_a(srv_sys->n_threads_active[type] > 0);
799 		break;
800 	}
801 
802 	ut_a(!slot->suspended);
803 	slot->suspended = TRUE;
804 
805 	ut_a(srv_sys->n_threads_active[type] > 0);
806 
807 	srv_sys->n_threads_active[type]--;
808 
809 	return(os_event_reset(slot->event));
810 }
811 
812 /*********************************************************************//**
813 Suspends the calling thread to wait for the event in its thread slot.
814 @return the current signal count of the event. */
815 static
816 ib_int64_t
srv_suspend_thread(srv_slot_t * slot)817 srv_suspend_thread(
818 /*===============*/
819 	srv_slot_t*	slot)	/*!< in/out: thread slot */
820 {
821 	srv_sys_mutex_enter();
822 
823 	ib_int64_t	sig_count = srv_suspend_thread_low(slot);
824 
825 	srv_sys_mutex_exit();
826 
827 	return(sig_count);
828 }
829 
830 /*********************************************************************//**
831 Releases threads of the type given from suspension in the thread table.
832 NOTE! The server mutex has to be reserved by the caller!
833 @return number of threads released: this may be less than n if not
834         enough threads were suspended at the moment. */
835 UNIV_INTERN
836 ulint
srv_release_threads(srv_thread_type type,ulint n)837 srv_release_threads(
838 /*================*/
839 	srv_thread_type	type,	/*!< in: thread type */
840 	ulint		n)	/*!< in: number of threads to release */
841 {
842 	ulint		i;
843 	ulint		count	= 0;
844 
845 	ut_ad(srv_thread_type_validate(type));
846 	ut_ad(n > 0);
847 
848 	srv_sys_mutex_enter();
849 
850 	for (i = 0; i < srv_sys->n_sys_threads; i++) {
851 		srv_slot_t*	slot;
852 
853 		slot = &srv_sys->sys_threads[i];
854 
855 		if (slot->in_use
856 		    && srv_slot_get_type(slot) == type
857 		    && slot->suspended) {
858 
859 			switch (type) {
860 			case SRV_NONE:
861 				ut_error;
862 
863 			case SRV_MASTER:
864 				/* We have only one master thread and it
865 				should be the first entry always. */
866 				ut_a(n == 1);
867 				ut_a(i == SRV_MASTER_SLOT);
868 				ut_a(srv_sys->n_threads_active[type] == 0);
869 				break;
870 
871 			case SRV_PURGE:
872 				/* We have only one purge coordinator thread
873 				and it should be the second entry always. */
874 				ut_a(n == 1);
875 				ut_a(i == SRV_PURGE_SLOT);
876 				ut_a(srv_n_purge_threads > 0);
877 				ut_a(srv_sys->n_threads_active[type] == 0);
878 				break;
879 
880 			case SRV_WORKER:
881 				ut_a(srv_n_purge_threads > 1);
882 				ut_a(srv_sys->n_threads_active[type]
883 				     < srv_n_purge_threads - 1);
884 				break;
885 			}
886 
887 			slot->suspended = FALSE;
888 
889 			++srv_sys->n_threads_active[type];
890 
891 			os_event_set(slot->event);
892 
893 			if (++count == n) {
894 				break;
895 			}
896 		}
897 	}
898 
899 	srv_sys_mutex_exit();
900 
901 	return(count);
902 }
903 
904 /*********************************************************************//**
905 Release a thread's slot. */
906 static
907 void
srv_free_slot(srv_slot_t * slot)908 srv_free_slot(
909 /*==========*/
910 	srv_slot_t*	slot)	/*!< in/out: thread slot */
911 {
912 	srv_sys_mutex_enter();
913 
914 	if (!slot->suspended) {
915 		/* Mark the thread as inactive. */
916 		srv_suspend_thread_low(slot);
917 	}
918 
919 	/* Free the slot for reuse. */
920 	ut_ad(slot->in_use);
921 	slot->in_use = FALSE;
922 
923 	srv_sys_mutex_exit();
924 }
925 
926 /*********************************************************************//**
927 Initializes the server. */
928 UNIV_INTERN
929 void
srv_init(void)930 srv_init(void)
931 /*==========*/
932 {
933 	ulint	n_sys_threads = 0;
934 	ulint	srv_sys_sz = sizeof(*srv_sys);
935 
936 #ifndef HAVE_ATOMIC_BUILTINS
937 	mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
938 #endif /* !HAVE_ATOMIC_BUILTINS */
939 
940 	mutex_create(srv_innodb_monitor_mutex_key,
941 		     &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
942 
943 	if (!srv_read_only_mode) {
944 
945 		/* Number of purge threads + master thread */
946 		n_sys_threads = srv_n_purge_threads + 1;
947 
948 		srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
949 	}
950 
951 	srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
952 
953 	srv_sys->n_sys_threads = n_sys_threads;
954 
955 	if (!srv_read_only_mode) {
956 
957 		mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
958 
959 		mutex_create(srv_sys_tasks_mutex_key,
960 			     &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
961 
962 		srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
963 
964 		for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
965 			srv_slot_t*	slot = &srv_sys->sys_threads[i];
966 
967 			slot->event = os_event_create();
968 
969 			ut_a(slot->event);
970 		}
971 
972 		srv_error_event = os_event_create();
973 
974 		srv_monitor_event = os_event_create();
975 
976 		srv_buf_dump_event = os_event_create();
977 
978 		UT_LIST_INIT(srv_sys->tasks);
979 	}
980 
981 	/* page_zip_stat_per_index_mutex is acquired from:
982 	1. page_zip_compress() (after SYNC_FSP)
983 	2. page_zip_decompress()
984 	3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
985 	4. innodb_cmp_per_index_update(), no other latches
986 	since we do not acquire any other latches while holding this mutex,
987 	it can have very low level. We pick SYNC_ANY_LATCH for it. */
988 
989 	mutex_create(
990 		page_zip_stat_per_index_mutex_key,
991 		&page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
992 
993 	/* Create dummy indexes for infimum and supremum records */
994 
995 	dict_ind_init();
996 
997 	srv_conc_init();
998 
999 #ifdef WITH_INNODB_DISALLOW_WRITES
1000 	/* Writes have to be enabled on init or else we hang. Thus, we
1001 	always set the event here regardless of innobase_disallow_writes.
1002 	That flag will always be 0 at this point because it isn't settable
1003 	via my.cnf or command line arg. */
1004 	srv_allow_writes_event = os_event_create();
1005 	os_event_set(srv_allow_writes_event);
1006 #endif /* WITH_INNODB_DISALLOW_WRITES */
1007 	/* Initialize some INFORMATION SCHEMA internal structures */
1008 	trx_i_s_cache_init(trx_i_s_cache);
1009 
1010 	ut_crc32_init();
1011 
1012 	dict_mem_init();
1013 }
1014 
1015 /*********************************************************************//**
1016 Frees the data structures created in srv_init(). */
1017 UNIV_INTERN
1018 void
srv_free(void)1019 srv_free(void)
1020 /*==========*/
1021 {
1022 	srv_conc_free();
1023 
1024 	/* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have
1025 	been freed by sync_close() already. */
1026 	mem_free(srv_sys);
1027 	srv_sys = NULL;
1028 
1029 	trx_i_s_cache_free(trx_i_s_cache);
1030 
1031 	if (!srv_read_only_mode) {
1032 		os_event_free(srv_buf_dump_event);
1033 		srv_buf_dump_event = NULL;
1034 	}
1035 }
1036 
1037 /*********************************************************************//**
1038 Initializes the synchronization primitives, memory system, and the thread
1039 local storage. */
1040 UNIV_INTERN
1041 void
srv_general_init(void)1042 srv_general_init(void)
1043 /*==================*/
1044 {
1045 	ut_mem_init();
1046 	/* Reset the system variables in the recovery module. */
1047 	recv_sys_var_init();
1048 	os_sync_init();
1049 	sync_init();
1050 	mem_init(srv_mem_pool_size);
1051 	que_init();
1052 	row_mysql_init();
1053 }
1054 
1055 /*********************************************************************//**
1056 Normalizes init parameter values to use units we use inside InnoDB. */
1057 static
1058 void
srv_normalize_init_values(void)1059 srv_normalize_init_values(void)
1060 /*===========================*/
1061 {
1062 	ulint	n;
1063 	ulint	i;
1064 
1065 	n = srv_n_data_files;
1066 
1067 	for (i = 0; i < n; i++) {
1068 		srv_data_file_sizes[i] = srv_data_file_sizes[i]
1069 			* ((1024 * 1024) / UNIV_PAGE_SIZE);
1070 	}
1071 
1072 	srv_last_file_size_max = srv_last_file_size_max
1073 		* ((1024 * 1024) / UNIV_PAGE_SIZE);
1074 
1075 	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1076 
1077 	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1078 
1079 	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1080 }
1081 
1082 /*********************************************************************//**
1083 Boots the InnoDB server. */
1084 UNIV_INTERN
1085 void
srv_boot(void)1086 srv_boot(void)
1087 /*==========*/
1088 {
1089 	/* Transform the init parameter values given by MySQL to
1090 	use units we use inside InnoDB: */
1091 
1092 	srv_normalize_init_values();
1093 
1094 	/* Initialize synchronization primitives, memory management, and thread
1095 	local storage */
1096 
1097 	srv_general_init();
1098 
1099 	/* Initialize this module */
1100 
1101 	srv_init();
1102 	srv_mon_create();
1103 }
1104 
1105 /******************************************************************//**
1106 Refreshes the values used to calculate per-second averages. */
1107 static
1108 void
srv_refresh_innodb_monitor_stats(void)1109 srv_refresh_innodb_monitor_stats(void)
1110 /*==================================*/
1111 {
1112 	mutex_enter(&srv_innodb_monitor_mutex);
1113 
1114 	srv_last_monitor_time = time(NULL);
1115 
1116 	os_aio_refresh_stats();
1117 
1118 	btr_cur_n_sea_old = btr_cur_n_sea;
1119 	btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1120 
1121 	log_refresh_stats();
1122 
1123 	buf_refresh_io_stats_all();
1124 
1125 	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1126 	srv_n_rows_updated_old = srv_stats.n_rows_updated;
1127 	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1128 	srv_n_rows_read_old = srv_stats.n_rows_read;
1129 
1130 	mutex_exit(&srv_innodb_monitor_mutex);
1131 }
1132 
1133 /******************************************************************//**
1134 Outputs to a file the output of the InnoDB Monitor.
1135 @return FALSE if not all information printed
1136 due to failure to obtain necessary mutex */
1137 UNIV_INTERN
1138 ibool
srv_printf_innodb_monitor(FILE * file,ibool nowait,ulint * trx_start_pos,ulint * trx_end)1139 srv_printf_innodb_monitor(
1140 /*======================*/
1141 	FILE*	file,		/*!< in: output stream */
1142 	ibool	nowait,		/*!< in: whether to wait for the
1143 				lock_sys_t:: mutex */
1144 	ulint*	trx_start_pos,	/*!< out: file position of the start of
1145 				the list of active transactions */
1146 	ulint*	trx_end)	/*!< out: file position of the end of
1147 				the list of active transactions */
1148 {
1149 	double	time_elapsed;
1150 	time_t	current_time;
1151 	ulint	n_reserved;
1152 	ibool	ret;
1153 
1154 	mutex_enter(&srv_innodb_monitor_mutex);
1155 
1156 	current_time = time(NULL);
1157 
1158 	/* We add 0.001 seconds to time_elapsed to prevent division
1159 	by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
1160 	same time */
1161 
1162 	time_elapsed = difftime(current_time, srv_last_monitor_time)
1163 		+ 0.001;
1164 
1165 	srv_last_monitor_time = time(NULL);
1166 
1167 	fputs("\n=====================================\n", file);
1168 
1169 	ut_print_timestamp(file);
1170 	fprintf(file,
1171 		" INNODB MONITOR OUTPUT\n"
1172 		"=====================================\n"
1173 		"Per second averages calculated from the last %lu seconds\n",
1174 		(ulong) time_elapsed);
1175 
1176 	fputs("-----------------\n"
1177 	      "BACKGROUND THREAD\n"
1178 	      "-----------------\n", file);
1179 	srv_print_master_thread_info(file);
1180 
1181 	fputs("----------\n"
1182 	      "SEMAPHORES\n"
1183 	      "----------\n", file);
1184 	sync_print(file);
1185 
1186 	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
1187 	order level in sync0sync.h, while dict_foreign_err_mutex has a very
1188 	low level 135. Therefore we can reserve the latter mutex here without
1189 	a danger of a deadlock of threads. */
1190 
1191 	mutex_enter(&dict_foreign_err_mutex);
1192 
1193 	if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
1194 		fputs("------------------------\n"
1195 		      "LATEST FOREIGN KEY ERROR\n"
1196 		      "------------------------\n", file);
1197 		ut_copy_file(file, dict_foreign_err_file);
1198 	}
1199 
1200 	mutex_exit(&dict_foreign_err_mutex);
1201 
1202 	/* Only if lock_print_info_summary proceeds correctly,
1203 	before we call the lock_print_info_all_transactions
1204 	to print all the lock information. IMPORTANT NOTE: This
1205 	function acquires the lock mutex on success. */
1206 	ret = lock_print_info_summary(file, nowait);
1207 
1208 	if (ret) {
1209 		if (trx_start_pos) {
1210 			long	t = ftell(file);
1211 			if (t < 0) {
1212 				*trx_start_pos = ULINT_UNDEFINED;
1213 			} else {
1214 				*trx_start_pos = (ulint) t;
1215 			}
1216 		}
1217 
1218 		/* NOTE: If we get here then we have the lock mutex. This
1219 		function will release the lock mutex that we acquired when
1220 		we called the lock_print_info_summary() function earlier. */
1221 
1222 		lock_print_info_all_transactions(file);
1223 
1224 		if (trx_end) {
1225 			long	t = ftell(file);
1226 			if (t < 0) {
1227 				*trx_end = ULINT_UNDEFINED;
1228 			} else {
1229 				*trx_end = (ulint) t;
1230 			}
1231 		}
1232 	}
1233 
1234 	fputs("--------\n"
1235 	      "FILE I/O\n"
1236 	      "--------\n", file);
1237 	os_aio_print(file);
1238 
1239 	fputs("-------------------------------------\n"
1240 	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1241 	      "-------------------------------------\n", file);
1242 	ibuf_print(file);
1243 
1244 	ha_print_info(file, btr_search_sys->hash_index);
1245 
1246 	fprintf(file,
1247 		"%.2f hash searches/s, %.2f non-hash searches/s\n",
1248 		(btr_cur_n_sea - btr_cur_n_sea_old)
1249 		/ time_elapsed,
1250 		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1251 		/ time_elapsed);
1252 	btr_cur_n_sea_old = btr_cur_n_sea;
1253 	btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1254 
1255 	fputs("---\n"
1256 	      "LOG\n"
1257 	      "---\n", file);
1258 	log_print(file);
1259 
1260 	fputs("----------------------\n"
1261 	      "BUFFER POOL AND MEMORY\n"
1262 	      "----------------------\n", file);
1263 	fprintf(file,
1264 		"Total memory allocated " ULINTPF
1265 		"; in additional pool allocated " ULINTPF "\n",
1266 		ut_total_allocated_memory,
1267 		mem_pool_get_reserved(mem_comm_pool));
1268 	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1269 		dict_sys->size);
1270 
1271 	buf_print_io(file);
1272 
1273 	fputs("--------------\n"
1274 	      "ROW OPERATIONS\n"
1275 	      "--------------\n", file);
1276 	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1277 		(long) srv_conc_get_active_threads(),
1278 		srv_conc_get_waiting_threads());
1279 
1280 	/* This is a dirty read, without holding trx_sys->mutex. */
1281 	fprintf(file, "%lu read views open inside InnoDB\n",
1282 		UT_LIST_GET_LEN(trx_sys->view_list));
1283 
1284 	n_reserved = fil_space_get_n_reserved_extents(0);
1285 	if (n_reserved > 0) {
1286 		fprintf(file,
1287 			"%lu tablespace extents now reserved for"
1288 			" B-tree split operations\n",
1289 			(ulong) n_reserved);
1290 	}
1291 
1292 #ifdef UNIV_LINUX
1293 	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1294 		(ulong) srv_main_thread_process_no,
1295 		(ulong) srv_main_thread_id,
1296 		srv_main_thread_op_info);
1297 #else
1298 	fprintf(file, "Main thread id %lu, state: %s\n",
1299 		(ulong) srv_main_thread_id,
1300 		srv_main_thread_op_info);
1301 #endif
1302 	fprintf(file,
1303 		"Number of rows inserted " ULINTPF
1304 		", updated " ULINTPF ", deleted " ULINTPF
1305 		", read " ULINTPF "\n",
1306 		(ulint) srv_stats.n_rows_inserted,
1307 		(ulint) srv_stats.n_rows_updated,
1308 		(ulint) srv_stats.n_rows_deleted,
1309 		(ulint) srv_stats.n_rows_read);
1310 	fprintf(file,
1311 		"%.2f inserts/s, %.2f updates/s,"
1312 		" %.2f deletes/s, %.2f reads/s\n",
1313 		((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
1314 		/ time_elapsed,
1315 		((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
1316 		/ time_elapsed,
1317 		((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
1318 		/ time_elapsed,
1319 		((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
1320 		/ time_elapsed);
1321 
1322 	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1323 	srv_n_rows_updated_old = srv_stats.n_rows_updated;
1324 	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1325 	srv_n_rows_read_old = srv_stats.n_rows_read;
1326 
1327 	fputs("----------------------------\n"
1328 	      "END OF INNODB MONITOR OUTPUT\n"
1329 	      "============================\n", file);
1330 	mutex_exit(&srv_innodb_monitor_mutex);
1331 	fflush(file);
1332 
1333 	return(ret);
1334 }
1335 
1336 /******************************************************************//**
1337 Function to pass InnoDB status variables to MySQL */
1338 UNIV_INTERN
1339 void
srv_export_innodb_status(void)1340 srv_export_innodb_status(void)
1341 /*==========================*/
1342 {
1343 	buf_pool_stat_t		stat;
1344 	buf_pools_list_size_t	buf_pools_list_size;
1345 	ulint			LRU_len;
1346 	ulint			free_len;
1347 	ulint			flush_list_len;
1348 
1349 	buf_get_total_stat(&stat);
1350 	buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1351 	buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1352 
1353 	mutex_enter(&srv_innodb_monitor_mutex);
1354 
1355 	export_vars.innodb_data_pending_reads =
1356 		os_n_pending_reads;
1357 
1358 	export_vars.innodb_data_pending_writes =
1359 		os_n_pending_writes;
1360 
1361 	export_vars.innodb_data_pending_fsyncs =
1362 		fil_n_pending_log_flushes
1363 		+ fil_n_pending_tablespace_flushes;
1364 
1365 	export_vars.innodb_data_fsyncs = os_n_fsyncs;
1366 
1367 	export_vars.innodb_data_read = srv_stats.data_read;
1368 
1369 	export_vars.innodb_data_reads = os_n_file_reads;
1370 
1371 	export_vars.innodb_data_writes = os_n_file_writes;
1372 
1373 	export_vars.innodb_data_written = srv_stats.data_written;
1374 
1375 	export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
1376 
1377 	export_vars.innodb_buffer_pool_write_requests =
1378 		srv_stats.buf_pool_write_requests;
1379 
1380 	export_vars.innodb_buffer_pool_wait_free =
1381 		srv_stats.buf_pool_wait_free;
1382 
1383 	export_vars.innodb_buffer_pool_pages_flushed =
1384 		srv_stats.buf_pool_flushed;
1385 
1386 	export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1387 
1388 	export_vars.innodb_buffer_pool_read_ahead_rnd =
1389 		stat.n_ra_pages_read_rnd;
1390 
1391 	export_vars.innodb_buffer_pool_read_ahead =
1392 		stat.n_ra_pages_read;
1393 
1394 	export_vars.innodb_buffer_pool_read_ahead_evicted =
1395 		stat.n_ra_pages_evicted;
1396 
1397 	export_vars.innodb_buffer_pool_pages_data = LRU_len;
1398 
1399 	export_vars.innodb_buffer_pool_bytes_data =
1400 		buf_pools_list_size.LRU_bytes
1401 		+ buf_pools_list_size.unzip_LRU_bytes;
1402 
1403 	export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1404 
1405 	export_vars.innodb_buffer_pool_bytes_dirty =
1406 		buf_pools_list_size.flush_list_bytes;
1407 
1408 	export_vars.innodb_buffer_pool_pages_free = free_len;
1409 
1410 #ifdef UNIV_DEBUG
1411 	export_vars.innodb_buffer_pool_pages_latched =
1412 		buf_get_latched_pages_number();
1413 #endif /* UNIV_DEBUG */
1414 	export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1415 
1416 	export_vars.innodb_buffer_pool_pages_misc =
1417 		buf_pool_get_n_pages() - LRU_len - free_len;
1418 
1419 #ifdef HAVE_ATOMIC_BUILTINS
1420 	export_vars.innodb_have_atomic_builtins = 1;
1421 #else
1422 	export_vars.innodb_have_atomic_builtins = 0;
1423 #endif
1424 	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1425 
1426 	export_vars.innodb_log_waits = srv_stats.log_waits;
1427 
1428 	export_vars.innodb_os_log_written = srv_stats.os_log_written;
1429 
1430 	export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1431 
1432 	export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1433 
1434 	export_vars.innodb_os_log_pending_writes =
1435 		srv_stats.os_log_pending_writes;
1436 
1437 	export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1438 
1439 	export_vars.innodb_log_writes = srv_stats.log_writes;
1440 
1441 	export_vars.innodb_dblwr_pages_written =
1442 		srv_stats.dblwr_pages_written;
1443 
1444 	export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1445 
1446 	export_vars.innodb_pages_created = stat.n_pages_created;
1447 
1448 	export_vars.innodb_pages_read = stat.n_pages_read;
1449 
1450 	export_vars.innodb_pages_written = stat.n_pages_written;
1451 
1452 	export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1453 
1454 	export_vars.innodb_row_lock_current_waits =
1455 		srv_stats.n_lock_wait_current_count;
1456 
1457 	export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1458 
1459 	if (srv_stats.n_lock_wait_count > 0) {
1460 
1461 		export_vars.innodb_row_lock_time_avg = (ulint)
1462 			(srv_stats.n_lock_wait_time
1463 			 / 1000 / srv_stats.n_lock_wait_count);
1464 
1465 	} else {
1466 		export_vars.innodb_row_lock_time_avg = 0;
1467 	}
1468 
1469 	export_vars.innodb_row_lock_time_max =
1470 		lock_sys->n_lock_max_wait_time / 1000;
1471 
1472 	export_vars.innodb_rows_read = srv_stats.n_rows_read;
1473 
1474 	export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1475 
1476 	export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1477 
1478 	export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1479 
1480 	export_vars.innodb_num_open_files = fil_n_file_opened;
1481 
1482 	export_vars.innodb_truncated_status_writes =
1483 		srv_truncated_status_writes;
1484 
1485 	export_vars.innodb_available_undo_logs = srv_available_undo_logs;
1486 
1487 #ifdef UNIV_DEBUG
1488 	rw_lock_s_lock(&purge_sys->latch);
1489 	trx_id_t	done_trx_no	= purge_sys->done.trx_no;
1490 	trx_id_t	up_limit_id	= purge_sys->view
1491 		? purge_sys->view->up_limit_id
1492 		: 0;
1493 	rw_lock_s_unlock(&purge_sys->latch);
1494 
1495 	mutex_enter(&trx_sys->mutex);
1496 	trx_id_t	max_trx_id	= trx_sys->rw_max_trx_id;
1497 	mutex_exit(&trx_sys->mutex);
1498 
1499 	if (!done_trx_no || max_trx_id < done_trx_no - 1) {
1500 		export_vars.innodb_purge_trx_id_age = 0;
1501 	} else {
1502 		export_vars.innodb_purge_trx_id_age =
1503 			(ulint) (max_trx_id - done_trx_no + 1);
1504 	}
1505 
1506 	if (!up_limit_id
1507 	    || max_trx_id < up_limit_id) {
1508 		export_vars.innodb_purge_view_trx_id_age = 0;
1509 	} else {
1510 		export_vars.innodb_purge_view_trx_id_age =
1511 			(ulint) (max_trx_id - up_limit_id);
1512 	}
1513 #endif /* UNIV_DEBUG */
1514 
1515 	mutex_exit(&srv_innodb_monitor_mutex);
1516 }
1517 
1518 /*********************************************************************//**
1519 A thread which prints the info output by various InnoDB monitors.
1520 @return	a dummy parameter */
1521 extern "C" UNIV_INTERN
1522 os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)1523 DECLARE_THREAD(srv_monitor_thread)(
1524 /*===============================*/
1525 	void*	arg MY_ATTRIBUTE((unused)))
1526 			/*!< in: a dummy parameter required by
1527 			os_thread_create */
1528 {
1529 	ib_int64_t	sig_count;
1530 	double		time_elapsed;
1531 	time_t		current_time;
1532 	time_t		last_table_monitor_time;
1533 	time_t		last_tablespace_monitor_time;
1534 	time_t		last_monitor_time;
1535 	ulint		mutex_skipped;
1536 	ibool		last_srv_print_monitor;
1537 
1538 	ut_ad(!srv_read_only_mode);
1539 
1540 #ifdef UNIV_DEBUG_THREAD_CREATION
1541 	fprintf(stderr, "Lock timeout thread starts, id %lu\n",
1542 		os_thread_pf(os_thread_get_curr_id()));
1543 #endif /* UNIV_DEBUG_THREAD_CREATION */
1544 
1545 #ifdef UNIV_PFS_THREAD
1546 	pfs_register_thread(srv_monitor_thread_key);
1547 #endif /* UNIV_PFS_THREAD */
1548 	srv_monitor_active = TRUE;
1549 
1550 	UT_NOT_USED(arg);
1551 	srv_last_monitor_time = ut_time();
1552 	last_table_monitor_time = ut_time();
1553 	last_tablespace_monitor_time = ut_time();
1554 	last_monitor_time = ut_time();
1555 	mutex_skipped = 0;
1556 	last_srv_print_monitor = srv_print_innodb_monitor;
1557 loop:
1558 	/* Wake up every 5 seconds to see if we need to print
1559 	monitor information or if signalled at shutdown. */
1560 
1561 	sig_count = os_event_reset(srv_monitor_event);
1562 
1563 	os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1564 
1565 	current_time = ut_time();
1566 
1567 	time_elapsed = difftime(current_time, last_monitor_time);
1568 
1569 	if (time_elapsed > 15) {
1570 		last_monitor_time = ut_time();
1571 
1572 		if (srv_print_innodb_monitor) {
1573 			/* Reset mutex_skipped counter everytime
1574 			srv_print_innodb_monitor changes. This is to
1575 			ensure we will not be blocked by lock_sys->mutex
1576 			for short duration information printing,
1577 			such as requested by sync_array_print_long_waits() */
1578 			if (!last_srv_print_monitor) {
1579 				mutex_skipped = 0;
1580 				last_srv_print_monitor = TRUE;
1581 			}
1582 
1583 			if (!srv_printf_innodb_monitor(stderr,
1584 						MUTEX_NOWAIT(mutex_skipped),
1585 						NULL, NULL)) {
1586 				mutex_skipped++;
1587 			} else {
1588 				/* Reset the counter */
1589 				mutex_skipped = 0;
1590 			}
1591 		} else {
1592 			last_srv_print_monitor = FALSE;
1593 		}
1594 
1595 
1596 		/* We don't create the temp files or associated
1597 		mutexes in read-only-mode */
1598 
1599 		if (!srv_read_only_mode && srv_innodb_status) {
1600 			mutex_enter(&srv_monitor_file_mutex);
1601 			rewind(srv_monitor_file);
1602 			if (!srv_printf_innodb_monitor(srv_monitor_file,
1603 						MUTEX_NOWAIT(mutex_skipped),
1604 						NULL, NULL)) {
1605 				mutex_skipped++;
1606 			} else {
1607 				mutex_skipped = 0;
1608 			}
1609 
1610 			os_file_set_eof(srv_monitor_file);
1611 			mutex_exit(&srv_monitor_file_mutex);
1612 		}
1613 
1614 		if (srv_print_innodb_tablespace_monitor
1615 		    && difftime(current_time,
1616 				last_tablespace_monitor_time) > 60) {
1617 			last_tablespace_monitor_time = ut_time();
1618 
1619 			fputs("========================"
1620 			      "========================\n",
1621 			      stderr);
1622 
1623 			ut_print_timestamp(stderr);
1624 
1625 			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
1626 			      "========================"
1627 			      "========================\n",
1628 			      stderr);
1629 
1630 			fsp_print(0);
1631 			fputs("Validating tablespace\n", stderr);
1632 			fsp_validate(0);
1633 			fputs("Validation ok\n"
1634 			      "---------------------------------------\n"
1635 			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
1636 			      "=======================================\n",
1637 			      stderr);
1638 		}
1639 
1640 		if (srv_print_innodb_table_monitor
1641 		    && difftime(current_time, last_table_monitor_time) > 60) {
1642 
1643 			last_table_monitor_time = ut_time();
1644 
1645 			fprintf(stderr, "Warning: %s\n",
1646 				DEPRECATED_MSG_INNODB_TABLE_MONITOR);
1647 
1648 			fputs("===========================================\n",
1649 			      stderr);
1650 
1651 			ut_print_timestamp(stderr);
1652 
1653 			fputs(" INNODB TABLE MONITOR OUTPUT\n"
1654 			      "===========================================\n",
1655 			      stderr);
1656 			dict_print();
1657 
1658 			fputs("-----------------------------------\n"
1659 			      "END OF INNODB TABLE MONITOR OUTPUT\n"
1660 			      "==================================\n",
1661 			      stderr);
1662 
1663 			fprintf(stderr, "Warning: %s\n",
1664 				DEPRECATED_MSG_INNODB_TABLE_MONITOR);
1665 		}
1666 	}
1667 
1668 	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
1669 		goto exit_func;
1670 	}
1671 
1672 	if (srv_print_innodb_monitor
1673 	    || srv_print_innodb_lock_monitor
1674 	    || srv_print_innodb_tablespace_monitor
1675 	    || srv_print_innodb_table_monitor) {
1676 		goto loop;
1677 	}
1678 
1679 	goto loop;
1680 
1681 exit_func:
1682 	srv_monitor_active = FALSE;
1683 
1684 	/* We count the number of threads in os_thread_exit(). A created
1685 	thread should always use that to exit and not use return() to exit. */
1686 
1687 	os_thread_exit(NULL);
1688 
1689 	OS_THREAD_DUMMY_RETURN;
1690 }
1691 
1692 /*********************************************************************//**
1693 A thread which prints warnings about semaphore waits which have lasted
1694 too long. These can be used to track bugs which cause hangs.
1695 Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
1696 we should avoid waiting any mutexes in this function!
1697 @return	a dummy parameter */
1698 extern "C" UNIV_INTERN
1699 os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)1700 DECLARE_THREAD(srv_error_monitor_thread)(
1701 /*=====================================*/
1702 	void*	arg MY_ATTRIBUTE((unused)))
1703 			/*!< in: a dummy parameter required by
1704 			os_thread_create */
1705 {
1706 	/* number of successive fatal timeouts observed */
1707 	ulint		fatal_cnt	= 0;
1708 	lsn_t		old_lsn;
1709 	lsn_t		new_lsn;
1710 	ib_int64_t	sig_count;
1711 	/* longest waiting thread for a semaphore */
1712 	os_thread_id_t	waiter		= os_thread_get_curr_id();
1713 	os_thread_id_t	old_waiter	= waiter;
1714 	/* the semaphore that is being waited for */
1715 	const void*	sema		= NULL;
1716 	const void*	old_sema	= NULL;
1717 
1718 	ut_ad(!srv_read_only_mode);
1719 
1720 	old_lsn = srv_start_lsn;
1721 
1722 #ifdef UNIV_DEBUG_THREAD_CREATION
1723 	fprintf(stderr, "Error monitor thread starts, id %lu\n",
1724 		os_thread_pf(os_thread_get_curr_id()));
1725 #endif /* UNIV_DEBUG_THREAD_CREATION */
1726 
1727 #ifdef UNIV_PFS_THREAD
1728 	pfs_register_thread(srv_error_monitor_thread_key);
1729 #endif /* UNIV_PFS_THREAD */
1730 	srv_error_monitor_active = TRUE;
1731 
1732 loop:
1733 	/* Try to track a strange bug reported by Harald Fuchs and others,
1734 	where the lsn seems to decrease at times */
1735 
1736 	if (log_peek_lsn(&new_lsn)) {
1737 		if (new_lsn < old_lsn) {
1738 			ut_print_timestamp(stderr);
1739 			fprintf(stderr,
1740 				"  InnoDB: Error: old log sequence number " LSN_PF
1741 				" was greater\n"
1742 				"InnoDB: than the new log sequence number " LSN_PF "!\n"
1743 				"InnoDB: Please submit a bug report"
1744 				" to http://bugs.mysql.com\n",
1745 				old_lsn, new_lsn);
1746 			ut_ad(0);
1747 		}
1748 
1749 		old_lsn = new_lsn;
1750 	}
1751 
1752 	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
1753 		/* We referesh InnoDB Monitor values so that averages are
1754 		printed from at most 60 last seconds */
1755 
1756 		srv_refresh_innodb_monitor_stats();
1757 	}
1758 
1759 	/* Update the statistics collected for deciding LRU
1760 	eviction policy. */
1761 	buf_LRU_stat_update();
1762 
1763 	/* In case mutex_exit is not a memory barrier, it is
1764 	theoretically possible some threads are left waiting though
1765 	the semaphore is already released. Wake up those threads: */
1766 
1767 	sync_arr_wake_threads_if_sema_free();
1768 
1769 	if (sync_array_print_long_waits(&waiter, &sema)
1770 	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
1771 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
1772 	  if (srv_allow_writes_event->is_set) {
1773 #endif /* WITH_WSREP */
1774 		fatal_cnt++;
1775 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
1776 	  } else {
1777 		fprintf(stderr,
1778 			"WSREP: avoiding InnoDB self crash due to long "
1779 			"semaphore wait of  > %lu seconds\n"
1780 			"Server is processing SST donor operation, "
1781 			"fatal_cnt now: %lu",
1782 			(ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt);
1783 	  }
1784 #endif /* WITH_WSREP */
1785 		if (fatal_cnt > 10) {
1786 
1787 			fprintf(stderr,
1788 				"InnoDB: Error: semaphore wait has lasted"
1789 				" > %lu seconds\n"
1790 				"InnoDB: We intentionally crash the server,"
1791 				" because it appears to be hung.\n",
1792 				(ulong) srv_fatal_semaphore_wait_threshold);
1793 
1794 			ut_error;
1795 		}
1796 	} else {
1797 		fatal_cnt = 0;
1798 		old_waiter = waiter;
1799 		old_sema = sema;
1800 	}
1801 
1802 	/* Flush stderr so that a database user gets the output
1803 	to possible MySQL error file */
1804 
1805 	fflush(stderr);
1806 
1807 	sig_count = os_event_reset(srv_error_event);
1808 
1809 	os_event_wait_time_low(srv_error_event, 1000000, sig_count);
1810 
1811 	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
1812 
1813 		goto loop;
1814 	}
1815 
1816 	srv_error_monitor_active = FALSE;
1817 
1818 	/* We count the number of threads in os_thread_exit(). A created
1819 	thread should always use that to exit and not use return() to exit. */
1820 
1821 	os_thread_exit(NULL);
1822 
1823 	OS_THREAD_DUMMY_RETURN;
1824 }
1825 
1826 /******************************************************************//**
1827 Increment the server activity count. */
1828 UNIV_INTERN
1829 void
srv_inc_activity_count(void)1830 srv_inc_activity_count(void)
1831 /*========================*/
1832 {
1833 	srv_sys->activity_count.inc();
1834 }
1835 
1836 /**********************************************************************//**
1837 Check whether any background thread is active. If so return the thread
1838 type.
1839 @return SRV_NONE if all are suspended or have exited, thread
1840 type if any are still active. */
1841 UNIV_INTERN
1842 srv_thread_type
srv_get_active_thread_type(void)1843 srv_get_active_thread_type(void)
1844 /*============================*/
1845 {
1846 	srv_thread_type ret = SRV_NONE;
1847 
1848 	if (srv_read_only_mode) {
1849 		return(SRV_NONE);
1850 	}
1851 
1852 	srv_sys_mutex_enter();
1853 
1854 	for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
1855 		if (srv_sys->n_threads_active[i] != 0) {
1856 			ret = static_cast<srv_thread_type>(i);
1857 			break;
1858 		}
1859 	}
1860 
1861 	srv_sys_mutex_exit();
1862 
1863 	/* Check only on shutdown. */
1864 	if (ret == SRV_NONE
1865 	    && srv_shutdown_state != SRV_SHUTDOWN_NONE
1866 	    && trx_purge_state() != PURGE_STATE_DISABLED
1867 	    && trx_purge_state() != PURGE_STATE_EXIT) {
1868 
1869 		ret = SRV_PURGE;
1870 	}
1871 
1872 	return(ret);
1873 }
1874 
1875 /**********************************************************************//**
1876 Check whether any background thread are active. If so print which thread
1877 is active. Send the threads wakeup signal.
1878 @return name of thread that is active or NULL */
1879 UNIV_INTERN
1880 const char*
srv_any_background_threads_are_active(void)1881 srv_any_background_threads_are_active(void)
1882 /*=======================================*/
1883 {
1884 	const char*	thread_active = NULL;
1885 
1886 	if (srv_read_only_mode) {
1887 		return(NULL);
1888 	} else if (srv_error_monitor_active) {
1889 		thread_active = "srv_error_monitor_thread";
1890 	} else if (lock_sys->timeout_thread_active) {
1891 		thread_active = "srv_lock_timeout thread";
1892 	} else if (srv_monitor_active) {
1893 		thread_active = "srv_monitor_thread";
1894 	} else if (srv_buf_dump_thread_active) {
1895 		thread_active = "buf_dump_thread";
1896 	} else if (srv_dict_stats_thread_active) {
1897 		thread_active = "dict_stats_thread";
1898 	}
1899 
1900 	os_event_set(srv_error_event);
1901 	os_event_set(srv_monitor_event);
1902 	os_event_set(srv_buf_dump_event);
1903 	os_event_set(lock_sys->timeout_event);
1904 	os_event_set(dict_stats_event);
1905 
1906 	return(thread_active);
1907 }
1908 
1909 /*******************************************************************//**
1910 Tells the InnoDB server that there has been activity in the database
1911 and wakes up the master thread if it is suspended (not sleeping). Used
1912 in the MySQL interface. Note that there is a small chance that the master
1913 thread stays suspended (we do not protect our operation with the
1914 srv_sys_t->mutex, for performance reasons). */
1915 UNIV_INTERN
1916 void
srv_active_wake_master_thread(void)1917 srv_active_wake_master_thread(void)
1918 /*===============================*/
1919 {
1920 	if (srv_read_only_mode) {
1921 		return;
1922 	}
1923 
1924 	ut_ad(!srv_sys_mutex_own());
1925 
1926 	srv_inc_activity_count();
1927 
1928 	if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
1929 		srv_slot_t*	slot;
1930 
1931 		srv_sys_mutex_enter();
1932 
1933 		slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
1934 
1935 		/* Only if the master thread has been started. */
1936 
1937 		if (slot->in_use) {
1938 			ut_a(srv_slot_get_type(slot) == SRV_MASTER);
1939 
1940 			if (slot->suspended) {
1941 
1942 				slot->suspended = FALSE;
1943 
1944 				++srv_sys->n_threads_active[SRV_MASTER];
1945 
1946 				os_event_set(slot->event);
1947 			}
1948 		}
1949 
1950 		srv_sys_mutex_exit();
1951 	}
1952 }
1953 
1954 /*******************************************************************//**
1955 Tells the purge thread that there has been activity in the database
1956 and wakes up the purge thread if it is suspended (not sleeping).  Note
1957 that there is a small chance that the purge thread stays suspended
1958 (we do not protect our check with the srv_sys_t:mutex and the
1959 purge_sys->latch, for performance reasons). */
1960 UNIV_INTERN
1961 void
srv_wake_purge_thread_if_not_active(void)1962 srv_wake_purge_thread_if_not_active(void)
1963 /*=====================================*/
1964 {
1965 	ut_ad(!srv_sys_mutex_own());
1966 
1967 	if (purge_sys->state == PURGE_STATE_RUN
1968 	    && srv_sys->n_threads_active[SRV_PURGE] == 0) {
1969 
1970 		srv_release_threads(SRV_PURGE, 1);
1971 	}
1972 }
1973 
1974 /*******************************************************************//**
1975 Wakes up the master thread if it is suspended or being suspended. */
1976 UNIV_INTERN
1977 void
srv_wake_master_thread(void)1978 srv_wake_master_thread(void)
1979 /*========================*/
1980 {
1981 	ut_ad(!srv_sys_mutex_own());
1982 
1983 	srv_inc_activity_count();
1984 
1985 	srv_release_threads(SRV_MASTER, 1);
1986 }
1987 
1988 /*******************************************************************//**
1989 Get current server activity count. We don't hold srv_sys::mutex while
1990 reading this value as it is only used in heuristics.
1991 @return activity count. */
1992 UNIV_INTERN
1993 ulint
srv_get_activity_count(void)1994 srv_get_activity_count(void)
1995 /*========================*/
1996 {
1997 	return(srv_sys->activity_count);
1998 }
1999 
2000 /*******************************************************************//**
2001 Check if there has been any activity.
2002 @return FALSE if no change in activity counter. */
2003 UNIV_INTERN
2004 ibool
srv_check_activity(ulint old_activity_count)2005 srv_check_activity(
2006 /*===============*/
2007 	ulint		old_activity_count)	/*!< in: old activity count */
2008 {
2009 	return(srv_sys->activity_count != old_activity_count);
2010 }
2011 
2012 /********************************************************************//**
2013 The master thread is tasked to ensure that flush of log file happens
2014 once every second in the background. This is to ensure that not more
2015 than one second of trxs are lost in case of crash when
2016 innodb_flush_logs_at_trx_commit != 1 */
2017 static
2018 void
srv_sync_log_buffer_in_background(void)2019 srv_sync_log_buffer_in_background(void)
2020 /*===================================*/
2021 {
2022 	time_t	current_time = time(NULL);
2023 
2024 	srv_main_thread_op_info = "flushing log";
2025 	if (difftime(current_time, srv_last_log_flush_time)
2026 	    >= srv_flush_log_at_timeout) {
2027 		log_buffer_sync_in_background(TRUE);
2028 		srv_last_log_flush_time = current_time;
2029 		srv_log_writes_and_flush++;
2030 	}
2031 }
2032 
2033 /********************************************************************//**
2034 Make room in the table cache by evicting an unused table.
2035 @return number of tables evicted. */
2036 static
2037 ulint
srv_master_evict_from_table_cache(ulint pct_check)2038 srv_master_evict_from_table_cache(
2039 /*==============================*/
2040 	ulint	pct_check)	/*!< in: max percent to check */
2041 {
2042 	ulint	n_tables_evicted = 0;
2043 
2044 	rw_lock_x_lock(&dict_operation_lock);
2045 
2046 	dict_mutex_enter_for_mysql();
2047 
2048 	n_tables_evicted = dict_make_room_in_cache(
2049 		innobase_get_table_cache_size(), pct_check);
2050 
2051 	dict_mutex_exit_for_mysql();
2052 
2053 	rw_lock_x_unlock(&dict_operation_lock);
2054 
2055 	return(n_tables_evicted);
2056 }
2057 
2058 /*********************************************************************//**
2059 This function prints progress message every 60 seconds during server
2060 shutdown, for any activities that master thread is pending on. */
2061 static
2062 void
srv_shutdown_print_master_pending(ib_time_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)2063 srv_shutdown_print_master_pending(
2064 /*==============================*/
2065 	ib_time_t*	last_print_time,	/*!< last time the function
2066 						print the message */
2067 	ulint		n_tables_to_drop,	/*!< number of tables to
2068 						be dropped */
2069 	ulint		n_bytes_merged)		/*!< number of change buffer
2070 						just merged */
2071 {
2072 	ib_time_t	current_time;
2073 	double		time_elapsed;
2074 
2075 	current_time = ut_time();
2076 	time_elapsed = ut_difftime(current_time, *last_print_time);
2077 
2078 	if (time_elapsed > 60) {
2079 		*last_print_time = ut_time();
2080 
2081 		if (n_tables_to_drop) {
2082 			ut_print_timestamp(stderr);
2083 			fprintf(stderr, "  InnoDB: Waiting for "
2084 				"%lu table(s) to be dropped\n",
2085 				(ulong) n_tables_to_drop);
2086 		}
2087 
2088 		/* Check change buffer merge, we only wait for change buffer
2089 		merge if it is a slow shutdown */
2090 		if (!srv_fast_shutdown && n_bytes_merged) {
2091 			ut_print_timestamp(stderr);
2092 			fprintf(stderr, "  InnoDB: Waiting for change "
2093 				"buffer merge to complete\n"
2094 				"  InnoDB: number of bytes of change buffer "
2095 				"just merged:  %lu\n",
2096 				n_bytes_merged);
2097 		}
2098 	}
2099 }
2100 
2101 /*********************************************************************//**
2102 Perform the tasks that the master thread is supposed to do when the
2103 server is active. There are two types of tasks. The first category is
2104 of such tasks which are performed at each inovcation of this function.
2105 We assume that this function is called roughly every second when the
2106 server is active. The second category is of such tasks which are
2107 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
2108 static
2109 void
srv_master_do_active_tasks(void)2110 srv_master_do_active_tasks(void)
2111 /*============================*/
2112 {
2113 	ib_time_t	cur_time = ut_time();
2114 	ullint		counter_time = ut_time_us(NULL);
2115 
2116 	/* First do the tasks that we are suppose to do at each
2117 	invocation of this function. */
2118 
2119 	++srv_main_active_loops;
2120 
2121 	MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2122 
2123 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2124 	can drop tables lazily after there no longer are SELECT
2125 	queries to them. */
2126 	srv_main_thread_op_info = "doing background drop tables";
2127 	row_drop_tables_for_mysql_in_background();
2128 	MONITOR_INC_TIME_IN_MICRO_SECS(
2129 		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
2130 
2131 	if (srv_shutdown_state > 0) {
2132 		return;
2133 	}
2134 
2135 	/* make sure that there is enough reusable space in the redo
2136 	log files */
2137 	srv_main_thread_op_info = "checking free log space";
2138 	log_free_check();
2139 
2140 	/* Do an ibuf merge */
2141 	srv_main_thread_op_info = "doing insert buffer merge";
2142 	counter_time = ut_time_us(NULL);
2143 	ibuf_merge_in_background(false);
2144 	MONITOR_INC_TIME_IN_MICRO_SECS(
2145 		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2146 
2147 	/* Flush logs if needed */
2148 	srv_main_thread_op_info = "flushing log";
2149 	srv_sync_log_buffer_in_background();
2150 	MONITOR_INC_TIME_IN_MICRO_SECS(
2151 		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2152 
2153 	/* Now see if various tasks that are performed at defined
2154 	intervals need to be performed. */
2155 
2156 #ifdef MEM_PERIODIC_CHECK
2157 	/* Check magic numbers of every allocated mem block once in
2158 	SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
2159 	if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
2160 		mem_validate_all_blocks();
2161 		MONITOR_INC_TIME_IN_MICRO_SECS(
2162 			MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
2163 	}
2164 #endif
2165 	if (srv_shutdown_state > 0) {
2166 		return;
2167 	}
2168 
2169 	if (srv_shutdown_state > 0) {
2170 		return;
2171 	}
2172 
2173 	if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2174 		srv_main_thread_op_info = "enforcing dict cache limit";
2175 		srv_master_evict_from_table_cache(50);
2176 		MONITOR_INC_TIME_IN_MICRO_SECS(
2177 			MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2178 	}
2179 
2180 	if (srv_shutdown_state > 0) {
2181 		return;
2182 	}
2183 
2184 	/* Make a new checkpoint */
2185 	if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
2186 		srv_main_thread_op_info = "making checkpoint";
2187 		log_checkpoint(TRUE, FALSE);
2188 		MONITOR_INC_TIME_IN_MICRO_SECS(
2189 			MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
2190 	}
2191 }
2192 
2193 /*********************************************************************//**
2194 Perform the tasks that the master thread is supposed to do whenever the
2195 server is idle. We do check for the server state during this function
2196 and if the server has entered the shutdown phase we may return from
2197 the function without completing the required tasks.
2198 Note that the server can move to active state when we are executing this
2199 function but we don't check for that as we are suppose to perform more
2200 or less same tasks when server is active. */
2201 static
2202 void
srv_master_do_idle_tasks(void)2203 srv_master_do_idle_tasks(void)
2204 /*==========================*/
2205 {
2206 	ullint	counter_time;
2207 
2208 	++srv_main_idle_loops;
2209 
2210 	MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2211 
2212 
2213 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2214 	can drop tables lazily after there no longer are SELECT
2215 	queries to them. */
2216 	counter_time = ut_time_us(NULL);
2217 	srv_main_thread_op_info = "doing background drop tables";
2218 	row_drop_tables_for_mysql_in_background();
2219 	MONITOR_INC_TIME_IN_MICRO_SECS(
2220 		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2221 			 counter_time);
2222 
2223 	if (srv_shutdown_state > 0) {
2224 		return;
2225 	}
2226 
2227 	/* make sure that there is enough reusable space in the redo
2228 	log files */
2229 	srv_main_thread_op_info = "checking free log space";
2230 	log_free_check();
2231 
2232 	/* Do an ibuf merge */
2233 	counter_time = ut_time_us(NULL);
2234 	srv_main_thread_op_info = "doing insert buffer merge";
2235 	ibuf_merge_in_background(true);
2236 	MONITOR_INC_TIME_IN_MICRO_SECS(
2237 		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2238 
2239 	if (srv_shutdown_state > 0) {
2240 		return;
2241 	}
2242 
2243 	srv_main_thread_op_info = "enforcing dict cache limit";
2244 	srv_master_evict_from_table_cache(100);
2245 	MONITOR_INC_TIME_IN_MICRO_SECS(
2246 		MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2247 
2248 	/* Flush logs if needed */
2249 	srv_sync_log_buffer_in_background();
2250 	MONITOR_INC_TIME_IN_MICRO_SECS(
2251 		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2252 
2253 	if (srv_shutdown_state > 0) {
2254 		return;
2255 	}
2256 
2257 	/* Make a new checkpoint */
2258 	srv_main_thread_op_info = "making checkpoint";
2259 	log_checkpoint(TRUE, FALSE);
2260 	MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
2261 				       counter_time);
2262 }
2263 
2264 /*********************************************************************//**
2265 Perform the tasks during shutdown. The tasks that we do at shutdown
2266 depend on srv_fast_shutdown:
2267 2 => very fast shutdown => do no book keeping
2268 1 => normal shutdown => clear drop table queue and make checkpoint
2269 0 => slow shutdown => in addition to above do complete purge and ibuf
2270 merge
2271 @return TRUE if some work was done. FALSE otherwise */
2272 static
2273 ibool
srv_master_do_shutdown_tasks(ib_time_t * last_print_time)2274 srv_master_do_shutdown_tasks(
2275 /*=========================*/
2276 	ib_time_t*	last_print_time)/*!< last time the function
2277 					print the message */
2278 {
2279 	ulint		n_bytes_merged = 0;
2280 	ulint		n_tables_to_drop = 0;
2281 
2282 	ut_ad(!srv_read_only_mode);
2283 
2284 	++srv_main_shutdown_loops;
2285 
2286 	ut_a(srv_shutdown_state > 0);
2287 
2288 	/* In very fast shutdown none of the following is necessary */
2289 	if (srv_fast_shutdown == 2) {
2290 		return(FALSE);
2291 	}
2292 
2293 	/* ALTER TABLE in MySQL requires on Unix that the table handler
2294 	can drop tables lazily after there no longer are SELECT
2295 	queries to them. */
2296 	srv_main_thread_op_info = "doing background drop tables";
2297 	n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2298 
2299 	/* make sure that there is enough reusable space in the redo
2300 	log files */
2301 	srv_main_thread_op_info = "checking free log space";
2302 	log_free_check();
2303 
2304 	/* In case of normal shutdown we don't do ibuf merge or purge */
2305 	if (srv_fast_shutdown == 1) {
2306 		goto func_exit;
2307 	}
2308 
2309 	/* Do an ibuf merge */
2310 	srv_main_thread_op_info = "doing insert buffer merge";
2311 	n_bytes_merged = ibuf_merge_in_background(true);
2312 
2313 	/* Flush logs if needed */
2314 	srv_sync_log_buffer_in_background();
2315 
2316 func_exit:
2317 	/* Make a new checkpoint about once in 10 seconds */
2318 	srv_main_thread_op_info = "making checkpoint";
2319 	log_checkpoint(TRUE, FALSE);
2320 
2321 	/* Print progress message every 60 seconds during shutdown */
2322 	if (srv_shutdown_state > 0 && srv_print_verbose_log) {
2323 		srv_shutdown_print_master_pending(
2324 			last_print_time, n_tables_to_drop, n_bytes_merged);
2325 	}
2326 
2327 	return(n_bytes_merged || n_tables_to_drop);
2328 }
2329 
2330 /*********************************************************************//**
2331 Puts master thread to sleep. At this point we are using polling to
2332 service various activities. Master thread sleeps for one second before
2333 checking the state of the server again */
2334 static
2335 void
srv_master_sleep(void)2336 srv_master_sleep(void)
2337 /*==================*/
2338 {
2339 	srv_main_thread_op_info = "sleeping";
2340 	os_thread_sleep(1000000);
2341 	srv_main_thread_op_info = "";
2342 }
2343 
2344 /*********************************************************************//**
2345 The master thread controlling the server.
2346 @return	a dummy parameter */
2347 extern "C" UNIV_INTERN
2348 os_thread_ret_t
DECLARE_THREAD(srv_master_thread)2349 DECLARE_THREAD(srv_master_thread)(
2350 /*==============================*/
2351 	void*	arg MY_ATTRIBUTE((unused)))
2352 			/*!< in: a dummy parameter required by
2353 			os_thread_create */
2354 {
2355 	my_thread_init();
2356 
2357 	srv_slot_t*	slot;
2358 	ulint		old_activity_count = srv_get_activity_count();
2359 	ib_time_t	last_print_time;
2360 
2361 	ut_ad(!srv_read_only_mode);
2362 
2363 #ifdef UNIV_DEBUG_THREAD_CREATION
2364 	fprintf(stderr, "Master thread starts, id %lu\n",
2365 		os_thread_pf(os_thread_get_curr_id()));
2366 #endif /* UNIV_DEBUG_THREAD_CREATION */
2367 
2368 #ifdef UNIV_PFS_THREAD
2369 	pfs_register_thread(srv_master_thread_key);
2370 #endif /* UNIV_PFS_THREAD */
2371 
2372 	srv_main_thread_process_no = os_proc_get_number();
2373 	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2374 
2375 	slot = srv_reserve_slot(SRV_MASTER);
2376 	ut_a(slot == srv_sys->sys_threads);
2377 
2378 	last_print_time = ut_time();
2379 loop:
2380 	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2381 		goto suspend_thread;
2382 	}
2383 
2384 	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
2385 
2386 		srv_master_sleep();
2387 
2388 		MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
2389 
2390 		if (srv_check_activity(old_activity_count)) {
2391 			old_activity_count = srv_get_activity_count();
2392 			srv_master_do_active_tasks();
2393 		} else {
2394 			srv_master_do_idle_tasks();
2395 		}
2396 	}
2397 
2398 	while (srv_master_do_shutdown_tasks(&last_print_time)) {
2399 
2400 		/* Shouldn't loop here in case of very fast shutdown */
2401 		ut_ad(srv_fast_shutdown < 2);
2402 	}
2403 
2404 suspend_thread:
2405 	srv_main_thread_op_info = "suspending";
2406 
2407 	srv_suspend_thread(slot);
2408 
2409 	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
2410 	waits for database activity to die down when converting < 4.1.x
2411 	databases, and relies on this string being exactly as it is. InnoDB
2412 	manual also mentions this string in several places. */
2413 	srv_main_thread_op_info = "waiting for server activity";
2414 
2415 	os_event_wait(slot->event);
2416 
2417 	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
2418 		my_thread_end();
2419 		os_thread_exit(NULL);
2420 	}
2421 
2422 	goto loop;
2423 
2424 	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
2425 }
2426 
2427 /*********************************************************************//**
2428 Check if purge should stop.
2429 @return true if it should shutdown. */
2430 static
2431 bool
srv_purge_should_exit(ulint n_purged)2432 srv_purge_should_exit(
2433 /*==============*/
2434 	ulint		n_purged)	/*!< in: pages purged in last batch */
2435 {
2436 	switch (srv_shutdown_state) {
2437 	case SRV_SHUTDOWN_NONE:
2438 		/* Normal operation. */
2439 		break;
2440 
2441 	case SRV_SHUTDOWN_CLEANUP:
2442 	case SRV_SHUTDOWN_EXIT_THREADS:
2443 		/* Exit unless slow shutdown requested or all done. */
2444 		return(srv_fast_shutdown != 0 || n_purged == 0);
2445 
2446 	case SRV_SHUTDOWN_LAST_PHASE:
2447 	case SRV_SHUTDOWN_FLUSH_PHASE:
2448 		ut_error;
2449 	}
2450 
2451 	return(false);
2452 }
2453 
2454 /*********************************************************************//**
2455 Fetch and execute a task from the work queue.
2456 @return	true if a task was executed */
2457 static
2458 bool
srv_task_execute(void)2459 srv_task_execute(void)
2460 /*==================*/
2461 {
2462 	que_thr_t*	thr = NULL;
2463 
2464 	ut_ad(!srv_read_only_mode);
2465 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2466 
2467 	mutex_enter(&srv_sys->tasks_mutex);
2468 
2469 	if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2470 
2471 		thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2472 
2473 		ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2474 
2475 		UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
2476 	}
2477 
2478 	mutex_exit(&srv_sys->tasks_mutex);
2479 
2480 	if (thr != NULL) {
2481 
2482 		que_run_threads(thr);
2483 
2484 		os_atomic_inc_ulint(
2485 			&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
2486 	}
2487 
2488 	return(thr != NULL);
2489 }
2490 
2491 /*********************************************************************//**
2492 Worker thread that reads tasks from the work queue and executes them.
2493 @return	a dummy parameter */
2494 extern "C" UNIV_INTERN
2495 os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)2496 DECLARE_THREAD(srv_worker_thread)(
2497 /*==============================*/
2498 	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
2499 						required by os_thread_create */
2500 {
2501 	my_thread_init();
2502 
2503 	srv_slot_t*	slot;
2504 
2505 	ut_ad(!srv_read_only_mode);
2506 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2507 
2508 #ifdef UNIV_DEBUG_THREAD_CREATION
2509 	ut_print_timestamp(stderr);
2510 	fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
2511 		os_thread_pf(os_thread_get_curr_id()));
2512 #endif /* UNIV_DEBUG_THREAD_CREATION */
2513 
2514 	slot = srv_reserve_slot(SRV_WORKER);
2515 
2516 	ut_a(srv_n_purge_threads > 1);
2517 
2518 	srv_sys_mutex_enter();
2519 
2520 	ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
2521 
2522 	srv_sys_mutex_exit();
2523 
2524 	/* We need to ensure that the worker threads exit after the
2525 	purge coordinator thread. Otherwise the purge coordinaor can
2526 	end up waiting forever in trx_purge_wait_for_workers_to_complete() */
2527 
2528 	do {
2529 		srv_suspend_thread(slot);
2530 
2531 		os_event_wait(slot->event);
2532 
2533 		if (srv_task_execute()) {
2534 
2535 			/* If there are tasks in the queue, wakeup
2536 			the purge coordinator thread. */
2537 
2538 			srv_wake_purge_thread_if_not_active();
2539 		}
2540 
2541 		/* Note: we are checking the state without holding the
2542 		purge_sys->latch here. */
2543 	} while (purge_sys->state != PURGE_STATE_EXIT);
2544 
2545 	srv_free_slot(slot);
2546 
2547 	rw_lock_x_lock(&purge_sys->latch);
2548 
2549 	ut_a(!purge_sys->running);
2550 	ut_a(purge_sys->state == PURGE_STATE_EXIT);
2551 	ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
2552 
2553 	rw_lock_x_unlock(&purge_sys->latch);
2554 
2555 #ifdef UNIV_DEBUG_THREAD_CREATION
2556 	ut_print_timestamp(stderr);
2557 	fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
2558 		os_thread_pf(os_thread_get_curr_id()));
2559 #endif /* UNIV_DEBUG_THREAD_CREATION */
2560 
2561 	my_thread_end();
2562 	/* We count the number of threads in os_thread_exit(). A created
2563 	thread should always use that to exit and not use return() to exit. */
2564 	os_thread_exit(NULL);
2565 
2566 	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
2567 }
2568 
2569 /*********************************************************************//**
2570 Do the actual purge operation.
2571 @return length of history list before the last purge batch. */
2572 static
2573 ulint
srv_do_purge(ulint n_threads,ulint * n_total_purged)2574 srv_do_purge(
2575 /*=========*/
2576 	ulint		n_threads,	/*!< in: number of threads to use */
2577 	ulint*		n_total_purged)	/*!< in/out: total pages purged */
2578 {
2579 	ulint		n_pages_purged;
2580 
2581 	static ulint	count = 0;
2582 	static ulint	n_use_threads = 0;
2583 	static ulint	rseg_history_len = 0;
2584 	ulint		old_activity_count = srv_get_activity_count();
2585 
2586 	ut_a(n_threads > 0);
2587 	ut_ad(!srv_read_only_mode);
2588 
2589 	/* Purge until there are no more records to purge and there is
2590 	no change in configuration or server state. If the user has
2591 	configured more than one purge thread then we treat that as a
2592 	pool of threads and only use the extra threads if purge can't
2593 	keep up with updates. */
2594 
2595 	if (n_use_threads == 0) {
2596 		n_use_threads = n_threads;
2597 	}
2598 
2599 	do {
2600 		if (trx_sys->rseg_history_len > rseg_history_len
2601 		    || (srv_max_purge_lag > 0
2602 			&& rseg_history_len > srv_max_purge_lag)) {
2603 
2604 			/* History length is now longer than what it was
2605 			when we took the last snapshot. Use more threads. */
2606 
2607 			if (n_use_threads < n_threads) {
2608 				++n_use_threads;
2609 			}
2610 
2611 		} else if (srv_check_activity(old_activity_count)
2612 			   && n_use_threads > 1) {
2613 
2614 			/* History length same or smaller since last snapshot,
2615 			use fewer threads. */
2616 
2617 			--n_use_threads;
2618 
2619 			old_activity_count = srv_get_activity_count();
2620 		}
2621 
2622 		/* Ensure that the purge threads are less than what
2623 		was configured. */
2624 
2625 		ut_a(n_use_threads > 0);
2626 		ut_a(n_use_threads <= n_threads);
2627 
2628 		/* Take a snapshot of the history list before purge. */
2629 		if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
2630 			break;
2631 		}
2632 
2633 		n_pages_purged = trx_purge(
2634 			n_use_threads, srv_purge_batch_size,
2635 			(++count % TRX_SYS_N_RSEGS) == 0);
2636 
2637 		*n_total_purged += n_pages_purged;
2638 
2639 	} while (!srv_purge_should_exit(n_pages_purged)
2640 		 && n_pages_purged > 0
2641 		 && purge_sys->state == PURGE_STATE_RUN);
2642 
2643 	return(rseg_history_len);
2644 }
2645 
/*********************************************************************//**
Suspend the purge coordinator thread.
The thread marks itself suspended and clears purge_sys->running, then
waits on its slot event: indefinitely if purge has been stopped, for at
most SRV_PURGE_MAX_TIMEOUT if the history list has not shrunk below
rseg_history_len, and not at all otherwise. After each wait it decides
whether to go back to waiting ("stop"). It loops while purge is in
PURGE_STATE_STOP during normal operation, or while a timed wait expired
with an unchanged history list shorter than 5000. On return the slot is
marked as not suspended and is counted as the single active thread of
its type. */
static
void
srv_purge_coordinator_suspend(
/*==========================*/
	srv_slot_t*	slot,			/*!< in/out: Purge coordinator
						thread slot */
	ulint		rseg_history_len)	/*!< in: history list length
						before last purge */
{
	ut_ad(!srv_read_only_mode);
	ut_a(slot->type == SRV_PURGE);

	/* True when the next wait must be the untimed one. */
	bool		stop = false;

	/** Maximum wait time on the purge event, in micro-seconds. */
	static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;

	/* The value returned by srv_suspend_thread() is handed to the
	event waits below. */
	ib_int64_t	sig_count = srv_suspend_thread(slot);

	do {
		/* Result of the wait; 0 when no timed wait was done. */
		ulint		ret;

		rw_lock_x_lock(&purge_sys->latch);

		purge_sys->running = false;

		rw_lock_x_unlock(&purge_sys->latch);

		/* We don't wait right away on the non-timed wait because
		we want to signal the thread that wants to suspend purge. */

		if (stop) {
			os_event_wait_low(slot->event, sig_count);
			ret = 0;
		} else if (rseg_history_len <= trx_sys->rseg_history_len) {
			ret = os_event_wait_time_low(
				slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
		} else {
			/* We don't want to waste time waiting, if the
			history list increased by the time we got here,
			unless purge has been stopped. */
			ret = 0;
		}

		srv_sys_mutex_enter();

		/* The thread can be in state !suspended after the timeout
		but before this check if another thread sent a wakeup signal. */

		if (slot->suspended) {
			slot->suspended = FALSE;
			++srv_sys->n_threads_active[slot->type];
			ut_a(srv_sys->n_threads_active[slot->type] == 1);
		}

		srv_sys_mutex_exit();

		/* Suspend again right away and fetch a fresh value for
		the next wait. */
		sig_count = srv_suspend_thread(slot);

		rw_lock_x_lock(&purge_sys->latch);

		/* Keep waiting only if purge was stopped while the server
		is not shutting down. */
		stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
			&& purge_sys->state == PURGE_STATE_STOP);

		if (!stop) {
			ut_a(purge_sys->n_stop == 0);
			purge_sys->running = true;
		} else {
			ut_a(purge_sys->n_stop > 0);

			/* Signal that we are suspended. */
			os_event_set(purge_sys->event);
		}

		rw_lock_x_unlock(&purge_sys->latch);

		if (ret == OS_SYNC_TIME_EXCEEDED) {

			/* If no new records were added since the wait
			started, simply wait for new records. The magic
			number 5000 is an approximation for the case where
			we have cached UNDO log records which prevent
			truncate of the UNDO segments. */

			if (rseg_history_len == trx_sys->rseg_history_len
			    && trx_sys->rseg_history_len < 5000) {

				stop = true;
			}
		}

	} while (stop);

	srv_sys_mutex_enter();

	/* Undo the srv_suspend_thread() done at the end of the last loop
	iteration, unless the slot has been marked as not suspended in
	the meantime. */
	if (slot->suspended) {
		slot->suspended = FALSE;
		++srv_sys->n_threads_active[slot->type];
		ut_a(srv_sys->n_threads_active[slot->type] == 1);
	}

	srv_sys_mutex_exit();
}
2751 
2752 /*********************************************************************//**
2753 Purge coordinator thread that schedules the purge tasks.
2754 @return	a dummy parameter */
2755 extern "C" UNIV_INTERN
2756 os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)2757 DECLARE_THREAD(srv_purge_coordinator_thread)(
2758 /*=========================================*/
2759 	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
2760 						required by os_thread_create */
2761 {
2762 	my_thread_init();
2763 
2764 	srv_slot_t*	slot;
2765 	ulint           n_total_purged = ULINT_UNDEFINED;
2766 
2767 	ut_ad(!srv_read_only_mode);
2768 	ut_a(srv_n_purge_threads >= 1);
2769 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
2770 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2771 
2772 	rw_lock_x_lock(&purge_sys->latch);
2773 
2774 	purge_sys->running = true;
2775 	purge_sys->state = PURGE_STATE_RUN;
2776 
2777 	rw_lock_x_unlock(&purge_sys->latch);
2778 
2779 #ifdef UNIV_PFS_THREAD
2780 	pfs_register_thread(srv_purge_thread_key);
2781 #endif /* UNIV_PFS_THREAD */
2782 
2783 #ifdef UNIV_DEBUG_THREAD_CREATION
2784 	ut_print_timestamp(stderr);
2785 	fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
2786 		os_thread_pf(os_thread_get_curr_id()));
2787 #endif /* UNIV_DEBUG_THREAD_CREATION */
2788 
2789 	slot = srv_reserve_slot(SRV_PURGE);
2790 
2791 	ulint	rseg_history_len = trx_sys->rseg_history_len;
2792 
2793 	do {
2794 		/* If there are no records to purge or the last
2795 		purge didn't purge any records then wait for activity. */
2796 
2797 		if (srv_shutdown_state == SRV_SHUTDOWN_NONE
2798 		    && (purge_sys->state == PURGE_STATE_STOP
2799 			|| n_total_purged == 0)) {
2800 
2801 			srv_purge_coordinator_suspend(slot, rseg_history_len);
2802 		}
2803 
2804 		if (srv_purge_should_exit(n_total_purged)) {
2805 			ut_a(!slot->suspended);
2806 			break;
2807 		}
2808 
2809 		n_total_purged = 0;
2810 
2811 		rseg_history_len = srv_do_purge(
2812 			srv_n_purge_threads, &n_total_purged);
2813 
2814 	} while (!srv_purge_should_exit(n_total_purged));
2815 
2816 	/* Ensure that we don't jump out of the loop unless the
2817 	exit condition is satisfied. */
2818 
2819 	ut_a(srv_purge_should_exit(n_total_purged));
2820 
2821 	ulint	n_pages_purged = ULINT_MAX;
2822 
2823 	/* Ensure that all records are purged if it is not a fast shutdown.
2824 	This covers the case where a record can be added after we exit the
2825 	loop above. */
2826 	while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
2827 		n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
2828 	}
2829 
2830 	/* This trx_purge is called to remove any undo records (added by
2831 	background threads) after completion of the above loop. When
2832 	srv_fast_shutdown != 0, a large batch size can cause significant
2833 	delay in shutdown ,so reducing the batch size to magic number 20
2834 	(which was default in 5.5), which we hope will be sufficient to
2835 	remove all the undo records */
2836 	const	uint temp_batch_size = 20;
2837 
2838 	n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
2839 				      ? srv_purge_batch_size : temp_batch_size,
2840 				   true);
2841 	ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
2842 
2843 	/* The task queue should always be empty, independent of fast
2844 	shutdown state. */
2845 	ut_a(srv_get_task_queue_length() == 0);
2846 
2847 	srv_free_slot(slot);
2848 
2849 	/* Note that we are shutting down. */
2850 	rw_lock_x_lock(&purge_sys->latch);
2851 
2852 	purge_sys->state = PURGE_STATE_EXIT;
2853 
2854 	purge_sys->running = false;
2855 
2856 	rw_lock_x_unlock(&purge_sys->latch);
2857 
2858 #ifdef UNIV_DEBUG_THREAD_CREATION
2859 	ut_print_timestamp(stderr);
2860 	fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
2861 		os_thread_pf(os_thread_get_curr_id()));
2862 #endif /* UNIV_DEBUG_THREAD_CREATION */
2863 
2864 	/* Ensure that all the worker threads quit. */
2865 	if (srv_n_purge_threads > 1) {
2866 		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
2867 	}
2868 
2869 	my_thread_end();
2870 	/* We count the number of threads in os_thread_exit(). A created
2871 	thread should always use that to exit and not use return() to exit. */
2872 	os_thread_exit(NULL);
2873 
2874 	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
2875 }
2876 
2877 /**********************************************************************//**
2878 Enqueues a task to server task queue and releases a worker thread, if there
2879 is a suspended one. */
2880 UNIV_INTERN
2881 void
srv_que_task_enqueue_low(que_thr_t * thr)2882 srv_que_task_enqueue_low(
2883 /*=====================*/
2884 	que_thr_t*	thr)	/*!< in: query thread */
2885 {
2886 	ut_ad(!srv_read_only_mode);
2887 	mutex_enter(&srv_sys->tasks_mutex);
2888 
2889 	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
2890 
2891 	mutex_exit(&srv_sys->tasks_mutex);
2892 
2893 	srv_release_threads(SRV_WORKER, 1);
2894 }
2895 
2896 /**********************************************************************//**
2897 Get count of tasks in the queue.
2898 @return number of tasks in queue  */
2899 UNIV_INTERN
2900 ulint
srv_get_task_queue_length(void)2901 srv_get_task_queue_length(void)
2902 /*===========================*/
2903 {
2904 	ulint	n_tasks;
2905 
2906 	ut_ad(!srv_read_only_mode);
2907 
2908 	mutex_enter(&srv_sys->tasks_mutex);
2909 
2910 	n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
2911 
2912 	mutex_exit(&srv_sys->tasks_mutex);
2913 
2914 	return(n_tasks);
2915 }
2916 
2917 /**********************************************************************//**
2918 Wakeup the purge threads. */
2919 UNIV_INTERN
2920 void
srv_purge_wakeup(void)2921 srv_purge_wakeup(void)
2922 /*==================*/
2923 {
2924 	ut_ad(!srv_read_only_mode);
2925 
2926 	if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2927 
2928 		srv_release_threads(SRV_PURGE, 1);
2929 
2930 		if (srv_n_purge_threads > 1) {
2931 			ulint	n_workers = srv_n_purge_threads - 1;
2932 
2933 			srv_release_threads(SRV_WORKER, n_workers);
2934 		}
2935 	}
2936 }
2937 
2938 /** Check whether given space id is undo tablespace id
2939 @param[in]	space_id	space id to check
2940 @return true if it is undo tablespace else false. */
2941 bool
srv_is_undo_tablespace(ulint space_id)2942 srv_is_undo_tablespace(
2943 	ulint	space_id)
2944 {
2945 	if (srv_undo_space_id_start == 0) {
2946 		return (false);
2947 	}
2948 
2949 	return(space_id >= srv_undo_space_id_start
2950 	       && space_id < (srv_undo_space_id_start
2951 			      + srv_undo_tablespaces_open));
2952 }
2953