1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation. The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License, version 2.0, for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39
40 *****************************************************************************/
41
42 /**************************************************//**
43 @file srv/srv0srv.cc
44 The database server main program
45
46 Created 10/8/1995 Heikki Tuuri
47 *******************************************************/
48
49 /* Dummy comment */
50 #include "srv0srv.h"
51
52 #include "ut0mem.h"
53 #include "ut0ut.h"
54 #include "os0proc.h"
55 #include "mem0mem.h"
56 #include "mem0pool.h"
57 #include "sync0sync.h"
58 #include "que0que.h"
59 #include "log0recv.h"
60 #include "pars0pars.h"
61 #include "usr0sess.h"
62 #include "lock0lock.h"
63 #include "trx0purge.h"
64 #include "ibuf0ibuf.h"
65 #include "buf0flu.h"
66 #include "buf0lru.h"
67 #include "btr0sea.h"
68 #include "dict0load.h"
69 #include "dict0boot.h"
70 #include "dict0stats_bg.h" /* dict_stats_event */
71 #include "srv0start.h"
72 #include "row0mysql.h"
73 #include "ha_prototypes.h"
74 #include "trx0i_s.h"
75 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
76 #include "srv0mon.h"
77 #include "ut0crc32.h"
78
79 #include "mysql/plugin.h"
80 #include "mysql/service_thd_wait.h"
81
82 #ifdef WITH_WSREP
83 extern int wsrep_debug;
84 extern int wsrep_trx_is_aborting(void *thd_ptr);
85 #endif
86 /* The following is the maximum allowed duration of a lock wait. */
87 UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
88
89 /* How much data manipulation language (DML) statements need to be delayed,
90 in microseconds, in order to reduce the lagging of the purge thread. */
91 UNIV_INTERN ulint srv_dml_needed_delay = 0;
92
93 UNIV_INTERN ibool srv_monitor_active = FALSE;
94 UNIV_INTERN ibool srv_error_monitor_active = FALSE;
95
96 UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
97
98 UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
99
100 UNIV_INTERN const char* srv_main_thread_op_info = "";
101
102 /** Prefix used by MySQL to indicate pre-5.1 table name encoding */
103 const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
104
105 /* Server parameters which are read from the initfile */
106
107 /* The following three are dir paths which are catenated before file
108 names, where the file name itself may also contain a path */
109
110 UNIV_INTERN char* srv_data_home = NULL;
111
112 /** Rollback files directory, can be absolute. */
113 UNIV_INTERN char* srv_undo_dir = NULL;
114
115 /** The number of tablespaces to use for rollback segments. */
116 UNIV_INTERN ulong srv_undo_tablespaces = 8;
117
118 /** The number of UNDO tablespaces that are open and ready to use. */
119 UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
120
121 /* The number of rollback segments to use */
122 UNIV_INTERN ulong srv_undo_logs = 1;
123
124 #ifdef UNIV_LOG_ARCHIVE
125 UNIV_INTERN char* srv_arch_dir = NULL;
126 #endif /* UNIV_LOG_ARCHIVE */
127
128 /** Set if InnoDB must operate in read-only mode. We don't do any
129 recovery and open all tables in RO mode instead of RW mode. We don't
130 sync the max trx id to disk either. */
131 UNIV_INTERN my_bool srv_read_only_mode;
132 /** store to its own file each table created by a user; data
133 dictionary tables are in the system tablespace 0 */
134 UNIV_INTERN my_bool srv_file_per_table;
135 /** The file format to use on new *.ibd files. */
136 UNIV_INTERN ulint srv_file_format = 0;
137 /** Whether to check file format during startup. A value of
138 UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
139 set it to the highest format we support. */
140 UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
141 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
142 is greater than SRV_FORCE_NO_TRX_UNDO. */
143 UNIV_INTERN my_bool high_level_read_only;
144
145 #if UNIV_FORMAT_A
146 # error "UNIV_FORMAT_A must be 0!"
147 #endif
148
149 /** Place locks to records only i.e. do not use next-key locking except
150 on duplicate key checking and foreign key checking */
151 UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
152 /** Sort buffer size in index creation */
153 UNIV_INTERN ulong srv_sort_buf_size = 1048576;
154 /** Maximum modification log file size for online index creation */
155 UNIV_INTERN unsigned long long srv_online_max_size;
156
157 /* If this flag is TRUE, then we will use the native aio of the
158 OS (provided we compiled Innobase with it in), otherwise we will
159 use simulated aio we build below with threads.
160 Currently we support native aio on windows and linux */
161 UNIV_INTERN my_bool srv_use_native_aio = TRUE;
162 UNIV_INTERN my_bool srv_numa_interleave = FALSE;
163
164 #ifdef __WIN__
165 /* Windows native condition variables. We use runtime loading / function
166 pointers, because they are not available on Windows Server 2003 and
167 Windows XP/2000.
168
169 We use condition for events on Windows if possible, even if os_event
170 resembles Windows kernel event object well API-wise. The reason is
171 performance, kernel objects are heavyweights and WaitForSingleObject() is a
172 performance killer causing calling thread to context switch. Besides, Innodb
173 is preallocating large number (often millions) of os_events. With kernel event
174 objects it takes a big chunk out of non-paged pool, which is better suited
175 for tasks like IO than for storing idle event objects. */
176 UNIV_INTERN ibool srv_use_native_conditions = FALSE;
177 #endif /* __WIN__ */
178
179 UNIV_INTERN ulint srv_n_data_files = 0;
180 UNIV_INTERN char** srv_data_file_names = NULL;
181 /* size in database pages */
182 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
183
184 /* if TRUE, then we auto-extend the last data file */
185 UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
186 /* if != 0, this tells the max size auto-extending may increase the
187 last data file size */
188 UNIV_INTERN ulint srv_last_file_size_max = 0;
189 /* If the last data file is auto-extended, we add this
190 many pages to it at a time */
191 UNIV_INTERN ulong srv_auto_extend_increment = 8;
192 UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
193
194 /* If the following is TRUE we do not allow inserts etc. This protects
195 the user from forgetting the 'newraw' keyword to my.cnf */
196
197 UNIV_INTERN ibool srv_created_new_raw = FALSE;
198
199 UNIV_INTERN char* srv_log_group_home_dir = NULL;
200
201 UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
202 /* size in database pages */
203 UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
204 UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
205 /* size in database pages */
206 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
207 UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
208 UNIV_INTERN uint srv_flush_log_at_timeout = 1;
209 UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
210 UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
211
212 /* Try to flush dirty pages so as to avoid IO bursts at
213 the checkpoints. */
214 UNIV_INTERN char srv_adaptive_flushing = TRUE;
215
216 /** Maximum number of times allowed to conditionally acquire
217 mutex before switching to blocking wait on the mutex */
218 #define MAX_MUTEX_NOWAIT 20
219
220 /** Check whether the number of failed nonblocking mutex
221 acquisition attempts exceeds maximum allowed value. If so,
222 srv_printf_innodb_monitor() will request mutex acquisition
223 with mutex_enter(), which will wait until it gets the mutex. */
224 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
225
226 #ifdef WITH_INNODB_DISALLOW_WRITES
227 UNIV_INTERN os_event_t srv_allow_writes_event;
228 #endif /* WITH_INNODB_DISALLOW_WRITES */
229
230 /** The sort order table of the MySQL latin1_swedish_ci character set
231 collation */
232 UNIV_INTERN const byte* srv_latin1_ordering;
233
234 /* use os/external memory allocator */
235 UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
236 /* requested size in kilobytes */
237 UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
238 /* requested number of buffer pool instances */
239 UNIV_INTERN ulint srv_buf_pool_instances = 1;
240 /* number of locks to protect buf_pool->page_hash */
241 UNIV_INTERN ulong srv_n_page_hash_locks = 16;
242 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
243 UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
244 /** whether or not to flush neighbors of a block */
245 UNIV_INTERN ulong srv_flush_neighbors = 1;
246 /* previously requested size */
247 UNIV_INTERN ulint srv_buf_pool_old_size;
248 /* current size in kilobytes */
249 UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
250 /* size in bytes */
251 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
252 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
253
254 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
255 instead. */
256 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
257 UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
258 UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
259
260 /* Switch to enable random read ahead. */
261 UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
262 /* User settable value of the number of pages that must be present
263 in the buffer cache and accessed sequentially for InnoDB to trigger a
264 readahead request. */
265 UNIV_INTERN ulong srv_read_ahead_threshold = 56;
266
267 #ifdef UNIV_LOG_ARCHIVE
268 UNIV_INTERN ibool srv_log_archive_on = FALSE;
269 UNIV_INTERN ibool srv_archive_recovery = 0;
270 UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
271 #endif /* UNIV_LOG_ARCHIVE */
272
273 /* This parameter is used to throttle the number of insert buffers that are
274 merged in a batch. By increasing this parameter on a faster disk you can
275 possibly reduce the number of I/O operations performed to complete the
276 merge operation. The value of this parameter is used as is by the
277 background loop when the system is idle (low load), on a busy system
278 the parameter is scaled down by a factor of 4, this is to avoid putting
279 a heavier load on the I/O sub system. */
280
281 UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
282
283 UNIV_INTERN char* srv_file_flush_method_str = NULL;
284 UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
285 UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
286
287 UNIV_INTERN ulint srv_max_n_open_files = 300;
288
289 /* Number of IO operations per second the server can do */
290 UNIV_INTERN ulong srv_io_capacity = 200;
291 UNIV_INTERN ulong srv_max_io_capacity = 400;
292
293 /* The InnoDB main thread tries to keep the ratio of modified pages
294 in the buffer pool to all database pages in the buffer pool smaller than
295 the following number. But it is not guaranteed that the value stays below
296 that during a time of heavy update/insert activity. */
297
298 UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
299 UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50;
300
301 /* This is the percentage of log capacity at which adaptive flushing,
302 if enabled, will kick in. */
303 UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10;
304
305 /* Number of iterations over which adaptive flushing is averaged. */
306 UNIV_INTERN ulong srv_flushing_avg_loops = 30;
307
308 /* The number of purge threads to use.*/
309 UNIV_INTERN ulong srv_n_purge_threads = 1;
310
311 /* the number of pages to purge in one batch */
312 UNIV_INTERN ulong srv_purge_batch_size = 20;
313
314 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
315 NULL value when collecting statistics. By default, it is set to
316 SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
317 UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
318
319 UNIV_INTERN srv_stats_t srv_stats;
320
321 /* structure to pass status variables to MySQL */
322 UNIV_INTERN export_var_t export_vars;
323
324 /** Normally 0. When nonzero, skip some phases of crash recovery,
325 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
326 by SELECT or mysqldump. When this is nonzero, we do not allow any user
327 modifications to the data. */
328 UNIV_INTERN ulong srv_force_recovery;
329 #ifndef DBUG_OFF
330 /** Inject a crash at different steps of the recovery process.
331 This is for testing and debugging only. */
332 UNIV_INTERN ulong srv_force_recovery_crash;
333 #endif /* !DBUG_OFF */
334
335 /** Print all user-level transactions deadlocks to mysqld stderr */
336
337 UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
338
339 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
340 UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
341
342 /* If the following is set to 1 then we do not run purge and insert buffer
343 merge to completion before shutdown. If it is set to 2, do not even flush the
344 buffer pool to data files at the shutdown: we effectively 'crash'
345 InnoDB (but lose no committed transactions). */
346 UNIV_INTERN ulint srv_fast_shutdown = 0;
347
348 /* Generate a innodb_status.<pid> file */
349 UNIV_INTERN ibool srv_innodb_status = FALSE;
350
351 /* When estimating number of different key values in an index, sample
352 this many index pages, there are 2 ways to calculate statistics:
353 * persistent stats that are calculated by ANALYZE TABLE and saved
354 in the innodb database.
355 * quick transient stats, that are used if persistent stats for the given
356 table/index are not found in the innodb database */
357 UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
358 UNIV_INTERN my_bool srv_stats_persistent = TRUE;
359 UNIV_INTERN my_bool srv_stats_include_delete_marked = FALSE;
360 UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
361 UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
362
363 UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
364
365 /** doublewrite buffer is 2MB in size i.e.: it can hold 128 16K pages.
366 The following parameter is the size of the buffer that is used for
367 batch flushing i.e.: LRU flushing and flush_list flushing. The rest
368 of the pages are used for single page flushing. */
369 UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
370
371 UNIV_INTERN ulong srv_replication_delay = 0;
372
373 /*-------------------------------------------*/
374 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
375 UNIV_INTERN ulong srv_spin_wait_delay = 6;
376 UNIV_INTERN ibool srv_priority_boost = TRUE;
377
378 #ifdef UNIV_DEBUG
379 UNIV_INTERN ibool srv_print_thread_releases = FALSE;
380 UNIV_INTERN ibool srv_print_lock_waits = FALSE;
381 UNIV_INTERN ibool srv_print_buf_io = FALSE;
382 UNIV_INTERN ibool srv_print_log_io = FALSE;
383 UNIV_INTERN ibool srv_print_latch_waits = FALSE;
384 #endif /* UNIV_DEBUG */
385
386 static ulint srv_n_rows_inserted_old = 0;
387 static ulint srv_n_rows_updated_old = 0;
388 static ulint srv_n_rows_deleted_old = 0;
389 static ulint srv_n_rows_read_old = 0;
390
391 UNIV_INTERN ulint srv_truncated_status_writes = 0;
392 UNIV_INTERN ulint srv_available_undo_logs = 0;
393
394 /* Set the following to 0 if you want InnoDB to write messages on
395 stderr on startup/shutdown. */
396 UNIV_INTERN ibool srv_print_verbose_log = TRUE;
397 UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE;
398 UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE;
399 UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
400 UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
401
402 /* Array of English strings describing the current state of an
403 i/o handler thread */
404
405 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
406 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
407
408 UNIV_INTERN time_t srv_last_monitor_time;
409
410 UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex;
411
412 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
413 UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
414
415 #ifdef UNIV_PFS_MUTEX
416 # ifndef HAVE_ATOMIC_BUILTINS
417 /* Key to register server_mutex with performance schema */
418 UNIV_INTERN mysql_pfs_key_t server_mutex_key;
419 # endif /* !HAVE_ATOMIC_BUILTINS */
420 /** Key to register srv_innodb_monitor_mutex with performance schema */
421 UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
422 /** Key to register srv_monitor_file_mutex with performance schema */
423 UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
424 /** Key to register srv_dict_tmpfile_mutex with performance schema */
425 UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
426 /** Key to register the mutex with performance schema */
427 UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
428 /** Key to register srv_sys_t::mutex with performance schema */
429 UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
430 /** Key to register srv_sys_t::tasks_mutex with performance schema */
431 UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
432 #endif /* UNIV_PFS_MUTEX */
433
434 /** Temporary file for innodb monitor output */
435 UNIV_INTERN FILE* srv_monitor_file;
436 /** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
437 This mutex has a very high rank; threads reserving it should not
438 be holding any InnoDB latches. */
439 UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
440 /** Temporary file for output from the data dictionary */
441 UNIV_INTERN FILE* srv_dict_tmpfile;
442 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
443 This mutex has a very low rank; threads reserving it should not
444 acquire any further latches or sleep before releasing this one. */
445 UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
446 /** Temporary file for miscellaneous diagnostic output */
447 UNIV_INTERN FILE* srv_misc_tmpfile;
448
449 UNIV_INTERN ulint srv_main_thread_process_no = 0;
450 UNIV_INTERN ulint srv_main_thread_id = 0;
451
452 /* The following counts are used by the srv_master_thread. */
453
454 /** Iterations of the loop bounded by 'srv_active' label. */
455 static ulint srv_main_active_loops = 0;
456 /** Iterations of the loop bounded by the 'srv_idle' label. */
457 static ulint srv_main_idle_loops = 0;
458 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
459 static ulint srv_main_shutdown_loops = 0;
460 /** Log writes involving flush. */
461 static ulint srv_log_writes_and_flush = 0;
462
463 /* This is only ever touched by the master thread. It records the
464 time when the last flush of log file has happened. The master
465 thread ensures that we flush the log files at least once per
466 second. */
467 static time_t srv_last_log_flush_time;
468
469 /* Interval in seconds at which various tasks are performed by the
470 master thread when server is active. In order to balance the workload,
471 we should try to keep intervals such that they are not multiple of
472 each other. For example, if we have intervals for various tasks
473 defined as 5, 10, 15, 60 then all tasks will be performed when
474 current_time % 60 == 0 and no tasks will be performed when
475 current_time % 5 != 0. */
476
477 # define SRV_MASTER_CHECKPOINT_INTERVAL (7)
478 # define SRV_MASTER_PURGE_INTERVAL (10)
479 #ifdef MEM_PERIODIC_CHECK
480 # define SRV_MASTER_MEM_VALIDATE_INTERVAL (13)
481 #endif /* MEM_PERIODIC_CHECK */
482 # define SRV_MASTER_DICT_LRU_INTERVAL (47)
483
/** Acquire srv_sys->mutex, the mutex of the server system struct. */
#define srv_sys_mutex_enter()	do { mutex_enter(&srv_sys->mutex); } while (0)

/** Test if srv_sys->mutex is owned. The result is never true when
srv_read_only_mode is set. */
#define srv_sys_mutex_own()					\
	(mutex_own(&srv_sys->mutex) && !srv_read_only_mode)

/** Release srv_sys->mutex. */
#define srv_sys_mutex_exit()	do { mutex_exit(&srv_sys->mutex); } while (0)
497
/** Lock wait timeout to apply to a transaction: the value of
thd_lock_wait_timeout() for the transaction's mysql_thd when
(trx)->lock.allowed_to_wait is set, otherwise 0.
NOTE: the argument may be evaluated twice. */
#define fetch_lock_wait_timeout(trx)				\
	(!(trx)->lock.allowed_to_wait				\
	 ? 0							\
	 : thd_lock_wait_timeout((trx)->mysql_thd))
502
503 /*
504 IMPLEMENTATION OF THE SERVER MAIN PROGRAM
505 =========================================
506
507 There is the following analogue between this database
508 server and an operating system kernel:
509
510 DB concept equivalent OS concept
511 ---------- ---------------------
512 transaction -- process;
513
514 query thread -- thread;
515
516 lock -- semaphore;
517
518 kernel -- kernel;
519
520 query thread execution:
521 (a) without lock mutex
522 reserved -- process executing in user mode;
523 (b) with lock mutex reserved
524 -- process executing in kernel mode;
525
526 The server has several background threads all running at the same
527 priority as user threads. It periodically checks if there is anything
528 happening in the server which requires intervention of the master
529 thread. Such situations may be, for example, when flushing of dirty
530 blocks is needed in the buffer pool or old version of database rows
531 have to be cleaned away (purged). The user can configure a separate
532 dedicated purge thread(s) too, in which case the master thread does not
533 do any purging.
534
535 The threads which we call user threads serve the queries of the MySQL
536 server. They run at normal priority.
537
538 When there is no activity in the system, also the master thread
539 suspends itself to wait for an event making the server totally silent.
540
541 There is still one complication in our server design. If a
542 background utility thread obtains a resource (e.g., mutex) needed by a user
543 thread, and there is also some other user activity in the system,
544 the user thread may have to wait indefinitely long for the
545 resource, as the OS does not schedule a background thread if
546 there is some other runnable user thread. This problem is called
547 priority inversion in real-time programming.
548
549 One solution to the priority inversion problem would be to keep record
550 of which thread owns which resource and in the above case boost the
551 priority of the background thread so that it will be scheduled and it
552 can release the resource. This solution is called priority inheritance
553 in real-time programming. A drawback of this solution is that the overhead
554 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
555 MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
556 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
557 that the thread cannot store the information in the resource , say mutex,
558 itself, because competing threads could wipe out the information if it is
559 stored before acquiring the mutex, and if it stored afterwards, the
560 information is outdated for the time of one machine instruction, at least.
561 (To be precise, the information could be stored to lock_word in mutex if
562 the machine supports atomic swap.)
563
564 The above solution with priority inheritance may become actual in the
565 future, currently we do not implement any priority twiddling solution.
566 Our general aim is to reduce the contention of all mutexes by making
567 them more fine grained.
568
569 The thread table contains information of the current status of each
570 thread existing in the system, and also the event semaphores used in
571 suspending the master thread and utility threads when they have nothing
572 to do. The thread table can be seen as an analogue to the process table
573 in a traditional Unix implementation. */
574
575 /** The server system struct */
576 struct srv_sys_t{
577 ib_mutex_t tasks_mutex; /*!< variable protecting the
578 tasks queue */
579 UT_LIST_BASE_NODE_T(que_thr_t)
580 tasks; /*!< task queue */
581
582 ib_mutex_t mutex; /*!< variable protecting the
583 fields below. */
584 ulint n_sys_threads; /*!< size of the sys_threads
585 array */
586
587 srv_slot_t* sys_threads; /*!< server thread table */
588
589 ulint n_threads_active[SRV_MASTER + 1];
590 /*!< number of threads active
591 in a thread class */
592
593 srv_stats_t::ulint_ctr_1_t
594 activity_count; /*!< For tracking server
595 activity */
596 };
597
598 #ifndef HAVE_ATOMIC_BUILTINS
599 /** Mutex protecting some server global variables. */
600 UNIV_INTERN ib_mutex_t server_mutex;
601 #endif /* !HAVE_ATOMIC_BUILTINS */
602
603 static srv_sys_t* srv_sys = NULL;
604
605 /** Event to signal the monitor thread. */
606 UNIV_INTERN os_event_t srv_monitor_event;
607
608 /** Event to signal the error thread */
609 UNIV_INTERN os_event_t srv_error_event;
610
611 /** Event to signal the buffer pool dump/load thread */
612 UNIV_INTERN os_event_t srv_buf_dump_event;
613
614 /** The buffer pool dump/load file name */
615 UNIV_INTERN char* srv_buf_dump_filename;
616
617 /** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
618 and/or load it during startup. */
619 UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE;
620 UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE;
621
622 /** Slot index in the srv_sys->sys_threads array for the purge thread. */
623 static const ulint SRV_PURGE_SLOT = 1;
624
625 /** Slot index in the srv_sys->sys_threads array for the master thread. */
626 static const ulint SRV_MASTER_SLOT = 0;
627
628 /*********************************************************************//**
629 Prints counters for work done by srv_master_thread. */
630 static
631 void
srv_print_master_thread_info(FILE * file)632 srv_print_master_thread_info(
633 /*=========================*/
634 FILE *file) /* in: output stream */
635 {
636 fprintf(file, "srv_master_thread loops: %lu srv_active, "
637 "%lu srv_shutdown, %lu srv_idle\n",
638 srv_main_active_loops,
639 srv_main_shutdown_loops,
640 srv_main_idle_loops);
641 fprintf(file, "srv_master_thread log flush and writes: %lu\n",
642 srv_log_writes_and_flush);
643 }
644
645 /*********************************************************************//**
646 Sets the info describing an i/o thread current state. */
647 UNIV_INTERN
648 void
srv_set_io_thread_op_info(ulint i,const char * str)649 srv_set_io_thread_op_info(
650 /*======================*/
651 ulint i, /*!< in: the 'segment' of the i/o thread */
652 const char* str) /*!< in: constant char string describing the
653 state */
654 {
655 ut_a(i < SRV_MAX_N_IO_THREADS);
656
657 srv_io_thread_op_info[i] = str;
658 }
659
660 /*********************************************************************//**
661 Resets the info describing an i/o thread current state. */
662 UNIV_INTERN
663 void
srv_reset_io_thread_op_info()664 srv_reset_io_thread_op_info()
665 /*=========================*/
666 {
667 for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
668 srv_io_thread_op_info[i] = "not started yet";
669 }
670 }
671
#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the type of a thread table slot. SRV_WORKER, SRV_PURGE and
SRV_MASTER are accepted; SRV_NONE or any other value triggers ut_error.
@return TRUE if ok */
static
ibool
srv_thread_type_validate(
/*=====================*/
	srv_thread_type	type)	/*!< in: thread type */
{
	if (type == SRV_WORKER
	    || type == SRV_PURGE
	    || type == SRV_MASTER) {

		return(TRUE);
	}

	/* Not a type that a reserved slot may carry. */
	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
694
695 /*********************************************************************//**
696 Gets the type of a thread table slot.
697 @return thread type */
698 static
699 srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)700 srv_slot_get_type(
701 /*==============*/
702 const srv_slot_t* slot) /*!< in: thread slot */
703 {
704 srv_thread_type type = slot->type;
705 ut_ad(srv_thread_type_validate(type));
706 return(type);
707 }
708
709 /*********************************************************************//**
710 Reserves a slot in the thread table for the current thread.
711 @return reserved slot */
712 static
713 srv_slot_t*
srv_reserve_slot(srv_thread_type type)714 srv_reserve_slot(
715 /*=============*/
716 srv_thread_type type) /*!< in: type of the thread */
717 {
718 srv_slot_t* slot = 0;
719
720 srv_sys_mutex_enter();
721
722 ut_ad(srv_thread_type_validate(type));
723
724 switch (type) {
725 case SRV_MASTER:
726 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
727 break;
728
729 case SRV_PURGE:
730 slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
731 break;
732
733 case SRV_WORKER:
734 /* Find an empty slot, skip the master and purge slots. */
735 for (slot = &srv_sys->sys_threads[2];
736 slot->in_use;
737 ++slot) {
738
739 ut_a(slot < &srv_sys->sys_threads[
740 srv_sys->n_sys_threads]);
741 }
742 break;
743
744 case SRV_NONE:
745 ut_error;
746 }
747
748 ut_a(!slot->in_use);
749
750 slot->in_use = TRUE;
751 slot->suspended = FALSE;
752 slot->type = type;
753
754 ut_ad(srv_slot_get_type(slot) == type);
755
756 ++srv_sys->n_threads_active[type];
757
758 srv_sys_mutex_exit();
759
760 return(slot);
761 }
762
763 /*********************************************************************//**
764 Suspends the calling thread to wait for the event in its thread slot.
765 @return the current signal count of the event. */
766 static
767 ib_int64_t
srv_suspend_thread_low(srv_slot_t * slot)768 srv_suspend_thread_low(
769 /*===================*/
770 srv_slot_t* slot) /*!< in/out: thread slot */
771 {
772
773 ut_ad(!srv_read_only_mode);
774 ut_ad(srv_sys_mutex_own());
775
776 ut_ad(slot->in_use);
777
778 srv_thread_type type = srv_slot_get_type(slot);
779
780 switch (type) {
781 case SRV_NONE:
782 ut_error;
783
784 case SRV_MASTER:
785 /* We have only one master thread and it
786 should be the first entry always. */
787 ut_a(srv_sys->n_threads_active[type] == 1);
788 break;
789
790 case SRV_PURGE:
791 /* We have only one purge coordinator thread
792 and it should be the second entry always. */
793 ut_a(srv_sys->n_threads_active[type] == 1);
794 break;
795
796 case SRV_WORKER:
797 ut_a(srv_n_purge_threads > 1);
798 ut_a(srv_sys->n_threads_active[type] > 0);
799 break;
800 }
801
802 ut_a(!slot->suspended);
803 slot->suspended = TRUE;
804
805 ut_a(srv_sys->n_threads_active[type] > 0);
806
807 srv_sys->n_threads_active[type]--;
808
809 return(os_event_reset(slot->event));
810 }
811
812 /*********************************************************************//**
813 Suspends the calling thread to wait for the event in its thread slot.
814 @return the current signal count of the event. */
815 static
816 ib_int64_t
srv_suspend_thread(srv_slot_t * slot)817 srv_suspend_thread(
818 /*===============*/
819 srv_slot_t* slot) /*!< in/out: thread slot */
820 {
821 srv_sys_mutex_enter();
822
823 ib_int64_t sig_count = srv_suspend_thread_low(slot);
824
825 srv_sys_mutex_exit();
826
827 return(sig_count);
828 }
829
830 /*********************************************************************//**
831 Releases threads of the type given from suspension in the thread table.
832 NOTE! The server mutex has to be reserved by the caller!
833 @return number of threads released: this may be less than n if not
834 enough threads were suspended at the moment. */
835 UNIV_INTERN
836 ulint
srv_release_threads(srv_thread_type type,ulint n)837 srv_release_threads(
838 /*================*/
839 srv_thread_type type, /*!< in: thread type */
840 ulint n) /*!< in: number of threads to release */
841 {
842 ulint i;
843 ulint count = 0;
844
845 ut_ad(srv_thread_type_validate(type));
846 ut_ad(n > 0);
847
848 srv_sys_mutex_enter();
849
850 for (i = 0; i < srv_sys->n_sys_threads; i++) {
851 srv_slot_t* slot;
852
853 slot = &srv_sys->sys_threads[i];
854
855 if (slot->in_use
856 && srv_slot_get_type(slot) == type
857 && slot->suspended) {
858
859 switch (type) {
860 case SRV_NONE:
861 ut_error;
862
863 case SRV_MASTER:
864 /* We have only one master thread and it
865 should be the first entry always. */
866 ut_a(n == 1);
867 ut_a(i == SRV_MASTER_SLOT);
868 ut_a(srv_sys->n_threads_active[type] == 0);
869 break;
870
871 case SRV_PURGE:
872 /* We have only one purge coordinator thread
873 and it should be the second entry always. */
874 ut_a(n == 1);
875 ut_a(i == SRV_PURGE_SLOT);
876 ut_a(srv_n_purge_threads > 0);
877 ut_a(srv_sys->n_threads_active[type] == 0);
878 break;
879
880 case SRV_WORKER:
881 ut_a(srv_n_purge_threads > 1);
882 ut_a(srv_sys->n_threads_active[type]
883 < srv_n_purge_threads - 1);
884 break;
885 }
886
887 slot->suspended = FALSE;
888
889 ++srv_sys->n_threads_active[type];
890
891 os_event_set(slot->event);
892
893 if (++count == n) {
894 break;
895 }
896 }
897 }
898
899 srv_sys_mutex_exit();
900
901 return(count);
902 }
903
904 /*********************************************************************//**
905 Release a thread's slot. */
906 static
907 void
srv_free_slot(srv_slot_t * slot)908 srv_free_slot(
909 /*==========*/
910 srv_slot_t* slot) /*!< in/out: thread slot */
911 {
912 srv_sys_mutex_enter();
913
914 if (!slot->suspended) {
915 /* Mark the thread as inactive. */
916 srv_suspend_thread_low(slot);
917 }
918
919 /* Free the slot for reuse. */
920 ut_ad(slot->in_use);
921 slot->in_use = FALSE;
922
923 srv_sys_mutex_exit();
924 }
925
926 /*********************************************************************//**
927 Initializes the server. */
928 UNIV_INTERN
929 void
srv_init(void)930 srv_init(void)
931 /*==========*/
932 {
933 ulint n_sys_threads = 0;
934 ulint srv_sys_sz = sizeof(*srv_sys);
935
936 #ifndef HAVE_ATOMIC_BUILTINS
937 mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
938 #endif /* !HAVE_ATOMIC_BUILTINS */
939
940 mutex_create(srv_innodb_monitor_mutex_key,
941 &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
942
943 if (!srv_read_only_mode) {
944
945 /* Number of purge threads + master thread */
946 n_sys_threads = srv_n_purge_threads + 1;
947
948 srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
949 }
950
951 srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
952
953 srv_sys->n_sys_threads = n_sys_threads;
954
955 if (!srv_read_only_mode) {
956
957 mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
958
959 mutex_create(srv_sys_tasks_mutex_key,
960 &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
961
962 srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
963
964 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
965 srv_slot_t* slot = &srv_sys->sys_threads[i];
966
967 slot->event = os_event_create();
968
969 ut_a(slot->event);
970 }
971
972 srv_error_event = os_event_create();
973
974 srv_monitor_event = os_event_create();
975
976 srv_buf_dump_event = os_event_create();
977
978 UT_LIST_INIT(srv_sys->tasks);
979 }
980
981 /* page_zip_stat_per_index_mutex is acquired from:
982 1. page_zip_compress() (after SYNC_FSP)
983 2. page_zip_decompress()
984 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
985 4. innodb_cmp_per_index_update(), no other latches
986 since we do not acquire any other latches while holding this mutex,
987 it can have very low level. We pick SYNC_ANY_LATCH for it. */
988
989 mutex_create(
990 page_zip_stat_per_index_mutex_key,
991 &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
992
993 /* Create dummy indexes for infimum and supremum records */
994
995 dict_ind_init();
996
997 srv_conc_init();
998
999 #ifdef WITH_INNODB_DISALLOW_WRITES
1000 /* Writes have to be enabled on init or else we hang. Thus, we
1001 always set the event here regardless of innobase_disallow_writes.
1002 That flag will always be 0 at this point because it isn't settable
1003 via my.cnf or command line arg. */
1004 srv_allow_writes_event = os_event_create();
1005 os_event_set(srv_allow_writes_event);
1006 #endif /* WITH_INNODB_DISALLOW_WRITES */
1007 /* Initialize some INFORMATION SCHEMA internal structures */
1008 trx_i_s_cache_init(trx_i_s_cache);
1009
1010 ut_crc32_init();
1011
1012 dict_mem_init();
1013 }
1014
/*********************************************************************//**
Frees the data structures created in srv_init(): calls srv_conc_free(),
releases the srv_sys allocation (which also holds the thread slot array),
frees the INFORMATION SCHEMA cache and, unless running in read-only mode,
frees the buffer pool dump event.
NOTE(review): the slot events, srv_error_event and srv_monitor_event that
srv_init() creates are not freed here; presumably they are released
elsewhere during shutdown - confirm. */
UNIV_INTERN
void
srv_free(void)
/*==========*/
{
	srv_conc_free();

	/* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have
	been freed by sync_close() already. */
	mem_free(srv_sys);
	/* Reset the global so that a stale pointer is never left behind. */
	srv_sys = NULL;

	trx_i_s_cache_free(trx_i_s_cache);

	/* srv_init() creates this event only when not in read-only mode,
	so it is freed under the same condition. */
	if (!srv_read_only_mode) {
		os_event_free(srv_buf_dump_event);
		srv_buf_dump_event = NULL;
	}
}
1036
/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage. The individual subsystem initializers are invoked in a
fixed sequence; NOTE(review): the order presumably reflects dependencies
between the subsystems and should not be changed without checking them. */
UNIV_INTERN
void
srv_general_init(void)
/*==================*/
{
	ut_mem_init();
	/* Reset the system variables in the recovery module. */
	recv_sys_var_init();
	os_sync_init();
	sync_init();
	/* The memory pool is sized by the srv_mem_pool_size setting. */
	mem_init(srv_mem_pool_size);
	que_init();
	row_mysql_init();
}
1054
1055 /*********************************************************************//**
1056 Normalizes init parameter values to use units we use inside InnoDB. */
1057 static
1058 void
srv_normalize_init_values(void)1059 srv_normalize_init_values(void)
1060 /*===========================*/
1061 {
1062 ulint n;
1063 ulint i;
1064
1065 n = srv_n_data_files;
1066
1067 for (i = 0; i < n; i++) {
1068 srv_data_file_sizes[i] = srv_data_file_sizes[i]
1069 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1070 }
1071
1072 srv_last_file_size_max = srv_last_file_size_max
1073 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1074
1075 srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1076
1077 srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1078
1079 srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1080 }
1081
/*********************************************************************//**
Boots the InnoDB server: normalizes the startup parameters, performs the
general initialization and then initializes this module and the monitor
counters, in that order. */
UNIV_INTERN
void
srv_boot(void)
/*==========*/
{
	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	srv_normalize_init_values();

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();
	/* NOTE(review): presumably sets up the monitor counter subsystem;
	it runs only after srv_init() has completed - confirm the
	dependency before reordering. */
	srv_mon_create();
}
1104
/******************************************************************//**
Refreshes the values used to calculate per-second averages: records the
current time as the start of a new measurement interval, asks the I/O,
log and buffer pool modules to refresh their statistics and snapshots
the adaptive hash and row operation counters into their "_old" copies.
Everything is done while holding srv_innodb_monitor_mutex. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	/* Start of the new averaging interval. */
	srv_last_monitor_time = time(NULL);

	os_aio_refresh_stats();

	/* Baselines for the hash / non-hash search rates. */
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats_all();

	/* Baselines for the row operation rates. */
	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
	srv_n_rows_updated_old = srv_stats.n_rows_updated;
	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
	srv_n_rows_read_old = srv_stats.n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}
1132
1133 /******************************************************************//**
1134 Outputs to a file the output of the InnoDB Monitor.
1135 @return FALSE if not all information printed
1136 due to failure to obtain necessary mutex */
1137 UNIV_INTERN
1138 ibool
srv_printf_innodb_monitor(FILE * file,ibool nowait,ulint * trx_start_pos,ulint * trx_end)1139 srv_printf_innodb_monitor(
1140 /*======================*/
1141 FILE* file, /*!< in: output stream */
1142 ibool nowait, /*!< in: whether to wait for the
1143 lock_sys_t:: mutex */
1144 ulint* trx_start_pos, /*!< out: file position of the start of
1145 the list of active transactions */
1146 ulint* trx_end) /*!< out: file position of the end of
1147 the list of active transactions */
1148 {
1149 double time_elapsed;
1150 time_t current_time;
1151 ulint n_reserved;
1152 ibool ret;
1153
1154 mutex_enter(&srv_innodb_monitor_mutex);
1155
1156 current_time = time(NULL);
1157
1158 /* We add 0.001 seconds to time_elapsed to prevent division
1159 by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
1160 same time */
1161
1162 time_elapsed = difftime(current_time, srv_last_monitor_time)
1163 + 0.001;
1164
1165 srv_last_monitor_time = time(NULL);
1166
1167 fputs("\n=====================================\n", file);
1168
1169 ut_print_timestamp(file);
1170 fprintf(file,
1171 " INNODB MONITOR OUTPUT\n"
1172 "=====================================\n"
1173 "Per second averages calculated from the last %lu seconds\n",
1174 (ulong) time_elapsed);
1175
1176 fputs("-----------------\n"
1177 "BACKGROUND THREAD\n"
1178 "-----------------\n", file);
1179 srv_print_master_thread_info(file);
1180
1181 fputs("----------\n"
1182 "SEMAPHORES\n"
1183 "----------\n", file);
1184 sync_print(file);
1185
1186 /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1187 order level in sync0sync.h, while dict_foreign_err_mutex has a very
1188 low level 135. Therefore we can reserve the latter mutex here without
1189 a danger of a deadlock of threads. */
1190
1191 mutex_enter(&dict_foreign_err_mutex);
1192
1193 if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
1194 fputs("------------------------\n"
1195 "LATEST FOREIGN KEY ERROR\n"
1196 "------------------------\n", file);
1197 ut_copy_file(file, dict_foreign_err_file);
1198 }
1199
1200 mutex_exit(&dict_foreign_err_mutex);
1201
1202 /* Only if lock_print_info_summary proceeds correctly,
1203 before we call the lock_print_info_all_transactions
1204 to print all the lock information. IMPORTANT NOTE: This
1205 function acquires the lock mutex on success. */
1206 ret = lock_print_info_summary(file, nowait);
1207
1208 if (ret) {
1209 if (trx_start_pos) {
1210 long t = ftell(file);
1211 if (t < 0) {
1212 *trx_start_pos = ULINT_UNDEFINED;
1213 } else {
1214 *trx_start_pos = (ulint) t;
1215 }
1216 }
1217
1218 /* NOTE: If we get here then we have the lock mutex. This
1219 function will release the lock mutex that we acquired when
1220 we called the lock_print_info_summary() function earlier. */
1221
1222 lock_print_info_all_transactions(file);
1223
1224 if (trx_end) {
1225 long t = ftell(file);
1226 if (t < 0) {
1227 *trx_end = ULINT_UNDEFINED;
1228 } else {
1229 *trx_end = (ulint) t;
1230 }
1231 }
1232 }
1233
1234 fputs("--------\n"
1235 "FILE I/O\n"
1236 "--------\n", file);
1237 os_aio_print(file);
1238
1239 fputs("-------------------------------------\n"
1240 "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1241 "-------------------------------------\n", file);
1242 ibuf_print(file);
1243
1244 ha_print_info(file, btr_search_sys->hash_index);
1245
1246 fprintf(file,
1247 "%.2f hash searches/s, %.2f non-hash searches/s\n",
1248 (btr_cur_n_sea - btr_cur_n_sea_old)
1249 / time_elapsed,
1250 (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1251 / time_elapsed);
1252 btr_cur_n_sea_old = btr_cur_n_sea;
1253 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1254
1255 fputs("---\n"
1256 "LOG\n"
1257 "---\n", file);
1258 log_print(file);
1259
1260 fputs("----------------------\n"
1261 "BUFFER POOL AND MEMORY\n"
1262 "----------------------\n", file);
1263 fprintf(file,
1264 "Total memory allocated " ULINTPF
1265 "; in additional pool allocated " ULINTPF "\n",
1266 ut_total_allocated_memory,
1267 mem_pool_get_reserved(mem_comm_pool));
1268 fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1269 dict_sys->size);
1270
1271 buf_print_io(file);
1272
1273 fputs("--------------\n"
1274 "ROW OPERATIONS\n"
1275 "--------------\n", file);
1276 fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1277 (long) srv_conc_get_active_threads(),
1278 srv_conc_get_waiting_threads());
1279
1280 /* This is a dirty read, without holding trx_sys->mutex. */
1281 fprintf(file, "%lu read views open inside InnoDB\n",
1282 UT_LIST_GET_LEN(trx_sys->view_list));
1283
1284 n_reserved = fil_space_get_n_reserved_extents(0);
1285 if (n_reserved > 0) {
1286 fprintf(file,
1287 "%lu tablespace extents now reserved for"
1288 " B-tree split operations\n",
1289 (ulong) n_reserved);
1290 }
1291
1292 #ifdef UNIV_LINUX
1293 fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1294 (ulong) srv_main_thread_process_no,
1295 (ulong) srv_main_thread_id,
1296 srv_main_thread_op_info);
1297 #else
1298 fprintf(file, "Main thread id %lu, state: %s\n",
1299 (ulong) srv_main_thread_id,
1300 srv_main_thread_op_info);
1301 #endif
1302 fprintf(file,
1303 "Number of rows inserted " ULINTPF
1304 ", updated " ULINTPF ", deleted " ULINTPF
1305 ", read " ULINTPF "\n",
1306 (ulint) srv_stats.n_rows_inserted,
1307 (ulint) srv_stats.n_rows_updated,
1308 (ulint) srv_stats.n_rows_deleted,
1309 (ulint) srv_stats.n_rows_read);
1310 fprintf(file,
1311 "%.2f inserts/s, %.2f updates/s,"
1312 " %.2f deletes/s, %.2f reads/s\n",
1313 ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
1314 / time_elapsed,
1315 ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
1316 / time_elapsed,
1317 ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
1318 / time_elapsed,
1319 ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
1320 / time_elapsed);
1321
1322 srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1323 srv_n_rows_updated_old = srv_stats.n_rows_updated;
1324 srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1325 srv_n_rows_read_old = srv_stats.n_rows_read;
1326
1327 fputs("----------------------------\n"
1328 "END OF INNODB MONITOR OUTPUT\n"
1329 "============================\n", file);
1330 mutex_exit(&srv_innodb_monitor_mutex);
1331 fflush(file);
1332
1333 return(ret);
1334 }
1335
1336 /******************************************************************//**
1337 Function to pass InnoDB status variables to MySQL */
1338 UNIV_INTERN
1339 void
srv_export_innodb_status(void)1340 srv_export_innodb_status(void)
1341 /*==========================*/
1342 {
1343 buf_pool_stat_t stat;
1344 buf_pools_list_size_t buf_pools_list_size;
1345 ulint LRU_len;
1346 ulint free_len;
1347 ulint flush_list_len;
1348
1349 buf_get_total_stat(&stat);
1350 buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1351 buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1352
1353 mutex_enter(&srv_innodb_monitor_mutex);
1354
1355 export_vars.innodb_data_pending_reads =
1356 os_n_pending_reads;
1357
1358 export_vars.innodb_data_pending_writes =
1359 os_n_pending_writes;
1360
1361 export_vars.innodb_data_pending_fsyncs =
1362 fil_n_pending_log_flushes
1363 + fil_n_pending_tablespace_flushes;
1364
1365 export_vars.innodb_data_fsyncs = os_n_fsyncs;
1366
1367 export_vars.innodb_data_read = srv_stats.data_read;
1368
1369 export_vars.innodb_data_reads = os_n_file_reads;
1370
1371 export_vars.innodb_data_writes = os_n_file_writes;
1372
1373 export_vars.innodb_data_written = srv_stats.data_written;
1374
1375 export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
1376
1377 export_vars.innodb_buffer_pool_write_requests =
1378 srv_stats.buf_pool_write_requests;
1379
1380 export_vars.innodb_buffer_pool_wait_free =
1381 srv_stats.buf_pool_wait_free;
1382
1383 export_vars.innodb_buffer_pool_pages_flushed =
1384 srv_stats.buf_pool_flushed;
1385
1386 export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1387
1388 export_vars.innodb_buffer_pool_read_ahead_rnd =
1389 stat.n_ra_pages_read_rnd;
1390
1391 export_vars.innodb_buffer_pool_read_ahead =
1392 stat.n_ra_pages_read;
1393
1394 export_vars.innodb_buffer_pool_read_ahead_evicted =
1395 stat.n_ra_pages_evicted;
1396
1397 export_vars.innodb_buffer_pool_pages_data = LRU_len;
1398
1399 export_vars.innodb_buffer_pool_bytes_data =
1400 buf_pools_list_size.LRU_bytes
1401 + buf_pools_list_size.unzip_LRU_bytes;
1402
1403 export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1404
1405 export_vars.innodb_buffer_pool_bytes_dirty =
1406 buf_pools_list_size.flush_list_bytes;
1407
1408 export_vars.innodb_buffer_pool_pages_free = free_len;
1409
1410 #ifdef UNIV_DEBUG
1411 export_vars.innodb_buffer_pool_pages_latched =
1412 buf_get_latched_pages_number();
1413 #endif /* UNIV_DEBUG */
1414 export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1415
1416 export_vars.innodb_buffer_pool_pages_misc =
1417 buf_pool_get_n_pages() - LRU_len - free_len;
1418
1419 #ifdef HAVE_ATOMIC_BUILTINS
1420 export_vars.innodb_have_atomic_builtins = 1;
1421 #else
1422 export_vars.innodb_have_atomic_builtins = 0;
1423 #endif
1424 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1425
1426 export_vars.innodb_log_waits = srv_stats.log_waits;
1427
1428 export_vars.innodb_os_log_written = srv_stats.os_log_written;
1429
1430 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1431
1432 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1433
1434 export_vars.innodb_os_log_pending_writes =
1435 srv_stats.os_log_pending_writes;
1436
1437 export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1438
1439 export_vars.innodb_log_writes = srv_stats.log_writes;
1440
1441 export_vars.innodb_dblwr_pages_written =
1442 srv_stats.dblwr_pages_written;
1443
1444 export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1445
1446 export_vars.innodb_pages_created = stat.n_pages_created;
1447
1448 export_vars.innodb_pages_read = stat.n_pages_read;
1449
1450 export_vars.innodb_pages_written = stat.n_pages_written;
1451
1452 export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1453
1454 export_vars.innodb_row_lock_current_waits =
1455 srv_stats.n_lock_wait_current_count;
1456
1457 export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1458
1459 if (srv_stats.n_lock_wait_count > 0) {
1460
1461 export_vars.innodb_row_lock_time_avg = (ulint)
1462 (srv_stats.n_lock_wait_time
1463 / 1000 / srv_stats.n_lock_wait_count);
1464
1465 } else {
1466 export_vars.innodb_row_lock_time_avg = 0;
1467 }
1468
1469 export_vars.innodb_row_lock_time_max =
1470 lock_sys->n_lock_max_wait_time / 1000;
1471
1472 export_vars.innodb_rows_read = srv_stats.n_rows_read;
1473
1474 export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1475
1476 export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1477
1478 export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1479
1480 export_vars.innodb_num_open_files = fil_n_file_opened;
1481
1482 export_vars.innodb_truncated_status_writes =
1483 srv_truncated_status_writes;
1484
1485 export_vars.innodb_available_undo_logs = srv_available_undo_logs;
1486
1487 #ifdef UNIV_DEBUG
1488 rw_lock_s_lock(&purge_sys->latch);
1489 trx_id_t done_trx_no = purge_sys->done.trx_no;
1490 trx_id_t up_limit_id = purge_sys->view
1491 ? purge_sys->view->up_limit_id
1492 : 0;
1493 rw_lock_s_unlock(&purge_sys->latch);
1494
1495 mutex_enter(&trx_sys->mutex);
1496 trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
1497 mutex_exit(&trx_sys->mutex);
1498
1499 if (!done_trx_no || max_trx_id < done_trx_no - 1) {
1500 export_vars.innodb_purge_trx_id_age = 0;
1501 } else {
1502 export_vars.innodb_purge_trx_id_age =
1503 (ulint) (max_trx_id - done_trx_no + 1);
1504 }
1505
1506 if (!up_limit_id
1507 || max_trx_id < up_limit_id) {
1508 export_vars.innodb_purge_view_trx_id_age = 0;
1509 } else {
1510 export_vars.innodb_purge_view_trx_id_age =
1511 (ulint) (max_trx_id - up_limit_id);
1512 }
1513 #endif /* UNIV_DEBUG */
1514
1515 mutex_exit(&srv_innodb_monitor_mutex);
1516 }
1517
1518 /*********************************************************************//**
1519 A thread which prints the info output by various InnoDB monitors.
1520 @return a dummy parameter */
1521 extern "C" UNIV_INTERN
1522 os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)1523 DECLARE_THREAD(srv_monitor_thread)(
1524 /*===============================*/
1525 void* arg MY_ATTRIBUTE((unused)))
1526 /*!< in: a dummy parameter required by
1527 os_thread_create */
1528 {
1529 ib_int64_t sig_count;
1530 double time_elapsed;
1531 time_t current_time;
1532 time_t last_table_monitor_time;
1533 time_t last_tablespace_monitor_time;
1534 time_t last_monitor_time;
1535 ulint mutex_skipped;
1536 ibool last_srv_print_monitor;
1537
1538 ut_ad(!srv_read_only_mode);
1539
1540 #ifdef UNIV_DEBUG_THREAD_CREATION
1541 fprintf(stderr, "Lock timeout thread starts, id %lu\n",
1542 os_thread_pf(os_thread_get_curr_id()));
1543 #endif /* UNIV_DEBUG_THREAD_CREATION */
1544
1545 #ifdef UNIV_PFS_THREAD
1546 pfs_register_thread(srv_monitor_thread_key);
1547 #endif /* UNIV_PFS_THREAD */
1548 srv_monitor_active = TRUE;
1549
1550 UT_NOT_USED(arg);
1551 srv_last_monitor_time = ut_time();
1552 last_table_monitor_time = ut_time();
1553 last_tablespace_monitor_time = ut_time();
1554 last_monitor_time = ut_time();
1555 mutex_skipped = 0;
1556 last_srv_print_monitor = srv_print_innodb_monitor;
1557 loop:
1558 /* Wake up every 5 seconds to see if we need to print
1559 monitor information or if signalled at shutdown. */
1560
1561 sig_count = os_event_reset(srv_monitor_event);
1562
1563 os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1564
1565 current_time = ut_time();
1566
1567 time_elapsed = difftime(current_time, last_monitor_time);
1568
1569 if (time_elapsed > 15) {
1570 last_monitor_time = ut_time();
1571
1572 if (srv_print_innodb_monitor) {
1573 /* Reset mutex_skipped counter everytime
1574 srv_print_innodb_monitor changes. This is to
1575 ensure we will not be blocked by lock_sys->mutex
1576 for short duration information printing,
1577 such as requested by sync_array_print_long_waits() */
1578 if (!last_srv_print_monitor) {
1579 mutex_skipped = 0;
1580 last_srv_print_monitor = TRUE;
1581 }
1582
1583 if (!srv_printf_innodb_monitor(stderr,
1584 MUTEX_NOWAIT(mutex_skipped),
1585 NULL, NULL)) {
1586 mutex_skipped++;
1587 } else {
1588 /* Reset the counter */
1589 mutex_skipped = 0;
1590 }
1591 } else {
1592 last_srv_print_monitor = FALSE;
1593 }
1594
1595
1596 /* We don't create the temp files or associated
1597 mutexes in read-only-mode */
1598
1599 if (!srv_read_only_mode && srv_innodb_status) {
1600 mutex_enter(&srv_monitor_file_mutex);
1601 rewind(srv_monitor_file);
1602 if (!srv_printf_innodb_monitor(srv_monitor_file,
1603 MUTEX_NOWAIT(mutex_skipped),
1604 NULL, NULL)) {
1605 mutex_skipped++;
1606 } else {
1607 mutex_skipped = 0;
1608 }
1609
1610 os_file_set_eof(srv_monitor_file);
1611 mutex_exit(&srv_monitor_file_mutex);
1612 }
1613
1614 if (srv_print_innodb_tablespace_monitor
1615 && difftime(current_time,
1616 last_tablespace_monitor_time) > 60) {
1617 last_tablespace_monitor_time = ut_time();
1618
1619 fputs("========================"
1620 "========================\n",
1621 stderr);
1622
1623 ut_print_timestamp(stderr);
1624
1625 fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
1626 "========================"
1627 "========================\n",
1628 stderr);
1629
1630 fsp_print(0);
1631 fputs("Validating tablespace\n", stderr);
1632 fsp_validate(0);
1633 fputs("Validation ok\n"
1634 "---------------------------------------\n"
1635 "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
1636 "=======================================\n",
1637 stderr);
1638 }
1639
1640 if (srv_print_innodb_table_monitor
1641 && difftime(current_time, last_table_monitor_time) > 60) {
1642
1643 last_table_monitor_time = ut_time();
1644
1645 fprintf(stderr, "Warning: %s\n",
1646 DEPRECATED_MSG_INNODB_TABLE_MONITOR);
1647
1648 fputs("===========================================\n",
1649 stderr);
1650
1651 ut_print_timestamp(stderr);
1652
1653 fputs(" INNODB TABLE MONITOR OUTPUT\n"
1654 "===========================================\n",
1655 stderr);
1656 dict_print();
1657
1658 fputs("-----------------------------------\n"
1659 "END OF INNODB TABLE MONITOR OUTPUT\n"
1660 "==================================\n",
1661 stderr);
1662
1663 fprintf(stderr, "Warning: %s\n",
1664 DEPRECATED_MSG_INNODB_TABLE_MONITOR);
1665 }
1666 }
1667
1668 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
1669 goto exit_func;
1670 }
1671
1672 if (srv_print_innodb_monitor
1673 || srv_print_innodb_lock_monitor
1674 || srv_print_innodb_tablespace_monitor
1675 || srv_print_innodb_table_monitor) {
1676 goto loop;
1677 }
1678
1679 goto loop;
1680
1681 exit_func:
1682 srv_monitor_active = FALSE;
1683
1684 /* We count the number of threads in os_thread_exit(). A created
1685 thread should always use that to exit and not use return() to exit. */
1686
1687 os_thread_exit(NULL);
1688
1689 OS_THREAD_DUMMY_RETURN;
1690 }
1691
1692 /*********************************************************************//**
1693 A thread which prints warnings about semaphore waits which have lasted
1694 too long. These can be used to track bugs which cause hangs.
1695 Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
1696 we should avoid waiting any mutexes in this function!
1697 @return a dummy parameter */
1698 extern "C" UNIV_INTERN
1699 os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)1700 DECLARE_THREAD(srv_error_monitor_thread)(
1701 /*=====================================*/
1702 void* arg MY_ATTRIBUTE((unused)))
1703 /*!< in: a dummy parameter required by
1704 os_thread_create */
1705 {
1706 /* number of successive fatal timeouts observed */
1707 ulint fatal_cnt = 0;
1708 lsn_t old_lsn;
1709 lsn_t new_lsn;
1710 ib_int64_t sig_count;
1711 /* longest waiting thread for a semaphore */
1712 os_thread_id_t waiter = os_thread_get_curr_id();
1713 os_thread_id_t old_waiter = waiter;
1714 /* the semaphore that is being waited for */
1715 const void* sema = NULL;
1716 const void* old_sema = NULL;
1717
1718 ut_ad(!srv_read_only_mode);
1719
1720 old_lsn = srv_start_lsn;
1721
1722 #ifdef UNIV_DEBUG_THREAD_CREATION
1723 fprintf(stderr, "Error monitor thread starts, id %lu\n",
1724 os_thread_pf(os_thread_get_curr_id()));
1725 #endif /* UNIV_DEBUG_THREAD_CREATION */
1726
1727 #ifdef UNIV_PFS_THREAD
1728 pfs_register_thread(srv_error_monitor_thread_key);
1729 #endif /* UNIV_PFS_THREAD */
1730 srv_error_monitor_active = TRUE;
1731
1732 loop:
1733 /* Try to track a strange bug reported by Harald Fuchs and others,
1734 where the lsn seems to decrease at times */
1735
1736 if (log_peek_lsn(&new_lsn)) {
1737 if (new_lsn < old_lsn) {
1738 ut_print_timestamp(stderr);
1739 fprintf(stderr,
1740 " InnoDB: Error: old log sequence number " LSN_PF
1741 " was greater\n"
1742 "InnoDB: than the new log sequence number " LSN_PF "!\n"
1743 "InnoDB: Please submit a bug report"
1744 " to http://bugs.mysql.com\n",
1745 old_lsn, new_lsn);
1746 ut_ad(0);
1747 }
1748
1749 old_lsn = new_lsn;
1750 }
1751
1752 if (difftime(time(NULL), srv_last_monitor_time) > 60) {
1753 /* We referesh InnoDB Monitor values so that averages are
1754 printed from at most 60 last seconds */
1755
1756 srv_refresh_innodb_monitor_stats();
1757 }
1758
1759 /* Update the statistics collected for deciding LRU
1760 eviction policy. */
1761 buf_LRU_stat_update();
1762
1763 /* In case mutex_exit is not a memory barrier, it is
1764 theoretically possible some threads are left waiting though
1765 the semaphore is already released. Wake up those threads: */
1766
1767 sync_arr_wake_threads_if_sema_free();
1768
1769 if (sync_array_print_long_waits(&waiter, &sema)
1770 && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
1771 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
1772 if (srv_allow_writes_event->is_set) {
1773 #endif /* WITH_WSREP */
1774 fatal_cnt++;
1775 #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
1776 } else {
1777 fprintf(stderr,
1778 "WSREP: avoiding InnoDB self crash due to long "
1779 "semaphore wait of > %lu seconds\n"
1780 "Server is processing SST donor operation, "
1781 "fatal_cnt now: %lu",
1782 (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt);
1783 }
1784 #endif /* WITH_WSREP */
1785 if (fatal_cnt > 10) {
1786
1787 fprintf(stderr,
1788 "InnoDB: Error: semaphore wait has lasted"
1789 " > %lu seconds\n"
1790 "InnoDB: We intentionally crash the server,"
1791 " because it appears to be hung.\n",
1792 (ulong) srv_fatal_semaphore_wait_threshold);
1793
1794 ut_error;
1795 }
1796 } else {
1797 fatal_cnt = 0;
1798 old_waiter = waiter;
1799 old_sema = sema;
1800 }
1801
1802 /* Flush stderr so that a database user gets the output
1803 to possible MySQL error file */
1804
1805 fflush(stderr);
1806
1807 sig_count = os_event_reset(srv_error_event);
1808
1809 os_event_wait_time_low(srv_error_event, 1000000, sig_count);
1810
1811 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
1812
1813 goto loop;
1814 }
1815
1816 srv_error_monitor_active = FALSE;
1817
1818 /* We count the number of threads in os_thread_exit(). A created
1819 thread should always use that to exit and not use return() to exit. */
1820
1821 os_thread_exit(NULL);
1822
1823 OS_THREAD_DUMMY_RETURN;
1824 }
1825
1826 /******************************************************************//**
1827 Increment the server activity count. */
1828 UNIV_INTERN
1829 void
srv_inc_activity_count(void)1830 srv_inc_activity_count(void)
1831 /*========================*/
1832 {
1833 srv_sys->activity_count.inc();
1834 }
1835
1836 /**********************************************************************//**
1837 Check whether any background thread is active. If so return the thread
1838 type.
1839 @return SRV_NONE if all are suspended or have exited, thread
1840 type if any are still active. */
1841 UNIV_INTERN
1842 srv_thread_type
srv_get_active_thread_type(void)1843 srv_get_active_thread_type(void)
1844 /*============================*/
1845 {
1846 srv_thread_type ret = SRV_NONE;
1847
1848 if (srv_read_only_mode) {
1849 return(SRV_NONE);
1850 }
1851
1852 srv_sys_mutex_enter();
1853
1854 for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
1855 if (srv_sys->n_threads_active[i] != 0) {
1856 ret = static_cast<srv_thread_type>(i);
1857 break;
1858 }
1859 }
1860
1861 srv_sys_mutex_exit();
1862
1863 /* Check only on shutdown. */
1864 if (ret == SRV_NONE
1865 && srv_shutdown_state != SRV_SHUTDOWN_NONE
1866 && trx_purge_state() != PURGE_STATE_DISABLED
1867 && trx_purge_state() != PURGE_STATE_EXIT) {
1868
1869 ret = SRV_PURGE;
1870 }
1871
1872 return(ret);
1873 }
1874
1875 /**********************************************************************//**
1876 Check whether any background thread are active. If so print which thread
1877 is active. Send the threads wakeup signal.
1878 @return name of thread that is active or NULL */
1879 UNIV_INTERN
1880 const char*
srv_any_background_threads_are_active(void)1881 srv_any_background_threads_are_active(void)
1882 /*=======================================*/
1883 {
1884 const char* thread_active = NULL;
1885
1886 if (srv_read_only_mode) {
1887 return(NULL);
1888 } else if (srv_error_monitor_active) {
1889 thread_active = "srv_error_monitor_thread";
1890 } else if (lock_sys->timeout_thread_active) {
1891 thread_active = "srv_lock_timeout thread";
1892 } else if (srv_monitor_active) {
1893 thread_active = "srv_monitor_thread";
1894 } else if (srv_buf_dump_thread_active) {
1895 thread_active = "buf_dump_thread";
1896 } else if (srv_dict_stats_thread_active) {
1897 thread_active = "dict_stats_thread";
1898 }
1899
1900 os_event_set(srv_error_event);
1901 os_event_set(srv_monitor_event);
1902 os_event_set(srv_buf_dump_event);
1903 os_event_set(lock_sys->timeout_event);
1904 os_event_set(dict_stats_event);
1905
1906 return(thread_active);
1907 }
1908
1909 /*******************************************************************//**
1910 Tells the InnoDB server that there has been activity in the database
1911 and wakes up the master thread if it is suspended (not sleeping). Used
1912 in the MySQL interface. Note that there is a small chance that the master
1913 thread stays suspended (we do not protect our operation with the
1914 srv_sys_t->mutex, for performance reasons). */
1915 UNIV_INTERN
1916 void
srv_active_wake_master_thread(void)1917 srv_active_wake_master_thread(void)
1918 /*===============================*/
1919 {
1920 if (srv_read_only_mode) {
1921 return;
1922 }
1923
1924 ut_ad(!srv_sys_mutex_own());
1925
1926 srv_inc_activity_count();
1927
1928 if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
1929 srv_slot_t* slot;
1930
1931 srv_sys_mutex_enter();
1932
1933 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
1934
1935 /* Only if the master thread has been started. */
1936
1937 if (slot->in_use) {
1938 ut_a(srv_slot_get_type(slot) == SRV_MASTER);
1939
1940 if (slot->suspended) {
1941
1942 slot->suspended = FALSE;
1943
1944 ++srv_sys->n_threads_active[SRV_MASTER];
1945
1946 os_event_set(slot->event);
1947 }
1948 }
1949
1950 srv_sys_mutex_exit();
1951 }
1952 }
1953
1954 /*******************************************************************//**
1955 Tells the purge thread that there has been activity in the database
1956 and wakes up the purge thread if it is suspended (not sleeping). Note
1957 that there is a small chance that the purge thread stays suspended
1958 (we do not protect our check with the srv_sys_t:mutex and the
1959 purge_sys->latch, for performance reasons). */
1960 UNIV_INTERN
1961 void
srv_wake_purge_thread_if_not_active(void)1962 srv_wake_purge_thread_if_not_active(void)
1963 /*=====================================*/
1964 {
1965 ut_ad(!srv_sys_mutex_own());
1966
1967 if (purge_sys->state == PURGE_STATE_RUN
1968 && srv_sys->n_threads_active[SRV_PURGE] == 0) {
1969
1970 srv_release_threads(SRV_PURGE, 1);
1971 }
1972 }
1973
1974 /*******************************************************************//**
1975 Wakes up the master thread if it is suspended or being suspended. */
1976 UNIV_INTERN
1977 void
srv_wake_master_thread(void)1978 srv_wake_master_thread(void)
1979 /*========================*/
1980 {
1981 ut_ad(!srv_sys_mutex_own());
1982
1983 srv_inc_activity_count();
1984
1985 srv_release_threads(SRV_MASTER, 1);
1986 }
1987
1988 /*******************************************************************//**
1989 Get current server activity count. We don't hold srv_sys::mutex while
1990 reading this value as it is only used in heuristics.
1991 @return activity count. */
1992 UNIV_INTERN
1993 ulint
srv_get_activity_count(void)1994 srv_get_activity_count(void)
1995 /*========================*/
1996 {
1997 return(srv_sys->activity_count);
1998 }
1999
2000 /*******************************************************************//**
2001 Check if there has been any activity.
2002 @return FALSE if no change in activity counter. */
2003 UNIV_INTERN
2004 ibool
srv_check_activity(ulint old_activity_count)2005 srv_check_activity(
2006 /*===============*/
2007 ulint old_activity_count) /*!< in: old activity count */
2008 {
2009 return(srv_sys->activity_count != old_activity_count);
2010 }
2011
2012 /********************************************************************//**
2013 The master thread is tasked to ensure that flush of log file happens
2014 once every second in the background. This is to ensure that not more
2015 than one second of trxs are lost in case of crash when
2016 innodb_flush_logs_at_trx_commit != 1 */
2017 static
2018 void
srv_sync_log_buffer_in_background(void)2019 srv_sync_log_buffer_in_background(void)
2020 /*===================================*/
2021 {
2022 time_t current_time = time(NULL);
2023
2024 srv_main_thread_op_info = "flushing log";
2025 if (difftime(current_time, srv_last_log_flush_time)
2026 >= srv_flush_log_at_timeout) {
2027 log_buffer_sync_in_background(TRUE);
2028 srv_last_log_flush_time = current_time;
2029 srv_log_writes_and_flush++;
2030 }
2031 }
2032
2033 /********************************************************************//**
2034 Make room in the table cache by evicting an unused table.
2035 @return number of tables evicted. */
2036 static
2037 ulint
srv_master_evict_from_table_cache(ulint pct_check)2038 srv_master_evict_from_table_cache(
2039 /*==============================*/
2040 ulint pct_check) /*!< in: max percent to check */
2041 {
2042 ulint n_tables_evicted = 0;
2043
2044 rw_lock_x_lock(&dict_operation_lock);
2045
2046 dict_mutex_enter_for_mysql();
2047
2048 n_tables_evicted = dict_make_room_in_cache(
2049 innobase_get_table_cache_size(), pct_check);
2050
2051 dict_mutex_exit_for_mysql();
2052
2053 rw_lock_x_unlock(&dict_operation_lock);
2054
2055 return(n_tables_evicted);
2056 }
2057
2058 /*********************************************************************//**
2059 This function prints progress message every 60 seconds during server
2060 shutdown, for any activities that master thread is pending on. */
2061 static
2062 void
srv_shutdown_print_master_pending(ib_time_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)2063 srv_shutdown_print_master_pending(
2064 /*==============================*/
2065 ib_time_t* last_print_time, /*!< last time the function
2066 print the message */
2067 ulint n_tables_to_drop, /*!< number of tables to
2068 be dropped */
2069 ulint n_bytes_merged) /*!< number of change buffer
2070 just merged */
2071 {
2072 ib_time_t current_time;
2073 double time_elapsed;
2074
2075 current_time = ut_time();
2076 time_elapsed = ut_difftime(current_time, *last_print_time);
2077
2078 if (time_elapsed > 60) {
2079 *last_print_time = ut_time();
2080
2081 if (n_tables_to_drop) {
2082 ut_print_timestamp(stderr);
2083 fprintf(stderr, " InnoDB: Waiting for "
2084 "%lu table(s) to be dropped\n",
2085 (ulong) n_tables_to_drop);
2086 }
2087
2088 /* Check change buffer merge, we only wait for change buffer
2089 merge if it is a slow shutdown */
2090 if (!srv_fast_shutdown && n_bytes_merged) {
2091 ut_print_timestamp(stderr);
2092 fprintf(stderr, " InnoDB: Waiting for change "
2093 "buffer merge to complete\n"
2094 " InnoDB: number of bytes of change buffer "
2095 "just merged: %lu\n",
2096 n_bytes_merged);
2097 }
2098 }
2099 }
2100
2101 /*********************************************************************//**
2102 Perform the tasks that the master thread is supposed to do when the
2103 server is active. There are two types of tasks. The first category is
2104 of such tasks which are performed at each inovcation of this function.
2105 We assume that this function is called roughly every second when the
2106 server is active. The second category is of such tasks which are
2107 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
2108 static
2109 void
srv_master_do_active_tasks(void)2110 srv_master_do_active_tasks(void)
2111 /*============================*/
2112 {
2113 ib_time_t cur_time = ut_time();
2114 ullint counter_time = ut_time_us(NULL);
2115
2116 /* First do the tasks that we are suppose to do at each
2117 invocation of this function. */
2118
2119 ++srv_main_active_loops;
2120
2121 MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2122
2123 /* ALTER TABLE in MySQL requires on Unix that the table handler
2124 can drop tables lazily after there no longer are SELECT
2125 queries to them. */
2126 srv_main_thread_op_info = "doing background drop tables";
2127 row_drop_tables_for_mysql_in_background();
2128 MONITOR_INC_TIME_IN_MICRO_SECS(
2129 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
2130
2131 if (srv_shutdown_state > 0) {
2132 return;
2133 }
2134
2135 /* make sure that there is enough reusable space in the redo
2136 log files */
2137 srv_main_thread_op_info = "checking free log space";
2138 log_free_check();
2139
2140 /* Do an ibuf merge */
2141 srv_main_thread_op_info = "doing insert buffer merge";
2142 counter_time = ut_time_us(NULL);
2143 ibuf_merge_in_background(false);
2144 MONITOR_INC_TIME_IN_MICRO_SECS(
2145 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2146
2147 /* Flush logs if needed */
2148 srv_main_thread_op_info = "flushing log";
2149 srv_sync_log_buffer_in_background();
2150 MONITOR_INC_TIME_IN_MICRO_SECS(
2151 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2152
2153 /* Now see if various tasks that are performed at defined
2154 intervals need to be performed. */
2155
2156 #ifdef MEM_PERIODIC_CHECK
2157 /* Check magic numbers of every allocated mem block once in
2158 SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
2159 if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
2160 mem_validate_all_blocks();
2161 MONITOR_INC_TIME_IN_MICRO_SECS(
2162 MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
2163 }
2164 #endif
2165 if (srv_shutdown_state > 0) {
2166 return;
2167 }
2168
2169 if (srv_shutdown_state > 0) {
2170 return;
2171 }
2172
2173 if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2174 srv_main_thread_op_info = "enforcing dict cache limit";
2175 srv_master_evict_from_table_cache(50);
2176 MONITOR_INC_TIME_IN_MICRO_SECS(
2177 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2178 }
2179
2180 if (srv_shutdown_state > 0) {
2181 return;
2182 }
2183
2184 /* Make a new checkpoint */
2185 if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
2186 srv_main_thread_op_info = "making checkpoint";
2187 log_checkpoint(TRUE, FALSE);
2188 MONITOR_INC_TIME_IN_MICRO_SECS(
2189 MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
2190 }
2191 }
2192
2193 /*********************************************************************//**
2194 Perform the tasks that the master thread is supposed to do whenever the
2195 server is idle. We do check for the server state during this function
2196 and if the server has entered the shutdown phase we may return from
2197 the function without completing the required tasks.
2198 Note that the server can move to active state when we are executing this
2199 function but we don't check for that as we are suppose to perform more
2200 or less same tasks when server is active. */
2201 static
2202 void
srv_master_do_idle_tasks(void)2203 srv_master_do_idle_tasks(void)
2204 /*==========================*/
2205 {
2206 ullint counter_time;
2207
2208 ++srv_main_idle_loops;
2209
2210 MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2211
2212
2213 /* ALTER TABLE in MySQL requires on Unix that the table handler
2214 can drop tables lazily after there no longer are SELECT
2215 queries to them. */
2216 counter_time = ut_time_us(NULL);
2217 srv_main_thread_op_info = "doing background drop tables";
2218 row_drop_tables_for_mysql_in_background();
2219 MONITOR_INC_TIME_IN_MICRO_SECS(
2220 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2221 counter_time);
2222
2223 if (srv_shutdown_state > 0) {
2224 return;
2225 }
2226
2227 /* make sure that there is enough reusable space in the redo
2228 log files */
2229 srv_main_thread_op_info = "checking free log space";
2230 log_free_check();
2231
2232 /* Do an ibuf merge */
2233 counter_time = ut_time_us(NULL);
2234 srv_main_thread_op_info = "doing insert buffer merge";
2235 ibuf_merge_in_background(true);
2236 MONITOR_INC_TIME_IN_MICRO_SECS(
2237 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2238
2239 if (srv_shutdown_state > 0) {
2240 return;
2241 }
2242
2243 srv_main_thread_op_info = "enforcing dict cache limit";
2244 srv_master_evict_from_table_cache(100);
2245 MONITOR_INC_TIME_IN_MICRO_SECS(
2246 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2247
2248 /* Flush logs if needed */
2249 srv_sync_log_buffer_in_background();
2250 MONITOR_INC_TIME_IN_MICRO_SECS(
2251 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2252
2253 if (srv_shutdown_state > 0) {
2254 return;
2255 }
2256
2257 /* Make a new checkpoint */
2258 srv_main_thread_op_info = "making checkpoint";
2259 log_checkpoint(TRUE, FALSE);
2260 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
2261 counter_time);
2262 }
2263
2264 /*********************************************************************//**
2265 Perform the tasks during shutdown. The tasks that we do at shutdown
2266 depend on srv_fast_shutdown:
2267 2 => very fast shutdown => do no book keeping
2268 1 => normal shutdown => clear drop table queue and make checkpoint
2269 0 => slow shutdown => in addition to above do complete purge and ibuf
2270 merge
2271 @return TRUE if some work was done. FALSE otherwise */
2272 static
2273 ibool
srv_master_do_shutdown_tasks(ib_time_t * last_print_time)2274 srv_master_do_shutdown_tasks(
2275 /*=========================*/
2276 ib_time_t* last_print_time)/*!< last time the function
2277 print the message */
2278 {
2279 ulint n_bytes_merged = 0;
2280 ulint n_tables_to_drop = 0;
2281
2282 ut_ad(!srv_read_only_mode);
2283
2284 ++srv_main_shutdown_loops;
2285
2286 ut_a(srv_shutdown_state > 0);
2287
2288 /* In very fast shutdown none of the following is necessary */
2289 if (srv_fast_shutdown == 2) {
2290 return(FALSE);
2291 }
2292
2293 /* ALTER TABLE in MySQL requires on Unix that the table handler
2294 can drop tables lazily after there no longer are SELECT
2295 queries to them. */
2296 srv_main_thread_op_info = "doing background drop tables";
2297 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2298
2299 /* make sure that there is enough reusable space in the redo
2300 log files */
2301 srv_main_thread_op_info = "checking free log space";
2302 log_free_check();
2303
2304 /* In case of normal shutdown we don't do ibuf merge or purge */
2305 if (srv_fast_shutdown == 1) {
2306 goto func_exit;
2307 }
2308
2309 /* Do an ibuf merge */
2310 srv_main_thread_op_info = "doing insert buffer merge";
2311 n_bytes_merged = ibuf_merge_in_background(true);
2312
2313 /* Flush logs if needed */
2314 srv_sync_log_buffer_in_background();
2315
2316 func_exit:
2317 /* Make a new checkpoint about once in 10 seconds */
2318 srv_main_thread_op_info = "making checkpoint";
2319 log_checkpoint(TRUE, FALSE);
2320
2321 /* Print progress message every 60 seconds during shutdown */
2322 if (srv_shutdown_state > 0 && srv_print_verbose_log) {
2323 srv_shutdown_print_master_pending(
2324 last_print_time, n_tables_to_drop, n_bytes_merged);
2325 }
2326
2327 return(n_bytes_merged || n_tables_to_drop);
2328 }
2329
2330 /*********************************************************************//**
2331 Puts master thread to sleep. At this point we are using polling to
2332 service various activities. Master thread sleeps for one second before
2333 checking the state of the server again */
2334 static
2335 void
srv_master_sleep(void)2336 srv_master_sleep(void)
2337 /*==================*/
2338 {
2339 srv_main_thread_op_info = "sleeping";
2340 os_thread_sleep(1000000);
2341 srv_main_thread_op_info = "";
2342 }
2343
/*********************************************************************//**
The master thread controlling the server. While no shutdown is in
progress it sleeps for a second at a time and then runs either the
active or the idle task set, depending on whether the activity counter
has changed since the previous iteration. Once a shutdown has started it
repeats the shutdown tasks until they report that no work was done, and
then suspends itself on its slot event. When woken up it exits if
srv_shutdown_state is SRV_SHUTDOWN_EXIT_THREADS; otherwise it starts
over from the top.
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(srv_master_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	my_thread_init();

	srv_slot_t*	slot;
	/* Baseline for detecting activity in the main loop. */
	ulint		old_activity_count = srv_get_activity_count();
	/* Passed to the shutdown tasks to rate-limit progress messages. */
	ib_time_t	last_print_time;

	ut_ad(!srv_read_only_mode);

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Master thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_master_thread_key);
#endif /* UNIV_PFS_THREAD */

	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	slot = srv_reserve_slot(SRV_MASTER);
	/* The master thread must have been given the first slot. */
	ut_a(slot == srv_sys->sys_threads);

	last_print_time = ut_time();
loop:
	/* At this forced recovery level no background work is done:
	go straight to the suspended state. */
	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
		goto suspend_thread;
	}

	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {

		srv_master_sleep();

		MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);

		/* Choose the task set by comparing the activity counter
		with the snapshot taken on the previous active round. */
		if (srv_check_activity(old_activity_count)) {
			old_activity_count = srv_get_activity_count();
			srv_master_do_active_tasks();
		} else {
			srv_master_do_idle_tasks();
		}
	}

	/* Shutdown has started: repeat the shutdown tasks for as long as
	they report that some work was done. */
	while (srv_master_do_shutdown_tasks(&last_print_time)) {

		/* Shouldn't loop here in case of very fast shutdown */
		ut_ad(srv_fast_shutdown < 2);
	}

suspend_thread:
	srv_main_thread_op_info = "suspending";

	srv_suspend_thread(slot);

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	/* Block until somebody sets the event of our slot. */
	os_event_wait(slot->event);

	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
		my_thread_end();
		os_thread_exit(NULL);
	}

	/* Woken up for any other reason: resume normal operation. */
	goto loop;

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
2426
2427 /*********************************************************************//**
2428 Check if purge should stop.
2429 @return true if it should shutdown. */
2430 static
2431 bool
srv_purge_should_exit(ulint n_purged)2432 srv_purge_should_exit(
2433 /*==============*/
2434 ulint n_purged) /*!< in: pages purged in last batch */
2435 {
2436 switch (srv_shutdown_state) {
2437 case SRV_SHUTDOWN_NONE:
2438 /* Normal operation. */
2439 break;
2440
2441 case SRV_SHUTDOWN_CLEANUP:
2442 case SRV_SHUTDOWN_EXIT_THREADS:
2443 /* Exit unless slow shutdown requested or all done. */
2444 return(srv_fast_shutdown != 0 || n_purged == 0);
2445
2446 case SRV_SHUTDOWN_LAST_PHASE:
2447 case SRV_SHUTDOWN_FLUSH_PHASE:
2448 ut_error;
2449 }
2450
2451 return(false);
2452 }
2453
2454 /*********************************************************************//**
2455 Fetch and execute a task from the work queue.
2456 @return true if a task was executed */
2457 static
2458 bool
srv_task_execute(void)2459 srv_task_execute(void)
2460 /*==================*/
2461 {
2462 que_thr_t* thr = NULL;
2463
2464 ut_ad(!srv_read_only_mode);
2465 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
2466
2467 mutex_enter(&srv_sys->tasks_mutex);
2468
2469 if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
2470
2471 thr = UT_LIST_GET_FIRST(srv_sys->tasks);
2472
2473 ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
2474
2475 UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
2476 }
2477
2478 mutex_exit(&srv_sys->tasks_mutex);
2479
2480 if (thr != NULL) {
2481
2482 que_run_threads(thr);
2483
2484 os_atomic_inc_ulint(
2485 &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
2486 }
2487
2488 return(thr != NULL);
2489 }
2490
/*********************************************************************//**
Worker thread that reads tasks from the work queue and executes them.
The thread reserves a slot of type SRV_WORKER and then repeats the
following until the purge state becomes PURGE_STATE_EXIT: suspend, wait
to be released, try to execute one task from the queue. On the way out
it frees its slot and asserts that purge has stopped and a shutdown is
in progress.
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)(
/*==============================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	my_thread_init();

	srv_slot_t*	slot;

	ut_ad(!srv_read_only_mode);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_WORKER);

	/* Worker threads only exist when more than one purge thread
	is configured. */
	ut_a(srv_n_purge_threads > 1);

	srv_sys_mutex_enter();

	/* The number of active workers must stay below the configured
	number of purge threads. */
	ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);

	srv_sys_mutex_exit();

	/* We need to ensure that the worker threads exit after the
	purge coordinator thread. Otherwise the purge coordinator can
	end up waiting forever in trx_purge_wait_for_workers_to_complete() */

	do {
		/* Suspend, then block until the slot event is set. */
		srv_suspend_thread(slot);

		os_event_wait(slot->event);

		if (srv_task_execute()) {

			/* A task was executed: wake up the purge
			coordinator thread in case it is not active. */

			srv_wake_purge_thread_if_not_active();
		}

		/* Note: we are checking the state without holding the
		purge_sys->latch here. */
	} while (purge_sys->state != PURGE_STATE_EXIT);

	srv_free_slot(slot);

	/* Verify the exit conditions under the purge latch. */
	rw_lock_x_lock(&purge_sys->latch);

	ut_a(!purge_sys->running);
	ut_a(purge_sys->state == PURGE_STATE_EXIT);
	ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
2568
/*********************************************************************//**
Do the actual purge operation. Purge batches are run in a loop until a
batch purges nothing, the history list is empty, purge is no longer in
state PURGE_STATE_RUN, or srv_purge_should_exit() says so. The number of
threads used per batch is adapted between 1 and n_threads on every
iteration. The batch counter, the current thread count and the last
history list snapshot are function-static, so they carry over from one
call to the next.
@return length of history list before the last purge batch. */
static
ulint
srv_do_purge(
/*=========*/
	ulint		n_threads,	/*!< in: number of threads to use */
	ulint*		n_total_purged)	/*!< in/out: total pages purged */
{
	ulint		n_pages_purged;

	/* Number of batches started so far, over all calls. */
	static ulint	count = 0;
	/* Number of threads to use for the next batch; 0 until the
	first call has initialised it. */
	static ulint	n_use_threads = 0;
	/* History list length as of the last snapshot. */
	static ulint	rseg_history_len = 0;
	ulint		old_activity_count = srv_get_activity_count();

	ut_a(n_threads > 0);
	ut_ad(!srv_read_only_mode);

	/* Purge until there are no more records to purge and there is
	no change in configuration or server state. If the user has
	configured more than one purge thread then we treat that as a
	pool of threads and only use the extra threads if purge can't
	keep up with updates. */

	if (n_use_threads == 0) {
		n_use_threads = n_threads;
	}

	do {
		if (trx_sys->rseg_history_len > rseg_history_len
		    || (srv_max_purge_lag > 0
			&& rseg_history_len > srv_max_purge_lag)) {

			/* History length is now longer than what it was
			when we took the last snapshot, or the snapshot
			exceeds the configured maximum purge lag. Use
			more threads. */

			if (n_use_threads < n_threads) {
				++n_use_threads;
			}

		} else if (srv_check_activity(old_activity_count)
			   && n_use_threads > 1) {

			/* History length same or smaller since last snapshot
			and there has been server activity: use fewer
			threads. */

			--n_use_threads;

			old_activity_count = srv_get_activity_count();
		}

		/* Ensure that the number of purge threads in use is
		within the configured limits. */

		ut_a(n_use_threads > 0);
		ut_a(n_use_threads <= n_threads);

		/* Take a snapshot of the history list before purge. */
		if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
			break;
		}

		/* The last argument is true on every TRX_SYS_N_RSEGS-th
		batch. NOTE(review): presumably this asks trx_purge() to
		truncate the history - confirm against its declaration. */
		n_pages_purged = trx_purge(
			n_use_threads, srv_purge_batch_size,
			(++count % TRX_SYS_N_RSEGS) == 0);

		*n_total_purged += n_pages_purged;

	} while (!srv_purge_should_exit(n_pages_purged)
		 && n_pages_purged > 0
		 && purge_sys->state == PURGE_STATE_RUN);

	return(rseg_history_len);
}
2645
/*********************************************************************//**
Suspend the purge coordinator thread. The thread marks purge as not
running and waits on its slot event: with a timeout of
SRV_PURGE_MAX_TIMEOUT while purge has not been stopped, without a
timeout once it has decided to stop. The loop is left when, after a
wakeup, purge is not in state PURGE_STATE_STOP (or a shutdown has
started) and the wait did not time out on an unchanged, short history
list. On return the slot is marked as not suspended and the thread is
counted as active again. */
static
void
srv_purge_coordinator_suspend(
/*==========================*/
	srv_slot_t*	slot,			/*!< in/out: Purge coordinator
						thread slot */
	ulint		rseg_history_len)	/*!< in: history list length
						before last purge */
{
	ut_ad(!srv_read_only_mode);
	ut_a(slot->type == SRV_PURGE);

	/* True when the next wait should be the untimed one. */
	bool		stop = false;

	/** Maximum wait time on the purge event, in micro-seconds. */
	static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;

	/* The value returned by srv_suspend_thread() is handed to the
	wait calls below. */
	ib_int64_t	sig_count = srv_suspend_thread(slot);

	do {
		/* Result of the wait in this iteration; 0 when no timed
		wait was done. */
		ulint		ret;

		rw_lock_x_lock(&purge_sys->latch);

		purge_sys->running = false;

		rw_lock_x_unlock(&purge_sys->latch);

		/* We don't wait right away on the non-timed wait because
		we want to signal the thread that wants to suspend purge. */

		if (stop) {
			os_event_wait_low(slot->event, sig_count);
			ret = 0;
		} else if (rseg_history_len <= trx_sys->rseg_history_len) {
			ret = os_event_wait_time_low(
				slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
		} else {
			/* The history list is now shorter than the
			snapshot passed in by the caller: do not spend
			time waiting, unless purge has been stopped. */
			ret = 0;
		}

		srv_sys_mutex_enter();

		/* The thread can be in state !suspended after the timeout
		but before this check if another thread sent a wakeup signal. */

		if (slot->suspended) {
			slot->suspended = FALSE;
			++srv_sys->n_threads_active[slot->type];
			/* There is exactly one purge coordinator. */
			ut_a(srv_sys->n_threads_active[slot->type] == 1);
		}

		srv_sys_mutex_exit();

		/* Suspend again right away, so that a fresh sig_count is
		available if another iteration is needed. */
		sig_count = srv_suspend_thread(slot);

		rw_lock_x_lock(&purge_sys->latch);

		/* Keep waiting only if purge was stopped explicitly and
		no shutdown is in progress. */
		stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
			&& purge_sys->state == PURGE_STATE_STOP);

		if (!stop) {
			ut_a(purge_sys->n_stop == 0);
			purge_sys->running = true;
		} else {
			ut_a(purge_sys->n_stop > 0);

			/* Signal that we are suspended. */
			os_event_set(purge_sys->event);
		}

		rw_lock_x_unlock(&purge_sys->latch);

		if (ret == OS_SYNC_TIME_EXCEEDED) {

			/* No new records added since wait started then simply
			wait for new records. The magic number 5000 is an
			approximation for the case where we have cached UNDO
			log records which prevent truncate of the UNDO
			segments. */

			if (rseg_history_len == trx_sys->rseg_history_len
			    && trx_sys->rseg_history_len < 5000) {

				stop = true;
			}
		}

	} while (stop);

	srv_sys_mutex_enter();

	/* The loop ends with a call to srv_suspend_thread(); undo its
	effect if nobody has woken the thread up in the meantime. */
	if (slot->suspended) {
		slot->suspended = FALSE;
		++srv_sys->n_threads_active[slot->type];
		ut_a(srv_sys->n_threads_active[slot->type] == 1);
	}

	srv_sys_mutex_exit();
}
2751
/*********************************************************************//**
Purge coordinator thread that schedules the purge tasks. It moves purge
from PURGE_STATE_INIT to PURGE_STATE_RUN, reserves a slot of type
SRV_PURGE and then alternates between suspending itself (when purge is
stopped or the last round purged nothing) and running purge rounds,
until srv_purge_should_exit() returns true. On the way out it keeps
purging until nothing is left if this is a slow shutdown, runs one final
small batch, marks purge as PURGE_STATE_EXIT and releases the worker
threads so that they can quit as well.
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)(
/*=========================================*/
	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
						required by os_thread_create */
{
	my_thread_init();

	srv_slot_t*	slot;
	/* Starts out as ULINT_UNDEFINED; NOTE(review): presumably a
	non-zero sentinel, so that the "purged nothing" test below does
	not fire before the first purge round - confirm. */
	ulint           n_total_purged = ULINT_UNDEFINED;

	ut_ad(!srv_read_only_mode);
	ut_a(srv_n_purge_threads >= 1);
	ut_a(trx_purge_state() == PURGE_STATE_INIT);
	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);

	/* Switch purge to the running state under the purge latch. */
	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->running = true;
	purge_sys->state = PURGE_STATE_RUN;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_purge_thread_key);
#endif /* UNIV_PFS_THREAD */

#ifdef UNIV_DEBUG_THREAD_CREATION
	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	slot = srv_reserve_slot(SRV_PURGE);

	/* History list length passed to the suspend function; refreshed
	by every purge round. */
	ulint	rseg_history_len = trx_sys->rseg_history_len;

	do {
		/* If purge has been stopped or the last purge didn't
		purge any records then wait for activity, but only while
		no shutdown is in progress. */

		if (srv_shutdown_state == SRV_SHUTDOWN_NONE
		    && (purge_sys->state == PURGE_STATE_STOP
			|| n_total_purged == 0)) {

			srv_purge_coordinator_suspend(slot, rseg_history_len);
		}

		/* The shutdown state may have changed while we were
		suspended: check again before purging. */
		if (srv_purge_should_exit(n_total_purged)) {
			ut_a(!slot->suspended);
			break;
		}

		n_total_purged = 0;

		rseg_history_len = srv_do_purge(
			srv_n_purge_threads, &n_total_purged);

	} while (!srv_purge_should_exit(n_total_purged));

	/* Ensure that we don't jump out of the loop unless the
	exit condition is satisfied. */

	ut_a(srv_purge_should_exit(n_total_purged));

	ulint	n_pages_purged = ULINT_MAX;

	/* Ensure that all records are purged if it is not a fast shutdown.
	This covers the case where a record can be added after we exit the
	loop above. */
	while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
		n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
	}

	/* This trx_purge is called to remove any undo records (added by
	background threads) after completion of the above loop. When
	srv_fast_shutdown != 0, a large batch size can cause significant
	delay in shutdown, so the batch size is capped at the magic number
	20 (which was the default in 5.5), which we hope will be sufficient
	to remove all the undo records */
	const uint temp_batch_size = 20;

	n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
				   ? srv_purge_batch_size : temp_batch_size,
				   true);
	/* After a slow shutdown this final batch must find nothing. */
	ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);

	/* The task queue should always be empty, independent of fast
	shutdown state. */
	ut_a(srv_get_task_queue_length() == 0);

	srv_free_slot(slot);

	/* Note that we are shutting down. */
	rw_lock_x_lock(&purge_sys->latch);

	purge_sys->state = PURGE_STATE_EXIT;

	purge_sys->running = false;

	rw_lock_x_unlock(&purge_sys->latch);

#ifdef UNIV_DEBUG_THREAD_CREATION
	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	/* Ensure that all the worker threads quit: release every one of
	them so that each sees PURGE_STATE_EXIT. */
	if (srv_n_purge_threads > 1) {
		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
	}

	my_thread_end();
	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
2876
2877 /**********************************************************************//**
2878 Enqueues a task to server task queue and releases a worker thread, if there
2879 is a suspended one. */
2880 UNIV_INTERN
2881 void
srv_que_task_enqueue_low(que_thr_t * thr)2882 srv_que_task_enqueue_low(
2883 /*=====================*/
2884 que_thr_t* thr) /*!< in: query thread */
2885 {
2886 ut_ad(!srv_read_only_mode);
2887 mutex_enter(&srv_sys->tasks_mutex);
2888
2889 UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
2890
2891 mutex_exit(&srv_sys->tasks_mutex);
2892
2893 srv_release_threads(SRV_WORKER, 1);
2894 }
2895
2896 /**********************************************************************//**
2897 Get count of tasks in the queue.
2898 @return number of tasks in queue */
2899 UNIV_INTERN
2900 ulint
srv_get_task_queue_length(void)2901 srv_get_task_queue_length(void)
2902 /*===========================*/
2903 {
2904 ulint n_tasks;
2905
2906 ut_ad(!srv_read_only_mode);
2907
2908 mutex_enter(&srv_sys->tasks_mutex);
2909
2910 n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
2911
2912 mutex_exit(&srv_sys->tasks_mutex);
2913
2914 return(n_tasks);
2915 }
2916
2917 /**********************************************************************//**
2918 Wakeup the purge threads. */
2919 UNIV_INTERN
2920 void
srv_purge_wakeup(void)2921 srv_purge_wakeup(void)
2922 /*==================*/
2923 {
2924 ut_ad(!srv_read_only_mode);
2925
2926 if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2927
2928 srv_release_threads(SRV_PURGE, 1);
2929
2930 if (srv_n_purge_threads > 1) {
2931 ulint n_workers = srv_n_purge_threads - 1;
2932
2933 srv_release_threads(SRV_WORKER, n_workers);
2934 }
2935 }
2936 }
2937
2938 /** Check whether given space id is undo tablespace id
2939 @param[in] space_id space id to check
2940 @return true if it is undo tablespace else false. */
2941 bool
srv_is_undo_tablespace(ulint space_id)2942 srv_is_undo_tablespace(
2943 ulint space_id)
2944 {
2945 if (srv_undo_space_id_start == 0) {
2946 return (false);
2947 }
2948
2949 return(space_id >= srv_undo_space_id_start
2950 && space_id < (srv_undo_space_id_start
2951 + srv_undo_tablespaces_open));
2952 }
2953