1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation. The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License, version 2.0, for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39
40 *****************************************************************************/
41
42 /**************************************************//**
43 @file srv/srv0srv.cc
44 The database server main program
45
46 Created 10/8/1995 Heikki Tuuri
47 *******************************************************/
48
49 /* Dummy comment */
50 #include "srv0srv.h"
51
52 #include "ut0mem.h"
53 #include "ut0ut.h"
54 #include "os0proc.h"
55 #include "mem0mem.h"
56 #include "mem0pool.h"
57 #include "sync0sync.h"
58 #include "que0que.h"
59 #include "log0online.h"
60 #include "log0recv.h"
61 #include "pars0pars.h"
62 #include "usr0sess.h"
63 #include "lock0lock.h"
64 #include "trx0purge.h"
65 #include "ibuf0ibuf.h"
66 #include "buf0flu.h"
67 #include "buf0lru.h"
68 #include "btr0sea.h"
69 #include "dict0load.h"
70 #include "dict0boot.h"
71 #include "dict0stats_bg.h" /* dict_stats_event */
72 #include "srv0start.h"
73 #include "row0mysql.h"
74 #include "ha_prototypes.h"
75 #include "trx0i_s.h"
76 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
77 #include "srv0mon.h"
78 #include "ut0crc32.h"
79 #include "os0file.h"
80
81 #include "mysql/plugin.h"
82 #include "mysql/service_thd_wait.h"
83
84 /* prototypes for new functions added to ha_innodb.cc */
85 ibool innobase_get_slow_log();
86
87 /* The following is the maximum allowed duration of a lock wait. */
88 UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
89
90 /**/
91 UNIV_INTERN lint srv_kill_idle_transaction = 0;
92
93 /* How much data manipulation language (DML) statements need to be delayed,
94 in microseconds, in order to reduce the lagging of the purge thread. */
95 UNIV_INTERN ulint srv_dml_needed_delay = 0;
96
97 UNIV_INTERN ibool srv_monitor_active = FALSE;
98 UNIV_INTERN ibool srv_error_monitor_active = FALSE;
99
100 UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
101
102 UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
103
104 UNIV_INTERN const char* srv_main_thread_op_info = "";
105
106 /** Prefix used by MySQL to indicate pre-5.1 table name encoding */
107 const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
108
109 /* Server parameters which are read from the initfile */
110
111 /* The following three are dir paths which are catenated before file
112 names, where the file name itself may also contain a path */
113
114 UNIV_INTERN char* srv_data_home = NULL;
115
116 /** Rollback files directory, can be absolute. */
117 UNIV_INTERN char* srv_undo_dir = NULL;
118
119 /** The number of tablespaces to use for rollback segments. */
120 UNIV_INTERN ulong srv_undo_tablespaces = 8;
121
122 /** The number of UNDO tablespaces that are open and ready to use. */
123 UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
124
125 /* The number of rollback segments to use */
126 UNIV_INTERN ulong srv_undo_logs = 1;
127
128 #ifdef UNIV_LOG_ARCHIVE
129 UNIV_INTERN char* srv_arch_dir = NULL;
130 UNIV_INTERN ulong srv_log_arch_expire_sec = 0;
131 #endif /* UNIV_LOG_ARCHIVE */
132
133 /** Set if InnoDB must operate in read-only mode. We don't do any
134 recovery and open all tables in RO mode instead of RW mode. We don't
135 sync the max trx id to disk either. */
136 UNIV_INTERN my_bool srv_read_only_mode;
137 /** Store each table created by a user in its own file; data
138 dictionary tables are in the system tablespace 0 */
139 UNIV_INTERN my_bool srv_file_per_table;
140 /** The file format to use on new *.ibd files. */
141 UNIV_INTERN ulint srv_file_format = 0;
142 /** Whether to check file format during startup. A value of
143 UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
144 set it to the highest format we support. */
145 UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
146 /** Set if InnoDB operates in read-only mode or innodb-force-recovery
147 is greater than SRV_FORCE_NO_TRX_UNDO. */
148 UNIV_INTERN my_bool high_level_read_only;
149
150 #if UNIV_FORMAT_A
151 # error "UNIV_FORMAT_A must be 0!"
152 #endif
153
154 /** Place locks to records only i.e. do not use next-key locking except
155 on duplicate key checking and foreign key checking */
156 UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
157 /** Sort buffer size in index creation */
158 UNIV_INTERN ulong srv_sort_buf_size = 1048576;
159 /** Maximum modification log file size for online index creation */
160 UNIV_INTERN unsigned long long srv_online_max_size;
161
162 /* If this flag is TRUE, then we will use the native aio of the
163 OS (provided we compiled Innobase with it in), otherwise we will
164 use simulated aio we build below with threads.
165 Currently we support native aio on windows and linux */
166 /* make srv_use_native_aio to be visible for other plugins */
167 my_bool srv_use_native_aio = TRUE;
168
169 #ifdef __WIN__
170 /* Windows native condition variables. We use runtime loading / function
171 pointers, because they are not available on Windows Server 2003 and
172 Windows XP/2000.
173
174 We use condition variables for events on Windows if possible, even though
175 os_event resembles the Windows kernel event object well API-wise. The reason
176 is performance: kernel objects are heavyweight and WaitForSingleObject() is a
177 performance killer, causing the calling thread to context switch. Besides,
178 InnoDB preallocates a large number (often millions) of os_events. With kernel
179 event objects this takes a big chunk out of the non-paged pool, which is better
180 suited for tasks like IO than for storing idle event objects. */
181 UNIV_INTERN ibool srv_use_native_conditions = FALSE;
182 #endif /* __WIN__ */
183
184 UNIV_INTERN ulint srv_n_data_files = 0;
185 UNIV_INTERN char** srv_data_file_names = NULL;
186 /* size in database pages */
187 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
188
189 /** Whether the redo log tracking is currently enabled. Note that it is
190 possible for the log tracker thread to be running and the tracking to be
191 disabled */
192 UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
193
194 UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024;
195
196 UNIV_INTERN ulonglong srv_max_changed_pages = 0;
197
198 /** When TRUE, fake change transactions take S rather than X row locks.
199 When FALSE, row locks are not taken at all. */
200 UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
201
202 /* if TRUE, then we auto-extend the last data file */
203 UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
204 /* if != 0, this tells the max size auto-extending may increase the
205 last data file size */
206 UNIV_INTERN ulint srv_last_file_size_max = 0;
207 /* If the last data file is auto-extended, we add this
208 many pages to it at a time */
209 UNIV_INTERN ulong srv_auto_extend_increment = 8;
210 UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
211
212 /* If the following is TRUE we do not allow inserts etc. This protects
213 the user from forgetting the 'newraw' keyword to my.cnf */
214
215 UNIV_INTERN ibool srv_created_new_raw = FALSE;
216
217 UNIV_INTERN char* srv_log_group_home_dir = NULL;
218
219 UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
220 /* size in database pages */
221 UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
222 UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
223 /* size in database pages */
224 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
225 UNIV_INTERN uint srv_flush_log_at_timeout = 1;
226 UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
227 UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
228 UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
229
230 /* Try to flush dirty pages so as to avoid IO bursts at
231 the checkpoints. */
232 UNIV_INTERN char srv_adaptive_flushing = TRUE;
233
234 UNIV_INTERN ulint srv_show_locks_held = 10;
235 UNIV_INTERN ulint srv_show_verbose_locks = 0;
236
237 /** Maximum number of times allowed to conditionally acquire
238 mutex before switching to blocking wait on the mutex */
239 #define MAX_MUTEX_NOWAIT 20
240
241 /** Check whether the number of failed nonblocking mutex
242 acquisition attempts exceeds maximum allowed value. If so,
243 srv_printf_innodb_monitor() will request mutex acquisition
244 with mutex_enter(), which will wait until it gets the mutex. */
245 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
246
247 /** The sort order table of the MySQL latin1_swedish_ci character set
248 collation */
249 UNIV_INTERN const byte* srv_latin1_ordering;
250
251 /* use os/external memory allocator */
252 UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
253 /* requested size in kilobytes */
254 UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
255 /* requested number of buffer pool instances */
256 UNIV_INTERN ulint srv_buf_pool_instances = 1;
257 /* number of locks to protect buf_pool->page_hash */
258 UNIV_INTERN ulong srv_n_page_hash_locks = 16;
259 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
260 UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
261 /** whether or not to flush neighbors of a block */
262 UNIV_INTERN ulong srv_flush_neighbors = 1;
263 /* previously requested size */
264 UNIV_INTERN ulint srv_buf_pool_old_size;
265 /* current size in kilobytes */
266 UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
267 /* size in bytes */
268 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
269 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
270
271 /** Query thread preflush algorithm */
272 UNIV_INTERN ulint srv_foreground_preflush
273 = SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF;
274
275 /** The maximum time limit for a single LRU tail flush iteration by the page
276 cleaner thread */
277 UNIV_INTERN ulint srv_cleaner_max_lru_time = 1000;
278
279 /** The maximum time limit for a single flush list flush iteration by the page
280 cleaner thread */
281 UNIV_INTERN ulint srv_cleaner_max_flush_time = 1000;
282
283 /** Page cleaner flush list flush batches are further divided into this chunk
284 size */
285 UNIV_INTERN ulint srv_cleaner_flush_chunk_size = 100;
286
287 /** Page cleaner LRU list flush batches are further divided into this chunk
288 size */
289 UNIV_INTERN ulint srv_cleaner_lru_chunk_size = 100;
290
291 /** If free list length is lower than this percentage of srv_LRU_scan_depth,
292 page cleaner LRU flushes will issue flush batches to the same instance in a
293 row */
294 UNIV_INTERN ulint srv_cleaner_free_list_lwm = 10;
295
296 /** If TRUE, page cleaner heuristics use evicted instead of flushed page counts
297 for its heuristics */
298 UNIV_INTERN my_bool srv_cleaner_eviction_factor = FALSE;
299
300 /** Page cleaner LSN age factor formula option */
301 UNIV_INTERN ulong srv_cleaner_lsn_age_factor
302 = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT;
303
304 /** Empty free list for a query thread handling algorithm option */
305 UNIV_INTERN ulong srv_empty_free_list_algorithm
306 = SRV_EMPTY_FREE_LIST_BACKOFF;
307
308 /* This parameter is deprecated. Use srv_n_[read|write]_io_threads
309 instead. */
310 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
311 UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
312 UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
313
314 /* Switch to enable random read ahead. */
315 UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
316
317 /* The log block size */
318 UNIV_INTERN ulint srv_log_block_size = 0;
319
320 /* User settable value of the number of pages that must be present
321 in the buffer cache and accessed sequentially for InnoDB to trigger a
322 readahead request. */
323 UNIV_INTERN ulong srv_read_ahead_threshold = 56;
324
325 #ifdef UNIV_LOG_ARCHIVE
326 UNIV_INTERN ibool srv_log_archive_on = FALSE;
327 #endif /* UNIV_LOG_ARCHIVE */
328
329 /* This parameter is used to throttle the number of insert buffers that are
330 merged in a batch. By increasing this parameter on a faster disk you can
331 possibly reduce the number of I/O operations performed to complete the
332 merge operation. The value of this parameter is used as is by the
333 background loop when the system is idle (low load), on a busy system
334 the parameter is scaled down by a factor of 4, this is to avoid putting
335 a heavier load on the I/O sub system. */
336
337 UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
338
339 UNIV_INTERN char* srv_file_flush_method_str = NULL;
340 UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
341 UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
342
343 UNIV_INTERN ulint srv_max_n_open_files = 300;
344
345 /* Number of IO operations per second the server can do */
346 UNIV_INTERN ulong srv_io_capacity = 200;
347 UNIV_INTERN ulong srv_max_io_capacity = 400;
348
349 /* The InnoDB main thread tries to keep the ratio of modified pages
350 in the buffer pool to all database pages in the buffer pool smaller than
351 the following number. But it is not guaranteed that the value stays below
352 that during a time of heavy update/insert activity. */
353
354 UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
355 UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50;
356
357 /* This is the percentage of log capacity at which adaptive flushing,
358 if enabled, will kick in. */
359 UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10;
360
361 /* Number of iterations over which adaptive flushing is averaged. */
362 UNIV_INTERN ulong srv_flushing_avg_loops = 30;
363
364 /* The tid of the cleaner thread */
365 UNIV_INTERN os_tid_t srv_cleaner_tid;
366
367 /* The tid of the LRU manager thread */
368 UNIV_INTERN os_tid_t srv_lru_manager_tid;
369
370 /* The tids of the purge threads */
371 UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS];
372
373 /* The tids of the I/O threads */
374 UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS];
375
376 /* The tid of the master thread */
377 UNIV_INTERN os_tid_t srv_master_tid;
378
379 /* The relative scheduling priority of the cleaner and LRU manager threads */
380 UNIV_INTERN ulint srv_sched_priority_cleaner = 19;
381
382 /* The relative scheduling priority of the purge threads */
383 UNIV_INTERN ulint srv_sched_priority_purge = 19;
384
385 /* The relative scheduling priority of the I/O threads */
386 UNIV_INTERN ulint srv_sched_priority_io = 19;
387
388 /* The relative scheduling priority of the master thread */
389 UNIV_INTERN ulint srv_sched_priority_master = 19;
390
391 /* The relative priority of the current thread. If 0, low priority; if 1, high
392 priority. */
393 UNIV_INTERN UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0;
394
395 /* The relative priority of the purge coordinator and worker threads. */
396 UNIV_INTERN my_bool srv_purge_thread_priority = FALSE;
397
398 /* The relative priority of the I/O threads. */
399 UNIV_INTERN my_bool srv_io_thread_priority = FALSE;
400
401 /* The relative priority of the cleaner thread. */
402 UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE;
403
404 /* The relative priority of the master thread. */
405 UNIV_INTERN my_bool srv_master_thread_priority = FALSE;
406
407 /* The number of purge threads to use.*/
408 UNIV_INTERN ulong srv_n_purge_threads = 1;
409
410 /* the number of pages to purge in one batch */
411 UNIV_INTERN ulong srv_purge_batch_size = 20;
412
413 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
414 NULL values when collecting statistics. By default, it is set to
415 SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
416 UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
417
418 UNIV_INTERN srv_stats_t srv_stats;
419
420 /* structure to pass status variables to MySQL */
421 UNIV_INTERN export_var_t export_vars;
422
423 /** Normally 0. When nonzero, skip some phases of crash recovery,
424 starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
425 by SELECT or mysqldump. When this is nonzero, we do not allow any user
426 modifications to the data. */
427 UNIV_INTERN ulong srv_force_recovery;
428 #ifndef DBUG_OFF
429 /** Inject a crash at different steps of the recovery process.
430 This is for testing and debugging only. */
431 UNIV_INTERN ulong srv_force_recovery_crash;
432 #endif /* !DBUG_OFF */
433
434 /** Print all user-level transactions deadlocks to mysqld stderr */
435
436 UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
437
438 /** Print lock wait timeout info to mysqld stderr */
439
440 my_bool srv_print_lock_wait_timeout_info = FALSE;
441
442 /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
443 UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
444
445 /* If the following is set to 1 then we do not run purge and insert buffer
446 merge to completion before shutdown. If it is set to 2, do not even flush the
447 buffer pool to data files at the shutdown: we effectively 'crash'
448 InnoDB (but lose no committed transactions). */
449 UNIV_INTERN ulint srv_fast_shutdown = 0;
450
451 /* Generate an innodb_status.<pid> file */
452 UNIV_INTERN ibool srv_innodb_status = FALSE;
453
454 /* When estimating number of different key values in an index, sample
455 this many index pages, there are 2 ways to calculate statistics:
456 * persistent stats that are calculated by ANALYZE TABLE and saved
457 in the innodb database.
458 * quick transient stats, that are used if persistent stats for the given
459 table/index are not found in the innodb database */
460 UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
461 UNIV_INTERN my_bool srv_stats_persistent = TRUE;
462 UNIV_INTERN my_bool srv_stats_include_delete_marked = FALSE;
463 UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
464 UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
465
466 UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
467 UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
468 #ifdef HAVE_POSIX_FALLOCATE
469 UNIV_INTERN ibool srv_use_posix_fallocate = FALSE;
470 #endif
471
472 /** The doublewrite buffer is 2MB in size, i.e. it can hold 128 16K pages.
473 The following parameter is the size of the buffer that is used for
474 batch flushing i.e.: LRU flushing and flush_list flushing. The rest
475 of the pages are used for single page flushing. */
476 UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
477
478 UNIV_INTERN ulong srv_replication_delay = 0;
479
480 UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
481
482 UNIV_INTERN ulint srv_log_checksum_algorithm =
483 SRV_CHECKSUM_ALGORITHM_INNODB;
484
485 /*-------------------------------------------*/
486 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
487 UNIV_INTERN ulong srv_spin_wait_delay = 6;
488 UNIV_INTERN ibool srv_priority_boost = TRUE;
489
490 #ifdef UNIV_DEBUG
491 UNIV_INTERN ibool srv_print_thread_releases = FALSE;
492 UNIV_INTERN ibool srv_print_lock_waits = FALSE;
493 UNIV_INTERN ibool srv_print_buf_io = FALSE;
494 UNIV_INTERN ibool srv_print_log_io = FALSE;
495 UNIV_INTERN ibool srv_print_latch_waits = FALSE;
496 #endif /* UNIV_DEBUG */
497
498 static ulint srv_n_rows_inserted_old = 0;
499 static ulint srv_n_rows_updated_old = 0;
500 static ulint srv_n_rows_deleted_old = 0;
501 static ulint srv_n_rows_read_old = 0;
502
503 UNIV_INTERN ulint srv_truncated_status_writes = 0;
504 UNIV_INTERN ulint srv_available_undo_logs = 0;
505
506 /* Ensure status variables are on separate cache lines */
507
508 #define CACHE_LINE_SIZE 64
509 #define CACHE_ALIGNED MY_ATTRIBUTE((aligned (CACHE_LINE_SIZE)))
510
511 UNIV_INTERN byte
512 counters_pad_start[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0};
513
514 UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0;
515 UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0;
516
517 UNIV_INTERN byte
518 counters_pad_end[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0};
519
520 /* Set the following to 0 if you want InnoDB to write messages on
521 stderr on startup/shutdown. */
522 UNIV_INTERN ibool srv_print_verbose_log = TRUE;
523 UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE;
524 UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE;
525 UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
526 UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
527
528 /* Array of English strings describing the current state of an
529 i/o handler thread */
530
531 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
532 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
533
534 UNIV_INTERN time_t srv_last_monitor_time;
535
536 UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex;
537
538 /* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
539 UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
540
541 #ifdef UNIV_PFS_MUTEX
542 # ifndef HAVE_ATOMIC_BUILTINS
543 /* Key to register server_mutex with performance schema */
544 UNIV_INTERN mysql_pfs_key_t server_mutex_key;
545 # endif /* !HAVE_ATOMIC_BUILTINS */
546 /** Key to register srv_innodb_monitor_mutex with performance schema */
547 UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
548 /** Key to register srv_monitor_file_mutex with performance schema */
549 UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
550 /** Key to register srv_dict_tmpfile_mutex with performance schema */
551 UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
552 /** Key to register the mutex with performance schema */
553 UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
554 /** Key to register srv_sys_t::mutex with performance schema */
555 UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
556 /** Key to register srv_sys_t::tasks_mutex with performance schema */
557 UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
558 #endif /* UNIV_PFS_MUTEX */
559
560 /** Temporary file for innodb monitor output */
561 UNIV_INTERN FILE* srv_monitor_file;
562 /** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
563 This mutex has a very high rank; threads reserving it should not
564 be holding any InnoDB latches. */
565 UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
566 /** Temporary file for output from the data dictionary */
567 UNIV_INTERN FILE* srv_dict_tmpfile;
568 /** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
569 This mutex has a very low rank; threads reserving it should not
570 acquire any further latches or sleep before releasing this one. */
571 UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
572 /** Temporary file for miscellaneous diagnostic output */
573 UNIV_INTERN FILE* srv_misc_tmpfile;
574
575 UNIV_INTERN ulint srv_main_thread_process_no = 0;
576 UNIV_INTERN ulint srv_main_thread_id = 0;
577
578 /* The following counts are used by the srv_master_thread. */
579
580 /** Iterations of the loop bounded by 'srv_active' label. */
581 static ulint srv_main_active_loops = 0;
582 /** Iterations of the loop bounded by the 'srv_idle' label. */
583 static ulint srv_main_idle_loops = 0;
584 /** Iterations of the loop bounded by the 'srv_shutdown' label. */
585 static ulint srv_main_shutdown_loops = 0;
586 /** Log writes involving flush. */
587 static ulint srv_log_writes_and_flush = 0;
588
589 /** Number of times secondary index lookup triggered cluster lookup */
590 ulint srv_sec_rec_cluster_reads = 0;
591
592 /** Number of times prefix optimization avoided triggering cluster lookup */
593 ulint srv_sec_rec_cluster_reads_avoided = 0;
594
595 /* This is only ever touched by the master thread. It records the
596 time when the last flush of log file has happened. The master
597 thread ensures that we flush the log files at least once per
598 second. */
599 static time_t srv_last_log_flush_time;
600
601 /* Interval in seconds at which various tasks are performed by the
602 master thread when server is active. In order to balance the workload,
603 we should try to keep intervals such that they are not multiple of
604 each other. For example, if we have intervals for various tasks
605 defined as 5, 10, 15, 60 then all tasks will be performed when
606 current_time % 60 == 0 and no tasks will be performed when
607 current_time % 5 != 0. */
608
609 # define SRV_MASTER_CHECKPOINT_INTERVAL (7)
610 # define SRV_MASTER_PURGE_INTERVAL (10)
611 #ifdef MEM_PERIODIC_CHECK
612 # define SRV_MASTER_MEM_VALIDATE_INTERVAL (13)
613 #endif /* MEM_PERIODIC_CHECK */
614 # define SRV_MASTER_DICT_LRU_INTERVAL (47)
615
/** Lock srv_sys->mutex. Expands to a single statement. */
#define srv_sys_mutex_enter()	do { mutex_enter(&srv_sys->mutex); } while (0)

/** Evaluates to true when mutex_own(&srv_sys->mutex) holds and the
server is not running in read-only mode (srv_read_only_mode is unset). */
#define srv_sys_mutex_own()						\
	(mutex_own(&srv_sys->mutex) && !srv_read_only_mode)

/** Unlock srv_sys->mutex. Expands to a single statement. */
#define srv_sys_mutex_exit()	do { mutex_exit(&srv_sys->mutex); } while (0)
629
/** Lock wait timeout to use for a transaction: 0 when
trx->lock.allowed_to_wait is not set, otherwise whatever
thd_lock_wait_timeout() returns for trx->mysql_thd.
NOTE: the argument may be evaluated twice, so it must not have side
effects.
@param trx	pointer to the transaction */
#define fetch_lock_wait_timeout(trx)					\
	(!(trx)->lock.allowed_to_wait					\
	 ? 0								\
	 : thd_lock_wait_timeout((trx)->mysql_thd))
634
635 /*
636 IMPLEMENTATION OF THE SERVER MAIN PROGRAM
637 =========================================
638
639 There is the following analogue between this database
640 server and an operating system kernel:
641
642 DB concept equivalent OS concept
643 ---------- ---------------------
644 transaction -- process;
645
646 query thread -- thread;
647
648 lock -- semaphore;
649
650 kernel -- kernel;
651
652 query thread execution:
653 (a) without lock mutex
654 reserved -- process executing in user mode;
655 (b) with lock mutex reserved
656 -- process executing in kernel mode;
657
658 The server has several background threads all running at the same
659 priority as user threads. It periodically checks if there is anything
660 happening in the server which requires intervention of the master
661 thread. Such situations may be, for example, when flushing of dirty
662 blocks is needed in the buffer pool or old version of database rows
663 have to be cleaned away (purged). The user can configure a separate
664 dedicated purge thread(s) too, in which case the master thread does not
665 do any purging.
666
667 The threads which we call user threads serve the queries of the MySQL
668 server. They run at normal priority.
669
670 When there is no activity in the system, also the master thread
671 suspends itself to wait for an event making the server totally silent.
672
673 There is still one complication in our server design. If a
674 background utility thread obtains a resource (e.g., mutex) needed by a user
675 thread, and there is also some other user activity in the system,
676 the user thread may have to wait indefinitely long for the
677 resource, as the OS does not schedule a background thread if
678 there is some other runnable user thread. This problem is called
679 priority inversion in real-time programming.
680
681 One solution to the priority inversion problem would be to keep record
682 of which thread owns which resource and in the above case boost the
683 priority of the background thread so that it will be scheduled and it
684 can release the resource. This solution is called priority inheritance
685 in real-time programming. A drawback of this solution is that the overhead
686 of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
687 MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
688 be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
689 that the thread cannot store the information in the resource, say a mutex,
690 itself, because competing threads could wipe out the information if it is
691 stored before acquiring the mutex, and if it is stored afterwards, the
692 information is outdated for the time of one machine instruction, at least.
693 (To be precise, the information could be stored to lock_word in mutex if
694 the machine supports atomic swap.)
695
696 The above solution with priority inheritance may become actual in the
697 future, currently we do not implement any priority twiddling solution.
698 Our general aim is to reduce the contention of all mutexes by making
699 them more fine grained.
700
701 The thread table contains information of the current status of each
702 thread existing in the system, and also the event semaphores used in
703 suspending the master thread and utility threads when they have nothing
704 to do. The thread table can be seen as an analogue to the process table
705 in a traditional Unix implementation. */
706
707 /** The server system struct */
708 struct srv_sys_t{
709 ib_mutex_t tasks_mutex; /*!< variable protecting the
710 tasks queue */
711 UT_LIST_BASE_NODE_T(que_thr_t)
712 tasks; /*!< task queue */
713
714 ib_mutex_t mutex; /*!< variable protecting the
715 fields below. */
716 ulint n_sys_threads; /*!< size of the sys_threads
717 array */
718
719 srv_slot_t* sys_threads; /*!< server thread table */
720
721 ulint n_threads_active[SRV_MASTER + 1];
722 /*!< number of threads active
723 in a thread class */
724
725 srv_stats_t::ulint_ctr_1_t
726 activity_count; /*!< For tracking server
727 activity */
728 srv_stats_t::ulint_ctr_1_t
729 ibuf_merge_activity_count;/*!< For tracking change
730 buffer merge activity, a subset
731 of overall server activity */
732 };
733
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. It is only defined when
HAVE_ATOMIC_BUILTINS is not available. */
UNIV_INTERN ib_mutex_t	server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */

/** The server system instance. It is allocated in srv_init() and freed and
reset to NULL in srv_free(). */
static srv_sys_t*	srv_sys	= NULL;

/** Event to signal the monitor thread. */
UNIV_INTERN os_event_t	srv_monitor_event;

/** Event to signal the error thread */
UNIV_INTERN os_event_t	srv_error_event;

/** Event to signal the buffer pool dump/load thread */
UNIV_INTERN os_event_t	srv_buf_dump_event;

/** The buffer pool dump/load file name */
UNIV_INTERN char*	srv_buf_dump_filename;

/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
and/or load it during startup. */
UNIV_INTERN char	srv_buffer_pool_dump_at_shutdown = FALSE;
UNIV_INTERN char	srv_buffer_pool_load_at_startup = FALSE;

/** Slot index in the srv_sys->sys_threads array for the purge thread. */
static const ulint	SRV_PURGE_SLOT	= 1;

/** Slot index in the srv_sys->sys_threads array for the master thread. */
static const ulint	SRV_MASTER_SLOT = 0;

/** Event created in srv_init() when not in read-only mode.
NOTE(review): judging by the name it is signalled when a checkpoint
completes; confirm against the code that sets and waits on it. */
UNIV_INTERN os_event_t	srv_checkpoint_completed_event;

/** Event created in srv_init() when not in read-only mode; srv_init() sets
it right away if srv_track_changed_pages is enabled.
NOTE(review): presumably signalled by the redo log tracker thread; confirm
against the code that sets and waits on it. */
UNIV_INTERN os_event_t	srv_redo_log_tracked_event;

/** Whether the redo log tracker thread has been started. Does not take into
account whether the tracking is currently enabled (see srv_track_changed_pages
for that) */
UNIV_INTERN bool	srv_redo_log_thread_started = false;
772
773 /*********************************************************************//**
774 Prints counters for work done by srv_master_thread. */
775 static
776 void
srv_print_master_thread_info(FILE * file)777 srv_print_master_thread_info(
778 /*=========================*/
779 FILE *file) /* in: output stream */
780 {
781 fprintf(file, "srv_master_thread loops: %lu srv_active, "
782 "%lu srv_shutdown, %lu srv_idle\n",
783 srv_main_active_loops,
784 srv_main_shutdown_loops,
785 srv_main_idle_loops);
786 fprintf(file, "srv_master_thread log flush and writes: %lu\n",
787 srv_log_writes_and_flush);
788 }
789
790 /*********************************************************************//**
791 Sets the info describing an i/o thread current state. */
792 UNIV_INTERN
793 void
srv_set_io_thread_op_info(ulint i,const char * str)794 srv_set_io_thread_op_info(
795 /*======================*/
796 ulint i, /*!< in: the 'segment' of the i/o thread */
797 const char* str) /*!< in: constant char string describing the
798 state */
799 {
800 ut_a(i < SRV_MAX_N_IO_THREADS);
801
802 srv_io_thread_op_info[i] = str;
803 }
804
805 /*********************************************************************//**
806 Resets the info describing an i/o thread current state. */
807 UNIV_INTERN
808 void
srv_reset_io_thread_op_info()809 srv_reset_io_thread_op_info()
810 /*=========================*/
811 {
812 for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
813 srv_io_thread_op_info[i] = "not started yet";
814 }
815 }
816
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks that a thread table slot type denotes a real thread class
(SRV_WORKER, SRV_PURGE or SRV_MASTER). For any other value, SRV_NONE
included, ut_error is raised.
@param[in]	type	thread type to check
@return TRUE if ok */
static
ibool
srv_thread_type_validate(
	srv_thread_type	type)
{
	switch (type) {
	case SRV_MASTER:
	case SRV_PURGE:
	case SRV_WORKER:
		return(TRUE);

	case SRV_NONE:
		/* Not a valid type for a slot: fall out to the error. */
		break;
	}

	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
839
840 /*********************************************************************//**
841 Gets the type of a thread table slot.
842 @return thread type */
843 static
844 srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)845 srv_slot_get_type(
846 /*==============*/
847 const srv_slot_t* slot) /*!< in: thread slot */
848 {
849 srv_thread_type type = slot->type;
850 ut_ad(srv_thread_type_validate(type));
851 return(type);
852 }
853
854 /*********************************************************************//**
855 Reserves a slot in the thread table for the current thread.
856 @return reserved slot */
857 static
858 srv_slot_t*
srv_reserve_slot(srv_thread_type type)859 srv_reserve_slot(
860 /*=============*/
861 srv_thread_type type) /*!< in: type of the thread */
862 {
863 srv_slot_t* slot = 0;
864
865 srv_sys_mutex_enter();
866
867 ut_ad(srv_thread_type_validate(type));
868
869 switch (type) {
870 case SRV_MASTER:
871 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
872 break;
873
874 case SRV_PURGE:
875 slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
876 break;
877
878 case SRV_WORKER:
879 /* Find an empty slot, skip the master and purge slots. */
880 for (slot = &srv_sys->sys_threads[2];
881 slot->in_use;
882 ++slot) {
883
884 ut_a(slot < &srv_sys->sys_threads[
885 srv_sys->n_sys_threads]);
886 }
887 break;
888
889 case SRV_NONE:
890 ut_error;
891 }
892
893 ut_a(!slot->in_use);
894
895 slot->in_use = TRUE;
896 slot->suspended = FALSE;
897 slot->type = type;
898
899 ut_ad(srv_slot_get_type(slot) == type);
900
901 ++srv_sys->n_threads_active[type];
902
903 srv_sys_mutex_exit();
904
905 return(slot);
906 }
907
908 /*********************************************************************//**
909 Suspends the calling thread to wait for the event in its thread slot.
910 @return the current signal count of the event. */
911 static
912 ib_int64_t
srv_suspend_thread_low(srv_slot_t * slot)913 srv_suspend_thread_low(
914 /*===================*/
915 srv_slot_t* slot) /*!< in/out: thread slot */
916 {
917
918 ut_ad(!srv_read_only_mode);
919 ut_ad(srv_sys_mutex_own());
920
921 ut_ad(slot->in_use);
922
923 srv_thread_type type = srv_slot_get_type(slot);
924
925 switch (type) {
926 case SRV_NONE:
927 ut_error;
928
929 case SRV_MASTER:
930 /* We have only one master thread and it
931 should be the first entry always. */
932 ut_a(srv_sys->n_threads_active[type] == 1);
933 break;
934
935 case SRV_PURGE:
936 /* We have only one purge coordinator thread
937 and it should be the second entry always. */
938 ut_a(srv_sys->n_threads_active[type] == 1);
939 break;
940
941 case SRV_WORKER:
942 ut_a(srv_n_purge_threads > 1);
943 ut_a(srv_sys->n_threads_active[type] > 0);
944 break;
945 }
946
947 ut_a(!slot->suspended);
948 slot->suspended = TRUE;
949
950 ut_a(srv_sys->n_threads_active[type] > 0);
951
952 srv_sys->n_threads_active[type]--;
953
954 return(os_event_reset(slot->event));
955 }
956
957 /*********************************************************************//**
958 Suspends the calling thread to wait for the event in its thread slot.
959 @return the current signal count of the event. */
960 static
961 ib_int64_t
srv_suspend_thread(srv_slot_t * slot)962 srv_suspend_thread(
963 /*===============*/
964 srv_slot_t* slot) /*!< in/out: thread slot */
965 {
966 srv_sys_mutex_enter();
967
968 ib_int64_t sig_count = srv_suspend_thread_low(slot);
969
970 srv_sys_mutex_exit();
971
972 return(sig_count);
973 }
974
975 /*********************************************************************//**
976 Releases threads of the type given from suspension in the thread table.
977 NOTE! The server mutex has to be reserved by the caller!
978 @return number of threads released: this may be less than n if not
979 enough threads were suspended at the moment. */
980 UNIV_INTERN
981 ulint
srv_release_threads(srv_thread_type type,ulint n)982 srv_release_threads(
983 /*================*/
984 srv_thread_type type, /*!< in: thread type */
985 ulint n) /*!< in: number of threads to release */
986 {
987 ulint i;
988 ulint count = 0;
989
990 ut_ad(srv_thread_type_validate(type));
991 ut_ad(n > 0);
992
993 srv_sys_mutex_enter();
994
995 for (i = 0; i < srv_sys->n_sys_threads; i++) {
996 srv_slot_t* slot;
997
998 slot = &srv_sys->sys_threads[i];
999
1000 if (slot->in_use
1001 && srv_slot_get_type(slot) == type
1002 && slot->suspended) {
1003
1004 switch (type) {
1005 case SRV_NONE:
1006 ut_error;
1007
1008 case SRV_MASTER:
1009 /* We have only one master thread and it
1010 should be the first entry always. */
1011 ut_a(n == 1);
1012 ut_a(i == SRV_MASTER_SLOT);
1013 ut_a(srv_sys->n_threads_active[type] == 0);
1014 break;
1015
1016 case SRV_PURGE:
1017 /* We have only one purge coordinator thread
1018 and it should be the second entry always. */
1019 ut_a(n == 1);
1020 ut_a(i == SRV_PURGE_SLOT);
1021 ut_a(srv_n_purge_threads > 0);
1022 ut_a(srv_sys->n_threads_active[type] == 0);
1023 break;
1024
1025 case SRV_WORKER:
1026 ut_a(srv_n_purge_threads > 1);
1027 ut_a(srv_sys->n_threads_active[type]
1028 < srv_n_purge_threads - 1);
1029 break;
1030 }
1031
1032 slot->suspended = FALSE;
1033
1034 ++srv_sys->n_threads_active[type];
1035
1036 os_event_set(slot->event);
1037
1038 if (++count == n) {
1039 break;
1040 }
1041 }
1042 }
1043
1044 srv_sys_mutex_exit();
1045
1046 return(count);
1047 }
1048
1049 /*********************************************************************//**
1050 Release a thread's slot. */
1051 static
1052 void
srv_free_slot(srv_slot_t * slot)1053 srv_free_slot(
1054 /*==========*/
1055 srv_slot_t* slot) /*!< in/out: thread slot */
1056 {
1057 srv_sys_mutex_enter();
1058
1059 if (!slot->suspended) {
1060 /* Mark the thread as inactive. */
1061 srv_suspend_thread_low(slot);
1062 }
1063
1064 /* Free the slot for reuse. */
1065 ut_ad(slot->in_use);
1066 slot->in_use = FALSE;
1067
1068 srv_sys_mutex_exit();
1069 }
1070
1071 /*********************************************************************//**
1072 Initializes the server. */
1073 UNIV_INTERN
1074 void
srv_init(void)1075 srv_init(void)
1076 /*==========*/
1077 {
1078 ulint n_sys_threads = 0;
1079 ulint srv_sys_sz = sizeof(*srv_sys);
1080
1081 #ifndef HAVE_ATOMIC_BUILTINS
1082 mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
1083 #endif /* !HAVE_ATOMIC_BUILTINS */
1084
1085 mutex_create(srv_innodb_monitor_mutex_key,
1086 &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
1087
1088 if (!srv_read_only_mode) {
1089
1090 /* Number of purge threads + master thread */
1091 n_sys_threads = srv_n_purge_threads + 1;
1092
1093 srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
1094 }
1095
1096 srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
1097
1098 srv_sys->n_sys_threads = n_sys_threads;
1099
1100 if (!srv_read_only_mode) {
1101
1102 mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
1103
1104 mutex_create(srv_sys_tasks_mutex_key,
1105 &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
1106
1107 srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
1108
1109 for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
1110 srv_slot_t* slot = &srv_sys->sys_threads[i];
1111
1112 slot->event = os_event_create();
1113
1114 ut_a(slot->event);
1115 }
1116
1117 srv_error_event = os_event_create();
1118
1119 srv_monitor_event = os_event_create();
1120
1121 srv_buf_dump_event = os_event_create();
1122
1123 srv_checkpoint_completed_event = os_event_create();
1124
1125 srv_redo_log_tracked_event = os_event_create();
1126
1127 if (srv_track_changed_pages) {
1128 os_event_set(srv_redo_log_tracked_event);
1129 }
1130
1131 UT_LIST_INIT(srv_sys->tasks);
1132 }
1133
1134 /* page_zip_stat_per_index_mutex is acquired from:
1135 1. page_zip_compress() (after SYNC_FSP)
1136 2. page_zip_decompress()
1137 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
1138 4. innodb_cmp_per_index_update(), no other latches
1139 since we do not acquire any other latches while holding this mutex,
1140 it can have very low level. We pick SYNC_ANY_LATCH for it. */
1141
1142 mutex_create(
1143 page_zip_stat_per_index_mutex_key,
1144 &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
1145
1146 /* Create dummy indexes for infimum and supremum records */
1147
1148 dict_ind_init();
1149
1150 srv_conc_init();
1151
1152 /* Initialize some INFORMATION SCHEMA internal structures */
1153 trx_i_s_cache_init(trx_i_s_cache);
1154
1155 ut_crc32_init();
1156
1157 dict_mem_init();
1158 }
1159
1160 /*********************************************************************//**
1161 Frees the data structures created in srv_init(). */
1162 UNIV_INTERN
1163 void
srv_free(void)1164 srv_free(void)
1165 /*==========*/
1166 {
1167 srv_conc_free();
1168
1169 if (!srv_read_only_mode) {
1170
1171 for (ulint i = 0; i < srv_sys->n_sys_threads; i++)
1172 os_event_free(srv_sys->sys_threads[i].event);
1173
1174 os_event_free(srv_error_event);
1175 os_event_free(srv_monitor_event);
1176 os_event_free(srv_buf_dump_event);
1177 os_event_free(srv_checkpoint_completed_event);
1178 os_event_free(srv_redo_log_tracked_event);
1179 mutex_free(&srv_sys->mutex);
1180 mutex_free(&srv_sys->tasks_mutex);
1181 }
1182
1183 #ifndef HAVE_ATOMIC_BUILTINS
1184 mutex_free(&server_mutex);
1185 #endif
1186 mutex_free(&srv_innodb_monitor_mutex);
1187 mutex_free(&page_zip_stat_per_index_mutex);
1188
1189 mem_free(srv_sys);
1190 srv_sys = NULL;
1191
1192 trx_i_s_cache_free(trx_i_s_cache);
1193 }
1194
1195 /*********************************************************************//**
1196 Initializes the synchronization primitives, memory system, and the thread
1197 local storage. */
1198 UNIV_INTERN
1199 void
srv_general_init(void)1200 srv_general_init(void)
1201 /*==================*/
1202 {
1203 ut_mem_init();
1204 /* Reset the system variables in the recovery module. */
1205 recv_sys_var_init();
1206 os_sync_init();
1207 sync_init();
1208 mem_init(srv_mem_pool_size);
1209 que_init();
1210 row_mysql_init();
1211 }
1212
1213 /*********************************************************************//**
1214 Normalizes init parameter values to use units we use inside InnoDB. */
1215 static
1216 void
srv_normalize_init_values(void)1217 srv_normalize_init_values(void)
1218 /*===========================*/
1219 {
1220 ulint n;
1221 ulint i;
1222
1223 n = srv_n_data_files;
1224
1225 for (i = 0; i < n; i++) {
1226 srv_data_file_sizes[i] = srv_data_file_sizes[i]
1227 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1228 }
1229
1230 srv_last_file_size_max = srv_last_file_size_max
1231 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1232
1233 srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1234
1235 srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1236
1237 srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1238 }
1239
1240 /*********************************************************************//**
1241 Boots the InnoDB server. */
1242 UNIV_INTERN
1243 void
srv_boot(void)1244 srv_boot(void)
1245 /*==========*/
1246 {
1247 /* Transform the init parameter values given by MySQL to
1248 use units we use inside InnoDB: */
1249
1250 srv_normalize_init_values();
1251
1252 /* Initialize synchronization primitives, memory management, and thread
1253 local storage */
1254
1255 srv_general_init();
1256
1257 /* Initialize this module */
1258
1259 srv_init();
1260 srv_mon_create();
1261 }
1262
1263 /******************************************************************//**
1264 Refreshes the values used to calculate per-second averages. */
1265 static
1266 void
srv_refresh_innodb_monitor_stats(void)1267 srv_refresh_innodb_monitor_stats(void)
1268 /*==================================*/
1269 {
1270 mutex_enter(&srv_innodb_monitor_mutex);
1271
1272 srv_last_monitor_time = time(NULL);
1273
1274 os_aio_refresh_stats();
1275
1276 btr_cur_n_sea_old = btr_cur_n_sea;
1277 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1278
1279 log_refresh_stats();
1280
1281 buf_refresh_io_stats_all();
1282
1283 srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
1284 srv_n_rows_updated_old = srv_stats.n_rows_updated;
1285 srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
1286 srv_n_rows_read_old = srv_stats.n_rows_read;
1287
1288 mutex_exit(&srv_innodb_monitor_mutex);
1289 }
1290
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.

The report is written section by section (background thread, semaphores,
latest foreign key error, lock/transaction information, file I/O, insert
buffer and adaptive hash index, log, buffer pool and memory, row operations)
while srv_innodb_monitor_mutex is held. As a side effect the function
updates srv_last_monitor_time and the *_old counters that serve as the
baseline for the per-second averages of the next report.

The lock summary and transaction list are skipped when recv_recovery_on is
set or when lock_print_info_summary() reports failure; in both cases FALSE
is returned and trx_start_pos / trx_end are left untouched.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
UNIV_INTERN
ibool
srv_printf_innodb_monitor(
/*======================*/
	FILE*	file,		/*!< in: output stream */
	ibool	nowait,		/*!< in: whether to wait for the
				lock_sys_t:: mutex; passed on to
				lock_print_info_summary() */
	ulint*	trx_start_pos,	/*!< out: file position of the start of
				the list of active transactions; may be
				NULL; ULINT_UNDEFINED if ftell() fails */
	ulint*	trx_end)	/*!< out: file position of the end of
				the list of active transactions; may be
				NULL; ULINT_UNDEFINED if ftell() fails */
{
	double	time_elapsed;
	time_t	current_time;
	ulint	n_reserved;
	ibool	ret;

	ulong	btr_search_sys_constant;
	ulong	btr_search_sys_variable;
	ulint	lock_sys_subtotal;
	ulint	recv_sys_subtotal;

	ulint	i;
	trx_t*	trx;

	mutex_enter(&srv_innodb_monitor_mutex);

	current_time = time(NULL);

	/* We add 0.001 seconds to time_elapsed to prevent division
	by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
	same time */

	time_elapsed = difftime(current_time, srv_last_monitor_time)
		+ 0.001;

	/* The next report measures its interval from this point. */
	srv_last_monitor_time = time(NULL);

	fputs("\n=====================================\n", file);

	ut_print_timestamp(file);
	fprintf(file,
		" INNODB MONITOR OUTPUT\n"
		"=====================================\n"
		"Per second averages calculated from the last %lu seconds\n",
		(ulong) time_elapsed);

	fputs("-----------------\n"
	      "BACKGROUND THREAD\n"
	      "-----------------\n", file);
	srv_print_master_thread_info(file);

	fputs("----------\n"
	      "SEMAPHORES\n"
	      "----------\n", file);
	sync_print(file);

	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
	order level in sync0sync.h, while dict_foreign_err_mutex has a very
	low level 135. Therefore we can reserve the latter mutex here without
	a danger of a deadlock of threads. */

	if (!recv_recovery_on) {

		mutex_enter(&dict_foreign_err_mutex);

		/* The section is printed only if the foreign key error file
		is not positioned at offset 0, and never in read-only mode. */
		if (!srv_read_only_mode
		    && ftell(dict_foreign_err_file) != 0L) {
			fputs("------------------------\n"
			      "LATEST FOREIGN KEY ERROR\n"
			      "------------------------\n", file);
			ut_copy_file(file, dict_foreign_err_file);
		}

		mutex_exit(&dict_foreign_err_mutex);
	}

	/* Only if lock_print_info_summary() succeeds do we go on to call
	lock_print_info_all_transactions() to print all the lock
	information. IMPORTANT NOTE: lock_print_info_summary() acquires
	the lock mutex on success. During recovery nothing is printed and
	ret stays FALSE. */
	ret = recv_recovery_on ? FALSE : lock_print_info_summary(file, nowait);

	if (ret) {
		if (trx_start_pos) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_start_pos = ULINT_UNDEFINED;
			} else {
				*trx_start_pos = (ulint) t;
			}
		}

		/* NOTE: If we get here then we have the lock mutex. This
		function will release the lock mutex that we acquired when
		we called the lock_print_info_summary() function earlier. */

		lock_print_info_all_transactions(file);

		if (trx_end) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_end = ULINT_UNDEFINED;
			} else {
				*trx_end = (ulint) t;
			}
		}
	}

	fputs("--------\n"
	      "FILE I/O\n"
	      "--------\n", file);
	os_aio_print(file);

	if (!recv_recovery_on) {

		fputs("-------------------------------------\n"
		      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
		      "-------------------------------------\n", file);
		ibuf_print(file);
	}


	/* Search rates since the previous report; the baseline counters
	are refreshed right after printing. */
	fprintf(file,
		"%.2f hash searches/s, %.2f non-hash searches/s\n",
		(btr_cur_n_sea - btr_cur_n_sea_old)
		/ time_elapsed,
		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
		/ time_elapsed);
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	if (!recv_recovery_on) {

		fputs("---\n"
		      "LOG\n"
		      "---\n", file);
		log_print(file);
	}

	fputs("----------------------\n"
	      "BUFFER POOL AND MEMORY\n"
	      "----------------------\n", file);
	fprintf(file,
		"Total memory allocated " ULINTPF
		"; in additional pool allocated " ULINTPF "\n",
		ut_total_allocated_memory,
		mem_pool_get_reserved(mem_comm_pool));

	/* An atomic increment by 0 is used to read the current value of
	srv_read_views_memory. */
	fprintf(file,
		"Total memory allocated by read views " ULINTPF "\n",
		os_atomic_increment_lint(&srv_read_views_memory, 0));

	/* Calculate AHI constant and variable memory allocations */

	btr_search_sys_constant = 0;
	btr_search_sys_variable = 0;

	ut_ad(btr_search_sys->hash_tables);

	for (i = 0; i < btr_search_index_num; i++) {
		hash_table_t*	ht = btr_search_sys->hash_tables[i];

		ut_ad(ht);
		ut_ad(ht->heap);

		/* Multiple mutexes/heaps are currently never used for adaptive
		hash index tables. */
		ut_ad(!ht->n_sync_obj);
		ut_ad(!ht->heaps);

		/* Variable part: the heap size; constant part: the cell
		array of the hash table. */
		btr_search_sys_variable += mem_heap_get_size(ht->heap);
		btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t);
	}

	/* Sum the lock heap sizes of all transactions on mysql_trx_list,
	walking the list under trx_sys->mutex. */
	lock_sys_subtotal = 0;
	if (trx_sys) {
		mutex_enter(&trx_sys->mutex);
		trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
		while (trx) {
			lock_sys_subtotal
				+= ((trx->lock.lock_heap)
				    ? mem_heap_get_size(trx->lock.lock_heap)
				    : 0);
			trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
		}
		mutex_exit(&trx_sys->mutex);
	}

	recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
			? mem_heap_get_size(recv_sys->heap) : 0);

	/* Each line below prints a total followed by its constant part
	(hash cell array) and its variable part. */
	fprintf(file,
		"Internal hash tables (constant factor + variable factor)\n"
		" Adaptive hash index %lu \t(%lu + " ULINTPF ")\n"
		" Page hash %lu (buffer pool 0 only)\n"
		" Dictionary cache %lu \t(%lu + " ULINTPF ")\n"
		" File system %lu \t(%lu + " ULINTPF ")\n"
		" Lock system %lu \t(%lu + " ULINTPF ")\n"
		" Recovery system %lu \t(%lu + " ULINTPF ")\n",

		btr_search_sys_constant + btr_search_sys_variable,
		btr_search_sys_constant,
		btr_search_sys_variable,

		(ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),

		(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
			+ dict_sys->table_id_hash->n_cells
			) * sizeof(hash_cell_t)
			+ dict_sys->size) : 0),
		(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
			+ dict_sys->table_id_hash->n_cells
			) * sizeof(hash_cell_t)) : 0),
		dict_sys ? (dict_sys->size) : 0,

		(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
			+ fil_system_hash_nodes()),
		(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
		fil_system_hash_nodes(),

		(ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
			+ lock_sys_subtotal),
		(ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
		lock_sys_subtotal,

		(ulong) (((recv_sys && recv_sys->addr_hash)
			? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
			+ recv_sys_subtotal),
		(ulong) ((recv_sys && recv_sys->addr_hash)
			? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
		recv_sys_subtotal);


	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
		dict_sys ? dict_sys->size : 0);

	buf_print_io(file);

	fputs("--------------\n"
	      "ROW OPERATIONS\n"
	      "--------------\n", file);
	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
		(long) srv_conc_get_active_threads(),
		srv_conc_get_waiting_threads());

	/* NOTE(review): trx_sys is dereferenced here without the NULL
	check used for the lock heap summation above; confirm that it is
	always set by the time this point is reached. */
	mutex_enter(&trx_sys->mutex);

	fprintf(file, "%lu read views open inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->view_list));

	fprintf(file, "%lu RW transactions active inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->rw_trx_list));

	fprintf(file, "%lu RO transactions active inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->ro_trx_list));

	fprintf(file, "%lu out of %lu descriptors used\n",
		trx_sys->descr_n_used, trx_sys->descr_n_max);

	/* The last element of view_list is reported as the oldest view. */
	if (UT_LIST_GET_LEN(trx_sys->view_list)) {
		read_view_t*	view = UT_LIST_GET_LAST(trx_sys->view_list);

		if (view) {
			fprintf(file, "---OLDEST VIEW---\n");
			read_view_print(file, view);
			fprintf(file, "-----------------\n");
		}
	}

	mutex_exit(&trx_sys->mutex);

	n_reserved = fil_space_get_n_reserved_extents(0);
	if (n_reserved > 0) {
		fprintf(file,
			"%lu tablespace extents now reserved for"
			" B-tree split operations\n",
			(ulong) n_reserved);
	}

#ifdef UNIV_LINUX
	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
		(ulong) srv_main_thread_process_no,
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#else
	fprintf(file, "Main thread id %lu, state: %s\n",
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#endif
	fprintf(file,
		"Number of rows inserted " ULINTPF
		", updated " ULINTPF ", deleted " ULINTPF
		", read " ULINTPF "\n",
		(ulint) srv_stats.n_rows_inserted,
		(ulint) srv_stats.n_rows_updated,
		(ulint) srv_stats.n_rows_deleted,
		(ulint) srv_stats.n_rows_read);
	/* Row operation rates since the previous report. */
	fprintf(file,
		"%.2f inserts/s, %.2f updates/s,"
		" %.2f deletes/s, %.2f reads/s\n",
		((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
		/ time_elapsed,
		((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
		/ time_elapsed);

	/* Remember the counters as the baseline for the next report. */
	srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
	srv_n_rows_updated_old = srv_stats.n_rows_updated;
	srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
	srv_n_rows_read_old = srv_stats.n_rows_read;

	fputs("----------------------------\n"
	      "END OF INNODB MONITOR OUTPUT\n"
	      "============================\n", file);
	mutex_exit(&srv_innodb_monitor_mutex);
	fflush(file);

#ifndef DBUG_OFF
	/* Debug-only flag recording that a monitor report was printed. */
	srv_debug_monitor_printed = true;
#endif

	return(ret);
}
1622
1623 /******************************************************************//**
1624 Function to pass InnoDB status variables to MySQL */
1625 UNIV_INTERN
1626 void
srv_export_innodb_status(void)1627 srv_export_innodb_status(void)
1628 /*==========================*/
1629 {
1630 buf_pool_stat_t stat;
1631 buf_pools_list_size_t buf_pools_list_size;
1632 ulint LRU_len;
1633 ulint free_len;
1634 ulint flush_list_len;
1635 ulint mem_adaptive_hash, mem_dictionary;
1636 read_view_t* oldest_view;
1637 ulint i;
1638
1639 buf_get_total_stat(&stat);
1640 buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
1641 buf_get_total_list_size_in_bytes(&buf_pools_list_size);
1642
1643 mem_adaptive_hash = 0;
1644
1645 ut_ad(btr_search_sys->hash_tables);
1646
1647 for (i = 0; i < btr_search_index_num; i++) {
1648 hash_table_t* ht = btr_search_sys->hash_tables[i];
1649
1650 ut_ad(ht);
1651 ut_ad(ht->heap);
1652 /* Multiple mutexes/heaps are currently never used for adaptive
1653 hash index tables. */
1654 ut_ad(!ht->n_sync_obj);
1655 ut_ad(!ht->heaps);
1656
1657 mem_adaptive_hash += mem_heap_get_size(ht->heap);
1658 mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t);
1659 }
1660
1661 mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
1662 + dict_sys->table_id_hash->n_cells
1663 ) * sizeof(hash_cell_t)
1664 + dict_sys->size) : 0);
1665
1666 mutex_enter(&srv_innodb_monitor_mutex);
1667
1668 export_vars.innodb_data_pending_reads =
1669 os_n_pending_reads;
1670
1671 export_vars.innodb_data_pending_writes =
1672 os_n_pending_writes;
1673
1674 export_vars.innodb_data_pending_fsyncs =
1675 fil_n_pending_log_flushes
1676 + fil_n_pending_tablespace_flushes;
1677 export_vars.innodb_adaptive_hash_hash_searches
1678 = btr_cur_n_sea;
1679 export_vars.innodb_adaptive_hash_non_hash_searches
1680 = btr_cur_n_non_sea;
1681 export_vars.innodb_background_log_sync
1682 = srv_log_writes_and_flush;
1683
1684 export_vars.innodb_data_fsyncs = os_n_fsyncs;
1685
1686 export_vars.innodb_data_read = srv_stats.data_read;
1687
1688 export_vars.innodb_data_reads = os_n_file_reads;
1689
1690 export_vars.innodb_data_writes = os_n_file_writes;
1691
1692 export_vars.innodb_data_written = srv_stats.data_written;
1693
1694 export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
1695
1696 export_vars.innodb_buffer_pool_write_requests =
1697 srv_stats.buf_pool_write_requests;
1698
1699 export_vars.innodb_buffer_pool_wait_free =
1700 srv_stats.buf_pool_wait_free;
1701
1702 export_vars.innodb_buffer_pool_pages_flushed =
1703 srv_stats.buf_pool_flushed;
1704
1705 export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
1706
1707 export_vars.innodb_buffer_pool_read_ahead_rnd =
1708 stat.n_ra_pages_read_rnd;
1709
1710 export_vars.innodb_buffer_pool_read_ahead =
1711 stat.n_ra_pages_read;
1712
1713 export_vars.innodb_buffer_pool_read_ahead_evicted =
1714 stat.n_ra_pages_evicted;
1715
1716 export_vars.innodb_buffer_pool_pages_LRU_flushed =
1717 stat.buf_lru_flush_page_count;
1718
1719 export_vars.innodb_buffer_pool_pages_data = LRU_len;
1720
1721 export_vars.innodb_buffer_pool_bytes_data =
1722 buf_pools_list_size.LRU_bytes
1723 + buf_pools_list_size.unzip_LRU_bytes;
1724
1725 export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
1726
1727 export_vars.innodb_buffer_pool_bytes_dirty =
1728 buf_pools_list_size.flush_list_bytes;
1729
1730 export_vars.innodb_buffer_pool_pages_free = free_len;
1731
1732 export_vars.innodb_deadlocks = srv_stats.lock_deadlock_count;
1733
1734 #ifdef UNIV_DEBUG
1735 export_vars.innodb_buffer_pool_pages_latched =
1736 buf_get_latched_pages_number();
1737 #endif /* UNIV_DEBUG */
1738 export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
1739
1740 export_vars.innodb_buffer_pool_pages_misc =
1741 buf_pool_get_n_pages() - LRU_len - free_len;
1742
1743 export_vars.innodb_buffer_pool_pages_made_young
1744 = stat.n_pages_made_young;
1745 export_vars.innodb_buffer_pool_pages_made_not_young
1746 = stat.n_pages_not_made_young;
1747 export_vars.innodb_buffer_pool_pages_old = 0;
1748 for (i = 0; i < srv_buf_pool_instances; i++) {
1749 buf_pool_t* buf_pool = buf_pool_from_array(i);
1750 export_vars.innodb_buffer_pool_pages_old
1751 += buf_pool->LRU_old_len;
1752 }
1753 export_vars.innodb_checkpoint_age
1754 = (log_sys->lsn - log_sys->last_checkpoint_lsn);
1755 export_vars.innodb_checkpoint_max_age
1756 = log_sys->max_checkpoint_age;
1757 export_vars.innodb_history_list_length
1758 = trx_sys->rseg_history_len;
1759 ibuf_export_ibuf_status(
1760 &export_vars.innodb_ibuf_size,
1761 &export_vars.innodb_ibuf_free_list,
1762 &export_vars.innodb_ibuf_segment_size,
1763 &export_vars.innodb_ibuf_merges,
1764 &export_vars.innodb_ibuf_merged_inserts,
1765 &export_vars.innodb_ibuf_merged_delete_marks,
1766 &export_vars.innodb_ibuf_merged_deletes,
1767 &export_vars.innodb_ibuf_discarded_inserts,
1768 &export_vars.innodb_ibuf_discarded_delete_marks,
1769 &export_vars.innodb_ibuf_discarded_deletes);
1770 export_vars.innodb_lsn_current
1771 = log_sys->lsn;
1772 export_vars.innodb_lsn_flushed
1773 = log_sys->flushed_to_disk_lsn;
1774 export_vars.innodb_lsn_last_checkpoint
1775 = log_sys->last_checkpoint_lsn;
1776 export_vars.innodb_master_thread_active_loops
1777 = srv_main_active_loops;
1778 export_vars.innodb_master_thread_idle_loops
1779 = srv_main_idle_loops;
1780 export_vars.innodb_max_trx_id
1781 = trx_sys->max_trx_id;
1782 export_vars.innodb_mem_adaptive_hash
1783 = mem_adaptive_hash;
1784 export_vars.innodb_mem_dictionary
1785 = mem_dictionary;
1786 export_vars.innodb_mem_total
1787 = ut_total_allocated_memory;
1788 export_vars.innodb_mutex_os_waits
1789 = mutex_os_wait_count;
1790 export_vars.innodb_mutex_spin_rounds
1791 = mutex_spin_round_count;
1792 export_vars.innodb_mutex_spin_waits
1793 = mutex_spin_wait_count;
1794 export_vars.innodb_s_lock_os_waits
1795 = rw_lock_stats.rw_s_os_wait_count;
1796 export_vars.innodb_s_lock_spin_rounds
1797 = rw_lock_stats.rw_s_spin_round_count;
1798 export_vars.innodb_s_lock_spin_waits
1799 = rw_lock_stats.rw_s_spin_wait_count;
1800 export_vars.innodb_x_lock_os_waits
1801 = rw_lock_stats.rw_x_os_wait_count;
1802 export_vars.innodb_x_lock_spin_rounds
1803 = rw_lock_stats.rw_x_spin_round_count;
1804 export_vars.innodb_x_lock_spin_waits
1805 = rw_lock_stats.rw_x_spin_wait_count;
1806
1807 oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
1808 export_vars.innodb_oldest_view_low_limit_trx_id
1809 = oldest_view ? oldest_view->low_limit_id : 0;
1810
1811 export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no;
1812 export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no;
1813 export_vars.innodb_current_row_locks
1814 = lock_sys->rec_num;
1815
1816 #ifdef HAVE_ATOMIC_BUILTINS
1817 export_vars.innodb_have_atomic_builtins = 1;
1818 #else
1819 export_vars.innodb_have_atomic_builtins = 0;
1820 #endif
1821 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1822
1823 export_vars.innodb_log_waits = srv_stats.log_waits;
1824
1825 export_vars.innodb_os_log_written = srv_stats.os_log_written;
1826
1827 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1828
1829 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1830
1831 export_vars.innodb_os_log_pending_writes =
1832 srv_stats.os_log_pending_writes;
1833
1834 export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
1835
1836 export_vars.innodb_log_writes = srv_stats.log_writes;
1837
1838 export_vars.innodb_dblwr_pages_written =
1839 srv_stats.dblwr_pages_written;
1840
1841 export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
1842
1843 export_vars.innodb_pages_created = stat.n_pages_created;
1844
1845 export_vars.innodb_pages_read = stat.n_pages_read;
1846
1847 export_vars.innodb_pages_written = stat.n_pages_written;
1848
1849 export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
1850
1851 export_vars.innodb_row_lock_current_waits =
1852 srv_stats.n_lock_wait_current_count;
1853
1854 export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
1855
1856 if (srv_stats.n_lock_wait_count > 0) {
1857
1858 export_vars.innodb_row_lock_time_avg = (ulint)
1859 (srv_stats.n_lock_wait_time
1860 / 1000 / srv_stats.n_lock_wait_count);
1861
1862 } else {
1863 export_vars.innodb_row_lock_time_avg = 0;
1864 }
1865
1866 export_vars.innodb_row_lock_time_max =
1867 lock_sys->n_lock_max_wait_time / 1000;
1868
1869 export_vars.innodb_rows_read = srv_stats.n_rows_read;
1870
1871 export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
1872
1873 export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
1874
1875 export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
1876
1877 export_vars.innodb_num_open_files = fil_n_file_opened;
1878
1879 export_vars.innodb_truncated_status_writes =
1880 srv_truncated_status_writes;
1881
1882 export_vars.innodb_available_undo_logs = srv_available_undo_logs;
1883 export_vars.innodb_read_views_memory
1884 = os_atomic_increment_lint(&srv_read_views_memory, 0);
1885 export_vars.innodb_descriptors_memory
1886 = os_atomic_increment_lint(&srv_descriptors_memory, 0);
1887
1888 #ifdef UNIV_DEBUG
1889 rw_lock_s_lock(&purge_sys->latch);
1890 trx_id_t done_trx_no = purge_sys->done.trx_no;
1891 trx_id_t up_limit_id = purge_sys->view
1892 ? purge_sys->view->up_limit_id
1893 : 0;
1894 rw_lock_s_unlock(&purge_sys->latch);
1895
1896 mutex_enter(&trx_sys->mutex);
1897 trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
1898 mutex_exit(&trx_sys->mutex);
1899
1900 if (!done_trx_no || max_trx_id < done_trx_no - 1) {
1901 export_vars.innodb_purge_trx_id_age = 0;
1902 } else {
1903 export_vars.innodb_purge_trx_id_age =
1904 (ulint) (max_trx_id - done_trx_no + 1);
1905 }
1906
1907 if (!up_limit_id
1908 || max_trx_id < up_limit_id) {
1909 export_vars.innodb_purge_view_trx_id_age = 0;
1910 } else {
1911 export_vars.innodb_purge_view_trx_id_age =
1912 (ulint) (max_trx_id - up_limit_id);
1913 }
1914 #endif /* UNIV_DEBUG */
1915
1916 os_rmb;
1917 export_vars.innodb_sec_rec_cluster_reads =
1918 srv_sec_rec_cluster_reads;
1919 export_vars.innodb_sec_rec_cluster_reads_avoided =
1920 srv_sec_rec_cluster_reads_avoided;
1921
1922 export_vars.innodb_buffered_aio_submitted =
1923 srv_stats.n_aio_submitted;
1924
1925 thd_get_fragmentation_stats(current_thd,
1926 &export_vars.innodb_fragmentation_stats);
1927
1928 mutex_exit(&srv_innodb_monitor_mutex);
1929 }
1930
#ifndef DBUG_OFF
/** Debug-build flag: stays false until the InnoDB monitor output has
been printed for the first time, and is true from then on. */
bool	srv_debug_monitor_printed = false;
#endif /* !DBUG_OFF */
1936
1937 /*********************************************************************//**
1938 A thread which prints the info output by various InnoDB monitors.
1939 @return a dummy parameter */
1940 extern "C" UNIV_INTERN
1941 os_thread_ret_t
DECLARE_THREAD(srv_monitor_thread)1942 DECLARE_THREAD(srv_monitor_thread)(
1943 /*===============================*/
1944 void* arg MY_ATTRIBUTE((unused)))
1945 /*!< in: a dummy parameter required by
1946 os_thread_create */
1947 {
1948 ib_int64_t sig_count;
1949 double time_elapsed;
1950 time_t current_time;
1951 time_t last_table_monitor_time;
1952 time_t last_tablespace_monitor_time;
1953 time_t last_monitor_time;
1954 ulint mutex_skipped;
1955 ibool last_srv_print_monitor;
1956
1957 ut_ad(!srv_read_only_mode);
1958
1959 #ifdef UNIV_DEBUG_THREAD_CREATION
1960 fprintf(stderr, "Lock timeout thread starts, id %lu\n",
1961 os_thread_pf(os_thread_get_curr_id()));
1962 #endif /* UNIV_DEBUG_THREAD_CREATION */
1963
1964 #ifdef UNIV_PFS_THREAD
1965 pfs_register_thread(srv_monitor_thread_key);
1966 #endif /* UNIV_PFS_THREAD */
1967 srv_monitor_active = TRUE;
1968
1969 UT_NOT_USED(arg);
1970 srv_last_monitor_time = ut_time();
1971 last_table_monitor_time = ut_time();
1972 last_tablespace_monitor_time = ut_time();
1973 last_monitor_time = ut_time();
1974 mutex_skipped = 0;
1975 last_srv_print_monitor = srv_print_innodb_monitor;
1976 loop:
1977 /* Wake up every 5 seconds to see if we need to print
1978 monitor information or if signalled at shutdown. */
1979
1980 sig_count = os_event_reset(srv_monitor_event);
1981
1982 os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
1983
1984 current_time = ut_time();
1985
1986 time_elapsed = difftime(current_time, last_monitor_time);
1987
1988 if (time_elapsed > 15) {
1989 last_monitor_time = ut_time();
1990
1991 if (srv_print_innodb_monitor) {
1992 /* Reset mutex_skipped counter everytime
1993 srv_print_innodb_monitor changes. This is to
1994 ensure we will not be blocked by lock_sys->mutex
1995 for short duration information printing,
1996 such as requested by sync_array_print_long_waits() */
1997 if (!last_srv_print_monitor) {
1998 mutex_skipped = 0;
1999 last_srv_print_monitor = TRUE;
2000 }
2001
2002 if (!srv_printf_innodb_monitor(stderr,
2003 MUTEX_NOWAIT(mutex_skipped),
2004 NULL, NULL)) {
2005 mutex_skipped++;
2006 } else {
2007 /* Reset the counter */
2008 mutex_skipped = 0;
2009 }
2010 } else {
2011 last_srv_print_monitor = FALSE;
2012 }
2013
2014
2015 /* We don't create the temp files or associated
2016 mutexes in read-only-mode */
2017
2018 if (!srv_read_only_mode && srv_innodb_status) {
2019 mutex_enter(&srv_monitor_file_mutex);
2020 rewind(srv_monitor_file);
2021 if (!srv_printf_innodb_monitor(srv_monitor_file,
2022 MUTEX_NOWAIT(mutex_skipped),
2023 NULL, NULL)) {
2024 mutex_skipped++;
2025 } else {
2026 mutex_skipped = 0;
2027 }
2028
2029 os_file_set_eof(srv_monitor_file);
2030 mutex_exit(&srv_monitor_file_mutex);
2031 }
2032
2033 if (srv_print_innodb_tablespace_monitor
2034 && difftime(current_time,
2035 last_tablespace_monitor_time) > 60) {
2036 last_tablespace_monitor_time = ut_time();
2037
2038 fputs("========================"
2039 "========================\n",
2040 stderr);
2041
2042 ut_print_timestamp(stderr);
2043
2044 fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2045 "========================"
2046 "========================\n",
2047 stderr);
2048
2049 fsp_print(0);
2050 fputs("Validating tablespace\n", stderr);
2051 fsp_validate(0);
2052 fputs("Validation ok\n"
2053 "---------------------------------------\n"
2054 "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2055 "=======================================\n",
2056 stderr);
2057 }
2058
2059 if (srv_print_innodb_table_monitor
2060 && difftime(current_time, last_table_monitor_time) > 60) {
2061
2062 last_table_monitor_time = ut_time();
2063
2064 fprintf(stderr, "Warning: %s\n",
2065 DEPRECATED_MSG_INNODB_TABLE_MONITOR);
2066
2067 fputs("===========================================\n",
2068 stderr);
2069
2070 ut_print_timestamp(stderr);
2071
2072 fputs(" INNODB TABLE MONITOR OUTPUT\n"
2073 "===========================================\n",
2074 stderr);
2075 dict_print();
2076
2077 fputs("-----------------------------------\n"
2078 "END OF INNODB TABLE MONITOR OUTPUT\n"
2079 "==================================\n",
2080 stderr);
2081
2082 fprintf(stderr, "Warning: %s\n",
2083 DEPRECATED_MSG_INNODB_TABLE_MONITOR);
2084 }
2085 }
2086
2087 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2088 goto exit_func;
2089 }
2090
2091 if (srv_print_innodb_monitor
2092 || srv_print_innodb_lock_monitor
2093 || srv_print_innodb_tablespace_monitor
2094 || srv_print_innodb_table_monitor) {
2095 goto loop;
2096 }
2097
2098 goto loop;
2099
2100 exit_func:
2101 srv_monitor_active = FALSE;
2102
2103 /* We count the number of threads in os_thread_exit(). A created
2104 thread should always use that to exit and not use return() to exit. */
2105
2106 os_thread_exit(NULL);
2107
2108 OS_THREAD_DUMMY_RETURN;
2109 }
2110
2111 /*********************************************************************//**
2112 A thread which prints warnings about semaphore waits which have lasted
2113 too long. These can be used to track bugs which cause hangs.
2114 Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
2115 we should avoid waiting any mutexes in this function!
2116 @return a dummy parameter */
2117 extern "C" UNIV_INTERN
2118 os_thread_ret_t
DECLARE_THREAD(srv_error_monitor_thread)2119 DECLARE_THREAD(srv_error_monitor_thread)(
2120 /*=====================================*/
2121 void* arg MY_ATTRIBUTE((unused)))
2122 /*!< in: a dummy parameter required by
2123 os_thread_create */
2124 {
2125 /* number of successive fatal timeouts observed */
2126 ulint fatal_cnt = 0;
2127 lsn_t old_lsn;
2128 lsn_t new_lsn;
2129 ib_int64_t sig_count;
2130 /* longest waiting thread for a semaphore */
2131 os_thread_id_t waiter = os_thread_get_curr_id();
2132 os_thread_id_t old_waiter = waiter;
2133 /* the semaphore that is being waited for */
2134 const void* sema = NULL;
2135 const void* old_sema = NULL;
2136
2137 ut_ad(!srv_read_only_mode);
2138
2139 old_lsn = srv_start_lsn;
2140
2141 #ifdef UNIV_DEBUG_THREAD_CREATION
2142 fprintf(stderr, "Error monitor thread starts, id %lu\n",
2143 os_thread_pf(os_thread_get_curr_id()));
2144 #endif /* UNIV_DEBUG_THREAD_CREATION */
2145
2146 #ifdef UNIV_PFS_THREAD
2147 pfs_register_thread(srv_error_monitor_thread_key);
2148 #endif /* UNIV_PFS_THREAD */
2149 srv_error_monitor_active = TRUE;
2150
2151 loop:
2152 /* Try to track a strange bug reported by Harald Fuchs and others,
2153 where the lsn seems to decrease at times */
2154
2155 if (log_peek_lsn(&new_lsn)) {
2156 if (new_lsn < old_lsn) {
2157 ut_print_timestamp(stderr);
2158 fprintf(stderr,
2159 " InnoDB: Error: old log sequence number " LSN_PF
2160 " was greater\n"
2161 "InnoDB: than the new log sequence number " LSN_PF "!\n"
2162 "InnoDB: Please submit a bug report"
2163 " to http://bugs.mysql.com\n",
2164 old_lsn, new_lsn);
2165 ut_ad(0);
2166 }
2167
2168 old_lsn = new_lsn;
2169 }
2170
2171 if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2172 /* We referesh InnoDB Monitor values so that averages are
2173 printed from at most 60 last seconds */
2174
2175 srv_refresh_innodb_monitor_stats();
2176 }
2177
2178 /* Update the statistics collected for deciding LRU
2179 eviction policy. */
2180 buf_LRU_stat_update();
2181
2182 /* In case mutex_exit is not a memory barrier, it is
2183 theoretically possible some threads are left waiting though
2184 the semaphore is already released. Wake up those threads: */
2185
2186 sync_arr_wake_threads_if_sema_free();
2187
2188 if (sync_array_print_long_waits(&waiter, &sema)
2189 && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2190 fatal_cnt++;
2191 if (fatal_cnt > 10) {
2192
2193 fprintf(stderr,
2194 "InnoDB: Error: semaphore wait has lasted"
2195 " > %lu seconds\n"
2196 "InnoDB: We intentionally crash the server,"
2197 " because it appears to be hung.\n",
2198 (ulong) srv_fatal_semaphore_wait_threshold);
2199
2200 ut_error;
2201 }
2202 } else {
2203 fatal_cnt = 0;
2204 old_waiter = waiter;
2205 old_sema = sema;
2206 }
2207
2208 /* Flush stderr so that a database user gets the output
2209 to possible MySQL error file */
2210
2211 fflush(stderr);
2212
2213 sig_count = os_event_reset(srv_error_event);
2214
2215 os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2216
2217 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2218
2219 goto loop;
2220 }
2221
2222 srv_error_monitor_active = FALSE;
2223
2224 /* We count the number of threads in os_thread_exit(). A created
2225 thread should always use that to exit and not use return() to exit. */
2226
2227 os_thread_exit(NULL);
2228
2229 OS_THREAD_DUMMY_RETURN;
2230 }
2231
2232 /******************************************************************//**
2233 Increment the server activity count. */
2234 UNIV_INTERN
2235 void
srv_inc_activity_count(bool ibuf_merge_activity)2236 srv_inc_activity_count(
2237 /*===================*/
2238 bool ibuf_merge_activity) /*!< whether this activity bump
2239 is caused by the background
2240 change buffer merge */
2241 {
2242 srv_sys->activity_count.inc();
2243 if (ibuf_merge_activity)
2244 srv_sys->ibuf_merge_activity_count.inc();
2245 }
2246
2247 /**********************************************************************//**
2248 Check whether any background thread is active. If so return the thread
2249 type.
2250 @return SRV_NONE if all are suspended or have exited, thread
2251 type if any are still active. */
2252 UNIV_INTERN
2253 srv_thread_type
srv_get_active_thread_type(void)2254 srv_get_active_thread_type(void)
2255 /*============================*/
2256 {
2257 srv_thread_type ret = SRV_NONE;
2258
2259 if (srv_read_only_mode) {
2260 return(SRV_NONE);
2261 }
2262
2263 srv_sys_mutex_enter();
2264
2265 for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
2266 if (srv_sys->n_threads_active[i] != 0) {
2267 ret = static_cast<srv_thread_type>(i);
2268 break;
2269 }
2270 }
2271
2272 srv_sys_mutex_exit();
2273
2274 /* Check only on shutdown. */
2275 if (ret == SRV_NONE
2276 && srv_shutdown_state != SRV_SHUTDOWN_NONE
2277 && trx_purge_state() != PURGE_STATE_DISABLED
2278 && trx_purge_state() != PURGE_STATE_EXIT) {
2279
2280 ret = SRV_PURGE;
2281 }
2282
2283 return(ret);
2284 }
2285
2286 /**********************************************************************//**
2287 Check whether any background thread are active. If so print which thread
2288 is active. Send the threads wakeup signal.
2289 @return name of thread that is active or NULL */
2290 UNIV_INTERN
2291 const char*
srv_any_background_threads_are_active(void)2292 srv_any_background_threads_are_active(void)
2293 /*=======================================*/
2294 {
2295 const char* thread_active = NULL;
2296
2297 if (srv_read_only_mode) {
2298 return(NULL);
2299 } else if (srv_error_monitor_active) {
2300 thread_active = "srv_error_monitor_thread";
2301 } else if (lock_sys->timeout_thread_active) {
2302 thread_active = "srv_lock_timeout thread";
2303 } else if (srv_monitor_active) {
2304 thread_active = "srv_monitor_thread";
2305 } else if (srv_buf_dump_thread_active) {
2306 thread_active = "buf_dump_thread";
2307 } else if (srv_dict_stats_thread_active) {
2308 thread_active = "dict_stats_thread";
2309 }
2310
2311 os_event_set(srv_error_event);
2312 os_event_set(srv_monitor_event);
2313 os_event_set(srv_buf_dump_event);
2314 os_event_set(lock_sys->timeout_event);
2315 os_event_set(dict_stats_event);
2316
2317 return(thread_active);
2318 }
2319
2320 /******************************************************************//**
2321 A thread which follows the redo log and outputs the changed page bitmap.
2322 @return a dummy value */
2323 extern "C" UNIV_INTERN
2324 os_thread_ret_t
DECLARE_THREAD(srv_redo_log_follow_thread)2325 DECLARE_THREAD(srv_redo_log_follow_thread)(
2326 /*=======================================*/
2327 void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
2328 required by
2329 os_thread_create */
2330 {
2331 ut_ad(!srv_read_only_mode);
2332
2333 #ifdef UNIV_DEBUG_THREAD_CREATION
2334 fprintf(stderr, "Redo log follower thread starts, id %lu\n",
2335 os_thread_pf(os_thread_get_curr_id()));
2336 #endif
2337
2338 #ifdef UNIV_PFS_THREAD
2339 pfs_register_thread(srv_log_tracking_thread_key);
2340 #endif
2341
2342 my_thread_init();
2343 srv_redo_log_thread_started = true;
2344
2345 do {
2346 os_event_wait(srv_checkpoint_completed_event);
2347 os_event_reset(srv_checkpoint_completed_event);
2348
2349 if (srv_track_changed_pages
2350 && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
2351 if (!log_online_follow_redo_log()) {
2352 /* TODO: sync with I_S log tracking status? */
2353 ib_logf(IB_LOG_LEVEL_ERROR,
2354 "log tracking bitmap write failed, "
2355 "stopping log tracking thread!");
2356 break;
2357 }
2358 os_event_set(srv_redo_log_tracked_event);
2359 }
2360
2361 } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
2362
2363 log_online_read_shutdown();
2364 os_event_set(srv_redo_log_tracked_event);
2365
2366 my_thread_end();
2367 os_thread_exit(NULL);
2368
2369 OS_THREAD_DUMMY_RETURN;
2370 }
2371
2372 /*************************************************************//**
2373 Removes old archived transaction log files.
2374 Both parameters couldn't be provided at the same time */
2375 dberr_t
purge_archived_logs(time_t before_date,lsn_t before_no)2376 purge_archived_logs(
2377 time_t before_date, /*!< in: all files modified
2378 before timestamp should be removed */
2379 lsn_t before_no) /*!< in: files with this number in name
2380 and earler should be removed */
2381 {
2382 log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
2383
2384 os_file_dir_t dir;
2385 os_file_stat_t fileinfo;
2386 char archived_log_filename[OS_FILE_MAX_PATH];
2387 char namegen[OS_FILE_MAX_PATH];
2388 ulint dirnamelen;
2389
2390 if (srv_arch_dir) {
2391 dir = os_file_opendir(srv_arch_dir, FALSE);
2392 if (!dir) {
2393 ib_logf(IB_LOG_LEVEL_WARN,
2394 "opening archived log directory %s failed. "
2395 "Purge archived logs are not available",
2396 srv_arch_dir);
2397 /* failed to open directory */
2398 return(DB_ERROR);
2399 }
2400 } else {
2401 /* log archive directory is not specified */
2402 return(DB_ERROR);
2403 }
2404
2405 dirnamelen = strlen(srv_arch_dir);
2406
2407 memcpy(archived_log_filename, srv_arch_dir, dirnamelen);
2408 if (dirnamelen &&
2409 archived_log_filename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
2410 archived_log_filename[dirnamelen++] = SRV_PATH_SEPARATOR;
2411 }
2412
2413 memset(&fileinfo, 0, sizeof(fileinfo));
2414 while(!os_file_readdir_next_file(srv_arch_dir, dir,
2415 &fileinfo) ) {
2416 if (strncmp(fileinfo.name,
2417 IB_ARCHIVED_LOGS_PREFIX, IB_ARCHIVED_LOGS_PREFIX_LEN)) {
2418 continue;
2419 }
2420 if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH)
2421 continue;
2422
2423 snprintf(archived_log_filename + dirnamelen, OS_FILE_MAX_PATH - dirnamelen,
2424 "%s", fileinfo.name);
2425
2426 if (before_no) {
2427 ib_uint64_t log_file_no = strtoull(fileinfo.name +
2428 IB_ARCHIVED_LOGS_PREFIX_LEN,
2429 NULL, 10);
2430 if (log_file_no == 0 || before_no <= log_file_no) {
2431 continue;
2432 }
2433 } else {
2434 fileinfo.mtime = 0;
2435 if (os_file_get_status(archived_log_filename,
2436 &fileinfo, false) != DB_SUCCESS ||
2437 fileinfo.mtime == 0) {
2438 continue;
2439 }
2440
2441 if (before_date == 0 || fileinfo.mtime > before_date) {
2442 continue;
2443 }
2444 }
2445
2446 /* We are going to delete archived file. Acquire log_sys->mutex
2447 to make sure that we are the only who try to delete file. This
2448 also prevents log system from using this file. Do not delete
2449 file if it is currently in progress of writting or have
2450 pending IO. This is enforced by checking:
2451 1. fil_space_contains_node.
2452 2. group->archived_offset % group->file_size != 0, i.e.
2453 there is archive in progress and we are going to delete it.
2454 This covers 3 cases:
2455 a. Usual case when we have one archive in progress,
2456 both 1 and 2 are TRUE
2457 b. When we have more then 1 archive in fil_space,
2458 this can happen when flushed LSN range crosses file
2459 boundary
2460 c. When we have empty fil_space, but existing file will be
2461 opened once archiving operation is requested. This usually
2462 happens on startup.
2463 */
2464
2465 mutex_enter(&log_sys->mutex);
2466
2467 log_archived_file_name_gen(namegen, sizeof(namegen),
2468 group->id, group->archived_file_no);
2469
2470 if (fil_space_contains_node(group->archive_space_id,
2471 archived_log_filename) ||
2472 (group->archived_offset % group->file_size != 0 &&
2473 strcmp(namegen, archived_log_filename) == 0)) {
2474
2475 mutex_exit(&log_sys->mutex);
2476 continue;
2477 }
2478
2479 if (!os_file_delete_if_exists(innodb_file_data_key,
2480 archived_log_filename)) {
2481
2482 ib_logf(IB_LOG_LEVEL_WARN,
2483 "can't delete archived log file %s.",
2484 archived_log_filename);
2485
2486 mutex_exit(&log_sys->mutex);
2487 os_file_closedir(dir);
2488
2489 return(DB_ERROR);
2490 }
2491
2492 mutex_exit(&log_sys->mutex);
2493 }
2494
2495 os_file_closedir(dir);
2496
2497 return(DB_SUCCESS);
2498 }
2499
2500 /*******************************************************************//**
2501 Tells the InnoDB server that there has been activity in the database
2502 and wakes up the master thread if it is suspended (not sleeping). Used
2503 in the MySQL interface. Note that there is a small chance that the master
2504 thread stays suspended (we do not protect our operation with the
2505 srv_sys_t->mutex, for performance reasons). */
2506 UNIV_INTERN
2507 void
srv_active_wake_master_thread(void)2508 srv_active_wake_master_thread(void)
2509 /*===============================*/
2510 {
2511 if (srv_read_only_mode) {
2512 return;
2513 }
2514
2515 ut_ad(!srv_sys_mutex_own());
2516
2517 srv_inc_activity_count();
2518
2519 if (srv_sys->n_threads_active[SRV_MASTER] == 0) {
2520 srv_slot_t* slot;
2521
2522 srv_sys_mutex_enter();
2523
2524 slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
2525
2526 /* Only if the master thread has been started. */
2527
2528 if (slot->in_use) {
2529 ut_a(srv_slot_get_type(slot) == SRV_MASTER);
2530
2531 if (slot->suspended) {
2532
2533 slot->suspended = FALSE;
2534
2535 ++srv_sys->n_threads_active[SRV_MASTER];
2536
2537 os_event_set(slot->event);
2538 }
2539 }
2540
2541 srv_sys_mutex_exit();
2542 }
2543 }
2544
2545 /*******************************************************************//**
2546 Tells the purge thread that there has been activity in the database
2547 and wakes up the purge thread if it is suspended (not sleeping). Note
2548 that there is a small chance that the purge thread stays suspended
2549 (we do not protect our check with the srv_sys_t:mutex and the
2550 purge_sys->latch, for performance reasons). */
2551 UNIV_INTERN
2552 void
srv_wake_purge_thread_if_not_active(void)2553 srv_wake_purge_thread_if_not_active(void)
2554 /*=====================================*/
2555 {
2556 ut_ad(!srv_sys_mutex_own());
2557
2558 if (purge_sys->state == PURGE_STATE_RUN
2559 && srv_sys->n_threads_active[SRV_PURGE] == 0) {
2560
2561 srv_release_threads(SRV_PURGE, 1);
2562 }
2563 }
2564
2565 /*******************************************************************//**
2566 Wakes up the master thread if it is suspended or being suspended. */
2567 UNIV_INTERN
2568 void
srv_wake_master_thread(void)2569 srv_wake_master_thread(void)
2570 /*========================*/
2571 {
2572 ut_ad(!srv_sys_mutex_own());
2573
2574 srv_inc_activity_count();
2575
2576 srv_release_threads(SRV_MASTER, 1);
2577 }
2578
2579 /*******************************************************************//**
2580 Get current server activity count. We don't hold srv_sys::mutex while
2581 reading this value as it is only used in heuristics.
2582 @return activity count. */
2583 UNIV_INTERN
2584 ulint
srv_get_activity_count(void)2585 srv_get_activity_count(void)
2586 /*========================*/
2587 {
2588 return(srv_sys->activity_count);
2589 }
2590
2591 /** Get current server ibuf merge activity count.
2592 @return ibuf merge activity count */
2593 static
2594 ulint
srv_get_ibuf_merge_activity_count(void)2595 srv_get_ibuf_merge_activity_count(void)
2596 {
2597 return(srv_sys->ibuf_merge_activity_count);
2598 }
2599
2600 /*******************************************************************//**
2601 Check if there has been any activity. Considers background change buffer
2602 merge as regular server activity unless a non-default
2603 old_ibuf_merge_activity_count value is passed, in which case the merge will be
2604 treated as keeping server idle.
2605 @return FALSE if no change in activity counter. */
2606 UNIV_INTERN
2607 ibool
srv_check_activity(ulint old_activity_count,ulint old_ibuf_merge_activity_count)2608 srv_check_activity(
2609 /*===============*/
2610 ulint old_activity_count, /*!< in: old activity count */
2611 /*!< old change buffer merge
2612 activity count, or
2613 ULINT_UNDEFINED */
2614 ulint old_ibuf_merge_activity_count)
2615 {
2616 ulint new_activity_count = srv_sys->activity_count;
2617 if (old_ibuf_merge_activity_count == ULINT_UNDEFINED)
2618 return(new_activity_count != old_activity_count);
2619
2620 /* If we care about ibuf merge activity, then the server is considered
2621 idle if all activity, if any, was due to ibuf merge. */
2622 ulint new_ibuf_merge_activity_count
2623 = srv_sys->ibuf_merge_activity_count;
2624
2625 ut_ad(new_ibuf_merge_activity_count <= new_activity_count);
2626 ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count);
2627 ut_ad(new_activity_count >= old_activity_count);
2628
2629 ulint ibuf_merge_activity_delta =
2630 new_ibuf_merge_activity_count - old_ibuf_merge_activity_count;
2631 ulint activity_delta = new_activity_count - old_activity_count;
2632
2633 return (activity_delta > ibuf_merge_activity_delta);
2634 }
2635
2636 /********************************************************************//**
2637 The master thread is tasked to ensure that flush of log file happens
2638 once every second in the background. This is to ensure that not more
2639 than one second of trxs are lost in case of crash when
2640 innodb_flush_logs_at_trx_commit != 1 */
2641 static
2642 void
srv_sync_log_buffer_in_background(void)2643 srv_sync_log_buffer_in_background(void)
2644 /*===================================*/
2645 {
2646 time_t current_time = time(NULL);
2647
2648 srv_main_thread_op_info = "flushing log";
2649 if (difftime(current_time, srv_last_log_flush_time)
2650 >= srv_flush_log_at_timeout) {
2651 log_buffer_sync_in_background(TRUE);
2652 srv_last_log_flush_time = current_time;
2653 srv_log_writes_and_flush++;
2654 }
2655 }
2656
2657 /********************************************************************//**
2658 Make room in the table cache by evicting an unused table.
2659 @return number of tables evicted. */
2660 static
2661 ulint
srv_master_evict_from_table_cache(ulint pct_check)2662 srv_master_evict_from_table_cache(
2663 /*==============================*/
2664 ulint pct_check) /*!< in: max percent to check */
2665 {
2666 ulint n_tables_evicted = 0;
2667
2668 rw_lock_x_lock(&dict_operation_lock);
2669
2670 dict_mutex_enter_for_mysql();
2671
2672 n_tables_evicted = dict_make_room_in_cache(
2673 innobase_get_table_cache_size(), pct_check);
2674
2675 dict_mutex_exit_for_mysql();
2676
2677 rw_lock_x_unlock(&dict_operation_lock);
2678
2679 return(n_tables_evicted);
2680 }
2681
2682 /*********************************************************************//**
2683 This function prints progress message every 60 seconds during server
2684 shutdown, for any activities that master thread is pending on. */
2685 static
2686 void
srv_shutdown_print_master_pending(ib_time_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged)2687 srv_shutdown_print_master_pending(
2688 /*==============================*/
2689 ib_time_t* last_print_time, /*!< last time the function
2690 print the message */
2691 ulint n_tables_to_drop, /*!< number of tables to
2692 be dropped */
2693 ulint n_bytes_merged) /*!< number of change buffer
2694 just merged */
2695 {
2696 ib_time_t current_time;
2697 double time_elapsed;
2698
2699 current_time = ut_time();
2700 time_elapsed = ut_difftime(current_time, *last_print_time);
2701
2702 if (time_elapsed > 60) {
2703 *last_print_time = ut_time();
2704
2705 if (n_tables_to_drop) {
2706 ut_print_timestamp(stderr);
2707 fprintf(stderr, " InnoDB: Waiting for "
2708 "%lu table(s) to be dropped\n",
2709 (ulong) n_tables_to_drop);
2710 }
2711
2712 /* Check change buffer merge, we only wait for change buffer
2713 merge if it is a slow shutdown */
2714 if (!srv_fast_shutdown && n_bytes_merged) {
2715 ut_print_timestamp(stderr);
2716 fprintf(stderr, " InnoDB: Waiting for change "
2717 "buffer merge to complete\n"
2718 " InnoDB: number of bytes of change buffer "
2719 "just merged: %lu\n",
2720 n_bytes_merged);
2721 }
2722 }
2723 }
2724
2725 /*********************************************************************//**
2726 Perform the tasks that the master thread is supposed to do when the
2727 server is active. There are two types of tasks. The first category is
2728 of such tasks which are performed at each inovcation of this function.
2729 We assume that this function is called roughly every second when the
2730 server is active. The second category is of such tasks which are
2731 performed at some interval e.g.: purge, dict_LRU cleanup etc. */
2732 static
2733 void
srv_master_do_active_tasks(void)2734 srv_master_do_active_tasks(void)
2735 /*============================*/
2736 {
2737 ib_time_t cur_time = ut_time();
2738 ullint counter_time = ut_time_us(NULL);
2739
2740 /* First do the tasks that we are suppose to do at each
2741 invocation of this function. */
2742
2743 ++srv_main_active_loops;
2744
2745 MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
2746
2747 /* ALTER TABLE in MySQL requires on Unix that the table handler
2748 can drop tables lazily after there no longer are SELECT
2749 queries to them. */
2750 srv_main_thread_op_info = "doing background drop tables";
2751 row_drop_tables_for_mysql_in_background();
2752 MONITOR_INC_TIME_IN_MICRO_SECS(
2753 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
2754
2755 if (srv_shutdown_state > 0) {
2756 return;
2757 }
2758
2759 /* make sure that there is enough reusable space in the redo
2760 log files */
2761 srv_main_thread_op_info = "checking free log space";
2762 log_free_check();
2763
2764 /* Do an ibuf merge */
2765 srv_main_thread_op_info = "doing insert buffer merge";
2766 counter_time = ut_time_us(NULL);
2767 ibuf_merge_in_background(false);
2768 MONITOR_INC_TIME_IN_MICRO_SECS(
2769 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2770
2771 /* Flush logs if needed */
2772 srv_main_thread_op_info = "flushing log";
2773 srv_sync_log_buffer_in_background();
2774 MONITOR_INC_TIME_IN_MICRO_SECS(
2775 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2776
2777 /* Now see if various tasks that are performed at defined
2778 intervals need to be performed. */
2779
2780 #ifdef MEM_PERIODIC_CHECK
2781 /* Check magic numbers of every allocated mem block once in
2782 SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
2783 if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
2784 mem_validate_all_blocks();
2785 MONITOR_INC_TIME_IN_MICRO_SECS(
2786 MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
2787 }
2788 #endif
2789 if (srv_shutdown_state > 0) {
2790 return;
2791 }
2792
2793 if (srv_shutdown_state > 0) {
2794 return;
2795 }
2796
2797 if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
2798 srv_main_thread_op_info = "enforcing dict cache limit";
2799 srv_master_evict_from_table_cache(50);
2800 MONITOR_INC_TIME_IN_MICRO_SECS(
2801 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2802 }
2803
2804 if (srv_shutdown_state > 0) {
2805 return;
2806 }
2807
2808 /* Make a new checkpoint */
2809 if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
2810 srv_main_thread_op_info = "making checkpoint";
2811 log_checkpoint(TRUE, FALSE);
2812 MONITOR_INC_TIME_IN_MICRO_SECS(
2813 MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
2814 }
2815 }
2816
2817 /*********************************************************************//**
2818 Perform the tasks that the master thread is supposed to do whenever the
2819 server is idle. We do check for the server state during this function
2820 and if the server has entered the shutdown phase we may return from
2821 the function without completing the required tasks.
2822 Note that the server can move to active state when we are executing this
2823 function but we don't check for that as we are suppose to perform more
2824 or less same tasks when server is active. */
2825 static
2826 void
srv_master_do_idle_tasks(void)2827 srv_master_do_idle_tasks(void)
2828 /*==========================*/
2829 {
2830 ullint counter_time;
2831
2832 ++srv_main_idle_loops;
2833
2834 MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
2835
2836
2837 /* ALTER TABLE in MySQL requires on Unix that the table handler
2838 can drop tables lazily after there no longer are SELECT
2839 queries to them. */
2840 counter_time = ut_time_us(NULL);
2841 srv_main_thread_op_info = "doing background drop tables";
2842 row_drop_tables_for_mysql_in_background();
2843 MONITOR_INC_TIME_IN_MICRO_SECS(
2844 MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
2845 counter_time);
2846
2847 if (srv_shutdown_state > 0) {
2848 return;
2849 }
2850
2851 /* make sure that there is enough reusable space in the redo
2852 log files */
2853 srv_main_thread_op_info = "checking free log space";
2854 log_free_check();
2855
2856 /* Do an ibuf merge */
2857 counter_time = ut_time_us(NULL);
2858 srv_main_thread_op_info = "doing insert buffer merge";
2859 ibuf_merge_in_background(true);
2860 MONITOR_INC_TIME_IN_MICRO_SECS(
2861 MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
2862
2863 if (srv_shutdown_state > 0) {
2864 return;
2865 }
2866
2867 srv_main_thread_op_info = "enforcing dict cache limit";
2868 srv_master_evict_from_table_cache(100);
2869 MONITOR_INC_TIME_IN_MICRO_SECS(
2870 MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
2871
2872 /* Flush logs if needed */
2873 srv_sync_log_buffer_in_background();
2874 MONITOR_INC_TIME_IN_MICRO_SECS(
2875 MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
2876
2877 if (srv_shutdown_state > 0) {
2878 return;
2879 }
2880
2881 /* Make a new checkpoint */
2882 srv_main_thread_op_info = "making checkpoint";
2883 log_checkpoint(TRUE, FALSE);
2884 MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
2885 counter_time);
2886
2887 if (srv_shutdown_state > 0) {
2888 return;
2889 }
2890
2891 if (srv_log_arch_expire_sec) {
2892 srv_main_thread_op_info = "purging archived logs";
2893 purge_archived_logs(ut_time() - srv_log_arch_expire_sec,
2894 0);
2895 }
2896 }
2897
2898 /*********************************************************************//**
2899 Perform the tasks during shutdown. The tasks that we do at shutdown
2900 depend on srv_fast_shutdown:
2901 2 => very fast shutdown => do no book keeping
2902 1 => normal shutdown => clear drop table queue and make checkpoint
2903 0 => slow shutdown => in addition to above do complete purge and ibuf
2904 merge
2905 @return TRUE if some work was done. FALSE otherwise */
2906 static
2907 ibool
srv_master_do_shutdown_tasks(ib_time_t * last_print_time)2908 srv_master_do_shutdown_tasks(
2909 /*=========================*/
2910 ib_time_t* last_print_time)/*!< last time the function
2911 print the message */
2912 {
2913 ulint n_bytes_merged = 0;
2914 ulint n_tables_to_drop = 0;
2915
2916 ut_ad(!srv_read_only_mode);
2917
2918 ++srv_main_shutdown_loops;
2919
2920 ut_a(srv_shutdown_state > 0);
2921
2922 /* In very fast shutdown none of the following is necessary */
2923 if (srv_fast_shutdown == 2) {
2924 return(FALSE);
2925 }
2926
2927 /* ALTER TABLE in MySQL requires on Unix that the table handler
2928 can drop tables lazily after there no longer are SELECT
2929 queries to them. */
2930 srv_main_thread_op_info = "doing background drop tables";
2931 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2932
2933 /* make sure that there is enough reusable space in the redo
2934 log files */
2935 srv_main_thread_op_info = "checking free log space";
2936 log_free_check();
2937
2938 /* In case of normal shutdown we don't do ibuf merge or purge */
2939 if (srv_fast_shutdown == 1) {
2940 goto func_exit;
2941 }
2942
2943 /* Do an ibuf merge */
2944 srv_main_thread_op_info = "doing insert buffer merge";
2945 n_bytes_merged = ibuf_merge_in_background(true);
2946
2947 /* Flush logs if needed */
2948 srv_sync_log_buffer_in_background();
2949
2950 func_exit:
2951 /* Make a new checkpoint about once in 10 seconds */
2952 srv_main_thread_op_info = "making checkpoint";
2953 log_checkpoint(TRUE, FALSE);
2954
2955 /* Print progress message every 60 seconds during shutdown */
2956 if (srv_shutdown_state > 0 && srv_print_verbose_log) {
2957 srv_shutdown_print_master_pending(
2958 last_print_time, n_tables_to_drop, n_bytes_merged);
2959 }
2960
2961 return(n_bytes_merged || n_tables_to_drop);
2962 }
2963
2964 /*********************************************************************//**
2965 Puts master thread to sleep. At this point we are using polling to
2966 service various activities. Master thread sleeps for one second before
2967 checking the state of the server again */
2968 static
2969 void
srv_master_sleep(void)2970 srv_master_sleep(void)
2971 /*==================*/
2972 {
2973 srv_main_thread_op_info = "sleeping";
2974 os_thread_sleep(1000000);
2975 srv_main_thread_op_info = "";
2976 }
2977
2978 /*********************************************************************//**
2979 The master thread controlling the server.
2980 @return a dummy parameter */
2981 extern "C" UNIV_INTERN
2982 os_thread_ret_t
DECLARE_THREAD(srv_master_thread)2983 DECLARE_THREAD(srv_master_thread)(
2984 /*==============================*/
2985 void* arg MY_ATTRIBUTE((unused)))
2986 /*!< in: a dummy parameter required by
2987 os_thread_create */
2988 {
2989 my_thread_init();
2990
2991 srv_slot_t* slot;
2992 ulint old_activity_count = srv_get_activity_count();
2993 ulint old_ibuf_merge_activity_count
2994 = srv_get_ibuf_merge_activity_count();
2995 ib_time_t last_print_time;
2996
2997 ut_ad(!srv_read_only_mode);
2998
2999 srv_master_tid = os_thread_get_tid();
3000
3001 os_thread_set_priority(srv_master_tid, srv_sched_priority_master);
3002
3003 #ifdef UNIV_DEBUG_THREAD_CREATION
3004 fprintf(stderr, "Master thread starts, id %lu\n",
3005 os_thread_pf(os_thread_get_curr_id()));
3006 #endif /* UNIV_DEBUG_THREAD_CREATION */
3007
3008 #ifdef UNIV_PFS_THREAD
3009 pfs_register_thread(srv_master_thread_key);
3010 #endif /* UNIV_PFS_THREAD */
3011
3012 srv_main_thread_process_no = os_proc_get_number();
3013 srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
3014
3015 slot = srv_reserve_slot(SRV_MASTER);
3016 ut_a(slot == srv_sys->sys_threads);
3017
3018 last_print_time = ut_time();
3019 loop:
3020 if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
3021 goto suspend_thread;
3022 }
3023
3024 while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
3025
3026 srv_master_sleep();
3027
3028 MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
3029
3030 srv_current_thread_priority = srv_master_thread_priority;
3031
3032 if (srv_check_activity(old_activity_count,
3033 old_ibuf_merge_activity_count)) {
3034
3035 old_activity_count = srv_get_activity_count();
3036 old_ibuf_merge_activity_count
3037 = srv_get_ibuf_merge_activity_count();
3038 srv_master_do_active_tasks();
3039 } else {
3040 srv_master_do_idle_tasks();
3041 }
3042 }
3043
3044 while (srv_master_do_shutdown_tasks(&last_print_time)) {
3045
3046 /* Shouldn't loop here in case of very fast shutdown */
3047 ut_ad(srv_fast_shutdown < 2);
3048 }
3049
3050 suspend_thread:
3051 srv_main_thread_op_info = "suspending";
3052
3053 srv_suspend_thread(slot);
3054
3055 /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
3056 waits for database activity to die down when converting < 4.1.x
3057 databases, and relies on this string being exactly as it is. InnoDB
3058 manual also mentions this string in several places. */
3059 srv_main_thread_op_info = "waiting for server activity";
3060
3061 os_event_wait(slot->event);
3062
3063 if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
3064 my_thread_end();
3065 os_thread_exit(NULL);
3066 }
3067
3068 goto loop;
3069
3070 OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3071 }
3072
3073 /*********************************************************************//**
3074 Check if purge should stop.
3075 @return true if it should shutdown. */
3076 static
3077 bool
srv_purge_should_exit(ulint n_purged)3078 srv_purge_should_exit(
3079 /*==============*/
3080 ulint n_purged) /*!< in: pages purged in last batch */
3081 {
3082 switch (srv_shutdown_state) {
3083 case SRV_SHUTDOWN_NONE:
3084 /* Normal operation. */
3085 break;
3086
3087 case SRV_SHUTDOWN_CLEANUP:
3088 case SRV_SHUTDOWN_EXIT_THREADS:
3089 /* Exit unless slow shutdown requested or all done. */
3090 return(srv_fast_shutdown != 0 || n_purged == 0);
3091
3092 case SRV_SHUTDOWN_LAST_PHASE:
3093 case SRV_SHUTDOWN_FLUSH_PHASE:
3094 ut_error;
3095 }
3096
3097 return(false);
3098 }
3099
3100 /*********************************************************************//**
3101 Fetch and execute a task from the work queue.
3102 @return true if a task was executed */
3103 static
3104 bool
srv_task_execute(void)3105 srv_task_execute(void)
3106 /*==================*/
3107 {
3108 que_thr_t* thr = NULL;
3109
3110 ut_ad(!srv_read_only_mode);
3111 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
3112
3113 mutex_enter(&srv_sys->tasks_mutex);
3114
3115 if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
3116
3117 thr = UT_LIST_GET_FIRST(srv_sys->tasks);
3118
3119 ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
3120
3121 UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
3122 }
3123
3124 mutex_exit(&srv_sys->tasks_mutex);
3125
3126 if (thr != NULL) {
3127
3128 que_run_threads(thr);
3129
3130 os_atomic_inc_ulint(
3131 &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
3132
3133 srv_inc_activity_count();
3134 }
3135
3136 return(thr != NULL);
3137 }
3138
3139 static ulint purge_tid_i = 0;
3140
3141 /*********************************************************************//**
3142 Worker thread that reads tasks from the work queue and executes them.
3143 @return a dummy parameter */
3144 extern "C" UNIV_INTERN
3145 os_thread_ret_t
DECLARE_THREAD(srv_worker_thread)3146 DECLARE_THREAD(srv_worker_thread)(
3147 /*==============================*/
3148 void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
3149 required by os_thread_create */
3150 {
3151 my_thread_init();
3152
3153 srv_slot_t* slot;
3154 ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);
3155
3156 ut_ad(tid_i < srv_n_purge_threads);
3157 ut_ad(!srv_read_only_mode);
3158 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
3159
3160 srv_purge_tids[tid_i] = os_thread_get_tid();
3161 os_thread_set_priority(srv_purge_tids[tid_i],
3162 srv_sched_priority_purge);
3163
3164 #ifdef UNIV_DEBUG_THREAD_CREATION
3165 ut_print_timestamp(stderr);
3166 fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
3167 os_thread_pf(os_thread_get_curr_id()));
3168 #endif /* UNIV_DEBUG_THREAD_CREATION */
3169
3170 slot = srv_reserve_slot(SRV_WORKER);
3171
3172 ut_a(srv_n_purge_threads > 1);
3173
3174 srv_sys_mutex_enter();
3175
3176 ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
3177
3178 srv_sys_mutex_exit();
3179
3180 /* We need to ensure that the worker threads exit after the
3181 purge coordinator thread. Otherwise the purge coordinaor can
3182 end up waiting forever in trx_purge_wait_for_workers_to_complete() */
3183
3184 do {
3185 srv_suspend_thread(slot);
3186
3187 os_event_wait(slot->event);
3188
3189 srv_current_thread_priority = srv_purge_thread_priority;
3190
3191 if (srv_task_execute()) {
3192
3193 /* If there are tasks in the queue, wakeup
3194 the purge coordinator thread. */
3195
3196 srv_wake_purge_thread_if_not_active();
3197 }
3198
3199 /* Note: we are checking the state without holding the
3200 purge_sys->latch here. */
3201 } while (purge_sys->state != PURGE_STATE_EXIT);
3202
3203 srv_free_slot(slot);
3204
3205 rw_lock_x_lock(&purge_sys->latch);
3206
3207 ut_a(!purge_sys->running);
3208 ut_a(purge_sys->state == PURGE_STATE_EXIT);
3209 ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
3210
3211 rw_lock_x_unlock(&purge_sys->latch);
3212
3213 #ifdef UNIV_DEBUG_THREAD_CREATION
3214 ut_print_timestamp(stderr);
3215 fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
3216 os_thread_pf(os_thread_get_curr_id()));
3217 #endif /* UNIV_DEBUG_THREAD_CREATION */
3218
3219 my_thread_end();
3220 /* We count the number of threads in os_thread_exit(). A created
3221 thread should always use that to exit and not use return() to exit. */
3222 os_thread_exit(NULL);
3223
3224 OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3225 }
3226
3227 /*********************************************************************//**
3228 Do the actual purge operation.
3229 @return length of history list before the last purge batch. */
3230 static
3231 ulint
srv_do_purge(ulint n_threads,ulint * n_total_purged)3232 srv_do_purge(
3233 /*=========*/
3234 ulint n_threads, /*!< in: number of threads to use */
3235 ulint* n_total_purged) /*!< in/out: total pages purged */
3236 {
3237 ulint n_pages_purged;
3238
3239 static ulint count = 0;
3240 static ulint n_use_threads = 0;
3241 static ulint rseg_history_len = 0;
3242 ulint old_activity_count = srv_get_activity_count();
3243
3244 ut_a(n_threads > 0);
3245 ut_ad(!srv_read_only_mode);
3246
3247 /* Purge until there are no more records to purge and there is
3248 no change in configuration or server state. If the user has
3249 configured more than one purge thread then we treat that as a
3250 pool of threads and only use the extra threads if purge can't
3251 keep up with updates. */
3252
3253 if (n_use_threads == 0) {
3254 n_use_threads = n_threads;
3255 }
3256
3257 do {
3258 srv_current_thread_priority = srv_purge_thread_priority;
3259
3260 if (trx_sys->rseg_history_len > rseg_history_len
3261 || (srv_max_purge_lag > 0
3262 && rseg_history_len > srv_max_purge_lag)) {
3263
3264 /* History length is now longer than what it was
3265 when we took the last snapshot. Use more threads. */
3266
3267 if (n_use_threads < n_threads) {
3268 ++n_use_threads;
3269 }
3270
3271 } else if (srv_check_activity(old_activity_count)
3272 && n_use_threads > 1) {
3273
3274 /* History length same or smaller since last snapshot,
3275 use fewer threads. */
3276
3277 --n_use_threads;
3278
3279 old_activity_count = srv_get_activity_count();
3280 }
3281
3282 /* Ensure that the purge threads are less than what
3283 was configured. */
3284
3285 ut_a(n_use_threads > 0);
3286 ut_a(n_use_threads <= n_threads);
3287
3288 /* Take a snapshot of the history list before purge. */
3289 if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
3290 break;
3291 }
3292
3293 n_pages_purged = trx_purge(
3294 n_use_threads, srv_purge_batch_size,
3295 (++count % TRX_SYS_N_RSEGS) == 0);
3296
3297 *n_total_purged += n_pages_purged;
3298
3299 } while (!srv_purge_should_exit(n_pages_purged)
3300 && n_pages_purged > 0
3301 && purge_sys->state == PURGE_STATE_RUN);
3302
3303 return(rseg_history_len);
3304 }
3305
3306 /*********************************************************************//**
3307 Suspend the purge coordinator thread. */
3308 static
3309 void
srv_purge_coordinator_suspend(srv_slot_t * slot,ulint rseg_history_len)3310 srv_purge_coordinator_suspend(
3311 /*==========================*/
3312 srv_slot_t* slot, /*!< in/out: Purge coordinator
3313 thread slot */
3314 ulint rseg_history_len) /*!< in: history list length
3315 before last purge */
3316 {
3317 ut_ad(!srv_read_only_mode);
3318 ut_a(slot->type == SRV_PURGE);
3319
3320 bool stop = false;
3321
3322 /** Maximum wait time on the purge event, in micro-seconds. */
3323 static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
3324
3325 ib_int64_t sig_count = srv_suspend_thread(slot);
3326
3327 do {
3328 ulint ret;
3329
3330 rw_lock_x_lock(&purge_sys->latch);
3331
3332 purge_sys->running = false;
3333
3334 rw_lock_x_unlock(&purge_sys->latch);
3335
3336 /* We don't wait right away on the the non-timed wait because
3337 we want to signal the thread that wants to suspend purge. */
3338
3339 if (stop) {
3340 os_event_wait_low(slot->event, sig_count);
3341 ret = 0;
3342 } else if (rseg_history_len <= trx_sys->rseg_history_len) {
3343 ret = os_event_wait_time_low(
3344 slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
3345 } else {
3346 /* We don't want to waste time waiting, if the
3347 history list increased by the time we got here,
3348 unless purge has been stopped. */
3349 ret = 0;
3350 }
3351
3352 srv_sys_mutex_enter();
3353
3354 /* The thread can be in state !suspended after the timeout
3355 but before this check if another thread sent a wakeup signal. */
3356
3357 if (slot->suspended) {
3358 slot->suspended = FALSE;
3359 ++srv_sys->n_threads_active[slot->type];
3360 ut_a(srv_sys->n_threads_active[slot->type] == 1);
3361 }
3362
3363 srv_sys_mutex_exit();
3364
3365 sig_count = srv_suspend_thread(slot);
3366
3367 rw_lock_x_lock(&purge_sys->latch);
3368
3369 stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
3370 && purge_sys->state == PURGE_STATE_STOP);
3371
3372 if (!stop) {
3373 ut_a(purge_sys->n_stop == 0);
3374 purge_sys->running = true;
3375 } else {
3376 ut_a(purge_sys->n_stop > 0);
3377
3378 /* Signal that we are suspended. */
3379 os_event_set(purge_sys->event);
3380 }
3381
3382 rw_lock_x_unlock(&purge_sys->latch);
3383
3384 if (ret == OS_SYNC_TIME_EXCEEDED) {
3385
3386 /* No new records added since wait started then simply
3387 wait for new records. The magic number 5000 is an
3388 approximation for the case where we have cached UNDO
3389 log records which prevent truncate of the UNDO
3390 segments. */
3391
3392 if (rseg_history_len == trx_sys->rseg_history_len
3393 && trx_sys->rseg_history_len < 5000) {
3394
3395 stop = true;
3396 }
3397 }
3398
3399 } while (stop);
3400
3401 srv_sys_mutex_enter();
3402
3403 if (slot->suspended) {
3404 slot->suspended = FALSE;
3405 ++srv_sys->n_threads_active[slot->type];
3406 ut_a(srv_sys->n_threads_active[slot->type] == 1);
3407 }
3408
3409 srv_sys_mutex_exit();
3410 }
3411
3412 /*********************************************************************//**
3413 Purge coordinator thread that schedules the purge tasks.
3414 @return a dummy parameter */
3415 extern "C" UNIV_INTERN
3416 os_thread_ret_t
DECLARE_THREAD(srv_purge_coordinator_thread)3417 DECLARE_THREAD(srv_purge_coordinator_thread)(
3418 /*=========================================*/
3419 void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
3420 required by os_thread_create */
3421 {
3422 my_thread_init();
3423
3424 srv_slot_t* slot;
3425 ulint n_total_purged = ULINT_UNDEFINED;
3426
3427 ut_ad(!srv_read_only_mode);
3428 ut_a(srv_n_purge_threads >= 1);
3429 ut_a(trx_purge_state() == PURGE_STATE_INIT);
3430 ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
3431
3432 srv_purge_tids[0] = os_thread_get_tid();
3433 os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);
3434
3435 rw_lock_x_lock(&purge_sys->latch);
3436
3437 purge_sys->running = true;
3438 purge_sys->state = PURGE_STATE_RUN;
3439
3440 rw_lock_x_unlock(&purge_sys->latch);
3441
3442 #ifdef UNIV_PFS_THREAD
3443 pfs_register_thread(srv_purge_thread_key);
3444 #endif /* UNIV_PFS_THREAD */
3445
3446 #ifdef UNIV_DEBUG_THREAD_CREATION
3447 ut_print_timestamp(stderr);
3448 fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
3449 os_thread_pf(os_thread_get_curr_id()));
3450 #endif /* UNIV_DEBUG_THREAD_CREATION */
3451
3452 slot = srv_reserve_slot(SRV_PURGE);
3453
3454 ulint rseg_history_len = trx_sys->rseg_history_len;
3455
3456 do {
3457 /* If there are no records to purge or the last
3458 purge didn't purge any records then wait for activity. */
3459
3460 if (srv_shutdown_state == SRV_SHUTDOWN_NONE
3461 && (purge_sys->state == PURGE_STATE_STOP
3462 || n_total_purged == 0)) {
3463
3464 srv_purge_coordinator_suspend(slot, rseg_history_len);
3465 }
3466
3467 if (srv_purge_should_exit(n_total_purged)) {
3468 ut_a(!slot->suspended);
3469 break;
3470 }
3471
3472 n_total_purged = 0;
3473
3474 srv_current_thread_priority = srv_purge_thread_priority;
3475
3476 rseg_history_len = srv_do_purge(
3477 srv_n_purge_threads, &n_total_purged);
3478
3479 srv_inc_activity_count();
3480
3481 } while (!srv_purge_should_exit(n_total_purged));
3482
3483 /* Ensure that we don't jump out of the loop unless the
3484 exit condition is satisfied. */
3485
3486 ut_a(srv_purge_should_exit(n_total_purged));
3487
3488 ulint n_pages_purged = ULINT_MAX;
3489
3490 /* Ensure that all records are purged if it is not a fast shutdown.
3491 This covers the case where a record can be added after we exit the
3492 loop above. */
3493 while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
3494 n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
3495 }
3496
3497 /* This trx_purge is called to remove any undo records (added by
3498 background threads) after completion of the above loop. When
3499 srv_fast_shutdown != 0, a large batch size can cause significant
3500 delay in shutdown ,so reducing the batch size to magic number 20
3501 (which was default in 5.5), which we hope will be sufficient to
3502 remove all the undo records */
3503 const uint temp_batch_size = 20;
3504
3505 n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
3506 ? srv_purge_batch_size : temp_batch_size,
3507 true);
3508 ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
3509
3510 /* The task queue should always be empty, independent of fast
3511 shutdown state. */
3512 ut_a(srv_get_task_queue_length() == 0);
3513
3514 srv_free_slot(slot);
3515
3516 /* Note that we are shutting down. */
3517 rw_lock_x_lock(&purge_sys->latch);
3518
3519 purge_sys->state = PURGE_STATE_EXIT;
3520
3521 purge_sys->running = false;
3522
3523 rw_lock_x_unlock(&purge_sys->latch);
3524
3525 #ifdef UNIV_DEBUG_THREAD_CREATION
3526 ut_print_timestamp(stderr);
3527 fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
3528 os_thread_pf(os_thread_get_curr_id()));
3529 #endif /* UNIV_DEBUG_THREAD_CREATION */
3530
3531 /* Ensure that all the worker threads quit. */
3532 if (srv_n_purge_threads > 1) {
3533 srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
3534 }
3535
3536 my_thread_end();
3537 /* We count the number of threads in os_thread_exit(). A created
3538 thread should always use that to exit and not use return() to exit. */
3539 os_thread_exit(NULL);
3540
3541 OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3542 }
3543
3544 /**********************************************************************//**
3545 Enqueues a task to server task queue and releases a worker thread, if there
3546 is a suspended one. */
3547 UNIV_INTERN
3548 void
srv_que_task_enqueue_low(que_thr_t * thr)3549 srv_que_task_enqueue_low(
3550 /*=====================*/
3551 que_thr_t* thr) /*!< in: query thread */
3552 {
3553 ut_ad(!srv_read_only_mode);
3554 mutex_enter(&srv_sys->tasks_mutex);
3555
3556 UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
3557
3558 mutex_exit(&srv_sys->tasks_mutex);
3559
3560 srv_release_threads(SRV_WORKER, 1);
3561 }
3562
3563 /**********************************************************************//**
3564 Get count of tasks in the queue.
3565 @return number of tasks in queue */
3566 UNIV_INTERN
3567 ulint
srv_get_task_queue_length(void)3568 srv_get_task_queue_length(void)
3569 /*===========================*/
3570 {
3571 ulint n_tasks;
3572
3573 ut_ad(!srv_read_only_mode);
3574
3575 mutex_enter(&srv_sys->tasks_mutex);
3576
3577 n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
3578
3579 mutex_exit(&srv_sys->tasks_mutex);
3580
3581 return(n_tasks);
3582 }
3583
3584 /**********************************************************************//**
3585 Wakeup the purge threads. */
3586 UNIV_INTERN
3587 void
srv_purge_wakeup(void)3588 srv_purge_wakeup(void)
3589 /*==================*/
3590 {
3591 ut_ad(!srv_read_only_mode);
3592
3593 if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
3594
3595 srv_release_threads(SRV_PURGE, 1);
3596
3597 if (srv_n_purge_threads > 1) {
3598 ulint n_workers = srv_n_purge_threads - 1;
3599
3600 srv_release_threads(SRV_WORKER, n_workers);
3601 }
3602 }
3603 }
3604
3605 /** Check whether given space id is undo tablespace id
3606 @param[in] space_id space id to check
3607 @return true if it is undo tablespace else false. */
3608 bool
srv_is_undo_tablespace(ulint space_id)3609 srv_is_undo_tablespace(
3610 ulint space_id)
3611 {
3612 if (srv_undo_space_id_start == 0) {
3613 return (false);
3614 }
3615
3616 return(space_id >= srv_undo_space_id_start
3617 && space_id < (srv_undo_space_id_start
3618 + srv_undo_tablespaces_open));
3619 }
3620