1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, 2009 Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License as published by the Free Software
22 Foundation; version 2 of the License.
23
24 This program is distributed in the hope that it will be useful, but WITHOUT
25 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27
28 You should have received a copy of the GNU General Public License along with
29 this program; if not, write to the Free Software Foundation, Inc.,
30 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31
32 *****************************************************************************/
33
34 /**************************************************//**
35 @file srv/srv0srv.c
36 The database server main program
37
38 NOTE: SQL Server 7 uses something which the documentation
39 calls user mode scheduled threads (UMS threads). One such
40 thread is usually allocated per processor. Win32
41 documentation does not know any UMS threads, which suggests
42 that the concept is internal to SQL Server 7. It may mean that
43 SQL Server 7 does all the scheduling of threads itself, even
44 in i/o waits. We should maybe modify InnoDB to use the same
45 technique, because thread switches within NT may be too slow.
46
47 SQL Server 7 also mentions fibers, which are cooperatively
48 scheduled threads. They can boost performance by 5 %,
49 according to the Delaney and Soukup's book.
50
51 Windows 2000 will have something called thread pooling
52 (see msdn website), which we could possibly use.
53
54 Another possibility could be to use some very fast user space
55 thread library. This might confuse NT though.
56
57 Created 10/8/1995 Heikki Tuuri
58 *******************************************************/
59
60 /* Dummy comment */
61 #include "m_string.h" /* for my_sys.h */
62 #include "my_sys.h" /* DEBUG_SYNC_C */
63 #include "srv0srv.h"
64
65 #include "ut0mem.h"
66 #include "ut0ut.h"
67 #include "os0proc.h"
68 #include "mem0mem.h"
69 #include "mem0pool.h"
70 #include "sync0sync.h"
71 #include "que0que.h"
72 #include "log0recv.h"
73 #include "pars0pars.h"
74 #include "usr0sess.h"
75 #include "lock0lock.h"
76 #include "trx0purge.h"
77 #include "ibuf0ibuf.h"
78 #include "buf0flu.h"
79 #include "buf0lru.h"
80 #include "btr0sea.h"
81 #include "dict0load.h"
82 #include "dict0boot.h"
83 #include "srv0start.h"
84 #include "row0mysql.h"
85 #include "ha_prototypes.h"
86 #include "trx0i_s.h"
87 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
88 #include "read0read.h"
89 #include "mysql/plugin.h"
90 #include "mysql/service_thd_wait.h"
91
/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;

/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;

/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
UNIV_INTERN ulint srv_dml_needed_delay = 0;

/* Flags telling whether the lock-timeout, monitor and error-monitor
threads are currently running */
UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
UNIV_INTERN ibool srv_monitor_active = FALSE;
UNIV_INTERN ibool srv_error_monitor_active = FALSE;

/* Description of what the server main thread is currently doing
(for monitoring output) */
UNIV_INTERN const char* srv_main_thread_op_info = "";

/** Prefix used by MySQL to indicate pre-5.1 table name encoding.
NOTE: the array has exactly 9 elements and the literal has 9 characters,
so no terminating NUL is stored; compare with length-bounded functions
such as memcmp() only. */
UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";

/* Server parameters which are read from the initfile */

/* The following three are dir paths which are concatenated before file
names, where the file name itself may also contain a path */

UNIV_INTERN char* srv_data_home = NULL;
#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN char* srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */

/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
UNIV_INTERN my_bool srv_file_per_table;
/** The file format to use on new *.ibd files. */
UNIV_INTERN ulint srv_file_format = 0;
/** Whether to check file format during startup. A value of
DICT_TF_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
set it to the highest format we support. */
UNIV_INTERN ulint srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;

#if DICT_TF_FORMAT_51
# error "DICT_TF_FORMAT_51 must be 0!"
#endif
/** Place locks to records only, i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;

/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on Windows and Linux. */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
144
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.

We use condition variables for events on Windows if possible, even if
os_event resembles the Windows kernel event object well API-wise. The
reason is performance: kernel objects are heavyweights and
WaitForSingleObject() is a performance killer, causing the calling thread
to context switch. Besides, InnoDB preallocates a large number (often
millions) of os_events. With kernel event objects that takes a big chunk
out of the non-paged pool, which is better suited for tasks like IO than
for storing idle event objects. */
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
159
/* Number of data files; the name/size/raw-partition arrays below appear
to be parallel arrays of this length — confirm against srv0start */
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */
UNIV_INTERN ulint* srv_data_file_sizes = NULL;

/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
/* if != 0, this tells the max size auto-extending may increase the
last data file size */
UNIV_INTERN ulint srv_last_file_size_max = 0;
/* If the last data file is auto-extended, we add this
many pages to it at a time */
UNIV_INTERN ulong srv_auto_extend_increment = 8;
UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;

/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword to my.cnf */

UNIV_INTERN ibool srv_created_new_raw = FALSE;

UNIV_INTERN char** srv_log_group_home_dirs = NULL;

UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;

/* Try to flush dirty pages so as to avoid IO bursts at
the checkpoints.
NOTE(review): declared as plain char but used as a boolean flag, while
sibling flags use my_bool/ibool — confirm before changing the type, as
extern declarations elsewhere must match. */
UNIV_INTERN char srv_adaptive_flushing = TRUE;
193
/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20

/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex.
Evaluates to TRUE while another nonblocking attempt is still allowed. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)

/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
UNIV_INTERN const byte* srv_latin1_ordering;

/* use os/external memory allocator */
UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
/* requested size in kilobytes */
UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
/* requested number of buffer pool instances */
UNIV_INTERN ulint srv_buf_pool_instances = 1;
/* previously requested size */
UNIV_INTERN ulint srv_buf_pool_old_size;
/* current size in kilobytes */
UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
/* size in bytes */
UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;

/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
instead. */
UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;

/* Switch to enable random read ahead. */
UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
/* User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
UNIV_INTERN ulong srv_read_ahead_threshold = 56;
234
#ifdef UNIV_LOG_ARCHIVE
/* TRUE when archived logging is enabled */
UNIV_INTERN ibool srv_log_archive_on = FALSE;
/* Presumably set when recovery should read from archived log files —
confirm against log0recv. Initialized with FALSE (was the literal 0)
for consistency with the sibling ibool flag above; ibool FALSE is 0,
so behavior is unchanged. */
UNIV_INTERN ibool srv_archive_recovery = FALSE;
/* Limit LSN for archive-based recovery (uninitialized until set) */
UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */
240
/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
merge operation. The value of this parameter is used as is by the
background loop when the system is idle (low load); on a busy system
the parameter is scaled down by a factor of 4, to avoid putting
a heavier load on the I/O sub system. */

UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;

/* File flush method: the user-supplied string, and the per-platform
settings (defaults: fsync on Unix, unbuffered IO on Windows) */
UNIV_INTERN char* srv_file_flush_method_str = NULL;
UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;

/* Maximum number of files InnoDB keeps open at one time */
UNIV_INTERN ulint srv_max_n_open_files = 300;

/* Number of IO operations per second the server can do */
UNIV_INTERN ulong srv_io_capacity = 200;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;

/* the number of purge threads to use from the worker pool (currently 0 or 1) */
UNIV_INTERN ulong srv_n_purge_threads = 0;

/* the number of pages to purge in one batch */
UNIV_INTERN ulong srv_purge_batch_size = 20;

/* the number of rollback segments to use */
UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
275
/* counts the total amount of data read, in bytes */
UNIV_INTERN ulint srv_data_read = 0;

/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL values when collecting statistics. By default it is set to
SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal. */
UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;

/* here we count the total amount of data written, in bytes */
UNIV_INTERN ulint srv_data_written = 0;

/* the number of log write requests done */
UNIV_INTERN ulint srv_log_write_requests = 0;

/* the number of physical writes to the log performed */
UNIV_INTERN ulint srv_log_writes = 0;

/* amount of data written to the log files, in bytes */
UNIV_INTERN ulint srv_os_log_written = 0;

/* number of pending writes to the log files */
UNIV_INTERN ulint srv_os_log_pending_writes = 0;

/* we increase this counter when we do not have enough space in the
log buffer and have to flush it */
UNIV_INTERN ulint srv_log_waits = 0;

/* this variable counts the number of times the doublewrite buffer
was flushed */
UNIV_INTERN ulint srv_dblwr_writes = 0;

/* here we store the number of pages that have been flushed to the
doublewrite buffer */
UNIV_INTERN ulint srv_dblwr_pages_written = 0;

/* in this variable we store the number of write requests issued */
UNIV_INTERN ulint srv_buf_pool_write_requests = 0;

/* here we store the number of times we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
UNIV_INTERN ulint srv_buf_pool_wait_free = 0;

/* counts the number of pages that were written from the buffer
pool to disk */
UNIV_INTERN ulint srv_buf_pool_flushed = 0;

/** Number of buffer pool reads that led to the
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads = 0;

/* structure to pass status variables to MySQL */
UNIV_INTERN export_struc export_vars;

/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */

UNIV_INTERN ulint srv_force_recovery = 0;
334 /*-----------------------*/
/*-----------------------*/
/* We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */

UNIV_INTERN ulint srv_max_n_threads = 0;

/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. MySQL creates a thread for each user session, and
semaphore contention and convoy problems can occur without this restriction.
Value 10 should be good if there are less than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */

UNIV_INTERN ulong srv_thread_concurrency = 0;

/* this mutex protects the srv_conc data structures */
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
/* number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily.
This is no longer true. We'll, however, keep the lint datatype to add
assertions to catch any corner cases that we may have missed. */
UNIV_INTERN lint srv_conc_n_threads = 0;
/* number of OS threads waiting in the FIFO for permission to enter
InnoDB */
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;

/* print all user-level transaction deadlocks to mysqld stderr */
UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
364
/* A wait slot for one OS thread queued for permission to enter InnoDB */
typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
	os_event_t event; /*!< event to wait on */
	ibool reserved; /*!< TRUE if slot
			reserved */
	ibool wait_ended; /*!< TRUE when another
			thread has already set
			the event and the
			thread in this slot is
			free to proceed; but
			reserved may still be
			TRUE at that point */
	UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
};

/* queue of threads waiting to get in */
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
/* array of wait slots */
UNIV_INTERN srv_conc_slot_t* srv_conc_slots;

/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
/*-----------------------*/
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, we do not even flush
the buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
UNIV_INTERN ulint srv_fast_shutdown = 0;

/* Generate an innodb_status.<pid> file */
UNIV_INTERN ibool srv_innodb_status = FALSE;
398
/* When estimating the number of different key values in an index, sample
this many index pages */
UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;

/* whether to use the doublewrite buffer and page checksums */
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
UNIV_INTERN ibool srv_use_checksums = TRUE;

UNIV_INTERN ulong srv_replication_delay = 0;

/*-------------------------------------------*/
/* spin-wait and concurrency-ticket tuning parameters */
UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
UNIV_INTERN ulong srv_spin_wait_delay = 6;
UNIV_INTERN ibool srv_priority_boost = TRUE;

#ifdef UNIV_DEBUG
/* debug-build-only trace switches */
UNIV_INTERN ibool srv_print_thread_releases = FALSE;
UNIV_INTERN ibool srv_print_lock_waits = FALSE;
UNIV_INTERN ibool srv_print_buf_io = FALSE;
UNIV_INTERN ibool srv_print_log_io = FALSE;
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */

/* cumulative row operation counters */
UNIV_INTERN ulint srv_n_rows_inserted = 0;
UNIV_INTERN ulint srv_n_rows_updated = 0;
UNIV_INTERN ulint srv_n_rows_deleted = 0;
UNIV_INTERN ulint srv_n_rows_read = 0;

/* snapshots of the above, used to compute per-interval rates */
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
static ulint srv_n_rows_read_old = 0;

/* lock-wait statistics */
UNIV_INTERN ulint srv_n_lock_wait_count = 0;
UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;

UNIV_INTERN ulint srv_truncated_status_writes = 0;

/*
Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown.
NOTE(review): this polarity looks inverted relative to the name
srv_print_verbose_log (TRUE by default) — verify against the usages.
*/
UNIV_INTERN ibool srv_print_verbose_log = TRUE;
UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
449
/* Arrays of English strings describing the current state and function of
each i/o handler thread */

UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];

UNIV_INTERN time_t srv_last_monitor_time;

UNIV_INTERN mutex_t srv_innodb_monitor_mutex;

/* Mutex for locking srv_monitor_file */
UNIV_INTERN mutex_t srv_monitor_file_mutex;

#ifdef UNIV_PFS_MUTEX
/* Key to register kernel_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
/* Key to register srv_innodb_monitor_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
/* Key to register srv_monitor_file_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
/* Key to register srv_dict_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
/* Key to register srv_misc_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
#endif /* UNIV_PFS_MUTEX */

/* Temporary file for innodb monitor output */
UNIV_INTERN FILE* srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
UNIV_INTERN FILE* srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellaneous diagnostic output */
UNIV_INTERN FILE* srv_misc_tmpfile;

UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;

/* The following count work done by srv_master_thread. */

/* Iterations by the 'once per second' loop. */
static ulint srv_main_1_second_loops = 0;
/* Calls to sleep by the 'once per second' loop. */
static ulint srv_main_sleeps = 0;
/* Iterations by the 'once per 10 seconds' loop. */
static ulint srv_main_10_second_loops = 0;
/* Iterations of the loop bounded by the 'background_loop' label. */
static ulint srv_main_background_loops = 0;
/* Iterations of the loop bounded by the 'flush_loop' label. */
static ulint srv_main_flush_loops = 0;
/* Log writes involving flush. */
static ulint srv_log_writes_and_flush = 0;

/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
thread ensures that we flush the log files at least once per
second. */
static time_t srv_last_log_flush_time;

/* The master thread performs various tasks based on the current
state of IO activity and the level of IO utilization in past
intervals. The following macros define thresholds for these conditions. */
#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
521
522 /*
523 IMPLEMENTATION OF THE SERVER MAIN PROGRAM
524 =========================================
525
526 There is the following analogue between this database
527 server and an operating system kernel:
528
529 DB concept equivalent OS concept
530 ---------- ---------------------
531 transaction -- process;
532
533 query thread -- thread;
534
535 lock -- semaphore;
536
537 transaction set to
538 the rollback state -- kill signal delivered to a process;
539
540 kernel -- kernel;
541
542 query thread execution:
543 (a) without kernel mutex
544 reserved -- process executing in user mode;
545 (b) with kernel mutex reserved
546 -- process executing in kernel mode;
547
548 The server is controlled by a master thread which runs at
549 a priority higher than normal, that is, higher than user threads.
550 It sleeps most of the time, and wakes up, say, every 300 milliseconds,
551 to check whether there is anything happening in the server which
552 requires intervention of the master thread. Such situations may be,
553 for example, when flushing of dirty blocks is needed in the buffer
554 pool or old version of database rows have to be cleaned away.
555
556 The threads which we call user threads serve the queries of
557 the clients and input from the console of the server.
558 They run at normal priority. The server may have several
559 communications endpoints. A dedicated set of user threads waits
560 at each of these endpoints ready to receive a client request.
561 Each request is taken by a single user thread, which then starts
562 processing and, when the result is ready, sends it to the client
563 and returns to wait at the same endpoint the thread started from.
564
565 So, we do not have dedicated communication threads listening at
566 the endpoints and dealing the jobs to dedicated worker threads.
Our architecture saves one thread switch per request, compared
568 to the solution with dedicated communication threads
569 which amounts to 15 microseconds on 100 MHz Pentium
570 running NT. If the client
571 is communicating over a network, this saving is negligible, but
572 if the client resides in the same machine, maybe in an SMP machine
573 on a different processor from the server thread, the saving
574 can be important as the threads can communicate over shared
575 memory with an overhead of a few microseconds.
576
577 We may later implement a dedicated communication thread solution
578 for those endpoints which communicate over a network.
579
580 Our solution with user threads has two problems: for each endpoint
581 there has to be a number of listening threads. If there are many
582 communication endpoints, it may be difficult to set the right number
583 of concurrent threads in the system, as many of the threads
584 may always be waiting at less busy endpoints. Another problem
585 is queuing of the messages, as the server internally does not
586 offer any queue for jobs.
587
588 Another group of user threads is intended for splitting the
589 queries and processing them in parallel. Let us call these
590 parallel communication threads. These threads are waiting for
591 parallelized tasks, suspended on event semaphores.
592
593 A single user thread waits for input from the console,
594 like a command to shut the database.
595
596 Utility threads are a different group of threads which takes
597 care of the buffer pool flushing and other, mainly background
598 operations, in the server.
599 Some of these utility threads always run at a lower than normal
600 priority, so that they are always in background. Some of them
601 may dynamically boost their priority by the pri_adjust function,
602 even to higher than normal priority, if their task becomes urgent.
603 The running of utilities is controlled by high- and low-water marks
604 of urgency. The urgency may be measured by the number of dirty blocks
605 in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, a utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspends itself to wait for an event. The master thread is
responsible for signaling this event when the utility thread is
610 again needed.
611
612 For each individual type of utility, some threads always remain
613 at lower than normal priority. This is because pri_adjust is implemented
614 so that the threads at normal or higher priority control their
615 share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
617 the system fully. The background priority threads make up for this,
618 starting to run when the load drops.
619
620 When there is no activity in the system, also the master thread
621 suspends itself to wait for an event making
622 the server totally silent. The responsibility to signal this
623 event is on the user thread which again receives a message
624 from a client.
625
626 There is still one complication in our server design. If a
627 background utility thread obtains a resource (e.g., mutex) needed by a user
628 thread, and there is also some other user activity in the system,
629 the user thread may have to wait indefinitely long for the
630 resource, as the OS does not schedule a background thread if
631 there is some other runnable user thread. This problem is called
632 priority inversion in real-time programming.
633
634 One solution to the priority inversion problem would be to
635 keep record of which thread owns which resource and
636 in the above case boost the priority of the background thread
637 so that it will be scheduled and it can release the resource.
638 This solution is called priority inheritance in real-time programming.
639 A drawback of this solution is that the overhead of acquiring a mutex
640 increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
641 the thread has to call os_thread_get_curr_id.
642 This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
643 pair. Note that the thread
644 cannot store the information in the resource, say mutex, itself,
645 because competing threads could wipe out the information if it is
646 stored before acquiring the mutex, and if it stored afterwards,
647 the information is outdated for the time of one machine instruction,
648 at least. (To be precise, the information could be stored to
649 lock_word in mutex if the machine supports atomic swap.)
650
651 The above solution with priority inheritance may become actual in the
652 future, but at the moment we plan to implement a more coarse solution,
653 which could be called a global priority inheritance. If a thread
654 has to wait for a long time, say 300 milliseconds, for a resource,
655 we just guess that it may be waiting for a resource owned by a background
656 thread, and boost the priority of all runnable background threads
657 to the normal level. The background threads then themselves adjust
658 their fixed priority back to background after releasing all resources
659 they had (or, at some fixed points in their program code).
660
661 What is the performance of the global priority inheritance solution?
662 We may weigh the length of the wait time 300 milliseconds, during
663 which the system processes some other thread
664 to the cost of boosting the priority of each runnable background
665 thread, rescheduling it, and lowering the priority again.
666 On 100 MHz Pentium + NT this overhead may be of the order 100
667 microseconds per thread. So, if the number of runnable background
668 threads is not very big, say < 100, the cost is tolerable.
669 Utility threads probably will access resources used by
670 user threads not very often, so collisions of user threads
671 to preempted utility threads should not happen very often.
672
673 The thread table contains
674 information of the current status of each thread existing in the system,
675 and also the event semaphores used in suspending the master thread
676 and utility and parallel communication threads when they have nothing to do.
677 The thread table can be seen as an analogue to the process table
678 in a traditional Unix implementation.
679
680 The thread table is also used in the global priority inheritance
681 scheme. This brings in one additional complication: threads accessing
682 the thread table must have at least normal fixed priority,
683 because the priority inheritance solution does not work if a background
684 thread is preempted while possessing the mutex protecting the thread table.
685 So, if a thread accesses the thread table, its priority has to be
686 boosted at least to normal. This priority requirement can be seen similar to
687 the privileged mode used when processing the kernel calls in traditional
688 Unix.*/
689
/* Thread slot in the thread table */
struct srv_slot_struct{
	unsigned type:1; /*!< thread type: user, utility etc. */
	unsigned in_use:1; /*!< TRUE if this slot is in use */
	unsigned suspended:1; /*!< TRUE if the thread is waiting
			for the event of this slot */
	ib_time_t suspend_time; /*!< time when the thread was
			suspended */
	os_event_t event; /*!< event used in suspending the
			thread when it has nothing to do */
	que_thr_t* thr; /*!< suspended query thread (only
			used for MySQL threads) */
};

/* Table for MySQL threads where they will be suspended to wait for locks */
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;

/* Events for waking the various service threads; presumably each is
signaled to wake the thread its name refers to — the consuming threads
are defined elsewhere in this file. */
UNIV_INTERN os_event_t srv_timeout_event;

UNIV_INTERN os_event_t srv_monitor_event;

UNIV_INTERN os_event_t srv_error_event;

UNIV_INTERN os_event_t srv_lock_timeout_thread_event;

UNIV_INTERN srv_sys_t* srv_sys = NULL;

/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte srv_pad1[64];
/* mutex protecting the server, trx structs, query threads, and lock table */
UNIV_INTERN mutex_t* kernel_mutex_temp;
/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte srv_pad2[64];
725
/* NOTE: the block below is disabled dead code; the meter arrays are
referenced only from a matching #if 0 section in srv_init(). */
#if 0
/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */

static ulint srv_meter[SRV_MASTER + 1];
static ulint srv_meter_low_water[SRV_MASTER + 1];
static ulint srv_meter_high_water[SRV_MASTER + 1];
static ulint srv_meter_high_water2[SRV_MASTER + 1];
static ulint srv_meter_foreground[SRV_MASTER + 1];
#endif

/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread (enum srv_thread_type), whose
largest value is SRV_MASTER. */

UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
746
747 /*********************************************************************//**
748 Asynchronous purge thread.
749 @return a dummy parameter */
750 UNIV_INTERN
751 os_thread_ret_t
752 srv_purge_thread(
753 /*=============*/
754 void* arg __attribute__((unused))); /*!< in: a dummy parameter
755 required by os_thread_create */
756
757 /***********************************************************************
758 Prints counters for work done by srv_master_thread. */
759 static
760 void
srv_print_master_thread_info(FILE * file)761 srv_print_master_thread_info(
762 /*=========================*/
763 FILE *file) /* in: output stream */
764 {
765 fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
766 "%lu 10_second, %lu background, %lu flush\n",
767 srv_main_1_second_loops, srv_main_sleeps,
768 srv_main_10_second_loops, srv_main_background_loops,
769 srv_main_flush_loops);
770 fprintf(file, "srv_master_thread log flush and writes: %lu\n",
771 srv_log_writes_and_flush);
772 }
773
774 /*********************************************************************//**
775 Sets the info describing an i/o thread current state. */
776 UNIV_INTERN
777 void
srv_set_io_thread_op_info(ulint i,const char * str)778 srv_set_io_thread_op_info(
779 /*======================*/
780 ulint i, /*!< in: the 'segment' of the i/o thread */
781 const char* str) /*!< in: constant char string describing the
782 state */
783 {
784 ut_a(i < SRV_MAX_N_IO_THREADS);
785
786 srv_io_thread_op_info[i] = str;
787 }
788
789 /*********************************************************************//**
790 Accessor function to get pointer to n'th slot in the server thread
791 table.
792 @return pointer to the slot */
793 static
794 srv_slot_t*
srv_table_get_nth_slot(ulint index)795 srv_table_get_nth_slot(
796 /*===================*/
797 ulint index) /*!< in: index of the slot */
798 {
799 ut_ad(mutex_own(&kernel_mutex));
800 ut_a(index < OS_THREAD_MAX_N);
801
802 return(srv_sys->threads + index);
803 }
804
805 /*********************************************************************//**
806 Gets the number of threads in the system.
807 @return sum of srv_n_threads[] */
808 UNIV_INTERN
809 ulint
srv_get_n_threads(void)810 srv_get_n_threads(void)
811 /*===================*/
812 {
813 ulint i;
814 ulint n_threads = 0;
815
816 mutex_enter(&kernel_mutex);
817
818 for (i = 0; i < SRV_MASTER + 1; i++) {
819
820 n_threads += srv_n_threads[i];
821 }
822
823 mutex_exit(&kernel_mutex);
824
825 return(n_threads);
826 }
827
#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the type of a thread table slot. Any value other than
SRV_WORKER or SRV_MASTER indicates corruption or a logic error and
crashes via ut_error (debug builds only).
@return TRUE if ok */
static
ibool
srv_thread_type_validate(
/*=====================*/
	enum srv_thread_type	type)	/*!< in: thread type */
{
	switch (type) {
	case SRV_WORKER:
	case SRV_MASTER:
		return(TRUE);
	}
	ut_error;
	/* Not reached; keeps the compiler happy about the return value */
	return(FALSE);
}
#endif /* UNIV_DEBUG */
847
848 /*********************************************************************//**
849 Gets the type of a thread table slot.
850 @return thread type */
851 static
852 enum srv_thread_type
srv_slot_get_type(const srv_slot_t * slot)853 srv_slot_get_type(
854 /*==============*/
855 const srv_slot_t* slot) /*!< in: thread slot */
856 {
857 enum srv_thread_type type = (enum srv_thread_type) slot->type;
858 ut_ad(srv_thread_type_validate(type));
859 return(type);
860 }
861
862 /*********************************************************************//**
863 Reserves a slot in the thread table for the current thread.
864 NOTE! The server mutex has to be reserved by the caller!
865 @return reserved slot */
866 static
867 srv_slot_t*
srv_table_reserve_slot(enum srv_thread_type type)868 srv_table_reserve_slot(
869 /*===================*/
870 enum srv_thread_type type) /*!< in: type of the thread */
871 {
872 srv_slot_t* slot;
873 ulint i;
874
875 ut_ad(srv_thread_type_validate(type));
876 ut_ad(mutex_own(&kernel_mutex));
877
878 i = 0;
879 slot = srv_table_get_nth_slot(i);
880
881 while (slot->in_use) {
882 i++;
883 slot = srv_table_get_nth_slot(i);
884 }
885
886 slot->in_use = TRUE;
887 slot->suspended = FALSE;
888 slot->type = type;
889 ut_ad(srv_slot_get_type(slot) == type);
890
891 return(slot);
892 }
893
894 /*********************************************************************//**
895 Suspends the calling thread to wait for the event in its thread slot.
896 NOTE! The server mutex has to be reserved by the caller! */
897 static
898 void
srv_suspend_thread(srv_slot_t * slot)899 srv_suspend_thread(
900 /*===============*/
901 srv_slot_t* slot) /*!< in/out: thread slot */
902 {
903 enum srv_thread_type type;
904
905 ut_ad(mutex_own(&kernel_mutex));
906 ut_ad(slot->in_use);
907 ut_ad(!slot->suspended);
908
909 if (srv_print_thread_releases) {
910 fprintf(stderr,
911 "Suspending thread %lu to slot %lu\n",
912 (ulong) os_thread_get_curr_id(),
913 (ulong) (slot - srv_sys->threads));
914 }
915
916 type = srv_slot_get_type(slot);
917
918 slot->suspended = TRUE;
919
920 ut_ad(srv_n_threads_active[type] > 0);
921
922 srv_n_threads_active[type]--;
923
924 os_event_reset(slot->event);
925 }
926
927 /*********************************************************************//**
928 Releases threads of the type given from suspension in the thread table.
929 NOTE! The server mutex has to be reserved by the caller!
930 @return number of threads released: this may be less than n if not
931 enough threads were suspended at the moment */
932 UNIV_INTERN
933 ulint
srv_release_threads(enum srv_thread_type type,ulint n)934 srv_release_threads(
935 /*================*/
936 enum srv_thread_type type, /*!< in: thread type */
937 ulint n) /*!< in: number of threads to release */
938 {
939 srv_slot_t* slot;
940 ulint i;
941 ulint count = 0;
942
943 ut_ad(srv_thread_type_validate(type));
944 ut_ad(n > 0);
945 ut_ad(mutex_own(&kernel_mutex));
946
947 for (i = 0; i < OS_THREAD_MAX_N; i++) {
948
949 slot = srv_table_get_nth_slot(i);
950
951 if (slot->in_use && slot->suspended
952 && srv_slot_get_type(slot) == type) {
953
954 slot->suspended = FALSE;
955
956 srv_n_threads_active[type]++;
957
958 os_event_set(slot->event);
959
960 if (srv_print_thread_releases) {
961 fprintf(stderr,
962 "Releasing thread type %lu"
963 " from slot %lu\n",
964 (ulong) type, (ulong) i);
965 }
966
967 count++;
968
969 if (count == n) {
970 break;
971 }
972 }
973 }
974
975 return(count);
976 }
977
978 /*********************************************************************//**
979 Check whether thread type has reserved a slot. Return the first slot that
980 is found. This works because we currently have only 1 thread of each type.
981 @return slot number or ULINT_UNDEFINED if not found*/
982 UNIV_INTERN
983 ulint
srv_thread_has_reserved_slot(enum srv_thread_type type)984 srv_thread_has_reserved_slot(
985 /*=========================*/
986 enum srv_thread_type type) /*!< in: thread type to check */
987 {
988 ulint i;
989 ulint slot_no = ULINT_UNDEFINED;
990
991 ut_ad(srv_thread_type_validate(type));
992 mutex_enter(&kernel_mutex);
993
994 for (i = 0; i < OS_THREAD_MAX_N; i++) {
995 srv_slot_t* slot;
996
997 slot = srv_table_get_nth_slot(i);
998
999 if (slot->in_use && slot->type == type) {
1000 slot_no = i;
1001 break;
1002 }
1003 }
1004
1005 mutex_exit(&kernel_mutex);
1006
1007 return(slot_no);
1008 }
1009
1010 /*********************************************************************//**
1011 Initializes the server. */
1012 UNIV_INTERN
1013 void
srv_init(void)1014 srv_init(void)
1015 /*==========*/
1016 {
1017 srv_conc_slot_t* conc_slot;
1018 srv_slot_t* slot;
1019 ulint i;
1020
1021 srv_sys = mem_alloc(sizeof(srv_sys_t));
1022
1023 kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
1024 mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
1025
1026 mutex_create(srv_innodb_monitor_mutex_key,
1027 &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
1028
1029 srv_sys->threads = mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
1030
1031 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1032 slot = srv_sys->threads + i;
1033 slot->event = os_event_create(NULL);
1034 ut_a(slot->event);
1035 }
1036
1037 srv_mysql_table = mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
1038
1039 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1040 slot = srv_mysql_table + i;
1041 slot->event = os_event_create(NULL);
1042 ut_a(slot->event);
1043 }
1044
1045 srv_error_event = os_event_create(NULL);
1046
1047 srv_timeout_event = os_event_create(NULL);
1048
1049 srv_monitor_event = os_event_create(NULL);
1050
1051 srv_lock_timeout_thread_event = os_event_create(NULL);
1052
1053 for (i = 0; i < SRV_MASTER + 1; i++) {
1054 srv_n_threads_active[i] = 0;
1055 srv_n_threads[i] = 0;
1056 #if 0
1057 srv_meter[i] = 30;
1058 srv_meter_low_water[i] = 50;
1059 srv_meter_high_water[i] = 100;
1060 srv_meter_high_water2[i] = 200;
1061 srv_meter_foreground[i] = 250;
1062 #endif
1063 }
1064
1065 UT_LIST_INIT(srv_sys->tasks);
1066
1067 /* Create dummy indexes for infimum and supremum records */
1068
1069 dict_ind_init();
1070
1071 /* Init the server concurrency restriction data structures */
1072
1073 os_fast_mutex_init(&srv_conc_mutex);
1074
1075 UT_LIST_INIT(srv_conc_queue);
1076
1077 srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
1078
1079 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1080 conc_slot = srv_conc_slots + i;
1081 conc_slot->reserved = FALSE;
1082 conc_slot->event = os_event_create(NULL);
1083 ut_a(conc_slot->event);
1084 }
1085
1086 /* Initialize some INFORMATION SCHEMA internal structures */
1087 trx_i_s_cache_init(trx_i_s_cache);
1088 }
1089
1090 /*********************************************************************//**
1091 Frees the data structures created in srv_init(). */
1092 UNIV_INTERN
1093 void
srv_free(void)1094 srv_free(void)
1095 /*==========*/
1096 {
1097 os_fast_mutex_free(&srv_conc_mutex);
1098 mem_free(srv_conc_slots);
1099 srv_conc_slots = NULL;
1100
1101 mem_free(srv_sys->threads);
1102 mem_free(srv_sys);
1103 srv_sys = NULL;
1104
1105 mem_free(kernel_mutex_temp);
1106 kernel_mutex_temp = NULL;
1107 mem_free(srv_mysql_table);
1108 srv_mysql_table = NULL;
1109
1110 trx_i_s_cache_free(trx_i_s_cache);
1111 }
1112
1113 /*********************************************************************//**
1114 Initializes the synchronization primitives, memory system, and the thread
1115 local storage. */
1116 UNIV_INTERN
1117 void
srv_general_init(void)1118 srv_general_init(void)
1119 /*==================*/
1120 {
1121 ut_mem_init();
1122 /* Reset the system variables in the recovery module. */
1123 recv_sys_var_init();
1124 os_sync_init();
1125 sync_init();
1126 mem_init(srv_mem_pool_size);
1127 }
1128
1129 /*======================= InnoDB Server FIFO queue =======================*/
1130
1131 /* Maximum allowable purge history length. <=0 means 'infinite'. */
1132 UNIV_INTERN ulong srv_max_purge_lag = 0;
1133
1134 /*********************************************************************//**
1135 Puts an OS thread to wait if there are too many concurrent threads
1136 (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
1137 UNIV_INTERN
1138 void
srv_conc_enter_innodb(trx_t * trx)1139 srv_conc_enter_innodb(
1140 /*==================*/
1141 trx_t* trx) /*!< in: transaction object associated with the
1142 thread */
1143 {
1144 ibool has_slept = FALSE;
1145 srv_conc_slot_t* slot = NULL;
1146 ulint i;
1147
1148 #ifdef UNIV_SYNC_DEBUG
1149 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1150 #endif /* UNIV_SYNC_DEBUG */
1151
1152 if (trx->mysql_thd != NULL
1153 && thd_is_replication_slave_thread(trx->mysql_thd)) {
1154
1155 UT_WAIT_FOR(srv_conc_n_threads
1156 < (lint)srv_thread_concurrency,
1157 srv_replication_delay * 1000);
1158
1159 return;
1160 }
1161
1162 /* If trx has 'free tickets' to enter the engine left, then use one
1163 such ticket */
1164
1165 if (trx->n_tickets_to_enter_innodb > 0) {
1166 trx->n_tickets_to_enter_innodb--;
1167
1168 return;
1169 }
1170
1171 os_fast_mutex_lock(&srv_conc_mutex);
1172 retry:
1173 if (trx->declared_to_be_inside_innodb) {
1174 ut_print_timestamp(stderr);
1175 fputs(" InnoDB: Error: trying to declare trx"
1176 " to enter InnoDB, but\n"
1177 "InnoDB: it already is declared.\n", stderr);
1178 trx_print(stderr, trx, 0);
1179 putc('\n', stderr);
1180 os_fast_mutex_unlock(&srv_conc_mutex);
1181
1182 return;
1183 }
1184
1185 ut_ad(srv_conc_n_threads >= 0);
1186
1187 if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1188
1189 srv_conc_n_threads++;
1190 trx->declared_to_be_inside_innodb = TRUE;
1191 trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1192
1193 os_fast_mutex_unlock(&srv_conc_mutex);
1194
1195 return;
1196 }
1197
1198 /* If the transaction is not holding resources, let it sleep
1199 for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
1200
1201 if (!has_slept && !trx->has_search_latch
1202 && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
1203
1204 has_slept = TRUE; /* We let it sleep only once to avoid
1205 starvation */
1206
1207 srv_conc_n_waiting_threads++;
1208
1209 os_fast_mutex_unlock(&srv_conc_mutex);
1210
1211 trx->op_info = "sleeping before joining InnoDB queue";
1212
1213 /* Peter Zaitsev suggested that we take the sleep away
1214 altogether. But the sleep may be good in pathological
1215 situations of lots of thread switches. Simply put some
1216 threads aside for a while to reduce the number of thread
1217 switches. */
1218 if (SRV_THREAD_SLEEP_DELAY > 0) {
1219 os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
1220 }
1221
1222 trx->op_info = "";
1223
1224 os_fast_mutex_lock(&srv_conc_mutex);
1225
1226 srv_conc_n_waiting_threads--;
1227
1228 goto retry;
1229 }
1230
1231 /* Too many threads inside: put the current thread to a queue */
1232
1233 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1234 slot = srv_conc_slots + i;
1235
1236 if (!slot->reserved) {
1237
1238 break;
1239 }
1240 }
1241
1242 if (i == OS_THREAD_MAX_N) {
1243 /* Could not find a free wait slot, we must let the
1244 thread enter */
1245
1246 srv_conc_n_threads++;
1247 trx->declared_to_be_inside_innodb = TRUE;
1248 trx->n_tickets_to_enter_innodb = 0;
1249
1250 os_fast_mutex_unlock(&srv_conc_mutex);
1251
1252 return;
1253 }
1254
1255 /* Release possible search system latch this thread has */
1256 if (trx->has_search_latch) {
1257 trx_search_latch_release_if_reserved(trx);
1258 }
1259
1260 /* Add to the queue */
1261 slot->reserved = TRUE;
1262 slot->wait_ended = FALSE;
1263
1264 UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
1265
1266 os_event_reset(slot->event);
1267
1268 srv_conc_n_waiting_threads++;
1269
1270 os_fast_mutex_unlock(&srv_conc_mutex);
1271
1272 /* Go to wait for the event; when a thread leaves InnoDB it will
1273 release this thread */
1274
1275 ut_ad(!trx->has_search_latch);
1276 #ifdef UNIV_SYNC_DEBUG
1277 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1278 #endif /* UNIV_SYNC_DEBUG */
1279 trx->op_info = "waiting in InnoDB queue";
1280
1281 thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
1282 os_event_wait(slot->event);
1283 thd_wait_end(trx->mysql_thd);
1284
1285 trx->op_info = "";
1286
1287 os_fast_mutex_lock(&srv_conc_mutex);
1288
1289 srv_conc_n_waiting_threads--;
1290
1291 /* NOTE that the thread which released this thread already
1292 incremented the thread counter on behalf of this thread */
1293
1294 slot->reserved = FALSE;
1295
1296 UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
1297
1298 trx->declared_to_be_inside_innodb = TRUE;
1299 trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1300
1301 os_fast_mutex_unlock(&srv_conc_mutex);
1302 }
1303
1304 /*********************************************************************//**
1305 This lets a thread enter InnoDB regardless of the number of threads inside
1306 InnoDB. This must be called when a thread ends a lock wait. */
1307 UNIV_INTERN
1308 void
srv_conc_force_enter_innodb(trx_t * trx)1309 srv_conc_force_enter_innodb(
1310 /*========================*/
1311 trx_t* trx) /*!< in: transaction object associated with the
1312 thread */
1313 {
1314 #ifdef UNIV_SYNC_DEBUG
1315 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1316 #endif /* UNIV_SYNC_DEBUG */
1317
1318 if (UNIV_LIKELY(!srv_thread_concurrency)) {
1319
1320 return;
1321 }
1322
1323 ut_ad(srv_conc_n_threads >= 0);
1324
1325 os_fast_mutex_lock(&srv_conc_mutex);
1326
1327 srv_conc_n_threads++;
1328 trx->declared_to_be_inside_innodb = TRUE;
1329 trx->n_tickets_to_enter_innodb = 1;
1330
1331 os_fast_mutex_unlock(&srv_conc_mutex);
1332 }
1333
1334 /*********************************************************************//**
1335 This must be called when a thread exits InnoDB in a lock wait or at the
1336 end of an SQL statement. */
1337 UNIV_INTERN
1338 void
srv_conc_force_exit_innodb(trx_t * trx)1339 srv_conc_force_exit_innodb(
1340 /*=======================*/
1341 trx_t* trx) /*!< in: transaction object associated with the
1342 thread */
1343 {
1344 srv_conc_slot_t* slot = NULL;
1345
1346 if (trx->mysql_thd != NULL
1347 && thd_is_replication_slave_thread(trx->mysql_thd)) {
1348
1349 return;
1350 }
1351
1352 if (trx->declared_to_be_inside_innodb == FALSE) {
1353
1354 return;
1355 }
1356
1357 os_fast_mutex_lock(&srv_conc_mutex);
1358
1359 ut_ad(srv_conc_n_threads > 0);
1360 srv_conc_n_threads--;
1361 trx->declared_to_be_inside_innodb = FALSE;
1362 trx->n_tickets_to_enter_innodb = 0;
1363
1364 if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1365 /* Look for a slot where a thread is waiting and no other
1366 thread has yet released the thread */
1367
1368 slot = UT_LIST_GET_FIRST(srv_conc_queue);
1369
1370 while (slot && slot->wait_ended == TRUE) {
1371 slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
1372 }
1373
1374 if (slot != NULL) {
1375 slot->wait_ended = TRUE;
1376
1377 /* We increment the count on behalf of the released
1378 thread */
1379
1380 srv_conc_n_threads++;
1381 }
1382 }
1383
1384 os_fast_mutex_unlock(&srv_conc_mutex);
1385
1386 if (slot != NULL) {
1387 os_event_set(slot->event);
1388 }
1389
1390 #ifdef UNIV_SYNC_DEBUG
1391 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1392 #endif /* UNIV_SYNC_DEBUG */
1393 }
1394
1395 /*********************************************************************//**
1396 This must be called when a thread exits InnoDB. */
1397 UNIV_INTERN
1398 void
srv_conc_exit_innodb(trx_t * trx)1399 srv_conc_exit_innodb(
1400 /*=================*/
1401 trx_t* trx) /*!< in: transaction object associated with the
1402 thread */
1403 {
1404 #ifdef UNIV_SYNC_DEBUG
1405 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1406 #endif /* UNIV_SYNC_DEBUG */
1407
1408 if (trx->n_tickets_to_enter_innodb > 0) {
1409 /* We will pretend the thread is still inside InnoDB though it
1410 now leaves the InnoDB engine. In this way we save
1411 a lot of semaphore operations. srv_conc_force_exit_innodb is
1412 used to declare the thread definitely outside InnoDB. It
1413 should be called when there is a lock wait or an SQL statement
1414 ends. */
1415
1416 return;
1417 }
1418
1419 srv_conc_force_exit_innodb(trx);
1420 }
1421
1422 /*========================================================================*/
1423
1424 /*********************************************************************//**
1425 Normalizes init parameter values to use units we use inside InnoDB.
1426 @return DB_SUCCESS or error code */
1427 static
1428 ulint
srv_normalize_init_values(void)1429 srv_normalize_init_values(void)
1430 /*===========================*/
1431 {
1432 ulint n;
1433 ulint i;
1434
1435 n = srv_n_data_files;
1436
1437 for (i = 0; i < n; i++) {
1438 srv_data_file_sizes[i] = srv_data_file_sizes[i]
1439 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1440 }
1441
1442 srv_last_file_size_max = srv_last_file_size_max
1443 * ((1024 * 1024) / UNIV_PAGE_SIZE);
1444
1445 srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1446
1447 srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1448
1449 srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1450
1451 return(DB_SUCCESS);
1452 }
1453
1454 /*********************************************************************//**
1455 Boots the InnoDB server.
1456 @return DB_SUCCESS or error code */
1457 UNIV_INTERN
1458 ulint
srv_boot(void)1459 srv_boot(void)
1460 /*==========*/
1461 {
1462 ulint err;
1463
1464 /* Transform the init parameter values given by MySQL to
1465 use units we use inside InnoDB: */
1466
1467 err = srv_normalize_init_values();
1468
1469 if (err != DB_SUCCESS) {
1470 return(err);
1471 }
1472
1473 /* Initialize synchronization primitives, memory management, and thread
1474 local storage */
1475
1476 srv_general_init();
1477
1478 /* Initialize this module */
1479
1480 srv_init();
1481
1482 return(DB_SUCCESS);
1483 }
1484
1485 /*********************************************************************//**
1486 Reserves a slot in the thread table for the current MySQL OS thread.
1487 NOTE! The kernel mutex has to be reserved by the caller!
1488 @return reserved slot */
1489 static
1490 srv_slot_t*
srv_table_reserve_slot_for_mysql(void)1491 srv_table_reserve_slot_for_mysql(void)
1492 /*==================================*/
1493 {
1494 srv_slot_t* slot;
1495 ulint i;
1496
1497 ut_ad(mutex_own(&kernel_mutex));
1498
1499 i = 0;
1500 slot = srv_mysql_table + i;
1501
1502 while (slot->in_use) {
1503 i++;
1504
1505 if (UNIV_UNLIKELY(i >= OS_THREAD_MAX_N)) {
1506
1507 ut_print_timestamp(stderr);
1508
1509 fprintf(stderr,
1510 " InnoDB: There appear to be %lu MySQL"
1511 " threads currently waiting\n"
1512 "InnoDB: inside InnoDB, which is the"
1513 " upper limit. Cannot continue operation.\n"
1514 "InnoDB: We intentionally generate"
1515 " a seg fault to print a stack trace\n"
1516 "InnoDB: on Linux. But first we print"
1517 " a list of waiting threads.\n", (ulong) i);
1518
1519 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1520
1521 slot = srv_mysql_table + i;
1522
1523 fprintf(stderr,
1524 "Slot %lu: thread type %lu,"
1525 " in use %lu, susp %lu, time %lu\n",
1526 (ulong) i,
1527 (ulong) slot->type,
1528 (ulong) slot->in_use,
1529 (ulong) slot->suspended,
1530 (ulong) difftime(ut_time(),
1531 slot->suspend_time));
1532 }
1533
1534 ut_error;
1535 }
1536
1537 slot = srv_mysql_table + i;
1538 }
1539
1540 ut_a(slot->in_use == FALSE);
1541
1542 slot->in_use = TRUE;
1543
1544 return(slot);
1545 }
1546
1547 /***************************************************************//**
1548 Puts a MySQL OS thread to wait for a lock to be released. If an error
1549 occurs during the wait trx->error_state associated with thr is
1550 != DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
1551 are possible errors. DB_DEADLOCK is returned if selective deadlock
1552 resolution chose this transaction as a victim. */
1553 UNIV_INTERN
1554 void
srv_suspend_mysql_thread(que_thr_t * thr)1555 srv_suspend_mysql_thread(
1556 /*=====================*/
1557 que_thr_t* thr) /*!< in: query thread associated with the MySQL
1558 OS thread */
1559 {
1560 srv_slot_t* slot;
1561 os_event_t event;
1562 double wait_time;
1563 trx_t* trx;
1564 ulint had_dict_lock;
1565 ibool was_declared_inside_innodb = FALSE;
1566 ib_int64_t start_time = 0;
1567 ib_int64_t finish_time;
1568 ulint diff_time;
1569 ulint sec;
1570 ulint ms;
1571 ulong lock_wait_timeout;
1572
1573 ut_ad(!mutex_own(&kernel_mutex));
1574
1575 trx = thr_get_trx(thr);
1576
1577 if (trx->mysql_thd != 0) {
1578 DEBUG_SYNC_C("srv_suspend_mysql_thread_enter");
1579 }
1580
1581 os_event_set(srv_lock_timeout_thread_event);
1582
1583 mutex_enter(&kernel_mutex);
1584
1585 trx->error_state = DB_SUCCESS;
1586
1587 if (thr->state == QUE_THR_RUNNING) {
1588
1589 ut_ad(thr->is_active == TRUE);
1590
1591 /* The lock has already been released or this transaction
1592 was chosen as a deadlock victim: no need to suspend */
1593
1594 if (trx->was_chosen_as_deadlock_victim) {
1595
1596 trx->error_state = DB_DEADLOCK;
1597 trx->was_chosen_as_deadlock_victim = FALSE;
1598 }
1599
1600 mutex_exit(&kernel_mutex);
1601
1602 return;
1603 }
1604
1605 ut_ad(thr->is_active == FALSE);
1606
1607 slot = srv_table_reserve_slot_for_mysql();
1608
1609 event = slot->event;
1610
1611 slot->thr = thr;
1612
1613 os_event_reset(event);
1614
1615 slot->suspend_time = ut_time();
1616
1617 if (thr->lock_state == QUE_THR_LOCK_ROW) {
1618 srv_n_lock_wait_count++;
1619 srv_n_lock_wait_current_count++;
1620
1621 if (ut_usectime(&sec, &ms) == -1) {
1622 start_time = -1;
1623 } else {
1624 start_time = (ib_int64_t) sec * 1000000 + ms;
1625 }
1626 }
1627 /* Wake the lock timeout monitor thread, if it is suspended */
1628
1629 os_event_set(srv_lock_timeout_thread_event);
1630
1631 mutex_exit(&kernel_mutex);
1632
1633 had_dict_lock = trx->dict_operation_lock_mode;
1634
1635 switch (had_dict_lock) {
1636 case RW_S_LATCH:
1637 /* Release foreign key check latch */
1638 row_mysql_unfreeze_data_dictionary(trx);
1639 break;
1640 case RW_X_LATCH:
1641 /* There should never be a lock wait when the
1642 dictionary latch is reserved in X mode. Dictionary
1643 transactions should only acquire locks on dictionary
1644 tables, not other tables. All access to dictionary
1645 tables should be covered by dictionary
1646 transactions. */
1647 ut_print_timestamp(stderr);
1648 fputs(" InnoDB: Error: dict X latch held in "
1649 "srv_suspend_mysql_thread\n", stderr);
1650 /* This should never occur. This incorrect handling
1651 was added in the early development of
1652 ha_innobase::add_index() in InnoDB Plugin 1.0. */
1653 /* Release fast index creation latch */
1654 row_mysql_unlock_data_dictionary(trx);
1655 break;
1656 }
1657
1658 ut_a(trx->dict_operation_lock_mode == 0);
1659
1660 if (trx->declared_to_be_inside_innodb) {
1661
1662 was_declared_inside_innodb = TRUE;
1663
1664 /* We must declare this OS thread to exit InnoDB, since a
1665 possible other thread holding a lock which this thread waits
1666 for must be allowed to enter, sooner or later */
1667
1668 srv_conc_force_exit_innodb(trx);
1669 }
1670
1671 /* Suspend this thread and wait for the event. */
1672
1673 thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
1674 os_event_wait(event);
1675 thd_wait_end(trx->mysql_thd);
1676
1677 #ifdef UNIV_SYNC_DEBUG
1678 ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
1679 #endif /* UNIV_SYNC_DEBUG */
1680
1681 if (was_declared_inside_innodb) {
1682
1683 /* Return back inside InnoDB */
1684
1685 srv_conc_force_enter_innodb(trx);
1686 }
1687
1688 /* After resuming, reacquire the data dictionary latch if
1689 necessary. */
1690
1691 switch (had_dict_lock) {
1692 case RW_S_LATCH:
1693 row_mysql_freeze_data_dictionary(trx);
1694 break;
1695 case RW_X_LATCH:
1696 /* This should never occur. This incorrect handling
1697 was added in the early development of
1698 ha_innobase::add_index() in InnoDB Plugin 1.0. */
1699 row_mysql_lock_data_dictionary(trx);
1700 break;
1701 }
1702
1703 mutex_enter(&kernel_mutex);
1704
1705 /* Release the slot for others to use */
1706
1707 slot->in_use = FALSE;
1708
1709 wait_time = ut_difftime(ut_time(), slot->suspend_time);
1710
1711 if (thr->lock_state == QUE_THR_LOCK_ROW) {
1712 if (ut_usectime(&sec, &ms) == -1) {
1713 finish_time = -1;
1714 } else {
1715 finish_time = (ib_int64_t) sec * 1000000 + ms;
1716 }
1717
1718 diff_time = (finish_time > start_time) ?
1719 (ulint) (finish_time - start_time) : 0;
1720
1721 srv_n_lock_wait_current_count--;
1722 srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
1723 if (diff_time > srv_n_lock_max_wait_time &&
1724 /* only update the variable if we successfully
1725 retrieved the start and finish times. See Bug#36819. */
1726 start_time != -1 && finish_time != -1) {
1727 srv_n_lock_max_wait_time = diff_time;
1728 }
1729
1730 /* Record the lock wait time for this thread */
1731 thd_set_lock_wait_time(trx->mysql_thd, diff_time);
1732 }
1733
1734 if (trx->was_chosen_as_deadlock_victim) {
1735
1736 trx->error_state = DB_DEADLOCK;
1737 trx->was_chosen_as_deadlock_victim = FALSE;
1738 }
1739
1740 mutex_exit(&kernel_mutex);
1741
1742 /* InnoDB system transactions (such as the purge, and
1743 incomplete transactions that are being rolled back after crash
1744 recovery) will use the global value of
1745 innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
1746 lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
1747
1748 if (lock_wait_timeout < 100000000
1749 && wait_time > (double) lock_wait_timeout) {
1750
1751 trx->error_state = DB_LOCK_WAIT_TIMEOUT;
1752 }
1753
1754 if (trx_is_interrupted(trx)) {
1755
1756 trx->error_state = DB_INTERRUPTED;
1757 }
1758 }
1759
1760 /********************************************************************//**
1761 Releases a MySQL OS thread waiting for a lock to be released, if the
1762 thread is already suspended. */
1763 UNIV_INTERN
1764 void
srv_release_mysql_thread_if_suspended(que_thr_t * thr)1765 srv_release_mysql_thread_if_suspended(
1766 /*==================================*/
1767 que_thr_t* thr) /*!< in: query thread associated with the
1768 MySQL OS thread */
1769 {
1770 srv_slot_t* slot;
1771 ulint i;
1772
1773 ut_ad(mutex_own(&kernel_mutex));
1774
1775 for (i = 0; i < OS_THREAD_MAX_N; i++) {
1776
1777 slot = srv_mysql_table + i;
1778
1779 if (slot->in_use && slot->thr == thr) {
1780 /* Found */
1781
1782 os_event_set(slot->event);
1783
1784 return;
1785 }
1786 }
1787
1788 /* not found */
1789 }
1790
1791 /******************************************************************//**
1792 Refreshes the values used to calculate per-second averages. */
1793 static
1794 void
srv_refresh_innodb_monitor_stats(void)1795 srv_refresh_innodb_monitor_stats(void)
1796 /*==================================*/
1797 {
1798 mutex_enter(&srv_innodb_monitor_mutex);
1799
1800 srv_last_monitor_time = time(NULL);
1801
1802 os_aio_refresh_stats();
1803
1804 btr_cur_n_sea_old = btr_cur_n_sea;
1805 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1806
1807 log_refresh_stats();
1808
1809 buf_refresh_io_stats_all();
1810
1811 srv_n_rows_inserted_old = srv_n_rows_inserted;
1812 srv_n_rows_updated_old = srv_n_rows_updated;
1813 srv_n_rows_deleted_old = srv_n_rows_deleted;
1814 srv_n_rows_read_old = srv_n_rows_read;
1815
1816 mutex_exit(&srv_innodb_monitor_mutex);
1817 }
1818
1819 /******************************************************************//**
1820 Outputs to a file the output of the InnoDB Monitor.
1821 @return FALSE if not all information printed
1822 due to failure to obtain necessary mutex */
1823 UNIV_INTERN
1824 ibool
srv_printf_innodb_monitor(FILE * file,ibool nowait,ulint * trx_start,ulint * trx_end)1825 srv_printf_innodb_monitor(
1826 /*======================*/
1827 FILE* file, /*!< in: output stream */
1828 ibool nowait, /*!< in: whether to wait for kernel mutex */
1829 ulint* trx_start, /*!< out: file position of the start of
1830 the list of active transactions */
1831 ulint* trx_end) /*!< out: file position of the end of
1832 the list of active transactions */
1833 {
1834 double time_elapsed;
1835 time_t current_time;
1836 ulint n_reserved;
1837 ibool ret;
1838
1839 mutex_enter(&srv_innodb_monitor_mutex);
1840
1841 current_time = time(NULL);
1842
1843 /* We add 0.001 seconds to time_elapsed to prevent division
1844 by zero if two users happen to call SHOW INNODB STATUS at the same
1845 time */
1846
1847 time_elapsed = difftime(current_time, srv_last_monitor_time)
1848 + 0.001;
1849
1850 srv_last_monitor_time = time(NULL);
1851
1852 fputs("\n=====================================\n", file);
1853
1854 ut_print_timestamp(file);
1855 fprintf(file,
1856 " INNODB MONITOR OUTPUT\n"
1857 "=====================================\n"
1858 "Per second averages calculated from the last %lu seconds\n",
1859 (ulong)time_elapsed);
1860
1861 fputs("-----------------\n"
1862 "BACKGROUND THREAD\n"
1863 "-----------------\n", file);
1864 srv_print_master_thread_info(file);
1865
1866 fputs("----------\n"
1867 "SEMAPHORES\n"
1868 "----------\n", file);
1869 sync_print(file);
1870
1871 /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1872 order level in sync0sync.h, while dict_foreign_err_mutex has a very
1873 low level 135. Therefore we can reserve the latter mutex here without
1874 a danger of a deadlock of threads. */
1875
1876 mutex_enter(&dict_foreign_err_mutex);
1877
1878 if (ftell(dict_foreign_err_file) != 0L) {
1879 fputs("------------------------\n"
1880 "LATEST FOREIGN KEY ERROR\n"
1881 "------------------------\n", file);
1882 ut_copy_file(file, dict_foreign_err_file);
1883 }
1884
1885 mutex_exit(&dict_foreign_err_mutex);
1886
1887 /* Only if lock_print_info_summary proceeds correctly,
1888 before we call the lock_print_info_all_transactions
1889 to print all the lock information. */
1890 ret = lock_print_info_summary(file, nowait);
1891
1892 if (ret) {
1893 if (trx_start) {
1894 long t = ftell(file);
1895 if (t < 0) {
1896 *trx_start = ULINT_UNDEFINED;
1897 } else {
1898 *trx_start = (ulint) t;
1899 }
1900 }
1901 lock_print_info_all_transactions(file);
1902 if (trx_end) {
1903 long t = ftell(file);
1904 if (t < 0) {
1905 *trx_end = ULINT_UNDEFINED;
1906 } else {
1907 *trx_end = (ulint) t;
1908 }
1909 }
1910 }
1911
1912 fputs("--------\n"
1913 "FILE I/O\n"
1914 "--------\n", file);
1915 os_aio_print(file);
1916
1917 fputs("-------------------------------------\n"
1918 "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1919 "-------------------------------------\n", file);
1920 ibuf_print(file);
1921
1922 ha_print_info(file, btr_search_sys->hash_index);
1923
1924 fprintf(file,
1925 "%.2f hash searches/s, %.2f non-hash searches/s\n",
1926 (btr_cur_n_sea - btr_cur_n_sea_old)
1927 / time_elapsed,
1928 (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1929 / time_elapsed);
1930 btr_cur_n_sea_old = btr_cur_n_sea;
1931 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1932
1933 fputs("---\n"
1934 "LOG\n"
1935 "---\n", file);
1936 log_print(file);
1937
1938 fputs("----------------------\n"
1939 "BUFFER POOL AND MEMORY\n"
1940 "----------------------\n", file);
1941 fprintf(file,
1942 "Total memory allocated " ULINTPF
1943 "; in additional pool allocated " ULINTPF "\n",
1944 ut_total_allocated_memory,
1945 mem_pool_get_reserved(mem_comm_pool));
1946 fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1947 dict_sys->size);
1948
1949 buf_print_io(file);
1950
1951 fputs("--------------\n"
1952 "ROW OPERATIONS\n"
1953 "--------------\n", file);
1954 fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1955 (long) srv_conc_n_threads,
1956 (ulong) srv_conc_n_waiting_threads);
1957
1958 fprintf(file, "%lu read views open inside InnoDB\n",
1959 UT_LIST_GET_LEN(trx_sys->view_list));
1960
1961 n_reserved = fil_space_get_n_reserved_extents(0);
1962 if (n_reserved > 0) {
1963 fprintf(file,
1964 "%lu tablespace extents now reserved for"
1965 " B-tree split operations\n",
1966 (ulong) n_reserved);
1967 }
1968
1969 #ifdef UNIV_LINUX
1970 fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1971 (ulong) srv_main_thread_process_no,
1972 (ulong) srv_main_thread_id,
1973 srv_main_thread_op_info);
1974 #else
1975 fprintf(file, "Main thread id %lu, state: %s\n",
1976 (ulong) srv_main_thread_id,
1977 srv_main_thread_op_info);
1978 #endif
1979 fprintf(file,
1980 "Number of rows inserted " ULINTPF
1981 ", updated " ULINTPF ", deleted " ULINTPF
1982 ", read " ULINTPF "\n",
1983 srv_n_rows_inserted,
1984 srv_n_rows_updated,
1985 srv_n_rows_deleted,
1986 srv_n_rows_read);
1987 fprintf(file,
1988 "%.2f inserts/s, %.2f updates/s,"
1989 " %.2f deletes/s, %.2f reads/s\n",
1990 (srv_n_rows_inserted - srv_n_rows_inserted_old)
1991 / time_elapsed,
1992 (srv_n_rows_updated - srv_n_rows_updated_old)
1993 / time_elapsed,
1994 (srv_n_rows_deleted - srv_n_rows_deleted_old)
1995 / time_elapsed,
1996 (srv_n_rows_read - srv_n_rows_read_old)
1997 / time_elapsed);
1998
1999 srv_n_rows_inserted_old = srv_n_rows_inserted;
2000 srv_n_rows_updated_old = srv_n_rows_updated;
2001 srv_n_rows_deleted_old = srv_n_rows_deleted;
2002 srv_n_rows_read_old = srv_n_rows_read;
2003
2004 fputs("----------------------------\n"
2005 "END OF INNODB MONITOR OUTPUT\n"
2006 "============================\n", file);
2007 mutex_exit(&srv_innodb_monitor_mutex);
2008 fflush(file);
2009
2010 return(ret);
2011 }
2012
2013 /******************************************************************//**
2014 Function to pass InnoDB status variables to MySQL */
2015 UNIV_INTERN
2016 void
srv_export_innodb_status(void)2017 srv_export_innodb_status(void)
2018 /*==========================*/
2019 {
2020 buf_pool_stat_t stat;
2021 buf_pools_list_size_t buf_pools_list_size;
2022 ulint LRU_len;
2023 ulint free_len;
2024 ulint flush_list_len;
2025
2026 buf_get_total_stat(&stat);
2027 buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
2028 buf_get_total_list_size_in_bytes(&buf_pools_list_size);
2029
2030 mutex_enter(&srv_innodb_monitor_mutex);
2031
2032 export_vars.innodb_data_pending_reads
2033 = os_n_pending_reads;
2034 export_vars.innodb_data_pending_writes
2035 = os_n_pending_writes;
2036 export_vars.innodb_data_pending_fsyncs
2037 = fil_n_pending_log_flushes
2038 + fil_n_pending_tablespace_flushes;
2039 export_vars.innodb_data_fsyncs = os_n_fsyncs;
2040 export_vars.innodb_data_read = srv_data_read;
2041 export_vars.innodb_data_reads = os_n_file_reads;
2042 export_vars.innodb_data_writes = os_n_file_writes;
2043 export_vars.innodb_data_written = srv_data_written;
2044 export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
2045 export_vars.innodb_buffer_pool_write_requests
2046 = srv_buf_pool_write_requests;
2047 export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
2048 export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
2049 export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
2050 export_vars.innodb_buffer_pool_read_ahead_rnd
2051 = stat.n_ra_pages_read_rnd;
2052 export_vars.innodb_buffer_pool_read_ahead
2053 = stat.n_ra_pages_read;
2054 export_vars.innodb_buffer_pool_read_ahead_evicted
2055 = stat.n_ra_pages_evicted;
2056 export_vars.innodb_buffer_pool_pages_data = LRU_len;
2057 export_vars.innodb_buffer_pool_bytes_data =
2058 buf_pools_list_size.LRU_bytes
2059 + buf_pools_list_size.unzip_LRU_bytes;
2060 export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
2061 export_vars.innodb_buffer_pool_bytes_dirty =
2062 buf_pools_list_size.flush_list_bytes;
2063 export_vars.innodb_buffer_pool_pages_free = free_len;
2064 #ifdef UNIV_DEBUG
2065 export_vars.innodb_buffer_pool_pages_latched
2066 = buf_get_latched_pages_number();
2067 #endif /* UNIV_DEBUG */
2068 export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
2069
2070 export_vars.innodb_buffer_pool_pages_misc
2071 = buf_pool_get_n_pages() - LRU_len - free_len;
2072 #ifdef HAVE_ATOMIC_BUILTINS
2073 export_vars.innodb_have_atomic_builtins = 1;
2074 #else
2075 export_vars.innodb_have_atomic_builtins = 0;
2076 #endif
2077 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
2078 export_vars.innodb_log_waits = srv_log_waits;
2079 export_vars.innodb_os_log_written = srv_os_log_written;
2080 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
2081 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
2082 export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
2083 export_vars.innodb_log_write_requests = srv_log_write_requests;
2084 export_vars.innodb_log_writes = srv_log_writes;
2085 export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
2086 export_vars.innodb_dblwr_writes = srv_dblwr_writes;
2087 export_vars.innodb_pages_created = stat.n_pages_created;
2088 export_vars.innodb_pages_read = stat.n_pages_read;
2089 export_vars.innodb_pages_written = stat.n_pages_written;
2090 export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
2091 export_vars.innodb_row_lock_current_waits
2092 = srv_n_lock_wait_current_count;
2093 export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
2094 if (srv_n_lock_wait_count > 0) {
2095 export_vars.innodb_row_lock_time_avg = (ulint)
2096 (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
2097 } else {
2098 export_vars.innodb_row_lock_time_avg = 0;
2099 }
2100 export_vars.innodb_row_lock_time_max
2101 = srv_n_lock_max_wait_time / 1000;
2102 export_vars.innodb_rows_read = srv_n_rows_read;
2103 export_vars.innodb_rows_inserted = srv_n_rows_inserted;
2104 export_vars.innodb_rows_updated = srv_n_rows_updated;
2105 export_vars.innodb_rows_deleted = srv_n_rows_deleted;
2106 export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
2107
2108 #ifdef UNIV_DEBUG
2109 {
2110 trx_id_t done_trx_no;
2111 trx_id_t up_limit_id;
2112
2113 rw_lock_s_lock(&purge_sys->latch);
2114 done_trx_no = purge_sys->done_trx_no;
2115 up_limit_id = purge_sys->view
2116 ? purge_sys->view->up_limit_id
2117 : 0;
2118 rw_lock_s_unlock(&purge_sys->latch);
2119
2120 if (trx_sys->max_trx_id < done_trx_no) {
2121 export_vars.innodb_purge_trx_id_age = 0;
2122 } else {
2123 export_vars.innodb_purge_trx_id_age =
2124 trx_sys->max_trx_id - done_trx_no;
2125 }
2126
2127 if (!up_limit_id
2128 || trx_sys->max_trx_id < up_limit_id) {
2129 export_vars.innodb_purge_view_trx_id_age = 0;
2130 } else {
2131 export_vars.innodb_purge_view_trx_id_age =
2132 trx_sys->max_trx_id - up_limit_id;
2133 }
2134 }
2135 #endif /* UNIV_DEBUG */
2136
2137 mutex_exit(&srv_innodb_monitor_mutex);
2138 }
2139
2140 /*********************************************************************//**
2141 A thread which prints the info output by various InnoDB monitors.
2142 @return a dummy parameter */
2143 UNIV_INTERN
2144 os_thread_ret_t
srv_monitor_thread(void * arg)2145 srv_monitor_thread(
2146 /*===============*/
2147 void* arg __attribute__((unused)))
2148 /*!< in: a dummy parameter required by
2149 os_thread_create */
2150 {
2151 ib_int64_t sig_count;
2152 double time_elapsed;
2153 time_t current_time;
2154 time_t last_table_monitor_time;
2155 time_t last_tablespace_monitor_time;
2156 time_t last_monitor_time;
2157 ulint mutex_skipped;
2158 ibool last_srv_print_monitor;
2159
2160 #ifdef UNIV_DEBUG_THREAD_CREATION
2161 fprintf(stderr, "Lock timeout thread starts, id %lu\n",
2162 os_thread_pf(os_thread_get_curr_id()));
2163 #endif
2164
2165 #ifdef UNIV_PFS_THREAD
2166 pfs_register_thread(srv_monitor_thread_key);
2167 #endif
2168
2169 UT_NOT_USED(arg);
2170 srv_last_monitor_time = ut_time();
2171 last_table_monitor_time = ut_time();
2172 last_tablespace_monitor_time = ut_time();
2173 last_monitor_time = ut_time();
2174 mutex_skipped = 0;
2175 last_srv_print_monitor = srv_print_innodb_monitor;
2176 loop:
2177 srv_monitor_active = TRUE;
2178
2179 /* Wake up every 5 seconds to see if we need to print
2180 monitor information or if signalled at shutdown. */
2181
2182 sig_count = os_event_reset(srv_monitor_event);
2183
2184 os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
2185
2186 current_time = ut_time();
2187
2188 time_elapsed = difftime(current_time, last_monitor_time);
2189
2190 if (time_elapsed > 15) {
2191 last_monitor_time = ut_time();
2192
2193 if (srv_print_innodb_monitor) {
2194 /* Reset mutex_skipped counter everytime
2195 srv_print_innodb_monitor changes. This is to
2196 ensure we will not be blocked by kernel_mutex
2197 for short duration information printing,
2198 such as requested by sync_array_print_long_waits() */
2199 if (!last_srv_print_monitor) {
2200 mutex_skipped = 0;
2201 last_srv_print_monitor = TRUE;
2202 }
2203
2204 if (!srv_printf_innodb_monitor(stderr,
2205 MUTEX_NOWAIT(mutex_skipped),
2206 NULL, NULL)) {
2207 mutex_skipped++;
2208 } else {
2209 /* Reset the counter */
2210 mutex_skipped = 0;
2211 }
2212 } else {
2213 last_srv_print_monitor = FALSE;
2214 }
2215
2216
2217 if (srv_innodb_status) {
2218 mutex_enter(&srv_monitor_file_mutex);
2219 rewind(srv_monitor_file);
2220 if (!srv_printf_innodb_monitor(srv_monitor_file,
2221 MUTEX_NOWAIT(mutex_skipped),
2222 NULL, NULL)) {
2223 mutex_skipped++;
2224 } else {
2225 mutex_skipped = 0;
2226 }
2227
2228 os_file_set_eof(srv_monitor_file);
2229 mutex_exit(&srv_monitor_file_mutex);
2230 }
2231
2232 if (srv_print_innodb_tablespace_monitor
2233 && difftime(current_time,
2234 last_tablespace_monitor_time) > 60) {
2235 last_tablespace_monitor_time = ut_time();
2236
2237 fputs("========================"
2238 "========================\n",
2239 stderr);
2240
2241 ut_print_timestamp(stderr);
2242
2243 fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2244 "========================"
2245 "========================\n",
2246 stderr);
2247
2248 fsp_print(0);
2249 fputs("Validating tablespace\n", stderr);
2250 fsp_validate(0);
2251 fputs("Validation ok\n"
2252 "---------------------------------------\n"
2253 "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2254 "=======================================\n",
2255 stderr);
2256 }
2257
2258 if (srv_print_innodb_table_monitor
2259 && difftime(current_time, last_table_monitor_time) > 60) {
2260
2261 last_table_monitor_time = ut_time();
2262
2263 fputs("===========================================\n",
2264 stderr);
2265
2266 ut_print_timestamp(stderr);
2267
2268 fputs(" INNODB TABLE MONITOR OUTPUT\n"
2269 "===========================================\n",
2270 stderr);
2271 dict_print();
2272
2273 fputs("-----------------------------------\n"
2274 "END OF INNODB TABLE MONITOR OUTPUT\n"
2275 "==================================\n",
2276 stderr);
2277 }
2278 }
2279
2280 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2281 goto exit_func;
2282 }
2283
2284 if (srv_print_innodb_monitor
2285 || srv_print_innodb_lock_monitor
2286 || srv_print_innodb_tablespace_monitor
2287 || srv_print_innodb_table_monitor) {
2288 goto loop;
2289 }
2290
2291 srv_monitor_active = FALSE;
2292
2293 goto loop;
2294
2295 exit_func:
2296 srv_monitor_active = FALSE;
2297
2298 /* We count the number of threads in os_thread_exit(). A created
2299 thread should always use that to exit and not use return() to exit. */
2300
2301 os_thread_exit(NULL);
2302
2303 OS_THREAD_DUMMY_RETURN;
2304 }
2305
2306 /*********************************************************************//**
2307 A thread which wakes up threads whose lock wait may have lasted too long.
2308 @return a dummy parameter */
2309 UNIV_INTERN
2310 os_thread_ret_t
srv_lock_timeout_thread(void * arg)2311 srv_lock_timeout_thread(
2312 /*====================*/
2313 void* arg __attribute__((unused)))
2314 /* in: a dummy parameter required by
2315 os_thread_create */
2316 {
2317 srv_slot_t* slot;
2318 ibool some_waits;
2319 double wait_time;
2320 ulint i;
2321 ib_int64_t sig_count;
2322
2323 #ifdef UNIV_PFS_THREAD
2324 pfs_register_thread(srv_lock_timeout_thread_key);
2325 #endif
2326
2327 loop:
2328
2329 /* When someone is waiting for a lock, we wake up every second
2330 and check if a timeout has passed for a lock wait */
2331
2332 sig_count = os_event_reset(srv_timeout_event);
2333
2334 os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
2335
2336 srv_lock_timeout_active = TRUE;
2337
2338 mutex_enter(&kernel_mutex);
2339
2340 some_waits = FALSE;
2341
2342 /* Check of all slots if a thread is waiting there, and if it
2343 has exceeded the time limit */
2344
2345 for (i = 0; i < OS_THREAD_MAX_N; i++) {
2346
2347 slot = srv_mysql_table + i;
2348
2349 if (slot->in_use) {
2350 trx_t* trx;
2351 ulong lock_wait_timeout;
2352
2353 some_waits = TRUE;
2354
2355 wait_time = ut_difftime(ut_time(), slot->suspend_time);
2356
2357 trx = thr_get_trx(slot->thr);
2358 lock_wait_timeout = thd_lock_wait_timeout(
2359 trx->mysql_thd);
2360
2361 if (trx_is_interrupted(trx)
2362 || (lock_wait_timeout < 100000000
2363 && (wait_time > (double) lock_wait_timeout
2364 || wait_time < 0))) {
2365
2366 /* Timeout exceeded or a wrap-around in system
2367 time counter: cancel the lock request queued
2368 by the transaction and release possible
2369 other transactions waiting behind; it is
2370 possible that the lock has already been
2371 granted: in that case do nothing */
2372
2373 if (trx->wait_lock) {
2374 lock_cancel_waiting_and_release(
2375 trx->wait_lock);
2376 }
2377 }
2378 }
2379 }
2380
2381 os_event_reset(srv_lock_timeout_thread_event);
2382
2383 mutex_exit(&kernel_mutex);
2384
2385 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2386 goto exit_func;
2387 }
2388
2389 if (some_waits) {
2390 goto loop;
2391 }
2392
2393 srv_lock_timeout_active = FALSE;
2394
2395 #if 0
2396 /* The following synchronisation is disabled, since
2397 the InnoDB monitor output is to be updated every 15 seconds. */
2398 os_event_wait(srv_lock_timeout_thread_event);
2399 #endif
2400 goto loop;
2401
2402 exit_func:
2403 srv_lock_timeout_active = FALSE;
2404
2405 /* We count the number of threads in os_thread_exit(). A created
2406 thread should always use that to exit and not use return() to exit. */
2407
2408 os_thread_exit(NULL);
2409
2410 OS_THREAD_DUMMY_RETURN;
2411 }
2412
2413 /*********************************************************************//**
2414 A thread which prints warnings about semaphore waits which have lasted
2415 too long. These can be used to track bugs which cause hangs.
2416 Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
2417 we should avoid waiting any mutexes in this function!
2418 @return a dummy parameter */
2419 UNIV_INTERN
2420 os_thread_ret_t
srv_error_monitor_thread(void * arg)2421 srv_error_monitor_thread(
2422 /*=====================*/
2423 void* arg __attribute__((unused)))
2424 /*!< in: a dummy parameter required by
2425 os_thread_create */
2426 {
2427 /* number of successive fatal timeouts observed */
2428 ulint fatal_cnt = 0;
2429 ib_uint64_t old_lsn;
2430 ib_uint64_t new_lsn;
2431 ib_int64_t sig_count;
2432 /* longest waiting thread for a semaphore */
2433 os_thread_id_t waiter = os_thread_get_curr_id();
2434 os_thread_id_t old_waiter = waiter;
2435 /* the semaphore that is being waited for */
2436 const void* sema = NULL;
2437 const void* old_sema = NULL;
2438
2439 old_lsn = srv_start_lsn;
2440
2441 #ifdef UNIV_DEBUG_THREAD_CREATION
2442 fprintf(stderr, "Error monitor thread starts, id %lu\n",
2443 os_thread_pf(os_thread_get_curr_id()));
2444 #endif
2445
2446 #ifdef UNIV_PFS_THREAD
2447 pfs_register_thread(srv_error_monitor_thread_key);
2448 #endif
2449
2450 loop:
2451 srv_error_monitor_active = TRUE;
2452
2453 /* Try to track a strange bug reported by Harald Fuchs and others,
2454 where the lsn seems to decrease at times */
2455 if (log_peek_lsn(&new_lsn)) {
2456 if (new_lsn < old_lsn) {
2457 ut_print_timestamp(stderr);
2458 fprintf(stderr,
2459 " InnoDB: Error: old log sequence number %llu"
2460 " was greater\n"
2461 "InnoDB: than the new log sequence number %llu!\n"
2462 "InnoDB: Please submit a bug report"
2463 " to http://bugs.mysql.com\n",
2464 old_lsn, new_lsn);
2465 ut_ad(0);
2466 }
2467
2468 old_lsn = new_lsn;
2469 }
2470
2471 if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2472 /* We referesh InnoDB Monitor values so that averages are
2473 printed from at most 60 last seconds */
2474
2475 srv_refresh_innodb_monitor_stats();
2476 }
2477
2478 /* Update the statistics collected for deciding LRU
2479 eviction policy. */
2480 buf_LRU_stat_update();
2481
2482 /* Update the statistics collected for flush rate policy. */
2483 buf_flush_stat_update();
2484
2485 /* In case mutex_exit is not a memory barrier, it is
2486 theoretically possible some threads are left waiting though
2487 the semaphore is already released. Wake up those threads: */
2488
2489 sync_arr_wake_threads_if_sema_free();
2490
2491 if (sync_array_print_long_waits(&waiter, &sema)
2492 && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2493 fatal_cnt++;
2494 if (fatal_cnt > 10) {
2495
2496 fprintf(stderr,
2497 "InnoDB: Error: semaphore wait has lasted"
2498 " > %lu seconds\n"
2499 "InnoDB: We intentionally crash the server,"
2500 " because it appears to be hung.\n",
2501 (ulong) srv_fatal_semaphore_wait_threshold);
2502
2503 ut_error;
2504 }
2505 } else {
2506 fatal_cnt = 0;
2507 old_waiter = waiter;
2508 old_sema = sema;
2509 }
2510
2511 /* Flush stderr so that a database user gets the output
2512 to possible MySQL error file */
2513
2514 fflush(stderr);
2515
2516 sig_count = os_event_reset(srv_error_event);
2517
2518 os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2519
2520 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2521
2522 goto loop;
2523 }
2524
2525 srv_error_monitor_active = FALSE;
2526
2527 /* We count the number of threads in os_thread_exit(). A created
2528 thread should always use that to exit and not use return() to exit. */
2529
2530 os_thread_exit(NULL);
2531
2532 OS_THREAD_DUMMY_RETURN;
2533 }
2534
2535 /**********************************************************************//**
2536 Check whether any background thread is active. If so return the thread
2537 type
2538 @return ULINT_UNDEFINED if all are suspended or have exited, thread
2539 type if any are still active. */
2540 UNIV_INTERN
2541 ulint
srv_get_active_thread_type(void)2542 srv_get_active_thread_type(void)
2543 /*============================*/
2544 {
2545 ulint i;
2546 ibool ret = ULINT_UNDEFINED;
2547
2548 mutex_enter(&kernel_mutex);
2549
2550 for (i = 0; i <= SRV_MASTER; ++i) {
2551 if (srv_n_threads_active[i] != 0) {
2552 ret = i;
2553 break;
2554 }
2555 }
2556
2557 mutex_exit(&kernel_mutex);
2558
2559 return(ret);
2560 }
2561
2562 /*********************************************************************//**
2563 This function prints progress message every 60 seconds during server
2564 shutdown, for any activities that master thread is pending on. */
2565 static
2566 void
srv_shutdown_print_master_pending(ib_time_t * last_print_time,ulint n_tables_to_drop,ulint n_bytes_merged,ulint n_pages_flushed)2567 srv_shutdown_print_master_pending(
2568 /*==============================*/
2569 ib_time_t* last_print_time, /*!< last time the function
2570 print the message */
2571 ulint n_tables_to_drop, /*!< number of tables to
2572 be dropped */
2573 ulint n_bytes_merged, /*!< number of change buffer
2574 just merged */
2575 ulint n_pages_flushed) /*!< number of pages flushed */
2576 {
2577 ib_time_t current_time;
2578 double time_elapsed;
2579
2580 current_time = ut_time();
2581 time_elapsed = ut_difftime(current_time, *last_print_time);
2582
2583 if (time_elapsed > 60) {
2584 *last_print_time = ut_time();
2585
2586 if (n_tables_to_drop) {
2587 ut_print_timestamp(stderr);
2588 fprintf(stderr, " InnoDB: Waiting for "
2589 "%lu table(s) to be dropped\n",
2590 (ulong) n_tables_to_drop);
2591 }
2592
2593 /* Check change buffer merge, we only wait for change buffer
2594 merge if it is a slow shutdown */
2595 if (!srv_fast_shutdown && n_bytes_merged) {
2596 ut_print_timestamp(stderr);
2597 fprintf(stderr, " InnoDB: Waiting for change "
2598 "buffer merge to complete\n"
2599 " InnoDB: number of bytes of change buffer "
2600 "just merged: %lu\n",
2601 n_bytes_merged);
2602 }
2603
2604 if (n_pages_flushed) {
2605 ut_print_timestamp(stderr);
2606 fprintf(stderr, " InnoDB: Waiting for "
2607 "%lu pages to be flushed\n",
2608 (ulong) n_pages_flushed);
2609 }
2610 }
2611 }
2612
2613 /*******************************************************************//**
2614 Tells the InnoDB server that there has been activity in the database
2615 and wakes up the master thread if it is suspended (not sleeping). Used
2616 in the MySQL interface. Note that there is a small chance that the master
2617 thread stays suspended (we do not protect our operation with the
2618 srv_sys_t->mutex, for performance reasons). */
2619 UNIV_INTERN
2620 void
srv_active_wake_master_thread(void)2621 srv_active_wake_master_thread(void)
2622 /*===============================*/
2623 {
2624 srv_activity_count++;
2625
2626 if (srv_n_threads_active[SRV_MASTER] == 0) {
2627
2628 mutex_enter(&kernel_mutex);
2629
2630 srv_release_threads(SRV_MASTER, 1);
2631
2632 mutex_exit(&kernel_mutex);
2633 }
2634 }
2635
2636 /*******************************************************************//**
2637 Tells the purge thread that there has been activity in the database
2638 and wakes up the purge thread if it is suspended (not sleeping). Note
2639 that there is a small chance that the purge thread stays suspended
2640 (we do not protect our operation with the kernel mutex, for
2641 performace reasons). */
2642 UNIV_INTERN
2643 void
srv_wake_purge_thread_if_not_active(void)2644 srv_wake_purge_thread_if_not_active(void)
2645 /*=====================================*/
2646 {
2647 ut_ad(!mutex_own(&kernel_mutex));
2648
2649 if (srv_n_purge_threads > 0
2650 && srv_n_threads_active[SRV_WORKER] == 0) {
2651
2652 mutex_enter(&kernel_mutex);
2653
2654 srv_release_threads(SRV_WORKER, 1);
2655
2656 mutex_exit(&kernel_mutex);
2657 }
2658 }
2659
2660 /*******************************************************************//**
2661 Wakes up the master thread if it is suspended or being suspended. */
2662 UNIV_INTERN
2663 void
srv_wake_master_thread(void)2664 srv_wake_master_thread(void)
2665 /*========================*/
2666 {
2667 srv_activity_count++;
2668
2669 mutex_enter(&kernel_mutex);
2670
2671 srv_release_threads(SRV_MASTER, 1);
2672
2673 mutex_exit(&kernel_mutex);
2674 }
2675
2676 /*******************************************************************//**
2677 Wakes up the purge thread if it's not already awake. */
2678 UNIV_INTERN
2679 void
srv_wake_purge_thread(void)2680 srv_wake_purge_thread(void)
2681 /*=======================*/
2682 {
2683 ut_ad(!mutex_own(&kernel_mutex));
2684
2685 if (srv_n_purge_threads > 0) {
2686
2687 mutex_enter(&kernel_mutex);
2688
2689 srv_release_threads(SRV_WORKER, 1);
2690
2691 mutex_exit(&kernel_mutex);
2692 }
2693 }
2694
2695 /**********************************************************************
2696 The master thread is tasked to ensure that flush of log file happens
2697 once every second in the background. This is to ensure that not more
2698 than one second of trxs are lost in case of crash when
2699 innodb_flush_logs_at_trx_commit != 1 */
2700 static
2701 void
srv_sync_log_buffer_in_background(void)2702 srv_sync_log_buffer_in_background(void)
2703 /*===================================*/
2704 {
2705 time_t current_time = time(NULL);
2706
2707 srv_main_thread_op_info = "flushing log";
2708 if (difftime(current_time, srv_last_log_flush_time) >= 1) {
2709 log_buffer_sync_in_background(TRUE);
2710 srv_last_log_flush_time = current_time;
2711 srv_log_writes_and_flush++;
2712 }
2713 }
2714
2715 /********************************************************************//**
2716 Do a full purge, reconfigure the purge sub-system if a dynamic
2717 change is detected. */
2718 static
2719 void
srv_master_do_purge(void)2720 srv_master_do_purge(void)
2721 /*=====================*/
2722 {
2723 ulint n_pages_purged;
2724
2725 ut_ad(!mutex_own(&kernel_mutex));
2726
2727 ut_a(srv_n_purge_threads == 0);
2728
2729 do {
2730 /* Check for shutdown and change in purge config. */
2731 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2732 /* Nothing to purge. */
2733 n_pages_purged = 0;
2734 } else {
2735 n_pages_purged = trx_purge(srv_purge_batch_size);
2736 }
2737
2738 srv_sync_log_buffer_in_background();
2739
2740 } while (n_pages_purged > 0);
2741 }
2742
2743 /*********************************************************************//**
2744 The master thread controlling the server.
2745 @return a dummy parameter */
2746 UNIV_INTERN
2747 os_thread_ret_t
srv_master_thread(void * arg)2748 srv_master_thread(
2749 /*==============*/
2750 void* arg __attribute__((unused)))
2751 /*!< in: a dummy parameter required by
2752 os_thread_create */
2753 {
2754 buf_pool_stat_t buf_stat;
2755 srv_slot_t* slot;
2756 ulint old_activity_count;
2757 ulint n_pages_purged = 0;
2758 ulint n_bytes_merged;
2759 ulint n_pages_flushed;
2760 ulint n_bytes_archived;
2761 ulint n_tables_to_drop;
2762 ulint n_ios;
2763 ulint n_ios_old;
2764 ulint n_ios_very_old;
2765 ulint n_pend_ios;
2766 ulint next_itr_time;
2767 ulint i;
2768 ib_time_t last_print_time;
2769
2770 my_thread_init();
2771 #ifdef UNIV_DEBUG_THREAD_CREATION
2772 fprintf(stderr, "Master thread starts, id %lu\n",
2773 os_thread_pf(os_thread_get_curr_id()));
2774 #endif
2775
2776 #ifdef UNIV_PFS_THREAD
2777 pfs_register_thread(srv_master_thread_key);
2778 #endif
2779
2780 srv_main_thread_process_no = os_proc_get_number();
2781 srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2782
2783 mutex_enter(&kernel_mutex);
2784
2785 slot = srv_table_reserve_slot(SRV_MASTER);
2786
2787 srv_n_threads_active[SRV_MASTER]++;
2788
2789 mutex_exit(&kernel_mutex);
2790
2791 last_print_time = ut_time();
2792 loop:
2793 /*****************************************************************/
2794 /* ---- When there is database activity by users, we cycle in this
2795 loop */
2796
2797 srv_main_thread_op_info = "reserving kernel mutex";
2798
2799 buf_get_total_stat(&buf_stat);
2800 n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
2801 + buf_stat.n_pages_written;
2802 mutex_enter(&kernel_mutex);
2803
2804 /* Store the user activity counter at the start of this loop */
2805 old_activity_count = srv_activity_count;
2806
2807 mutex_exit(&kernel_mutex);
2808
2809 if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2810
2811 goto suspend_thread;
2812 }
2813
2814 /* ---- We run the following loop approximately once per second
2815 when there is database activity */
2816
2817 srv_last_log_flush_time = time(NULL);
2818
2819 /* Sleep for 1 second on entrying the for loop below the first time. */
2820 next_itr_time = ut_time_ms() + 1000;
2821
2822 for (i = 0; i < 10; i++) {
2823 ulint cur_time = ut_time_ms();
2824
2825 #ifdef UNIV_DEBUG
2826 if (btr_cur_limit_optimistic_insert_debug
2827 && srv_n_purge_threads == 0) {
2828 /* If btr_cur_limit_optimistic_insert_debug is enabled
2829 and no purge_threads, purge opportunity is increased
2830 by x100 (1purge/100msec), to speed up debug scripts
2831 which should wait for purged. */
2832 next_itr_time -= 900;
2833
2834 srv_main_thread_op_info = "master purging";
2835
2836 srv_master_do_purge();
2837
2838 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2839
2840 goto background_loop;
2841 }
2842 }
2843 #endif /* UNIV_DEBUG */
2844
2845 /* ALTER TABLE in MySQL requires on Unix that the table handler
2846 can drop tables lazily after there no longer are SELECT
2847 queries to them. */
2848
2849 srv_main_thread_op_info = "doing background drop tables";
2850
2851 row_drop_tables_for_mysql_in_background();
2852
2853 srv_main_thread_op_info = "";
2854
2855 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2856
2857 goto background_loop;
2858 }
2859
2860 buf_get_total_stat(&buf_stat);
2861
2862 n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
2863 + buf_stat.n_pages_written;
2864
2865 srv_main_thread_op_info = "sleeping";
2866 srv_main_1_second_loops++;
2867
2868 if (next_itr_time > cur_time
2869 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
2870
2871 /* Get sleep interval in micro seconds. We use
2872 ut_min() to avoid long sleep in case of
2873 wrap around. */
2874 os_thread_sleep(ut_min(1000000,
2875 (next_itr_time - cur_time)
2876 * 1000));
2877 srv_main_sleeps++;
2878 }
2879
2880 /* Each iteration should happen at 1 second interval. */
2881 next_itr_time = ut_time_ms() + 1000;
2882
2883 /* Flush logs if needed */
2884 srv_sync_log_buffer_in_background();
2885
2886 srv_main_thread_op_info = "making checkpoint";
2887 log_free_check();
2888
2889 /* If i/os during one second sleep were less than 5% of
2890 capacity, we assume that there is free disk i/o capacity
2891 available, and it makes sense to do an insert buffer merge. */
2892
2893 buf_get_total_stat(&buf_stat);
2894 n_pend_ios = buf_get_n_pending_ios()
2895 + log_sys->n_pending_writes;
2896 n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2897 + buf_stat.n_pages_written;
2898 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2899 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
2900 srv_main_thread_op_info = "doing insert buffer merge";
2901 ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2902
2903 /* Flush logs if needed */
2904 srv_sync_log_buffer_in_background();
2905 }
2906
2907 if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2908 > srv_max_buf_pool_modified_pct)) {
2909
2910 /* Try to keep the number of modified pages in the
2911 buffer pool under the limit wished by the user */
2912
2913 srv_main_thread_op_info =
2914 "flushing buffer pool pages";
2915 n_pages_flushed = buf_flush_list(
2916 PCT_IO(100), IB_ULONGLONG_MAX);
2917
2918 } else if (srv_adaptive_flushing) {
2919
2920 /* Try to keep the rate of flushing of dirty
2921 pages such that redo log generation does not
2922 produce bursts of IO at checkpoint time. */
2923 ulint n_flush = buf_flush_get_desired_flush_rate();
2924
2925 if (n_flush) {
2926 srv_main_thread_op_info =
2927 "flushing buffer pool pages";
2928 n_flush = ut_min(PCT_IO(100), n_flush);
2929 n_pages_flushed =
2930 buf_flush_list(
2931 n_flush,
2932 IB_ULONGLONG_MAX);
2933 }
2934 }
2935
2936 if (srv_activity_count == old_activity_count) {
2937
2938 /* There is no user activity at the moment, go to
2939 the background loop */
2940
2941 goto background_loop;
2942 }
2943 }
2944
2945 /* ---- We perform the following code approximately once per
2946 10 seconds when there is database activity */
2947
2948 #ifdef MEM_PERIODIC_CHECK
2949 /* Check magic numbers of every allocated mem block once in 10
2950 seconds */
2951 mem_validate_all_blocks();
2952 #endif
2953 /* If i/os during the 10 second period were less than 200% of
2954 capacity, we assume that there is free disk i/o capacity
2955 available, and it makes sense to flush srv_io_capacity pages.
2956
2957 Note that this is done regardless of the fraction of dirty
2958 pages relative to the max requested by the user. The one second
2959 loop above requests writes for that case. The writes done here
2960 are not required, and may be disabled. */
2961
2962 buf_get_total_stat(&buf_stat);
2963 n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
2964 n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2965 + buf_stat.n_pages_written;
2966
2967 srv_main_10_second_loops++;
2968 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2969 && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
2970
2971 srv_main_thread_op_info = "flushing buffer pool pages";
2972 buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
2973
2974 /* Flush logs if needed */
2975 srv_sync_log_buffer_in_background();
2976 }
2977
2978 /* We run a batch of insert buffer merge every 10 seconds,
2979 even if the server were active */
2980
2981 srv_main_thread_op_info = "doing insert buffer merge";
2982 ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2983
2984 /* Flush logs if needed */
2985 srv_sync_log_buffer_in_background();
2986
2987 if (srv_n_purge_threads == 0) {
2988 srv_main_thread_op_info = "master purging";
2989
2990 srv_master_do_purge();
2991
2992 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2993
2994 goto background_loop;
2995 }
2996 }
2997
2998 srv_main_thread_op_info = "flushing buffer pool pages";
2999
3000 /* Flush a few oldest pages to make a new checkpoint younger */
3001
3002 if (buf_get_modified_ratio_pct() > 70) {
3003
3004 /* If there are lots of modified pages in the buffer pool
3005 (> 70 %), we assume we can afford reserving the disk(s) for
3006 the time it requires to flush 100 pages */
3007
3008 n_pages_flushed = buf_flush_list(
3009 PCT_IO(100), IB_ULONGLONG_MAX);
3010 } else {
3011 /* Otherwise, we only flush a small number of pages so that
3012 we do not unnecessarily use much disk i/o capacity from
3013 other work */
3014
3015 n_pages_flushed = buf_flush_list(
3016 PCT_IO(10), IB_ULONGLONG_MAX);
3017 }
3018
3019 srv_main_thread_op_info = "making checkpoint";
3020
3021 /* Make a new checkpoint about once in 10 seconds */
3022
3023 log_checkpoint(TRUE, FALSE);
3024
3025 srv_main_thread_op_info = "reserving kernel mutex";
3026
3027 mutex_enter(&kernel_mutex);
3028
3029 /* ---- When there is database activity, we jump from here back to
3030 the start of loop */
3031
3032 if (srv_activity_count != old_activity_count) {
3033 mutex_exit(&kernel_mutex);
3034 goto loop;
3035 }
3036
3037 mutex_exit(&kernel_mutex);
3038
3039 /* If the database is quiet, we enter the background loop */
3040
3041 /*****************************************************************/
3042 background_loop:
3043 /* ---- In this loop we run background operations when the server
3044 is quiet from user activity. Also in the case of a shutdown, we
3045 loop here, flushing the buffer pool to the data files. */
3046
3047 /* The server has been quiet for a while: start running background
3048 operations */
3049 srv_main_background_loops++;
3050 srv_main_thread_op_info = "doing background drop tables";
3051
3052 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
3053
3054 if (n_tables_to_drop > 0) {
3055 /* Do not monopolize the CPU even if there are tables waiting
3056 in the background drop queue. (It is essentially a bug if
3057 MySQL tries to drop a table while there are still open handles
3058 to it and we had to put it to the background drop queue.) */
3059
3060 if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
3061 os_thread_sleep(100000);
3062 }
3063 }
3064
3065 if (srv_n_purge_threads == 0) {
3066 srv_main_thread_op_info = "master purging";
3067
3068 srv_master_do_purge();
3069 }
3070
3071 srv_main_thread_op_info = "reserving kernel mutex";
3072
3073 mutex_enter(&kernel_mutex);
3074 if (srv_activity_count != old_activity_count) {
3075 mutex_exit(&kernel_mutex);
3076 goto loop;
3077 }
3078 mutex_exit(&kernel_mutex);
3079
3080 srv_main_thread_op_info = "doing insert buffer merge";
3081
3082 if (srv_fast_shutdown && srv_shutdown_state > 0) {
3083 n_bytes_merged = 0;
3084 } else {
3085 /* This should do an amount of IO similar to the number of
3086 dirty pages that will be flushed in the call to
3087 buf_flush_list below. Otherwise, the system favors
3088 clean pages over cleanup throughput. */
3089 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
3090 PCT_IO(100));
3091 }
3092
3093 srv_main_thread_op_info = "reserving kernel mutex";
3094
3095 mutex_enter(&kernel_mutex);
3096 if (srv_activity_count != old_activity_count) {
3097 mutex_exit(&kernel_mutex);
3098 goto loop;
3099 }
3100 mutex_exit(&kernel_mutex);
3101
3102 flush_loop:
3103 srv_main_thread_op_info = "flushing buffer pool pages";
3104 srv_main_flush_loops++;
3105 if (srv_fast_shutdown < 2 || srv_shutdown_state == SRV_SHUTDOWN_NONE) {
3106 n_pages_flushed = buf_flush_list(
3107 PCT_IO(100), IB_ULONGLONG_MAX);
3108 } else {
3109 /* In the fastest shutdown we do not flush the buffer pool
3110 to data files: we set n_pages_flushed to 0 artificially. */
3111 ut_ad(srv_fast_shutdown == 2);
3112 ut_ad(srv_shutdown_state > 0);
3113
3114 n_pages_flushed = 0;
3115
3116 DBUG_PRINT("master", ("doing very fast shutdown"));
3117 }
3118
3119 srv_main_thread_op_info = "reserving kernel mutex";
3120
3121 mutex_enter(&kernel_mutex);
3122 if (srv_activity_count != old_activity_count) {
3123 mutex_exit(&kernel_mutex);
3124 goto loop;
3125 }
3126 mutex_exit(&kernel_mutex);
3127
3128 srv_main_thread_op_info = "waiting for buffer pool flush to end";
3129 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
3130
3131 /* Flush logs if needed */
3132 srv_sync_log_buffer_in_background();
3133
3134 srv_main_thread_op_info = "making checkpoint";
3135
3136 log_checkpoint(TRUE, FALSE);
3137
3138 if (!(srv_fast_shutdown == 2 && srv_shutdown_state > 0)
3139 && (buf_get_modified_ratio_pct()
3140 > srv_max_buf_pool_modified_pct)) {
3141
3142 /* If the server is doing a very fast shutdown, then
3143 we will not come here. */
3144
3145 /* Try to keep the number of modified pages in the
3146 buffer pool under the limit wished by the user */
3147
3148 goto flush_loop;
3149 }
3150
3151 srv_main_thread_op_info = "reserving kernel mutex";
3152
3153 mutex_enter(&kernel_mutex);
3154 if (srv_activity_count != old_activity_count) {
3155 mutex_exit(&kernel_mutex);
3156 goto loop;
3157 }
3158 mutex_exit(&kernel_mutex);
3159 /*
3160 srv_main_thread_op_info = "archiving log (if log archive is on)";
3161
3162 log_archive_do(FALSE, &n_bytes_archived);
3163 */
3164 n_bytes_archived = 0;
3165
3166 /* Print progress message every 60 seconds during shutdown */
3167 if (srv_shutdown_state > 0 && srv_print_verbose_log) {
3168 srv_shutdown_print_master_pending(&last_print_time,
3169 n_tables_to_drop,
3170 n_bytes_merged,
3171 n_pages_flushed);
3172 }
3173
3174 /* Keep looping in the background loop if still work to do */
3175
3176 if (srv_fast_shutdown && srv_shutdown_state > 0) {
3177 if (n_tables_to_drop + n_pages_flushed
3178 + n_bytes_archived != 0) {
3179
3180 /* If we are doing a fast shutdown (= the default)
3181 we do not do purge or insert buffer merge. But we
3182 flush the buffer pool completely to disk.
3183 In a 'very fast' shutdown we do not flush the buffer
3184 pool to data files: we have set n_pages_flushed to
3185 0 artificially. */
3186
3187 goto background_loop;
3188 }
3189 } else if (n_tables_to_drop
3190 + n_pages_purged + n_bytes_merged + n_pages_flushed
3191 + n_bytes_archived != 0) {
3192
3193 /* In a 'slow' shutdown we run purge and the insert buffer
3194 merge to completion */
3195
3196 goto background_loop;
3197 }
3198
3199 /* There is no work for background operations either: suspend
3200 master thread to wait for more server activity */
3201
3202 suspend_thread:
3203 srv_main_thread_op_info = "suspending";
3204
3205 mutex_enter(&kernel_mutex);
3206
3207 if (row_get_background_drop_list_len_low() > 0) {
3208 mutex_exit(&kernel_mutex);
3209
3210 goto loop;
3211 }
3212
3213 srv_suspend_thread(slot);
3214
3215 mutex_exit(&kernel_mutex);
3216
3217 /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
3218 waits for database activity to die down when converting < 4.1.x
3219 databases, and relies on this string being exactly as it is. InnoDB
3220 manual also mentions this string in several places. */
3221 srv_main_thread_op_info = "waiting for server activity";
3222
3223 os_event_wait(slot->event);
3224
3225 if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
3226 my_thread_end();
3227 os_thread_exit(NULL);
3228 }
3229
3230 /* When there is user activity, InnoDB will set the event and the
3231 main thread goes back to loop. */
3232
3233 goto loop;
3234 }
3235
3236 /*********************************************************************//**
3237 Asynchronous purge thread.  Runs purge batches while the rollback-segment
history list is long enough to justify work, suspends itself on its
reserved worker slot when there is little or nothing to purge, and
terminates when the server is shutting down or purge is disabled
(srv_force_recovery >= SRV_FORCE_NO_BACKGROUND, or a fast shutdown).
3238 @return a dummy parameter (required by os_thread_create; never actually
returned — the thread terminates via os_thread_exit()) */
3239 UNIV_INTERN
3240 os_thread_ret_t
srv_purge_thread(void * arg)3241 srv_purge_thread(
3242 /*=============*/
3243 void* arg __attribute__((unused))) /*!< in: a dummy parameter
3244 required by os_thread_create */
3245 {
3246 srv_slot_t* slot;
	/* Count of consecutive loop iterations in which nothing was
	purged; once it reaches TRX_SYS_N_RSEGS the thread suspends. */
3247 ulint retries = 0;
	/* Pages purged since the last counter reset.  Deliberately
	initialized to ULINT_UNDEFINED (a large nonzero value) so that
	the first pass through the bookkeeping below takes the
	"n_total_purged > 0" branch and resets both counters. */
3248 ulint n_total_purged = ULINT_UNDEFINED;
3249
	/* Per-thread MySQL initialization; paired with my_thread_end()
	before the thread exits. */
3250 my_thread_init();
	/* This implementation supports exactly one dedicated purge
	thread (see also the srv_n_purge_threads checks elsewhere). */
3251 ut_a(srv_n_purge_threads == 1);
3252
3253 #ifdef UNIV_PFS_THREAD
3254 pfs_register_thread(srv_purge_thread_key);
3255 #endif /* UNIV_PFS_THREAD */
3256
3257 #ifdef UNIV_DEBUG_THREAD_CREATION
3258 fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
3259 os_thread_pf(os_thread_get_curr_id()));
3260 #endif /* UNIV_DEBUG_THREAD_CREATION */
3261
	/* Reserve a worker slot and mark this thread active; both the
	slot table and the active count are protected by kernel_mutex. */
3262 mutex_enter(&kernel_mutex);
3263
3264 slot = srv_table_reserve_slot(SRV_WORKER);
3265
3266 ++srv_n_threads_active[SRV_WORKER];
3267
3268 mutex_exit(&kernel_mutex);
3269
3270 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
3271
3272 ulint n_pages_purged = 0;
3273
3274 /* If there are very few records to purge or the last
3275 purge didn't purge any records then wait for activity.
3276 We peek at the history len without holding any mutex
3277 because in the worst case we will end up waiting for
3278 the next purge event. */
3279 if (trx_sys->rseg_history_len < srv_purge_batch_size
3280 || (n_total_purged == 0
3281 && retries >= TRX_SYS_N_RSEGS)) {
3282
3283 mutex_enter(&kernel_mutex);
3284
			/* Marks the slot suspended and decrements the
			active count; we then block on the slot event
			until srv_release_threads() (or shutdown) sets
			it. */
3285 srv_suspend_thread(slot);
3286
3287 mutex_exit(&kernel_mutex);
3288
3289 os_event_wait(slot->event);
3290
3291 retries = 0;
3292 }
3293
3294 /* Check for shutdown and whether we should do purge at all. */
3295 if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
3296 || srv_shutdown_state != 0
3297 || srv_fast_shutdown) {
3298
3299 break;
3300 }
3301
		/* Bookkeeping: advance the no-progress counter while
		nothing is purged; once progress is made, reset both
		counters (this branch is also taken on the first
		iteration because of the ULINT_UNDEFINED initializer). */
3302 if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
3303 ++retries;
3304 } else if (n_total_purged > 0) {
3305 retries = 0;
3306 n_total_purged = 0;
3307 }
3308
3309 /* Purge until there are no more records to purge and there is
3310 no change in configuration or server state. */
3311 do {
3312 n_pages_purged = trx_purge(srv_purge_batch_size);
3313
3314 n_total_purged += n_pages_purged;
3315
3316 } while (n_pages_purged > 0 && !srv_fast_shutdown);
3317
		/* Flush the redo log buffer if needed after a burst of
		purge work. */
3318 srv_sync_log_buffer_in_background();
3319 }
3320
3321 mutex_enter(&kernel_mutex);
3322
3323 /* Decrement the active count. */
3324 srv_suspend_thread(slot);
3325
	/* Free the slot for reuse now that this thread is exiting. */
3326 slot->in_use = FALSE;
3327
3328 mutex_exit(&kernel_mutex);
3329
3330 #ifdef UNIV_DEBUG_THREAD_CREATION
3331 fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
3332 os_thread_pf(os_thread_get_curr_id()));
3333 #endif /* UNIV_DEBUG_THREAD_CREATION */
3334
3335 my_thread_end();
3336
3337 /* We count the number of threads in os_thread_exit(). A created
3338 thread should always use that to exit and not use return() to exit. */
3339 os_thread_exit(NULL);
3340
3341 OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3342 }
3343
3344 /**********************************************************************//**
3345 Enqueues a task to server task queue and releases a worker thread, if there
3346 is a suspended one.  The queue append and the worker wakeup are performed
atomically with respect to other kernel_mutex holders. */
3347 UNIV_INTERN
3348 void
srv_que_task_enqueue_low(que_thr_t * thr)3349 srv_que_task_enqueue_low(
3350 /*=====================*/
3351 que_thr_t* thr) /*!< in: query thread; ownership of the queue
				link passes to srv_sys->tasks */
3352 {
3353 ut_ad(thr);
3354
	/* Both the task list and the worker slot table are protected
	by kernel_mutex. */
3355 mutex_enter(&kernel_mutex);
3356
	/* Append at the tail so tasks are dispatched in FIFO order. */
3357 UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
3358
	/* Wake at most one suspended SRV_WORKER thread to service the
	queue (presumably a no-op if none is suspended — see
	srv_release_threads). */
3359 srv_release_threads(SRV_WORKER, 1);
3360
3361 mutex_exit(&kernel_mutex);
3362 }
3363