1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
4 
5 Portions of this file contain modifications contributed and copyrighted by
6 Google, Inc. Those modifications are gratefully acknowledged and are described
7 briefly in the InnoDB documentation. The contributions by Google are
8 incorporated with their permission, and subject to the conditions contained in
9 the file COPYING.Google.
10 
11 Portions of this file contain modifications contributed and copyrighted
12 by Percona Inc.. Those modifications are
13 gratefully acknowledged and are described briefly in the InnoDB
14 documentation. The contributions by Percona Inc. are incorporated with
15 their permission, and subject to the conditions contained in the file
16 COPYING.Percona.
17 
18 This program is free software; you can redistribute it and/or modify
19 it under the terms of the GNU General Public License, version 2.0,
20 as published by the Free Software Foundation.
21 
22 This program is also distributed with certain software (including
23 but not limited to OpenSSL) that is licensed under separate terms,
24 as designated in a particular file or component or in included license
25 documentation.  The authors of MySQL hereby grant you an additional
26 permission to link the program and your derivative works with the
27 separately licensed software that they have included with MySQL.
28 
29 This program is distributed in the hope that it will be useful,
30 but WITHOUT ANY WARRANTY; without even the implied warranty of
31 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32 GNU General Public License, version 2.0, for more details.
33 
34 You should have received a copy of the GNU General Public License along with
35 this program; if not, write to the Free Software Foundation, Inc.,
36 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
37 
38 *****************************************************************************/
39 
40 /**************************************************//**
41 @file srv/srv0conc.cc
42 
43 InnoDB concurrency manager
44 
45 Created 2011/04/18 Sunny Bains
46 *******************************************************/
47 
48 #include "srv0srv.h"
49 #include "sync0sync.h"
50 #include "btr0types.h"
51 #include "trx0trx.h"
52 
53 #include "mysql/plugin.h"
54 
55 /** Number of times a thread is allowed to enter InnoDB within the same
56 SQL query after it has once got the ticket. */
57 UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
58 
59 #ifdef HAVE_ATOMIC_BUILTINS
60 /** Maximum sleep delay (in micro-seconds), value of 0 disables it. */
61 UNIV_INTERN ulong	srv_adaptive_max_sleep_delay = 150000;
62 #endif /* HAVE_ATOMIC_BUILTINS */
63 
64 UNIV_INTERN ulong	srv_thread_sleep_delay	= 10000;
65 
66 
67 /** We are prepared for a situation that we have this many threads waiting for
68 a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
69 value. */
70 
71 UNIV_INTERN ulint	srv_max_n_threads	= 0;
72 
73 /** The following controls how many threads we let inside InnoDB concurrently:
74 threads waiting for locks are not counted into the number because otherwise
75 we could get a deadlock. Value of 0 will disable the concurrency check. */
76 
77 UNIV_INTERN ulong	srv_thread_concurrency	= 0;
78 
79 #ifndef HAVE_ATOMIC_BUILTINS
80 
81 /** This mutex protects srv_conc data structures */
82 static os_fast_mutex_t	srv_conc_mutex;
83 
84 /** Concurrency list node */
85 typedef UT_LIST_NODE_T(struct srv_conc_slot_t)	srv_conc_node_t;
86 
87 /** Slot for a thread waiting in the concurrency control queue. */
88 struct srv_conc_slot_t{
89 	os_event_t	event;		/*!< event to wait */
90 	ibool		reserved;	/*!< TRUE if slot
91 					reserved */
92 	ibool		wait_ended;	/*!< TRUE when another thread has
93 					already set the event and the thread
94 					in this slot is free to proceed; but
95 					reserved may still be TRUE at that
96 					point */
97 	srv_conc_node_t	srv_conc_queue;	/*!< queue node */
98 };
99 
100 /** Queue of threads waiting to get in */
101 typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue_t;
102 
103 static srv_conc_queue_t	srv_conc_queue;
104 
105 /** Array of wait slots */
106 static srv_conc_slot_t*	srv_conc_slots;
107 
108 #if defined(UNIV_PFS_MUTEX)
109 /* Key to register srv_conc_mutex_key with performance schema */
110 UNIV_INTERN mysql_pfs_key_t	srv_conc_mutex_key;
111 #endif /* UNIV_PFS_MUTEX */
112 
113 #endif /* !HAVE_ATOMIC_BUILTINS */
114 
115 /** Variables tracking the active and waiting threads. */
116 struct srv_conc_t {
117 	char		pad[64  - (sizeof(ulint) + sizeof(lint))];
118 
119 	/** Number of transactions that have declared_to_be_inside_innodb set.
120 	It used to be a non-error for this value to drop below zero temporarily.
121 	This is no longer true. We'll, however, keep the lint datatype to add
122 	assertions to catch any corner cases that we may have missed. */
123 
124 	volatile lint	n_active;
125 
126 	/** Number of OS threads waiting in the FIFO for permission to
127 	enter InnoDB */
128 	volatile lint	n_waiting;
129 };
130 
131 /* Control variables for tracking concurrency. */
132 static srv_conc_t	srv_conc;
133 
134 /*********************************************************************//**
135 Initialise the concurrency management data structures */
136 void
srv_conc_init(void)137 srv_conc_init(void)
138 /*===============*/
139 {
140 #ifndef HAVE_ATOMIC_BUILTINS
141 	ulint		i;
142 
143 	/* Init the server concurrency restriction data structures */
144 
145 	os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex);
146 
147 	UT_LIST_INIT(srv_conc_queue);
148 
149 	srv_conc_slots = static_cast<srv_conc_slot_t*>(
150 		mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots)));
151 
152 	for (i = 0; i < OS_THREAD_MAX_N; i++) {
153 		srv_conc_slot_t*	conc_slot = &srv_conc_slots[i];
154 
155 		conc_slot->event = os_event_create();
156 		ut_a(conc_slot->event);
157 	}
158 #endif /* !HAVE_ATOMIC_BUILTINS */
159 }
160 
161 /*********************************************************************//**
162 Free the concurrency management data structures */
163 void
srv_conc_free(void)164 srv_conc_free(void)
165 /*===============*/
166 {
167 #ifndef HAVE_ATOMIC_BUILTINS
168 	os_fast_mutex_free(&srv_conc_mutex);
169 
170 	for (ulint i = 0; i < OS_THREAD_MAX_N; i++)
171 		os_event_free(srv_conc_slots[i].event);
172 
173 	mem_free(srv_conc_slots);
174 	srv_conc_slots = NULL;
175 #endif /* !HAVE_ATOMIC_BUILTINS */
176 }
177 
178 #ifdef HAVE_ATOMIC_BUILTINS
179 /*********************************************************************//**
180 Note that a user thread is entering InnoDB. */
181 static
182 void
srv_enter_innodb_with_tickets(trx_t * trx)183 srv_enter_innodb_with_tickets(
184 /*==========================*/
185 	trx_t*	trx)			/*!< in/out: transaction that wants
186 					to enter InnoDB */
187 {
188 	trx->declared_to_be_inside_innodb = TRUE;
189 	trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
190 }
191 
192 /*********************************************************************//**
193 Handle the scheduling of a user thread that wants to enter InnoDB.  Setting
194 srv_adaptive_max_sleep_delay > 0 switches the adaptive sleep calibration to
195 ON. When set, we want to wait in the queue for as little time as possible.
196 However, very short waits will result in a lot of context switches and that
197 is also not desirable. When threads need to sleep multiple times we increment
198 os_thread_sleep_delay by one. When we see threads getting a slot without
199 waiting and there are no other threads waiting in the queue, we try and reduce
200 the wait as much as we can. Currently we reduce it by half each time. If the
201 thread only had to wait for one turn before it was able to enter InnoDB we
202 decrement it by one. This is to try and keep the sleep time stable around the
203 "optimum" sleep time. */
204 static
205 void
srv_conc_enter_innodb_with_atomics(trx_t * trx)206 srv_conc_enter_innodb_with_atomics(
207 /*===============================*/
208 	trx_t*	trx)			/*!< in/out: transaction that wants
209 					to enter InnoDB */
210 {
211 	ulint	n_sleeps = 0;
212 	ibool	notified_mysql = FALSE;
213 
214 	ut_a(!trx->declared_to_be_inside_innodb);
215 
216 	for (;;) {
217 		ulint	sleep_in_us;
218 
219 		if (srv_conc.n_active < (lint) srv_thread_concurrency) {
220 			ulint	n_active;
221 
222 			/* Check if there are any free tickets. */
223 			n_active = os_atomic_increment_lint(
224 				&srv_conc.n_active, 1);
225 
226 			if (n_active <= srv_thread_concurrency) {
227 
228 				srv_enter_innodb_with_tickets(trx);
229 
230 				if (notified_mysql) {
231 
232 					(void) os_atomic_decrement_lint(
233 						&srv_conc.n_waiting, 1);
234 
235 					thd_wait_end(trx->mysql_thd);
236 				}
237 
238 				if (srv_adaptive_max_sleep_delay > 0) {
239 					if (srv_thread_sleep_delay > 20
240 					    && n_sleeps == 1) {
241 
242 						--srv_thread_sleep_delay;
243 					}
244 
245 					if (srv_conc.n_waiting == 0) {
246 						srv_thread_sleep_delay >>= 1;
247 					}
248 				}
249 
250 				return;
251 			}
252 
253 			/* Since there were no free seats, we relinquish
254 			the overbooked ticket. */
255 
256 			(void) os_atomic_decrement_lint(
257 				&srv_conc.n_active, 1);
258 		}
259 
260 		if (!notified_mysql) {
261 			(void) os_atomic_increment_lint(
262 				&srv_conc.n_waiting, 1);
263 
264 			/* Release possible search system latch this
265 			thread has */
266 
267 			if (trx->has_search_latch) {
268 				trx_search_latch_release_if_reserved(trx);
269 			}
270 
271 			thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
272 
273 			notified_mysql = TRUE;
274 		}
275 
276 		DEBUG_SYNC_C("user_thread_waiting");
277 		trx->op_info = "sleeping before entering InnoDB";
278 
279 		sleep_in_us = srv_thread_sleep_delay;
280 
281 		/* Guard against overflow when adaptive sleep delay is on. */
282 
283 		if (srv_adaptive_max_sleep_delay > 0
284 		    && sleep_in_us > srv_adaptive_max_sleep_delay) {
285 
286 			sleep_in_us = srv_adaptive_max_sleep_delay;
287 			srv_thread_sleep_delay = static_cast<ulong>(sleep_in_us);
288 		}
289 
290 		os_thread_sleep(sleep_in_us);
291 		trx->innodb_que_wait_timer += sleep_in_us;
292 
293 		trx->op_info = "";
294 
295 		++n_sleeps;
296 
297 		if (srv_adaptive_max_sleep_delay > 0 && n_sleeps > 1) {
298 			++srv_thread_sleep_delay;
299 		}
300 	}
301 }
302 
303 /*********************************************************************//**
304 Note that a user thread is leaving InnoDB code. */
305 static
306 void
srv_conc_exit_innodb_with_atomics(trx_t * trx)307 srv_conc_exit_innodb_with_atomics(
308 /*==============================*/
309 	trx_t*	trx)		/*!< in/out: transaction */
310 {
311 	trx->n_tickets_to_enter_innodb = 0;
312 	trx->declared_to_be_inside_innodb = FALSE;
313 
314 	(void) os_atomic_decrement_lint(&srv_conc.n_active, 1);
315 }
316 #else
317 /*********************************************************************//**
318 Note that a user thread is leaving InnoDB code. */
319 static
320 void
srv_conc_exit_innodb_without_atomics(trx_t * trx)321 srv_conc_exit_innodb_without_atomics(
322 /*=================================*/
323 	trx_t*	trx)		/*!< in/out: transaction */
324 {
325 	srv_conc_slot_t*	slot;
326 
327 	os_fast_mutex_lock(&srv_conc_mutex);
328 
329 	ut_ad(srv_conc.n_active > 0);
330 	srv_conc.n_active--;
331 	trx->declared_to_be_inside_innodb = FALSE;
332 	trx->n_tickets_to_enter_innodb = 0;
333 
334 	slot = NULL;
335 
336 	if (srv_conc.n_active < (lint) srv_thread_concurrency) {
337 		/* Look for a slot where a thread is waiting and no other
338 		thread has yet released the thread */
339 
340 		for (slot = UT_LIST_GET_FIRST(srv_conc_queue);
341 		     slot != NULL && slot->wait_ended == TRUE;
342 		     slot = UT_LIST_GET_NEXT(srv_conc_queue, slot)) {
343 
344 			/* No op */
345 		}
346 
347 		if (slot != NULL) {
348 			slot->wait_ended = TRUE;
349 
350 			/* We increment the count on behalf of the released
351 			thread */
352 
353 			srv_conc.n_active++;
354 		}
355 	}
356 
357 	os_fast_mutex_unlock(&srv_conc_mutex);
358 
359 	if (slot != NULL) {
360 		os_event_set(slot->event);
361 	}
362 }
363 
364 /*********************************************************************//**
365 Handle the scheduling of a user thread that wants to enter InnoDB. */
366 static
367 void
srv_conc_enter_innodb_without_atomics(trx_t * trx)368 srv_conc_enter_innodb_without_atomics(
369 /*==================================*/
370 	trx_t*	trx)			/*!< in/out: transaction that wants
371 					to enter InnoDB */
372 {
373 	ulint			i;
374 	srv_conc_slot_t*	slot = NULL;
375 	ibool			has_slept = FALSE;
376 	ib_uint64_t		start_time = 0L;
377 	ib_uint64_t		finish_time = 0L;
378 	ulint			sec;
379 	ulint			ms;
380 
381 	os_fast_mutex_lock(&srv_conc_mutex);
382 retry:
383 	if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
384 		os_fast_mutex_unlock(&srv_conc_mutex);
385 		ut_print_timestamp(stderr);
386 		fputs("  InnoDB: Error: trying to declare trx"
387 		      " to enter InnoDB, but\n"
388 		      "InnoDB: it already is declared.\n", stderr);
389 		trx_print(stderr, trx, 0);
390 		putc('\n', stderr);
391 		return;
392 	}
393 
394 	ut_ad(srv_conc.n_active >= 0);
395 
396 	if (srv_conc.n_active < (lint) srv_thread_concurrency) {
397 
398 		srv_conc.n_active++;
399 		trx->declared_to_be_inside_innodb = TRUE;
400 		trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
401 
402 		os_fast_mutex_unlock(&srv_conc_mutex);
403 
404 		return;
405 	}
406 
407 	/* If the transaction is not holding resources, let it sleep
408 	for srv_thread_sleep_delay microseconds, and try again then */
409 
410 	if (!has_slept && !trx->has_search_latch
411 	    && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) {
412 
413 		has_slept = TRUE; /* We let it sleep only once to avoid
414 				starvation */
415 
416 		srv_conc.n_waiting++;
417 
418 		os_fast_mutex_unlock(&srv_conc_mutex);
419 
420 		trx->op_info = "sleeping before joining InnoDB queue";
421 
422 		/* Peter Zaitsev suggested that we take the sleep away
423 		altogether. But the sleep may be good in pathological
424 		situations of lots of thread switches. Simply put some
425 		threads aside for a while to reduce the number of thread
426 		switches. */
427 		if (srv_thread_sleep_delay > 0) {
428 			os_thread_sleep(srv_thread_sleep_delay);
429 			trx->innodb_que_wait_timer += sleep_in_us;
430 		}
431 
432 		trx->op_info = "";
433 
434 		os_fast_mutex_lock(&srv_conc_mutex);
435 
436 		srv_conc.n_waiting--;
437 
438 		goto retry;
439 	}
440 
441 	/* Too many threads inside: put the current thread to a queue */
442 
443 	for (i = 0; i < OS_THREAD_MAX_N; i++) {
444 		slot = srv_conc_slots + i;
445 
446 		if (!slot->reserved) {
447 
448 			break;
449 		}
450 	}
451 
452 	if (i == OS_THREAD_MAX_N) {
453 		/* Could not find a free wait slot, we must let the
454 		thread enter */
455 
456 		srv_conc.n_active++;
457 		trx->declared_to_be_inside_innodb = TRUE;
458 		trx->n_tickets_to_enter_innodb = 0;
459 
460 		os_fast_mutex_unlock(&srv_conc_mutex);
461 
462 		return;
463 	}
464 
465 	/* Release possible search system latch this thread has */
466 	if (trx->has_search_latch) {
467 		trx_search_latch_release_if_reserved(trx);
468 	}
469 
470 	/* Add to the queue */
471 	slot->reserved = TRUE;
472 	slot->wait_ended = FALSE;
473 
474 	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
475 
476 	os_event_reset(slot->event);
477 
478 	srv_conc.n_waiting++;
479 
480 	os_fast_mutex_unlock(&srv_conc_mutex);
481 
482 	/* Go to wait for the event; when a thread leaves InnoDB it will
483 	release this thread */
484 
485 	ut_ad(!trx->has_search_latch);
486 #ifdef UNIV_SYNC_DEBUG
487 	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
488 #endif /* UNIV_SYNC_DEBUG */
489 
490 	if (UNIV_UNLIKELY(trx->take_stats)) {
491 		ut_usectime(&sec, &ms);
492 		start_time = (ib_uint64_t)sec * 1000000 + ms;
493 	} else {
494 		start_time = 0;
495 	}
496 
497 	trx->op_info = "waiting in InnoDB queue";
498 
499 	thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
500 
501 	os_event_wait(slot->event);
502 	thd_wait_end(trx->mysql_thd);
503 
504 	trx->op_info = "";
505 
506 	if (UNIV_UNLIKELY(start_time != 0)) {
507 		ut_usectime(&sec, &ms);
508 		finish_time = (ib_uint64_t)sec * 1000000 + ms;
509 		trx->innodb_que_wait_timer += finish_time - start_time;
510 	}
511 
512 	os_fast_mutex_lock(&srv_conc_mutex);
513 
514 	srv_conc.n_waiting--;
515 
516 	/* NOTE that the thread which released this thread already
517 	incremented the thread counter on behalf of this thread */
518 
519 	slot->reserved = FALSE;
520 
521 	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
522 
523 	trx->declared_to_be_inside_innodb = TRUE;
524 	trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
525 
526 	os_fast_mutex_unlock(&srv_conc_mutex);
527 }
528 #endif /* HAVE_ATOMIC_BUILTINS */
529 
530 /*********************************************************************//**
531 Puts an OS thread to wait if there are too many concurrent threads
532 (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
533 UNIV_INTERN
534 void
srv_conc_enter_innodb(trx_t * trx)535 srv_conc_enter_innodb(
536 /*==================*/
537 	trx_t*	trx)	/*!< in: transaction object associated with the
538 			thread */
539 {
540 #ifdef UNIV_SYNC_DEBUG
541 	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
542 #endif /* UNIV_SYNC_DEBUG */
543 
544 #ifdef HAVE_ATOMIC_BUILTINS
545 	srv_conc_enter_innodb_with_atomics(trx);
546 #else
547 	srv_conc_enter_innodb_without_atomics(trx);
548 #endif /* HAVE_ATOMIC_BUILTINS */
549 }
550 
551 /*********************************************************************//**
552 This lets a thread enter InnoDB regardless of the number of threads inside
553 InnoDB. This must be called when a thread ends a lock wait. */
554 UNIV_INTERN
555 void
srv_conc_force_enter_innodb(trx_t * trx)556 srv_conc_force_enter_innodb(
557 /*========================*/
558 	trx_t*	trx)	/*!< in: transaction object associated with the
559 			thread */
560 {
561 #ifdef UNIV_SYNC_DEBUG
562 	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
563 #endif /* UNIV_SYNC_DEBUG */
564 
565 	if (!srv_thread_concurrency) {
566 
567 		return;
568 	}
569 
570 	ut_ad(srv_conc.n_active >= 0);
571 
572 #ifdef HAVE_ATOMIC_BUILTINS
573 	(void) os_atomic_increment_lint(&srv_conc.n_active, 1);
574 #else
575 	os_fast_mutex_lock(&srv_conc_mutex);
576 	++srv_conc.n_active;
577 	os_fast_mutex_unlock(&srv_conc_mutex);
578 #endif /* HAVE_ATOMIC_BUILTINS */
579 
580 	trx->n_tickets_to_enter_innodb = 1;
581 	trx->declared_to_be_inside_innodb = TRUE;
582 }
583 
584 /*********************************************************************//**
585 This must be called when a thread exits InnoDB in a lock wait or at the
586 end of an SQL statement. */
587 UNIV_INTERN
588 void
srv_conc_force_exit_innodb(trx_t * trx)589 srv_conc_force_exit_innodb(
590 /*=======================*/
591 	trx_t*	trx)	/*!< in: transaction object associated with the
592 			thread */
593 {
594 	if ((trx->mysql_thd != NULL
595 	     && thd_is_replication_slave_thread(trx->mysql_thd))
596 	    || trx->declared_to_be_inside_innodb == FALSE) {
597 
598 		return;
599 	}
600 
601 #ifdef HAVE_ATOMIC_BUILTINS
602 	srv_conc_exit_innodb_with_atomics(trx);
603 #else
604 	srv_conc_exit_innodb_without_atomics(trx);
605 #endif /* HAVE_ATOMIC_BUILTINS */
606 
607 #ifdef UNIV_SYNC_DEBUG
608 	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
609 #endif /* UNIV_SYNC_DEBUG */
610 }
611 
612 /*********************************************************************//**
613 Get the count of threads waiting inside InnoDB. */
614 UNIV_INTERN
615 ulint
srv_conc_get_waiting_threads(void)616 srv_conc_get_waiting_threads(void)
617 /*==============================*/
618 {
619 	return(srv_conc.n_waiting);
620 }
621 
622 /*********************************************************************//**
623 Get the count of threads active inside InnoDB. */
624 UNIV_INTERN
625 ulint
srv_conc_get_active_threads(void)626 srv_conc_get_active_threads(void)
627 /*==============================*/
628 {
629 	return(srv_conc.n_active);
630  }
631 
632