1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2013, 2020, MariaDB Corporation.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16 
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file sync/sync0arr.cc
29 The wait array used in synchronization primitives
30 
31 Created 9/5/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #include "sync0arr.h"
35 #include <mysqld_error.h>
36 #include <mysql/plugin.h>
37 #include <hash.h>
38 #include <myisampack.h>
39 #include <sql_acl.h>
40 #include <mysys_err.h>
41 #include <my_sys.h>
42 #include "srv0srv.h"
43 #include "srv0start.h"
44 #include "i_s.h"
45 #include <sql_plugin.h>
46 #include <innodb_priv.h>
47 
48 #include "lock0lock.h"
49 #include "sync0rw.h"
50 
51 /*
52 			WAIT ARRAY
53 			==========
54 
55 The wait array consists of cells each of which has an event object created
56 for it. The threads waiting for a mutex, for example, can reserve a cell
57 in the array and suspend themselves to wait for the event to become signaled.
58 When using the wait array, remember to make sure that some thread holding
59 the synchronization object will eventually know that there is a waiter in
60 the array and signal the object, to prevent infinite wait.  Why did we choose
61 to implement a wait array? First, to make mutexes fast, we had to code
62 our own implementation of them, which resorts to using slow operating
63 system primitives only in rare cases. Then we had the choice of
64 assigning a unique OS event for each mutex, which would be simpler, or
65 using a global wait array. In some operating systems, the global wait
66 array solution is more efficient and flexible, because we can make do with
67 a very small number of OS events, say 200. In NT 3.51, allocating events
68 seems to be a quadratic algorithm, because 10 000 events are created fast,
69 but creating 100 000 events takes a couple of minutes.
70 
71 As of 5.0.30 the above mentioned design was changed. Since the OS can now
72 handle millions of wait events efficiently, we no longer have the concept of
73 each wait array cell having its own event.  Instead, the event that a thread
74 wants to wait on is embedded in the wait object (mutex or rw_lock). We still
75 keep the global wait array for the sake of diagnostics and also to avoid
76 infinite wait. The error_monitor thread scans the global wait array to signal
77 any waiting threads that have missed the signal. */
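
/* A sketch of how a latch implementation is expected to use this wait array.
The helpers referenced below (sync_array_get_and_reserve_cell() from
sync0arr.h and rw_lock_s_lock_low() from sync0rw.ic) are declared outside
this file; the snippet only illustrates the reserve / re-check / wait
protocol described above and is not compiled. */
#if 0 /* illustrative sketch only */
static void rw_lock_s_lock_wait_example(rw_lock_t* lock)
{
	sync_cell_t*	cell;
	sync_array_t*	arr = sync_array_get_and_reserve_cell(
		lock, RW_LOCK_S, __FILE__, __LINE__, &cell);

	/* Re-test the latch only after the cell (and thus its event) has
	been reserved and reset, so that a signal sent between the test
	and the wait cannot be lost. */
	if (rw_lock_s_lock_low(lock, 0, __FILE__, __LINE__)) {
		/* Got the latch after all; give the cell back. */
		sync_array_free_cell(arr, cell);
	} else {
		/* Suspend; sync_array_wait_event() frees the cell. */
		sync_array_wait_event(arr, cell);
	}
}
#endif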
78 
79 typedef TTASEventMutex<GenericPolicy> WaitMutex;
80 
81 /** The latch types that use the sync array. */
82 union sync_object_t {
83 
84 	/** RW lock instance */
85 	rw_lock_t*	lock;
86 
87 	/** Mutex instance */
88 	WaitMutex*	mutex;
89 };
90 
91 /** A cell where an individual thread may wait suspended until a resource
92 is released. The suspending is implemented using an operating system
93 event semaphore. */
94 
95 struct sync_cell_t {
96 	sync_object_t	latch;		/*!< pointer to the object the
97 					thread is waiting for; if NULL
98 					the cell is free for use */
99 	ulint		request_type;	/*!< lock type requested on the
100 					object */
101 	const char*	file;		/*!< in debug version file where
102 					requested */
103 	ulint		line;		/*!< in debug version line where
104 					requested, or ULINT_UNDEFINED */
105 	os_thread_id_t	thread_id;	/*!< thread id of this waiting
106 					thread */
107 	bool		waiting;	/*!< true if the thread has already
108 					called sync_array_wait_event()
109 					on this cell */
110 	int64_t		signal_count;	/*!< We capture the signal_count
111 					of the latch when we
112 					reset the event. This value is
113 					then passed on to os_event_wait
114 					and we wait only if the event
115 					has not been signalled in the
116 					period between the reset and
117 					wait call. */
118 	/** time(NULL) when the wait cell was reserved.
119 	FIXME: sync_array_print_long_waits_low() may display bogus
120 	warnings when the system time is adjusted to the past! */
121 	time_t		reservation_time;
122 };
123 
124 /* NOTE: It is allowed for a thread to wait for an event allocated for
125 the array without owning the protecting mutex (depending on the case:
126 OS or database mutex), but all changes (set or reset) to the state of
127 the event must be made while owning the mutex. */
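
/* A sketch of the signal_count handshake documented in sync_cell_t above,
expressed with the os_event calls used later in this file: os_event_reset()
returns the event's current signal count, and os_event_wait_low() sleeps only
if the event has not been signalled since that count. Not compiled. */
#if 0 /* illustrative sketch only */
static void sync_cell_wait_example(sync_cell_t* cell)
{
	os_event_t	event = sync_cell_get_event(cell);

	/* Done in sync_array_reserve_cell(): remember the signal count
	at which the event was reset. */
	cell->signal_count = os_event_reset(event);

	/* ... the caller re-tests the latch here ... */

	/* Done in sync_array_wait_event(): if the latch holder called
	os_event_set() after the reset above, this returns immediately
	instead of sleeping for a signal that has already arrived. */
	os_event_wait_low(event, cell->signal_count);
}
#endif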
128 
129 /** Synchronization array */
130 struct sync_array_t {
131 
132 	/** Constructor
133 	Creates a synchronization wait array. It is protected by a mutex
134 	which is automatically reserved when the functions operating on it
135 	are called.
136 	@param[in]	num_cells	Number of cells to create */
137 	sync_array_t(ulint num_cells)
138 		UNIV_NOTHROW;
139 
140 	/** Destructor */
141 	~sync_array_t()
142 		UNIV_NOTHROW;
143 
144 	ulint		n_reserved;	/*!< number of currently reserved
145 					cells in the wait array */
146 	ulint		n_cells;	/*!< number of cells in the
147 					wait array */
148 	sync_cell_t*	array;		/*!< pointer to wait array */
149 	SysMutex	mutex;		/*!< System mutex protecting the
150 					data structure.  As this data
151 					structure is used in constructing
152 					the database mutex, to prevent
153 					infinite recursion in implementation,
154 					we fall back to an OS mutex. */
155 	ulint		res_count;	/*!< count of cell reservations
156 					since creation of the array */
157 	ulint           next_free_slot; /*!< the next free cell in the array */
158 	ulint           first_free_slot;/*!< the last slot that was freed */
159 };
160 
161 /** User configured sync array size */
162 ulong	srv_sync_array_size = 1;
163 
164 /** Locally stored copy of srv_sync_array_size */
165 ulint	sync_array_size;
166 
167 /** The global array of wait cells for implementation of the database's own
168 mutexes and read-write locks */
169 sync_array_t**	sync_wait_array;
170 
171 /** count of how many times an object has been signalled */
172 ulint sg_count;
173 
174 #define sync_array_exit(a)	mutex_exit(&(a)->mutex)
175 #define sync_array_enter(a)	mutex_enter(&(a)->mutex)
176 
177 #ifdef UNIV_DEBUG
178 /******************************************************************//**
179 This function is called only in the debug version. Detects a deadlock
180 of one or more threads because of waits of semaphores.
181 @return TRUE if deadlock detected */
182 static
183 bool
184 sync_array_detect_deadlock(
185 /*=======================*/
186 	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
187 				own the mutex to array */
188 	sync_cell_t*	start,	/*!< in: cell where recursive search started */
189 	sync_cell_t*	cell,	/*!< in: cell to search */
190 	ulint		depth);	/*!< in: recursion depth */
191 #endif /* UNIV_DEBUG */
192 
193 /** Constructor
194 Creates a synchronization wait array. It is protected by a mutex
195 which is automatically reserved when the functions operating on it
196 are called.
197 @param[in]	num_cells		Number of cells to create */
198 sync_array_t::sync_array_t(ulint num_cells)
199 	UNIV_NOTHROW
200 	:
201 	n_reserved(),
202 	n_cells(num_cells),
203 	array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
204 	mutex(),
205 	res_count(),
206 	next_free_slot(),
207 	first_free_slot(ULINT_UNDEFINED)
208 {
209 	ut_a(num_cells > 0);
210 
211 	memset(array, 0x0, sizeof(sync_cell_t) * n_cells);
212 
213 	/* Then create the mutex to protect the wait array */
214 	mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
215 }
216 
217 /** Validate the integrity of the wait array. Check
218 that the number of occupied cells matches the n_reserved count.
219 @param[in,out]	arr	sync wait array */
220 static
221 void
222 sync_array_validate(sync_array_t* arr)
223 {
224 	ulint		i;
225 	ulint		count		= 0;
226 
227 	sync_array_enter(arr);
228 
229 	for (i = 0; i < arr->n_cells; i++) {
230 		sync_cell_t*	cell;
231 
232 		cell = sync_array_get_nth_cell(arr, i);
233 
234 		if (cell->latch.mutex != NULL) {
235 			count++;
236 		}
237 	}
238 
239 	ut_a(count == arr->n_reserved);
240 
241 	sync_array_exit(arr);
242 }
243 
244 /** Destructor */
245 sync_array_t::~sync_array_t()
246 	UNIV_NOTHROW
247 {
248 	ut_a(n_reserved == 0);
249 
250 	sync_array_validate(this);
251 
252 	/* Release the mutex protecting the wait array */
253 
254 	mutex_free(&mutex);
255 
256 	UT_DELETE_ARRAY(array);
257 }
258 
259 /*****************************************************************//**
260 Gets the nth cell in array.
261 @return cell */
262 UNIV_INTERN
263 sync_cell_t*
264 sync_array_get_nth_cell(
265 /*====================*/
266 	sync_array_t*	arr,	/*!< in: sync array */
267 	ulint		n)	/*!< in: index */
268 {
269 	ut_a(n < arr->n_cells);
270 
271 	return(arr->array + n);
272 }
273 
274 /******************************************************************//**
275 Frees the resources in a wait array. */
276 static
277 void
278 sync_array_free(
279 /*============*/
280 	sync_array_t*	arr)	/*!< in, own: sync wait array */
281 {
282 	UT_DELETE(arr);
283 }
284 
285 /*******************************************************************//**
286 Returns the event that the thread owning the cell waits for. */
287 static
288 os_event_t
289 sync_cell_get_event(
290 /*================*/
291 	sync_cell_t*	cell) /*!< in: non-empty sync array cell */
292 {
293 	switch(cell->request_type) {
294 	case SYNC_MUTEX:
295 		return(cell->latch.mutex->event());
296 	case RW_LOCK_X_WAIT:
297 		return(cell->latch.lock->wait_ex_event);
298 	default:
299 		return(cell->latch.lock->event);
300 	}
301 }
302 
303 /******************************************************************//**
304 Reserves a wait array cell for waiting for an object.
305 The event of the cell is reset to nonsignalled state.
306 @return sync cell to wait on */
307 sync_cell_t*
308 sync_array_reserve_cell(
309 /*====================*/
310 	sync_array_t*	arr,	/*!< in: wait array */
311 	void*		object, /*!< in: pointer to the object to wait for */
312 	ulint		type,	/*!< in: lock request type */
313 	const char*	file,	/*!< in: file where requested */
314 	unsigned	line)	/*!< in: line where requested */
315 {
316 	sync_cell_t*	cell;
317 
318 	sync_array_enter(arr);
319 
320 	if (arr->first_free_slot != ULINT_UNDEFINED) {
321 		/* Try and find a slot in the free list */
322 		ut_ad(arr->first_free_slot < arr->next_free_slot);
323 		cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
324 		arr->first_free_slot = cell->line;
325 	} else if (arr->next_free_slot < arr->n_cells) {
326 		/* Try and find a slot after the currently allocated slots */
327 		cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
328 		++arr->next_free_slot;
329 	} else {
330 		sync_array_exit(arr);
331 
332 		// No free cell: return NULL so that, if there is more
333 		// than one sync array, another instance can be tried.
334 		return(NULL);
335 	}
336 
337 	++arr->res_count;
338 
339 	ut_ad(arr->n_reserved < arr->n_cells);
340 	ut_ad(arr->next_free_slot <= arr->n_cells);
341 
342 	++arr->n_reserved;
343 
344 	/* Reserve the cell. */
345 	ut_ad(cell->latch.mutex == NULL);
346 
347 	cell->request_type = type;
348 
349 	if (cell->request_type == SYNC_MUTEX) {
350 		cell->latch.mutex = reinterpret_cast<WaitMutex*>(object);
351 	} else {
352 		cell->latch.lock = reinterpret_cast<rw_lock_t*>(object);
353 	}
354 
355 	cell->waiting = false;
356 
357 	cell->file = file;
358 	cell->line = line;
359 
360 	sync_array_exit(arr);
361 
362 	cell->thread_id = os_thread_get_curr_id();
363 
364 	cell->reservation_time = time(NULL);
365 
366 	/* Make sure the event is reset and also store the value of
367 	signal_count at which the event was reset. */
368 	os_event_t	event = sync_cell_get_event(cell);
369 	cell->signal_count = os_event_reset(event);
370 
371 	return(cell);
372 }
373 
374 /******************************************************************//**
375 Frees the cell. NOTE! sync_array_wait_event frees the cell
376 automatically! */
377 void
378 sync_array_free_cell(
379 /*=================*/
380 	sync_array_t*	arr,	/*!< in: wait array */
381 	sync_cell_t*&	cell)	/*!< in/out: the cell in the array */
382 {
383 	sync_array_enter(arr);
384 
385 	ut_a(cell->latch.mutex != NULL);
386 
387 	cell->waiting = false;
388 	cell->signal_count = 0;
389 	cell->latch.mutex = NULL;
390 
391 	/* Setup the list of free slots in the array */
392 	cell->line = arr->first_free_slot;
393 
394 	arr->first_free_slot = cell - arr->array;
395 
396 	ut_a(arr->n_reserved > 0);
397 	arr->n_reserved--;
398 
399 	if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
400 #ifdef UNIV_DEBUG
401 		for (ulint i = 0; i < arr->next_free_slot; ++i) {
402 			cell = sync_array_get_nth_cell(arr, i);
403 
404 			ut_ad(!cell->waiting);
405 			ut_ad(cell->latch.mutex == 0);
406 			ut_ad(cell->signal_count == 0);
407 		}
408 #endif /* UNIV_DEBUG */
409 		arr->next_free_slot = 0;
410 		arr->first_free_slot = ULINT_UNDEFINED;
411 	}
412 	sync_array_exit(arr);
413 
414 	cell = 0;
415 }
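
/* The free list built in sync_array_free_cell() reuses the cell's line
member as a "next free" index: first_free_slot points at the most recently
freed cell and each freed cell's line field points at the one freed before
it. A worked example (the cell indices are invented for illustration only):

	free cell 7:	cell[7].line = ULINT_UNDEFINED;	first_free_slot = 7
	free cell 3:	cell[3].line = 7;		first_free_slot = 3
	reserve:	take cell[3];			first_free_slot = 7

sync_array_reserve_cell() pops from this list before extending
next_free_slot. */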
416 
417 /******************************************************************//**
418 This function should be called when a thread starts to wait on
419 a wait array cell. In the debug version this function checks
420 if the wait for a semaphore will result in a deadlock, in which
421 case it prints info and asserts. */
422 void
423 sync_array_wait_event(
424 /*==================*/
425 	sync_array_t*	arr,	/*!< in: wait array */
426 	sync_cell_t*&	cell)	/*!< in: index of the reserved cell */
427 {
428 	sync_array_enter(arr);
429 
430 	ut_ad(!cell->waiting);
431 	ut_ad(cell->latch.mutex);
432 	ut_ad(os_thread_get_curr_id() == cell->thread_id);
433 
434 	cell->waiting = true;
435 
436 #ifdef UNIV_DEBUG
437 
438 	/* We use simple enter to the mutex below, because if
439 	we cannot acquire it at once, mutex_enter would call
440 	recursively sync_array routines, leading to trouble.
441 	rw_lock_debug_mutex freezes the debug lists. */
442 
443 	rw_lock_debug_mutex_enter();
444 
445 	if (sync_array_detect_deadlock(arr, cell, cell, 0)) {
446 
447 		ib::fatal() << "########################################"
448                         " Deadlock Detected!";
449 	}
450 
451 	rw_lock_debug_mutex_exit();
452 #endif /* UNIV_DEBUG */
453 	sync_array_exit(arr);
454 
455 	tpool::tpool_wait_begin();
456 	os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);
457 	tpool::tpool_wait_end();
458 
459 	sync_array_free_cell(arr, cell);
460 
461 	cell = 0;
462 }
463 
464 /******************************************************************//**
465 Reports info of a wait array cell. */
466 static
467 void
468 sync_array_cell_print(
469 /*==================*/
470 	FILE*		file,		/*!< in: file where to print */
471 	sync_cell_t*	cell)		/*!< in: sync cell */
472 {
473 	rw_lock_t*	rwlock;
474 	ulint		type;
475 	ulint		writer;
476 
477 	type = cell->request_type;
478 
479 	fprintf(file,
480 		"--Thread " ULINTPF " has waited at %s line " ULINTPF
481 		" for %.2f seconds the semaphore:\n",
482 		ulint(cell->thread_id),
483 		innobase_basename(cell->file), cell->line,
484 		difftime(time(NULL), cell->reservation_time));
485 
486 	switch (type) {
487 	default:
488 		ut_error;
489 	case RW_LOCK_X:
490 	case RW_LOCK_X_WAIT:
491 	case RW_LOCK_SX:
492 	case RW_LOCK_S:
493 		fputs(type == RW_LOCK_X ? "X-lock on"
494 		      : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on"
495 		      : type == RW_LOCK_SX ? "SX-lock on"
496 		      : "S-lock on", file);
497 
498 		rwlock = cell->latch.lock;
499 
500 		if (rwlock) {
501 			fprintf(file,
502 				" RW-latch at %p created in file %s line %u\n",
503 				(void*) rwlock, innobase_basename(rwlock->cfile_name),
504 				rwlock->cline);
505 
506 			writer = rw_lock_get_writer(rwlock);
507 
508 			if (writer != RW_LOCK_NOT_LOCKED) {
509 
510 				fprintf(file,
511 					"a writer (thread id " ULINTPF ") has"
512 					" reserved it in mode %s",
513 					ulint(rwlock->writer_thread),
514 				writer == RW_LOCK_X ? " exclusive\n"
515 				: writer == RW_LOCK_SX ? " SX\n"
516 					: " wait exclusive\n");
517 			}
518 
519 			fprintf(file,
520 				"number of readers " ULINTPF
521 				", waiters flag %d, "
522 				"lock_word: %x\n"
523 				"Last time write locked in file %s line %u"
524 #if 0 /* JAN: TODO: FIX LATER */
525 				"\nHolder thread " ULINTPF
526 				" file %s line " ULINTPF
527 #endif
528 				"\n",
529 				rw_lock_get_reader_count(rwlock),
530 				uint32_t{rwlock->waiters},
531 				int32_t{rwlock->lock_word},
532 				innobase_basename(rwlock->last_x_file_name),
533 				rwlock->last_x_line
534 #if 0 /* JAN: TODO: FIX LATER */
535 				, ulint(rwlock->thread_id),
536 				innobase_basename(rwlock->file_name),
537 				rwlock->line
538 #endif
539 				);
540 		}
541 		break;
542 	case SYNC_MUTEX:
543 		WaitMutex*	mutex = cell->latch.mutex;
544 		const WaitMutex::MutexPolicy&	policy = mutex->policy();
545 #ifdef UNIV_DEBUG
546 		const char*	name = policy.context.get_enter_filename();
547 		if (name == NULL) {
548 			/* The mutex might have been released. */
549 			name = "NULL";
550 		}
551 #endif /* UNIV_DEBUG */
552 
553 		if (mutex) {
554 		fprintf(file,
555 			"Mutex at %p, %s, lock var %x\n"
556 #ifdef UNIV_DEBUG
557 			"Last time reserved in file %s line %u"
558 #endif /* UNIV_DEBUG */
559 			"\n",
560 			(void*) mutex,
561 			policy.to_string().c_str(),
562 			mutex->state()
563 #ifdef UNIV_DEBUG
564 			,name,
565 			policy.context.get_enter_line()
566 #endif /* UNIV_DEBUG */
567 			);
568 		}
569 		break;
570 	}
571 
572 	if (!cell->waiting) {
573 		fputs("wait has ended\n", file);
574 	}
575 }
576 
577 #ifdef UNIV_DEBUG
578 /******************************************************************//**
579 Looks for a cell with the given thread id.
580 @return pointer to cell or NULL if not found */
581 static
582 sync_cell_t*
583 sync_array_find_thread(
584 /*===================*/
585 	sync_array_t*	arr,	/*!< in: wait array */
586 	os_thread_id_t	thread)	/*!< in: thread id */
587 {
588 	ulint		i;
589 
590 	for (i = 0; i < arr->n_cells; i++) {
591 		sync_cell_t*	cell;
592 
593 		cell = sync_array_get_nth_cell(arr, i);
594 
595 		if (cell->latch.mutex != NULL
596 		    && os_thread_eq(cell->thread_id, thread)) {
597 
598 			return(cell);	/* Found */
599 		}
600 	}
601 
602 	return(NULL);	/* Not found */
603 }
604 
605 /******************************************************************//**
606 Recursion step for deadlock detection.
607 @return TRUE if deadlock detected */
608 static
609 ibool
610 sync_array_deadlock_step(
611 /*=====================*/
612 	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
613 				own the mutex to array */
614 	sync_cell_t*	start,	/*!< in: cell where recursive search
615 				started */
616 	os_thread_id_t	thread,	/*!< in: thread to look at */
617 	ulint		pass,	/*!< in: pass value */
618 	ulint		depth)	/*!< in: recursion depth */
619 {
620 	sync_cell_t*	new_cell;
621 
622 	if (pass != 0) {
623 		/* If pass != 0, then we do not know which threads are
624 		responsible for releasing the lock, and no deadlock can
625 		be detected. */
626 
627 		return(FALSE);
628 	}
629 
630 	new_cell = sync_array_find_thread(arr, thread);
631 
632 	if (new_cell == start) {
633 		/* Deadlock */
634 		fputs("########################################\n"
635 		      "DEADLOCK of threads detected!\n", stderr);
636 
637 		return(TRUE);
638 
639 	} else if (new_cell) {
640 		return(sync_array_detect_deadlock(
641 			arr, start, new_cell, depth + 1));
642 	}
643 	return(FALSE);
644 }
645 
646 /**
647 Report an error to stderr.
648 @param lock		rw-lock instance
649 @param debug		rw-lock debug information
650 @param cell		thread context */
651 static
652 void
653 sync_array_report_error(
654 	rw_lock_t*		lock,
655 	rw_lock_debug_t*	debug,
656 	sync_cell_t* 		cell)
657 {
658 	fprintf(stderr, "rw-lock %p ", (void*) lock);
659 	sync_array_cell_print(stderr, cell);
660 	rw_lock_debug_print(stderr, debug);
661 }
662 
663 /******************************************************************//**
664 This function is called only in the debug version. Detects a deadlock
665 of one or more threads because of waits of semaphores.
666 @return TRUE if deadlock detected */
667 static
668 bool
669 sync_array_detect_deadlock(
670 /*=======================*/
671 	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
672 				own the mutex to array */
673 	sync_cell_t*	start,	/*!< in: cell where recursive search started */
674 	sync_cell_t*	cell,	/*!< in: cell to search */
675 	ulint		depth)	/*!< in: recursion depth */
676 {
677 	rw_lock_t*	lock;
678 	os_thread_id_t	thread;
679 	ibool		ret;
680 	rw_lock_debug_t*debug;
681 
682 	ut_a(arr);
683 	ut_a(start);
684 	ut_a(cell);
685 	ut_ad(cell->latch.mutex != 0);
686 	ut_ad(os_thread_get_curr_id() == start->thread_id);
687 	ut_ad(depth < 100);
688 
689 	depth++;
690 
691 	if (!cell->waiting) {
692 		/* No deadlock here */
693 		return(false);
694 	}
695 
696 	switch (cell->request_type) {
697 	case SYNC_MUTEX: {
698 
699 		WaitMutex*	mutex = cell->latch.mutex;
700 		const WaitMutex::MutexPolicy&	policy = mutex->policy();
701 
702 		if (mutex->state() != MUTEX_STATE_UNLOCKED) {
703 			thread = policy.context.get_thread_id();
704 
705 			/* Note that the thread id read above may also be
706 			OS_THREAD_ID_UNDEFINED, because the thread which
707 			held the mutex may not yet have updated the value,
708 			or may already have released the mutex: in this
709 			case no deadlock can occur, as the wait array
710 			cannot contain a thread with the ID_UNDEFINED
711 			value. */
712 			ret = sync_array_deadlock_step(
713 				arr, start, thread, 0, depth);
714 
715 			if (ret) {
716 				const char*	name;
717 
718 				name = policy.context.get_enter_filename();
719 
720 				if (name == NULL) {
721 					/* The mutex might have been
722 					released. */
723 					name = "NULL";
724 				}
725 
726 				ib::info()
727 					<< "Mutex " << mutex << " owned by"
728 					" thread " << thread
729 					<< " file " << name << " line "
730 					<< policy.context.get_enter_line();
731 
732 				sync_array_cell_print(stderr, cell);
733 
734 				return(true);
735 			}
736 		}
737 
738 		/* No deadlock */
739 		return(false);
740 		}
741 
742 	case RW_LOCK_X:
743 	case RW_LOCK_X_WAIT:
744 
745 		lock = cell->latch.lock;
746 
747 		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
748 		     debug != NULL;
749 		     debug = UT_LIST_GET_NEXT(list, debug)) {
750 
751 			thread = debug->thread_id;
752 
753 			switch (debug->lock_type) {
754 			case RW_LOCK_X:
755 			case RW_LOCK_SX:
756 			case RW_LOCK_X_WAIT:
757 				if (os_thread_eq(thread, cell->thread_id)) {
758 					break;
759 				}
760 				/* fall through */
761 			case RW_LOCK_S:
762 
763 				/* The (wait) x-lock request can block
764 				infinitely only if someone (possibly the cell
765 				thread itself) holds an s-lock, or someone else
766 				holds a (wait) x-lock or sx-lock, and that
767 				holder is itself blocked by the start thread */
768 
769 				ret = sync_array_deadlock_step(
770 					arr, start, thread, debug->pass,
771 					depth);
772 
773 				if (ret) {
774 					sync_array_report_error(
775 						lock, debug, cell);
776 					rw_lock_debug_print(stderr, debug);
777 					return(TRUE);
778 				}
779 			}
780 		}
781 
782 		return(false);
783 
784 	case RW_LOCK_SX:
785 
786 		lock = cell->latch.lock;
787 
788 		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
789 		     debug != 0;
790 		     debug = UT_LIST_GET_NEXT(list, debug)) {
791 
792 			thread = debug->thread_id;
793 
794 			switch (debug->lock_type) {
795 			case RW_LOCK_X:
796 			case RW_LOCK_SX:
797 			case RW_LOCK_X_WAIT:
798 
799 				if (os_thread_eq(thread, cell->thread_id)) {
800 					break;
801 				}
802 
803 				/* The sx-lock request can block infinitely
804 				only if someone (possibly the cell thread
805 				itself) holds a (wait) x-lock or sx-lock, and
806 				that holder is blocked by the start thread */
807 
808 				ret = sync_array_deadlock_step(
809 					arr, start, thread, debug->pass,
810 					depth);
811 
812 				if (ret) {
813 					sync_array_report_error(
814 						lock, debug, cell);
815 					return(TRUE);
816 				}
817 			}
818 		}
819 
820 		return(false);
821 
822 	case RW_LOCK_S:
823 
824 		lock = cell->latch.lock;
825 
826 		for (debug = UT_LIST_GET_FIRST(lock->debug_list);
827 		     debug != 0;
828 		     debug = UT_LIST_GET_NEXT(list, debug)) {
829 
830 			thread = debug->thread_id;
831 
832 			if (debug->lock_type == RW_LOCK_X
833 			    || debug->lock_type == RW_LOCK_X_WAIT) {
834 
835 				/* The s-lock request can block infinitely
836 				only if someone (possibly the cell thread
837 				itself) holds a (wait) x-lock, and that holder
838 				is blocked by the start thread */
839 
840 				ret = sync_array_deadlock_step(
841 					arr, start, thread, debug->pass,
842 					depth);
843 
844 				if (ret) {
845 					sync_array_report_error(
846 						lock, debug, cell);
847 					return(TRUE);
848 				}
849 			}
850 		}
851 
852 		return(false);
853 
854 	default:
855 		ut_error;
856 	}
857 
858 	return(true);
859 }
860 #endif /* UNIV_DEBUG */
861 
862 /**********************************************************************//**
863 Prints warnings of long semaphore waits to stderr.
864 @return TRUE if fatal semaphore wait threshold was exceeded */
865 static
866 bool
867 sync_array_print_long_waits_low(
868 /*============================*/
869 	sync_array_t*	arr,	/*!< in: sync array instance */
870 	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
871 	const void**	sema,	/*!< out: longest-waited-for semaphore */
872 	ibool*		noticed)/*!< out: TRUE if long wait noticed */
873 {
874 	double		fatal_timeout = static_cast<double>(
875 		srv_fatal_semaphore_wait_threshold);
876 	ibool		fatal = FALSE;
877 	double		longest_diff = 0;
878 	ulint		i;
879 
880 	/* For huge tables, skip the check during CHECK TABLE etc... */
881 	if (btr_validate_index_running) {
882 		return(false);
883 	}
884 
885 #if defined HAVE_valgrind && !__has_feature(memory_sanitizer)
886 	/* Increase the timeouts if running under valgrind because it executes
887 	extremely slowly. HAVE_valgrind does not necessarily mean that
888 	we are running under valgrind, but we have no better way to tell.
889 	See Bug#58432 innodb.innodb_bug56143 fails under valgrind
890 	for an example */
891 # define SYNC_ARRAY_TIMEOUT	2400
892 	fatal_timeout *= 10;
893 #else
894 # define SYNC_ARRAY_TIMEOUT	240
895 #endif
896 	const time_t now = time(NULL);
897 
898 	for (ulint i = 0; i < arr->n_cells; i++) {
899 
900 		sync_cell_t*	cell;
901 		void*		latch;
902 
903 		cell = sync_array_get_nth_cell(arr, i);
904 
905 		latch = cell->latch.mutex;
906 
907 		if (latch == NULL || !cell->waiting) {
908 
909 			continue;
910 		}
911 
912 		double	diff = difftime(now, cell->reservation_time);
913 
914 		if (diff > SYNC_ARRAY_TIMEOUT) {
915 			ib::warn() << "A long semaphore wait:";
916 			sync_array_cell_print(stderr, cell);
917 			*noticed = TRUE;
918 		}
919 
920 		if (diff > fatal_timeout) {
921 			fatal = TRUE;
922 		}
923 
924 		if (diff > longest_diff) {
925 			longest_diff = diff;
926 			*sema = latch;
927 			*waiter = cell->thread_id;
928 		}
929 	}
930 
931 	/* If a long semaphore wait was noticed, print all threads that
932 	are waiting for a semaphore. */
933 	if (*noticed) {
934 		for (i = 0; i < arr->n_cells; i++) {
935 			void*	wait_object;
936 			sync_cell_t*	cell;
937 
938 			cell = sync_array_get_nth_cell(arr, i);
939 
940 			wait_object = cell->latch.mutex;
941 
942 			if (wait_object == NULL || !cell->waiting) {
943 
944 				continue;
945 			}
946 
947 			ib::info() << "A semaphore wait:";
948 			sync_array_cell_print(stderr, cell);
949 		}
950 	}
951 
952 #undef SYNC_ARRAY_TIMEOUT
953 
954 	return(fatal);
955 }
956 
957 /**********************************************************************//**
958 Prints warnings of long semaphore waits to stderr.
959 @return TRUE if fatal semaphore wait threshold was exceeded */
960 ibool
961 sync_array_print_long_waits(
962 /*========================*/
963 	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
964 	const void**	sema)	/*!< out: longest-waited-for semaphore */
965 {
966 	ulint		i;
967 	ibool		fatal = FALSE;
968 	ibool		noticed = FALSE;
969 
970 	for (i = 0; i < sync_array_size; ++i) {
971 
972 		sync_array_t*	arr = sync_wait_array[i];
973 
974 		sync_array_enter(arr);
975 
976 		if (sync_array_print_long_waits_low(
977 				arr, waiter, sema, &noticed)) {
978 
979 			fatal = TRUE;
980 		}
981 
982 		sync_array_exit(arr);
983 	}
984 
985 	if (noticed) {
986 		/* If some crucial semaphore is reserved, then also the InnoDB
987 		Monitor can hang, and we do not get diagnostics. Since in
988 		many cases an InnoDB hang is caused by a pwrite() or a pread()
989 		call hanging inside the operating system, let us print right
990 		now the values of pending calls of these. */
991 
992 		fprintf(stderr,
993 			"InnoDB: Pending reads " UINT64PF
994 			", writes " UINT64PF "\n",
995 			MONITOR_VALUE(MONITOR_OS_PENDING_READS),
996 			MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
997 
998 		lock_wait_timeout_task(nullptr);
999 	}
1000 
1001 	return(fatal);
1002 }
1003 
1004 /**********************************************************************//**
1005 Prints info of the wait array. */
1006 static
1007 void
1008 sync_array_print_info_low(
1009 /*======================*/
1010 	FILE*		file,	/*!< in: file where to print */
1011 	sync_array_t*	arr)	/*!< in: wait array */
1012 {
1013 	ulint		i;
1014 	ulint		count = 0;
1015 
1016 	fprintf(file,
1017 		"OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
1018 		arr->res_count);
1019 
1020 	for (i = 0; count < arr->n_reserved; ++i) {
1021 		sync_cell_t*	cell;
1022 
1023 		cell = sync_array_get_nth_cell(arr, i);
1024 
1025 		if (cell->latch.mutex != 0) {
1026 			count++;
1027 			sync_array_cell_print(file, cell);
1028 		}
1029 	}
1030 }
1031 
1032 /**********************************************************************//**
1033 Prints info of the wait array. */
1034 static
1035 void
1036 sync_array_print_info(
1037 /*==================*/
1038 	FILE*		file,	/*!< in: file where to print */
1039 	sync_array_t*	arr)	/*!< in: wait array */
1040 {
1041 	sync_array_enter(arr);
1042 
1043 	sync_array_print_info_low(file, arr);
1044 
1045 	sync_array_exit(arr);
1046 }
1047 
1048 /** Create the primary system wait arrays */
1049 void sync_array_init()
1050 {
1051 	ut_a(sync_wait_array == NULL);
1052 	ut_a(srv_sync_array_size > 0);
1053 	ut_a(srv_max_n_threads > 0);
1054 
1055 	sync_array_size = srv_sync_array_size;
1056 
1057 	sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
1058 
1059 	ulint	n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
1060 
1061 	for (ulint i = 0; i < sync_array_size; ++i) {
1062 
1063 		sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
1064 	}
1065 }
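
/* A waiting thread later picks one of the arrays created above through
sync_array_get() (declared in sync0arr.h and used further down in this file).
The selection is assumed to spread callers over the srv_sync_array_size
instances roughly as sketched below; the authoritative logic lives in the
sync0arr.h/.ic helpers, not here. Not compiled. */
#if 0 /* illustrative sketch only */
static sync_array_t* sync_array_get_example()
{
	return(sync_wait_array[
		       ulint(os_thread_get_curr_id()) % sync_array_size]);
}
#endif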
1066 
1067 /** Destroy the sync array wait sub-system. */
1068 void sync_array_close()
1069 {
1070 	for (ulint i = 0; i < sync_array_size; ++i) {
1071 		sync_array_free(sync_wait_array[i]);
1072 	}
1073 
1074 	UT_DELETE_ARRAY(sync_wait_array);
1075 	sync_wait_array = NULL;
1076 }
1077 
1078 /**********************************************************************//**
1079 Print info about the sync array(s). */
1080 void
1081 sync_array_print(
1082 /*=============*/
1083 	FILE*		file)		/*!< in/out: Print to this stream */
1084 {
1085 	for (ulint i = 0; i < sync_array_size; ++i) {
1086 		sync_array_print_info(file, sync_wait_array[i]);
1087 	}
1088 
1089 	fprintf(file,
1090 		"OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
1091 
1092 }
1093 
1094 /**********************************************************************//**
1095 Prints info of the wait array without using any mutexes/semaphores. */
1096 UNIV_INTERN
1097 void
1098 sync_array_print_innodb(void)
1099 /*=========================*/
1100 {
1101 	ulint i;
1102 	sync_array_t*	arr = sync_array_get();
1103 
1104 	fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n", stderr);
1105 
1106 	for (i = 0; i < arr->n_cells; i++) {
1107 		void*	wait_object;
1108 		sync_cell_t*	cell;
1109 
1110 		cell = sync_array_get_nth_cell(arr, i);
1111 
1112 		wait_object = cell->latch.mutex;
1113 
1114 		if (wait_object == NULL || !cell->waiting) {
1115 
1116 			continue;
1117 		}
1118 
1119 		fputs("InnoDB: Warning: semaphore wait:\n",
1120 			      stderr);
1121 		sync_array_cell_print(stderr, cell);
1122 	}
1123 
1124 	fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
1125 
1126 }
1127 
1128 /**********************************************************************//**
1129 Get the number of items in the sync array. */
1130 UNIV_INTERN
1131 ulint
1132 sync_arr_get_n_items(void)
1133 /*======================*/
1134 {
1135 	sync_array_t*	sync_arr = sync_array_get();
1136 	return (ulint) sync_arr->n_cells;
1137 }
1138 
1139 /******************************************************************//**
1140 Get the specified item from the sync array if it is reserved, and set
1141 the given pointer to that array item.
1142 @return true if the item is reserved, false otherwise */
1143 UNIV_INTERN
1144 ibool
1145 sync_arr_get_item(
1146 /*==============*/
1147 	ulint		i,		/*!< in: requested item */
1148 	sync_cell_t	**cell)		/*!< out: cell contents if item
1149 					reserved */
1150 {
1151 	sync_array_t*	sync_arr;
1152 	sync_cell_t*	wait_cell;
1153 	void*		wait_object;
1154 	ibool		found = FALSE;
1155 
1156 	sync_arr = sync_array_get();
1157 	wait_cell = sync_array_get_nth_cell(sync_arr, i);
1158 
1159 	if (wait_cell) {
1160 		wait_object = wait_cell->latch.mutex;
1161 
1162 		if(wait_object != NULL && wait_cell->waiting) {
1163 			found = TRUE;
1164 			*cell = wait_cell;
1165 		}
1166 	}
1167 
1168 	return found;
1169 }
1170 
1171 /*******************************************************************//**
1172 Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
1173 Loop through each item in the sync array, extract the column
1174 information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
1175 @return 0 on success */
1176 UNIV_INTERN
1177 int
1178 sync_arr_fill_sys_semphore_waits_table(
1179 /*===================================*/
1180 	THD*		thd,	/*!< in: thread */
1181 	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
1182 	Item*		)	/*!< in: condition (not used) */
1183 {
1184 	Field**		fields;
1185 	ulint		n_items;
1186 
1187 	DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
1188 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
1189 
1190 	/* deny access to user without PROCESS_ACL privilege */
1191 	if (check_global_access(thd, PROCESS_ACL)) {
1192 		DBUG_RETURN(0);
1193 	}
1194 
1195 	fields = tables->table->field;
1196 	n_items = sync_arr_get_n_items();
1197 	ulint type;
1198 
1199 	for(ulint i=0; i < n_items;i++) {
1200 		sync_cell_t *cell=NULL;
1201 		if (sync_arr_get_item(i, &cell)) {
1202 			WaitMutex* mutex;
1203 			type = cell->request_type;
1204 			/* JAN: FIXME
1205 			OK(fields[SYS_SEMAPHORE_WAITS_THREAD_ID]->store(,
1206 			ulint(cell->thread), true));
1207 			*/
1208 			OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
1209 			OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
1210 			fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
1211 			OK(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME]->store(
1212 				   difftime(time(NULL),
1213 					    cell->reservation_time)));
1214 
1215 			if (type == SYNC_MUTEX) {
1216 				mutex = static_cast<WaitMutex*>(cell->latch.mutex);
1217 
1218 				if (mutex) {
1219 					// JAN: FIXME
1220 					// OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
1221 					OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)mutex, true));
1222 					OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
1223 					//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(mutex->thread_id, true));
1224 					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
1225 					//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
1226 					//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
1227 					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
1228 					//OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
1229 					//fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
1230 					//OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(mutex->waiters, true));
1231 					//OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(mutex->lock_word, true));
1232 					//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
1233 					//OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
1234 					//fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
1235 					//OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(mutex->count_os_wait, true));
1236 				}
1237 			} else if (type == RW_LOCK_X_WAIT
1238 				|| type == RW_LOCK_X
1239 				|| type == RW_LOCK_SX
1240 			        || type == RW_LOCK_S) {
1241 				rw_lock_t* rwlock=NULL;
1242 
1243 				rwlock = static_cast<rw_lock_t *> (cell->latch.lock);
1244 
1245 				if (rwlock) {
1246 					ulint writer = rw_lock_get_writer(rwlock);
1247 
1248 					OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)rwlock, true));
1249 					if (type == RW_LOCK_X) {
1250 						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X"));
1251 					} else if (type == RW_LOCK_X_WAIT) {
1252 						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT"));
1253 					} else if (type == RW_LOCK_S) {
1254 						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S"));
1255 					} else if (type == RW_LOCK_SX) {
1256 						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX"));
1257 					}
1258 
1259 					if (writer != RW_LOCK_NOT_LOCKED) {
1260 						// JAN: FIXME
1261 						// OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
1262 						OK(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD]->store(ulint(rwlock->writer_thread), true));
1263 
1264 						if (writer == RW_LOCK_X) {
1265 							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X"));
1266 						} else if (writer == RW_LOCK_X_WAIT) {
1267 							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT"));
1268 						} else if (type == RW_LOCK_SX) {
1269 							OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX"));
1270 						}
1271 
1272 						//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(rwlock->thread_id, true));
1273 						//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
1274 						//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
1275 						//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
1276 						OK(fields[SYS_SEMAPHORE_WAITS_READERS]->store(rw_lock_get_reader_count(rwlock), true));
1277 						OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(
1278 							   rwlock->waiters,
1279 							   true));
1280 						OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(
1281 							   rwlock->lock_word,
1282 							   true));
1283 						OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
1284 						OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
1285 						fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
1286 						OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(rwlock->count_os_wait, true));
1287 					}
1288 				}
1289 			}
1290 
1291 			OK(schema_table_store_record(thd, tables->table));
1292 		}
1293 	}
1294 
1295 	DBUG_RETURN(0);
1296 }
1297