1 /*****************************************************************************
2
3 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2013, 2020, MariaDB Corporation.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file sync/sync0arr.cc
29 The wait array used in synchronization primitives
30
31 Created 9/5/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "sync0arr.h"
35 #include <mysqld_error.h>
36 #include <mysql/plugin.h>
37 #include <hash.h>
38 #include <myisampack.h>
39 #include <sql_acl.h>
40 #include <mysys_err.h>
41 #include <my_sys.h>
42 #include "srv0srv.h"
43 #include "srv0start.h"
44 #include "i_s.h"
45 #include <sql_plugin.h>
46 #include <innodb_priv.h>
47
48 #include "lock0lock.h"
49 #include "sync0rw.h"
50
51 /*
52 WAIT ARRAY
53 ==========
54
The wait array consists of cells each of which has an event object created
56 for it. The threads waiting for a mutex, for example, can reserve a cell
57 in the array and suspend themselves to wait for the event to become signaled.
58 When using the wait array, remember to make sure that some thread holding
59 the synchronization object will eventually know that there is a waiter in
the array and signal the object, to prevent infinite wait. Why did we choose
to implement a wait array? First, to make mutexes fast, we had to code
62 our own implementation of them, which only in usually uncommon cases
63 resorts to using slow operating system primitives. Then we had the choice of
64 assigning a unique OS event for each mutex, which would be simpler, or
65 using a global wait array. In some operating systems, the global wait
66 array solution is more efficient and flexible, because we can do with
67 a very small number of OS events, say 200. In NT 3.51, allocating events
68 seems to be a quadratic algorithm, because 10 000 events are created fast,
69 but 100 000 events takes a couple of minutes to create.
70
71 As of 5.0.30 the above mentioned design is changed. Since now OS can handle
72 millions of wait events efficiently, we no longer have this concept of each
73 cell of wait array having one event. Instead, now the event that a thread
74 wants to wait on is embedded in the wait object (mutex or rw_lock). We still
75 keep the global wait array for the sake of diagnostics and also to avoid
infinite waits. The error_monitor thread scans the global wait array to signal
any waiting threads who have missed the signal. */
78
/** Mutex type that can be stored in a wait cell
(see sync_object_t::mutex). */
typedef TTASEventMutex<GenericPolicy> WaitMutex;
80
/** The latch types that use the sync array.
sync_array_reserve_cell() stores the waited-for object into `mutex`
when the request type is SYNC_MUTEX and into `lock` otherwise. The
code in this file tests `mutex` against NULL to decide whether a cell
is in use, whichever member was written. */
union sync_object_t {

	/** RW lock instance */
	rw_lock_t*	lock;

	/** Mutex instance */
	WaitMutex*	mutex;
};
90
/** A cell where an individual thread may wait suspended until a resource
is released. The suspending is implemented using an operating system
event semaphore. */

struct sync_cell_t {
	sync_object_t	latch;		/*!< pointer to the object the
					thread is waiting for; if NULL
					the cell is free for use */
	ulint		request_type;	/*!< lock type requested on the
					object (SYNC_MUTEX or one of
					the RW_LOCK_* modes) */
	const char*	file;		/*!< file where the wait was
					requested */
	ulint		line;		/*!< line where the wait was
					requested. While the cell is
					free, sync_array_free_cell()
					reuses this field as the link
					to the next free slot index
					(ULINT_UNDEFINED terminates
					the list) */
	os_thread_id_t	thread_id;	/*!< thread id of this waiting
					thread */
	bool		waiting;	/*!< true if the thread has already
					called sync_array_wait_event
					on this cell */
	int64_t		signal_count;	/*!< We capture the signal_count
					of the latch when we
					reset the event. This value is
					then passed on to os_event_wait
					and we wait only if the event
					has not been signalled in the
					period between the reset and
					wait call. */
	/** time(NULL) when the wait cell was reserved.
	FIXME: sync_array_print_long_waits_low() may display bogus
	warnings when the system time is adjusted to the past! */
	time_t		reservation_time;
};
123
124 /* NOTE: It is allowed for a thread to wait for an event allocated for
125 the array without owning the protecting mutex (depending on the case:
126 OS or database mutex), but all changes (set or reset) to the state of
127 the event must be made while owning the mutex. */
128
/** Synchronization array: a fixed-size pool of wait cells together with
the mutex that protects its bookkeeping. */
struct sync_array_t {

	/** Constructor
	Creates a synchronization wait array. It is protected by a mutex
	which is automatically reserved when the functions operating on it
	are called.
	@param[in]	num_cells	Number of cells to create */
	sync_array_t(ulint num_cells)
		UNIV_NOTHROW;

	/** Destructor; asserts that no cell is still reserved. */
	~sync_array_t()
		UNIV_NOTHROW;

	ulint		n_reserved;	/*!< number of currently reserved
					cells in the wait array */
	ulint		n_cells;	/*!< number of cells in the
					wait array */
	sync_cell_t*	array;		/*!< pointer to wait array */
	SysMutex	mutex;		/*!< System mutex protecting the
					data structure.  As this data
					structure is used in constructing
					the database mutex, to prevent
					infinite recursion in implementation,
					we fall back to an OS mutex. */
	ulint		res_count;	/*!< count of cell reservations
					since creation of the array */
	ulint		next_free_slot;	/*!< index of the first cell that
					has never been handed out since
					the array was created or last
					reset by sync_array_free_cell() */
	ulint		first_free_slot;/*!< head of the list of freed
					cells (the most recently freed
					slot), linked through
					sync_cell_t::line; ULINT_UNDEFINED
					when the list is empty */
};
160
/** User configured sync array size */
ulong	srv_sync_array_size = 1;

/** Locally stored copy of srv_sync_array_size; assigned in
sync_array_init() */
ulint	sync_array_size;

/** The global array of wait cells for implementation of the database's own
mutexes and read-write locks. Holds sync_array_size entries between
sync_array_init() and sync_array_close(); NULL otherwise. */
sync_array_t**	sync_wait_array;

/** count of how many times an object has been signalled;
reported by sync_array_print() */
ulint	sg_count;

/** Release / acquire the mutex protecting a wait array. */
#define sync_array_exit(a)	mutex_exit(&(a)->mutex)
#define sync_array_enter(a)	mutex_enter(&(a)->mutex)
176
#ifdef UNIV_DEBUG
/******************************************************************//**
Forward declaration: sync_array_wait_event() and
sync_array_deadlock_step() call this function before its definition.
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
@return true if deadlock detected */
static
bool
sync_array_detect_deadlock(
/*=======================*/
	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
				own the mutex to array */
	sync_cell_t*	start,	/*!< in: cell where recursive search started */
	sync_cell_t*	cell,	/*!< in: cell to search */
	ulint		depth);	/*!< in: recursion depth */
#endif /* UNIV_DEBUG */
192
193 /** Constructor
194 Creates a synchronization wait array. It is protected by a mutex
195 which is automatically reserved when the functions operating on it
196 are called.
197 @param[in] num_cells Number of cells to create */
sync_array_t(ulint num_cells)198 sync_array_t::sync_array_t(ulint num_cells)
199 UNIV_NOTHROW
200 :
201 n_reserved(),
202 n_cells(num_cells),
203 array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
204 mutex(),
205 res_count(),
206 next_free_slot(),
207 first_free_slot(ULINT_UNDEFINED)
208 {
209 ut_a(num_cells > 0);
210
211 memset(array, 0x0, sizeof(sync_cell_t) * n_cells);
212
213 /* Then create the mutex to protect the wait array */
214 mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
215 }
216
217 /** Validate the integrity of the wait array. Check
218 that the number of reserved cells equals the count variable.
219 @param[in,out] arr sync wait array */
220 static
221 void
sync_array_validate(sync_array_t * arr)222 sync_array_validate(sync_array_t* arr)
223 {
224 ulint i;
225 ulint count = 0;
226
227 sync_array_enter(arr);
228
229 for (i = 0; i < arr->n_cells; i++) {
230 sync_cell_t* cell;
231
232 cell = sync_array_get_nth_cell(arr, i);
233
234 if (cell->latch.mutex != NULL) {
235 count++;
236 }
237 }
238
239 ut_a(count == arr->n_reserved);
240
241 sync_array_exit(arr);
242 }
243
244 /** Destructor */
~sync_array_t()245 sync_array_t::~sync_array_t()
246 UNIV_NOTHROW
247 {
248 ut_a(n_reserved == 0);
249
250 sync_array_validate(this);
251
252 /* Release the mutex protecting the wait array */
253
254 mutex_free(&mutex);
255
256 UT_DELETE_ARRAY(array);
257 }
258
259 /*****************************************************************//**
260 Gets the nth cell in array.
261 @return cell */
262 UNIV_INTERN
263 sync_cell_t*
sync_array_get_nth_cell(sync_array_t * arr,ulint n)264 sync_array_get_nth_cell(
265 /*====================*/
266 sync_array_t* arr, /*!< in: sync array */
267 ulint n) /*!< in: index */
268 {
269 ut_a(n < arr->n_cells);
270
271 return(arr->array + n);
272 }
273
274 /******************************************************************//**
275 Frees the resources in a wait array. */
276 static
277 void
sync_array_free(sync_array_t * arr)278 sync_array_free(
279 /*============*/
280 sync_array_t* arr) /*!< in, own: sync wait array */
281 {
282 UT_DELETE(arr);
283 }
284
285 /*******************************************************************//**
286 Returns the event that the thread owning the cell waits for. */
287 static
288 os_event_t
sync_cell_get_event(sync_cell_t * cell)289 sync_cell_get_event(
290 /*================*/
291 sync_cell_t* cell) /*!< in: non-empty sync array cell */
292 {
293 switch(cell->request_type) {
294 case SYNC_MUTEX:
295 return(cell->latch.mutex->event());
296 case RW_LOCK_X_WAIT:
297 return(cell->latch.lock->wait_ex_event);
298 default:
299 return(cell->latch.lock->event);
300 }
301 }
302
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
A cell is taken from the list of freed cells if there is one, otherwise
from the never-used tail of the array.
@return sync cell to wait on, or NULL if every cell of this array is
in use */
sync_cell_t*
sync_array_reserve_cell(
/*====================*/
	sync_array_t*	arr,	/*!< in: wait array */
	void*		object, /*!< in: pointer to the object to wait for */
	ulint		type,	/*!< in: lock request type */
	const char*	file,	/*!< in: file where requested */
	unsigned	line)	/*!< in: line where requested */
{
	sync_cell_t*	cell;

	sync_array_enter(arr);

	if (arr->first_free_slot != ULINT_UNDEFINED) {
		/* Pop the head of the free list. A free cell keeps the
		index of the next free cell in its `line` field. */
		ut_ad(arr->first_free_slot < arr->next_free_slot);
		cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
		arr->first_free_slot = cell->line;
	} else if (arr->next_free_slot < arr->n_cells) {
		/* Try and find a slot after the currently allocated slots */
		cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
		++arr->next_free_slot;
	} else {
		sync_array_exit(arr);

		/* This array is full. Return NULL so that the caller
		can try another sync array instance, if there is more
		than one. */
		return(NULL);
	}

	++arr->res_count;

	ut_ad(arr->n_reserved < arr->n_cells);
	ut_ad(arr->next_free_slot <= arr->n_cells);

	++arr->n_reserved;

	/* Reserve the cell. */
	ut_ad(cell->latch.mutex == NULL);

	cell->request_type = type;

	if (cell->request_type == SYNC_MUTEX) {
		cell->latch.mutex = reinterpret_cast<WaitMutex*>(object);
	} else {
		cell->latch.lock = reinterpret_cast<rw_lock_t*>(object);
	}

	cell->waiting = false;

	cell->file = file;
	cell->line = line;

	sync_array_exit(arr);

	/* NOTE: the fields below are filled in after the array mutex
	has been released. */
	cell->thread_id = os_thread_get_curr_id();

	cell->reservation_time = time(NULL);

	/* Make sure the event is reset and also store the value of
	signal_count at which the event was reset. */
	os_event_t	event = sync_cell_get_event(cell);
	cell->signal_count = os_event_reset(event);

	return(cell);
}
373
374 /******************************************************************//**
375 Frees the cell. NOTE! sync_array_wait_event frees the cell
376 automatically! */
377 void
sync_array_free_cell(sync_array_t * arr,sync_cell_t * & cell)378 sync_array_free_cell(
379 /*=================*/
380 sync_array_t* arr, /*!< in: wait array */
381 sync_cell_t*& cell) /*!< in/out: the cell in the array */
382 {
383 sync_array_enter(arr);
384
385 ut_a(cell->latch.mutex != NULL);
386
387 cell->waiting = false;
388 cell->signal_count = 0;
389 cell->latch.mutex = NULL;
390
391 /* Setup the list of free slots in the array */
392 cell->line = arr->first_free_slot;
393
394 arr->first_free_slot = cell - arr->array;
395
396 ut_a(arr->n_reserved > 0);
397 arr->n_reserved--;
398
399 if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
400 #ifdef UNIV_DEBUG
401 for (ulint i = 0; i < arr->next_free_slot; ++i) {
402 cell = sync_array_get_nth_cell(arr, i);
403
404 ut_ad(!cell->waiting);
405 ut_ad(cell->latch.mutex == 0);
406 ut_ad(cell->signal_count == 0);
407 }
408 #endif /* UNIV_DEBUG */
409 arr->next_free_slot = 0;
410 arr->first_free_slot = ULINT_UNDEFINED;
411 }
412 sync_array_exit(arr);
413
414 cell = 0;
415 }
416
417 /******************************************************************//**
418 This function should be called when a thread starts to wait on
419 a wait array cell. In the debug version this function checks
420 if the wait for a semaphore will result in a deadlock, in which
421 case prints info and asserts. */
422 void
sync_array_wait_event(sync_array_t * arr,sync_cell_t * & cell)423 sync_array_wait_event(
424 /*==================*/
425 sync_array_t* arr, /*!< in: wait array */
426 sync_cell_t*& cell) /*!< in: index of the reserved cell */
427 {
428 sync_array_enter(arr);
429
430 ut_ad(!cell->waiting);
431 ut_ad(cell->latch.mutex);
432 ut_ad(os_thread_get_curr_id() == cell->thread_id);
433
434 cell->waiting = true;
435
436 #ifdef UNIV_DEBUG
437
438 /* We use simple enter to the mutex below, because if
439 we cannot acquire it at once, mutex_enter would call
440 recursively sync_array routines, leading to trouble.
441 rw_lock_debug_mutex freezes the debug lists. */
442
443 rw_lock_debug_mutex_enter();
444
445 if (sync_array_detect_deadlock(arr, cell, cell, 0)) {
446
447 ib::fatal() << "########################################"
448 " Deadlock Detected!";
449 }
450
451 rw_lock_debug_mutex_exit();
452 #endif /* UNIV_DEBUG */
453 sync_array_exit(arr);
454
455 tpool::tpool_wait_begin();
456 os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);
457 tpool::tpool_wait_end();
458
459 sync_array_free_cell(arr, cell);
460
461 cell = 0;
462 }
463
464 /******************************************************************//**
465 Reports info of a wait array cell. */
466 static
467 void
sync_array_cell_print(FILE * file,sync_cell_t * cell)468 sync_array_cell_print(
469 /*==================*/
470 FILE* file, /*!< in: file where to print */
471 sync_cell_t* cell) /*!< in: sync cell */
472 {
473 rw_lock_t* rwlock;
474 ulint type;
475 ulint writer;
476
477 type = cell->request_type;
478
479 fprintf(file,
480 "--Thread " ULINTPF " has waited at %s line " ULINTPF
481 " for %.2f seconds the semaphore:\n",
482 ulint(cell->thread_id),
483 innobase_basename(cell->file), cell->line,
484 difftime(time(NULL), cell->reservation_time));
485
486 switch (type) {
487 default:
488 ut_error;
489 case RW_LOCK_X:
490 case RW_LOCK_X_WAIT:
491 case RW_LOCK_SX:
492 case RW_LOCK_S:
493 fputs(type == RW_LOCK_X ? "X-lock on"
494 : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on"
495 : type == RW_LOCK_SX ? "SX-lock on"
496 : "S-lock on", file);
497
498 rwlock = cell->latch.lock;
499
500 if (rwlock) {
501 fprintf(file,
502 " RW-latch at %p created in file %s line %u\n",
503 (void*) rwlock, innobase_basename(rwlock->cfile_name),
504 rwlock->cline);
505
506 writer = rw_lock_get_writer(rwlock);
507
508 if (writer != RW_LOCK_NOT_LOCKED) {
509
510 fprintf(file,
511 "a writer (thread id " ULINTPF ") has"
512 " reserved it in mode %s",
513 ulint(rwlock->writer_thread),
514 writer == RW_LOCK_X ? " exclusive\n"
515 : writer == RW_LOCK_SX ? " SX\n"
516 : " wait exclusive\n");
517 }
518
519 fprintf(file,
520 "number of readers " ULINTPF
521 ", waiters flag %d, "
522 "lock_word: %x\n"
523 "Last time write locked in file %s line %u"
524 #if 0 /* JAN: TODO: FIX LATER */
525 "\nHolder thread " ULINTPF
526 " file %s line " ULINTPF
527 #endif
528 "\n",
529 rw_lock_get_reader_count(rwlock),
530 uint32_t{rwlock->waiters},
531 int32_t{rwlock->lock_word},
532 innobase_basename(rwlock->last_x_file_name),
533 rwlock->last_x_line
534 #if 0 /* JAN: TODO: FIX LATER */
535 , ulint(rwlock->thread_id),
536 innobase_basename(rwlock->file_name),
537 rwlock->line
538 #endif
539 );
540 }
541 break;
542 case SYNC_MUTEX:
543 WaitMutex* mutex = cell->latch.mutex;
544 const WaitMutex::MutexPolicy& policy = mutex->policy();
545 #ifdef UNIV_DEBUG
546 const char* name = policy.context.get_enter_filename();
547 if (name == NULL) {
548 /* The mutex might have been released. */
549 name = "NULL";
550 }
551 #endif /* UNIV_DEBUG */
552
553 if (mutex) {
554 fprintf(file,
555 "Mutex at %p, %s, lock var %x\n"
556 #ifdef UNIV_DEBUG
557 "Last time reserved in file %s line %u"
558 #endif /* UNIV_DEBUG */
559 "\n",
560 (void*) mutex,
561 policy.to_string().c_str(),
562 mutex->state()
563 #ifdef UNIV_DEBUG
564 ,name,
565 policy.context.get_enter_line()
566 #endif /* UNIV_DEBUG */
567 );
568 }
569 break;
570 }
571
572 if (!cell->waiting) {
573 fputs("wait has ended\n", file);
574 }
575 }
576
577 #ifdef UNIV_DEBUG
578 /******************************************************************//**
579 Looks for a cell with the given thread id.
580 @return pointer to cell or NULL if not found */
581 static
582 sync_cell_t*
sync_array_find_thread(sync_array_t * arr,os_thread_id_t thread)583 sync_array_find_thread(
584 /*===================*/
585 sync_array_t* arr, /*!< in: wait array */
586 os_thread_id_t thread) /*!< in: thread id */
587 {
588 ulint i;
589
590 for (i = 0; i < arr->n_cells; i++) {
591 sync_cell_t* cell;
592
593 cell = sync_array_get_nth_cell(arr, i);
594
595 if (cell->latch.mutex != NULL
596 && os_thread_eq(cell->thread_id, thread)) {
597
598 return(cell); /* Found */
599 }
600 }
601
602 return(NULL); /* Not found */
603 }
604
605 /******************************************************************//**
606 Recursion step for deadlock detection.
607 @return TRUE if deadlock detected */
608 static
609 ibool
sync_array_deadlock_step(sync_array_t * arr,sync_cell_t * start,os_thread_id_t thread,ulint pass,ulint depth)610 sync_array_deadlock_step(
611 /*=====================*/
612 sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
613 own the mutex to array */
614 sync_cell_t* start, /*!< in: cell where recursive search
615 started */
616 os_thread_id_t thread, /*!< in: thread to look at */
617 ulint pass, /*!< in: pass value */
618 ulint depth) /*!< in: recursion depth */
619 {
620 sync_cell_t* new_cell;
621
622 if (pass != 0) {
623 /* If pass != 0, then we do not know which threads are
624 responsible of releasing the lock, and no deadlock can
625 be detected. */
626
627 return(FALSE);
628 }
629
630 new_cell = sync_array_find_thread(arr, thread);
631
632 if (new_cell == start) {
633 /* Deadlock */
634 fputs("########################################\n"
635 "DEADLOCK of threads detected!\n", stderr);
636
637 return(TRUE);
638
639 } else if (new_cell) {
640 return(sync_array_detect_deadlock(
641 arr, start, new_cell, depth + 1));
642 }
643 return(FALSE);
644 }
645
646 /**
647 Report an error to stderr.
648 @param lock rw-lock instance
649 @param debug rw-lock debug information
650 @param cell thread context */
651 static
652 void
sync_array_report_error(rw_lock_t * lock,rw_lock_debug_t * debug,sync_cell_t * cell)653 sync_array_report_error(
654 rw_lock_t* lock,
655 rw_lock_debug_t* debug,
656 sync_cell_t* cell)
657 {
658 fprintf(stderr, "rw-lock %p ", (void*) lock);
659 sync_array_cell_print(stderr, cell);
660 rw_lock_debug_print(stderr, debug);
661 }
662
663 /******************************************************************//**
664 This function is called only in the debug version. Detects a deadlock
665 of one or more threads because of waits of semaphores.
666 @return TRUE if deadlock detected */
667 static
668 bool
sync_array_detect_deadlock(sync_array_t * arr,sync_cell_t * start,sync_cell_t * cell,ulint depth)669 sync_array_detect_deadlock(
670 /*=======================*/
671 sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
672 own the mutex to array */
673 sync_cell_t* start, /*!< in: cell where recursive search started */
674 sync_cell_t* cell, /*!< in: cell to search */
675 ulint depth) /*!< in: recursion depth */
676 {
677 rw_lock_t* lock;
678 os_thread_id_t thread;
679 ibool ret;
680 rw_lock_debug_t*debug;
681
682 ut_a(arr);
683 ut_a(start);
684 ut_a(cell);
685 ut_ad(cell->latch.mutex != 0);
686 ut_ad(os_thread_get_curr_id() == start->thread_id);
687 ut_ad(depth < 100);
688
689 depth++;
690
691 if (!cell->waiting) {
692 /* No deadlock here */
693 return(false);
694 }
695
696 switch (cell->request_type) {
697 case SYNC_MUTEX: {
698
699 WaitMutex* mutex = cell->latch.mutex;
700 const WaitMutex::MutexPolicy& policy = mutex->policy();
701
702 if (mutex->state() != MUTEX_STATE_UNLOCKED) {
703 thread = policy.context.get_thread_id();
704
705 /* Note that mutex->thread_id above may be
706 also OS_THREAD_ID_UNDEFINED, because the
707 thread which held the mutex maybe has not
708 yet updated the value, or it has already
709 released the mutex: in this case no deadlock
710 can occur, as the wait array cannot contain
711 a thread with ID_UNDEFINED value. */
712 ret = sync_array_deadlock_step(
713 arr, start, thread, 0, depth);
714
715 if (ret) {
716 const char* name;
717
718 name = policy.context.get_enter_filename();
719
720 if (name == NULL) {
721 /* The mutex might have been
722 released. */
723 name = "NULL";
724 }
725
726 ib::info()
727 << "Mutex " << mutex << " owned by"
728 " thread " << thread
729 << " file " << name << " line "
730 << policy.context.get_enter_line();
731
732 sync_array_cell_print(stderr, cell);
733
734 return(true);
735 }
736 }
737
738 /* No deadlock */
739 return(false);
740 }
741
742 case RW_LOCK_X:
743 case RW_LOCK_X_WAIT:
744
745 lock = cell->latch.lock;
746
747 for (debug = UT_LIST_GET_FIRST(lock->debug_list);
748 debug != NULL;
749 debug = UT_LIST_GET_NEXT(list, debug)) {
750
751 thread = debug->thread_id;
752
753 switch (debug->lock_type) {
754 case RW_LOCK_X:
755 case RW_LOCK_SX:
756 case RW_LOCK_X_WAIT:
757 if (os_thread_eq(thread, cell->thread_id)) {
758 break;
759 }
760 /* fall through */
761 case RW_LOCK_S:
762
763 /* The (wait) x-lock request can block
764 infinitely only if someone (can be also cell
765 thread) is holding s-lock, or someone
766 (cannot be cell thread) (wait) x-lock or
767 sx-lock, and he is blocked by start thread */
768
769 ret = sync_array_deadlock_step(
770 arr, start, thread, debug->pass,
771 depth);
772
773 if (ret) {
774 sync_array_report_error(
775 lock, debug, cell);
776 rw_lock_debug_print(stderr, debug);
777 return(TRUE);
778 }
779 }
780 }
781
782 return(false);
783
784 case RW_LOCK_SX:
785
786 lock = cell->latch.lock;
787
788 for (debug = UT_LIST_GET_FIRST(lock->debug_list);
789 debug != 0;
790 debug = UT_LIST_GET_NEXT(list, debug)) {
791
792 thread = debug->thread_id;
793
794 switch (debug->lock_type) {
795 case RW_LOCK_X:
796 case RW_LOCK_SX:
797 case RW_LOCK_X_WAIT:
798
799 if (os_thread_eq(thread, cell->thread_id)) {
800 break;
801 }
802
803 /* The sx-lock request can block infinitely
804 only if someone (can be also cell thread) is
805 holding (wait) x-lock or sx-lock, and he is
806 blocked by start thread */
807
808 ret = sync_array_deadlock_step(
809 arr, start, thread, debug->pass,
810 depth);
811
812 if (ret) {
813 sync_array_report_error(
814 lock, debug, cell);
815 return(TRUE);
816 }
817 }
818 }
819
820 return(false);
821
822 case RW_LOCK_S:
823
824 lock = cell->latch.lock;
825
826 for (debug = UT_LIST_GET_FIRST(lock->debug_list);
827 debug != 0;
828 debug = UT_LIST_GET_NEXT(list, debug)) {
829
830 thread = debug->thread_id;
831
832 if (debug->lock_type == RW_LOCK_X
833 || debug->lock_type == RW_LOCK_X_WAIT) {
834
835 /* The s-lock request can block infinitely
836 only if someone (can also be cell thread) is
837 holding (wait) x-lock, and he is blocked by
838 start thread */
839
840 ret = sync_array_deadlock_step(
841 arr, start, thread, debug->pass,
842 depth);
843
844 if (ret) {
845 sync_array_report_error(
846 lock, debug, cell);
847 return(TRUE);
848 }
849 }
850 }
851
852 return(false);
853
854 default:
855 ut_error;
856 }
857
858 return(true);
859 }
860 #endif /* UNIV_DEBUG */
861
862 /**********************************************************************//**
863 Prints warnings of long semaphore waits to stderr.
864 @return TRUE if fatal semaphore wait threshold was exceeded */
865 static
866 bool
sync_array_print_long_waits_low(sync_array_t * arr,os_thread_id_t * waiter,const void ** sema,ibool * noticed)867 sync_array_print_long_waits_low(
868 /*============================*/
869 sync_array_t* arr, /*!< in: sync array instance */
870 os_thread_id_t* waiter, /*!< out: longest waiting thread */
871 const void** sema, /*!< out: longest-waited-for semaphore */
872 ibool* noticed)/*!< out: TRUE if long wait noticed */
873 {
874 double fatal_timeout = static_cast<double>(
875 srv_fatal_semaphore_wait_threshold);
876 ibool fatal = FALSE;
877 double longest_diff = 0;
878 ulint i;
879
880 /* For huge tables, skip the check during CHECK TABLE etc... */
881 if (btr_validate_index_running) {
882 return(false);
883 }
884
885 #if defined HAVE_valgrind && !__has_feature(memory_sanitizer)
886 /* Increase the timeouts if running under valgrind because it executes
887 extremely slowly. HAVE_valgrind does not necessary mean that
888 we are running under valgrind but we have no better way to tell.
889 See Bug#58432 innodb.innodb_bug56143 fails under valgrind
890 for an example */
891 # define SYNC_ARRAY_TIMEOUT 2400
892 fatal_timeout *= 10;
893 #else
894 # define SYNC_ARRAY_TIMEOUT 240
895 #endif
896 const time_t now = time(NULL);
897
898 for (ulint i = 0; i < arr->n_cells; i++) {
899
900 sync_cell_t* cell;
901 void* latch;
902
903 cell = sync_array_get_nth_cell(arr, i);
904
905 latch = cell->latch.mutex;
906
907 if (latch == NULL || !cell->waiting) {
908
909 continue;
910 }
911
912 double diff = difftime(now, cell->reservation_time);
913
914 if (diff > SYNC_ARRAY_TIMEOUT) {
915 ib::warn() << "A long semaphore wait:";
916 sync_array_cell_print(stderr, cell);
917 *noticed = TRUE;
918 }
919
920 if (diff > fatal_timeout) {
921 fatal = TRUE;
922 }
923
924 if (diff > longest_diff) {
925 longest_diff = diff;
926 *sema = latch;
927 *waiter = cell->thread_id;
928 }
929 }
930
931 /* We found a long semaphore wait, print all threads that are
932 waiting for a semaphore. */
933 if (*noticed) {
934 for (i = 0; i < arr->n_cells; i++) {
935 void* wait_object;
936 sync_cell_t* cell;
937
938 cell = sync_array_get_nth_cell(arr, i);
939
940 wait_object = cell->latch.mutex;
941
942 if (wait_object == NULL || !cell->waiting) {
943
944 continue;
945 }
946
947 ib::info() << "A semaphore wait:";
948 sync_array_cell_print(stderr, cell);
949 }
950 }
951
952 #undef SYNC_ARRAY_TIMEOUT
953
954 return(fatal);
955 }
956
957 /**********************************************************************//**
958 Prints warnings of long semaphore waits to stderr.
959 @return TRUE if fatal semaphore wait threshold was exceeded */
960 ibool
sync_array_print_long_waits(os_thread_id_t * waiter,const void ** sema)961 sync_array_print_long_waits(
962 /*========================*/
963 os_thread_id_t* waiter, /*!< out: longest waiting thread */
964 const void** sema) /*!< out: longest-waited-for semaphore */
965 {
966 ulint i;
967 ibool fatal = FALSE;
968 ibool noticed = FALSE;
969
970 for (i = 0; i < sync_array_size; ++i) {
971
972 sync_array_t* arr = sync_wait_array[i];
973
974 sync_array_enter(arr);
975
976 if (sync_array_print_long_waits_low(
977 arr, waiter, sema, ¬iced)) {
978
979 fatal = TRUE;
980 }
981
982 sync_array_exit(arr);
983 }
984
985 if (noticed) {
986 /* If some crucial semaphore is reserved, then also the InnoDB
987 Monitor can hang, and we do not get diagnostics. Since in
988 many cases an InnoDB hang is caused by a pwrite() or a pread()
989 call hanging inside the operating system, let us print right
990 now the values of pending calls of these. */
991
992 fprintf(stderr,
993 "InnoDB: Pending reads " UINT64PF
994 ", writes " UINT64PF "\n",
995 MONITOR_VALUE(MONITOR_OS_PENDING_READS),
996 MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
997
998 lock_wait_timeout_task(nullptr);
999 }
1000
1001 return(fatal);
1002 }
1003
1004 /**********************************************************************//**
1005 Prints info of the wait array. */
1006 static
1007 void
sync_array_print_info_low(FILE * file,sync_array_t * arr)1008 sync_array_print_info_low(
1009 /*======================*/
1010 FILE* file, /*!< in: file where to print */
1011 sync_array_t* arr) /*!< in: wait array */
1012 {
1013 ulint i;
1014 ulint count = 0;
1015
1016 fprintf(file,
1017 "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
1018 arr->res_count);
1019
1020 for (i = 0; count < arr->n_reserved; ++i) {
1021 sync_cell_t* cell;
1022
1023 cell = sync_array_get_nth_cell(arr, i);
1024
1025 if (cell->latch.mutex != 0) {
1026 count++;
1027 sync_array_cell_print(file, cell);
1028 }
1029 }
1030 }
1031
1032 /**********************************************************************//**
1033 Prints info of the wait array. */
1034 static
1035 void
sync_array_print_info(FILE * file,sync_array_t * arr)1036 sync_array_print_info(
1037 /*==================*/
1038 FILE* file, /*!< in: file where to print */
1039 sync_array_t* arr) /*!< in: wait array */
1040 {
1041 sync_array_enter(arr);
1042
1043 sync_array_print_info_low(file, arr);
1044
1045 sync_array_exit(arr);
1046 }
1047
1048 /** Create the primary system wait arrays */
sync_array_init()1049 void sync_array_init()
1050 {
1051 ut_a(sync_wait_array == NULL);
1052 ut_a(srv_sync_array_size > 0);
1053 ut_a(srv_max_n_threads > 0);
1054
1055 sync_array_size = srv_sync_array_size;
1056
1057 sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
1058
1059 ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
1060
1061 for (ulint i = 0; i < sync_array_size; ++i) {
1062
1063 sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
1064 }
1065 }
1066
1067 /** Destroy the sync array wait sub-system. */
sync_array_close()1068 void sync_array_close()
1069 {
1070 for (ulint i = 0; i < sync_array_size; ++i) {
1071 sync_array_free(sync_wait_array[i]);
1072 }
1073
1074 UT_DELETE_ARRAY(sync_wait_array);
1075 sync_wait_array = NULL;
1076 }
1077
1078 /**********************************************************************//**
1079 Print info about the sync array(s). */
1080 void
sync_array_print(FILE * file)1081 sync_array_print(
1082 /*=============*/
1083 FILE* file) /*!< in/out: Print to this stream */
1084 {
1085 for (ulint i = 0; i < sync_array_size; ++i) {
1086 sync_array_print_info(file, sync_wait_array[i]);
1087 }
1088
1089 fprintf(file,
1090 "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
1091
1092 }
1093
1094 /**********************************************************************//**
1095 Prints info of the wait array without using any mutexes/semaphores. */
1096 UNIV_INTERN
1097 void
sync_array_print_innodb(void)1098 sync_array_print_innodb(void)
1099 /*=========================*/
1100 {
1101 ulint i;
1102 sync_array_t* arr = sync_array_get();
1103
1104 fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n", stderr);
1105
1106 for (i = 0; i < arr->n_cells; i++) {
1107 void* wait_object;
1108 sync_cell_t* cell;
1109
1110 cell = sync_array_get_nth_cell(arr, i);
1111
1112 wait_object = cell->latch.mutex;
1113
1114 if (wait_object == NULL || !cell->waiting) {
1115
1116 continue;
1117 }
1118
1119 fputs("InnoDB: Warning: semaphore wait:\n",
1120 stderr);
1121 sync_array_cell_print(stderr, cell);
1122 }
1123
1124 fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
1125
1126 }
1127
1128 /**********************************************************************//**
1129 Get number of items on sync array. */
1130 UNIV_INTERN
1131 ulint
sync_arr_get_n_items(void)1132 sync_arr_get_n_items(void)
1133 /*======================*/
1134 {
1135 sync_array_t* sync_arr = sync_array_get();
1136 return (ulint) sync_arr->n_cells;
1137 }
1138
1139 /******************************************************************//**
1140 Get specified item from sync array if it is reserved. Set given
1141 pointer to array item if it is reserved.
1142 @return true if item is reserved, false othervise */
1143 UNIV_INTERN
1144 ibool
sync_arr_get_item(ulint i,sync_cell_t ** cell)1145 sync_arr_get_item(
1146 /*==============*/
1147 ulint i, /*!< in: requested item */
1148 sync_cell_t **cell) /*!< out: cell contents if item
1149 reserved */
1150 {
1151 sync_array_t* sync_arr;
1152 sync_cell_t* wait_cell;
1153 void* wait_object;
1154 ibool found = FALSE;
1155
1156 sync_arr = sync_array_get();
1157 wait_cell = sync_array_get_nth_cell(sync_arr, i);
1158
1159 if (wait_cell) {
1160 wait_object = wait_cell->latch.mutex;
1161
1162 if(wait_object != NULL && wait_cell->waiting) {
1163 found = TRUE;
1164 *cell = wait_cell;
1165 }
1166 }
1167
1168 return found;
1169 }
1170
1171 /*******************************************************************//**
1172 Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
1173 Loop through each item on sync array, and extract the column
1174 information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
1175 @return 0 on success */
1176 UNIV_INTERN
1177 int
sync_arr_fill_sys_semphore_waits_table(THD * thd,TABLE_LIST * tables,Item *)1178 sync_arr_fill_sys_semphore_waits_table(
1179 /*===================================*/
1180 THD* thd, /*!< in: thread */
1181 TABLE_LIST* tables, /*!< in/out: tables to fill */
1182 Item* ) /*!< in: condition (not used) */
1183 {
1184 Field** fields;
1185 ulint n_items;
1186
1187 DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
1188 RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
1189
1190 /* deny access to user without PROCESS_ACL privilege */
1191 if (check_global_access(thd, PROCESS_ACL)) {
1192 DBUG_RETURN(0);
1193 }
1194
1195 fields = tables->table->field;
1196 n_items = sync_arr_get_n_items();
1197 ulint type;
1198
1199 for(ulint i=0; i < n_items;i++) {
1200 sync_cell_t *cell=NULL;
1201 if (sync_arr_get_item(i, &cell)) {
1202 WaitMutex* mutex;
1203 type = cell->request_type;
1204 /* JAN: FIXME
1205 OK(fields[SYS_SEMAPHORE_WAITS_THREAD_ID]->store(,
1206 ulint(cell->thread), true));
1207 */
1208 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
1209 OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
1210 fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
1211 OK(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME]->store(
1212 difftime(time(NULL),
1213 cell->reservation_time)));
1214
1215 if (type == SYNC_MUTEX) {
1216 mutex = static_cast<WaitMutex*>(cell->latch.mutex);
1217
1218 if (mutex) {
1219 // JAN: FIXME
1220 // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
1221 OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)mutex, true));
1222 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
1223 //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(mutex->thread_id, true));
1224 //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
1225 //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
1226 //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
1227 //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
1228 //OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
1229 //fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
1230 //OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(mutex->waiters, true));
1231 //OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(mutex->lock_word, true));
1232 //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
1233 //OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
1234 //fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
1235 //OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(mutex->count_os_wait, true));
1236 }
1237 } else if (type == RW_LOCK_X_WAIT
1238 || type == RW_LOCK_X
1239 || type == RW_LOCK_SX
1240 || type == RW_LOCK_S) {
1241 rw_lock_t* rwlock=NULL;
1242
1243 rwlock = static_cast<rw_lock_t *> (cell->latch.lock);
1244
1245 if (rwlock) {
1246 ulint writer = rw_lock_get_writer(rwlock);
1247
1248 OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)rwlock, true));
1249 if (type == RW_LOCK_X) {
1250 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X"));
1251 } else if (type == RW_LOCK_X_WAIT) {
1252 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT"));
1253 } else if (type == RW_LOCK_S) {
1254 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S"));
1255 } else if (type == RW_LOCK_SX) {
1256 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX"));
1257 }
1258
1259 if (writer != RW_LOCK_NOT_LOCKED) {
1260 // JAN: FIXME
1261 // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
1262 OK(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD]->store(ulint(rwlock->writer_thread), true));
1263
1264 if (writer == RW_LOCK_X) {
1265 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X"));
1266 } else if (writer == RW_LOCK_X_WAIT) {
1267 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT"));
1268 } else if (type == RW_LOCK_SX) {
1269 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX"));
1270 }
1271
1272 //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(rwlock->thread_id, true));
1273 //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
1274 //OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(rwlock->line, true));
1275 //fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
1276 OK(fields[SYS_SEMAPHORE_WAITS_READERS]->store(rw_lock_get_reader_count(rwlock), true));
1277 OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(
1278 rwlock->waiters,
1279 true));
1280 OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(
1281 rwlock->lock_word,
1282 true));
1283 OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
1284 OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(rwlock->last_x_line, true));
1285 fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
1286 OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(rwlock->count_os_wait, true));
1287 }
1288 }
1289 }
1290
1291 OK(schema_table_store_record(thd, tables->table));
1292 }
1293 }
1294
1295 DBUG_RETURN(0);
1296 }
1297