1 /*****************************************************************************
2
3 Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2008, Google Inc.
5
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License, version 2.0,
14 as published by the Free Software Foundation.
15
16 This program is also distributed with certain software (including
17 but not limited to OpenSSL) that is licensed under separate terms,
18 as designated in a particular file or component or in included license
19 documentation. The authors of MySQL hereby grant you an additional
20 permission to link the program and your derivative works with the
21 separately licensed software that they have included with MySQL.
22
23 This program is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 GNU General Public License, version 2.0, for more details.
27
28 You should have received a copy of the GNU General Public License along with
29 this program; if not, write to the Free Software Foundation, Inc.,
30 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
31
32 *****************************************************************************/
33
34 /**************************************************//**
35 @file sync/sync0arr.cc
36 The wait array used in synchronization primitives
37
38 Created 9/5/1995 Heikki Tuuri
39 *******************************************************/
40
41 #include "sync0arr.h"
42 #ifdef UNIV_NONINL
43 #include "sync0arr.ic"
44 #endif
45
46 #include "sync0sync.h"
47 #include "sync0rw.h"
48 #include "os0sync.h"
49 #include "os0file.h"
50 #include "lock0lock.h"
51 #include "srv0srv.h"
52 #include "ha_prototypes.h"
53
54 /*
55 WAIT ARRAY
56 ==========
57
The wait array consists of cells each of which has an
operating system event object created for it. The threads
60 waiting for a mutex, for example, can reserve a cell
61 in the array and suspend themselves to wait for the event
62 to become signaled. When using the wait array, remember to make
63 sure that some thread holding the synchronization object
64 will eventually know that there is a waiter in the array and
65 signal the object, to prevent infinite wait.
66 Why we chose to implement a wait array? First, to make
67 mutexes fast, we had to code our own implementation of them,
68 which only in usually uncommon cases resorts to using
69 slow operating system primitives. Then we had the choice of
70 assigning a unique OS event for each mutex, which would
71 be simpler, or using a global wait array. In some operating systems,
72 the global wait array solution is more efficient and flexible,
73 because we can do with a very small number of OS events,
74 say 200. In NT 3.51, allocating events seems to be a quadratic
75 algorithm, because 10 000 events are created fast, but
76 100 000 events takes a couple of minutes to create.
77
78 As of 5.0.30 the above mentioned design is changed. Since now
79 OS can handle millions of wait events efficiently, we no longer
80 have this concept of each cell of wait array having one event.
81 Instead, now the event that a thread wants to wait on is embedded
82 in the wait object (mutex or rw_lock). We still keep the global
wait array for the sake of diagnostics and also to avoid infinite
wait. The error_monitor thread scans the global wait array to signal
any waiting threads who have missed the signal. */
86
/** A cell where an individual thread may wait suspended
until a resource is released. The suspending is implemented
using an operating system event semaphore. */
struct sync_cell_t {
	void*		wait_object;	/*!< pointer to the object the
					thread is waiting for; if NULL
					the cell is free for use */
	ib_mutex_t*	old_wait_mutex;	/*!< the latest wait mutex in cell;
					retained after the cell is freed so
					that diagnostics can still print
					the semaphore (see
					sync_array_cell_print()) */
	rw_lock_t*	old_wait_rw_lock;
					/*!< the latest wait rw-lock
					in cell; retained for the same
					diagnostic purpose */
	ulint		request_type;	/*!< lock type requested on the
					object: SYNC_MUTEX, RW_LOCK_EX,
					RW_LOCK_WAIT_EX or RW_LOCK_SHARED */
	const char*	file;		/*!< in debug version file where
					requested */
	ulint		line;		/*!< in debug version line where
					requested */
	os_thread_id_t	thread;		/*!< thread id of this waiting
					thread */
	ibool		waiting;	/*!< TRUE if the thread has already
					called sync_array_event_wait
					on this cell */
	ib_int64_t	signal_count;	/*!< We capture the signal_count
					of the wait_object when we
					reset the event. This value is
					then passed on to os_event_wait
					and we wait only if the event
					has not been signalled in the
					period between the reset and
					wait call. */
	time_t		reservation_time;/*!< time when the thread reserved
					the wait cell; used to measure and
					report long semaphore waits */
};
120
121 /* NOTE: It is allowed for a thread to wait
122 for an event allocated for the array without owning the
123 protecting mutex (depending on the case: OS or database mutex), but
124 all changes (set or reset) to the state of the event must be made
125 while owning the mutex. */
126
/** Synchronization array: a fixed-size pool of wait cells plus the
mutex that protects it. All fields are protected by os_mutex. */
struct sync_array_t {
	ulint		n_reserved;	/*!< number of currently reserved
					cells in the wait array */
	ulint		n_cells;	/*!< number of cells in the
					wait array */
	sync_cell_t*	array;		/*!< pointer to wait array */
	ib_mutex_t	mutex;		/*!< possible database mutex
					protecting this data structure
					(currently unused; see os_mutex) */
	os_ib_mutex_t	os_mutex;	/*!< Possible operating system mutex
					protecting the data structure.
					As this data structure is used in
					constructing the database mutex,
					to prevent infinite recursion
					in implementation, we fall back to
					an OS mutex. */
	ulint		res_count;	/*!< count of cell reservations
					since creation of the array */
};
146
147 /** User configured sync array size */
148 UNIV_INTERN ulong srv_sync_array_size = 32;
149
150 /** Locally stored copy of srv_sync_array_size */
151 static ulint sync_array_size;
152
153 /** The global array of wait cells for implementation of the database's own
154 mutexes and read-write locks */
155 static sync_array_t** sync_wait_array;
156
157 /** count of how many times an object has been signalled */
158 static ulint sg_count;
159
160 #ifdef UNIV_SYNC_DEBUG
161 /******************************************************************//**
162 This function is called only in the debug version. Detects a deadlock
163 of one or more threads because of waits of semaphores.
164 @return TRUE if deadlock detected */
165 static
166 ibool
167 sync_array_detect_deadlock(
168 /*=======================*/
169 sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
170 own the mutex to array */
171 sync_cell_t* start, /*!< in: cell where recursive search started */
172 sync_cell_t* cell, /*!< in: cell to search */
173 ulint depth); /*!< in: recursion depth */
174 #endif /* UNIV_SYNC_DEBUG */
175
176 /*****************************************************************//**
177 Gets the nth cell in array.
178 @return cell */
179 static
180 sync_cell_t*
sync_array_get_nth_cell(sync_array_t * arr,ulint n)181 sync_array_get_nth_cell(
182 /*====================*/
183 sync_array_t* arr, /*!< in: sync array */
184 ulint n) /*!< in: index */
185 {
186 ut_a(arr);
187 ut_a(n < arr->n_cells);
188
189 return(arr->array + n);
190 }
191
192 /******************************************************************//**
193 Reserves the mutex semaphore protecting a sync array. */
194 static
195 void
sync_array_enter(sync_array_t * arr)196 sync_array_enter(
197 /*=============*/
198 sync_array_t* arr) /*!< in: sync wait array */
199 {
200 os_mutex_enter(arr->os_mutex);
201 }
202
203 /******************************************************************//**
204 Releases the mutex semaphore protecting a sync array. */
205 static
206 void
sync_array_exit(sync_array_t * arr)207 sync_array_exit(
208 /*============*/
209 sync_array_t* arr) /*!< in: sync wait array */
210 {
211 os_mutex_exit(arr->os_mutex);
212 }
213
214 /*******************************************************************//**
215 Creates a synchronization wait array. It is protected by a mutex
216 which is automatically reserved when the functions operating on it
217 are called.
218 @return own: created wait array */
219 static
220 sync_array_t*
sync_array_create(ulint n_cells)221 sync_array_create(
222 /*==============*/
223 ulint n_cells) /*!< in: number of cells in the array
224 to create */
225 {
226 ulint sz;
227 sync_array_t* arr;
228
229 ut_a(n_cells > 0);
230
231 /* Allocate memory for the data structures */
232 arr = static_cast<sync_array_t*>(ut_malloc(sizeof(*arr)));
233 memset(arr, 0x0, sizeof(*arr));
234
235 sz = sizeof(sync_cell_t) * n_cells;
236 arr->array = static_cast<sync_cell_t*>(ut_malloc(sz));
237 memset(arr->array, 0x0, sz);
238
239 arr->n_cells = n_cells;
240
241 /* Then create the mutex to protect the wait array complex */
242 arr->os_mutex = os_mutex_create();
243
244 return(arr);
245 }
246
247 /******************************************************************//**
248 Frees the resources in a wait array. */
249 static
250 void
sync_array_free(sync_array_t * arr)251 sync_array_free(
252 /*============*/
253 sync_array_t* arr) /*!< in, own: sync wait array */
254 {
255 ut_a(arr->n_reserved == 0);
256
257 sync_array_validate(arr);
258
259 /* Release the mutex protecting the wait array complex */
260
261 os_mutex_free(arr->os_mutex);
262
263 ut_free(arr->array);
264 ut_free(arr);
265 }
266
267 /********************************************************************//**
268 Validates the integrity of the wait array. Checks
269 that the number of reserved cells equals the count variable. */
270 UNIV_INTERN
271 void
sync_array_validate(sync_array_t * arr)272 sync_array_validate(
273 /*================*/
274 sync_array_t* arr) /*!< in: sync wait array */
275 {
276 ulint i;
277 sync_cell_t* cell;
278 ulint count = 0;
279
280 sync_array_enter(arr);
281
282 for (i = 0; i < arr->n_cells; i++) {
283 cell = sync_array_get_nth_cell(arr, i);
284 if (cell->wait_object != NULL) {
285 count++;
286 }
287 }
288
289 ut_a(count == arr->n_reserved);
290
291 sync_array_exit(arr);
292 }
293
294 /*******************************************************************//**
295 Returns the event that the thread owning the cell waits for. */
296 static
297 os_event_t
sync_cell_get_event(sync_cell_t * cell)298 sync_cell_get_event(
299 /*================*/
300 sync_cell_t* cell) /*!< in: non-empty sync array cell */
301 {
302 ulint type = cell->request_type;
303
304 if (type == SYNC_MUTEX) {
305 return(((ib_mutex_t*) cell->wait_object)->event);
306 } else if (type == RW_LOCK_WAIT_EX) {
307 return(((rw_lock_t*) cell->wait_object)->wait_ex_event);
308 } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
309 return(((rw_lock_t*) cell->wait_object)->event);
310 }
311 }
312
313 /******************************************************************//**
314 Reserves a wait array cell for waiting for an object.
315 The event of the cell is reset to nonsignalled state.
316 @return true if free cell is found, otherwise false */
317 UNIV_INTERN
318 bool
sync_array_reserve_cell(sync_array_t * arr,void * object,ulint type,const char * file,ulint line,ulint * index)319 sync_array_reserve_cell(
320 /*====================*/
321 sync_array_t* arr, /*!< in: wait array */
322 void* object, /*!< in: pointer to the object to wait for */
323 ulint type, /*!< in: lock request type */
324 const char* file, /*!< in: file where requested */
325 ulint line, /*!< in: line where requested */
326 ulint* index) /*!< out: index of the reserved cell */
327 {
328 sync_cell_t* cell;
329 os_event_t event;
330 ulint i;
331
332 ut_a(object);
333 ut_a(index);
334
335 sync_array_enter(arr);
336
337 arr->res_count++;
338
339 /* Reserve a new cell. */
340 for (i = 0; i < arr->n_cells; i++) {
341 cell = sync_array_get_nth_cell(arr, i);
342
343 if (cell->wait_object == NULL) {
344
345 cell->waiting = FALSE;
346 cell->wait_object = object;
347
348 if (type == SYNC_MUTEX) {
349 cell->old_wait_mutex =
350 static_cast<ib_mutex_t*>(object);
351 } else {
352 cell->old_wait_rw_lock =
353 static_cast<rw_lock_t*>(object);
354 }
355
356 cell->request_type = type;
357
358 cell->file = file;
359 cell->line = line;
360
361 arr->n_reserved++;
362
363 *index = i;
364
365 sync_array_exit(arr);
366
367 /* Make sure the event is reset and also store
368 the value of signal_count at which the event
369 was reset. */
370 event = sync_cell_get_event(cell);
371 cell->signal_count = os_event_reset(event);
372
373 cell->reservation_time = ut_time();
374
375 cell->thread = os_thread_get_curr_id();
376
377 return(true);
378 }
379 }
380
381 /* No free cell found */
382 return false;
383 }
384
385 /******************************************************************//**
386 This function should be called when a thread starts to wait on
387 a wait array cell. In the debug version this function checks
388 if the wait for a semaphore will result in a deadlock, in which
389 case prints info and asserts. */
390 UNIV_INTERN
391 void
sync_array_wait_event(sync_array_t * arr,ulint index)392 sync_array_wait_event(
393 /*==================*/
394 sync_array_t* arr, /*!< in: wait array */
395 ulint index) /*!< in: index of the reserved cell */
396 {
397 sync_cell_t* cell;
398 os_event_t event;
399
400 ut_a(arr);
401
402 sync_array_enter(arr);
403
404 cell = sync_array_get_nth_cell(arr, index);
405
406 ut_a(cell->wait_object);
407 ut_a(!cell->waiting);
408 ut_ad(os_thread_get_curr_id() == cell->thread);
409
410 event = sync_cell_get_event(cell);
411 cell->waiting = TRUE;
412
413 #ifdef UNIV_SYNC_DEBUG
414
415 /* We use simple enter to the mutex below, because if
416 we cannot acquire it at once, mutex_enter would call
417 recursively sync_array routines, leading to trouble.
418 rw_lock_debug_mutex freezes the debug lists. */
419
420 rw_lock_debug_mutex_enter();
421
422 if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
423
424 fputs("########################################\n", stderr);
425 ut_error;
426 }
427
428 rw_lock_debug_mutex_exit();
429 #endif
430 sync_array_exit(arr);
431
432 os_event_wait_low(event, cell->signal_count);
433
434 sync_array_free_cell(arr, index);
435 }
436
437 /******************************************************************//**
438 Reports info of a wait array cell. */
439 static
440 void
sync_array_cell_print(FILE * file,sync_cell_t * cell)441 sync_array_cell_print(
442 /*==================*/
443 FILE* file, /*!< in: file where to print */
444 sync_cell_t* cell) /*!< in: sync cell */
445 {
446 ib_mutex_t* mutex;
447 rw_lock_t* rwlock;
448 ulint type;
449 ulint writer;
450
451 type = cell->request_type;
452
453 fprintf(file,
454 "--Thread %lu has waited at %s line %lu"
455 " for %.2f seconds the semaphore:\n",
456 (ulong) os_thread_pf(cell->thread),
457 innobase_basename(cell->file), (ulong) cell->line,
458 difftime(time(NULL), cell->reservation_time));
459
460 if (type == SYNC_MUTEX) {
461 /* We use old_wait_mutex in case the cell has already
462 been freed meanwhile */
463 mutex = cell->old_wait_mutex;
464
465 fprintf(file,
466 "Mutex at %p created file %s line %lu, lock var %lu\n"
467 #ifdef UNIV_SYNC_DEBUG
468 "Last time reserved in file %s line %lu, "
469 #endif /* UNIV_SYNC_DEBUG */
470 "waiters flag %lu\n",
471 (void*) mutex, innobase_basename(mutex->cfile_name),
472 (ulong) mutex->cline,
473 (ulong) mutex->lock_word,
474 #ifdef UNIV_SYNC_DEBUG
475 mutex->file_name, (ulong) mutex->line,
476 #endif /* UNIV_SYNC_DEBUG */
477 (ulong) mutex->waiters);
478
479 } else if (type == RW_LOCK_EX
480 || type == RW_LOCK_WAIT_EX
481 || type == RW_LOCK_SHARED) {
482
483 fputs(type == RW_LOCK_EX ? "X-lock on"
484 : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on"
485 : "S-lock on", file);
486
487 rwlock = cell->old_wait_rw_lock;
488
489 fprintf(file,
490 " RW-latch at %p created in file %s line %lu\n",
491 (void*) rwlock, innobase_basename(rwlock->cfile_name),
492 (ulong) rwlock->cline);
493 writer = rw_lock_get_writer(rwlock);
494 if (writer != RW_LOCK_NOT_LOCKED) {
495 fprintf(file,
496 "a writer (thread id %lu) has"
497 " reserved it in mode %s",
498 (ulong) os_thread_pf(rwlock->writer_thread),
499 writer == RW_LOCK_EX
500 ? " exclusive\n"
501 : " wait exclusive\n");
502 }
503
504 fprintf(file,
505 "number of readers %lu, waiters flag %lu, "
506 "lock_word: %lx\n"
507 "Last time read locked in file %s line %lu\n"
508 "Last time write locked in file %s line %lu\n",
509 (ulong) rw_lock_get_reader_count(rwlock),
510 (ulong) rwlock->waiters,
511 rwlock->lock_word,
512 innobase_basename(rwlock->last_s_file_name),
513 (ulong) rwlock->last_s_line,
514 rwlock->last_x_file_name,
515 (ulong) rwlock->last_x_line);
516 } else {
517 ut_error;
518 }
519
520 if (!cell->waiting) {
521 fputs("wait has ended\n", file);
522 }
523 }
524
525 #ifdef UNIV_SYNC_DEBUG
526 /******************************************************************//**
527 Looks for a cell with the given thread id.
528 @return pointer to cell or NULL if not found */
529 static
530 sync_cell_t*
sync_array_find_thread(sync_array_t * arr,os_thread_id_t thread)531 sync_array_find_thread(
532 /*===================*/
533 sync_array_t* arr, /*!< in: wait array */
534 os_thread_id_t thread) /*!< in: thread id */
535 {
536 ulint i;
537 sync_cell_t* cell;
538
539 for (i = 0; i < arr->n_cells; i++) {
540
541 cell = sync_array_get_nth_cell(arr, i);
542
543 if (cell->wait_object != NULL
544 && os_thread_eq(cell->thread, thread)) {
545
546 return(cell); /* Found */
547 }
548 }
549
550 return(NULL); /* Not found */
551 }
552
553 /******************************************************************//**
554 Recursion step for deadlock detection.
555 @return TRUE if deadlock detected */
556 static
557 ibool
sync_array_deadlock_step(sync_array_t * arr,sync_cell_t * start,os_thread_id_t thread,ulint pass,ulint depth)558 sync_array_deadlock_step(
559 /*=====================*/
560 sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
561 own the mutex to array */
562 sync_cell_t* start, /*!< in: cell where recursive search
563 started */
564 os_thread_id_t thread, /*!< in: thread to look at */
565 ulint pass, /*!< in: pass value */
566 ulint depth) /*!< in: recursion depth */
567 {
568 sync_cell_t* new_cell;
569
570 if (pass != 0) {
571 /* If pass != 0, then we do not know which threads are
572 responsible of releasing the lock, and no deadlock can
573 be detected. */
574
575 return(FALSE);
576 }
577
578 new_cell = sync_array_find_thread(arr, thread);
579
580 if (new_cell == start) {
581 /* Deadlock */
582 fputs("########################################\n"
583 "DEADLOCK of threads detected!\n", stderr);
584
585 return(TRUE);
586
587 } else if (new_cell) {
588 return(sync_array_detect_deadlock(
589 arr, start, new_cell, depth + 1));
590 }
591 return(FALSE);
592 }
593
594 /******************************************************************//**
595 This function is called only in the debug version. Detects a deadlock
596 of one or more threads because of waits of semaphores.
597 @return TRUE if deadlock detected */
598 static
599 ibool
sync_array_detect_deadlock(sync_array_t * arr,sync_cell_t * start,sync_cell_t * cell,ulint depth)600 sync_array_detect_deadlock(
601 /*=======================*/
602 sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
603 own the mutex to array */
604 sync_cell_t* start, /*!< in: cell where recursive search started */
605 sync_cell_t* cell, /*!< in: cell to search */
606 ulint depth) /*!< in: recursion depth */
607 {
608 ib_mutex_t* mutex;
609 rw_lock_t* lock;
610 os_thread_id_t thread;
611 ibool ret;
612 rw_lock_debug_t*debug;
613
614 ut_a(arr);
615 ut_a(start);
616 ut_a(cell);
617 ut_ad(cell->wait_object);
618 ut_ad(os_thread_get_curr_id() == start->thread);
619 ut_ad(depth < 100);
620
621 depth++;
622
623 if (!cell->waiting) {
624
625 return(FALSE); /* No deadlock here */
626 }
627
628 if (cell->request_type == SYNC_MUTEX) {
629
630 mutex = static_cast<ib_mutex_t*>(cell->wait_object);
631
632 if (mutex_get_lock_word(mutex) != 0) {
633
634 thread = mutex->thread_id;
635
636 /* Note that mutex->thread_id above may be
637 also OS_THREAD_ID_UNDEFINED, because the
638 thread which held the mutex maybe has not
639 yet updated the value, or it has already
640 released the mutex: in this case no deadlock
641 can occur, as the wait array cannot contain
642 a thread with ID_UNDEFINED value. */
643
644 ret = sync_array_deadlock_step(arr, start, thread, 0,
645 depth);
646 if (ret) {
647 fprintf(stderr,
648 "Mutex %p owned by thread %lu file %s line %lu\n",
649 mutex, (ulong) os_thread_pf(mutex->thread_id),
650 mutex->file_name, (ulong) mutex->line);
651 sync_array_cell_print(stderr, cell);
652
653 return(TRUE);
654 }
655 }
656
657 return(FALSE); /* No deadlock */
658
659 } else if (cell->request_type == RW_LOCK_EX
660 || cell->request_type == RW_LOCK_WAIT_EX) {
661
662 lock = static_cast<rw_lock_t*>(cell->wait_object);
663
664 for (debug = UT_LIST_GET_FIRST(lock->debug_list);
665 debug != 0;
666 debug = UT_LIST_GET_NEXT(list, debug)) {
667
668 thread = debug->thread_id;
669
670 if (((debug->lock_type == RW_LOCK_EX)
671 && !os_thread_eq(thread, cell->thread))
672 || ((debug->lock_type == RW_LOCK_WAIT_EX)
673 && !os_thread_eq(thread, cell->thread))
674 || (debug->lock_type == RW_LOCK_SHARED)) {
675
676 /* The (wait) x-lock request can block
677 infinitely only if someone (can be also cell
678 thread) is holding s-lock, or someone
679 (cannot be cell thread) (wait) x-lock, and
680 he is blocked by start thread */
681
682 ret = sync_array_deadlock_step(
683 arr, start, thread, debug->pass,
684 depth);
685 if (ret) {
686 print:
687 fprintf(stderr, "rw-lock %p ",
688 (void*) lock);
689 sync_array_cell_print(stderr, cell);
690 rw_lock_debug_print(stderr, debug);
691 return(TRUE);
692 }
693 }
694 }
695
696 return(FALSE);
697
698 } else if (cell->request_type == RW_LOCK_SHARED) {
699
700 lock = static_cast<rw_lock_t*>(cell->wait_object);
701
702 for (debug = UT_LIST_GET_FIRST(lock->debug_list);
703 debug != 0;
704 debug = UT_LIST_GET_NEXT(list, debug)) {
705
706 thread = debug->thread_id;
707
708 if ((debug->lock_type == RW_LOCK_EX)
709 || (debug->lock_type == RW_LOCK_WAIT_EX)) {
710
711 /* The s-lock request can block infinitely
712 only if someone (can also be cell thread) is
713 holding (wait) x-lock, and he is blocked by
714 start thread */
715
716 ret = sync_array_deadlock_step(
717 arr, start, thread, debug->pass,
718 depth);
719 if (ret) {
720 goto print;
721 }
722 }
723 }
724
725 return(FALSE);
726
727 } else {
728 ut_error;
729 }
730
731 return(TRUE); /* Execution never reaches this line: for compiler
732 fooling only */
733 }
734 #endif /* UNIV_SYNC_DEBUG */
735
736 /******************************************************************//**
737 Determines if we can wake up the thread waiting for a sempahore. */
738 static
739 ibool
sync_arr_cell_can_wake_up(sync_cell_t * cell)740 sync_arr_cell_can_wake_up(
741 /*======================*/
742 sync_cell_t* cell) /*!< in: cell to search */
743 {
744 ib_mutex_t* mutex;
745 rw_lock_t* lock;
746
747 if (cell->request_type == SYNC_MUTEX) {
748
749 mutex = static_cast<ib_mutex_t*>(cell->wait_object);
750
751 os_rmb;
752 if (mutex_get_lock_word(mutex) == 0) {
753
754 return(TRUE);
755 }
756
757 } else if (cell->request_type == RW_LOCK_EX) {
758
759 lock = static_cast<rw_lock_t*>(cell->wait_object);
760
761 os_rmb;
762 if (lock->lock_word > 0) {
763 /* Either unlocked or only read locked. */
764
765 return(TRUE);
766 }
767
768 } else if (cell->request_type == RW_LOCK_WAIT_EX) {
769
770 lock = static_cast<rw_lock_t*>(cell->wait_object);
771
772 /* lock_word == 0 means all readers have left */
773 os_rmb;
774 if (lock->lock_word == 0) {
775
776 return(TRUE);
777 }
778 } else if (cell->request_type == RW_LOCK_SHARED) {
779 lock = static_cast<rw_lock_t*>(cell->wait_object);
780
781 /* lock_word > 0 means no writer or reserved writer */
782 os_rmb;
783 if (lock->lock_word > 0) {
784
785 return(TRUE);
786 }
787 }
788
789 return(FALSE);
790 }
791
792 /******************************************************************//**
793 Frees the cell. NOTE! sync_array_wait_event frees the cell
794 automatically! */
795 UNIV_INTERN
796 void
sync_array_free_cell(sync_array_t * arr,ulint index)797 sync_array_free_cell(
798 /*=================*/
799 sync_array_t* arr, /*!< in: wait array */
800 ulint index) /*!< in: index of the cell in array */
801 {
802 sync_cell_t* cell;
803
804 sync_array_enter(arr);
805
806 cell = sync_array_get_nth_cell(arr, index);
807
808 ut_a(cell->wait_object != NULL);
809
810 cell->waiting = FALSE;
811 cell->wait_object = NULL;
812 cell->signal_count = 0;
813
814 ut_a(arr->n_reserved > 0);
815 arr->n_reserved--;
816
817 sync_array_exit(arr);
818 }
819
820 /**********************************************************************//**
821 Increments the signalled count. */
822 UNIV_INTERN
823 void
sync_array_object_signalled(void)824 sync_array_object_signalled(void)
825 /*=============================*/
826 {
827 #ifdef HAVE_ATOMIC_BUILTINS
828 (void) os_atomic_increment_ulint(&sg_count, 1);
829 #else
830 ++sg_count;
831 #endif /* HAVE_ATOMIC_BUILTINS */
832 }
833
834 /**********************************************************************//**
835 If the wakeup algorithm does not work perfectly at semaphore relases,
836 this function will do the waking (see the comment in mutex_exit). This
837 function should be called about every 1 second in the server.
838
839 Note that there's a race condition between this thread and mutex_exit
840 changing the lock_word and calling signal_object, so sometimes this finds
841 threads to wake up even when nothing has gone wrong. */
842 static
843 void
sync_array_wake_threads_if_sema_free_low(sync_array_t * arr)844 sync_array_wake_threads_if_sema_free_low(
845 /*=====================================*/
846 sync_array_t* arr) /* in/out: wait array */
847 {
848 ulint i = 0;
849 ulint count;
850
851 sync_array_enter(arr);
852
853 for (count = 0; count < arr->n_reserved; ++i) {
854 sync_cell_t* cell;
855
856 cell = sync_array_get_nth_cell(arr, i);
857
858 if (cell->wait_object != NULL) {
859
860 count++;
861
862 if (sync_arr_cell_can_wake_up(cell)) {
863 os_event_t event;
864
865 event = sync_cell_get_event(cell);
866
867 os_event_set(event);
868 }
869 }
870 }
871
872 sync_array_exit(arr);
873 }
874
875 /**********************************************************************//**
876 If the wakeup algorithm does not work perfectly at semaphore relases,
877 this function will do the waking (see the comment in mutex_exit). This
878 function should be called about every 1 second in the server.
879
880 Note that there's a race condition between this thread and mutex_exit
881 changing the lock_word and calling signal_object, so sometimes this finds
882 threads to wake up even when nothing has gone wrong. */
883 UNIV_INTERN
884 void
sync_arr_wake_threads_if_sema_free(void)885 sync_arr_wake_threads_if_sema_free(void)
886 /*====================================*/
887 {
888 ulint i;
889
890 for (i = 0; i < sync_array_size; ++i) {
891
892 sync_array_wake_threads_if_sema_free_low(
893 sync_wait_array[i]);
894 }
895 }
896
897 /**********************************************************************//**
898 Prints warnings of long semaphore waits to stderr.
899 @return TRUE if fatal semaphore wait threshold was exceeded */
900 static
901 ibool
sync_array_print_long_waits_low(sync_array_t * arr,os_thread_id_t * waiter,const void ** sema,ibool * noticed)902 sync_array_print_long_waits_low(
903 /*============================*/
904 sync_array_t* arr, /*!< in: sync array instance */
905 os_thread_id_t* waiter, /*!< out: longest waiting thread */
906 const void** sema, /*!< out: longest-waited-for semaphore */
907 ibool* noticed)/*!< out: TRUE if long wait noticed */
908 {
909 ulint i;
910 ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
911 ibool fatal = FALSE;
912 double longest_diff = 0;
913
914 /* For huge tables, skip the check during CHECK TABLE etc... */
915 if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
916 return(FALSE);
917 }
918
919 #ifdef UNIV_DEBUG_VALGRIND
920 /* Increase the timeouts if running under valgrind because it executes
921 extremely slowly. UNIV_DEBUG_VALGRIND does not necessary mean that
922 we are running under valgrind but we have no better way to tell.
923 See Bug#58432 innodb.innodb_bug56143 fails under valgrind
924 for an example */
925 # define SYNC_ARRAY_TIMEOUT 2400
926 fatal_timeout *= 10;
927 #else
928 # define SYNC_ARRAY_TIMEOUT 240
929 #endif
930
931 for (i = 0; i < arr->n_cells; i++) {
932
933 double diff;
934 sync_cell_t* cell;
935 void* wait_object;
936
937 cell = sync_array_get_nth_cell(arr, i);
938
939 wait_object = cell->wait_object;
940
941 if (wait_object == NULL || !cell->waiting) {
942
943 continue;
944 }
945
946 diff = difftime(time(NULL), cell->reservation_time);
947
948 if (diff > SYNC_ARRAY_TIMEOUT) {
949 fputs("InnoDB: Warning: a long semaphore wait:\n",
950 stderr);
951 sync_array_cell_print(stderr, cell);
952 *noticed = TRUE;
953 }
954
955 if (diff > fatal_timeout) {
956 fatal = TRUE;
957 }
958
959 if (diff > longest_diff) {
960 longest_diff = diff;
961 *sema = wait_object;
962 *waiter = cell->thread;
963 }
964 }
965
966 #undef SYNC_ARRAY_TIMEOUT
967
968 return(fatal);
969 }
970
971 /**********************************************************************//**
972 Prints warnings of long semaphore waits to stderr.
973 @return TRUE if fatal semaphore wait threshold was exceeded */
974 UNIV_INTERN
975 ibool
sync_array_print_long_waits(os_thread_id_t * waiter,const void ** sema)976 sync_array_print_long_waits(
977 /*========================*/
978 os_thread_id_t* waiter, /*!< out: longest waiting thread */
979 const void** sema) /*!< out: longest-waited-for semaphore */
980 {
981 ulint i;
982 ibool fatal = FALSE;
983 ibool noticed = FALSE;
984
985 for (i = 0; i < sync_array_size; ++i) {
986
987 sync_array_t* arr = sync_wait_array[i];
988
989 sync_array_enter(arr);
990
991 if (sync_array_print_long_waits_low(
992 arr, waiter, sema, ¬iced)) {
993
994 fatal = TRUE;
995 }
996
997 sync_array_exit(arr);
998 }
999
1000 if (noticed) {
1001 ibool old_val;
1002
1003 fprintf(stderr,
1004 "InnoDB: ###### Starts InnoDB Monitor"
1005 " for 30 secs to print diagnostic info:\n");
1006
1007 old_val = srv_print_innodb_monitor;
1008
1009 /* If some crucial semaphore is reserved, then also the InnoDB
1010 Monitor can hang, and we do not get diagnostics. Since in
1011 many cases an InnoDB hang is caused by a pwrite() or a pread()
1012 call hanging inside the operating system, let us print right
1013 now the values of pending calls of these. */
1014
1015 fprintf(stderr,
1016 "InnoDB: Pending preads %lu, pwrites %lu\n",
1017 (ulong) os_file_n_pending_preads,
1018 (ulong) os_file_n_pending_pwrites);
1019
1020 srv_print_innodb_monitor = TRUE;
1021 os_event_set(lock_sys->timeout_event);
1022
1023 os_thread_sleep(30000000);
1024
1025 srv_print_innodb_monitor = static_cast<my_bool>(old_val);
1026 fprintf(stderr,
1027 "InnoDB: ###### Diagnostic info printed"
1028 " to the standard error stream\n");
1029 }
1030
1031 return(fatal);
1032 }
1033
1034 /**********************************************************************//**
1035 Prints info of the wait array. */
1036 static
1037 void
sync_array_print_info_low(FILE * file,sync_array_t * arr)1038 sync_array_print_info_low(
1039 /*======================*/
1040 FILE* file, /*!< in: file where to print */
1041 sync_array_t* arr) /*!< in: wait array */
1042 {
1043 ulint i;
1044 ulint count = 0;
1045
1046 fprintf(file,
1047 "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
1048 arr->res_count);
1049
1050 for (i = 0; count < arr->n_reserved; ++i) {
1051 sync_cell_t* cell;
1052
1053 cell = sync_array_get_nth_cell(arr, i);
1054
1055 if (cell->wait_object != NULL) {
1056 count++;
1057 sync_array_cell_print(file, cell);
1058 }
1059 }
1060 }
1061
1062 /**********************************************************************//**
1063 Prints info of the wait array. */
1064 static
1065 void
sync_array_print_info(FILE * file,sync_array_t * arr)1066 sync_array_print_info(
1067 /*==================*/
1068 FILE* file, /*!< in: file where to print */
1069 sync_array_t* arr) /*!< in: wait array */
1070 {
1071 sync_array_enter(arr);
1072
1073 sync_array_print_info_low(file, arr);
1074
1075 sync_array_exit(arr);
1076 }
1077
1078 /**********************************************************************//**
1079 Create the primary system wait array(s), they are protected by an OS mutex */
1080 UNIV_INTERN
1081 void
sync_array_init(ulint n_threads)1082 sync_array_init(
1083 /*============*/
1084 ulint n_threads) /*!< in: Number of slots to
1085 create in all arrays */
1086 {
1087 ulint i;
1088 ulint n_slots;
1089
1090 ut_a(sync_wait_array == NULL);
1091 ut_a(srv_sync_array_size > 0);
1092 ut_a(n_threads > 0);
1093
1094 sync_array_size = srv_sync_array_size;
1095
1096 /* We have to use ut_malloc() because the mutex infrastructure
1097 hasn't been initialised yet. It is required by mem_alloc() and
1098 the heap functions. */
1099
1100 sync_wait_array = static_cast<sync_array_t**>(
1101 ut_malloc(sizeof(*sync_wait_array) * sync_array_size));
1102
1103 n_slots = 1 + (n_threads - 1) / sync_array_size;
1104
1105 for (i = 0; i < sync_array_size; ++i) {
1106
1107 sync_wait_array[i] = sync_array_create(n_slots);
1108 }
1109 }
1110
1111 /**********************************************************************//**
1112 Close sync array wait sub-system. */
1113 UNIV_INTERN
1114 void
sync_array_close(void)1115 sync_array_close(void)
1116 /*==================*/
1117 {
1118 ulint i;
1119
1120 for (i = 0; i < sync_array_size; ++i) {
1121 sync_array_free(sync_wait_array[i]);
1122 }
1123
1124 ut_free(sync_wait_array);
1125 sync_wait_array = NULL;
1126 }
1127
1128 /**********************************************************************//**
1129 Print info about the sync array(s). */
1130 UNIV_INTERN
1131 void
sync_array_print(FILE * file)1132 sync_array_print(
1133 /*=============*/
1134 FILE* file) /*!< in/out: Print to this stream */
1135 {
1136 ulint i;
1137
1138 for (i = 0; i < sync_array_size; ++i) {
1139 sync_array_print_info(file, sync_wait_array[i]);
1140 }
1141
1142 fprintf(file,
1143 "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
1144
1145 }
1146
1147 /**********************************************************************//**
1148 Get an instance of the sync wait array. */
1149 UNIV_INTERN
1150 sync_array_t*
sync_array_get(void)1151 sync_array_get(void)
1152 /*================*/
1153 {
1154 ulint i;
1155 static ulint count;
1156
1157 #ifdef HAVE_ATOMIC_BUILTINS
1158 i = os_atomic_increment_ulint(&count, 1);
1159 #else
1160 i = count++;
1161 #endif /* HAVE_ATOMIC_BUILTINS */
1162
1163 return(sync_wait_array[i % sync_array_size]);
1164 }
1165