1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 2001-2020. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20
21 /* Description: A dynamic lock order checker.
22 * A global locking order is recorded during runtime
23 * and continuously checked against itself.
24 *
25 * Author: Sverker Eriksson
26 */
27
28 /*
29 * The primary objective is to check the order in which locks are seized
30 * to avoid deadlocks. The strategy is to continuously construct a directed
31 * graph describing the order in which locks have been seized. Each edge A->B in
32 * the graph describes a locking order; I held lock A while locking B. Trylocks
33 * do not introduce edges in the graph. For each added edge we check that we
34 * don't introduce cycles in the graph. A cycle would indicate a potential
35 * deadlock bug, waiting to happen.
36 *
37 * We assume that locks are primarily ordered by their lock _types_
38 * and secondarily by instance information of locks of the same type. No lock
39 * order checking is implemented between lock instances of the same type (yet).
40 *
41 * The name given when a lock is created is used as identifying its type.
42 * The '[' character can be used as a delimiter between lock type and
43 * instance information. Example: "esock.wrt[17]" is of type "esock.wrt".
44 *
45 */
46 #ifdef HAVE_CONFIG_H
47 # include "config.h"
48 #endif
49
50 #include "erl_dyn_lock_check.h"
51 #ifdef ERTS_DYN_LOCK_CHECK
52
53 #include "sys.h"
54 #include "erl_threads.h"
55
56 #define DLC_ASSERT(X) ERTS_ASSERT(X)
57
58 #ifdef ERTS_DYN_LOCK_CHECK_INTERNAL
59 # define MAX_LOCK_TYPES (64*2)
60 #else
61 # define MAX_LOCK_TYPES (32)
62 #endif
63
64 #define BITS_PER_WORD (sizeof(UWord)*8)
65 #define LOCK_TYPE_WORDS ((MAX_LOCK_TYPES-1)/BITS_PER_WORD + 1)
66 #define MAX_LOCK_NAME_SZ 64
67
68 static erts_atomic_t n_lock_types;
69 static erts_mtx_t lock_types_mtx;
70
/* Identity of one lock type. The creation name, truncated at any '['
 * delimiter, is stored and used for matching and error reporting. */
struct lock_type
{
    char name[MAX_LOCK_NAME_SZ];
};

/* All registered lock types, indexed by erts_dlc_t.ix */
static struct lock_type lock_types[MAX_LOCK_TYPES];
77
78 static erts_tsd_key_t dlc_thread_key;
79
/* Thread specific data: everything the checker knows about the locks
 * currently (or recently) held by one thread. Accessed only by the owning
 * thread, so no synchronization is needed. */
typedef struct
{
    UWord locked_now[LOCK_TYPE_WORDS];
    /* Bit vector with all lock types currently held by this thread */

    UWord locked_before[LOCK_TYPE_WORDS];
    /* Bit vector same as 'locked_now' PLUS all unlocked locks that were locked
     * by this thread ~before~ the locks in 'locked_now'.
     * A lock in 'locked_before' is only cleared when all locked after it
     * have been unlocked.
     *
     * Example 1:
     * 1. Lock A:     locked_now = A    locked_before = A
     * 2. Lock B:     locked_now = A|B  locked_before = A|B
     * 3. Unlock A:   locked_now = B    locked_before = A|B
     * 4. Lock C:     locked_now = B|C  locked_before = A|B|C
     * 5. Unlock B:   locked_now = C    locked_before = A|B|C
     * 6. Unlock C:   locked_now =      locked_before =
     *
     * Example 2:
     * 1. Lock A:     locked_now = A    locked_before = A
     * 2. Trylock B:  locked_now = A|B  locked_before = A|B
     * 3. Unlock A:   locked_now = B    locked_before = B
     * 4. Lock C:     locked_now = B|C  locked_before = B|C
     * 5. Unlock B:   locked_now = C    locked_before = B|C
     * 6. Unlock C:   locked_now =      locked_before =
     *
     * The trylock of B imposes no ordering dependency to A (locked before B),
     * but it will have a dependency to C (locked after B).
     */

    struct {
        unsigned ix;      /* lock type id (bit index) */
        unsigned cnt;     /* nr of locked instances of this type (may be 0) */
        unsigned trylock; /* true if only trylocked instances */
    } lock_order[MAX_LOCK_TYPES];
    /* The locks in 'locked_before' ordered the way they were locked by this thread */

    unsigned n_locked;
    /* Number of lock types in 'locked_before' and 'lock_order' */

} dlc_thread_t;
123
124 static erts_atomic_t locked_before[MAX_LOCK_TYPES][LOCK_TYPE_WORDS];
125 /* The recorded global lock order as a bit matrix.
126 *
127 * Bit A is set in locked_before[B] if A has been locked before B.
128 */
129
130 static int check_lock_order(dlc_thread_t*, erts_dlc_t*);
131
132 /*#define DLC_UNIT_TEST*/
133 #ifdef DLC_UNIT_TEST
134 static int is_dlc_unit_test = 0;
135 static void dlc_unit_test(void);
136 # define DLC_IS_UNIT_TEST() (is_dlc_unit_test)
137 #else
138 # define DLC_IS_UNIT_TEST() (0)
139 #endif
140
141 static int dlc_initialized = 0;
142
erts_dlc_init(void)143 void erts_dlc_init(void)
144 {
145 int i, j;
146 erts_atomic_init_nob(&n_lock_types, 0);
147 erts_tsd_key_create(&dlc_thread_key, "dyn_lock_check");
148
149 for (i = 0; i < MAX_LOCK_TYPES; i++) {
150 for (j = 0; j < LOCK_TYPE_WORDS; j++)
151 erts_atomic_init_nob(&locked_before[i][j], 0);
152 }
153
154 erts_mtx_init(&lock_types_mtx, "dyn_lock_check", NIL,
155 (ERTS_LOCK_FLAGS_PROPERTY_STATIC |
156 ERTS_LOCK_FLAGS_CATEGORY_GENERIC));
157
158 dlc_initialized = 1;
159
160 #ifdef DLC_UNIT_TEST
161 dlc_unit_test();
162 #endif
163 }
164
/* Assign a lock type index to 'dlc' based on 'name'.
 *
 * The name up to (but not including) any '[' character identifies the lock
 * type; "esock.wrt[17]" is of type "esock.wrt". If a type with that exact
 * name already exists it is reused, otherwise a new type is registered.
 *
 * FIX: the lookup used sys_strncmp() with only the new name's length, so a
 * name that was a strict prefix of an existing type ("esock" vs "esock.wrt")
 * was falsely matched to that type. We now also require the stored name to
 * end exactly at 'name_len'.
 */
void erts_dlc_create_lock(erts_dlc_t* dlc, const char* name)
{
    erts_aint_t i, n = erts_atomic_read_nob(&n_lock_types);
    int name_len;

    /* Truncate at '[' to get the type name */
    for (i = 0; name[i]; i++) {
        if (name[i] == '[')
            break;
    }
    name_len = i;

    /* Optimistic lookup without the mutex (types are never removed) */
    for (i=0; i < n; i++) {
        if (sys_strncmp(name, lock_types[i].name, name_len) == 0
            && lock_types[i].name[name_len] == '\0') {
            dlc->ix = i;
            return; /* already exists */
        }
    }

    if (dlc_initialized)
        erts_mtx_lock(&lock_types_mtx);
    else
        DLC_ASSERT(n == 0); /* early single-threaded boot; no mutex needed */

    n = erts_atomic_read_nob(&n_lock_types);

    /* Only re-scan types added since the unlocked scan above */
    for ( ; i < n; i++) {
        if (sys_strncmp(name, lock_types[i].name, name_len) == 0
            && lock_types[i].name[name_len] == '\0') {
            dlc->ix = i;
            goto done; /* already exists (race) */
        }
    }

    ERTS_ASSERT(n < MAX_LOCK_TYPES);
    ERTS_ASSERT(name_len < MAX_LOCK_NAME_SZ);
    sys_strncpy(lock_types[n].name, name, name_len);
    lock_types[n].name[name_len] = 0;
    erts_atomic_set_nob(&n_lock_types, n+1);
    dlc->ix = n;

done:
    if (dlc_initialized)
        erts_mtx_unlock(&lock_types_mtx);
}
208
209 #define IX_TO_BIT(IX) ((UWord)1 << ((IX) % BITS_PER_WORD))
210 #define IX_TO_WORD(IX) ((IX) / BITS_PER_WORD)
211
get_thr(void)212 static dlc_thread_t *get_thr(void)
213 {
214 dlc_thread_t *thr = (dlc_thread_t*) erts_tsd_get(dlc_thread_key);
215 int i;
216
217 if (!thr) {
218 thr = malloc(sizeof(dlc_thread_t));
219 for (i = 0; i < LOCK_TYPE_WORDS; i++) {
220 thr->locked_now[i] = 0;
221 thr->locked_before[i] = 0;
222 }
223 thr->n_locked = 0;
224 erts_tsd_set(dlc_thread_key, thr);
225 }
226 return thr;
227 }
228
is_bit_set(unsigned ix,const UWord * words)229 static ERTS_INLINE int is_bit_set(unsigned ix, const UWord* words)
230 {
231 DLC_ASSERT(ix < MAX_LOCK_TYPES);
232 return (words[IX_TO_WORD(ix)] & IX_TO_BIT(ix)) != (UWord)0;
233 }
234
is_any_bit_set(const UWord * words)235 static ERTS_INLINE int is_any_bit_set(const UWord* words)
236 {
237 UWord bor = 0;
238 int i=0;
239 for (i = 0; i < LOCK_TYPE_WORDS; i++)
240 bor |= words[i];
241 return bor != (UWord)0;
242 }
243
set_bit(unsigned ix,UWord * words)244 static ERTS_INLINE void set_bit(unsigned ix, UWord* words)
245 {
246 DLC_ASSERT(ix < MAX_LOCK_TYPES);
247 words[IX_TO_WORD(ix)] |= IX_TO_BIT(ix);
248 }
249
clr_bit(unsigned ix,UWord * words)250 static ERTS_INLINE void clr_bit(unsigned ix, UWord* words)
251 {
252 DLC_ASSERT(ix < MAX_LOCK_TYPES);
253 words[IX_TO_WORD(ix)] &= ~IX_TO_BIT(ix);
254 }
255
/* Register that the calling thread is about to (blocking-)lock 'dlc'.
 *
 * Adds edges X->dlc to the global order graph for every lock type X this
 * thread has locked before, and verifies the graph stays acyclic.
 *
 * Returns 1 if the lock order is consistent. Returns 0 only in unit test
 * mode when a violation is detected; in normal operation a violation
 * prints a report and aborts (in check_lock_order).
 */
int erts_dlc_lock(erts_dlc_t* dlc)
{
    dlc_thread_t *thr = get_thr();

    if (thr->n_locked) {
        int i;

        DLC_ASSERT(is_any_bit_set(thr->locked_now));

        /*
         * Check if we introduce new lock dependencies
         */
        for (i=0; i < LOCK_TYPE_WORDS; i++) {
            UWord before = erts_atomic_read_nob(&locked_before[dlc->ix][i]);
            UWord new_before = (thr->locked_before[i] & ~before);

            if (new_before) {
                /* Verify BEFORE publishing the new edges, so a violation
                 * does not pollute the global graph. */
                if (!check_lock_order(thr, dlc)) {
                    DLC_ASSERT(DLC_IS_UNIT_TEST());
                    return 0;
                }
                erts_atomic_read_bor_mb(&locked_before[dlc->ix][i],
                                        new_before);
                /* check again to detect racing deadlock */
                if (!check_lock_order(thr, dlc)) {
                    DLC_ASSERT(DLC_IS_UNIT_TEST());
                    /* can't continue test as 'locked_before' is inconsistent */
                    abort();
                }
            }
        }

        if (is_bit_set(dlc->ix, thr->locked_now)) {
            /*
             * Lock of this type already held.
             * Must be other instance of last locked lock
             */
            DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
            i = thr->n_locked-1;
            /* Skip over trailing trylocked types; relocking past a real
             * lock of another type would be an ordering violation. */
            while (dlc->ix != thr->lock_order[i].ix) {
                DLC_ASSERT(thr->lock_order[i].trylock);
                i--;
                DLC_ASSERT(i >= 0);
            }
            thr->lock_order[i].cnt++;
            thr->lock_order[i].trylock = 0; /* now a real (blocking) lock */
            return 1;
        }
    }
    else {
        DLC_ASSERT(!is_any_bit_set(thr->locked_now));
        DLC_ASSERT(!is_any_bit_set(thr->locked_before));
    }
    /* First instance of this lock type; append to the thread's lock order */
    set_bit(dlc->ix, thr->locked_now);
    set_bit(dlc->ix, thr->locked_before);
    thr->lock_order[thr->n_locked].ix = dlc->ix;
    thr->lock_order[thr->n_locked].cnt = 1;
    thr->lock_order[thr->n_locked].trylock = 0;
    thr->n_locked++;
    return 1;
}
317
get_lock_order(dlc_thread_t * thr,erts_dlc_t * dlc)318 static ERTS_INLINE int get_lock_order(dlc_thread_t* thr,
319 erts_dlc_t* dlc)
320 {
321 int i;
322 DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
323 for (i = 0; ; i++) {
324 DLC_ASSERT(i < thr->n_locked);
325 if (dlc->ix == thr->lock_order[i].ix)
326 return i;
327 }
328 }
329
/* Register a trylock attempt of 'dlc' by the calling thread.
 *
 * 'locked' is true if the trylock succeeded. A successful trylock adds no
 * edges to the order graph (a trylock cannot deadlock), but is remembered
 * so that locks taken ~after~ it will depend on it.
 */
void erts_dlc_trylock(erts_dlc_t* dlc, int locked)
{
    dlc_thread_t *thr = get_thr();

    if (!locked) {
        /* We have no way to detect trylock of self-locked instance (yet)
         * so nothing to do here. */
        return;
    }

    if (is_bit_set(dlc->ix, thr->locked_now)) {
        /* Another instance of this type is already held; just bump count */
        int i = get_lock_order(thr, dlc);
        DLC_ASSERT(thr->lock_order[i].cnt > 0);
        thr->lock_order[i].cnt++;
        /* keep .trylock as is */
    }
    else {
        set_bit(dlc->ix, thr->locked_now);

        if (!is_bit_set(dlc->ix, thr->locked_before)) {
            /* First hold of this type; append as a trylock entry */
            set_bit(dlc->ix, thr->locked_before);
            thr->lock_order[thr->n_locked].ix = dlc->ix;
            thr->lock_order[thr->n_locked].cnt = 1;
            thr->lock_order[thr->n_locked].trylock = 1;
            thr->n_locked++;
        }
        else {
            /* Type still lingers in 'locked_before' from an earlier, now
             * fully unlocked hold; revive that entry as a trylock. */
            int i = get_lock_order(thr, dlc);
            DLC_ASSERT(thr->lock_order[i].cnt == 0);
            thr->lock_order[i].cnt = 1;
            thr->lock_order[i].trylock = 1;
        }
    }
}
364
/* Register that the calling thread unlocks an instance of 'dlc'.
 *
 * When the last instance of the type is released, forget all trailing
 * unlocked types whose ordering can no longer matter (see 'locked_before'
 * examples in dlc_thread_t above).
 */
void erts_dlc_unlock(erts_dlc_t* dlc)
{
    dlc_thread_t *thr = (dlc_thread_t*) erts_tsd_get(dlc_thread_key);
    int i;

    ERTS_ASSERT(thr);
    ERTS_ASSERT(is_bit_set(dlc->ix, thr->locked_now));
    DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
    DLC_ASSERT(thr->n_locked > 0);

    i = get_lock_order(thr, dlc);

    DLC_ASSERT(thr->lock_order[i].cnt > 0);
    thr->lock_order[i].cnt--;
    if (thr->lock_order[i].cnt > 0)
        return; /* still locked by other instance */

    clr_bit(dlc->ix, thr->locked_now);

    /*
     * Now clear and forget all our unlocked locks (including this one)
     * that were not locked *before* any of our still held locked locks.
     */
    for (i = thr->n_locked-1; i >= 0; i--) {
        if (thr->lock_order[i].cnt) {
            DLC_ASSERT(is_bit_set(thr->lock_order[i].ix, thr->locked_now));
            if (!thr->lock_order[i].trylock) {
                /* A locked lock, must remember it and all locked before it. */
                break;
            }
        }
        else { /* forget this unlocked lock */
            int j;

            DLC_ASSERT(!is_bit_set(thr->lock_order[i].ix, thr->locked_now));
            DLC_ASSERT(is_bit_set(thr->lock_order[i].ix, thr->locked_before));
            clr_bit(thr->lock_order[i].ix, thr->locked_before);
            thr->n_locked--;

            /* and compact all trylocks that we may have skipped over */
            for (j = i; j < thr->n_locked; j++) {
                DLC_ASSERT(thr->lock_order[j+1].trylock);
                thr->lock_order[j] = thr->lock_order[j+1];
            }
        }
    }
}
412
/* Verify that locking 'dlc' now does not contradict the recorded global
 * lock order, i.e. that no lock type currently held by this thread has
 * ever (by any thread) been locked ~after~ a lock of dlc's type.
 *
 * Returns 1 if consistent. On violation, prints a report of all offending
 * held lock types and aborts, or returns 0 when running the unit test.
 */
static int check_lock_order(dlc_thread_t *thr, erts_dlc_t* dlc)
{
    /* Bit/word position of dlc's type in a 'locked_before' bit vector.
     * FIX: was IX_TO_BIT(dlc->ix % 64); IX_TO_BIT already reduces its
     * argument modulo BITS_PER_WORD, so the hardcoded '% 64' was a
     * redundant, misleading magic number. */
    const UWord lock_bit = IX_TO_BIT(dlc->ix);
    const unsigned lock_word = IX_TO_WORD(dlc->ix);
    int i, error = 0;

    for (i = 0; i < thr->n_locked; i++) {
        const unsigned ix = thr->lock_order[i].ix;

        /* Held type 'ix' violates order if dlc's type was ever locked
         * before it, i.e. dlc's bit is set in locked_before[ix]. */
        if (ix != dlc->ix &&
            (lock_bit & erts_atomic_read_nob(&locked_before[ix][lock_word]))) {
            if (!error) {
                error = 1;
                erts_fprintf(stderr, "###### DYNAMIC LOCK ORDER VIOLATION ######\n");
                erts_fprintf(stderr, "# Trying to lock '%s'\n", lock_types[dlc->ix].name);
            }
            erts_fprintf(stderr, "# while '%s' is held\n",
                         lock_types[thr->lock_order[i].ix].name);
        }
    }
    if (error) {
        if (DLC_IS_UNIT_TEST())
            return 0; /* let the test observe the violation */
        abort();
    }
    return 1;
}
440
441 #ifdef DLC_UNIT_TEST
442
dlc_clear_order(void)443 static void dlc_clear_order(void)
444 {
445 int i, j, n = erts_atomic_read_nob(&n_lock_types);
446
447 for (i = 0; i < n; i++) {
448 for (j = 0; j < LOCK_TYPE_WORDS; j++)
449 erts_atomic_set_nob(&locked_before[i][j], 0);
450 }
451 }
452
/* Self-test of the checker, run once from erts_dlc_init() when
 * DLC_UNIT_TEST is defined. With 'is_dlc_unit_test' set, a detected
 * violation makes erts_dlc_lock() return 0 instead of aborting, so
 * violations can be asserted on. Global state is restored at the end. */
static void dlc_unit_test(void)
{
    erts_aint_t save_n_lock_types = erts_atomic_read_nob(&n_lock_types);
    dlc_thread_t* thr = get_thr();
    dlc_thread_t save_thr = *thr;
    erts_dlc_t A,B,C,D,E,F;

    ERTS_ASSERT(save_n_lock_types <= 1); /* no need to save existing order */

    is_dlc_unit_test = 1;
    erts_dlc_create_lock(&A, "A");
    erts_dlc_create_lock(&B, "B");
    erts_dlc_create_lock(&C, "C");
    erts_dlc_create_lock(&D, "D");
    erts_dlc_create_lock(&E, "E");
    erts_dlc_create_lock(&F, "F");

    /* Scenario 1: A->C recorded; locking A after C must be a violation,
     * even after A itself has been released. */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&C));
    ERTS_ASSERT(!erts_dlc_lock(&A));

    erts_dlc_unlock(&A);
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&C);
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&B);
    erts_dlc_unlock(&C);
    /* A->B->C established above; A with C held is ok, B with C held is not */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&C));
    ERTS_ASSERT(!erts_dlc_lock(&B));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&C);

    dlc_clear_order();

    /* Scenario 2: transitive chain A->B->C->D->E->F; locking A while
     * only F is held must still be flagged (F depends on A indirectly). */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&A);
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(erts_dlc_lock(&D));
    erts_dlc_unlock(&C);
    ERTS_ASSERT(erts_dlc_lock(&E));
    erts_dlc_unlock(&D);
    ERTS_ASSERT(erts_dlc_lock(&F));
    erts_dlc_unlock(&E);
    erts_dlc_unlock(&F);
    ERTS_ASSERT(erts_dlc_lock(&F));
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&F);

    /* Scenario 3: a trylock of B while A is held imposes no A->B edge,
     * so locking A again while B is held is fine. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_trylock(&B, 1);
    erts_dlc_unlock(&A);
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&B);

    /* Scenario 4: mixed locks and trylocks; relocking held types and
     * unwinding in non-LIFO order, then detecting the B->A violation. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_trylock(&D, 1);
    erts_dlc_trylock(&E, 1);
    ERTS_ASSERT(erts_dlc_lock(&F));
    erts_dlc_unlock(&C);
    erts_dlc_unlock(&F);
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&D);
    erts_dlc_unlock(&E);

    /* Scenario 5: a later real lock of a trylocked type (B) upgrades it,
     * creating A->B and B->C edges; C then conflicts with held B. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_trylock(&B, 1);
    erts_dlc_trylock(&C, 1);
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(!erts_dlc_lock(&C));

    /* Restore */
    is_dlc_unit_test = 0;
    dlc_clear_order();
    erts_atomic_set_nob(&n_lock_types, save_n_lock_types);
    *thr = save_thr;
}
548 #endif /* DLC_UNIT_TEST */
549
550 #endif /* ERTS_DYN_LOCK_CHECK */
551
552
553
554