1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2001-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 /* Description: A dynamic lock order checker.
22  *              A global locking order is recorded during runtime
23  *              and continuously checked against itself.
24  *
25  * Author: Sverker Eriksson
26  */
27 
28 /*
29  * The primary objective is to check the order in which locks are seized
30  * to avoid deadlocks. The strategy is to continuously construct a directed
31  * graph describing the order in which locks have been seized. Each edge A->B in
32  * the graph describes a locking order; I held lock A while locking B. Trylocks
33  * do not introduce edges in the graph. For each added edge we check that we
34  * don't introduce cycles in the graph. A cycle would indicate a potential
35  * deadlock bug, waiting to happen.
36  *
37  * We assume that locks are primarily ordered by their lock _types_
38  * and secondarily by instance information of locks of the same type. No lock
39  * order checking is implemented between lock instances of the same type (yet).
40  *
41  * The name given when a lock is created is used as identifying its type.
42  * The '[' character can be used as a delimiter between lock type and
43  * instance information. Example: "esock.wrt[17]" is of type "esock.wrt".
44  *
45  */
46 #ifdef HAVE_CONFIG_H
47 #  include "config.h"
48 #endif
49 
50 #include "erl_dyn_lock_check.h"
51 #ifdef ERTS_DYN_LOCK_CHECK
52 
53 #include "sys.h"
54 #include "erl_threads.h"
55 
56 #define DLC_ASSERT(X) ERTS_ASSERT(X)
57 
58 #ifdef ERTS_DYN_LOCK_CHECK_INTERNAL
59 # define MAX_LOCK_TYPES (64*2)
60 #else
61 # define MAX_LOCK_TYPES (32)
62 #endif
63 
64 #define BITS_PER_WORD (sizeof(UWord)*8)
65 #define LOCK_TYPE_WORDS ((MAX_LOCK_TYPES-1)/BITS_PER_WORD + 1)
66 #define MAX_LOCK_NAME_SZ 64
67 
68 static erts_atomic_t n_lock_types;
69 static erts_mtx_t lock_types_mtx;
70 
71 struct lock_type
72 {
73     char name[MAX_LOCK_NAME_SZ];
74 };
75 
76 static struct lock_type lock_types[MAX_LOCK_TYPES];
77 
78 static erts_tsd_key_t dlc_thread_key;
79 
/* Thread specific data */
typedef struct
{
    UWord locked_now[LOCK_TYPE_WORDS];
    /* Bit vector with all lock types currently held by this thread */

    UWord locked_before[LOCK_TYPE_WORDS];
    /* Bit vector same as 'locked_now' PLUS all unlocked locks that were locked
     * by this thread ~before~ the locks in 'locked_now'.
     * A lock in 'locked_before' is only cleared when all locked after it
     * have been unlocked.
     *
     * Example 1:
     *   1. Lock A:    locked_now = A    locked_before = A
     *   2. Lock B:    locked_now = A|B  locked_before = A|B
     *   3. Unlock A:  locked_now = B    locked_before = A|B
     *   4. Lock C:    locked_now = B|C  locked_before = A|B|C
     *   5. Unlock B:  locked_now = C    locked_before = A|B|C
     *   6. Unlock C:  locked_now =      locked_before =
     *
     * Example 2:
     *   1. Lock A:    locked_now = A    locked_before = A
     *   2. Trylock B: locked_now = A|B  locked_before = A|B
     *   3. Unlock A:  locked_now = B    locked_before = B
     *   4. Lock C:    locked_now = B|C  locked_before = B|C
     *   5. Unlock B:  locked_now = C    locked_before = B|C
     *   6. Unlock C:  locked_now =      locked_before =
     *
     *   The trylock of B imposes no ordering dependency to A (locked before B),
     *   but it will have a dependency to C (locked after B).
     */

    struct {
        unsigned ix;      /* lock type id (bit index) */
        unsigned cnt;     /* nr of locked instances of this type (may be 0) */
        unsigned trylock; /* true if only trylocked instances */
    } lock_order[MAX_LOCK_TYPES];
    /* The locks in 'locked_before' ordered the way they were locked by this thread */

    unsigned n_locked;
    /* Number of lock types in 'locked_before' and 'lock_order' */

}  dlc_thread_t;
123 
124 static erts_atomic_t locked_before[MAX_LOCK_TYPES][LOCK_TYPE_WORDS];
125 /* The recorded global lock order as a bit matrix.
126  *
127  * Bit A is set in locked_before[B] if A has been locked before B.
128  */
129 
130 static int check_lock_order(dlc_thread_t*, erts_dlc_t*);
131 
132 /*#define DLC_UNIT_TEST*/
133 #ifdef DLC_UNIT_TEST
134 static int is_dlc_unit_test = 0;
135 static void dlc_unit_test(void);
136 # define DLC_IS_UNIT_TEST() (is_dlc_unit_test)
137 #else
138 # define DLC_IS_UNIT_TEST() (0)
139 #endif
140 
141 static int dlc_initialized = 0;
142 
erts_dlc_init(void)143 void erts_dlc_init(void)
144 {
145     int i, j;
146     erts_atomic_init_nob(&n_lock_types, 0);
147     erts_tsd_key_create(&dlc_thread_key, "dyn_lock_check");
148 
149     for (i = 0; i < MAX_LOCK_TYPES; i++) {
150         for (j = 0; j < LOCK_TYPE_WORDS; j++)
151             erts_atomic_init_nob(&locked_before[i][j], 0);
152     }
153 
154     erts_mtx_init(&lock_types_mtx, "dyn_lock_check", NIL,
155                   (ERTS_LOCK_FLAGS_PROPERTY_STATIC |
156                    ERTS_LOCK_FLAGS_CATEGORY_GENERIC));
157 
158     dlc_initialized = 1;
159 
160 #ifdef DLC_UNIT_TEST
161     dlc_unit_test();
162 #endif
163 }
164 
/* Register (or look up) the lock type identified by 'name' and store its
 * type id in dlc->ix. The part of 'name' before any '[' character is the
 * lock type; the rest is per-instance info and is ignored here.
 *
 * Fix: the type lookup previously used only sys_strncmp() with the length
 * of the new name, so a new type "esock" would wrongly match an already
 * registered type "esock.wrt" (prefix collision). We now also require the
 * stored name to end exactly at name_len.
 */
void erts_dlc_create_lock(erts_dlc_t* dlc, const char* name)
{
    erts_aint_t i, n = erts_atomic_read_nob(&n_lock_types);
    int name_len;

    /* Lock type name ends at the first '[' (instance info delimiter) */
    for (name_len = 0; name[name_len]; name_len++) {
        if (name[name_len] == '[')
            break;
    }

    /* Optimistic lock-free lookup among the types published so far */
    for (i = 0; i < n; i++) {
        if (sys_strncmp(name, lock_types[i].name, name_len) == 0
            && lock_types[i].name[name_len] == '\0') {
            dlc->ix = i;
            return; /* already exists */
        }
    }

    if (dlc_initialized)
        erts_mtx_lock(&lock_types_mtx);
    else
        DLC_ASSERT(n == 0);

    n = erts_atomic_read_nob(&n_lock_types);

    /* Re-check only the types added since the unlocked scan above */
    for ( ; i < n; i++) {
        if (sys_strncmp(name, lock_types[i].name, name_len) == 0
            && lock_types[i].name[name_len] == '\0') {
            dlc->ix = i;
            goto done; /* already exists (race) */
        }
    }

    ERTS_ASSERT(n < MAX_LOCK_TYPES);
    ERTS_ASSERT(name_len < MAX_LOCK_NAME_SZ);
    sys_strncpy(lock_types[n].name, name, name_len);
    lock_types[n].name[name_len] = 0; /* sys_strncpy may not terminate */
    erts_atomic_set_nob(&n_lock_types, n+1);
    dlc->ix = n;

done:
    if (dlc_initialized)
        erts_mtx_unlock(&lock_types_mtx);
}
208 
209 #define IX_TO_BIT(IX) ((UWord)1 << ((IX) % BITS_PER_WORD))
210 #define IX_TO_WORD(IX) ((IX) / BITS_PER_WORD)
211 
get_thr(void)212 static dlc_thread_t *get_thr(void)
213 {
214     dlc_thread_t *thr = (dlc_thread_t*) erts_tsd_get(dlc_thread_key);
215     int i;
216 
217     if (!thr) {
218         thr = malloc(sizeof(dlc_thread_t));
219         for (i = 0; i < LOCK_TYPE_WORDS; i++) {
220             thr->locked_now[i] = 0;
221             thr->locked_before[i] = 0;
222         }
223         thr->n_locked = 0;
224         erts_tsd_set(dlc_thread_key, thr);
225     }
226     return thr;
227 }
228 
is_bit_set(unsigned ix,const UWord * words)229 static ERTS_INLINE int is_bit_set(unsigned ix, const UWord* words)
230 {
231     DLC_ASSERT(ix < MAX_LOCK_TYPES);
232     return (words[IX_TO_WORD(ix)] & IX_TO_BIT(ix)) != (UWord)0;
233 }
234 
is_any_bit_set(const UWord * words)235 static ERTS_INLINE int is_any_bit_set(const UWord* words)
236 {
237     UWord bor = 0;
238     int i=0;
239     for (i = 0; i < LOCK_TYPE_WORDS; i++)
240         bor |= words[i];
241     return bor != (UWord)0;
242 }
243 
set_bit(unsigned ix,UWord * words)244 static ERTS_INLINE void set_bit(unsigned ix, UWord* words)
245 {
246     DLC_ASSERT(ix < MAX_LOCK_TYPES);
247     words[IX_TO_WORD(ix)] |= IX_TO_BIT(ix);
248 }
249 
clr_bit(unsigned ix,UWord * words)250 static ERTS_INLINE void clr_bit(unsigned ix, UWord* words)
251 {
252     DLC_ASSERT(ix < MAX_LOCK_TYPES);
253     words[IX_TO_WORD(ix)] &= ~IX_TO_BIT(ix);
254 }
255 
/* Record a (blocking) lock operation on a lock of type dlc->ix by the
 * calling thread. Adds any new edges 'X locked-before dlc->ix' to the
 * global order graph and verifies no ordering cycle is introduced.
 *
 * Returns 1 on success. Returns 0 only in unit test mode when a lock
 * order violation is detected; in normal mode a violation aborts.
 */
int erts_dlc_lock(erts_dlc_t* dlc)
{
    dlc_thread_t *thr = get_thr();

    if (thr->n_locked) {
        int i;

        DLC_ASSERT(is_any_bit_set(thr->locked_now));

        /*
         * Check if we introduce new lock dependencies
         */
        for (i=0; i < LOCK_TYPE_WORDS; i++) {
            UWord before = erts_atomic_read_nob(&locked_before[dlc->ix][i]);
            UWord new_before = (thr->locked_before[i] & ~before);

            if (new_before) {
                /* This thread held locks not yet recorded as 'before'
                 * dlc->ix. Verify order before publishing the new edges. */
                if (!check_lock_order(thr, dlc)) {
                    DLC_ASSERT(DLC_IS_UNIT_TEST());
                    return 0;
                }
                erts_atomic_read_bor_mb(&locked_before[dlc->ix][i],
                                        new_before);
                /* check again to detect racing deadlock */
                if (!check_lock_order(thr, dlc)) {
                    DLC_ASSERT(DLC_IS_UNIT_TEST());
                    /* can't continue test as 'locked_before' is inconsistent */
                    abort();
                }
            }
        }

        if (is_bit_set(dlc->ix, thr->locked_now)) {
            /*
             * Lock of this type already held.
             * Must be other instance of last locked lock
             */
            DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
            i = thr->n_locked-1;
            /* Search backwards; may only skip over pure trylock entries. */
            while (dlc->ix != thr->lock_order[i].ix) {
                DLC_ASSERT(thr->lock_order[i].trylock);
                i--;
                DLC_ASSERT(i >= 0);
            }
            thr->lock_order[i].cnt++;
            thr->lock_order[i].trylock = 0; /* now a real (blocking) lock */
            return 1;
        }
    }
    else {
        DLC_ASSERT(!is_any_bit_set(thr->locked_now));
        DLC_ASSERT(!is_any_bit_set(thr->locked_before));
    }
    /* First held instance of this lock type; append to lock order. */
    set_bit(dlc->ix, thr->locked_now);
    set_bit(dlc->ix, thr->locked_before);
    thr->lock_order[thr->n_locked].ix = dlc->ix;
    thr->lock_order[thr->n_locked].cnt = 1;
    thr->lock_order[thr->n_locked].trylock = 0;
    thr->n_locked++;
    return 1;
}
317 
get_lock_order(dlc_thread_t * thr,erts_dlc_t * dlc)318 static ERTS_INLINE int get_lock_order(dlc_thread_t* thr,
319                                       erts_dlc_t* dlc)
320 {
321     int i;
322     DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
323     for (i = 0; ; i++) {
324         DLC_ASSERT(i < thr->n_locked);
325         if (dlc->ix == thr->lock_order[i].ix)
326             return i;
327     }
328 }
329 
/* Record a trylock operation on a lock of type dlc->ix.
 * 'locked' is true if the trylock succeeded. Trylocks never add edges to
 * the global order graph (they cannot deadlock), but a successful trylock
 * is tracked so that later blocking locks order correctly after it.
 */
void erts_dlc_trylock(erts_dlc_t* dlc, int locked)
{
    dlc_thread_t *thr = get_thr();

    if (!locked) {
        /* We have no way to detect trylock of self-locked instance (yet)
         * so nothing to do here. */
        return;
    }

    if (is_bit_set(dlc->ix, thr->locked_now)) {
        /* Another instance of an already held type; just bump the count. */
        int i = get_lock_order(thr, dlc);
        DLC_ASSERT(thr->lock_order[i].cnt > 0);
        thr->lock_order[i].cnt++;
        /* keep .trylock as is */
    }
    else {
        set_bit(dlc->ix, thr->locked_now);

        if (!is_bit_set(dlc->ix, thr->locked_before)) {
            /* First appearance of this type; append as trylock-only. */
            set_bit(dlc->ix, thr->locked_before);
            thr->lock_order[thr->n_locked].ix = dlc->ix;
            thr->lock_order[thr->n_locked].cnt = 1;
            thr->lock_order[thr->n_locked].trylock = 1;
            thr->n_locked++;
        }
        else {
            /* Type still remembered in 'locked_before' but currently
             * unlocked; revive the existing entry as trylock-only. */
            int i = get_lock_order(thr, dlc);
            DLC_ASSERT(thr->lock_order[i].cnt == 0);
            thr->lock_order[i].cnt = 1;
            thr->lock_order[i].trylock = 1;
        }
    }
}
364 
/* Record an unlock operation on a lock of type dlc->ix by the calling
 * thread. When the last instance of a type is unlocked, forget all
 * trailing unlocked types that no still-held blocking lock depends on.
 */
void erts_dlc_unlock(erts_dlc_t* dlc)
{
    dlc_thread_t *thr = (dlc_thread_t*) erts_tsd_get(dlc_thread_key);
    int i;

    ERTS_ASSERT(thr);
    ERTS_ASSERT(is_bit_set(dlc->ix, thr->locked_now));
    DLC_ASSERT(is_bit_set(dlc->ix, thr->locked_before));
    DLC_ASSERT(thr->n_locked > 0);

    i = get_lock_order(thr, dlc);

    DLC_ASSERT(thr->lock_order[i].cnt > 0);
    thr->lock_order[i].cnt--;
    if (thr->lock_order[i].cnt > 0)
        return; /* still locked by other instance */

    clr_bit(dlc->ix, thr->locked_now);

    /*
     * Now clear and forget all our unlocked locks (including this one)
     * that were not locked *before* any of our still held locked locks.
     */
    for (i = thr->n_locked-1; i >= 0; i--) {
        if (thr->lock_order[i].cnt) {
            DLC_ASSERT(is_bit_set(thr->lock_order[i].ix, thr->locked_now));
            if (!thr->lock_order[i].trylock) {
                /* A locked lock, must remember it and all locked before it. */
                break;
            }
            /* A held trylock imposes no 'before' dependency; keep
             * scanning for unlocked entries further down. */
        }
        else { /* forget this unlocked lock */
            int j;

            DLC_ASSERT(!is_bit_set(thr->lock_order[i].ix, thr->locked_now));
            DLC_ASSERT(is_bit_set(thr->lock_order[i].ix, thr->locked_before));
            clr_bit(thr->lock_order[i].ix, thr->locked_before);
            thr->n_locked--;

            /* and compact all trylocks that we may have skipped over */
            for (j = i; j < thr->n_locked; j++) {
                DLC_ASSERT(thr->lock_order[j+1].trylock);
                thr->lock_order[j] = thr->lock_order[j+1];
            }
        }
    }
}
412 
/* Verify that locking a lock of type dlc->ix now does not contradict the
 * recorded global order, i.e. no lock type currently remembered by this
 * thread has ever been locked *after* type dlc->ix.
 *
 * Returns 1 if ok. On violation: prints a report and aborts, or returns 0
 * in unit test mode.
 *
 * Fix: was IX_TO_BIT(dlc->ix % 64) — the hardcoded "% 64" is redundant
 * (IX_TO_BIT already reduces modulo BITS_PER_WORD) and would be misleading
 * if the word size changed.
 */
static int check_lock_order(dlc_thread_t *thr, erts_dlc_t* dlc)
{
    const UWord lock_bit = IX_TO_BIT(dlc->ix);
    const unsigned lock_word = IX_TO_WORD(dlc->ix);
    int i, error = 0;

    for (i = 0; i < thr->n_locked; i++) {
        const unsigned ix = thr->lock_order[i].ix;

        /* Is dlc->ix recorded as locked-before the held type 'ix'? */
        if (ix != dlc->ix &&
            (lock_bit & erts_atomic_read_nob(&locked_before[ix][lock_word]))) {
            if (!error) {
                error = 1;
                erts_fprintf(stderr, "###### DYNAMIC LOCK ORDER VIOLATION ######\n");
                erts_fprintf(stderr, "# Trying to lock '%s'\n", lock_types[dlc->ix].name);
            }
            erts_fprintf(stderr, "# while '%s' is held\n",
                         lock_types[ix].name);
        }
    }
    if (error) {
        if (DLC_IS_UNIT_TEST())
            return 0;
        abort();
    }
    return 1;
}
440 
441 #ifdef DLC_UNIT_TEST
442 
dlc_clear_order(void)443 static void dlc_clear_order(void)
444 {
445     int i, j, n = erts_atomic_read_nob(&n_lock_types);
446 
447     for (i = 0; i < n; i++) {
448         for (j = 0; j < LOCK_TYPE_WORDS; j++)
449             erts_atomic_set_nob(&locked_before[i][j], 0);
450     }
451 }
452 
/* Self test of the checker, run once from erts_dlc_init().
 * Exercises lock/trylock/unlock sequences and asserts that legal orders
 * pass (erts_dlc_lock returns 1) and violations are detected (returns 0,
 * which only happens in unit test mode).
 */
static void dlc_unit_test(void)
{
    erts_aint_t save_n_lock_types = erts_atomic_read_nob(&n_lock_types);
    dlc_thread_t* thr = get_thr();
    dlc_thread_t save_thr = *thr;
    erts_dlc_t A,B,C,D,E,F;

    ERTS_ASSERT(save_n_lock_types <= 1); /* no need to save existing order */

    is_dlc_unit_test = 1;
    erts_dlc_create_lock(&A, "A");
    erts_dlc_create_lock(&B, "B");
    erts_dlc_create_lock(&C, "C");
    erts_dlc_create_lock(&D, "D");
    erts_dlc_create_lock(&E, "E");
    erts_dlc_create_lock(&F, "F");

    /* Direct violation: A before C established, then C before A attempted. */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&C));
    ERTS_ASSERT(!erts_dlc_lock(&A));

    /* A stays in 'locked_before' while C is held, so re-locking A still fails. */
    erts_dlc_unlock(&A);
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&C);
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&B);
    erts_dlc_unlock(&C);
    /* A->B->C recorded; locking B after A,C violates B before C. */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&C));
    ERTS_ASSERT(!erts_dlc_lock(&B));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&C);

    dlc_clear_order();

    /* Chained hand-over-hand locking: A->B->C->D->E->F, then F before A
     * must be detected via the transitively recorded order. */
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&A);
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(erts_dlc_lock(&D));
    erts_dlc_unlock(&C);
    ERTS_ASSERT(erts_dlc_lock(&E));
    erts_dlc_unlock(&D);
    ERTS_ASSERT(erts_dlc_lock(&F));
    erts_dlc_unlock(&E);
    erts_dlc_unlock(&F);
    ERTS_ASSERT(erts_dlc_lock(&F));
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&F);

    /* Trylock imposes no A-before-B dependency; A may be relocked. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_trylock(&B, 1);
    erts_dlc_unlock(&A);
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&B);

    /* Mixed blocking locks and trylocks with out-of-order unlocks. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    ERTS_ASSERT(erts_dlc_lock(&B));
    ERTS_ASSERT(erts_dlc_lock(&C));
    erts_dlc_trylock(&D, 1);
    erts_dlc_trylock(&E, 1);
    ERTS_ASSERT(erts_dlc_lock(&F));
    erts_dlc_unlock(&C);
    erts_dlc_unlock(&F);
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(!erts_dlc_lock(&A));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&A);
    erts_dlc_unlock(&D);
    erts_dlc_unlock(&E);

    /* A blocking lock of a trylocked type (B) upgrades it to a real
     * dependency; C (trylocked after) then orders before... violation. */
    dlc_clear_order();
    ERTS_ASSERT(erts_dlc_lock(&A));
    erts_dlc_trylock(&B, 1);
    erts_dlc_trylock(&C, 1);
    ERTS_ASSERT(erts_dlc_lock(&B));
    erts_dlc_unlock(&B);
    ERTS_ASSERT(!erts_dlc_lock(&C));

    /* Restore */
    is_dlc_unit_test = 0;
    dlc_clear_order();
    erts_atomic_set_nob(&n_lock_types, save_n_lock_types);
    *thr = save_thr;
}
548 #endif /* DLC_UNIT_TEST */
549 
550 #endif /* ERTS_DYN_LOCK_CHECK */
551 
552 
553 
554