/*	$OpenBSD: subr_witness.c,v 1.37 2020/03/15 05:58:48 visa Exp $	*/

/*-
 * Copyright (c) 2008 Isilon Systems, Inc.
 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
 * Copyright (c) 1998 Berkeley Software Design, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp
 *	and BSDI Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp
 */

/*
 * Implementation of the `witness' lock verifier.  Originally implemented for
 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 * Main Entry: witness
 * Pronunciation: 'wit-n&s
 * Function: noun
 * Etymology: Middle English witnesse, from Old English witnes knowledge,
 *    testimony, witness, from 2wit
 * Date: before 12th century
 * 1 : attestation of a fact or event : TESTIMONY
 * 2 : one that gives evidence; specifically : one who testifies in
 *    a cause or before a judicial tribunal
 * 3 : one asked to be present at a transaction so as to be able to
 *    testify to its having taken place
 * 4 : one who has personal knowledge of something
 * 5 a : something serving as evidence or proof : SIGN
 *   b : public affirmation by word or example of usually
 *      religious faith or conviction <the heroic witness to divine
 *      life -- Pilot>
 * 6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes.  Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl().  Basically, when a thread sleeps, it must release
 * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious.  Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock.  The second
 * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
 * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
 * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
 * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute.  Thus, acquiring Giant both before and after a sleepable lock
 * will not result in a lock order reversal.
 */
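/*
 * Illustrative sketch (not from the original source): the interleaving
 * behind rule 3), written out as a timeline.  T1, T2 and the sleepable
 * lock X are the same actors as in the example above.
 *
 *	T1: acquire X
 *	T2: acquire Giant
 *	T2: block on X      -> rule 2) forces T2 to drop Giant
 *	T1: acquire Giant   -> succeeds, no deadlock
 *	T1: release Giant, release X
 *	T2: wake up, reacquire Giant, acquire X
 *
 * Because a blocked thread can never sit on Giant while waiting for a
 * sleepable lock, the X-before-Giant and Giant-before-X orders can coexist.
 */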
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#ifdef MULTIPROCESSOR
#include <sys/mplock.h>
#endif
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/stacktrace.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#include <uvm/uvm_extern.h>	/* uvm_pageboot_alloc */

#ifndef DDB
#error "DDB is required for WITNESS"
#endif

#include <machine/db_machdep.h>

#include <ddb/db_access.h>
#include <ddb/db_var.h>
#include <ddb/db_output.h>

#define	LI_RECURSEMASK	0x0000ffff	/* Recursion depth of lock instance. */
#define	LI_EXCLUSIVE	0x00010000	/* Exclusive lock instance. */
#define	LI_NORELEASE	0x00020000	/* Lock not allowed to be released. */

#ifndef WITNESS_COUNT
#define	WITNESS_COUNT	1536
#endif
#define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
#define	WITNESS_PENDLIST	(1024 + MAXCPUS)

/* Allocate 256 KB of stack data space */
#define	WITNESS_LO_DATA_COUNT	2048

/* Prime, gives load factor of ~2 at full load */
#define	WITNESS_LO_HASH_SIZE	1021

/*
 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
 * will hold LOCK_NCHILDREN locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define	LOCK_NCHILDREN	5
#define	LOCK_CHILDCOUNT	2048

#define	FULLGRAPH_SBUF_SIZE	512

/*
 * These flags go in the witness relationship matrix and describe the
 * relationship between any two struct witness objects.
 */
#define	WITNESS_UNRELATED	0x00	/* No lock order relation. */
#define	WITNESS_PARENT		0x01	/* Parent, aka direct ancestor. */
#define	WITNESS_ANCESTOR	0x02	/* Direct or indirect ancestor. */
#define	WITNESS_CHILD		0x04	/* Child, aka direct descendant. */
#define	WITNESS_DESCENDANT	0x08	/* Direct or indirect descendant. */
#define	WITNESS_ANCESTOR_MASK	(WITNESS_PARENT | WITNESS_ANCESTOR)
#define	WITNESS_DESCENDANT_MASK	(WITNESS_CHILD | WITNESS_DESCENDANT)
#define	WITNESS_RELATED_MASK	\
	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
#define	WITNESS_REVERSAL	0x10	/* A lock order reversal has been
					 * observed. */
#define	WITNESS_RESERVED1	0x20	/* Unused flag, reserved. */
#define	WITNESS_RESERVED2	0x40	/* Unused flag, reserved. */
#define	WITNESS_LOCK_ORDER_KNOWN 0x80	/* This lock order is known. */

/* Descendant to ancestor flags */
#define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)

/* Ancestor to descendant flags */
#define	WITNESS_ATOD(x)	(((x) & WITNESS_RELATED_MASK) << 2)
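/*
 * Illustrative sketch (not from the original source): how the relation
 * flags mirror each other across the matrix diagonal.  If witness P is a
 * direct ancestor of witness C, then
 *
 *	w_rmatrix[P][C] == WITNESS_PARENT	(0x01)
 *	w_rmatrix[C][P] == WITNESS_CHILD	(0x04)
 *
 * and the conversion macros turn one view into the other:
 *
 *	WITNESS_ATOD(WITNESS_PARENT | WITNESS_ANCESTOR)
 *	    == (WITNESS_CHILD | WITNESS_DESCENDANT)	(0x03 << 2 == 0x0c)
 *	WITNESS_DTOA(WITNESS_CHILD | WITNESS_DESCENDANT)
 *	    == (WITNESS_PARENT | WITNESS_ANCESTOR)	(0x0c >> 2 == 0x03)
 *
 * _isitmyx() below relies on this invariant: the flags in w_rmatrix[i][j]
 * must always be the shifted inverse of the flags in w_rmatrix[j][i].
 */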
#define	WITNESS_INDEX_ASSERT(i) \
	KASSERT((i) > 0 && (i) <= w_max_used_index && (i) < witness_count)

/*
 * Lock classes.  Each lock has a class which describes characteristics
 * common to all types of locks of a given class.
 *
 * Spin locks in general must always protect against preemption, as it is
 * an error to perform any type of context switch while holding a spin lock.
 * Also, for an individual lock to be recursable, its class must allow
 * recursion and the lock itself must explicitly allow recursion.
 */

struct lock_class {
	const char	*lc_name;
	u_int		lc_flags;
};

union lock_stack {
	union lock_stack	*ls_next;
	struct stacktrace	ls_stack;
};

#define	LC_SLEEPLOCK	0x00000001	/* Sleep lock. */
#define	LC_SPINLOCK	0x00000002	/* Spin lock. */
#define	LC_SLEEPABLE	0x00000004	/* Sleeping allowed with this lock. */
#define	LC_RECURSABLE	0x00000008	/* Locks of this type may recurse. */
#define	LC_UPGRADABLE	0x00000010	/* Upgrades and downgrades permitted. */

/*
 * Lock instances.  A lock instance is the data associated with a lock while
 * it is held by witness.  For example, a lock instance will hold the
 * recursion count of a lock.  Lock instances are held in lists.  Spin locks
 * are held in a per-cpu list while sleep locks are held in a per-thread
 * list.
 */
struct lock_instance {
	struct lock_object	*li_lock;
	union lock_stack	*li_stack;
	u_int			li_flags;
};

/*
 * A simple list type used to build the list of locks held by a thread
 * or CPU.  We can't simply embed the list in struct lock_object since a
 * lock may be held by more than one thread if it is a shared lock.  Locks
 * are added to the head of the list, so we fill up each list entry from
 * "the back" logically.  To ease some of the arithmetic, we actually fill
 * in each list entry the normal way (children[0] then children[1], etc.) but
 * when we traverse the list we read children[count-1] as the first entry
 * down to children[0] as the final entry.
 */
struct lock_list_entry {
	struct lock_list_entry	*ll_next;
	struct lock_instance	ll_children[LOCK_NCHILDREN];
	int			ll_count;
};
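/*
 * Illustrative sketch (not from the original source): the fill and
 * traversal order of a lock_list_entry.  With LOCK_NCHILDREN == 5, a
 * thread that acquired locks A, B and C, in that order, has
 *
 *	ll_count == 3
 *	ll_children[0] == A, ll_children[1] == B, ll_children[2] == C
 *
 * Traversal starts at ll_children[ll_count - 1], so the most recently
 * acquired lock C is visited first and A last, giving the reverse
 * acquisition order that the checking code wants.  When an entry fills
 * up, a fresh lock_list_entry is pushed onto the ll_next chain.
 */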
/*
 * The main witness structure.  One of these per named lock type in the system
 * (for example, "vnode interlock").
 */
struct witness {
	const struct lock_type	*w_type;
	const char		*w_subtype;
	uint32_t		w_index;	/* Index in the relationship
						 * matrix */
	struct lock_class	*w_class;
	SIMPLEQ_ENTRY(witness)	w_list;		/* List of all witnesses. */
	SIMPLEQ_ENTRY(witness)	w_typelist;	/* Witnesses of a type. */
	struct witness		*w_hash_next;	/* Linked list in
						 * hash buckets. */
	uint16_t		w_num_ancestors; /* direct/indirect
						  * ancestor count */
	uint16_t		w_num_descendants; /* direct/indirect
						    * descendant count */
	int16_t			w_ddb_level;
	unsigned		w_acquired:1;
	unsigned		w_displayed:1;
	unsigned		w_reversed:1;
};

SIMPLEQ_HEAD(witness_list, witness);

/*
 * The witness hash table.  Keys are witness names (const char *), elements
 * are witness objects (struct witness *).
 */
struct witness_hash {
	struct witness	*wh_array[WITNESS_HASH_SIZE];
	uint32_t	wh_size;
	uint32_t	wh_count;
};

/*
 * Key type for the lock order data hash table.
 */
struct witness_lock_order_key {
	uint16_t	from;
	uint16_t	to;
};

struct witness_lock_order_data {
	struct stacktrace		wlod_stack;
	struct witness_lock_order_key	wlod_key;
	struct witness_lock_order_data	*wlod_next;
};

/*
 * The witness lock order data hash table.  Keys are witness index tuples
 * (struct witness_lock_order_key), elements are lock order data objects
 * (struct witness_lock_order_data).
 */
struct witness_lock_order_hash {
	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
	u_int	wloh_size;
	u_int	wloh_count;
};

struct witness_pendhelp {
	const struct lock_type	*wh_type;
	struct lock_object	*wh_lock;
};

struct witness_cpu {
	struct lock_list_entry	*wc_spinlocks;
	struct lock_list_entry	*wc_lle_cache;
	union lock_stack	*wc_stk_cache;
	unsigned int		 wc_lle_count;
	unsigned int		 wc_stk_count;
} __aligned(CACHELINESIZE);

#define	WITNESS_LLE_CACHE_MAX	8
#define	WITNESS_STK_CACHE_MAX	(WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN)

struct witness_cpu witness_cpu[MAXCPUS];

/*
 * Returns 0 if one of the locks is a spin lock and the other is not.
 * Returns 1 otherwise.
 */
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{

	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
	    (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}

static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
    const struct witness_lock_order_key *b)
{

	return (a->from == b->from && a->to == b->to);
}
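/*
 * Illustrative sketch (not from the original source): how a lock order
 * pair is keyed.  The tuple is built from the witness indices of the two
 * locks, so the order "w1 then w2" and the order "w2 then w1" hash to
 * different entries:
 *
 *	struct witness_lock_order_key key = {
 *		.from = w1->w_index,
 *		.to   = w2->w_index,
 *	};
 *
 * witness_lock_order_key_equal() compares both fields, which is why each
 * direction of an order keeps its own saved stack trace (see wlod1 and
 * wlod2 in witness_checkorder() below).
 */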
static int	_isitmyx(struct witness *w1, struct witness *w2, int rmask,
		    const char *fname);
static void	adopt(struct witness *parent, struct witness *child);
static struct witness	*enroll(const struct lock_type *, const char *,
			    struct lock_class *);
static struct lock_instance	*find_instance(struct lock_list_entry *list,
				    const struct lock_object *lock);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent,
		    struct witness *child);
static void	itismychild(struct witness *parent, struct witness *child);
#ifdef DDB
static void	db_witness_add_fullgraph(struct witness *parent);
static void	witness_ddb_compute_levels(void);
static void	witness_ddb_display(int(*)(const char *fmt, ...));
static void	witness_ddb_display_descendants(int(*)(const char *fmt, ...),
		    struct witness *, int indent);
static void	witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_ddb_level_descendants(struct witness *parent, int l);
static void	witness_ddb_list(struct proc *td);
#endif
static int	witness_alloc_stacks(void);
static void	witness_debugger(int dump);
static void	witness_free(struct witness *m);
static struct witness	*witness_get(void);
static uint32_t	witness_hash_djb2(const uint8_t *key, uint32_t size);
static struct witness	*witness_hash_get(const struct lock_type *,
			    const char *);
static void	witness_hash_put(struct witness *w);
static void	witness_init_hash_tables(void);
static void	witness_increment_graph_generation(void);
static int	witness_list_locks(struct lock_list_entry **,
		    int (*)(const char *, ...));
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct lock_list_entry	*witness_lock_list_get(void);
static void	witness_lock_stack_free(union lock_stack *stack);
static union lock_stack	*witness_lock_stack_get(void);
static int	witness_lock_order_add(struct witness *parent,
		    struct witness *child);
static int	witness_lock_order_check(struct witness *parent,
		    struct witness *child);
static struct witness_lock_order_data	*witness_lock_order_get(
					    struct witness *parent,
					    struct witness *child);
static void	witness_list_lock(struct lock_instance *instance,
		    int (*prnt)(const char *fmt, ...));
static void	witness_setflag(struct lock_object *lock, int flag, int set);

/*
 * If set to 0, lock order checking is disabled.  If set to -1,
 * witness is completely disabled.  Otherwise witness performs full
 * lock order checking for all locks.  At runtime, lock order checking
 * may be toggled.  However, witness cannot be reenabled once it is
 * completely disabled.
 */
#ifdef WITNESS_WATCH
static int witness_watch = 3;
#else
static int witness_watch = 2;
#endif

#ifdef WITNESS_LOCKTRACE
static int witness_locktrace = 1;
#else
static int witness_locktrace = 0;
#endif

int witness_count = WITNESS_COUNT;

static struct mutex w_mtx;
static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock");

/* w_list */
static struct witness_list w_free = SIMPLEQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = SIMPLEQ_HEAD_INITIALIZER(w_all);

/* w_typelist */
static struct witness_list w_spin = SIMPLEQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = SIMPLEQ_HEAD_INITIALIZER(w_sleep);

/* lock list */
static struct lock_list_entry *w_lock_list_free = NULL;
static struct witness_pendhelp pending_locks[WITNESS_PENDLIST];
static u_int pending_cnt;

static int w_free_cnt, w_spin_cnt, w_sleep_cnt;

static struct witness *w_data;
static uint8_t **w_rmatrix;
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
static struct witness_hash w_hash;	/* The witness hash table. */
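/*
 * Illustrative sketch (not from the original source): the gate that most
 * entry points apply before doing any work, combining the witness_watch
 * levels described above with the early-boot and debugger state:
 *
 *	if (witness_cold || witness_watch < 1 || panicstr != NULL ||
 *	    db_active || (lock->lo_flags & LO_WITNESS) == 0)
 *		return;
 *
 * witness_watch < 0 disables witness entirely, 0 keeps the bookkeeping
 * but skips order checking, and >= 1 enables full checking.
 */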
/* The lock order data hash */
static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT];
static struct witness_lock_order_data *w_lofree = NULL;
static struct witness_lock_order_hash w_lohash;
static int w_max_used_index = 0;
static unsigned int w_generation = 0;

static union lock_stack *w_lock_stack_free;
static unsigned int w_lock_stack_num;

static struct lock_class lock_class_kernel_lock = {
	.lc_name = "kernel_lock",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE
};

static struct lock_class lock_class_sched_lock = {
	.lc_name = "sched_lock",
	.lc_flags = LC_SPINLOCK | LC_RECURSABLE
};

static struct lock_class lock_class_mutex = {
	.lc_name = "mutex",
	.lc_flags = LC_SPINLOCK
};

static struct lock_class lock_class_rwlock = {
	.lc_name = "rwlock",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_UPGRADABLE
};

static struct lock_class lock_class_rrwlock = {
	.lc_name = "rrwlock",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE |
	    LC_UPGRADABLE
};

static struct lock_class *lock_classes[] = {
	&lock_class_kernel_lock,
	&lock_class_sched_lock,
	&lock_class_mutex,
	&lock_class_rwlock,
	&lock_class_rrwlock,
};
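/*
 * Illustrative sketch (not from the original source): what a hypothetical
 * additional lock class would look like.  A recursable spin lock class,
 * for instance, would compose the class flags the same way the entries
 * above do, and would also need a slot in lock_classes[]:
 *
 *	static struct lock_class lock_class_example = {
 *		.lc_name = "example",
 *		.lc_flags = LC_SPINLOCK | LC_RECURSABLE
 *	};
 *
 * Per the comment on struct lock_class, LC_RECURSABLE only permits
 * recursion; an individual lock must additionally carry LO_RECURSABLE
 * for witness_init() to accept it as recursable.
 */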
/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * This global is set to 1 once the static lock orders have been enrolled
 * so that a warning can be issued for any spin locks enrolled later.
 */
static int witness_spin_warn = 0;

/*
 * The WITNESS-enabled diagnostic code.  Note that the witness code assumes
 * that the early boot is single-threaded at least until after this routine
 * is completed.
 */
void
witness_initialize(void)
{
	struct lock_object *lock;
	union lock_stack *stacks;
	struct witness *w;
	int i, s;

	w_data = (void *)uvm_pageboot_alloc(sizeof(struct witness) *
	    witness_count);
	memset(w_data, 0, sizeof(struct witness) * witness_count);

	w_rmatrix = (void *)uvm_pageboot_alloc(sizeof(*w_rmatrix) *
	    (witness_count + 1));

	for (i = 0; i < witness_count + 1; i++) {
		w_rmatrix[i] = (void *)uvm_pageboot_alloc(
		    sizeof(*w_rmatrix[i]) * (witness_count + 1));
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	mtx_init_flags(&w_mtx, IPL_HIGH, "witness lock", MTX_NOWITNESS);
	for (i = witness_count - 1; i >= 0; i--) {
		w = &w_data[i];
		memset(w, 0, sizeof(*w));
		w_data[i].w_index = i;	/* Witness index never changes. */
		witness_free(w);
	}
	KASSERTMSG(SIMPLEQ_FIRST(&w_free)->w_index == 0,
	    "%s: Invalid list of free witness objects", __func__);

	/* Witness with index 0 is not used, to aid in debugging. */
	SIMPLEQ_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;

	for (i = 0; i < witness_count; i++) {
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	if (witness_locktrace) {
		w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN;
		stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) *
		    w_lock_stack_num);
	}

	s = splhigh();
	for (i = 0; i < w_lock_stack_num; i++)
		witness_lock_stack_free(&stacks[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);
	splx(s);
	witness_init_hash_tables();
	witness_spin_warn = 1;

	/* Iterate through all locks and add them to witness. */
	for (i = 0; pending_locks[i].wh_lock != NULL; i++) {
		lock = pending_locks[i].wh_lock;
		KASSERTMSG(lock->lo_flags & LO_WITNESS,
		    "%s: lock %s is on pending list but not LO_WITNESS",
		    __func__, lock->lo_name);
		lock->lo_witness = enroll(pending_locks[i].wh_type,
		    lock->lo_name, LOCK_CLASS(lock));
	}

	/* Mark the witness code as being ready for use. */
	witness_cold = 0;
}

void
witness_init(struct lock_object *lock, const struct lock_type *type)
{
	struct lock_class *class;

	/* Various sanity checks. */
	class = LOCK_CLASS(lock);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable",
		    __func__, class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable",
		    __func__, class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable",
		    __func__, class->lc_name, lock->lo_name);

	/*
	 * If we shouldn't watch this lock, then just clear lo_witness.
	 * Record the type in case the lock becomes watched later.
	 * Otherwise, if witness_cold is set, then it is too early to
	 * enroll this lock, so defer it to witness_initialize() by adding
	 * it to the pending_locks list.  If it is not too early, then enroll
	 * the lock now.
	 */
	if (witness_watch < 1 || panicstr != NULL || db_active ||
	    (lock->lo_flags & LO_WITNESS) == 0) {
		lock->lo_witness = NULL;
		lock->lo_type = type;
	} else if (witness_cold) {
		pending_locks[pending_cnt].wh_lock = lock;
		pending_locks[pending_cnt++].wh_type = type;
		if (pending_cnt > WITNESS_PENDLIST)
			panic("%s: pending locks list is too small, "
			    "increase WITNESS_PENDLIST",
			    __func__);
	} else
		lock->lo_witness = enroll(type, lock->lo_name, class);
}

static inline int
is_kernel_lock(const struct lock_object *lock)
{
#ifdef MULTIPROCESSOR
	return (lock == &kernel_lock.mpl_lock_obj);
#else
	return (0);
#endif
}
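/*
 * Illustrative sketch (not from the original source): the enrollment
 * lifecycle that witness_init() and witness_initialize() implement
 * between them.  A hypothetical driver lock initialized before the
 * witness code is ready takes the deferred path:
 *
 *	boot, witness_cold == 1:
 *		witness_init(&sc_lock, type)
 *		    -> appended to pending_locks[]
 *	witness_initialize():
 *		pending_locks[] drained, each lock enrolled,
 *		witness_cold = 0
 *	afterwards:
 *		witness_init() enrolls locks immediately via enroll()
 *
 * sc_lock is a made-up name; any LO_WITNESS lock created early follows
 * the same path.
 */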
#ifdef DDB
static void
witness_ddb_compute_levels(void)
{
	struct witness *w;

	/*
	 * First clear all levels.
	 */
	SIMPLEQ_FOREACH(w, &w_all, w_list)
		w->w_ddb_level = -1;

	/*
	 * Look for locks with no parents and level all their descendants.
	 */
	SIMPLEQ_FOREACH(w, &w_all, w_list) {
		/* If the witness has ancestors (is not a root), skip it. */
		if (w->w_num_ancestors > 0)
			continue;
		witness_ddb_level_descendants(w, 0);
	}
}

static void
witness_ddb_level_descendants(struct witness *w, int l)
{
	int i;

	if (w->w_ddb_level >= l)
		return;

	w->w_ddb_level = l;
	l++;

	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_level_descendants(&w_data[i], l);
	}
}

static void
witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
    struct witness *w, int indent)
{
	int i;

	for (i = 0; i < indent; i++)
		prnt(" ");
	prnt("%s (type: %s, depth: %d)",
	    w->w_type->lt_name, w->w_class->lc_name, w->w_ddb_level);
	if (w->w_displayed) {
		prnt(" -- (already displayed)\n");
		return;
	}
	w->w_displayed = 1;
	if (!w->w_acquired)
		prnt(" -- never acquired\n");
	else
		prnt("\n");
	indent++;
	WITNESS_INDEX_ASSERT(w->w_index);
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_display_descendants(prnt, &w_data[i],
			    indent);
	}
}

static void
witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
    struct witness_list *list)
{
	struct witness *w;

	SIMPLEQ_FOREACH(w, list, w_typelist) {
		if (!w->w_acquired || w->w_ddb_level > 0)
			continue;

		/* This lock has no ancestors - display its descendants. */
		witness_ddb_display_descendants(prnt, w, 0);
	}
}

static void
witness_ddb_display(int(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	witness_ddb_compute_levels();

	/* Clear all the displayed flags. */
	SIMPLEQ_FOREACH(w, &w_all, w_list)
		w->w_displayed = 0;

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_ddb_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_ddb_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	SIMPLEQ_FOREACH(w, &w_all, w_list) {
		if (w->w_acquired)
			continue;
		prnt("%s (type: %s, depth: %d)\n", w->w_type->lt_name,
		    w->w_class->lc_name, w->w_ddb_level);
	}
}
#endif /* DDB */

int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch < 0 || panicstr != NULL || db_active)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MUTEX_ASSERT_UNLOCKED(&w_mtx);
	mtx_enter(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (witness_watch &&
	    isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_leave(&w_mtx);
		return (EINVAL);
	}

	/* Try to add the new order. */
	itismychild(lock1->lo_witness, lock2->lo_witness);
	mtx_leave(&w_mtx);
	return (0);
}
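/*
 * Illustrative sketch (not from the original source): using
 * witness_defineorder() to pin down an order between two known locks
 * before either has been acquired in sequence.  Both lock objects must
 * already be enrolled with witness:
 *
 *	error = witness_defineorder(&lock_a, &lock_b);
 *	if (error == EINVAL)
 *		... the reverse order is already known, bail out ...
 *
 * lock_a and lock_b are hypothetical struct lock_object instances; the
 * call simply records "lock_a before lock_b" in the order graph.
 */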
void
witness_checkorder(struct lock_object *lock, int flags,
    struct lock_object *interlock)
{
	struct lock_list_entry *lock_list, *lle;
	struct lock_instance *lock1, *lock2, *plock;
	struct lock_class *class, *iclass;
	struct proc *p;
	struct witness *w, *w1;
	int i, j, s;

	if (witness_cold || witness_watch < 1 || panicstr != NULL ||
	    db_active || (lock->lo_flags & LO_WITNESS) == 0)
		return;

	w = lock->lo_witness;
	class = LOCK_CLASS(lock);

	if (w == NULL)
		w = lock->lo_witness =
		    enroll(lock->lo_type, lock->lo_name, class);

	p = curproc;

	if (class->lc_flags & LC_SLEEPLOCK) {
		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * all spin locks.
		 */
		lock_list = witness_cpu[cpu_number()].wc_spinlocks;
		if (lock_list != NULL && lock_list->ll_count > 0) {
			panic("acquiring blockable sleep lock with "
			    "spinlock or critical section held (%s) %s",
			    class->lc_name, lock->lo_name);
		}

		/*
		 * If this is the first lock acquired then just return as
		 * no order checking is needed.
		 */
		lock_list = p->p_sleeplocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			return;
	} else {
		/*
		 * If this is the first lock, just return as no order
		 * checking is needed.
		 */
		lock_list = witness_cpu[cpu_number()].wc_spinlocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			return;
	}

	s = splhigh();

	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("witness: shared lock of (%s) %s "
			    "while exclusively locked\n",
			    class->lc_name, lock->lo_name);
			panic("excl->share");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("witness: exclusive lock of (%s) %s "
			    "while share locked\n",
			    class->lc_name, lock->lo_name);
			panic("share->excl");
		}
		goto out_splx;
	}

	/* Warn if the interlock is not locked exactly once. */
	if (interlock != NULL) {
		iclass = LOCK_CLASS(interlock);
		lock1 = find_instance(lock_list, interlock);
		if (lock1 == NULL)
			panic("interlock (%s) %s not locked",
			    iclass->lc_name, interlock->lo_name);
		else if ((lock1->li_flags & LI_RECURSEMASK) != 0)
			panic("interlock (%s) %s recursed",
			    iclass->lc_name, interlock->lo_name);
	}

	/*
	 * Find the previously acquired lock, but ignore interlocks.
	 */
	plock = &lock_list->ll_children[lock_list->ll_count - 1];
	if (interlock != NULL && plock->li_lock == interlock) {
		if (lock_list->ll_count > 1)
			plock =
			    &lock_list->ll_children[lock_list->ll_count - 2];
		else {
			lle = lock_list->ll_next;

			/*
			 * The interlock is the only lock we hold, so
			 * simply return.
			 */
			if (lle == NULL)
				goto out_splx;
			plock = &lle->ll_children[lle->ll_count - 1];
		}
	}
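	/*
	 * Illustrative sketch (not from the original source): with a held
	 * list of [A, I], where I is the interlock passed to this call,
	 * the code above selects A as plock, the previously acquired lock
	 * that the new lock will be ordered against.  If I were the only
	 * lock held, there would be nothing to order against and the
	 * function would have bailed out already.
	 */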
	/*
	 * Try to perform most checks without a lock.  If this succeeds we
	 * can skip acquiring the lock and return success.  Otherwise we redo
	 * the check with the lock held to handle races with concurrent
	 * updates.
	 */
	w1 = plock->li_lock->lo_witness;
	if (witness_lock_order_check(w1, w))
		goto out_splx;

	mtx_enter(&w_mtx);
	if (witness_lock_order_check(w1, w))
		goto out;

	witness_lock_order_add(w1, w);

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	if (w1 == w) {
		i = w->w_index;
		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
		    !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
			w_rmatrix[i][i] |= WITNESS_REVERSAL;
			w->w_reversed = 1;
			mtx_leave(&w_mtx);
			printf("witness: acquiring duplicate lock of "
			    "same type: \"%s\"\n", w->w_type->lt_name);
			printf(" 1st %s\n", plock->li_lock->lo_name);
			printf(" 2nd %s\n", lock->lo_name);
			witness_debugger(1);
		} else
			mtx_leave(&w_mtx);
		goto out_splx;
	}
	MUTEX_ASSERT_LOCKED(&w_mtx);

	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmychild(w1, w))
		goto out;

	for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			KASSERT(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN);
			lock1 = &lle->ll_children[i];

			/*
			 * Ignore the interlock.
			 */
			if (interlock == lock1->li_lock)
				continue;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			w1 = lock1->li_lock->lo_witness;
			if (w1 == NULL) {
				KASSERTMSG((lock1->li_lock->lo_flags &
				    LO_WITNESS) == 0,
				    "lock missing witness structure");
				continue;
			}

			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    is_kernel_lock(lock))
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    is_kernel_lock(lock1->li_lock))
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				goto reversal;

			/*
			 * If we are locking Giant and this is a non-sleepable
			 * lock, then treat it as a reversal.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
			    is_kernel_lock(lock))
				goto reversal;

			/*
			 * Check the lock order hierarchy for a reversal.
			 */
			if (!isitmydescendant(w, w1))
				continue;
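			/*
			 * Illustrative note (not from the original source):
			 * the code at the reversal label below flags both
			 * matrix cells, w_rmatrix[w1][w] and w_rmatrix[w][w1],
			 * so a reversal between two witnesses is reported
			 * only once; later occurrences bail out early via
			 * the "Bail if this violation is known" check.
			 */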
reversal:

			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */

			/* Bail if this violation is known */
			if (w_rmatrix[w1->w_index][w->w_index] &
			    WITNESS_REVERSAL)
				goto out;

			/* Record this as a violation */
			w_rmatrix[w1->w_index][w->w_index] |=
			    WITNESS_REVERSAL;
			w_rmatrix[w->w_index][w1->w_index] |=
			    WITNESS_REVERSAL;
			w->w_reversed = w1->w_reversed = 1;
			witness_increment_graph_generation();
			mtx_leave(&w_mtx);

			/*
			 * There are known LORs between VNODE locks.  They are
			 * not an indication of a bug.  VNODE locks are flagged
			 * as such (LO_IS_VNODE) and we don't yell if the LOR
			 * is between 2 VNODE locks.
			 */
			if ((lock->lo_flags & LO_IS_VNODE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0)
				goto out_splx;

			/*
			 * Ok, yell about it.
			 */
			printf("witness: ");
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				printf("lock order reversal: "
				    "(sleepable after non-sleepable)\n");
			else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE)
			    == 0 && is_kernel_lock(lock))
				printf("lock order reversal: "
				    "(Giant after non-sleepable)\n");
			else
				printf("lock order reversal:\n");

			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.
			 */
			do {
				lock2 = &lle->ll_children[i];
				KASSERT(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					KASSERT(i >= 0 && i < LOCK_NCHILDREN);
				} else
					i--;
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s)\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_type->lt_name);
				printf(" 2nd %p %s (%s)\n",
				    lock, lock->lo_name, w->w_type->lt_name);
			} else {
				printf(" 1st %p %s (%s)\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_witness->w_type->
				    lt_name);
				printf(" 2nd %p %s (%s)\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_type->lt_name);
				printf(" 3rd %p %s (%s)\n", lock,
				    lock->lo_name, w->w_type->lt_name);
			}
			if (witness_watch > 1) {
				struct witness_lock_order_data *wlod1, *wlod2;

				mtx_enter(&w_mtx);
				wlod1 = witness_lock_order_get(w, w1);
				wlod2 = witness_lock_order_get(w1, w);
				mtx_leave(&w_mtx);

				/*
				 * It is safe to access saved stack traces,
				 * w_type, and w_class without the lock.
				 * Once written, they never change.
				 */

				if (wlod1 != NULL) {
					printf("lock order \"%s\"(%s) -> "
					    "\"%s\"(%s) first seen at:\n",
					    w->w_type->lt_name,
					    w->w_class->lc_name,
					    w1->w_type->lt_name,
					    w1->w_class->lc_name);
					stacktrace_print(
					    &wlod1->wlod_stack, printf);
				}
				if (wlod2 != NULL) {
					printf("lock order \"%s\"(%s) -> "
					    "\"%s\"(%s) first seen at:\n",
					    w1->w_type->lt_name,
					    w1->w_class->lc_name,
					    w->w_type->lt_name,
					    w->w_class->lc_name);
					stacktrace_print(
					    &wlod2->wlod_stack, printf);
				}
			}
			witness_debugger(0);
			goto out_splx;
		}
	}
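	/*
	 * Illustrative sketch (not from the original source): the shape of
	 * the report printed by the reversal code above, with hypothetical
	 * lock names and addresses.  When an earlier instance of witness w
	 * is found in the held-lock list, three locks are shown:
	 *
	 *	witness: lock order reversal:
	 *	 1st 0xffff800000123456 vmmaplk (vmmaplk)
	 *	 2nd 0xffff800000abcdef inode (inode)
	 *	 3rd 0xffff800000123456 vmmaplk (vmmaplk)
	 *
	 * followed, at witness_watch > 1, by the stack traces where each
	 * direction of the order was first seen.
	 */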
	/*
	 * If requested, build a new lock order.  However, don't build a new
	 * relationship between a sleepable lock and Giant if it is in the
	 * wrong direction.  The correct lock order is that sleepable locks
	 * always come before Giant.
	 */
	if (flags & LOP_NEWORDER &&
	    !(is_kernel_lock(plock->li_lock) &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0))
		itismychild(plock->li_lock->lo_witness, w);
out:
	mtx_leave(&w_mtx);
out_splx:
	splx(s);
}

void
witness_lock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct proc *p;
	struct witness *w;
	int s;

	if (witness_cold || witness_watch < 0 || panicstr != NULL ||
	    db_active || (lock->lo_flags & LO_WITNESS) == 0)
		return;

	w = lock->lo_witness;
	if (w == NULL)
		w = lock->lo_witness =
		    enroll(lock->lo_type, lock->lo_name, LOCK_CLASS(lock));

	p = curproc;

	/* Determine lock list for this lock. */
	if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK)
		lock_list = &p->p_sleeplocks;
	else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	/* Check to see if we are recursing on a lock we already own. */
	instance = find_instance(*lock_list, lock);
	if (instance != NULL) {
		instance->li_flags++;
		goto out;
	}

	w->w_acquired = 1;

	/* Find the next open lock instance in the list and fill it. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		if (lle == NULL)
			goto out;
		lle->ll_next = *lock_list;
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	instance->li_stack = NULL;
	if (witness_locktrace) {
		instance->li_stack = witness_lock_stack_get();
		if (instance->li_stack != NULL)
			stacktrace_save(&instance->li_stack->ls_stack);
	}
out:
	splx(s);
}

void
witness_upgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("upgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("upgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("upgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("upgrade of exclusive lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("upgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	instance->li_flags |= LI_EXCLUSIVE;
out:
	splx(s);
}
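/*
 * Illustrative sketch (not from the original source): the state
 * transitions that witness_upgrade() and witness_downgrade() allow on an
 * LC_SLEEPLOCK/LO_UPGRADABLE lock instance, in terms of LI_EXCLUSIVE:
 *
 *	shared (LI_EXCLUSIVE clear) --upgrade-->   exclusive (bit set)
 *	exclusive (LI_EXCLUSIVE set) --downgrade-> shared (bit clear)
 *
 * Both directions additionally require that the instance is not recursed
 * (LI_RECURSEMASK == 0); upgrading an already-exclusive instance or
 * downgrading a shared one panics.
 */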
void
witness_downgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic(
			    "downgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("downgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("downgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("downgrade of shared lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("downgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	instance->li_flags &= ~LI_EXCLUSIVE;
out:
	splx(s);
}

void
witness_unlock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct proc *p;
	int i, j;
	int s;

	if (witness_cold || lock->lo_witness == NULL ||
	    panicstr != NULL || db_active)
		return;
	p = curproc;
	class = LOCK_CLASS(lock);

	/* Find lock instance associated with this lock. */
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = &p->p_sleeplocks;
	else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	lle = *lock_list;
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock)
				goto found;
		}

	/*
	 * When WITNESS is disabled through witness_watch, we could end up
	 * with locks still registered in the p_sleeplocks queue.
	 * We have to make sure we flush these queues, so search for any
	 * such leftover registered locks and remove them.
	 */
	if (witness_watch > 0) {
		panic("lock (%s) %s not locked", class->lc_name,
		    lock->lo_name);
	}
	goto out;

found:

	/* First, check for shared/exclusive mismatches. */
	if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) == 0) {
		printf("witness: shared unlock of (%s) %s "
		    "while exclusively locked\n",
		    class->lc_name, lock->lo_name);
		panic("excl->ushare");
	}
	if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) != 0) {
		printf("witness: exclusive unlock of (%s) %s "
		    "while share locked\n", class->lc_name, lock->lo_name);
		panic("share->uexcl");
	}
	/* If we are recursed, unrecurse. */
	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
		instance->li_flags--;
		goto out;
	}
	/* The lock is now being dropped, check for NORELEASE flag */
	if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) {
		printf("witness: forbidden unlock of (%s) %s\n",
		    class->lc_name, lock->lo_name);
		panic("lock marked norelease");
	}
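	/*
	 * Illustrative note (not from the original source): the plain
	 * li_flags++ in witness_lock() and li_flags-- above work as a
	 * recursion counter because LI_RECURSEMASK occupies the low 16
	 * bits of li_flags; the LI_EXCLUSIVE and LI_NORELEASE bits sit
	 * above the counter and are never disturbed by the increment or
	 * decrement as long as the depth stays within the mask.
	 */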
	/* Release the stack buffer, if any. */
	if (instance->li_stack != NULL) {
		witness_lock_stack_free(instance->li_stack);
		instance->li_stack = NULL;
	}

	/* Remove this item from the list. */
	for (j = i; j < (*lock_list)->ll_count - 1; j++)
		(*lock_list)->ll_children[j] =
		    (*lock_list)->ll_children[j + 1];
	(*lock_list)->ll_count--;

	/*
	 * To reduce contention on w_mtx, we always want to keep a head
	 * object in the list so that frequent allocation from the free
	 * witness pool (and the subsequent locking) is avoided.
	 * To keep the code simple, a completely emptied head object also
	 * means that no further objects remain in the list, so list
	 * ownership needs to be handed over to another object if the
	 * current head needs to be freed.
	 */
	if ((*lock_list)->ll_count == 0) {
		if (*lock_list == lle) {
			if (lle->ll_next == NULL)
				goto out;
		} else
			lle = *lock_list;
		*lock_list = lle->ll_next;
		witness_lock_list_free(lle);
	}
out:
	splx(s);
}

void
witness_thread_exit(struct proc *p)
{
	struct lock_list_entry *lle;
	int i, n, s;

	lle = p->p_sleeplocks;
	if (lle == NULL || panicstr != NULL || db_active)
		return;
	if (lle->ll_count != 0) {
		for (n = 0; lle != NULL; lle = lle->ll_next)
			for (i = lle->ll_count - 1; i >= 0; i--) {
				if (n == 0)
					printf("witness: thread %p exiting "
					    "with the following locks held:\n",
					    p);
				n++;
				witness_list_lock(&lle->ll_children[i],
				    printf);
			}
		panic("thread %p cannot exit while holding sleeplocks", p);
	}
	KASSERT(lle->ll_next == NULL);
	s = splhigh();
	witness_lock_list_free(lle);
	splx(s);
}
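/*
 * Illustrative sketch (not from the original source): a hypothetical
 * caller using witness_warn() to assert, just before sleeping, that it
 * holds no locks other than sleepable ones:
 *
 *	witness_warn(WARN_SLEEPOK | WARN_PANIC, NULL,
 *	    "sleeping with locks held");
 *
 * With WARN_PANIC set, any non-exempt held lock is listed and then the
 * kernel panics; without it, the debugger hook is invoked instead.
 */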
1437 */ 1438 lock1 = &lock_list->ll_children[lock_list->ll_count - 1]; 1439 if (lock_list->ll_count == 1 && lock_list->ll_next == NULL && 1440 lock1->li_lock == lock && n == 0) 1441 return (0); 1442 1443 printf("witness: "); 1444 va_start(ap, fmt); 1445 vprintf(fmt, ap); 1446 va_end(ap); 1447 printf(" with the following %slocks held:\n", 1448 (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : ""); 1449 n += witness_list_locks(&lock_list, printf); 1450 } 1451 if (n > 0) { 1452 if (flags & WARN_PANIC) 1453 panic("%s", __func__); 1454 else 1455 witness_debugger(1); 1456 } 1457 return (n); 1458 } 1459 1460 static struct witness * 1461 enroll(const struct lock_type *type, const char *subtype, 1462 struct lock_class *lock_class) 1463 { 1464 struct witness *w; 1465 struct witness_list *typelist; 1466 1467 KASSERT(type != NULL); 1468 1469 if (witness_watch < 0 || panicstr != NULL || db_active) 1470 return (NULL); 1471 if ((lock_class->lc_flags & LC_SPINLOCK)) { 1472 typelist = &w_spin; 1473 } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) { 1474 typelist = &w_sleep; 1475 } else { 1476 panic("lock class %s is not sleep or spin", 1477 lock_class->lc_name); 1478 return (NULL); 1479 } 1480 1481 mtx_enter(&w_mtx); 1482 w = witness_hash_get(type, subtype); 1483 if (w) 1484 goto found; 1485 if ((w = witness_get()) == NULL) 1486 return (NULL); 1487 w->w_type = type; 1488 w->w_subtype = subtype; 1489 w->w_class = lock_class; 1490 SIMPLEQ_INSERT_HEAD(&w_all, w, w_list); 1491 if (lock_class->lc_flags & LC_SPINLOCK) { 1492 SIMPLEQ_INSERT_HEAD(&w_spin, w, w_typelist); 1493 w_spin_cnt++; 1494 } else if (lock_class->lc_flags & LC_SLEEPLOCK) { 1495 SIMPLEQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1496 w_sleep_cnt++; 1497 } 1498 1499 /* Insert new witness into the hash */ 1500 witness_hash_put(w); 1501 witness_increment_graph_generation(); 1502 mtx_leave(&w_mtx); 1503 return (w); 1504 found: 1505 mtx_leave(&w_mtx); 1506 if (lock_class != w->w_class) 1507 panic("lock (%s) %s does not match earlier (%s) lock", 1508 type->lt_name, lock_class->lc_name, w->w_class->lc_name); 1509 return (w); 1510 } 1511 1512 static void 1513 adopt(struct witness *parent, struct witness *child) 1514 { 1515 int pi, ci, i, j; 1516 1517 if (witness_cold == 0) 1518 MUTEX_ASSERT_LOCKED(&w_mtx); 1519 1520 /* If the relationship is already known, there's no work to be done. */ 1521 if (isitmychild(parent, child)) 1522 return; 1523 1524 /* When the structure of the graph changes, bump up the generation. */ 1525 witness_increment_graph_generation(); 1526 1527 /* 1528 * The hard part ... create the direct relationship, then propagate all 1529 * indirect relationships. 1530 */ 1531 pi = parent->w_index; 1532 ci = child->w_index; 1533 WITNESS_INDEX_ASSERT(pi); 1534 WITNESS_INDEX_ASSERT(ci); 1535 KASSERT(pi != ci); 1536 w_rmatrix[pi][ci] |= WITNESS_PARENT; 1537 w_rmatrix[ci][pi] |= WITNESS_CHILD; 1538 1539 /* 1540 * If parent was not already an ancestor of child, 1541 * then we increment the descendant and ancestor counters. 1542 */ 1543 if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { 1544 parent->w_num_descendants++; 1545 child->w_num_ancestors++; 1546 } 1547 1548 /* 1549 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 1550 * an ancestor of 'pi' during this loop. 1551 */ 1552 for (i = 1; i <= w_max_used_index; i++) { 1553 if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 1554 (i != pi)) 1555 continue; 1556 1557 /* Find each descendant of 'i' and mark it as a descendant. 
	/*
	 * Find each ancestor of 'pi'.  Note that 'pi' itself is counted as
	 * an ancestor of 'pi' during this loop.
	 */
	for (i = 1; i <= w_max_used_index; i++) {
		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
		    (i != pi))
			continue;

		/* Find each descendant of 'i' and mark it as a descendant. */
		for (j = 1; j <= w_max_used_index; j++) {

			/*
			 * Skip children that are already marked as
			 * descendants of 'i'.
			 */
			if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK)
				continue;

			/*
			 * We are only interested in descendants of 'ci'.  Note
			 * that 'ci' itself is counted as a descendant of 'ci'.
			 */
			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
			    (j != ci))
				continue;
			w_rmatrix[i][j] |= WITNESS_ANCESTOR;
			w_rmatrix[j][i] |= WITNESS_DESCENDANT;
			w_data[i].w_num_descendants++;
			w_data[j].w_num_ancestors++;

			/*
			 * Make sure we aren't marking a node as both an
			 * ancestor and descendant.  We should have caught
			 * this as a lock order reversal earlier.
			 */
			if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
				printf("witness: rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    i, j, w_rmatrix[i][j]);
#ifdef DDB
				db_stack_dump();
#endif
				printf("witness disabled\n");
				witness_watch = -1;
			}
			if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
				printf("witness: rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    j, i, w_rmatrix[j][i]);
#ifdef DDB
				db_stack_dump();
#endif
				printf("witness disabled\n");
				witness_watch = -1;
			}
		}
	}
}

static void
itismychild(struct witness *parent, struct witness *child)
{
	int unlocked;

	KASSERT(child != NULL && parent != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	if (!witness_lock_type_equal(parent, child)) {
		if (witness_cold == 0) {
			unlocked = 1;
			mtx_leave(&w_mtx);
		} else {
			unlocked = 0;
		}
		panic(
		    "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
		    "the same lock type", __func__, parent->w_type->lt_name,
		    parent->w_class->lc_name, child->w_type->lt_name,
		    child->w_class->lc_name);
		if (unlocked)
			mtx_enter(&w_mtx);
	}
	adopt(parent, child);
}

/*
 * Generic code for the isitmy*() functions.  The rmask parameter is the
 * expected relationship of w1 to w2.
 */
static int
_isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
{
	unsigned char r1, r2;
	int i1, i2;

	i1 = w1->w_index;
	i2 = w2->w_index;
	WITNESS_INDEX_ASSERT(i1);
	WITNESS_INDEX_ASSERT(i2);
	r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK;
	r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK;

	/* The flags on one better be the inverse of the flags on the other */
	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
		/* Don't squawk if we're potentially racing with an update. */
		if (w_mtx.mtx_owner != curcpu())
			return (0);
		printf("witness: %s: rmatrix mismatch between %s (index %d) "
		    "and %s (index %d): w_rmatrix[%d][%d] == %x but "
		    "w_rmatrix[%d][%d] == %x\n",
		    fname, w1->w_type->lt_name, i1, w2->w_type->lt_name,
		    i2, i1, i2, r1,
		    i2, i1, r2);
#ifdef DDB
		db_stack_dump();
#endif
		printf("witness disabled\n");
		witness_watch = -1;
	}
	return (r1 & rmask);
}
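/*
 * Illustrative note (not from the original source): how the two wrappers
 * below use the rmask parameter.  isitmychild() passes WITNESS_PARENT and
 * so matches only a direct edge, while isitmydescendant() passes
 * WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) and matches
 * both direct and transitive relationships.
 */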
1677 */ 1678 static int 1679 isitmychild(struct witness *parent, struct witness *child) 1680 { 1681 1682 return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); 1683 } 1684 1685 /* 1686 * Checks if @descendant is a direct or inderect descendant of @ancestor. 1687 */ 1688 static int 1689 isitmydescendant(struct witness *ancestor, struct witness *descendant) 1690 { 1691 1692 return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, 1693 __func__)); 1694 } 1695 1696 static struct witness * 1697 witness_get(void) 1698 { 1699 struct witness *w; 1700 int index; 1701 1702 if (witness_cold == 0) 1703 MUTEX_ASSERT_LOCKED(&w_mtx); 1704 1705 if (witness_watch < 0) { 1706 mtx_leave(&w_mtx); 1707 return (NULL); 1708 } 1709 if (SIMPLEQ_EMPTY(&w_free)) { 1710 witness_watch = -1; 1711 mtx_leave(&w_mtx); 1712 printf("WITNESS: unable to allocate a new witness object\n"); 1713 return (NULL); 1714 } 1715 w = SIMPLEQ_FIRST(&w_free); 1716 SIMPLEQ_REMOVE_HEAD(&w_free, w_list); 1717 w_free_cnt--; 1718 index = w->w_index; 1719 KASSERT(index > 0 && index == w_max_used_index + 1 && 1720 index < witness_count); 1721 memset(w, 0, sizeof(*w)); 1722 w->w_index = index; 1723 if (index > w_max_used_index) 1724 w_max_used_index = index; 1725 return (w); 1726 } 1727 1728 static void 1729 witness_free(struct witness *w) 1730 { 1731 1732 SIMPLEQ_INSERT_HEAD(&w_free, w, w_list); 1733 w_free_cnt++; 1734 } 1735 1736 static struct lock_list_entry * 1737 witness_lock_list_get(void) 1738 { 1739 struct lock_list_entry *lle; 1740 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1741 1742 if (witness_watch < 0) 1743 return (NULL); 1744 1745 splassert(IPL_HIGH); 1746 1747 if (wcpu->wc_lle_count > 0) { 1748 lle = wcpu->wc_lle_cache; 1749 wcpu->wc_lle_cache = lle->ll_next; 1750 wcpu->wc_lle_count--; 1751 memset(lle, 0, sizeof(*lle)); 1752 return (lle); 1753 } 1754 1755 mtx_enter(&w_mtx); 1756 lle = w_lock_list_free; 1757 if (lle == NULL) { 1758 witness_watch = -1; 1759 mtx_leave(&w_mtx); 1760 printf("%s: witness exhausted\n", __func__); 1761 return (NULL); 1762 } 1763 w_lock_list_free = lle->ll_next; 1764 mtx_leave(&w_mtx); 1765 memset(lle, 0, sizeof(*lle)); 1766 return (lle); 1767 } 1768 1769 static void 1770 witness_lock_list_free(struct lock_list_entry *lle) 1771 { 1772 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1773 1774 splassert(IPL_HIGH); 1775 1776 if (wcpu->wc_lle_count < WITNESS_LLE_CACHE_MAX) { 1777 lle->ll_next = wcpu->wc_lle_cache; 1778 wcpu->wc_lle_cache = lle; 1779 wcpu->wc_lle_count++; 1780 return; 1781 } 1782 1783 mtx_enter(&w_mtx); 1784 lle->ll_next = w_lock_list_free; 1785 w_lock_list_free = lle; 1786 mtx_leave(&w_mtx); 1787 } 1788 1789 static union lock_stack * 1790 witness_lock_stack_get(void) 1791 { 1792 union lock_stack *stack = NULL; 1793 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1794 1795 splassert(IPL_HIGH); 1796 1797 if (wcpu->wc_stk_count > 0) { 1798 stack = wcpu->wc_stk_cache; 1799 wcpu->wc_stk_cache = stack->ls_next; 1800 wcpu->wc_stk_count--; 1801 return (stack); 1802 } 1803 1804 mtx_enter(&w_mtx); 1805 if (w_lock_stack_free != NULL) { 1806 stack = w_lock_stack_free; 1807 w_lock_stack_free = stack->ls_next; 1808 } 1809 mtx_leave(&w_mtx); 1810 return (stack); 1811 } 1812 1813 static void 1814 witness_lock_stack_free(union lock_stack *stack) 1815 { 1816 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1817 1818 splassert(IPL_HIGH); 1819 1820 if (wcpu->wc_stk_count < WITNESS_STK_CACHE_MAX) { 1821 stack->ls_next = wcpu->wc_stk_cache; 1822 wcpu->wc_stk_cache = stack; 
static void
witness_lock_stack_free(union lock_stack *stack)
{
	struct witness_cpu *wcpu = &witness_cpu[cpu_number()];

	splassert(IPL_HIGH);

	if (wcpu->wc_stk_count < WITNESS_STK_CACHE_MAX) {
		stack->ls_next = wcpu->wc_stk_cache;
		wcpu->wc_stk_cache = stack;
		wcpu->wc_stk_count++;
		return;
	}

	mtx_enter(&w_mtx);
	stack->ls_next = w_lock_stack_free;
	w_lock_stack_free = stack;
	mtx_leave(&w_mtx);
}

static struct lock_instance *
find_instance(struct lock_list_entry *list, const struct lock_object *lock)
{
	struct lock_list_entry *lle;
	struct lock_instance *instance;
	int i;

	for (lle = list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--) {
			instance = &lle->ll_children[i];
			if (instance->li_lock == lock)
				return (instance);
		}
	}
	return (NULL);
}

static void
witness_list_lock(struct lock_instance *instance,
    int (*prnt)(const char *fmt, ...))
{
	struct lock_object *lock;

	lock = instance->li_lock;
	prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
	    "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name);
	prnt(" r = %d (%p)\n", instance->li_flags & LI_RECURSEMASK, lock);
	if (instance->li_stack != NULL)
		stacktrace_print(&instance->li_stack->ls_stack, prnt);
}

#ifdef DDB
static int
witness_thread_has_locks(struct proc *p)
{

	if (p->p_sleeplocks == NULL)
		return (0);
	return (p->p_sleeplocks->ll_count != 0);
}

static int
witness_process_has_locks(struct process *pr)
{
	struct proc *p;

	TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
		if (witness_thread_has_locks(p))
			return (1);
	}
	return (0);
}
#endif

int
witness_list_locks(struct lock_list_entry **lock_list,
    int (*prnt)(const char *fmt, ...))
{
	struct lock_list_entry *lle;
	int i, nheld;

	nheld = 0;
	for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			witness_list_lock(&lle->ll_children[i], prnt);
			nheld++;
		}
	return (nheld);
}
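/*
 * Illustrative sketch (not from the original source): the output format
 * of witness_list_lock() above, with hypothetical names and addresses:
 *
 *	exclusive rwlock netlock r = 0 (0xffffffff81234560)
 *
 * i.e. exclusive/shared state, lock class, lock name, recursion count and
 * the address of the lock object, followed by a saved stack trace when
 * witness_locktrace is enabled.
 */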
/*
 * This is a bit risky at best.  We call this function when we have timed
 * out acquiring a spin lock, and we assume that the other CPU is stuck
 * with this lock held.  So, we go groveling around in the other CPU's
 * per-cpu data to try to find the lock instance for this spin lock to
 * see when it was last acquired.
 */
void
witness_display_spinlock(struct lock_object *lock, struct proc *owner,
    int (*prnt)(const char *fmt, ...))
{
	struct lock_instance *instance;

	if (owner->p_stat != SONPROC)
		return;
	instance = find_instance(
	    witness_cpu[owner->p_cpu->ci_cpuid].wc_spinlocks, lock);
	if (instance != NULL)
		witness_list_lock(instance, prnt);
}

void
witness_assert(const struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;

	if (lock->lo_witness == NULL || witness_watch < 1 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if ((class->lc_flags & LC_SLEEPLOCK) != 0)
		instance = find_instance(curproc->p_sleeplocks, lock);
	else if ((class->lc_flags & LC_SPINLOCK) != 0)
		instance = find_instance(
		    witness_cpu[cpu_number()].wc_spinlocks, lock);
	else {
		panic("lock (%s) %s is not sleep or spin!",
		    class->lc_name, lock->lo_name);
		return;
	}
	switch (flags) {
	case LA_UNLOCKED:
		if (instance != NULL)
			panic("lock (%s) %s locked",
			    class->lc_name, lock->lo_name);
		break;
	case LA_LOCKED:
	case LA_LOCKED | LA_RECURSED:
	case LA_LOCKED | LA_NOTRECURSED:
	case LA_SLOCKED:
	case LA_SLOCKED | LA_RECURSED:
	case LA_SLOCKED | LA_NOTRECURSED:
	case LA_XLOCKED:
	case LA_XLOCKED | LA_RECURSED:
	case LA_XLOCKED | LA_NOTRECURSED:
		if (instance == NULL) {
			panic("lock (%s) %s not locked",
			    class->lc_name, lock->lo_name);
			break;
		}
		if ((flags & LA_XLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("lock (%s) %s not exclusively locked",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_SLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("lock (%s) %s exclusively locked",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_RECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) == 0)
			panic("lock (%s) %s not recursed",
			    class->lc_name, lock->lo_name);
		if ((flags & LA_NOTRECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) != 0)
			panic("lock (%s) %s recursed",
			    class->lc_name, lock->lo_name);
		break;
	default:
		panic("invalid lock assertion");
	}
}

static void
witness_setflag(struct lock_object *lock, int flag, int set)
{
	struct lock_list_entry *lock_list;
	struct lock_instance *instance;
	struct lock_class *class;

	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = curproc->p_sleeplocks;
	else
		lock_list = witness_cpu[cpu_number()].wc_spinlocks;
	instance = find_instance(lock_list, lock);
	if (instance == NULL) {
		panic("%s: lock (%s) %s not locked", __func__,
		    class->lc_name, lock->lo_name);
		return;
	}

	if (set)
		instance->li_flags |= flag;
	else
		instance->li_flags &= ~flag;
}

void
witness_norelease(struct lock_object *lock)
{

	witness_setflag(lock, LI_NORELEASE, 1);
}

void
witness_releaseok(struct lock_object *lock)
{

	witness_setflag(lock, LI_NORELEASE, 0);
}

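/*
 * Usage sketch for witness_assert() (illustrative; the softc and its
 * lock_object member are hypothetical): code that must run with a
 * write lock held can encode the requirement as
 *
 *	witness_assert(&sc->sc_lock_obj, LA_XLOCKED);
 *
 * which panics unless the current thread holds the lock exclusively.
 * LA_UNLOCKED, LA_LOCKED and LA_SLOCKED assert the other states, and
 * LA_RECURSED/LA_NOTRECURSED can be OR'ed in to check the recursion
 * count, exactly as enumerated in the switch above.
 */
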
#ifdef DDB
static void
witness_ddb_list(struct proc *p)
{
	struct witness_cpu *wc = &witness_cpu[cpu_number()];

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	KASSERTMSG(db_active, "%s: not in the debugger", __func__);

	if (witness_watch < 1)
		return;

	witness_list_locks(&p->p_sleeplocks, db_printf);

	/*
	 * We only handle spinlocks if p == curproc.  This is somewhat broken
	 * if p is currently executing on some other CPU and holds spin locks
	 * as we won't display those locks.  If we had a MI way of getting
	 * the per-cpu data for a given cpu then we could use p->p_cpu to get
	 * the list of spinlocks for this thread and "fix" this.
	 *
	 * That still wouldn't really fix this unless we locked the scheduler
	 * lock or stopped the other CPU to make sure it wasn't changing the
	 * list out from under us.  It is probably best to just not try to
	 * handle threads on other CPUs for now.
	 */
	if (p == curproc && wc->wc_spinlocks != NULL)
		witness_list_locks(&wc->wc_spinlocks, db_printf);
}

void
db_witness_list(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
	struct proc *p;

	if (have_addr)
		p = (struct proc *)addr;
	else
		p = curproc;
	witness_ddb_list(p);
}

void
db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lock_list_entry *lock_list;
	struct process *pr;
	struct proc *p;

	CPU_INFO_FOREACH(cii, ci) {
		lock_list = witness_cpu[CPU_INFO_UNIT(ci)].wc_spinlocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			continue;
		db_printf("CPU %d:\n", CPU_INFO_UNIT(ci));
		witness_list_locks(&lock_list, db_printf);
	}

	/*
	 * It would be nice to list only threads and processes that actually
	 * held sleep locks, but that information is currently not exported
	 * by WITNESS.
	 */
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (!witness_process_has_locks(pr))
			continue;
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
			if (!witness_thread_has_locks(p))
				continue;
			db_printf("Process %d (%s) thread %p (%d)\n",
			    pr->ps_pid, pr->ps_comm, p, p->p_tid);
			witness_ddb_list(p);
		}
	}
}

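/*
 * Usage sketch (illustrative; command names are an assumption, the
 * actual wiring lives in ddb's command table, not in this file): the
 * entry points above are reached from the kernel debugger with
 * commands along the lines of
 *
 *	ddb> show locks			(current thread, db_witness_list)
 *	ddb> show all locks		(every CPU and thread,
 *					 db_witness_list_all)
 */
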
void
witness_print_badstacks(void)
{
	static struct witness tmp_w1, tmp_w2;
	static struct witness_lock_order_data tmp_data1, tmp_data2;
	struct witness_lock_order_data *data1, *data2;
	struct witness *w1, *w2;
	int error, generation, i, j;

	if (witness_watch < 1) {
		db_printf("witness watch is disabled\n");
		return;
	}
	if (witness_cold) {
		db_printf("witness is cold\n");
		return;
	}
	error = 0;

	memset(&tmp_w1, 0, sizeof(tmp_w1));
	memset(&tmp_w2, 0, sizeof(tmp_w2));
	memset(&tmp_data1, 0, sizeof(tmp_data1));
	memset(&tmp_data2, 0, sizeof(tmp_data2));

restart:
	mtx_enter(&w_mtx);
	generation = w_generation;
	mtx_leave(&w_mtx);
	db_printf("Number of known direct relationships is %d\n",
	    w_lohash.wloh_count);
	for (i = 1; i < w_max_used_index; i++) {
		mtx_enter(&w_mtx);
		if (generation != w_generation) {
			mtx_leave(&w_mtx);

			/* The graph has changed, try again. */
			db_printf("Lock graph changed, restarting trace.\n");
			goto restart;
		}

		w1 = &w_data[i];
		if (w1->w_reversed == 0) {
			mtx_leave(&w_mtx);
			continue;
		}

		/* Copy w1 locally so we can release the spin lock. */
		tmp_w1 = *w1;
		mtx_leave(&w_mtx);

		if (tmp_w1.w_reversed == 0)
			continue;
		for (j = 1; j < w_max_used_index; j++) {
			if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j)
				continue;

			mtx_enter(&w_mtx);
			if (generation != w_generation) {
				mtx_leave(&w_mtx);

				/* The graph has changed, try again. */
				db_printf("Lock graph changed, "
				    "restarting trace.\n");
				goto restart;
			}

			w2 = &w_data[j];
			data1 = witness_lock_order_get(w1, w2);
			data2 = witness_lock_order_get(w2, w1);

			/*
			 * Copy information locally so we can release the
			 * spin lock.
			 */
			tmp_w2 = *w2;

			if (data1)
				tmp_data1.wlod_stack = data1->wlod_stack;
			if (data2 && data2 != data1)
				tmp_data2.wlod_stack = data2->wlod_stack;
			mtx_leave(&w_mtx);

			db_printf("\nLock order reversal between \"%s\"(%s) "
			    "and \"%s\"(%s)!\n",
			    tmp_w1.w_type->lt_name, tmp_w1.w_class->lc_name,
			    tmp_w2.w_type->lt_name, tmp_w2.w_class->lc_name);
			if (data1) {
				db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) "
				    "first seen at:\n",
				    tmp_w1.w_type->lt_name,
				    tmp_w1.w_class->lc_name,
				    tmp_w2.w_type->lt_name,
				    tmp_w2.w_class->lc_name);
				stacktrace_print(&tmp_data1.wlod_stack,
				    db_printf);
				db_printf("\n");
			}
			if (data2 && data2 != data1) {
				db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) "
				    "first seen at:\n",
				    tmp_w2.w_type->lt_name,
				    tmp_w2.w_class->lc_name,
				    tmp_w1.w_type->lt_name,
				    tmp_w1.w_class->lc_name);
				stacktrace_print(&tmp_data2.wlod_stack,
				    db_printf);
				db_printf("\n");
			}
		}
	}
	mtx_enter(&w_mtx);
	if (generation != w_generation) {
		mtx_leave(&w_mtx);

		/*
		 * The graph changed while we were printing stack data,
		 * try again.
		 */
		db_printf("Lock graph changed, restarting trace.\n");
		goto restart;
	}
	mtx_leave(&w_mtx);
}

void
db_witness_display(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
	switch (modif[0]) {
	case 'b':
		witness_print_badstacks();
		break;
	default:
		witness_ddb_display(db_printf);
		break;
	}
}
#endif

void
db_witness_print_fullgraph(void)
{
	struct witness *w;
	int error;

	if (witness_watch < 1) {
		db_printf("witness watch is disabled\n");
		return;
	}
	if (witness_cold) {
		db_printf("witness is cold\n");
		return;
	}
	error = 0;

	mtx_enter(&w_mtx);
	SIMPLEQ_FOREACH(w, &w_all, w_list)
		w->w_displayed = 0;
	SIMPLEQ_FOREACH(w, &w_all, w_list)
		db_witness_add_fullgraph(w);
	mtx_leave(&w_mtx);
}

static void
db_witness_add_fullgraph(struct witness *w)
{
	int i;

	if (w->w_displayed != 0 || w->w_acquired == 0)
		return;
	w->w_displayed = 1;

	WITNESS_INDEX_ASSERT(w->w_index);
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
			db_printf("\"%s\",\"%s\"\n", w->w_type->lt_name,
			    w_data[i].w_type->lt_name);
			db_witness_add_fullgraph(&w_data[i]);
		}
	}
}

/*
 * A simple hash function.  Takes a key pointer and a key size.  If size == 0,
 * interprets the key as a string and reads until the null terminator.
 * Otherwise, reads the first size bytes.  Returns an unsigned 32-bit hash
 * value computed from the key.
 */
static uint32_t
witness_hash_djb2(const uint8_t *key, uint32_t size)
{
	unsigned int hash = 5381;
	int i;

	/* hash = hash * 33 + key[i] */
	if (size)
		for (i = 0; i < size; i++)
			hash = ((hash << 5) + hash) + (unsigned int)key[i];
	else
		for (i = 0; key[i] != 0; i++)
			hash = ((hash << 5) + hash) + (unsigned int)key[i];

	return (hash);
}

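/*
 * Worked example of the recurrence above (illustrative, not in the
 * original source): hashing the two-byte string "ab" starts from 5381
 * and folds in one byte per step:
 *
 *	hash = 5381 * 33 + 'a' (97)   = 177670
 *	hash = 177670 * 33 + 'b' (98) = 5863208
 *
 * so witness_hash_djb2("ab", 0) yields 5863208.  The shift-and-add form
 * ((hash << 5) + hash) is simply hash * 33.
 */
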
/*
 * Initializes the two witness hash tables.  Called exactly once from
 * witness_initialize().
 */
static void
witness_init_hash_tables(void)
{
	int i;

	KASSERT(witness_cold);

	/* Initialize the hash tables. */
	for (i = 0; i < WITNESS_HASH_SIZE; i++)
		w_hash.wh_array[i] = NULL;

	w_hash.wh_size = WITNESS_HASH_SIZE;
	w_hash.wh_count = 0;

	/* Initialize the lock order data hash. */
	w_lofree = NULL;
	for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) {
		memset(&w_lodata[i], 0, sizeof(w_lodata[i]));
		w_lodata[i].wlod_next = w_lofree;
		w_lofree = &w_lodata[i];
	}
	w_lohash.wloh_size = WITNESS_LO_HASH_SIZE;
	w_lohash.wloh_count = 0;
	for (i = 0; i < WITNESS_LO_HASH_SIZE; i++)
		w_lohash.wloh_array[i] = NULL;
}

static struct witness *
witness_hash_get(const struct lock_type *type, const char *subtype)
{
	struct witness *w;
	uint32_t hash;

	KASSERT(type != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);
	hash = (uint32_t)((uintptr_t)type ^ (uintptr_t)subtype) %
	    w_hash.wh_size;
	w = w_hash.wh_array[hash];
	while (w != NULL) {
		if (w->w_type == type && w->w_subtype == subtype)
			goto out;
		w = w->w_hash_next;
	}

out:
	return (w);
}

static void
witness_hash_put(struct witness *w)
{
	uint32_t hash;

	KASSERT(w != NULL);
	KASSERT(w->w_type != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);
	KASSERTMSG(witness_hash_get(w->w_type, w->w_subtype) == NULL,
	    "%s: trying to add a hash entry that already exists!", __func__);
	KASSERTMSG(w->w_hash_next == NULL,
	    "%s: w->w_hash_next != NULL", __func__);

	hash = (uint32_t)((uintptr_t)w->w_type ^ (uintptr_t)w->w_subtype) %
	    w_hash.wh_size;
	w->w_hash_next = w_hash.wh_array[hash];
	w_hash.wh_array[hash] = w;
	w_hash.wh_count++;
}

static struct witness_lock_order_data *
witness_lock_order_get(struct witness *parent, struct witness *child)
{
	struct witness_lock_order_data *data = NULL;
	struct witness_lock_order_key key;
	unsigned int hash;

	KASSERT(parent != NULL && child != NULL);
	key.from = parent->w_index;
	key.to = child->w_index;
	WITNESS_INDEX_ASSERT(key.from);
	WITNESS_INDEX_ASSERT(key.to);
	if ((w_rmatrix[parent->w_index][child->w_index]
	    & WITNESS_LOCK_ORDER_KNOWN) == 0)
		goto out;

	hash = witness_hash_djb2((const uint8_t *)&key,
	    sizeof(key)) % w_lohash.wloh_size;
	data = w_lohash.wloh_array[hash];
	while (data != NULL) {
		if (witness_lock_order_key_equal(&data->wlod_key, &key))
			break;
		data = data->wlod_next;
	}

out:
	return (data);
}

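/*
 * Lookup sketch (illustrative, not in the original source): a known
 * ordering "parent before child" is recorded twice.  The
 * WITNESS_LOCK_ORDER_KNOWN bit in w_rmatrix[parent][child] says an
 * entry exists, and the saved stack trace lives in w_lohash, keyed by
 * the (from, to) index pair:
 *
 *	key.from = parent->w_index;
 *	key.to = child->w_index;
 *	bucket = witness_hash_djb2((const uint8_t *)&key, sizeof(key))
 *	    % w_lohash.wloh_size;
 *
 * The matrix bit lets the common "already known" case skip the hash
 * chain walk entirely.
 */
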
/*
 * Verify that parent and child have a known relationship, are not the same,
 * and child is actually a child of parent.  This is done without w_mtx
 * to avoid contention in the common case.
 */
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{

	if (parent != child &&
	    w_rmatrix[parent->w_index][child->w_index]
	    & WITNESS_LOCK_ORDER_KNOWN &&
	    isitmychild(parent, child))
		return (1);

	return (0);
}

static int
witness_lock_order_add(struct witness *parent, struct witness *child)
{
	struct witness_lock_order_data *data = NULL;
	struct witness_lock_order_key key;
	unsigned int hash;

	KASSERT(parent != NULL && child != NULL);
	key.from = parent->w_index;
	key.to = child->w_index;
	WITNESS_INDEX_ASSERT(key.from);
	WITNESS_INDEX_ASSERT(key.to);
	if (w_rmatrix[parent->w_index][child->w_index]
	    & WITNESS_LOCK_ORDER_KNOWN)
		return (1);

	hash = witness_hash_djb2((const uint8_t *)&key,
	    sizeof(key)) % w_lohash.wloh_size;
	w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
	data = w_lofree;
	if (data == NULL)
		return (0);
	w_lofree = data->wlod_next;
	data->wlod_next = w_lohash.wloh_array[hash];
	data->wlod_key = key;
	w_lohash.wloh_array[hash] = data;
	w_lohash.wloh_count++;
	stacktrace_save(&data->wlod_stack);
	return (1);
}

/* Call this whenever the structure of the witness graph changes. */
static void
witness_increment_graph_generation(void)
{

	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);
	w_generation++;
}

static void
witness_debugger(int dump)
{
	switch (witness_watch) {
	case 1:
		break;
	case 2:
		if (dump)
			db_stack_dump();
		break;
	case 3:
		if (dump)
			db_stack_dump();
		db_enter();
		break;
	default:
		panic("witness: locking error");
	}
}

static int
witness_alloc_stacks(void)
{
	union lock_stack *stacks;
	unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN;

	rw_assert_wrlock(&w_ctlock);

	if (w_lock_stack_num >= nstacks)
		return (0);

	nstacks -= w_lock_stack_num;
	stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (stacks == NULL)
		return (ENOMEM);

	mtx_enter(&w_mtx);
	for (i = 0; i < nstacks; i++) {
		stacks[i].ls_next = w_lock_stack_free;
		w_lock_stack_free = &stacks[i];
	}
	mtx_leave(&w_mtx);
	w_lock_stack_num += nstacks;

	return (0);
}

int
witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error, value;

	if (namelen != 1)
		return (ENOTDIR);

	rw_enter_write(&w_ctlock);

	switch (name[0]) {
	case KERN_WITNESS_WATCH:
		error = witness_sysctl_watch(oldp, oldlenp, newp, newlen);
		break;
	case KERN_WITNESS_LOCKTRACE:
		value = witness_locktrace;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &value);
		if (error == 0 && newp != NULL) {
			switch (value) {
			case 1:
				error = witness_alloc_stacks();
				/* FALLTHROUGH */
			case 0:
				if (error == 0)
					witness_locktrace = value;
				break;
			default:
				error = EINVAL;
				break;
			}
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	rw_exit_write(&w_ctlock);

	return (error);
}

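/*
 * Usage sketch (illustrative; the sysctl names are the ones these
 * KERN_WITNESS_* nodes are exposed as on OpenBSD, stated here as an
 * assumption): from userland the checker is tuned with sysctl(8), e.g.
 *
 *	# sysctl kern.witness.watch=3
 *	# sysctl kern.witness.locktrace=1
 *
 * Setting locktrace to 1 makes witness_sysctl() pre-allocate the stack
 * trace buffers via witness_alloc_stacks() before enabling tracing.
 */
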
int
witness_sysctl_watch(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	int error;
	int value;

	value = witness_watch;
	error = sysctl_int(oldp, oldlenp, newp, newlen, &value);
	if (error == 0 && newp != NULL) {
		if (value >= -1 && value <= 3) {
			mtx_enter(&w_mtx);
			if (value < 0 || witness_watch >= 0)
				witness_watch = value;
			else
				error = EINVAL;
			mtx_leave(&w_mtx);
		} else
			error = EINVAL;
	}
	return (error);
}
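
/*
 * Summary of witness_watch values as enforced above and acted on in
 * witness_debugger() (descriptive note, not in the original source):
 *
 *	-1	witness disabled itself (e.g. object pool exhausted);
 *		cannot be re-enabled without a reboot
 *	 0	lock order checking off
 *	 1	report lock order errors
 *	 2	report and dump the kernel stack
 *	 3	report, dump the stack and enter the kernel debugger
 *
 * The "value < 0 || witness_watch >= 0" test above is what makes the
 * disabled state sticky: once witness_watch is negative, the only
 * updates accepted are other negative values.
 */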