1 /* $OpenBSD: subr_witness.c,v 1.47 2021/03/23 10:22:20 mpi Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 Isilon Systems, Inc. 5 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com> 6 * Copyright (c) 1998 Berkeley Software Design, Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Berkeley Software Design Inc's name may not be used to endorse or 18 * promote products derived from this software without specific prior 19 * written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from BSDI Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp 34 * and BSDI Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp 35 */ 36 37 /* 38 * Implementation of the `witness' lock verifier. Originally implemented for 39 * mutexes in BSD/OS. Extended to handle generic lock objects and lock 40 * classes in FreeBSD. 41 */ 42 43 /* 44 * Main Entry: witness 45 * Pronunciation: 'wit-n&s 46 * Function: noun 47 * Etymology: Middle English witnesse, from Old English witnes knowledge, 48 * testimony, witness, from 2wit 49 * Date: before 12th century 50 * 1 : attestation of a fact or event : TESTIMONY 51 * 2 : one that gives evidence; specifically : one who testifies in 52 * a cause or before a judicial tribunal 53 * 3 : one asked to be present at a transaction so as to be able to 54 * testify to its having taken place 55 * 4 : one who has personal knowledge of something 56 * 5 a : something serving as evidence or proof : SIGN 57 * b : public affirmation by word or example of usually 58 * religious faith or conviction <the heroic witness to divine 59 * life -- Pilot> 60 * 6 capitalized : a member of the Jehovah's Witnesses 61 */ 62 63 /* 64 * Special rules concerning Giant and lock orders: 65 * 66 * 1) Giant must be acquired before any other mutexes. Stated another way, 67 * no other mutex may be held when Giant is acquired. 68 * 69 * 2) Giant must be released when blocking on a sleepable lock. 70 * 71 * This rule is less obvious, but is a result of Giant providing the same 72 * semantics as spl(). Basically, when a thread sleeps, it must release 73 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 74 * 2). 75 * 76 * 3) Giant may be acquired before or after sleepable locks. 77 * 78 * This rule is also not quite as obvious. 
Giant may be acquired after 79 * a sleepable lock because it is a non-sleepable lock and non-sleepable 80 * locks may always be acquired while holding a sleepable lock. The second 81 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 82 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 83 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 84 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 85 * execute. Thus, acquiring Giant both before and after a sleepable lock 86 * will not result in a lock order reversal. 87 */ 88 89 #include <sys/param.h> 90 #include <sys/systm.h> 91 #include <sys/kernel.h> 92 #include <sys/malloc.h> 93 #ifdef MULTIPROCESSOR 94 #include <sys/mplock.h> 95 #endif 96 #include <sys/mutex.h> 97 #include <sys/percpu.h> 98 #include <sys/proc.h> 99 #include <sys/sched.h> 100 #include <sys/stacktrace.h> 101 #include <sys/stdint.h> 102 #include <sys/sysctl.h> 103 #include <sys/syslog.h> 104 #include <sys/witness.h> 105 106 #include <machine/cpu.h> 107 108 #include <uvm/uvm_extern.h> /* uvm_pageboot_alloc */ 109 110 #ifndef DDB 111 #error "DDB is required for WITNESS" 112 #endif 113 114 #include <machine/db_machdep.h> 115 116 #include <ddb/db_access.h> 117 #include <ddb/db_var.h> 118 #include <ddb/db_output.h> 119 120 #define LI_RECURSEMASK 0x0000ffff /* Recursion depth of lock instance. */ 121 #define LI_EXCLUSIVE 0x00010000 /* Exclusive lock instance. */ 122 #define LI_NORELEASE 0x00020000 /* Lock not allowed to be released. */ 123 124 #ifndef WITNESS_COUNT 125 #define WITNESS_COUNT 1536 126 #endif 127 #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ 128 #define WITNESS_PENDLIST (1024 + MAXCPUS) 129 130 /* Allocate 256 KB of stack data space */ 131 #define WITNESS_LO_DATA_COUNT 2048 132 133 /* Prime, gives load factor of ~2 at full load */ 134 #define WITNESS_LO_HASH_SIZE 1021 135 136 /* 137 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads 138 * will hold LOCK_NCHILDREN locks. We handle failure ok, and we should 139 * probably be safe for the most part, but it's still a SWAG. 140 */ 141 #define LOCK_NCHILDREN 5 142 #define LOCK_CHILDCOUNT 2048 143 144 #define FULLGRAPH_SBUF_SIZE 512 145 146 /* 147 * These flags go in the witness relationship matrix and describe the 148 * relationship between any two struct witness objects. 149 */ 150 #define WITNESS_UNRELATED 0x00 /* No lock order relation. */ 151 #define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ 152 #define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ 153 #define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ 154 #define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ 155 #define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) 156 #define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) 157 #define WITNESS_RELATED_MASK \ 158 (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) 159 #define WITNESS_REVERSAL 0x10 /* A lock order reversal has been 160 * observed. */ 161 #define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ 162 #define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ 163 #define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. 
*/ 164 165 /* Descendant to ancestor flags */ 166 #define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) 167 168 /* Ancestor to descendant flags */ 169 #define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2) 170 171 #define WITNESS_INDEX_ASSERT(i) \ 172 KASSERT((i) > 0 && (i) <= w_max_used_index && (i) < witness_count) 173 174 /* 175 * Lock classes. Each lock has a class which describes characteristics 176 * common to all types of locks of a given class. 177 * 178 * Spin locks in general must always protect against preemption, as it is 179 * an error to perform any type of context switch while holding a spin lock. 180 * Also, for an individual lock to be recursable, its class must allow 181 * recursion and the lock itself must explicitly allow recursion. 182 */ 183 184 struct lock_class { 185 const char *lc_name; 186 u_int lc_flags; 187 }; 188 189 union lock_stack { 190 union lock_stack *ls_next; 191 struct stacktrace ls_stack; 192 }; 193 194 #define LC_SLEEPLOCK 0x00000001 /* Sleep lock. */ 195 #define LC_SPINLOCK 0x00000002 /* Spin lock. */ 196 #define LC_SLEEPABLE 0x00000004 /* Sleeping allowed with this lock. */ 197 #define LC_RECURSABLE 0x00000008 /* Locks of this type may recurse. */ 198 #define LC_UPGRADABLE 0x00000010 /* Upgrades and downgrades permitted. */ 199 200 /* 201 * Lock instances. A lock instance is the data associated with a lock while 202 * it is held by witness. For example, a lock instance will hold the 203 * recursion count of a lock. Lock instances are held in lists. Spin locks 204 * are held in a per-cpu list while sleep locks are held in per-thread list. 205 */ 206 struct lock_instance { 207 struct lock_object *li_lock; 208 union lock_stack *li_stack; 209 u_int li_flags; 210 }; 211 212 /* 213 * A simple list type used to build the list of locks held by a thread 214 * or CPU. We can't simply embed the list in struct lock_object since a 215 * lock may be held by more than one thread if it is a shared lock. Locks 216 * are added to the head of the list, so we fill up each list entry from 217 * "the back" logically. To ease some of the arithmetic, we actually fill 218 * in each list entry the normal way (children[0] then children[1], etc.) but 219 * when we traverse the list we read children[count-1] as the first entry 220 * down to children[0] as the final entry. 221 */ 222 struct lock_list_entry { 223 struct lock_list_entry *ll_next; 224 struct lock_instance ll_children[LOCK_NCHILDREN]; 225 int ll_count; 226 }; 227 228 /* 229 * The main witness structure. One of these per named lock type in the system 230 * (for example, "vnode interlock"). 231 */ 232 struct witness { 233 const struct lock_type *w_type; 234 const char *w_subtype; 235 uint32_t w_index; /* Index in the relationship matrix */ 236 struct lock_class *w_class; 237 SLIST_ENTRY(witness) w_list; /* List of all witnesses. */ 238 SLIST_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 239 SLIST_ENTRY(witness) w_hash_next; /* Linked list in 240 * hash buckets. */ 241 uint16_t w_num_ancestors; /* direct/indirect 242 * ancestor count */ 243 uint16_t w_num_descendants; /* direct/indirect 244 * descendant count */ 245 int16_t w_ddb_level; 246 unsigned w_acquired:1; 247 unsigned w_displayed:1; 248 unsigned w_reversed:1; 249 }; 250 251 SLIST_HEAD(witness_list, witness); 252 253 /* 254 * The witness hash table. Keys are witness names (const char *), elements are 255 * witness objects (struct witness *). 
256 */ 257 struct witness_hash { 258 struct witness_list wh_array[WITNESS_HASH_SIZE]; 259 uint32_t wh_size; 260 uint32_t wh_count; 261 }; 262 263 /* 264 * Key type for the lock order data hash table. 265 */ 266 struct witness_lock_order_key { 267 uint16_t from; 268 uint16_t to; 269 }; 270 271 struct witness_lock_order_data { 272 struct stacktrace wlod_stack; 273 struct witness_lock_order_key wlod_key; 274 struct witness_lock_order_data *wlod_next; 275 }; 276 277 /* 278 * The witness lock order data hash table. Keys are witness index tuples 279 * (struct witness_lock_order_key), elements are lock order data objects 280 * (struct witness_lock_order_data). 281 */ 282 struct witness_lock_order_hash { 283 struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE]; 284 u_int wloh_size; 285 u_int wloh_count; 286 }; 287 288 struct witness_pendhelp { 289 const struct lock_type *wh_type; 290 struct lock_object *wh_lock; 291 }; 292 293 struct witness_cpu { 294 struct lock_list_entry *wc_spinlocks; 295 struct lock_list_entry *wc_lle_cache; 296 union lock_stack *wc_stk_cache; 297 unsigned int wc_lle_count; 298 unsigned int wc_stk_count; 299 } __aligned(CACHELINESIZE); 300 301 #define WITNESS_LLE_CACHE_MAX 8 302 #define WITNESS_STK_CACHE_MAX (WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN) 303 304 struct witness_cpu witness_cpu[MAXCPUS]; 305 306 /* 307 * Returns 0 if one of the locks is a spin lock and the other is not. 308 * Returns 1 otherwise. 309 */ 310 static __inline int 311 witness_lock_type_equal(struct witness *w1, struct witness *w2) 312 { 313 314 return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == 315 (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); 316 } 317 318 static __inline int 319 witness_lock_order_key_equal(const struct witness_lock_order_key *a, 320 const struct witness_lock_order_key *b) 321 { 322 323 return (a->from == b->from && a->to == b->to); 324 } 325 326 static int _isitmyx(struct witness *w1, struct witness *w2, int rmask, 327 const char *fname); 328 static void adopt(struct witness *parent, struct witness *child); 329 static struct witness *enroll(const struct lock_type *, const char *, 330 struct lock_class *); 331 static struct lock_instance *find_instance(struct lock_list_entry *list, 332 const struct lock_object *lock); 333 static int isitmychild(struct witness *parent, struct witness *child); 334 static int isitmydescendant(struct witness *parent, struct witness *child); 335 static void itismychild(struct witness *parent, struct witness *child); 336 #ifdef DDB 337 static void db_witness_add_fullgraph(struct witness *parent); 338 static void witness_ddb_compute_levels(void); 339 static void witness_ddb_display(int(*)(const char *fmt, ...)); 340 static void witness_ddb_display_descendants(int(*)(const char *fmt, ...), 341 struct witness *, int indent); 342 static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...), 343 struct witness_list *list); 344 static void witness_ddb_level_descendants(struct witness *parent, int l); 345 static void witness_ddb_list(struct proc *td); 346 #endif 347 static int witness_alloc_stacks(void); 348 static void witness_debugger(int dump); 349 static void witness_free(struct witness *m); 350 static struct witness *witness_get(void); 351 static uint32_t witness_hash_djb2(const uint8_t *key, uint32_t size); 352 static struct witness *witness_hash_get(const struct lock_type *, 353 const char *); 354 static void witness_hash_put(struct witness *w); 355 static void witness_init_hash_tables(void); 356 static void 
witness_increment_graph_generation(void); 357 static int witness_list_locks(struct lock_list_entry **, 358 int (*)(const char *, ...)); 359 static void witness_lock_list_free(struct lock_list_entry *lle); 360 static struct lock_list_entry *witness_lock_list_get(void); 361 static void witness_lock_stack_free(union lock_stack *stack); 362 static union lock_stack *witness_lock_stack_get(void); 363 static int witness_lock_order_add(struct witness *parent, 364 struct witness *child); 365 static int witness_lock_order_check(struct witness *parent, 366 struct witness *child); 367 static struct witness_lock_order_data *witness_lock_order_get( 368 struct witness *parent, 369 struct witness *child); 370 static void witness_list_lock(struct lock_instance *instance, 371 int (*prnt)(const char *fmt, ...)); 372 static void witness_setflag(struct lock_object *lock, int flag, int set); 373 374 /* 375 * If set to 0, lock order checking is disabled. If set to -1, 376 * witness is completely disabled. Otherwise witness performs full 377 * lock order checking for all locks. At runtime, lock order checking 378 * may be toggled. However, witness cannot be reenabled once it is 379 * completely disabled. 380 */ 381 #ifdef WITNESS_WATCH 382 static int witness_watch = 3; 383 #else 384 static int witness_watch = 2; 385 #endif 386 387 #ifdef WITNESS_LOCKTRACE 388 static int witness_locktrace = 1; 389 #else 390 static int witness_locktrace = 0; 391 #endif 392 393 int witness_count = WITNESS_COUNT; 394 int witness_uninitialized_report = 5; 395 396 static struct mutex w_mtx; 397 static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock"); 398 399 /* w_list */ 400 static struct witness_list w_free = SLIST_HEAD_INITIALIZER(w_free); 401 static struct witness_list w_all = SLIST_HEAD_INITIALIZER(w_all); 402 403 /* w_typelist */ 404 static struct witness_list w_spin = SLIST_HEAD_INITIALIZER(w_spin); 405 static struct witness_list w_sleep = SLIST_HEAD_INITIALIZER(w_sleep); 406 407 /* lock list */ 408 static struct lock_list_entry *w_lock_list_free = NULL; 409 static struct witness_pendhelp pending_locks[WITNESS_PENDLIST]; 410 static u_int pending_cnt; 411 412 static int w_free_cnt, w_spin_cnt, w_sleep_cnt; 413 414 static struct witness *w_data; 415 static uint8_t **w_rmatrix; 416 static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; 417 static struct witness_hash w_hash; /* The witness hash table. 
*/ 418 419 /* The lock order data hash */ 420 static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT]; 421 static struct witness_lock_order_data *w_lofree = NULL; 422 static struct witness_lock_order_hash w_lohash; 423 static int w_max_used_index = 0; 424 static unsigned int w_generation = 0; 425 426 static union lock_stack *w_lock_stack_free; 427 static unsigned int w_lock_stack_num; 428 429 static struct lock_class lock_class_kernel_lock = { 430 .lc_name = "kernel_lock", 431 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE 432 }; 433 434 static struct lock_class lock_class_sched_lock = { 435 .lc_name = "sched_lock", 436 .lc_flags = LC_SPINLOCK | LC_RECURSABLE 437 }; 438 439 static struct lock_class lock_class_mutex = { 440 .lc_name = "mutex", 441 .lc_flags = LC_SPINLOCK 442 }; 443 444 static struct lock_class lock_class_rwlock = { 445 .lc_name = "rwlock", 446 .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_UPGRADABLE 447 }; 448 449 static struct lock_class lock_class_rrwlock = { 450 .lc_name = "rrwlock", 451 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE | 452 LC_UPGRADABLE 453 }; 454 455 static struct lock_class *lock_classes[] = { 456 &lock_class_kernel_lock, 457 &lock_class_sched_lock, 458 &lock_class_mutex, 459 &lock_class_rwlock, 460 &lock_class_rrwlock, 461 }; 462 463 /* 464 * This global is set to 0 once it becomes safe to use the witness code. 465 */ 466 static int witness_cold = 1; 467 468 /* 469 * This global is set to 1 once the static lock orders have been enrolled 470 * so that a warning can be issued for any spin locks enrolled later. 471 */ 472 static int witness_spin_warn = 0; 473 474 /* 475 * The WITNESS-enabled diagnostic code. Note that the witness code does 476 * assume that the early boot is single-threaded at least until after this 477 * routine is completed. 478 */ 479 void 480 witness_initialize(void) 481 { 482 struct lock_object *lock; 483 union lock_stack *stacks; 484 struct witness *w; 485 int i, s; 486 487 w_data = (void *)uvm_pageboot_alloc(sizeof(struct witness) * 488 witness_count); 489 memset(w_data, 0, sizeof(struct witness) * witness_count); 490 491 w_rmatrix = (void *)uvm_pageboot_alloc(sizeof(*w_rmatrix) * 492 (witness_count + 1)); 493 494 for (i = 0; i < witness_count + 1; i++) { 495 w_rmatrix[i] = (void *)uvm_pageboot_alloc( 496 sizeof(*w_rmatrix[i]) * (witness_count + 1)); 497 memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 498 (witness_count + 1)); 499 } 500 501 mtx_init_flags(&w_mtx, IPL_HIGH, "witness lock", MTX_NOWITNESS); 502 for (i = witness_count - 1; i >= 0; i--) { 503 w = &w_data[i]; 504 memset(w, 0, sizeof(*w)); 505 w_data[i].w_index = i; /* Witness index never changes. */ 506 witness_free(w); 507 } 508 KASSERTMSG(SLIST_FIRST(&w_free)->w_index == 0, 509 "%s: Invalid list of free witness objects", __func__); 510 511 /* Witness with index 0 is not used to aid in debugging. 
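 * Reserving index 0 also means that a zero w_index can never refer to a valid witness, which makes stray or uninitialized indices easier to spot (WITNESS_INDEX_ASSERT() insists on an index greater than zero).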
*/ 512 SLIST_REMOVE_HEAD(&w_free, w_list); 513 w_free_cnt--; 514 515 for (i = 0; i < witness_count; i++) { 516 memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * 517 (witness_count + 1)); 518 } 519 520 if (witness_locktrace) { 521 w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN; 522 stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) * 523 w_lock_stack_num); 524 } 525 526 s = splhigh(); 527 for (i = 0; i < w_lock_stack_num; i++) 528 witness_lock_stack_free(&stacks[i]); 529 for (i = 0; i < LOCK_CHILDCOUNT; i++) 530 witness_lock_list_free(&w_locklistdata[i]); 531 splx(s); 532 witness_init_hash_tables(); 533 witness_spin_warn = 1; 534 535 /* Iterate through all locks and add them to witness. */ 536 for (i = 0; pending_locks[i].wh_lock != NULL; i++) { 537 lock = pending_locks[i].wh_lock; 538 KASSERTMSG(lock->lo_flags & LO_WITNESS, 539 "%s: lock %s is on pending list but not LO_WITNESS", 540 __func__, lock->lo_name); 541 lock->lo_witness = enroll(pending_locks[i].wh_type, 542 lock->lo_name, LOCK_CLASS(lock)); 543 } 544 545 /* Mark the witness code as being ready for use. */ 546 witness_cold = 0; 547 } 548 549 void 550 witness_init(struct lock_object *lock, const struct lock_type *type) 551 { 552 struct lock_class *class; 553 554 /* Various sanity checks. */ 555 class = LOCK_CLASS(lock); 556 if ((lock->lo_flags & LO_RECURSABLE) != 0 && 557 (class->lc_flags & LC_RECURSABLE) == 0) 558 panic("%s: lock (%s) %s can not be recursable", 559 __func__, class->lc_name, lock->lo_name); 560 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 561 (class->lc_flags & LC_SLEEPABLE) == 0) 562 panic("%s: lock (%s) %s can not be sleepable", 563 __func__, class->lc_name, lock->lo_name); 564 if ((lock->lo_flags & LO_UPGRADABLE) != 0 && 565 (class->lc_flags & LC_UPGRADABLE) == 0) 566 panic("%s: lock (%s) %s can not be upgradable", 567 __func__, class->lc_name, lock->lo_name); 568 569 /* 570 * If we shouldn't watch this lock, then just clear lo_witness. 571 * Record the type in case the lock becomes watched later. 572 * Otherwise, if witness_cold is set, then it is too early to 573 * enroll this lock, so defer it to witness_initialize() by adding 574 * it to the pending_locks list. If it is not too early, then enroll 575 * the lock now. 576 */ 577 if (witness_watch < 1 || panicstr != NULL || db_active || 578 (lock->lo_flags & LO_WITNESS) == 0) { 579 lock->lo_witness = NULL; 580 lock->lo_type = type; 581 } else if (witness_cold) { 582 pending_locks[pending_cnt].wh_lock = lock; 583 pending_locks[pending_cnt++].wh_type = type; 584 if (pending_cnt > WITNESS_PENDLIST) 585 panic("%s: pending locks list is too small, " 586 "increase WITNESS_PENDLIST", 587 __func__); 588 } else 589 lock->lo_witness = enroll(type, lock->lo_name, class); 590 } 591 592 static inline int 593 is_kernel_lock(const struct lock_object *lock) 594 { 595 #ifdef MULTIPROCESSOR 596 return (lock == &kernel_lock.mpl_lock_obj); 597 #else 598 return (0); 599 #endif 600 } 601 602 #ifdef DDB 603 static void 604 witness_ddb_compute_levels(void) 605 { 606 struct witness *w; 607 608 /* 609 * First clear all levels. 610 */ 611 SLIST_FOREACH(w, &w_all, w_list) 612 w->w_ddb_level = -1; 613 614 /* 615 * Look for locks with no parents and level all their descendants. 616 */ 617 SLIST_FOREACH(w, &w_all, w_list) { 618 /* If the witness has ancestors (is not a root), skip it. 
*/ 619 if (w->w_num_ancestors > 0) 620 continue; 621 witness_ddb_level_descendants(w, 0); 622 } 623 } 624 625 static void 626 witness_ddb_level_descendants(struct witness *w, int l) 627 { 628 int i; 629 630 if (w->w_ddb_level >= l) 631 return; 632 633 w->w_ddb_level = l; 634 l++; 635 636 for (i = 1; i <= w_max_used_index; i++) { 637 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) 638 witness_ddb_level_descendants(&w_data[i], l); 639 } 640 } 641 642 static void 643 witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...), 644 struct witness *w, int indent) 645 { 646 int i; 647 648 for (i = 0; i < indent; i++) 649 prnt(" "); 650 prnt("%s (type: %s, depth: %d)", 651 w->w_type->lt_name, w->w_class->lc_name, w->w_ddb_level); 652 if (w->w_displayed) { 653 prnt(" -- (already displayed)\n"); 654 return; 655 } 656 w->w_displayed = 1; 657 if (!w->w_acquired) 658 prnt(" -- never acquired\n"); 659 else 660 prnt("\n"); 661 indent++; 662 WITNESS_INDEX_ASSERT(w->w_index); 663 for (i = 1; i <= w_max_used_index; i++) { 664 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) 665 witness_ddb_display_descendants(prnt, &w_data[i], 666 indent); 667 } 668 } 669 670 static void 671 witness_ddb_display_list(int(*prnt)(const char *fmt, ...), 672 struct witness_list *list) 673 { 674 struct witness *w; 675 676 SLIST_FOREACH(w, list, w_typelist) { 677 if (!w->w_acquired || w->w_ddb_level > 0) 678 continue; 679 680 /* This lock has no anscestors - display its descendants. */ 681 witness_ddb_display_descendants(prnt, w, 0); 682 } 683 } 684 685 static void 686 witness_ddb_display(int(*prnt)(const char *fmt, ...)) 687 { 688 struct witness *w; 689 690 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 691 witness_ddb_compute_levels(); 692 693 /* Clear all the displayed flags. */ 694 SLIST_FOREACH(w, &w_all, w_list) 695 w->w_displayed = 0; 696 697 /* 698 * First, handle sleep locks which have been acquired at least 699 * once. 700 */ 701 prnt("Sleep locks:\n"); 702 witness_ddb_display_list(prnt, &w_sleep); 703 704 /* 705 * Now do spin locks which have been acquired at least once. 706 */ 707 prnt("\nSpin locks:\n"); 708 witness_ddb_display_list(prnt, &w_spin); 709 710 /* 711 * Finally, any locks which have not been acquired yet. 712 */ 713 prnt("\nLocks which were never acquired:\n"); 714 SLIST_FOREACH(w, &w_all, w_list) { 715 if (w->w_acquired) 716 continue; 717 prnt("%s (type: %s, depth: %d)\n", w->w_type->lt_name, 718 w->w_class->lc_name, w->w_ddb_level); 719 } 720 } 721 #endif /* DDB */ 722 723 int 724 witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) 725 { 726 727 if (witness_watch < 0 || panicstr != NULL || db_active) 728 return (0); 729 730 /* Require locks that witness knows about. */ 731 if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || 732 lock2->lo_witness == NULL) 733 return (EINVAL); 734 735 MUTEX_ASSERT_UNLOCKED(&w_mtx); 736 mtx_enter(&w_mtx); 737 738 /* 739 * If we already have either an explicit or implied lock order that 740 * is the other way around, then return an error. 741 */ 742 if (witness_watch && 743 isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { 744 mtx_leave(&w_mtx); 745 return (EINVAL); 746 } 747 748 /* Try to add the new order. 
*/ 749 itismychild(lock1->lo_witness, lock2->lo_witness); 750 mtx_leave(&w_mtx); 751 return (0); 752 } 753 754 void 755 witness_checkorder(struct lock_object *lock, int flags, 756 struct lock_object *interlock) 757 { 758 struct lock_list_entry *lock_list, *lle; 759 struct lock_instance *lock1, *lock2, *plock; 760 struct lock_class *class, *iclass; 761 struct proc *p; 762 struct witness *w, *w1; 763 int i, j, s; 764 765 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active) 766 return; 767 768 if ((lock->lo_flags & LO_INITIALIZED) == 0) { 769 if (witness_uninitialized_report > 0) { 770 witness_uninitialized_report--; 771 printf("witness: lock_object uninitialized: %p\n", lock); 772 witness_debugger(1); 773 } 774 lock->lo_flags |= LO_INITIALIZED; 775 } 776 777 if ((lock->lo_flags & LO_WITNESS) == 0) 778 return; 779 780 w = lock->lo_witness; 781 class = LOCK_CLASS(lock); 782 783 if (w == NULL) 784 w = lock->lo_witness = 785 enroll(lock->lo_type, lock->lo_name, class); 786 787 p = curproc; 788 789 if (class->lc_flags & LC_SLEEPLOCK) { 790 /* 791 * Since spin locks include a critical section, this check 792 * implicitly enforces a lock order of all sleep locks before 793 * all spin locks. 794 */ 795 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 796 if (lock_list != NULL && lock_list->ll_count > 0) { 797 panic("acquiring blockable sleep lock with " 798 "spinlock or critical section held (%s) %s", 799 class->lc_name, lock->lo_name); 800 } 801 802 /* 803 * If this is the first lock acquired then just return as 804 * no order checking is needed. 805 */ 806 lock_list = p->p_sleeplocks; 807 if (lock_list == NULL || lock_list->ll_count == 0) 808 return; 809 } else { 810 811 /* 812 * If this is the first lock, just return as no order 813 * checking is needed. 814 */ 815 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 816 if (lock_list == NULL || lock_list->ll_count == 0) 817 return; 818 } 819 820 s = splhigh(); 821 822 /* 823 * Check to see if we are recursing on a lock we already own. If 824 * so, make sure that we don't mismatch exclusive and shared lock 825 * acquires. 826 */ 827 lock1 = find_instance(lock_list, lock); 828 if (lock1 != NULL) { 829 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 830 (flags & LOP_EXCLUSIVE) == 0) { 831 printf("witness: shared lock of (%s) %s " 832 "while exclusively locked\n", 833 class->lc_name, lock->lo_name); 834 panic("excl->share"); 835 } 836 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 837 (flags & LOP_EXCLUSIVE) != 0) { 838 printf("witness: exclusive lock of (%s) %s " 839 "while share locked\n", 840 class->lc_name, lock->lo_name); 841 panic("share->excl"); 842 } 843 goto out_splx; 844 } 845 846 /* Warn if the interlock is not locked exactly once. */ 847 if (interlock != NULL) { 848 iclass = LOCK_CLASS(interlock); 849 lock1 = find_instance(lock_list, interlock); 850 if (lock1 == NULL) 851 panic("interlock (%s) %s not locked", 852 iclass->lc_name, interlock->lo_name); 853 else if ((lock1->li_flags & LI_RECURSEMASK) != 0) 854 panic("interlock (%s) %s recursed", 855 iclass->lc_name, interlock->lo_name); 856 } 857 858 /* 859 * Find the previously acquired lock, but ignore interlocks. 860 */ 861 plock = &lock_list->ll_children[lock_list->ll_count - 1]; 862 if (interlock != NULL && plock->li_lock == interlock) { 863 if (lock_list->ll_count > 1) 864 plock = 865 &lock_list->ll_children[lock_list->ll_count - 2]; 866 else { 867 lle = lock_list->ll_next; 868 869 /* 870 * The interlock is the only lock we hold, so 871 * simply return. 
872 */ 873 if (lle == NULL) 874 goto out_splx; 875 plock = &lle->ll_children[lle->ll_count - 1]; 876 } 877 } 878 879 /* 880 * Try to perform most checks without a lock. If this succeeds we 881 * can skip acquiring the lock and return success. Otherwise we redo 882 * the check with the lock held to handle races with concurrent updates. 883 */ 884 w1 = plock->li_lock->lo_witness; 885 if (witness_lock_order_check(w1, w)) 886 goto out_splx; 887 888 mtx_enter(&w_mtx); 889 if (witness_lock_order_check(w1, w)) 890 goto out; 891 892 witness_lock_order_add(w1, w); 893 894 /* 895 * Check for duplicate locks of the same type. Note that we only 896 * have to check for this on the last lock we just acquired. Any 897 * other cases will be caught as lock order violations. 898 */ 899 if (w1 == w) { 900 i = w->w_index; 901 if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) && 902 !(w_rmatrix[i][i] & WITNESS_REVERSAL)) { 903 w_rmatrix[i][i] |= WITNESS_REVERSAL; 904 w->w_reversed = 1; 905 mtx_leave(&w_mtx); 906 printf("witness: acquiring duplicate lock of " 907 "same type: \"%s\"\n", w->w_type->lt_name); 908 printf(" 1st %s\n", plock->li_lock->lo_name); 909 printf(" 2nd %s\n", lock->lo_name); 910 witness_debugger(1); 911 } else 912 mtx_leave(&w_mtx); 913 goto out_splx; 914 } 915 MUTEX_ASSERT_LOCKED(&w_mtx); 916 917 /* 918 * If we know that the lock we are acquiring comes after 919 * the lock we most recently acquired in the lock order tree, 920 * then there is no need for any further checks. 921 */ 922 if (isitmychild(w1, w)) 923 goto out; 924 925 for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) { 926 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 927 928 KASSERT(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN); 929 lock1 = &lle->ll_children[i]; 930 931 /* 932 * Ignore the interlock. 933 */ 934 if (interlock == lock1->li_lock) 935 continue; 936 937 /* 938 * If this lock doesn't undergo witness checking, 939 * then skip it. 940 */ 941 w1 = lock1->li_lock->lo_witness; 942 if (w1 == NULL) { 943 KASSERTMSG((lock1->li_lock->lo_flags & 944 LO_WITNESS) == 0, 945 "lock missing witness structure"); 946 continue; 947 } 948 949 /* 950 * If we are locking Giant and this is a sleepable 951 * lock, then skip it. 952 */ 953 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 954 is_kernel_lock(lock)) 955 continue; 956 957 /* 958 * If we are locking a sleepable lock and this lock 959 * is Giant, then skip it. 960 */ 961 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 962 is_kernel_lock(lock1->li_lock)) 963 continue; 964 965 /* 966 * If we are locking a sleepable lock and this lock 967 * isn't sleepable, we want to treat it as a lock 968 * order violation to enfore a general lock order of 969 * sleepable locks before non-sleepable locks. 970 */ 971 if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 972 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 973 goto reversal; 974 975 /* 976 * If we are locking Giant and this is a non-sleepable 977 * lock, then treat it as a reversal. 978 */ 979 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && 980 is_kernel_lock(lock)) 981 goto reversal; 982 983 /* 984 * Check the lock order hierarchy for a reveresal. 985 */ 986 if (!isitmydescendant(w, w1)) 987 continue; 988 reversal: 989 990 /* 991 * We have a lock order violation, check to see if it 992 * is allowed or has already been yelled about. 
993 */ 994 995 /* Bail if this violation is known */ 996 if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL) 997 goto out; 998 999 /* Record this as a violation */ 1000 w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL; 1001 w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL; 1002 w->w_reversed = w1->w_reversed = 1; 1003 witness_increment_graph_generation(); 1004 mtx_leave(&w_mtx); 1005 1006 /* 1007 * There are known LORs between VNODE locks. They are 1008 * not an indication of a bug. VNODE locks are flagged 1009 * as such (LO_IS_VNODE) and we don't yell if the LOR 1010 * is between 2 VNODE locks. 1011 */ 1012 if ((lock->lo_flags & LO_IS_VNODE) != 0 && 1013 (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0) 1014 goto out_splx; 1015 1016 /* 1017 * Ok, yell about it. 1018 */ 1019 printf("witness: "); 1020 if (((lock->lo_flags & LO_SLEEPABLE) != 0 && 1021 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 1022 printf("lock order reversal: " 1023 "(sleepable after non-sleepable)\n"); 1024 else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 1025 && is_kernel_lock(lock)) 1026 printf("lock order reversal: " 1027 "(Giant after non-sleepable)\n"); 1028 else 1029 printf("lock order reversal:\n"); 1030 1031 /* 1032 * Try to locate an earlier lock with 1033 * witness w in our list. 1034 */ 1035 do { 1036 lock2 = &lle->ll_children[i]; 1037 KASSERT(lock2->li_lock != NULL); 1038 if (lock2->li_lock->lo_witness == w) 1039 break; 1040 if (i == 0 && lle->ll_next != NULL) { 1041 lle = lle->ll_next; 1042 i = lle->ll_count - 1; 1043 KASSERT(i >= 0 && i < LOCK_NCHILDREN); 1044 } else 1045 i--; 1046 } while (i >= 0); 1047 if (i < 0) { 1048 printf(" 1st %p %s (%s)\n", 1049 lock1->li_lock, lock1->li_lock->lo_name, 1050 w1->w_type->lt_name); 1051 printf(" 2nd %p %s (%s)\n", 1052 lock, lock->lo_name, w->w_type->lt_name); 1053 } else { 1054 printf(" 1st %p %s (%s)\n", 1055 lock2->li_lock, lock2->li_lock->lo_name, 1056 lock2->li_lock->lo_witness->w_type-> 1057 lt_name); 1058 printf(" 2nd %p %s (%s)\n", 1059 lock1->li_lock, lock1->li_lock->lo_name, 1060 w1->w_type->lt_name); 1061 printf(" 3rd %p %s (%s)\n", lock, 1062 lock->lo_name, w->w_type->lt_name); 1063 } 1064 if (witness_watch > 1) { 1065 struct witness_lock_order_data *wlod1, *wlod2; 1066 1067 mtx_enter(&w_mtx); 1068 wlod1 = witness_lock_order_get(w, w1); 1069 wlod2 = witness_lock_order_get(w1, w); 1070 mtx_leave(&w_mtx); 1071 1072 /* 1073 * It is safe to access saved stack traces, 1074 * w_type, and w_class without the lock. 1075 * Once written, they never change. 1076 */ 1077 1078 if (wlod1 != NULL) { 1079 printf("lock order \"%s\"(%s) -> " 1080 "\"%s\"(%s) first seen at:\n", 1081 w->w_type->lt_name, 1082 w->w_class->lc_name, 1083 w1->w_type->lt_name, 1084 w1->w_class->lc_name); 1085 stacktrace_print( 1086 &wlod1->wlod_stack, printf); 1087 } else { 1088 printf("lock order data " 1089 "w2 -> w1 missing\n"); 1090 } 1091 if (wlod2 != NULL) { 1092 printf("lock order \"%s\"(%s) -> " 1093 "\"%s\"(%s) first seen at:\n", 1094 w1->w_type->lt_name, 1095 w1->w_class->lc_name, 1096 w->w_type->lt_name, 1097 w->w_class->lc_name); 1098 stacktrace_print( 1099 &wlod2->wlod_stack, printf); 1100 } else { 1101 printf("lock order data " 1102 "w1 -> w2 missing\n"); 1103 } 1104 } 1105 witness_debugger(0); 1106 goto out_splx; 1107 } 1108 } 1109 1110 /* 1111 * If requested, build a new lock order. However, don't build a new 1112 * relationship between a sleepable lock and Giant if it is in the 1113 * wrong direction. 
The correct lock order is that sleepable locks 1114 * always come before Giant. 1115 */ 1116 if (flags & LOP_NEWORDER && 1117 !(is_kernel_lock(plock->li_lock) && 1118 (lock->lo_flags & LO_SLEEPABLE) != 0)) 1119 itismychild(plock->li_lock->lo_witness, w); 1120 out: 1121 mtx_leave(&w_mtx); 1122 out_splx: 1123 splx(s); 1124 } 1125 1126 void 1127 witness_lock(struct lock_object *lock, int flags) 1128 { 1129 struct lock_list_entry **lock_list, *lle; 1130 struct lock_instance *instance; 1131 struct proc *p; 1132 struct witness *w; 1133 int s; 1134 1135 if (witness_cold || witness_watch < 0 || panicstr != NULL || 1136 db_active || (lock->lo_flags & LO_WITNESS) == 0) 1137 return; 1138 1139 w = lock->lo_witness; 1140 if (w == NULL) 1141 w = lock->lo_witness = 1142 enroll(lock->lo_type, lock->lo_name, LOCK_CLASS(lock)); 1143 1144 p = curproc; 1145 1146 /* Determine lock list for this lock. */ 1147 if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) 1148 lock_list = &p->p_sleeplocks; 1149 else 1150 lock_list = &witness_cpu[cpu_number()].wc_spinlocks; 1151 1152 s = splhigh(); 1153 1154 /* Check to see if we are recursing on a lock we already own. */ 1155 instance = find_instance(*lock_list, lock); 1156 if (instance != NULL) { 1157 instance->li_flags++; 1158 goto out; 1159 } 1160 1161 w->w_acquired = 1; 1162 1163 /* Find the next open lock instance in the list and fill it. */ 1164 lle = *lock_list; 1165 if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { 1166 lle = witness_lock_list_get(); 1167 if (lle == NULL) 1168 goto out; 1169 lle->ll_next = *lock_list; 1170 *lock_list = lle; 1171 } 1172 instance = &lle->ll_children[lle->ll_count++]; 1173 instance->li_lock = lock; 1174 if ((flags & LOP_EXCLUSIVE) != 0) 1175 instance->li_flags = LI_EXCLUSIVE; 1176 else 1177 instance->li_flags = 0; 1178 instance->li_stack = NULL; 1179 if (witness_locktrace) { 1180 instance->li_stack = witness_lock_stack_get(); 1181 if (instance->li_stack != NULL) 1182 stacktrace_save(&instance->li_stack->ls_stack); 1183 } 1184 out: 1185 splx(s); 1186 } 1187 1188 void 1189 witness_upgrade(struct lock_object *lock, int flags) 1190 { 1191 struct lock_instance *instance; 1192 struct lock_class *class; 1193 int s; 1194 1195 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 1196 if (lock->lo_witness == NULL || witness_watch < 0 || 1197 panicstr != NULL || db_active) 1198 return; 1199 class = LOCK_CLASS(lock); 1200 if (witness_watch) { 1201 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1202 panic("upgrade of non-upgradable lock (%s) %s", 1203 class->lc_name, lock->lo_name); 1204 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1205 panic("upgrade of non-sleep lock (%s) %s", 1206 class->lc_name, lock->lo_name); 1207 } 1208 s = splhigh(); 1209 instance = find_instance(curproc->p_sleeplocks, lock); 1210 if (instance == NULL) { 1211 panic("upgrade of unlocked lock (%s) %s", 1212 class->lc_name, lock->lo_name); 1213 goto out; 1214 } 1215 if (witness_watch) { 1216 if ((instance->li_flags & LI_EXCLUSIVE) != 0) 1217 panic("upgrade of exclusive lock (%s) %s", 1218 class->lc_name, lock->lo_name); 1219 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1220 panic("upgrade of recursed lock (%s) %s r=%d", 1221 class->lc_name, lock->lo_name, 1222 instance->li_flags & LI_RECURSEMASK); 1223 } 1224 instance->li_flags |= LI_EXCLUSIVE; 1225 out: 1226 splx(s); 1227 } 1228 1229 void 1230 witness_downgrade(struct lock_object *lock, int flags) 1231 { 1232 struct lock_instance *instance; 1233 struct lock_class *class; 1234 int s; 1235 1236 KASSERTMSG(witness_cold == 
0, "%s: witness_cold", __func__); 1237 if (lock->lo_witness == NULL || witness_watch < 0 || 1238 panicstr != NULL || db_active) 1239 return; 1240 class = LOCK_CLASS(lock); 1241 if (witness_watch) { 1242 if ((lock->lo_flags & LO_UPGRADABLE) == 0) 1243 panic( 1244 "downgrade of non-upgradable lock (%s) %s", 1245 class->lc_name, lock->lo_name); 1246 if ((class->lc_flags & LC_SLEEPLOCK) == 0) 1247 panic("downgrade of non-sleep lock (%s) %s", 1248 class->lc_name, lock->lo_name); 1249 } 1250 s = splhigh(); 1251 instance = find_instance(curproc->p_sleeplocks, lock); 1252 if (instance == NULL) { 1253 panic("downgrade of unlocked lock (%s) %s", 1254 class->lc_name, lock->lo_name); 1255 goto out; 1256 } 1257 if (witness_watch) { 1258 if ((instance->li_flags & LI_EXCLUSIVE) == 0) 1259 panic("downgrade of shared lock (%s) %s", 1260 class->lc_name, lock->lo_name); 1261 if ((instance->li_flags & LI_RECURSEMASK) != 0) 1262 panic("downgrade of recursed lock (%s) %s r=%d", 1263 class->lc_name, lock->lo_name, 1264 instance->li_flags & LI_RECURSEMASK); 1265 } 1266 instance->li_flags &= ~LI_EXCLUSIVE; 1267 out: 1268 splx(s); 1269 } 1270 1271 void 1272 witness_unlock(struct lock_object *lock, int flags) 1273 { 1274 struct lock_list_entry **lock_list, *lle; 1275 struct lock_instance *instance; 1276 struct lock_class *class; 1277 struct proc *p; 1278 int i, j; 1279 int s; 1280 1281 if (witness_cold || lock->lo_witness == NULL || 1282 panicstr != NULL || db_active) 1283 return; 1284 p = curproc; 1285 class = LOCK_CLASS(lock); 1286 1287 /* Find lock instance associated with this lock. */ 1288 if (class->lc_flags & LC_SLEEPLOCK) 1289 lock_list = &p->p_sleeplocks; 1290 else 1291 lock_list = &witness_cpu[cpu_number()].wc_spinlocks; 1292 1293 s = splhigh(); 1294 1295 lle = *lock_list; 1296 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1297 for (i = 0; i < (*lock_list)->ll_count; i++) { 1298 instance = &(*lock_list)->ll_children[i]; 1299 if (instance->li_lock == lock) 1300 goto found; 1301 } 1302 1303 /* 1304 * When disabling WITNESS through witness_watch we could end up in 1305 * having registered locks in the p_sleeplocks queue. 1306 * We have to make sure we flush these queues, so just search for 1307 * eventual register locks and remove them. 1308 */ 1309 if (witness_watch > 0) { 1310 panic("lock (%s) %s not locked", class->lc_name, lock->lo_name); 1311 } 1312 goto out; 1313 1314 found: 1315 1316 /* First, check for shared/exclusive mismatches. */ 1317 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 && 1318 (flags & LOP_EXCLUSIVE) == 0) { 1319 printf("witness: shared unlock of (%s) %s " 1320 "while exclusively locked\n", 1321 class->lc_name, lock->lo_name); 1322 panic("excl->ushare"); 1323 } 1324 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 && 1325 (flags & LOP_EXCLUSIVE) != 0) { 1326 printf("witness: exclusive unlock of (%s) %s " 1327 "while share locked\n", class->lc_name, lock->lo_name); 1328 panic("share->uexcl"); 1329 } 1330 /* If we are recursed, unrecurse. */ 1331 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1332 instance->li_flags--; 1333 goto out; 1334 } 1335 /* The lock is now being dropped, check for NORELEASE flag */ 1336 if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) { 1337 printf("witness: forbidden unlock of (%s) %s\n", 1338 class->lc_name, lock->lo_name); 1339 panic("lock marked norelease"); 1340 } 1341 1342 /* Release the stack buffer, if any. 
*/ 1343 if (instance->li_stack != NULL) { 1344 witness_lock_stack_free(instance->li_stack); 1345 instance->li_stack = NULL; 1346 } 1347 1348 /* Remove this item from the list. */ 1349 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1350 (*lock_list)->ll_children[j] = 1351 (*lock_list)->ll_children[j + 1]; 1352 (*lock_list)->ll_count--; 1353 1354 /* 1355 * To reduce contention on w_mtx, we always try to keep a head 1356 * object on each list so that frequent allocation from the 1357 * free witness pool (and the locking that goes with it) is avoided. 1358 * To keep the code simple, a head 1359 * object that becomes completely empty implies that no 1360 * further objects follow it in the list, so the list ownership needs to be 1361 * handed over to another object if the current head needs to be freed. 1362 */ 1363 if ((*lock_list)->ll_count == 0) { 1364 if (*lock_list == lle) { 1365 if (lle->ll_next == NULL) 1366 goto out; 1367 } else 1368 lle = *lock_list; 1369 *lock_list = lle->ll_next; 1370 witness_lock_list_free(lle); 1371 } 1372 out: 1373 splx(s); 1374 } 1375 1376 void 1377 witness_thread_exit(struct proc *p) 1378 { 1379 struct lock_list_entry *lle; 1380 int i, n, s; 1381 1382 lle = p->p_sleeplocks; 1383 if (lle == NULL || panicstr != NULL || db_active) 1384 return; 1385 if (lle->ll_count != 0) { 1386 for (n = 0; lle != NULL; lle = lle->ll_next) 1387 for (i = lle->ll_count - 1; i >= 0; i--) { 1388 if (n == 0) 1389 printf("witness: thread %p exiting " 1390 "with the following locks held:\n", 1391 p); 1392 n++; 1393 witness_list_lock(&lle->ll_children[i], 1394 printf); 1395 } 1396 panic("thread %p cannot exit while holding sleeplocks", p); 1397 } 1398 KASSERT(lle->ll_next == NULL); 1399 s = splhigh(); 1400 witness_lock_list_free(lle); 1401 splx(s); 1402 } 1403 1404 /* 1405 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1406 * exempt Giant and sleepable locks from the checks as well. If any 1407 * non-exempt locks are held, then a supplied message is printed to the 1408 * output channel along with a list of the offending locks. If indicated in the 1409 * flags then a failure results in a panic as well. 1410 */ 1411 int 1412 witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1413 { 1414 struct lock_list_entry *lock_list, *lle; 1415 struct lock_instance *lock1; 1416 struct proc *p; 1417 va_list ap; 1418 int i, n; 1419 1420 if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active) 1421 return (0); 1422 n = 0; 1423 p = curproc; 1424 for (lle = p->p_sleeplocks; lle != NULL; lle = lle->ll_next) 1425 for (i = lle->ll_count - 1; i >= 0; i--) { 1426 lock1 = &lle->ll_children[i]; 1427 if (lock1->li_lock == lock) 1428 continue; 1429 if (flags & WARN_KERNELOK && 1430 is_kernel_lock(lock1->li_lock)) 1431 continue; 1432 if (flags & WARN_SLEEPOK && 1433 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1434 continue; 1435 if (n == 0) { 1436 printf("witness: "); 1437 va_start(ap, fmt); 1438 vprintf(fmt, ap); 1439 va_end(ap); 1440 printf(" with the following %slocks held:\n", 1441 (flags & WARN_SLEEPOK) != 0 ? 1442 "non-sleepable " : ""); 1443 } 1444 n++; 1445 witness_list_lock(lock1, printf); 1446 } 1447 1448 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 1449 if (lock_list != NULL && lock_list->ll_count != 0) { 1450 /* 1451 * We should only have one spinlock and, since the exemption 1452 * flags (WARN_KERNELOK, WARN_SLEEPOK) cannot apply to this 1453 * lock class, simply check whether the first spinlock is the one curproc 1454 * should hold.
1455 */ 1456 lock1 = &lock_list->ll_children[lock_list->ll_count - 1]; 1457 if (lock_list->ll_count == 1 && lock_list->ll_next == NULL && 1458 lock1->li_lock == lock && n == 0) 1459 return (0); 1460 1461 printf("witness: "); 1462 va_start(ap, fmt); 1463 vprintf(fmt, ap); 1464 va_end(ap); 1465 printf(" with the following %slocks held:\n", 1466 (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : ""); 1467 n += witness_list_locks(&lock_list, printf); 1468 } 1469 if (n > 0) { 1470 if (flags & WARN_PANIC) 1471 panic("%s", __func__); 1472 else 1473 witness_debugger(1); 1474 } 1475 return (n); 1476 } 1477 1478 static struct witness * 1479 enroll(const struct lock_type *type, const char *subtype, 1480 struct lock_class *lock_class) 1481 { 1482 struct witness *w; 1483 struct witness_list *typelist; 1484 1485 KASSERT(type != NULL); 1486 1487 if (witness_watch < 0 || panicstr != NULL || db_active) 1488 return (NULL); 1489 if ((lock_class->lc_flags & LC_SPINLOCK)) { 1490 typelist = &w_spin; 1491 } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) { 1492 typelist = &w_sleep; 1493 } else { 1494 panic("lock class %s is not sleep or spin", 1495 lock_class->lc_name); 1496 return (NULL); 1497 } 1498 1499 mtx_enter(&w_mtx); 1500 w = witness_hash_get(type, subtype); 1501 if (w) 1502 goto found; 1503 if ((w = witness_get()) == NULL) 1504 return (NULL); 1505 w->w_type = type; 1506 w->w_subtype = subtype; 1507 w->w_class = lock_class; 1508 SLIST_INSERT_HEAD(&w_all, w, w_list); 1509 if (lock_class->lc_flags & LC_SPINLOCK) { 1510 SLIST_INSERT_HEAD(&w_spin, w, w_typelist); 1511 w_spin_cnt++; 1512 } else if (lock_class->lc_flags & LC_SLEEPLOCK) { 1513 SLIST_INSERT_HEAD(&w_sleep, w, w_typelist); 1514 w_sleep_cnt++; 1515 } 1516 1517 /* Insert new witness into the hash */ 1518 witness_hash_put(w); 1519 witness_increment_graph_generation(); 1520 mtx_leave(&w_mtx); 1521 return (w); 1522 found: 1523 mtx_leave(&w_mtx); 1524 if (lock_class != w->w_class) 1525 panic("lock (%s) %s does not match earlier (%s) lock", 1526 type->lt_name, lock_class->lc_name, w->w_class->lc_name); 1527 return (w); 1528 } 1529 1530 static void 1531 adopt(struct witness *parent, struct witness *child) 1532 { 1533 int pi, ci, i, j; 1534 1535 if (witness_cold == 0) 1536 MUTEX_ASSERT_LOCKED(&w_mtx); 1537 1538 /* If the relationship is already known, there's no work to be done. */ 1539 if (isitmychild(parent, child)) 1540 return; 1541 1542 /* When the structure of the graph changes, bump up the generation. */ 1543 witness_increment_graph_generation(); 1544 1545 /* 1546 * The hard part ... create the direct relationship, then propagate all 1547 * indirect relationships. 1548 */ 1549 pi = parent->w_index; 1550 ci = child->w_index; 1551 WITNESS_INDEX_ASSERT(pi); 1552 WITNESS_INDEX_ASSERT(ci); 1553 KASSERT(pi != ci); 1554 w_rmatrix[pi][ci] |= WITNESS_PARENT; 1555 w_rmatrix[ci][pi] |= WITNESS_CHILD; 1556 1557 /* 1558 * If parent was not already an ancestor of child, 1559 * then we increment the descendant and ancestor counters. 1560 */ 1561 if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { 1562 parent->w_num_descendants++; 1563 child->w_num_ancestors++; 1564 } 1565 1566 /* 1567 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 1568 * an ancestor of 'pi' during this loop. 1569 */ 1570 for (i = 1; i <= w_max_used_index; i++) { 1571 if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 1572 (i != pi)) 1573 continue; 1574 1575 /* Find each descendant of 'i' and mark it as a descendant. 
*/ 1576 for (j = 1; j <= w_max_used_index; j++) { 1577 1578 /* 1579 * Skip children that are already marked as 1580 * descendants of 'i'. 1581 */ 1582 if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) 1583 continue; 1584 1585 /* 1586 * We are only interested in descendants of 'ci'. Note 1587 * that 'ci' itself is counted as a descendant of 'ci'. 1588 */ 1589 if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && 1590 (j != ci)) 1591 continue; 1592 w_rmatrix[i][j] |= WITNESS_ANCESTOR; 1593 w_rmatrix[j][i] |= WITNESS_DESCENDANT; 1594 w_data[i].w_num_descendants++; 1595 w_data[j].w_num_ancestors++; 1596 1597 /* 1598 * Make sure we aren't marking a node as both an 1599 * ancestor and descendant. We should have caught 1600 * this as a lock order reversal earlier. 1601 */ 1602 if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) && 1603 (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) { 1604 printf("witness: rmatrix paradox! [%d][%d]=%d " 1605 "both ancestor and descendant\n", 1606 i, j, w_rmatrix[i][j]); 1607 #ifdef DDB 1608 db_stack_dump(); 1609 #endif 1610 printf("witness disabled\n"); 1611 witness_watch = -1; 1612 } 1613 if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) && 1614 (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) { 1615 printf("witness: rmatrix paradox! [%d][%d]=%d " 1616 "both ancestor and descendant\n", 1617 j, i, w_rmatrix[j][i]); 1618 #ifdef DDB 1619 db_stack_dump(); 1620 #endif 1621 printf("witness disabled\n"); 1622 witness_watch = -1; 1623 } 1624 } 1625 } 1626 } 1627 1628 static void 1629 itismychild(struct witness *parent, struct witness *child) 1630 { 1631 KASSERT(child != NULL && parent != NULL); 1632 if (witness_cold == 0) 1633 MUTEX_ASSERT_LOCKED(&w_mtx); 1634 1635 if (!witness_lock_type_equal(parent, child)) { 1636 if (witness_cold == 0) 1637 mtx_leave(&w_mtx); 1638 panic( 1639 "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " 1640 "the same lock type", __func__, parent->w_type->lt_name, 1641 parent->w_class->lc_name, child->w_type->lt_name, 1642 child->w_class->lc_name); 1643 } 1644 adopt(parent, child); 1645 } 1646 1647 /* 1648 * Generic code for the isitmy*() functions. The rmask parameter is the 1649 * expected relationship of w1 to w2. 1650 */ 1651 static int 1652 _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname) 1653 { 1654 unsigned char r1, r2; 1655 int i1, i2; 1656 1657 i1 = w1->w_index; 1658 i2 = w2->w_index; 1659 WITNESS_INDEX_ASSERT(i1); 1660 WITNESS_INDEX_ASSERT(i2); 1661 r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK; 1662 r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK; 1663 1664 /* The flags on one better be the inverse of the flags on the other */ 1665 if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) || 1666 (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) { 1667 /* Don't squawk if we're potentially racing with an update. */ 1668 if (w_mtx.mtx_owner != curcpu()) 1669 return (0); 1670 printf("witness: %s: rmatrix mismatch between %s (index %d) " 1671 "and %s (index %d): w_rmatrix[%d][%d] == %x but " 1672 "w_rmatrix[%d][%d] == %x\n", 1673 fname, w1->w_type->lt_name, i1, w2->w_type->lt_name, 1674 i2, i1, i2, r1, 1675 i2, i1, r2); 1676 #ifdef DDB 1677 db_stack_dump(); 1678 #endif 1679 printf("witness disabled\n"); 1680 witness_watch = -1; 1681 } 1682 return (r1 & rmask); 1683 } 1684 1685 /* 1686 * Checks if @child is a direct child of @parent. 
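 * A direct child is one whose relation is recorded as WITNESS_PARENT in w_rmatrix; isitmydescendant() below also matches indirect descendants via WITNESS_ANCESTOR_MASK.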
1687 */ 1688 static int 1689 isitmychild(struct witness *parent, struct witness *child) 1690 { 1691 1692 return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); 1693 } 1694 1695 /* 1696 * Checks if @descendant is a direct or indirect descendant of @ancestor. 1697 */ 1698 static int 1699 isitmydescendant(struct witness *ancestor, struct witness *descendant) 1700 { 1701 1702 return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, 1703 __func__)); 1704 } 1705 1706 static struct witness * 1707 witness_get(void) 1708 { 1709 struct witness *w; 1710 int index; 1711 1712 if (witness_cold == 0) 1713 MUTEX_ASSERT_LOCKED(&w_mtx); 1714 1715 if (witness_watch < 0) { 1716 mtx_leave(&w_mtx); 1717 return (NULL); 1718 } 1719 if (SLIST_EMPTY(&w_free)) { 1720 witness_watch = -1; 1721 mtx_leave(&w_mtx); 1722 printf("WITNESS: unable to allocate a new witness object\n"); 1723 return (NULL); 1724 } 1725 w = SLIST_FIRST(&w_free); 1726 SLIST_REMOVE_HEAD(&w_free, w_list); 1727 w_free_cnt--; 1728 index = w->w_index; 1729 KASSERT(index > 0 && index == w_max_used_index + 1 && 1730 index < witness_count); 1731 memset(w, 0, sizeof(*w)); 1732 w->w_index = index; 1733 if (index > w_max_used_index) 1734 w_max_used_index = index; 1735 return (w); 1736 } 1737 1738 static void 1739 witness_free(struct witness *w) 1740 { 1741 SLIST_INSERT_HEAD(&w_free, w, w_list); 1742 w_free_cnt++; 1743 } 1744 1745 static struct lock_list_entry * 1746 witness_lock_list_get(void) 1747 { 1748 struct lock_list_entry *lle; 1749 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1750 1751 if (witness_watch < 0) 1752 return (NULL); 1753 1754 splassert(IPL_HIGH); 1755 1756 if (wcpu->wc_lle_count > 0) { 1757 lle = wcpu->wc_lle_cache; 1758 wcpu->wc_lle_cache = lle->ll_next; 1759 wcpu->wc_lle_count--; 1760 memset(lle, 0, sizeof(*lle)); 1761 return (lle); 1762 } 1763 1764 mtx_enter(&w_mtx); 1765 lle = w_lock_list_free; 1766 if (lle == NULL) { 1767 witness_watch = -1; 1768 mtx_leave(&w_mtx); 1769 printf("%s: witness exhausted\n", __func__); 1770 return (NULL); 1771 } 1772 w_lock_list_free = lle->ll_next; 1773 mtx_leave(&w_mtx); 1774 memset(lle, 0, sizeof(*lle)); 1775 return (lle); 1776 } 1777 1778 static void 1779 witness_lock_list_free(struct lock_list_entry *lle) 1780 { 1781 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1782 1783 splassert(IPL_HIGH); 1784 1785 if (wcpu->wc_lle_count < WITNESS_LLE_CACHE_MAX) { 1786 lle->ll_next = wcpu->wc_lle_cache; 1787 wcpu->wc_lle_cache = lle; 1788 wcpu->wc_lle_count++; 1789 return; 1790 } 1791 1792 mtx_enter(&w_mtx); 1793 lle->ll_next = w_lock_list_free; 1794 w_lock_list_free = lle; 1795 mtx_leave(&w_mtx); 1796 } 1797 1798 static union lock_stack * 1799 witness_lock_stack_get(void) 1800 { 1801 union lock_stack *stack = NULL; 1802 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1803 1804 splassert(IPL_HIGH); 1805 1806 if (wcpu->wc_stk_count > 0) { 1807 stack = wcpu->wc_stk_cache; 1808 wcpu->wc_stk_cache = stack->ls_next; 1809 wcpu->wc_stk_count--; 1810 return (stack); 1811 } 1812 1813 mtx_enter(&w_mtx); 1814 if (w_lock_stack_free != NULL) { 1815 stack = w_lock_stack_free; 1816 w_lock_stack_free = stack->ls_next; 1817 } 1818 mtx_leave(&w_mtx); 1819 return (stack); 1820 } 1821 1822 static void 1823 witness_lock_stack_free(union lock_stack *stack) 1824 { 1825 struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; 1826 1827 splassert(IPL_HIGH); 1828 1829 if (wcpu->wc_stk_count < WITNESS_STK_CACHE_MAX) { 1830 stack->ls_next = wcpu->wc_stk_cache; 1831 wcpu->wc_stk_cache = stack; 1832 
wcpu->wc_stk_count++; 1833 return; 1834 } 1835 1836 mtx_enter(&w_mtx); 1837 stack->ls_next = w_lock_stack_free; 1838 w_lock_stack_free = stack; 1839 mtx_leave(&w_mtx); 1840 } 1841 1842 static struct lock_instance * 1843 find_instance(struct lock_list_entry *list, const struct lock_object *lock) 1844 { 1845 struct lock_list_entry *lle; 1846 struct lock_instance *instance; 1847 int i; 1848 1849 for (lle = list; lle != NULL; lle = lle->ll_next) { 1850 for (i = lle->ll_count - 1; i >= 0; i--) { 1851 instance = &lle->ll_children[i]; 1852 if (instance->li_lock == lock) 1853 return (instance); 1854 } 1855 } 1856 return (NULL); 1857 } 1858 1859 static void 1860 witness_list_lock(struct lock_instance *instance, 1861 int (*prnt)(const char *fmt, ...)) 1862 { 1863 struct lock_object *lock; 1864 1865 lock = instance->li_lock; 1866 prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 1867 "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); 1868 prnt(" r = %d (%p)\n", instance->li_flags & LI_RECURSEMASK, lock); 1869 if (instance->li_stack != NULL) 1870 stacktrace_print(&instance->li_stack->ls_stack, prnt); 1871 } 1872 1873 #ifdef DDB 1874 static int 1875 witness_thread_has_locks(struct proc *p) 1876 { 1877 1878 if (p->p_sleeplocks == NULL) 1879 return (0); 1880 return (p->p_sleeplocks->ll_count != 0); 1881 } 1882 1883 static int 1884 witness_process_has_locks(struct process *pr) 1885 { 1886 struct proc *p; 1887 1888 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { 1889 if (witness_thread_has_locks(p)) 1890 return (1); 1891 } 1892 return (0); 1893 } 1894 #endif 1895 1896 int 1897 witness_list_locks(struct lock_list_entry **lock_list, 1898 int (*prnt)(const char *fmt, ...)) 1899 { 1900 struct lock_list_entry *lle; 1901 int i, nheld; 1902 1903 nheld = 0; 1904 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1905 for (i = lle->ll_count - 1; i >= 0; i--) { 1906 witness_list_lock(&lle->ll_children[i], prnt); 1907 nheld++; 1908 } 1909 return (nheld); 1910 } 1911 1912 /* 1913 * This is a bit risky at best. We call this function when we have timed 1914 * out acquiring a spin lock, and we assume that the other CPU is stuck 1915 * with this lock held. So, we go groveling around in the other CPU's 1916 * per-cpu data to try to find the lock instance for this spin lock to 1917 * see when it was last acquired. 
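 *
 * Illustrative call pattern (a sketch only; the real callers live in the
 * spinlock code, and "lo" and "owner" are hypothetical locals of the
 * timed-out spinner):
 *
 *	printf("spun out on %s\n", lo->lo_name);
 *	witness_display_spinlock(lo, owner, printf);
 *
 * The output is best-effort: the owner may release the lock, or go off
 * CPU, between the timeout and the lookup of its lock instance.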
1918 */ 1919 void 1920 witness_display_spinlock(struct lock_object *lock, struct proc *owner, 1921 int (*prnt)(const char *fmt, ...)) 1922 { 1923 struct lock_instance *instance; 1924 1925 if (owner->p_stat != SONPROC) 1926 return; 1927 instance = find_instance( 1928 witness_cpu[owner->p_cpu->ci_cpuid].wc_spinlocks, lock); 1929 if (instance != NULL) 1930 witness_list_lock(instance, prnt); 1931 } 1932 1933 void 1934 witness_assert(const struct lock_object *lock, int flags) 1935 { 1936 struct lock_instance *instance; 1937 struct lock_class *class; 1938 1939 if (lock->lo_witness == NULL || witness_watch < 1 || 1940 panicstr != NULL || db_active) 1941 return; 1942 class = LOCK_CLASS(lock); 1943 if ((class->lc_flags & LC_SLEEPLOCK) != 0) 1944 instance = find_instance(curproc->p_sleeplocks, lock); 1945 else if ((class->lc_flags & LC_SPINLOCK) != 0) 1946 instance = find_instance( 1947 witness_cpu[cpu_number()].wc_spinlocks, lock); 1948 else { 1949 panic("lock (%s) %s is not sleep or spin!", 1950 class->lc_name, lock->lo_name); 1951 return; 1952 } 1953 switch (flags) { 1954 case LA_UNLOCKED: 1955 if (instance != NULL) 1956 panic("lock (%s) %s locked", 1957 class->lc_name, lock->lo_name); 1958 break; 1959 case LA_LOCKED: 1960 case LA_LOCKED | LA_RECURSED: 1961 case LA_LOCKED | LA_NOTRECURSED: 1962 case LA_SLOCKED: 1963 case LA_SLOCKED | LA_RECURSED: 1964 case LA_SLOCKED | LA_NOTRECURSED: 1965 case LA_XLOCKED: 1966 case LA_XLOCKED | LA_RECURSED: 1967 case LA_XLOCKED | LA_NOTRECURSED: 1968 if (instance == NULL) { 1969 panic("lock (%s) %s not locked", 1970 class->lc_name, lock->lo_name); 1971 break; 1972 } 1973 if ((flags & LA_XLOCKED) != 0 && 1974 (instance->li_flags & LI_EXCLUSIVE) == 0) 1975 panic( 1976 "lock (%s) %s not exclusively locked", 1977 class->lc_name, lock->lo_name); 1978 if ((flags & LA_SLOCKED) != 0 && 1979 (instance->li_flags & LI_EXCLUSIVE) != 0) 1980 panic( 1981 "lock (%s) %s exclusively locked", 1982 class->lc_name, lock->lo_name); 1983 if ((flags & LA_RECURSED) != 0 && 1984 (instance->li_flags & LI_RECURSEMASK) == 0) 1985 panic("lock (%s) %s not recursed", 1986 class->lc_name, lock->lo_name); 1987 if ((flags & LA_NOTRECURSED) != 0 && 1988 (instance->li_flags & LI_RECURSEMASK) != 0) 1989 panic("lock (%s) %s recursed", 1990 class->lc_name, lock->lo_name); 1991 break; 1992 default: 1993 panic("invalid lock assertion"); 1994 1995 } 1996 } 1997 1998 static void 1999 witness_setflag(struct lock_object *lock, int flag, int set) 2000 { 2001 struct lock_list_entry *lock_list; 2002 struct lock_instance *instance; 2003 struct lock_class *class; 2004 2005 if (lock->lo_witness == NULL || witness_watch < 0 || 2006 panicstr != NULL || db_active) 2007 return; 2008 class = LOCK_CLASS(lock); 2009 if (class->lc_flags & LC_SLEEPLOCK) 2010 lock_list = curproc->p_sleeplocks; 2011 else 2012 lock_list = witness_cpu[cpu_number()].wc_spinlocks; 2013 instance = find_instance(lock_list, lock); 2014 if (instance == NULL) { 2015 panic("%s: lock (%s) %s not locked", __func__, 2016 class->lc_name, lock->lo_name); 2017 return; 2018 } 2019 2020 if (set) 2021 instance->li_flags |= flag; 2022 else 2023 instance->li_flags &= ~flag; 2024 } 2025 2026 void 2027 witness_norelease(struct lock_object *lock) 2028 { 2029 2030 witness_setflag(lock, LI_NORELEASE, 1); 2031 } 2032 2033 void 2034 witness_releaseok(struct lock_object *lock) 2035 { 2036 2037 witness_setflag(lock, LI_NORELEASE, 0); 2038 } 2039 2040 #ifdef DDB 2041 static void 2042 witness_ddb_list(struct proc *p) 2043 { 2044 struct witness_cpu *wc = 
&witness_cpu[cpu_number()]; 2045 2046 KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__); 2047 KASSERTMSG(db_active, "%s: not in the debugger", __func__); 2048 2049 if (witness_watch < 1) 2050 return; 2051 2052 witness_list_locks(&p->p_sleeplocks, db_printf); 2053 2054 /* 2055 * We only handle spinlocks if td == curproc. This is somewhat broken 2056 * if td is currently executing on some other CPU and holds spin locks 2057 * as we won't display those locks. If we had a MI way of getting 2058 * the per-cpu data for a given cpu then we could use 2059 * td->td_oncpu to get the list of spinlocks for this thread 2060 * and "fix" this. 2061 * 2062 * That still wouldn't really fix this unless we locked the scheduler 2063 * lock or stopped the other CPU to make sure it wasn't changing the 2064 * list out from under us. It is probably best to just not try to 2065 * handle threads on other CPU's for now. 2066 */ 2067 if (p == curproc && wc->wc_spinlocks != NULL) 2068 witness_list_locks(&wc->wc_spinlocks, db_printf); 2069 } 2070 2071 void 2072 db_witness_list(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2073 { 2074 struct proc *p; 2075 2076 if (have_addr) 2077 p = (struct proc *)addr; 2078 else 2079 p = curproc; 2080 witness_ddb_list(p); 2081 } 2082 2083 void 2084 db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2085 { 2086 CPU_INFO_ITERATOR cii; 2087 struct cpu_info *ci; 2088 struct lock_list_entry *lock_list; 2089 struct process *pr; 2090 struct proc *p; 2091 2092 CPU_INFO_FOREACH(cii, ci) { 2093 lock_list = witness_cpu[CPU_INFO_UNIT(ci)].wc_spinlocks; 2094 if (lock_list == NULL || lock_list->ll_count == 0) 2095 continue; 2096 db_printf("CPU %d:\n", CPU_INFO_UNIT(ci)); 2097 witness_list_locks(&lock_list, db_printf); 2098 } 2099 2100 /* 2101 * It would be nice to list only threads and processes that actually 2102 * held sleep locks, but that information is currently not exported 2103 * by WITNESS. 2104 */ 2105 LIST_FOREACH(pr, &allprocess, ps_list) { 2106 if (!witness_process_has_locks(pr)) 2107 continue; 2108 TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { 2109 if (!witness_thread_has_locks(p)) 2110 continue; 2111 db_printf("Process %d (%s) thread %p (%d)\n", 2112 pr->ps_pid, pr->ps_comm, p, p->p_tid); 2113 witness_ddb_list(p); 2114 } 2115 } 2116 } 2117 2118 void 2119 witness_print_badstacks(void) 2120 { 2121 static struct witness tmp_w1, tmp_w2; 2122 static struct witness_lock_order_data tmp_data1, tmp_data2; 2123 struct witness_lock_order_data *data1, *data2; 2124 struct witness *w1, *w2; 2125 int error, generation, i, j; 2126 2127 if (witness_watch < 1) { 2128 db_printf("witness watch is disabled\n"); 2129 return; 2130 } 2131 if (witness_cold) { 2132 db_printf("witness is cold\n"); 2133 return; 2134 } 2135 error = 0; 2136 2137 memset(&tmp_w1, 0, sizeof(tmp_w1)); 2138 memset(&tmp_w2, 0, sizeof(tmp_w2)); 2139 memset(&tmp_data1, 0, sizeof(tmp_data1)); 2140 memset(&tmp_data2, 0, sizeof(tmp_data2)); 2141 2142 restart: 2143 mtx_enter(&w_mtx); 2144 generation = w_generation; 2145 mtx_leave(&w_mtx); 2146 db_printf("Number of known direct relationships is %d\n", 2147 w_lohash.wloh_count); 2148 for (i = 1; i < w_max_used_index; i++) { 2149 mtx_enter(&w_mtx); 2150 if (generation != w_generation) { 2151 mtx_leave(&w_mtx); 2152 2153 /* The graph has changed, try again. 
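 * (w_generation is bumped by witness_increment_graph_generation()
 * whenever the graph is modified, so a mismatch here means the data
 * gathered so far may already be stale.)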
*/ 2154 db_printf("Lock graph changed, restarting trace.\n"); 2155 goto restart; 2156 } 2157 2158 w1 = &w_data[i]; 2159 if (w1->w_reversed == 0) { 2160 mtx_leave(&w_mtx); 2161 continue; 2162 } 2163 2164 /* Copy w1 locally so we can release the spin lock. */ 2165 tmp_w1 = *w1; 2166 mtx_leave(&w_mtx); 2167 2168 if (tmp_w1.w_reversed == 0) 2169 continue; 2170 for (j = 1; j < w_max_used_index; j++) { 2171 if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j) 2172 continue; 2173 2174 mtx_enter(&w_mtx); 2175 if (generation != w_generation) { 2176 mtx_leave(&w_mtx); 2177 2178 /* The graph has changed, try again. */ 2179 db_printf("Lock graph changed, " 2180 "restarting trace.\n"); 2181 goto restart; 2182 } 2183 2184 w2 = &w_data[j]; 2185 data1 = witness_lock_order_get(w1, w2); 2186 data2 = witness_lock_order_get(w2, w1); 2187 2188 /* 2189 * Copy information locally so we can release the 2190 * spin lock. 2191 */ 2192 tmp_w2 = *w2; 2193 2194 if (data1) 2195 tmp_data1.wlod_stack = data1->wlod_stack; 2196 if (data2 && data2 != data1) 2197 tmp_data2.wlod_stack = data2->wlod_stack; 2198 mtx_leave(&w_mtx); 2199 2200 db_printf("\nLock order reversal between \"%s\"(%s) " 2201 "and \"%s\"(%s)!\n", 2202 tmp_w1.w_type->lt_name, tmp_w1.w_class->lc_name, 2203 tmp_w2.w_type->lt_name, tmp_w2.w_class->lc_name); 2204 if (data1) { 2205 db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) " 2206 "first seen at:\n", 2207 tmp_w1.w_type->lt_name, 2208 tmp_w1.w_class->lc_name, 2209 tmp_w2.w_type->lt_name, 2210 tmp_w2.w_class->lc_name); 2211 stacktrace_print(&tmp_data1.wlod_stack, 2212 db_printf); 2213 db_printf("\n"); 2214 } 2215 if (data2 && data2 != data1) { 2216 db_printf("Lock order \"%s\"(%s) -> \"%s\"(%s) " 2217 "first seen at:\n", 2218 tmp_w2.w_type->lt_name, 2219 tmp_w2.w_class->lc_name, 2220 tmp_w1.w_type->lt_name, 2221 tmp_w1.w_class->lc_name); 2222 stacktrace_print(&tmp_data2.wlod_stack, 2223 db_printf); 2224 db_printf("\n"); 2225 } 2226 } 2227 } 2228 mtx_enter(&w_mtx); 2229 if (generation != w_generation) { 2230 mtx_leave(&w_mtx); 2231 2232 /* 2233 * The graph changed while we were printing stack data, 2234 * try again. 
2235 */ 2236 db_printf("Lock graph changed, restarting trace.\n"); 2237 goto restart; 2238 } 2239 mtx_leave(&w_mtx); 2240 } 2241 2242 void 2243 db_witness_display(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2244 { 2245 switch (modif[0]) { 2246 case 'b': 2247 witness_print_badstacks(); 2248 break; 2249 default: 2250 witness_ddb_display(db_printf); 2251 break; 2252 } 2253 } 2254 #endif 2255 2256 void 2257 db_witness_print_fullgraph(void) 2258 { 2259 struct witness *w; 2260 int error; 2261 2262 if (witness_watch < 1) { 2263 db_printf("witness watch is disabled\n"); 2264 return; 2265 } 2266 if (witness_cold) { 2267 db_printf("witness is cold\n"); 2268 return; 2269 } 2270 error = 0; 2271 2272 mtx_enter(&w_mtx); 2273 SLIST_FOREACH(w, &w_all, w_list) 2274 w->w_displayed = 0; 2275 SLIST_FOREACH(w, &w_all, w_list) 2276 db_witness_add_fullgraph(w); 2277 mtx_leave(&w_mtx); 2278 } 2279 2280 static void 2281 db_witness_add_fullgraph(struct witness *w) 2282 { 2283 int i; 2284 2285 if (w->w_displayed != 0 || w->w_acquired == 0) 2286 return; 2287 w->w_displayed = 1; 2288 2289 WITNESS_INDEX_ASSERT(w->w_index); 2290 for (i = 1; i <= w_max_used_index; i++) { 2291 if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) { 2292 db_printf("\"%s\",\"%s\"\n", w->w_type->lt_name, 2293 w_data[i].w_type->lt_name); 2294 db_witness_add_fullgraph(&w_data[i]); 2295 } 2296 } 2297 } 2298 2299 /* 2300 * A simple hash function. Takes a key pointer and a key size. If size == 0, 2301 * interprets the key as a string and reads until the null 2302 * terminator. Otherwise, reads the first size bytes. Returns an unsigned 32-bit 2303 * hash value computed from the key. 2304 */ 2305 static uint32_t 2306 witness_hash_djb2(const uint8_t *key, uint32_t size) 2307 { 2308 unsigned int hash = 5381; 2309 int i; 2310 2311 /* hash = hash * 33 + key[i] */ 2312 if (size) 2313 for (i = 0; i < size; i++) 2314 hash = ((hash << 5) + hash) + (unsigned int)key[i]; 2315 else 2316 for (i = 0; key[i] != 0; i++) 2317 hash = ((hash << 5) + hash) + (unsigned int)key[i]; 2318 2319 return (hash); 2320 } 2321 2322 2323 /* 2324 * Initializes the two witness hash tables. Called exactly once from 2325 * witness_initialize(). 2326 */ 2327 static void 2328 witness_init_hash_tables(void) 2329 { 2330 int i; 2331 2332 KASSERT(witness_cold); 2333 2334 /* Initialize the hash tables. */ 2335 for (i = 0; i < WITNESS_HASH_SIZE; i++) 2336 SLIST_INIT(&w_hash.wh_array[i]); 2337 2338 w_hash.wh_size = WITNESS_HASH_SIZE; 2339 w_hash.wh_count = 0; 2340 2341 /* Initialize the lock order data hash. 
*/ 2342 w_lofree = NULL; 2343 for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) { 2344 memset(&w_lodata[i], 0, sizeof(w_lodata[i])); 2345 w_lodata[i].wlod_next = w_lofree; 2346 w_lofree = &w_lodata[i]; 2347 } 2348 w_lohash.wloh_size = WITNESS_LO_HASH_SIZE; 2349 w_lohash.wloh_count = 0; 2350 for (i = 0; i < WITNESS_LO_HASH_SIZE; i++) 2351 w_lohash.wloh_array[i] = NULL; 2352 } 2353 2354 static struct witness * 2355 witness_hash_get(const struct lock_type *type, const char *subtype) 2356 { 2357 struct witness *w; 2358 uint32_t hash; 2359 2360 KASSERT(type != NULL); 2361 if (witness_cold == 0) 2362 MUTEX_ASSERT_LOCKED(&w_mtx); 2363 hash = (uint32_t)((uintptr_t)type ^ (uintptr_t)subtype) % 2364 w_hash.wh_size; 2365 SLIST_FOREACH(w, &w_hash.wh_array[hash], w_hash_next) { 2366 if (w->w_type == type && w->w_subtype == subtype) 2367 goto out; 2368 } 2369 2370 out: 2371 return (w); 2372 } 2373 2374 static void 2375 witness_hash_put(struct witness *w) 2376 { 2377 uint32_t hash; 2378 2379 KASSERT(w != NULL); 2380 KASSERT(w->w_type != NULL); 2381 if (witness_cold == 0) 2382 MUTEX_ASSERT_LOCKED(&w_mtx); 2383 KASSERTMSG(witness_hash_get(w->w_type, w->w_subtype) == NULL, 2384 "%s: trying to add a hash entry that already exists!", __func__); 2385 KASSERTMSG(SLIST_NEXT(w, w_hash_next) == NULL, 2386 "%s: w->w_hash_next != NULL", __func__); 2387 2388 hash = (uint32_t)((uintptr_t)w->w_type ^ (uintptr_t)w->w_subtype) % 2389 w_hash.wh_size; 2390 SLIST_INSERT_HEAD(&w_hash.wh_array[hash], w, w_hash_next); 2391 w_hash.wh_count++; 2392 } 2393 2394 2395 static struct witness_lock_order_data * 2396 witness_lock_order_get(struct witness *parent, struct witness *child) 2397 { 2398 struct witness_lock_order_data *data = NULL; 2399 struct witness_lock_order_key key; 2400 unsigned int hash; 2401 2402 KASSERT(parent != NULL && child != NULL); 2403 key.from = parent->w_index; 2404 key.to = child->w_index; 2405 WITNESS_INDEX_ASSERT(key.from); 2406 WITNESS_INDEX_ASSERT(key.to); 2407 if ((w_rmatrix[parent->w_index][child->w_index] 2408 & WITNESS_LOCK_ORDER_KNOWN) == 0) 2409 goto out; 2410 2411 hash = witness_hash_djb2((const char*)&key, 2412 sizeof(key)) % w_lohash.wloh_size; 2413 data = w_lohash.wloh_array[hash]; 2414 while (data != NULL) { 2415 if (witness_lock_order_key_equal(&data->wlod_key, &key)) 2416 break; 2417 data = data->wlod_next; 2418 } 2419 2420 out: 2421 return (data); 2422 } 2423 2424 /* 2425 * Verify that parent and child have a known relationship, are not the same, 2426 * and child is actually a child of parent. This is done without w_mtx 2427 * to avoid contention in the common case. 
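 *
 * A sketch of the intended pairing (the actual call site is in the
 * acquisition path earlier in this file and may differ in detail):
 *
 *	if (!witness_lock_order_check(w1, w))
 *		(void)witness_lock_order_add(w1, w);
 *
 * so that the stack trace for a given parent/child order is saved only
 * the first time that order is observed.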
2428 */ 2429 static int 2430 witness_lock_order_check(struct witness *parent, struct witness *child) 2431 { 2432 2433 if (parent != child && 2434 w_rmatrix[parent->w_index][child->w_index] 2435 & WITNESS_LOCK_ORDER_KNOWN && 2436 isitmychild(parent, child)) 2437 return (1); 2438 2439 return (0); 2440 } 2441 2442 static int 2443 witness_lock_order_add(struct witness *parent, struct witness *child) 2444 { 2445 static int lofree_empty_reported = 0; 2446 struct witness_lock_order_data *data = NULL; 2447 struct witness_lock_order_key key; 2448 unsigned int hash; 2449 2450 KASSERT(parent != NULL && child != NULL); 2451 key.from = parent->w_index; 2452 key.to = child->w_index; 2453 WITNESS_INDEX_ASSERT(key.from); 2454 WITNESS_INDEX_ASSERT(key.to); 2455 if (w_rmatrix[parent->w_index][child->w_index] 2456 & WITNESS_LOCK_ORDER_KNOWN) 2457 return (1); 2458 2459 hash = witness_hash_djb2((const char*)&key, 2460 sizeof(key)) % w_lohash.wloh_size; 2461 w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN; 2462 data = w_lofree; 2463 if (data == NULL) { 2464 if (!lofree_empty_reported) { 2465 lofree_empty_reported = 1; 2466 printf("witness: out of free lock order entries\n"); 2467 } 2468 return (0); 2469 } 2470 w_lofree = data->wlod_next; 2471 data->wlod_next = w_lohash.wloh_array[hash]; 2472 data->wlod_key = key; 2473 w_lohash.wloh_array[hash] = data; 2474 w_lohash.wloh_count++; 2475 stacktrace_save_at(&data->wlod_stack, 1); 2476 return (1); 2477 } 2478 2479 /* Call this whenever the structure of the witness graph changes. */ 2480 static void 2481 witness_increment_graph_generation(void) 2482 { 2483 2484 if (witness_cold == 0) 2485 MUTEX_ASSERT_LOCKED(&w_mtx); 2486 w_generation++; 2487 } 2488 2489 static void 2490 witness_debugger(int dump) 2491 { 2492 switch (witness_watch) { 2493 case 1: 2494 break; 2495 case 2: 2496 if (dump) 2497 db_stack_dump(); 2498 break; 2499 case 3: 2500 if (dump) 2501 db_stack_dump(); 2502 db_enter(); 2503 break; 2504 default: 2505 panic("witness: locking error"); 2506 } 2507 } 2508 2509 static int 2510 witness_alloc_stacks(void) 2511 { 2512 union lock_stack *stacks; 2513 unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN; 2514 2515 rw_assert_wrlock(&w_ctlock); 2516 2517 if (w_lock_stack_num >= nstacks) 2518 return (0); 2519 2520 nstacks -= w_lock_stack_num; 2521 stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS, 2522 M_WAITOK | M_CANFAIL | M_ZERO); 2523 if (stacks == NULL) 2524 return (ENOMEM); 2525 2526 mtx_enter(&w_mtx); 2527 for (i = 0; i < nstacks; i++) { 2528 stacks[i].ls_next = w_lock_stack_free; 2529 w_lock_stack_free = &stacks[i]; 2530 } 2531 mtx_leave(&w_mtx); 2532 w_lock_stack_num += nstacks; 2533 2534 return (0); 2535 } 2536 2537 int 2538 witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, 2539 void *newp, size_t newlen) 2540 { 2541 int error, value; 2542 2543 if (namelen != 1) 2544 return (ENOTDIR); 2545 2546 rw_enter_write(&w_ctlock); 2547 2548 switch (name[0]) { 2549 case KERN_WITNESS_WATCH: 2550 error = witness_sysctl_watch(oldp, oldlenp, newp, newlen); 2551 break; 2552 case KERN_WITNESS_LOCKTRACE: 2553 value = witness_locktrace; 2554 error = sysctl_int(oldp, oldlenp, newp, newlen, &value); 2555 if (error == 0 && newp != NULL) { 2556 switch (value) { 2557 case 1: 2558 error = witness_alloc_stacks(); 2559 /* FALLTHROUGH */ 2560 case 0: 2561 if (error == 0) 2562 witness_locktrace = value; 2563 break; 2564 default: 2565 error = EINVAL; 2566 break; 2567 } 2568 } 2569 break; 2570 default: 2571 error = EOPNOTSUPP; 2572 
break; 2573 } 2574 2575 rw_exit_write(&w_ctlock); 2576 2577 return (error); 2578 } 2579 2580 int 2581 witness_sysctl_watch(void *oldp, size_t *oldlenp, void *newp, size_t newlen) 2582 { 2583 int error; 2584 int value; 2585 2586 value = witness_watch; 2587 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, 2588 &value, -1, 3); 2589 if (error == 0 && newp != NULL) { 2590 mtx_enter(&w_mtx); 2591 if (value < 0 || witness_watch >= 0) 2592 witness_watch = value; 2593 else 2594 error = EINVAL; 2595 mtx_leave(&w_mtx); 2596 } 2597 return (error); 2598 } 2599
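
/*
 * Usage note (illustrative; see sysctl(2) and sysctl(8) on the target
 * release for the authoritative MIB names): the two nodes handled by
 * witness_sysctl() above are normally reached from userland as
 * kern.witness.watch and kern.witness.locktrace, e.g.
 *
 *	# sysctl kern.witness.watch=3		report, dump stack, enter ddb
 *	# sysctl kern.witness.locktrace=1	save stack traces; the lock
 *						stacks are allocated on the
 *						first enable
 *
 * Setting watch to -1 effectively disables witness, matching the
 * "witness_watch = -1" error paths above; once negative it cannot be
 * raised again through this interface.
 */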