xref: /freebsd/sys/kern/kern_lock.c (revision 206b73d0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28  * DAMAGE.
29  */
30 
31 #include "opt_ddb.h"
32 #include "opt_hwpmc_hooks.h"
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/lock_profile.h>
42 #include <sys/lockmgr.h>
43 #include <sys/lockstat.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/sleepqueue.h>
47 #ifdef DEBUG_LOCKS
48 #include <sys/stack.h>
49 #endif
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52 
53 #include <machine/cpu.h>
54 
55 #ifdef DDB
56 #include <ddb/ddb.h>
57 #endif
58 
59 #ifdef HWPMC_HOOKS
60 #include <sys/pmckern.h>
61 PMC_SOFT_DECLARE( , , lock, failed);
62 #endif
63 
64 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
65     (LK_ADAPTIVE | LK_NOSHARE));
66 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
67     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
68 
69 #define	SQ_EXCLUSIVE_QUEUE	0
70 #define	SQ_SHARED_QUEUE		1
71 
72 #ifndef INVARIANTS
73 #define	_lockmgr_assert(lk, what, file, line)
74 #endif
75 
76 #define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
77 #define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
78 
79 #ifndef DEBUG_LOCKS
80 #define	STACK_PRINT(lk)
81 #define	STACK_SAVE(lk)
82 #define	STACK_ZERO(lk)
83 #else
84 #define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
85 #define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
86 #define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
87 #endif
88 
89 #define	LOCK_LOG2(lk, string, arg1, arg2)				\
90 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
91 		CTR2(KTR_LOCK, (string), (arg1), (arg2))
92 #define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
93 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
94 		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
95 
96 #define	GIANT_DECLARE							\
97 	int _i = 0;							\
98 	WITNESS_SAVE_DECL(Giant)
99 #define	GIANT_RESTORE() do {						\
100 	if (__predict_false(_i > 0)) {					\
101 		while (_i--)						\
102 			mtx_lock(&Giant);				\
103 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
104 	}								\
105 } while (0)
106 #define	GIANT_SAVE() do {						\
107 	if (__predict_false(mtx_owned(&Giant))) {			\
108 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
109 		while (mtx_owned(&Giant)) {				\
110 			_i++;						\
111 			mtx_unlock(&Giant);				\
112 		}							\
113 	}								\
114 } while (0)
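
/*
 * GIANT_SAVE() and GIANT_RESTORE() exist because lockmgr locks are
 * sleepable: a thread about to sleep must fully drop Giant (which may be
 * held recursively), remembering the recursion depth in _i, and reacquire
 * it the same number of times after waking up, restoring the WITNESS
 * state saved beforehand.
 */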
115 
116 static bool __always_inline
117 LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
118 {
119 
120 	if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
121 	    LK_SHARE)
122 		return (true);
123 	if (fp || (!(x & LK_SHARE)))
124 		return (false);
125 	if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
126 	    (curthread->td_pflags & TDP_DEADLKTREAT))
127 		return (true);
128 	return (false);
129 }
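
/*
 * LK_CAN_SHARE() encodes the shared-acquisition policy: succeed right away
 * if the lock is share-locked (or unlocked) with no exclusive waiters or
 * spinners; otherwise fail on the fast path or when the lock is held
 * exclusively.  When exclusive waiters are pending, sharing is still
 * allowed for threads that already hold shared locks (unless
 * LK_NODDLKTREAT was passed) or that have TDP_DEADLKTREAT set, so that
 * recursive shared acquisitions do not deadlock against queued writers.
 */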
130 
131 #define	LK_TRYOP(x)							\
132 	((x) & LK_NOWAIT)
133 
134 #define	LK_CAN_WITNESS(x)						\
135 	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
136 #define	LK_TRYWIT(x)							\
137 	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
138 
139 #define	LK_CAN_ADAPT(lk, f)						\
140 	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
141 	((f) & LK_SLEEPFAIL) == 0)
142 
143 #define	lockmgr_disowned(lk)						\
144 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
145 
146 #define	lockmgr_xlocked_v(v)						\
147 	(((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
148 
149 #define	lockmgr_xlocked(lk) lockmgr_xlocked_v((lk)->lk_lock)
150 
151 static void	assert_lockmgr(const struct lock_object *lock, int how);
152 #ifdef DDB
153 static void	db_show_lockmgr(const struct lock_object *lock);
154 #endif
155 static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
156 #ifdef KDTRACE_HOOKS
157 static int	owner_lockmgr(const struct lock_object *lock,
158 		    struct thread **owner);
159 #endif
160 static uintptr_t unlock_lockmgr(struct lock_object *lock);
161 
162 struct lock_class lock_class_lockmgr = {
163 	.lc_name = "lockmgr",
164 	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
165 	.lc_assert = assert_lockmgr,
166 #ifdef DDB
167 	.lc_ddb_show = db_show_lockmgr,
168 #endif
169 	.lc_lock = lock_lockmgr,
170 	.lc_unlock = unlock_lockmgr,
171 #ifdef KDTRACE_HOOKS
172 	.lc_owner = owner_lockmgr,
173 #endif
174 };
175 
176 struct lockmgr_wait {
177 	const char *iwmesg;
178 	int ipri;
179 	int itimo;
180 };
181 
182 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
183     int flags, bool fp);
184 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
185 
186 static void
187 lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
188 {
189 	struct lock_class *class;
190 
191 	if (flags & LK_INTERLOCK) {
192 		class = LOCK_CLASS(ilk);
193 		class->lc_unlock(ilk);
194 	}
195 
196 	if (__predict_false(wakeup_swapper))
197 		kick_proc0();
198 }
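
/*
 * Note on the wakeup_swapper protocol used throughout this file:
 * sleepq_broadcast() reports whether the swapper (proc0) needs to be
 * woken up, but the wakeup is deferred: the flag is carried back to
 * lockmgr_exit() and kick_proc0() is called only after the sleepqueue
 * chain lock and any interlock have been dropped.
 */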
199 
200 static void
201 lockmgr_note_shared_acquire(struct lock *lk, int contested,
202     uint64_t waittime, const char *file, int line, int flags)
203 {
204 
205 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
206 	    waittime, file, line, LOCKSTAT_READER);
207 	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
208 	WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
209 	TD_LOCKS_INC(curthread);
210 	TD_SLOCKS_INC(curthread);
211 	STACK_SAVE(lk);
212 }
213 
214 static void
215 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
216 {
217 
218 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
219 	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
220 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
221 	TD_LOCKS_DEC(curthread);
222 	TD_SLOCKS_DEC(curthread);
223 }
224 
225 static void
226 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
227     uint64_t waittime, const char *file, int line, int flags)
228 {
229 
230 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
231 	    waittime, file, line, LOCKSTAT_WRITER);
232 	LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
233 	WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
234 	    line);
235 	TD_LOCKS_INC(curthread);
236 	STACK_SAVE(lk);
237 }
238 
239 static void
240 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
241 {
242 
243 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_WRITER);
244 	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
245 	    line);
246 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
247 	TD_LOCKS_DEC(curthread);
248 }
249 
250 static __inline struct thread *
251 lockmgr_xholder(const struct lock *lk)
252 {
253 	uintptr_t x;
254 
255 	x = lk->lk_lock;
256 	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
257 }
258 
259 /*
260  * Assumes the sleepqueue chain lock is held on entry and returns with it
261  * released.  It also assumes the generic interlock is sane and was
262  * previously checked.  If LK_INTERLOCK is specified the interlock is not
263  * reacquired after the sleep.
264  */
265 static __inline int
266 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
267     const char *wmesg, int pri, int timo, int queue)
268 {
269 	GIANT_DECLARE;
270 	struct lock_class *class;
271 	int catch, error;
272 
273 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
274 	catch = pri & PCATCH;
275 	pri &= PRIMASK;
276 	error = 0;
277 
278 	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
279 	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
280 
281 	if (flags & LK_INTERLOCK)
282 		class->lc_unlock(ilk);
283 	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
284 		lk->lk_exslpfail++;
285 	GIANT_SAVE();
286 	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
287 	    SLEEPQ_INTERRUPTIBLE : 0), queue);
288 	if ((flags & LK_TIMELOCK) && timo)
289 		sleepq_set_timeout(&lk->lock_object, timo);
290 
291 	/*
292 	 * Pick the sleep primitive based on the timeout and signal-catch settings.
293 	 */
294 	if ((flags & LK_TIMELOCK) && timo && catch)
295 		error = sleepq_timedwait_sig(&lk->lock_object, pri);
296 	else if ((flags & LK_TIMELOCK) && timo)
297 		error = sleepq_timedwait(&lk->lock_object, pri);
298 	else if (catch)
299 		error = sleepq_wait_sig(&lk->lock_object, pri);
300 	else
301 		sleepq_wait(&lk->lock_object, pri);
302 	GIANT_RESTORE();
303 	if ((flags & LK_SLEEPFAIL) && error == 0)
304 		error = ENOLCK;
305 
306 	return (error);
307 }
308 
309 static __inline int
310 wakeupshlk(struct lock *lk, const char *file, int line)
311 {
312 	uintptr_t v, x, orig_x;
313 	u_int realexslp;
314 	int queue, wakeup_swapper;
315 
316 	wakeup_swapper = 0;
317 	for (;;) {
318 		x = lk->lk_lock;
319 		if (lockmgr_sunlock_try(lk, &x))
320 			break;
321 
322 		/*
323 		 * We should have a sharer with waiters, so enter the hard
324 		 * path in order to handle wakeups correctly.
325 		 */
326 		sleepq_lock(&lk->lock_object);
327 		orig_x = lk->lk_lock;
328 retry_sleepq:
329 		x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
330 		v = LK_UNLOCKED;
331 
332 		/*
333 		 * If the lock has exclusive waiters, give them preference in
334 		 * order to avoid a deadlock with shared runners-up.
335 		 * If interruptible sleeps left the exclusive queue empty,
336 		 * avoid starving the threads sleeping on the shared
337 		 * queue by giving them precedence and clearing the
338 		 * exclusive waiters bit anyway.
339 		 * Note that the lk_exslpfail count may overstate the real
340 		 * number of waiters with the LK_SLEEPFAIL flag set, because
341 		 * such waiters may also be using interruptible sleeps, so
342 		 * lk_exslpfail should only be treated as an upper bound,
343 		 * edge cases included.
344 		 */
345 		realexslp = sleepq_sleepcnt(&lk->lock_object,
346 		    SQ_EXCLUSIVE_QUEUE);
347 		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
348 			if (lk->lk_exslpfail < realexslp) {
349 				lk->lk_exslpfail = 0;
350 				queue = SQ_EXCLUSIVE_QUEUE;
351 				v |= (x & LK_SHARED_WAITERS);
352 			} else {
353 				lk->lk_exslpfail = 0;
354 				LOCK_LOG2(lk,
355 				    "%s: %p has only LK_SLEEPFAIL sleepers",
356 				    __func__, lk);
357 				LOCK_LOG2(lk,
358 			    "%s: %p waking up threads on the exclusive queue",
359 				    __func__, lk);
360 				wakeup_swapper =
361 				    sleepq_broadcast(&lk->lock_object,
362 				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
363 				queue = SQ_SHARED_QUEUE;
364 			}
365 
366 		} else {
367 
368 			/*
369 			 * Exclusive waiters sleeping with LK_SLEEPFAIL
370 			 * and using interruptible sleeps/timeouts may have
371 			 * left spurious lk_exslpfail counts behind, so
372 			 * clear them anyway.
373 			 */
374 			lk->lk_exslpfail = 0;
375 			queue = SQ_SHARED_QUEUE;
376 		}
377 
378 		if (lockmgr_sunlock_try(lk, &orig_x)) {
379 			sleepq_release(&lk->lock_object);
380 			break;
381 		}
382 
383 		x |= LK_SHARERS_LOCK(1);
384 		if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
385 			orig_x = x;
386 			goto retry_sleepq;
387 		}
388 		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
389 		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
390 		    "exclusive");
391 		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
392 		    0, queue);
393 		sleepq_release(&lk->lock_object);
394 		break;
395 	}
396 
397 	lockmgr_note_shared_release(lk, file, line);
398 	return (wakeup_swapper);
399 }
400 
401 static void
402 assert_lockmgr(const struct lock_object *lock, int what)
403 {
404 
405 	panic("lockmgr locks do not support assertions");
406 }
407 
408 static void
409 lock_lockmgr(struct lock_object *lock, uintptr_t how)
410 {
411 
412 	panic("lockmgr locks do not support sleep interlocking");
413 }
414 
415 static uintptr_t
416 unlock_lockmgr(struct lock_object *lock)
417 {
418 
419 	panic("lockmgr locks do not support sleep interlocking");
420 }
421 
422 #ifdef KDTRACE_HOOKS
423 static int
424 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
425 {
426 
427 	panic("lockmgr locks do not support owner inquiring");
428 }
429 #endif
430 
431 void
432 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
433 {
434 	int iflags;
435 
436 	MPASS((flags & ~LK_INIT_MASK) == 0);
437 	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
438             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
439             &lk->lk_lock));
440 
441 	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
442 	if (flags & LK_CANRECURSE)
443 		iflags |= LO_RECURSABLE;
444 	if ((flags & LK_NODUP) == 0)
445 		iflags |= LO_DUPOK;
446 	if (flags & LK_NOPROFILE)
447 		iflags |= LO_NOPROFILE;
448 	if ((flags & LK_NOWITNESS) == 0)
449 		iflags |= LO_WITNESS;
450 	if (flags & LK_QUIET)
451 		iflags |= LO_QUIET;
452 	if (flags & LK_IS_VNODE)
453 		iflags |= LO_IS_VNODE;
454 	if (flags & LK_NEW)
455 		iflags |= LO_NEW;
456 	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
457 
458 	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
459 	lk->lk_lock = LK_UNLOCKED;
460 	lk->lk_recurse = 0;
461 	lk->lk_exslpfail = 0;
462 	lk->lk_timo = timo;
463 	lk->lk_pri = pri;
464 	STACK_ZERO(lk);
465 }
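
/*
 * Illustrative usage sketch (not part of this file; "mylock" is a
 * hypothetical lock): a consumer initializes the lock once, takes and
 * drops it around the protected state, and destroys it when the object
 * goes away.
 *
 *	struct lock mylock;
 *
 *	lockinit(&mylock, PVFS, "mylock", 0, 0);
 *	...
 *	lockmgr(&mylock, LK_EXCLUSIVE, NULL);
 *	... modify the protected state ...
 *	lockmgr(&mylock, LK_RELEASE, NULL);
 *	...
 *	lockdestroy(&mylock);
 */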
466 
467 /*
468  * XXX: Gross hacks to manipulate external lock flags after
469  * initialization.  Used for certain vnode and buf locks.
470  */
471 void
472 lockallowshare(struct lock *lk)
473 {
474 
475 	lockmgr_assert(lk, KA_XLOCKED);
476 	lk->lock_object.lo_flags &= ~LK_NOSHARE;
477 }
478 
479 void
480 lockdisableshare(struct lock *lk)
481 {
482 
483 	lockmgr_assert(lk, KA_XLOCKED);
484 	lk->lock_object.lo_flags |= LK_NOSHARE;
485 }
486 
487 void
488 lockallowrecurse(struct lock *lk)
489 {
490 
491 	lockmgr_assert(lk, KA_XLOCKED);
492 	lk->lock_object.lo_flags |= LO_RECURSABLE;
493 }
494 
495 void
496 lockdisablerecurse(struct lock *lk)
497 {
498 
499 	lockmgr_assert(lk, KA_XLOCKED);
500 	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
501 }
502 
503 void
504 lockdestroy(struct lock *lk)
505 {
506 
507 	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
508 	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
509 	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still has exclusive waiters"));
510 	lock_destroy(&lk->lock_object);
511 }
512 
513 static bool __always_inline
514 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
515 {
516 
517 	/*
518 	 * If no other thread has an exclusive lock and
519 	 * no exclusive waiter is present, bump the count of
520 	 * sharers.  Since we have to preserve the state of
521 	 * waiters, if we fail to acquire the shared lock
522 	 * loop back and retry.
523 	 */
524 	*xp = lk->lk_lock;
525 	while (LK_CAN_SHARE(*xp, flags, fp)) {
526 		if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
527 		    *xp + LK_ONE_SHARER)) {
528 			return (true);
529 		}
530 	}
531 	return (false);
532 }
533 
534 static bool __always_inline
535 lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
536 {
537 
538 	for (;;) {
539 		if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
540 			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
541 			    *xp - LK_ONE_SHARER))
542 				return (true);
543 			continue;
544 		}
545 		break;
546 	}
547 	return (false);
548 }
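
/*
 * The two helpers above implement the uncontended shared fast paths on
 * the single lock word: while the lock is share-locked the upper bits
 * carry the sharer count, so acquiring or releasing a shared lock amounts
 * to adding or subtracting LK_ONE_SHARER with a cmpset that preserves the
 * waiter bits.  lockmgr_sunlock_try() deliberately fails for the last
 * sharer when waiter bits are set, forcing the caller into wakeupshlk()
 * so that sleeping threads are woken up properly.
 */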
549 
550 static __noinline int
551 lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
552     const char *file, int line, struct lockmgr_wait *lwa)
553 {
554 	uintptr_t tid, x;
555 	int error = 0;
556 	const char *iwmesg;
557 	int ipri, itimo;
558 
559 #ifdef KDTRACE_HOOKS
560 	uint64_t sleep_time = 0;
561 #endif
562 #ifdef LOCK_PROFILING
563 	uint64_t waittime = 0;
564 	int contested = 0;
565 #endif
566 
567 	if (__predict_false(panicstr != NULL))
568 		goto out;
569 
570 	tid = (uintptr_t)curthread;
571 
572 	if (LK_CAN_WITNESS(flags))
573 		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
574 		    file, line, flags & LK_INTERLOCK ? ilk : NULL);
575 	for (;;) {
576 		if (lockmgr_slock_try(lk, &x, flags, false))
577 			break;
578 #ifdef HWPMC_HOOKS
579 		PMC_SOFT_CALL( , , lock, failed);
580 #endif
581 		lock_profile_obtain_lock_failed(&lk->lock_object,
582 		    &contested, &waittime);
583 
584 		/*
585 		 * If the lock is already held exclusively by
586 		 * curthread, avoid a deadlock.
587 		 */
588 		if (LK_HOLDER(x) == tid) {
589 			LOCK_LOG2(lk,
590 			    "%s: %p already held in exclusive mode",
591 			    __func__, lk);
592 			error = EDEADLK;
593 			break;
594 		}
595 
596 		/*
597 		 * If the operation is not allowed to sleep, just give up
598 		 * and return.
599 		 */
600 		if (LK_TRYOP(flags)) {
601 			LOCK_LOG2(lk, "%s: %p fails the try operation",
602 			    __func__, lk);
603 			error = EBUSY;
604 			break;
605 		}
606 
607 		/*
608 		 * Acquire the sleepqueue chain lock because we
609 		 * probably will need to manipulate the waiters flags.
610 		 */
611 		sleepq_lock(&lk->lock_object);
612 		x = lk->lk_lock;
613 retry_sleepq:
614 
615 		/*
616 		 * If the lock can be acquired in shared mode, try
617 		 * again.
618 		 */
619 		if (LK_CAN_SHARE(x, flags, false)) {
620 			sleepq_release(&lk->lock_object);
621 			continue;
622 		}
623 
624 		/*
625 		 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
626 		 * loop back and retry.
627 		 */
628 		if ((x & LK_SHARED_WAITERS) == 0) {
629 			if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
630 			    x | LK_SHARED_WAITERS)) {
631 				goto retry_sleepq;
632 			}
633 			LOCK_LOG2(lk, "%s: %p set shared waiters flag",
634 			    __func__, lk);
635 		}
636 
637 		if (lwa == NULL) {
638 			iwmesg = lk->lock_object.lo_name;
639 			ipri = lk->lk_pri;
640 			itimo = lk->lk_timo;
641 		} else {
642 			iwmesg = lwa->iwmesg;
643 			ipri = lwa->ipri;
644 			itimo = lwa->itimo;
645 		}
646 
647 		/*
648 		 * Since we have been unable to acquire the
649 		 * shared lock and the shared waiters flag is set,
650 		 * we will sleep.
651 		 */
652 #ifdef KDTRACE_HOOKS
653 		sleep_time -= lockstat_nsecs(&lk->lock_object);
654 #endif
655 		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
656 		    SQ_SHARED_QUEUE);
657 #ifdef KDTRACE_HOOKS
658 		sleep_time += lockstat_nsecs(&lk->lock_object);
659 #endif
660 		flags &= ~LK_INTERLOCK;
661 		if (error) {
662 			LOCK_LOG3(lk,
663 			    "%s: interrupted sleep for %p with %d",
664 			    __func__, lk, error);
665 			break;
666 		}
667 		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
668 		    __func__, lk);
669 	}
670 	if (error == 0) {
671 #ifdef KDTRACE_HOOKS
672 		if (sleep_time != 0)
673 			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
674 			    LOCKSTAT_READER, (x & LK_SHARE) == 0,
675 			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
676 #endif
677 #ifdef LOCK_PROFILING
678 		lockmgr_note_shared_acquire(lk, contested, waittime,
679 		    file, line, flags);
680 #else
681 		lockmgr_note_shared_acquire(lk, 0, 0, file, line,
682 		    flags);
683 #endif
684 	}
685 
686 out:
687 	lockmgr_exit(flags, ilk, 0);
688 	return (error);
689 }
690 
691 static __noinline int
692 lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
693     const char *file, int line, struct lockmgr_wait *lwa)
694 {
695 	struct lock_class *class;
696 	uintptr_t tid, x, v;
697 	int error = 0;
698 	const char *iwmesg;
699 	int ipri, itimo;
700 
701 #ifdef KDTRACE_HOOKS
702 	uint64_t sleep_time = 0;
703 #endif
704 #ifdef LOCK_PROFILING
705 	uint64_t waittime = 0;
706 	int contested = 0;
707 #endif
708 
709 	if (__predict_false(panicstr != NULL))
710 		goto out;
711 
712 	tid = (uintptr_t)curthread;
713 
714 	if (LK_CAN_WITNESS(flags))
715 		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
716 		    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
717 		    ilk : NULL);
718 
719 	/*
720 	 * If curthread already holds the lock and this one is
721 	 * allowed to recurse, simply recurse on it.
722 	 */
723 	if (lockmgr_xlocked(lk)) {
724 		if ((flags & LK_CANRECURSE) == 0 &&
725 		    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
726 			/*
727 			 * If the caller does not want a panic (try
728 			 * operation), just give up and return.
729 			 */
730 			if (LK_TRYOP(flags)) {
731 				LOCK_LOG2(lk,
732 				    "%s: %p fails the try operation",
733 				    __func__, lk);
734 				error = EBUSY;
735 				goto out;
736 			}
737 			if (flags & LK_INTERLOCK) {
738 				class = LOCK_CLASS(ilk);
739 				class->lc_unlock(ilk);
740 			}
741 			panic("%s: recursing on non recursive lockmgr %p "
742 			    "@ %s:%d\n", __func__, lk, file, line);
743 		}
744 		lk->lk_recurse++;
745 		LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
746 		LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
747 		    lk->lk_recurse, file, line);
748 		WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
749 		    LK_TRYWIT(flags), file, line);
750 		TD_LOCKS_INC(curthread);
751 		goto out;
752 	}
753 
754 	for (;;) {
755 		if (lk->lk_lock == LK_UNLOCKED &&
756 		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
757 			break;
758 #ifdef HWPMC_HOOKS
759 		PMC_SOFT_CALL( , , lock, failed);
760 #endif
761 		lock_profile_obtain_lock_failed(&lk->lock_object,
762 		    &contested, &waittime);
763 
764 		/*
765 		 * If the operation is not allowed to sleep, just give up
766 		 * and return.
767 		 */
768 		if (LK_TRYOP(flags)) {
769 			LOCK_LOG2(lk, "%s: %p fails the try operation",
770 			    __func__, lk);
771 			error = EBUSY;
772 			break;
773 		}
774 
775 		/*
776 		 * Acquire the sleepqueue chain lock because we
777 		 * probably will need to manipulate the waiters flags.
778 		 */
779 		sleepq_lock(&lk->lock_object);
780 		x = lk->lk_lock;
781 retry_sleepq:
782 
783 		/*
784 		 * If the lock has been released while we spun on
785 		 * the sleepqueue chain lock, just try again.
786 		 */
787 		if (x == LK_UNLOCKED) {
788 			sleepq_release(&lk->lock_object);
789 			continue;
790 		}
791 
792 		/*
793 		 * The lock can be in the state where there is a
794 		 * pending queue of waiters, but still no owner.
795 		 * This happens when the lock is contested and an
796 		 * owner is going to claim the lock.
797 		 * If curthread is the one successfully acquiring it
798 		 * claim lock ownership and return, preserving waiters
799 		 * flags.
800 		 */
801 		v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
802 		if ((x & ~v) == LK_UNLOCKED) {
803 			v &= ~LK_EXCLUSIVE_SPINNERS;
804 			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
805 			    tid | v)) {
806 				sleepq_release(&lk->lock_object);
807 				LOCK_LOG2(lk,
808 				    "%s: %p claimed by a new writer",
809 				    __func__, lk);
810 				break;
811 			}
812 			goto retry_sleepq;
813 		}
814 
815 		/*
816 		 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
817 		 * fail, loop back and retry.
818 		 */
819 		if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
820 			if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
821 			    x | LK_EXCLUSIVE_WAITERS)) {
822 				goto retry_sleepq;
823 			}
824 			LOCK_LOG2(lk, "%s: %p set excl waiters flag",
825 			    __func__, lk);
826 		}
827 
828 		if (lwa == NULL) {
829 			iwmesg = lk->lock_object.lo_name;
830 			ipri = lk->lk_pri;
831 			itimo = lk->lk_timo;
832 		} else {
833 			iwmesg = lwa->iwmesg;
834 			ipri = lwa->ipri;
835 			itimo = lwa->itimo;
836 		}
837 
838 		/*
839 		 * Since we have been unable to acquire the
840 		 * exclusive lock and the exclusive waiters flag
841 		 * is set, we will sleep.
842 		 */
843 #ifdef KDTRACE_HOOKS
844 		sleep_time -= lockstat_nsecs(&lk->lock_object);
845 #endif
846 		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
847 		    SQ_EXCLUSIVE_QUEUE);
848 #ifdef KDTRACE_HOOKS
849 		sleep_time += lockstat_nsecs(&lk->lock_object);
850 #endif
851 		flags &= ~LK_INTERLOCK;
852 		if (error) {
853 			LOCK_LOG3(lk,
854 			    "%s: interrupted sleep for %p with %d",
855 			    __func__, lk, error);
856 			break;
857 		}
858 		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
859 		    __func__, lk);
860 	}
861 	if (error == 0) {
862 #ifdef KDTRACE_HOOKS
863 		if (sleep_time != 0)
864 			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
865 			    LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
866 			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
867 #endif
868 #ifdef LOCK_PROFILING
869 		lockmgr_note_exclusive_acquire(lk, contested, waittime,
870 		    file, line, flags);
871 #else
872 		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
873 		    flags);
874 #endif
875 	}
876 
877 out:
878 	lockmgr_exit(flags, ilk, 0);
879 	return (error);
880 }
881 
882 static __noinline int
883 lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
884     const char *file, int line, struct lockmgr_wait *lwa)
885 {
886 	uintptr_t tid, x, v;
887 	int error = 0;
888 	int wakeup_swapper = 0;
889 	int op;
890 
891 	if (__predict_false(panicstr != NULL))
892 		goto out;
893 
894 	tid = (uintptr_t)curthread;
895 
896 	_lockmgr_assert(lk, KA_SLOCKED, file, line);
897 	v = lk->lk_lock;
898 	x = v & LK_ALL_WAITERS;
899 	v &= LK_EXCLUSIVE_SPINNERS;
900 
901 	/*
902 	 * Try to switch from one shared lock to an exclusive one.
903 	 * We need to preserve waiters flags during the operation.
904 	 */
905 	if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
906 	    tid | x)) {
907 		LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
908 		    line);
909 		WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
910 		    LK_TRYWIT(flags), file, line);
911 		LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
912 		TD_SLOCKS_DEC(curthread);
913 		goto out;
914 	}
915 
916 	op = flags & LK_TYPE_MASK;
917 
918 	/*
919 	 * In LK_TRYUPGRADE mode, do not drop the lock,
920 	 * returning EBUSY instead.
921 	 */
922 	if (op == LK_TRYUPGRADE) {
923 		LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
924 		    __func__, lk);
925 		error = EBUSY;
926 		goto out;
927 	}
928 
929 	/*
930 	 * We have been unable to upgrade, so just
931 	 * give up the shared lock.
932 	 */
933 	wakeup_swapper |= wakeupshlk(lk, file, line);
934 	error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
935 	flags &= ~LK_INTERLOCK;
936 out:
937 	lockmgr_exit(flags, ilk, wakeup_swapper);
938 	return (error);
939 }
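
/*
 * A note on the two upgrade flavours, following the logic above:
 * LK_TRYUPGRADE never gives up the shared lock, so on EBUSY the caller
 * still holds it; LK_UPGRADE drops the shared lock on contention and then
 * blocks for the exclusive one, so the caller must assume the protected
 * state may have changed while the lock was not held.  Illustrative
 * fallback (not from this file; "mylock" is hypothetical):
 *
 *	if (lockmgr(&mylock, LK_TRYUPGRADE, NULL) == EBUSY) {
 *		lockmgr(&mylock, LK_RELEASE, NULL);
 *		lockmgr(&mylock, LK_EXCLUSIVE, NULL);
 *		... re-validate the protected state ...
 *	}
 */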
940 
941 int
942 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
943     const char *file, int line)
944 {
945 	struct lock_class *class;
946 	uintptr_t x, tid;
947 	u_int op;
948 	bool locked;
949 
950 	if (__predict_false(panicstr != NULL))
951 		return (0);
952 
953 	op = flags & LK_TYPE_MASK;
954 	locked = false;
955 	switch (op) {
956 	case LK_SHARED:
957 		if (LK_CAN_WITNESS(flags))
958 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
959 			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
960 		if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
961 			break;
962 		if (lockmgr_slock_try(lk, &x, flags, true)) {
963 			lockmgr_note_shared_acquire(lk, 0, 0,
964 			    file, line, flags);
965 			locked = true;
966 		} else {
967 			return (lockmgr_slock_hard(lk, flags, ilk, file, line,
968 			    NULL));
969 		}
970 		break;
971 	case LK_EXCLUSIVE:
972 		if (LK_CAN_WITNESS(flags))
973 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
974 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
975 			    ilk : NULL);
976 		tid = (uintptr_t)curthread;
977 		if (lk->lk_lock == LK_UNLOCKED &&
978 		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
979 			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
980 			    flags);
981 			locked = true;
982 		} else {
983 			return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
984 			    NULL));
985 		}
986 		break;
987 	case LK_UPGRADE:
988 	case LK_TRYUPGRADE:
989 		return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
990 	default:
991 		break;
992 	}
993 	if (__predict_true(locked)) {
994 		if (__predict_false(flags & LK_INTERLOCK)) {
995 			class = LOCK_CLASS(ilk);
996 			class->lc_unlock(ilk);
997 		}
998 		return (0);
999 	} else {
1000 		return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
1001 		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
1002 	}
1003 }
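
/*
 * lockmgr_lock_fast_path() handles only the uncontended cases inline: a
 * shared or exclusive acquisition that succeeds with a single atomic
 * operation.  Contended requests fall through to the __noinline helpers
 * (lockmgr_slock_hard(), lockmgr_xlock_hard(), lockmgr_upgrade()), which
 * may have to take the sleepqueue chain lock and sleep, and anything else
 * (unknown ops, LK_NOSHARE shared requests) is forwarded to
 * __lockmgr_args().
 */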
1004 
1005 static __noinline int
1006 lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1007     const char *file, int line)
1008 
1009 {
1010 	int wakeup_swapper = 0;
1011 
1012 	if (__predict_false(panicstr != NULL))
1013 		goto out;
1014 
1015 	wakeup_swapper = wakeupshlk(lk, file, line);
1016 
1017 out:
1018 	lockmgr_exit(flags, ilk, wakeup_swapper);
1019 	return (0);
1020 }
1021 
1022 static __noinline int
1023 lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1024     const char *file, int line)
1025 {
1026 	uintptr_t tid, v;
1027 	int wakeup_swapper = 0;
1028 	u_int realexslp;
1029 	int queue;
1030 
1031 	if (__predict_false(panicstr != NULL))
1032 		goto out;
1033 
1034 	tid = (uintptr_t)curthread;
1035 
1036 	/*
1037 	 * As a first option, treat the lock as if it has no
1038 	 * waiters.
1039 	 * Fix up the tid variable if the lock has been disowned.
1040 	 */
1041 	if (LK_HOLDER(x) == LK_KERNPROC)
1042 		tid = LK_KERNPROC;
1043 	else {
1044 		WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1045 		TD_LOCKS_DEC(curthread);
1046 	}
1047 	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
1048 
1049 	/*
1050 	 * The lock is held in exclusive mode.
1051 	 * If the lock is also recursed, just unrecurse it.
1052 	 */
1053 	if (lockmgr_xlocked_v(x) && lockmgr_recursed(lk)) {
1054 		LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1055 		lk->lk_recurse--;
1056 		goto out;
1057 	}
1058 	if (tid != LK_KERNPROC)
1059 		LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
1060 		    LOCKSTAT_WRITER);
1061 
1062 	if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1063 		goto out;
1064 
1065 	sleepq_lock(&lk->lock_object);
1066 	x = lk->lk_lock;
1067 	v = LK_UNLOCKED;
1068 
1069 	/*
1070 	 * If the lock has exclusive waiters, give them
1071 	 * preference in order to avoid a deadlock with
1072 	 * shared runners-up.
1073 	 * If interruptible sleeps left the exclusive queue
1074 	 * empty, avoid starving the threads sleeping
1075 	 * on the shared queue by giving them precedence
1076 	 * and clearing the exclusive waiters bit anyway.
1077 	 * Note that the lk_exslpfail count may overstate
1078 	 * the real number of waiters with the
1079 	 * LK_SLEEPFAIL flag set, because such waiters may
1080 	 * also be using interruptible sleeps, so
1081 	 * lk_exslpfail should only be treated as an upper
1082 	 * bound, edge cases included.
1083 	 */
1084 	MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1085 	realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1086 	if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1087 		if (lk->lk_exslpfail < realexslp) {
1088 			lk->lk_exslpfail = 0;
1089 			queue = SQ_EXCLUSIVE_QUEUE;
1090 			v |= (x & LK_SHARED_WAITERS);
1091 		} else {
1092 			lk->lk_exslpfail = 0;
1093 			LOCK_LOG2(lk,
1094 			    "%s: %p has only LK_SLEEPFAIL sleepers",
1095 			    __func__, lk);
1096 			LOCK_LOG2(lk,
1097 			    "%s: %p waking up threads on the exclusive queue",
1098 			    __func__, lk);
1099 			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1100 			    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1101 			queue = SQ_SHARED_QUEUE;
1102 		}
1103 	} else {
1104 
1105 		/*
1106 		 * Exclusive waiters sleeping with LK_SLEEPFAIL
1107 		 * and using interruptible sleeps/timeouts
1108 		 * may have left spurious lk_exslpfail counts
1109 		 * behind, so clear them anyway.
1110 		 */
1111 		lk->lk_exslpfail = 0;
1112 		queue = SQ_SHARED_QUEUE;
1113 	}
1114 
1115 	LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1116 	    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1117 	    "exclusive");
1118 	atomic_store_rel_ptr(&lk->lk_lock, v);
1119 	wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1120 	sleepq_release(&lk->lock_object);
1121 
1122 out:
1123 	lockmgr_exit(flags, ilk, wakeup_swapper);
1124 	return (0);
1125 }
1126 
1127 int
1128 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
1129 {
1130 	struct lock_class *class;
1131 	uintptr_t x, tid;
1132 	const char *file;
1133 	int line;
1134 
1135 	if (__predict_false(panicstr != NULL))
1136 		return (0);
1137 
1138 	file = __FILE__;
1139 	line = __LINE__;
1140 
1141 	_lockmgr_assert(lk, KA_LOCKED, file, line);
1142 	x = lk->lk_lock;
1143 	if (__predict_true((x & LK_SHARE) != 0)) {
1144 		if (lockmgr_sunlock_try(lk, &x)) {
1145 			lockmgr_note_shared_release(lk, file, line);
1146 		} else {
1147 			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1148 		}
1149 	} else {
1150 		tid = (uintptr_t)curthread;
1151 		if (!lockmgr_recursed(lk) &&
1152 		    atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1153 			lockmgr_note_exclusive_release(lk, file, line);
1154 		} else {
1155 			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1156 		}
1157 	}
1158 	if (__predict_false(flags & LK_INTERLOCK)) {
1159 		class = LOCK_CLASS(ilk);
1160 		class->lc_unlock(ilk);
1161 	}
1162 	return (0);
1163 }
1164 
1165 int
1166 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1167     const char *wmesg, int pri, int timo, const char *file, int line)
1168 {
1169 	GIANT_DECLARE;
1170 	struct lockmgr_wait lwa;
1171 	struct lock_class *class;
1172 	const char *iwmesg;
1173 	uintptr_t tid, v, x;
1174 	u_int op, realexslp;
1175 	int error, ipri, itimo, queue, wakeup_swapper;
1176 #ifdef LOCK_PROFILING
1177 	uint64_t waittime = 0;
1178 	int contested = 0;
1179 #endif
1180 
1181 	if (panicstr != NULL)
1182 		return (0);
1183 
1184 	error = 0;
1185 	tid = (uintptr_t)curthread;
1186 	op = (flags & LK_TYPE_MASK);
1187 	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1188 	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1189 	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1190 
1191 	lwa.iwmesg = iwmesg;
1192 	lwa.ipri = ipri;
1193 	lwa.itimo = itimo;
1194 
1195 	MPASS((flags & ~LK_TOTAL_MASK) == 0);
1196 	KASSERT((op & (op - 1)) == 0,
1197 	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1198 	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1199 	    (op != LK_DOWNGRADE && op != LK_RELEASE),
1200 	    ("%s: Invalid flags for the requested operation @ %s:%d",
1201 	    __func__, file, line));
1202 	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1203 	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1204 	    __func__, file, line));
1205 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1206 	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1207 	    lk->lock_object.lo_name, file, line));
1208 
1209 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1210 
1211 	if (lk->lock_object.lo_flags & LK_NOSHARE) {
1212 		switch (op) {
1213 		case LK_SHARED:
1214 			op = LK_EXCLUSIVE;
1215 			break;
1216 		case LK_UPGRADE:
1217 		case LK_TRYUPGRADE:
1218 		case LK_DOWNGRADE:
1219 			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1220 			    file, line);
1221 			if (flags & LK_INTERLOCK)
1222 				class->lc_unlock(ilk);
1223 			return (0);
1224 		}
1225 	}
1226 
1227 	wakeup_swapper = 0;
1228 	switch (op) {
1229 	case LK_SHARED:
1230 		return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1231 		break;
1232 	case LK_UPGRADE:
1233 	case LK_TRYUPGRADE:
1234 		return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1235 		break;
1236 	case LK_EXCLUSIVE:
1237 		return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1238 		break;
1239 	case LK_DOWNGRADE:
1240 		_lockmgr_assert(lk, KA_XLOCKED, file, line);
1241 		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1242 
1243 		/*
1244 		 * Panic if the lock is recursed.
1245 		 */
1246 		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1247 			if (flags & LK_INTERLOCK)
1248 				class->lc_unlock(ilk);
1249 			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1250 			    __func__, iwmesg, file, line);
1251 		}
1252 		TD_SLOCKS_INC(curthread);
1253 
1254 		/*
1255 		 * In order to preserve waiters flags, just spin.
1256 		 */
1257 		for (;;) {
1258 			x = lk->lk_lock;
1259 			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1260 			x &= LK_ALL_WAITERS;
1261 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1262 			    LK_SHARERS_LOCK(1) | x))
1263 				break;
1264 			cpu_spinwait();
1265 		}
1266 		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1267 		LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
1268 		break;
1269 	case LK_RELEASE:
1270 		_lockmgr_assert(lk, KA_LOCKED, file, line);
1271 		x = lk->lk_lock;
1272 
1273 		if (__predict_true((x & LK_SHARE) != 0)) {
1274 			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1275 		} else {
1276 			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1277 		}
1278 		break;
1279 	case LK_DRAIN:
1280 		if (LK_CAN_WITNESS(flags))
1281 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1282 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1283 			    ilk : NULL);
1284 
1285 		/*
1286 		 * Trying to drain a lock we already own will result in a
1287 		 * deadlock.
1288 		 */
1289 		if (lockmgr_xlocked(lk)) {
1290 			if (flags & LK_INTERLOCK)
1291 				class->lc_unlock(ilk);
1292 			panic("%s: draining %s with the lock held @ %s:%d\n",
1293 			    __func__, iwmesg, file, line);
1294 		}
1295 
1296 		for (;;) {
1297 			if (lk->lk_lock == LK_UNLOCKED &&
1298 			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1299 				break;
1300 
1301 #ifdef HWPMC_HOOKS
1302 			PMC_SOFT_CALL( , , lock, failed);
1303 #endif
1304 			lock_profile_obtain_lock_failed(&lk->lock_object,
1305 			    &contested, &waittime);
1306 
1307 			/*
1308 			 * If the operation is not allowed to sleep, just give up
1309 			 * and return.
1310 			 */
1311 			if (LK_TRYOP(flags)) {
1312 				LOCK_LOG2(lk, "%s: %p fails the try operation",
1313 				    __func__, lk);
1314 				error = EBUSY;
1315 				break;
1316 			}
1317 
1318 			/*
1319 			 * Acquire the sleepqueue chain lock because we
1320 			 * probably will need to manipulate the waiters flags.
1321 			 */
1322 			sleepq_lock(&lk->lock_object);
1323 			x = lk->lk_lock;
1324 
1325 			/*
1326 			 * If the lock has been released while we spun on
1327 			 * the sleepqueue chain lock, just try again.
1328 			 */
1329 			if (x == LK_UNLOCKED) {
1330 				sleepq_release(&lk->lock_object);
1331 				continue;
1332 			}
1333 
1334 			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1335 			if ((x & ~v) == LK_UNLOCKED) {
1336 				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1337 
1338 				/*
1339 				 * If interruptible sleeps left the exclusive
1340 				 * queue empty, avoid starving the
1341 				 * threads sleeping on the shared queue by
1342 				 * giving them precedence and clearing the
1343 				 * exclusive waiters bit anyway.
1344 				 * Note that the lk_exslpfail count may
1345 				 * overstate the real number of waiters with
1346 				 * the LK_SLEEPFAIL flag set, because such
1347 				 * waiters may also be using interruptible
1348 				 * sleeps, so lk_exslpfail should only be
1349 				 * treated as an upper bound, edge cases
1350 				 * included.
1351 				 */
1352 				if (v & LK_EXCLUSIVE_WAITERS) {
1353 					queue = SQ_EXCLUSIVE_QUEUE;
1354 					v &= ~LK_EXCLUSIVE_WAITERS;
1355 				} else {
1356 
1357 					/*
1358 					 * Exclusive waiters sleeping with
1359 					 * LK_SLEEPFAIL and using
1360 					 * interruptible sleeps/timeouts may
1361 					 * have left spurious lk_exslpfail
1362 					 * counts behind, so clear them anyway.
1363 					 */
1364 					MPASS(v & LK_SHARED_WAITERS);
1365 					lk->lk_exslpfail = 0;
1366 					queue = SQ_SHARED_QUEUE;
1367 					v &= ~LK_SHARED_WAITERS;
1368 				}
1369 				if (queue == SQ_EXCLUSIVE_QUEUE) {
1370 					realexslp =
1371 					    sleepq_sleepcnt(&lk->lock_object,
1372 					    SQ_EXCLUSIVE_QUEUE);
1373 					if (lk->lk_exslpfail >= realexslp) {
1374 						lk->lk_exslpfail = 0;
1375 						queue = SQ_SHARED_QUEUE;
1376 						v &= ~LK_SHARED_WAITERS;
1377 						if (realexslp != 0) {
1378 							LOCK_LOG2(lk,
1379 					"%s: %p has only LK_SLEEPFAIL sleepers",
1380 							    __func__, lk);
1381 							LOCK_LOG2(lk,
1382 			"%s: %p waking up threads on the exclusive queue",
1383 							    __func__, lk);
1384 							wakeup_swapper =
1385 							    sleepq_broadcast(
1386 							    &lk->lock_object,
1387 							    SLEEPQ_LK, 0,
1388 							    SQ_EXCLUSIVE_QUEUE);
1389 						}
1390 					} else
1391 						lk->lk_exslpfail = 0;
1392 				}
1393 				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1394 					sleepq_release(&lk->lock_object);
1395 					continue;
1396 				}
1397 				LOCK_LOG3(lk,
1398 				"%s: %p waking up all threads on the %s queue",
1399 				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1400 				    "shared" : "exclusive");
1401 				wakeup_swapper |= sleepq_broadcast(
1402 				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1403 
1404 				/*
1405 				 * If shared waiters have been woken up, we need
1406 				 * to wait for one of them to acquire the lock
1407 				 * before setting the exclusive waiters flag, in
1408 				 * order to avoid a deadlock.
1409 				 */
1410 				if (queue == SQ_SHARED_QUEUE) {
1411 					for (v = lk->lk_lock;
1412 					    (v & LK_SHARE) && !LK_SHARERS(v);
1413 					    v = lk->lk_lock)
1414 						cpu_spinwait();
1415 				}
1416 			}
1417 
1418 			/*
1419 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1420 			 * fail, loop back and retry.
1421 			 */
1422 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1423 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1424 				    x | LK_EXCLUSIVE_WAITERS)) {
1425 					sleepq_release(&lk->lock_object);
1426 					continue;
1427 				}
1428 				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1429 				    __func__, lk);
1430 			}
1431 
1432 			/*
1433 			 * Since we have been unable to acquire the
1434 			 * exclusive lock and the exclusive waiters flag
1435 			 * is set, we will sleep.
1436 			 */
1437 			if (flags & LK_INTERLOCK) {
1438 				class->lc_unlock(ilk);
1439 				flags &= ~LK_INTERLOCK;
1440 			}
1441 			GIANT_SAVE();
1442 			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1443 			    SQ_EXCLUSIVE_QUEUE);
1444 			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1445 			GIANT_RESTORE();
1446 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1447 			    __func__, lk);
1448 		}
1449 
1450 		if (error == 0) {
1451 			lock_profile_obtain_lock_success(&lk->lock_object,
1452 			    contested, waittime, file, line);
1453 			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1454 			    lk->lk_recurse, file, line);
1455 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1456 			    LK_TRYWIT(flags), file, line);
1457 			TD_LOCKS_INC(curthread);
1458 			STACK_SAVE(lk);
1459 		}
1460 		break;
1461 	default:
1462 		if (flags & LK_INTERLOCK)
1463 			class->lc_unlock(ilk);
1464 		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1465 	}
1466 
1467 	if (flags & LK_INTERLOCK)
1468 		class->lc_unlock(ilk);
1469 	if (wakeup_swapper)
1470 		kick_proc0();
1471 
1472 	return (error);
1473 }
1474 
1475 void
1476 _lockmgr_disown(struct lock *lk, const char *file, int line)
1477 {
1478 	uintptr_t tid, x;
1479 
1480 	if (SCHEDULER_STOPPED())
1481 		return;
1482 
1483 	tid = (uintptr_t)curthread;
1484 	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1485 
1486 	/*
1487 	 * Panic if the lock is recursed.
1488 	 */
1489 	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1490 		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1491 		    __func__,  file, line);
1492 
1493 	/*
1494 	 * If the owner is already LK_KERNPROC just skip the whole operation.
1495 	 */
1496 	if (LK_HOLDER(lk->lk_lock) != tid)
1497 		return;
1498 	lock_profile_release_lock(&lk->lock_object);
1499 	LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
1500 	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1501 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1502 	TD_LOCKS_DEC(curthread);
1503 	STACK_SAVE(lk);
1504 
1505 	/*
1506 	 * In order to preserve waiters flags, just spin.
1507 	 */
1508 	for (;;) {
1509 		x = lk->lk_lock;
1510 		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1511 		x &= LK_ALL_WAITERS;
1512 		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1513 		    LK_KERNPROC | x))
1514 			return;
1515 		cpu_spinwait();
1516 	}
1517 }
1518 
1519 void
1520 lockmgr_printinfo(const struct lock *lk)
1521 {
1522 	struct thread *td;
1523 	uintptr_t x;
1524 
1525 	if (lk->lk_lock == LK_UNLOCKED)
1526 		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1527 	else if (lk->lk_lock & LK_SHARE)
1528 		printf("lock type %s: SHARED (count %ju)\n",
1529 		    lk->lock_object.lo_name,
1530 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1531 	else {
1532 		td = lockmgr_xholder(lk);
1533 		if (td == (struct thread *)LK_KERNPROC)
1534 			printf("lock type %s: EXCL by KERNPROC\n",
1535 			    lk->lock_object.lo_name);
1536 		else
1537 			printf("lock type %s: EXCL by thread %p "
1538 			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1539 			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1540 			    td->td_tid);
1541 	}
1542 
1543 	x = lk->lk_lock;
1544 	if (x & LK_EXCLUSIVE_WAITERS)
1545 		printf(" with exclusive waiters pending\n");
1546 	if (x & LK_SHARED_WAITERS)
1547 		printf(" with shared waiters pending\n");
1548 	if (x & LK_EXCLUSIVE_SPINNERS)
1549 		printf(" with exclusive spinners pending\n");
1550 
1551 	STACK_PRINT(lk);
1552 }
1553 
1554 int
1555 lockstatus(const struct lock *lk)
1556 {
1557 	uintptr_t v, x;
1558 	int ret;
1559 
1560 	ret = LK_SHARED;
1561 	x = lk->lk_lock;
1562 	v = LK_HOLDER(x);
1563 
1564 	if ((x & LK_SHARE) == 0) {
1565 		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1566 			ret = LK_EXCLUSIVE;
1567 		else
1568 			ret = LK_EXCLOTHER;
1569 	} else if (x == LK_UNLOCKED)
1570 		ret = 0;
1571 
1572 	return (ret);
1573 }
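
/*
 * lockstatus() only queries the lock word; it takes no ownership, and the
 * answer may already be stale unless curthread itself holds the lock.
 * Illustrative check (not from this file; "mylock" is hypothetical):
 *
 *	int how = lockstatus(&mylock);
 *
 *	if (how == LK_EXCLUSIVE)
 *		... held exclusively by curthread (or disowned to KERNPROC) ...
 *	else if (how == LK_EXCLOTHER)
 *		... held exclusively by another thread ...
 *	else if (how == LK_SHARED)
 *		... held in shared mode ...
 *	else
 *		... how == 0: not held at all ...
 */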
1574 
1575 #ifdef INVARIANT_SUPPORT
1576 
1577 FEATURE(invariant_support,
1578     "Support for modules compiled with INVARIANTS option");
1579 
1580 #ifndef INVARIANTS
1581 #undef	_lockmgr_assert
1582 #endif
1583 
1584 void
1585 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1586 {
1587 	int slocked = 0;
1588 
1589 	if (panicstr != NULL)
1590 		return;
1591 	switch (what) {
1592 	case KA_SLOCKED:
1593 	case KA_SLOCKED | KA_NOTRECURSED:
1594 	case KA_SLOCKED | KA_RECURSED:
1595 		slocked = 1;
1596 	case KA_LOCKED:
1597 	case KA_LOCKED | KA_NOTRECURSED:
1598 	case KA_LOCKED | KA_RECURSED:
1599 #ifdef WITNESS
1600 
1601 		/*
1602 		 * We cannot trust WITNESS if the lock is held in exclusive
1603 		 * mode and a call to lockmgr_disown() happened.
1604 		 * Work around this by skipping the check if the lock is held in
1605 		 * exclusive mode even for the KA_LOCKED case.
1606 		 */
1607 		if (slocked || (lk->lk_lock & LK_SHARE)) {
1608 			witness_assert(&lk->lock_object, what, file, line);
1609 			break;
1610 		}
1611 #endif
1612 		if (lk->lk_lock == LK_UNLOCKED ||
1613 		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1614 		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1615 			panic("Lock %s not %slocked @ %s:%d\n",
1616 			    lk->lock_object.lo_name, slocked ? "share" : "",
1617 			    file, line);
1618 
1619 		if ((lk->lk_lock & LK_SHARE) == 0) {
1620 			if (lockmgr_recursed(lk)) {
1621 				if (what & KA_NOTRECURSED)
1622 					panic("Lock %s recursed @ %s:%d\n",
1623 					    lk->lock_object.lo_name, file,
1624 					    line);
1625 			} else if (what & KA_RECURSED)
1626 				panic("Lock %s not recursed @ %s:%d\n",
1627 				    lk->lock_object.lo_name, file, line);
1628 		}
1629 		break;
1630 	case KA_XLOCKED:
1631 	case KA_XLOCKED | KA_NOTRECURSED:
1632 	case KA_XLOCKED | KA_RECURSED:
1633 		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1634 			panic("Lock %s not exclusively locked @ %s:%d\n",
1635 			    lk->lock_object.lo_name, file, line);
1636 		if (lockmgr_recursed(lk)) {
1637 			if (what & KA_NOTRECURSED)
1638 				panic("Lock %s recursed @ %s:%d\n",
1639 				    lk->lock_object.lo_name, file, line);
1640 		} else if (what & KA_RECURSED)
1641 			panic("Lock %s not recursed @ %s:%d\n",
1642 			    lk->lock_object.lo_name, file, line);
1643 		break;
1644 	case KA_UNLOCKED:
1645 		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1646 			panic("Lock %s exclusively locked @ %s:%d\n",
1647 			    lk->lock_object.lo_name, file, line);
1648 		break;
1649 	default:
1650 		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1651 		    line);
1652 	}
1653 }
1654 #endif
1655 
1656 #ifdef DDB
1657 int
1658 lockmgr_chain(struct thread *td, struct thread **ownerp)
1659 {
1660 	struct lock *lk;
1661 
1662 	lk = td->td_wchan;
1663 
1664 	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1665 		return (0);
1666 	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1667 	if (lk->lk_lock & LK_SHARE)
1668 		db_printf("SHARED (count %ju)\n",
1669 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1670 	else
1671 		db_printf("EXCL\n");
1672 	*ownerp = lockmgr_xholder(lk);
1673 
1674 	return (1);
1675 }
1676 
1677 static void
1678 db_show_lockmgr(const struct lock_object *lock)
1679 {
1680 	struct thread *td;
1681 	const struct lock *lk;
1682 
1683 	lk = (const struct lock *)lock;
1684 
1685 	db_printf(" state: ");
1686 	if (lk->lk_lock == LK_UNLOCKED)
1687 		db_printf("UNLOCKED\n");
1688 	else if (lk->lk_lock & LK_SHARE)
1689 		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1690 	else {
1691 		td = lockmgr_xholder(lk);
1692 		if (td == (struct thread *)LK_KERNPROC)
1693 			db_printf("XLOCK: LK_KERNPROC\n");
1694 		else
1695 			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1696 			    td->td_tid, td->td_proc->p_pid,
1697 			    td->td_proc->p_comm);
1698 		if (lockmgr_recursed(lk))
1699 			db_printf(" recursed: %d\n", lk->lk_recurse);
1700 	}
1701 	db_printf(" waiters: ");
1702 	switch (lk->lk_lock & LK_ALL_WAITERS) {
1703 	case LK_SHARED_WAITERS:
1704 		db_printf("shared\n");
1705 		break;
1706 	case LK_EXCLUSIVE_WAITERS:
1707 		db_printf("exclusive\n");
1708 		break;
1709 	case LK_ALL_WAITERS:
1710 		db_printf("shared and exclusive\n");
1711 		break;
1712 	default:
1713 		db_printf("none\n");
1714 	}
1715 	db_printf(" spinners: ");
1716 	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1717 		db_printf("exclusive\n");
1718 	else
1719 		db_printf("none\n");
1720 }
1721 #endif
1722