xref: /freebsd/sys/kern/kern_umtx.c (revision 39beb93c)
1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 
55 #include <machine/cpu.h>
56 
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60 
61 #define TYPE_SIMPLE_WAIT	0
62 #define TYPE_CV			1
63 #define TYPE_SIMPLE_LOCK	2
64 #define TYPE_NORMAL_UMUTEX	3
65 #define TYPE_PI_UMUTEX		4
66 #define TYPE_PP_UMUTEX		5
67 #define TYPE_RWLOCK		6
68 
69 #define _UMUTEX_TRY		1
70 #define _UMUTEX_WAIT		2
71 
72 /* Key to represent a unique userland synchronous object */
/*
 * Key to represent a unique userland synchronous object.
 *
 * The object is identified either by (vm_object, offset) when it is
 * shared between processes, or by (vmspace, address) when it is private
 * to a single process.  The "both" member aliases whichever pair is in
 * use so that hashing and matching code need not distinguish the two.
 */
struct umtx_key {
	int	hash;		/* Bucket index filled in by umtxq_hash(). */
	int	type;		/* One of the TYPE_* object kinds. */
	int	shared;		/* Non-zero: info.shared is the valid view. */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			uintptr_t	offset;	/* Object-relative key. */
		} shared;
		struct {
			struct vmspace	*vs;	/* Owning process vmspace. */
			uintptr_t	addr;	/* Userland virtual address. */
		} private;
		struct {
			void		*a;	/* Alias of object / vs. */
			uintptr_t	b;	/* Alias of offset / addr. */
		} both;
	} info;
};
92 
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread (the thread whose priority may be boosted). */
	struct thread		*pi_owner;

	/* Reference count. */
	int			pi_refcount;

	/* List entry linking umtxes held by a thread. */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the per-chain hash of PI mutexes. */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of threads blocked on this PI mutex. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies the userland lock object. */
	struct umtx_key		pi_key;
};
113 
/* A userland synchronous object user (one per thread). */
struct umtx_q {
	/* Linked list entry for the chain's sleep queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key of the object the thread currently waits on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Entry is on a chain sleep queue. */

	/* The thread this entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * PI mutex this thread is blocked on.  Readers may hold either
	 * the chain lock or umtx_lock; writers must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a PI mutex's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes held by this thread that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Priority inherited from a PP mutex (PRI_MAX = no boost). */
	u_char			uq_inherited_pri;
};
145 
146 TAILQ_HEAD(umtxq_head, umtx_q);
147 
/* Userland lock object's wait-queue chain (one hash bucket). */
struct umtxq_chain {
	/* Lock protecting this chain. */
	struct mtx		uc_lock;

	/* Sleep queues: one shared, one exclusive (rwlock writers). */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/*
	 * Busy flag: set while an operation that may block is in
	 * progress on this chain (see umtxq_busy()/umtxq_unbusy()).
	 */
	char			uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
167 
168 #define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
169 
/*
 * Do not propagate time-sharing priority; there is a security reason.
 * A user could create a PI-mutex, have thread A lock it, and have
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would in turn boost A's priority via
 * priority propagation -- and A's priority would never be lowered
 * again, even while it consumes 100% CPU.  That would be unfair to
 * other processes.
 */
178 
179 #define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
180 			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
181 			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
182 
183 #define	GOLDEN_RATIO_PRIME	2654404609U
184 #define	UMTX_CHAINS		128
185 #define	UMTX_SHIFTS		(__WORD_BIT - 7)
186 
187 #define THREAD_SHARE		0
188 #define PROCESS_SHARE		1
189 #define AUTO_SHARE		2
190 
191 #define	GET_SHARE(flags)	\
192     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
193 
194 #define BUSY_SPINS		200
195 
196 static uma_zone_t		umtx_pi_zone;
197 static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
198 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
199 static int			umtx_pi_allocated;
200 
201 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
202 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
203     &umtx_pi_allocated, 0, "Allocated umtx_pi");
204 
205 static void umtxq_sysinit(void *);
206 static void umtxq_hash(struct umtx_key *key);
207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
208 static void umtxq_lock(struct umtx_key *key);
209 static void umtxq_unlock(struct umtx_key *key);
210 static void umtxq_busy(struct umtx_key *key);
211 static void umtxq_unbusy(struct umtx_key *key);
212 static void umtxq_insert_queue(struct umtx_q *uq, int q);
213 static void umtxq_remove_queue(struct umtx_q *uq, int q);
214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
215 static int umtxq_count(struct umtx_key *key);
216 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
217 static int umtx_key_get(void *addr, int type, int share,
218 	struct umtx_key *key);
219 static void umtx_key_release(struct umtx_key *key);
220 static struct umtx_pi *umtx_pi_alloc(int);
221 static void umtx_pi_free(struct umtx_pi *pi);
222 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
223 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
224 static void umtx_thread_cleanup(struct thread *td);
225 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
226 	struct image_params *imgp __unused);
227 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
228 
229 #define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
230 #define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
231 #define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
232 
233 static struct mtx umtx_lock;
234 
235 static void
236 umtxq_sysinit(void *arg __unused)
237 {
238 	int i, j;
239 
240 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
241 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
242 	for (i = 0; i < 2; ++i) {
243 		for (j = 0; j < UMTX_CHAINS; ++j) {
244 			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
245 				 MTX_DEF | MTX_DUPOK);
246 			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
247 			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
248 			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
249 			umtxq_chains[i][j].uc_busy = 0;
250 			umtxq_chains[i][j].uc_waiters = 0;
251 		}
252 	}
253 	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
254 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
255 	    EVENTHANDLER_PRI_ANY);
256 }
257 
258 struct umtx_q *
259 umtxq_alloc(void)
260 {
261 	struct umtx_q *uq;
262 
263 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
264 	TAILQ_INIT(&uq->uq_pi_contested);
265 	uq->uq_inherited_pri = PRI_MAX;
266 	return (uq);
267 }
268 
/*
 * Release a umtx queue entry previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
274 
275 static inline void
276 umtxq_hash(struct umtx_key *key)
277 {
278 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
279 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
280 }
281 
282 static inline int
283 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
284 {
285 	return (k1->type == k2->type &&
286 		k1->info.both.a == k2->info.both.a &&
287 	        k1->info.both.b == k2->info.both.b);
288 }
289 
290 static inline struct umtxq_chain *
291 umtxq_getchain(struct umtx_key *key)
292 {
293 	if (key->type <= TYPE_CV)
294 		return (&umtxq_chains[1][key->hash]);
295 	return (&umtxq_chains[0][key->hash]);
296 }
297 
298 /*
299  * Lock a chain.
300  */
301 static inline void
302 umtxq_lock(struct umtx_key *key)
303 {
304 	struct umtxq_chain *uc;
305 
306 	uc = umtxq_getchain(key);
307 	mtx_lock(&uc->uc_lock);
308 }
309 
310 /*
311  * Unlock a chain.
312  */
313 static inline void
314 umtxq_unlock(struct umtx_key *key)
315 {
316 	struct umtxq_chain *uc;
317 
318 	uc = umtxq_getchain(key);
319 	mtx_unlock(&uc->uc_lock);
320 }
321 
322 /*
323  * Set chain to busy state when following operation
324  * may be blocked (kernel mutex can not be used).
325  */
326 static inline void
327 umtxq_busy(struct umtx_key *key)
328 {
329 	struct umtxq_chain *uc;
330 
331 	uc = umtxq_getchain(key);
332 	mtx_assert(&uc->uc_lock, MA_OWNED);
333 	if (uc->uc_busy) {
334 #ifdef SMP
335 		if (smp_cpus > 1) {
336 			int count = BUSY_SPINS;
337 			if (count > 0) {
338 				umtxq_unlock(key);
339 				while (uc->uc_busy && --count > 0)
340 					cpu_spinwait();
341 				umtxq_lock(key);
342 			}
343 		}
344 #endif
345 		while (uc->uc_busy) {
346 			uc->uc_waiters++;
347 			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
348 			uc->uc_waiters--;
349 		}
350 	}
351 	uc->uc_busy = 1;
352 }
353 
354 /*
355  * Unbusy a chain.
356  */
357 static inline void
358 umtxq_unbusy(struct umtx_key *key)
359 {
360 	struct umtxq_chain *uc;
361 
362 	uc = umtxq_getchain(key);
363 	mtx_assert(&uc->uc_lock, MA_OWNED);
364 	KASSERT(uc->uc_busy != 0, ("not busy"));
365 	uc->uc_busy = 0;
366 	if (uc->uc_waiters)
367 		wakeup_one(uc);
368 }
369 
370 static inline void
371 umtxq_insert_queue(struct umtx_q *uq, int q)
372 {
373 	struct umtxq_chain *uc;
374 
375 	uc = umtxq_getchain(&uq->uq_key);
376 	UMTXQ_LOCKED_ASSERT(uc);
377 	TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
378 	uq->uq_flags |= UQF_UMTXQ;
379 }
380 
381 static inline void
382 umtxq_remove_queue(struct umtx_q *uq, int q)
383 {
384 	struct umtxq_chain *uc;
385 
386 	uc = umtxq_getchain(&uq->uq_key);
387 	UMTXQ_LOCKED_ASSERT(uc);
388 	if (uq->uq_flags & UQF_UMTXQ) {
389 		TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
390 		uq->uq_flags &= ~UQF_UMTXQ;
391 	}
392 }
393 
394 /*
395  * Check if there are multiple waiters
396  */
397 static int
398 umtxq_count(struct umtx_key *key)
399 {
400 	struct umtxq_chain *uc;
401 	struct umtx_q *uq;
402 	int count = 0;
403 
404 	uc = umtxq_getchain(key);
405 	UMTXQ_LOCKED_ASSERT(uc);
406 	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
407 		if (umtx_key_match(&uq->uq_key, key)) {
408 			if (++count > 1)
409 				break;
410 		}
411 	}
412 	return (count);
413 }
414 
415 /*
416  * Check if there are multiple PI waiters and returns first
417  * waiter.
418  */
419 static int
420 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
421 {
422 	struct umtxq_chain *uc;
423 	struct umtx_q *uq;
424 	int count = 0;
425 
426 	*first = NULL;
427 	uc = umtxq_getchain(key);
428 	UMTXQ_LOCKED_ASSERT(uc);
429 	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
430 		if (umtx_key_match(&uq->uq_key, key)) {
431 			if (++count > 1)
432 				break;
433 			*first = uq;
434 		}
435 	}
436 	return (count);
437 }
438 
439 /*
440  * Wake up threads waiting on an userland object.
441  */
442 
443 static int
444 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
445 {
446 	struct umtxq_chain *uc;
447 	struct umtx_q *uq, *next;
448 	int ret;
449 
450 	ret = 0;
451 	uc = umtxq_getchain(key);
452 	UMTXQ_LOCKED_ASSERT(uc);
453 	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
454 		if (umtx_key_match(&uq->uq_key, key)) {
455 			umtxq_remove_queue(uq, q);
456 			wakeup(uq);
457 			if (++ret >= n_wake)
458 				break;
459 		}
460 	}
461 	return (ret);
462 }
463 
464 
465 /*
466  * Wake up specified thread.
467  */
468 static inline void
469 umtxq_signal_thread(struct umtx_q *uq)
470 {
471 	struct umtxq_chain *uc;
472 
473 	uc = umtxq_getchain(&uq->uq_key);
474 	UMTXQ_LOCKED_ASSERT(uc);
475 	umtxq_remove(uq);
476 	wakeup(uq);
477 }
478 
479 /*
480  * Put thread into sleep state, before sleeping, check if
481  * thread was removed from umtx queue.
482  */
483 static inline int
484 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
485 {
486 	struct umtxq_chain *uc;
487 	int error;
488 
489 	uc = umtxq_getchain(&uq->uq_key);
490 	UMTXQ_LOCKED_ASSERT(uc);
491 	if (!(uq->uq_flags & UQF_UMTXQ))
492 		return (0);
493 	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
494 	if (error == EWOULDBLOCK)
495 		error = ETIMEDOUT;
496 	return (error);
497 }
498 
/*
 * Convert a userspace address into a unique logical key.
 *
 * THREAD_SHARE keys are (vmspace, address) pairs.  PROCESS_SHARE keys
 * are (vm_object, offset) pairs and take a reference on the object,
 * released later by umtx_key_release().  AUTO_SHARE picks between the
 * two by inspecting the map entry's inheritance.  Returns 0 on
 * success or EFAULT if the address is not mapped writable.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/*
			 * Object-relative value so all mappings of the
			 * same object page produce the same key.  The
			 * value is only used as a key, so only its
			 * consistency matters, not its sign/direction.
			 */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
544 
545 /*
546  * Release key.
547  */
548 static inline void
549 umtx_key_release(struct umtx_key *key)
550 {
551 	if (key->shared)
552 		vm_object_deallocate(key->info.shared.object);
553 }
554 
/*
 * Lock a umtx object (old-style simple lock).
 *
 * First tries the uncontested and contested-but-unowned fast paths
 * with compare-and-set on the userland word; otherwise enqueues on
 * the wait-queue chain, sets the contested bit and sleeps for up to
 * 'timo' ticks (0 = forever).  A signal or timeout recorded in
 * 'error' terminates the loop on the next failed acquire attempt.
 * Returns 0, EFAULT, ETIMEDOUT or a signal error; the trailing
 * "return (0)" is unreachable -- the loop exits only via the returns
 * inside it.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Enqueue before touching the userland word again so a
		 * concurrent unlock's wakeup cannot be missed.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
651 
/*
 * Lock a umtx object, optionally with a relative timeout.
 *
 * Without a timeout, EINTR is converted to ERESTART so the syscall is
 * transparently restarted.  With a timeout, an absolute deadline is
 * tracked on the monotonic uptime clock and the lock attempt is
 * re-issued after each ETIMEDOUT until the deadline passes; ERESTART
 * is converted back to EINTR because restarting a timed lock would
 * restart the timeout as well.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
691 
/*
 * Unlock a umtx object.
 *
 * Verifies ownership, then clears the userland word with
 * compare-and-set -- marking it contested when more than one waiter
 * remains -- and wakes one waiter.  The chain is marked busy across
 * the CAS so concurrent queue operations are serialized.  Returns 0,
 * EFAULT on a bad address, EPERM if the caller does not own the lock,
 * or EINVAL if the word changed underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
752 
753 #ifdef COMPAT_IA32
754 
/*
 * Lock a umtx object (32-bit compat variant of _do_lock_umtx).
 *
 * Identical protocol to _do_lock_umtx() but operates on a 32-bit
 * word with casuword32().  Uses the UMUTEX_* constants for the
 * unowned/contested values of the 32-bit word.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Enqueue first so a concurrent unlock's wakeup is not lost. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
850 
/*
 * Lock a umtx object (32-bit compat), optionally with a relative
 * timeout.  Same restart/timeout semantics as do_lock_umtx():
 * untimed EINTR becomes ERESTART; timed ERESTART becomes EINTR.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
890 
/*
 * Unlock a umtx object (32-bit compat variant of do_unlock_umtx).
 * Returns 0, EFAULT, EPERM if the caller is not the owner, or EINVAL
 * if the word changed underneath us.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
951 #endif
952 
953 /*
954  * Fetch and compare value, sleep on the address if value is not changed.
955  */
956 static int
957 do_wait(struct thread *td, void *addr, u_long id,
958 	struct timespec *timeout, int compat32, int is_private)
959 {
960 	struct umtx_q *uq;
961 	struct timespec ts, ts2, ts3;
962 	struct timeval tv;
963 	u_long tmp;
964 	int error = 0;
965 
966 	uq = td->td_umtxq;
967 	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
968 		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
969 		return (error);
970 
971 	umtxq_lock(&uq->uq_key);
972 	umtxq_insert(uq);
973 	umtxq_unlock(&uq->uq_key);
974 	if (compat32 == 0)
975 		tmp = fuword(addr);
976         else
977 		tmp = fuword32(addr);
978 	if (tmp != id) {
979 		umtxq_lock(&uq->uq_key);
980 		umtxq_remove(uq);
981 		umtxq_unlock(&uq->uq_key);
982 	} else if (timeout == NULL) {
983 		umtxq_lock(&uq->uq_key);
984 		error = umtxq_sleep(uq, "uwait", 0);
985 		umtxq_remove(uq);
986 		umtxq_unlock(&uq->uq_key);
987 	} else {
988 		getnanouptime(&ts);
989 		timespecadd(&ts, timeout);
990 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
991 		umtxq_lock(&uq->uq_key);
992 		for (;;) {
993 			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
994 			if (!(uq->uq_flags & UQF_UMTXQ))
995 				break;
996 			if (error != ETIMEDOUT)
997 				break;
998 			umtxq_unlock(&uq->uq_key);
999 			getnanouptime(&ts2);
1000 			if (timespeccmp(&ts2, &ts, >=)) {
1001 				error = ETIMEDOUT;
1002 				umtxq_lock(&uq->uq_key);
1003 				break;
1004 			}
1005 			ts3 = ts;
1006 			timespecsub(&ts3, &ts2);
1007 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1008 			umtxq_lock(&uq->uq_key);
1009 		}
1010 		umtxq_remove(uq);
1011 		umtxq_unlock(&uq->uq_key);
1012 	}
1013 	umtx_key_release(&uq->uq_key);
1014 	if (error == ERESTART)
1015 		error = EINTR;
1016 	return (error);
1017 }
1018 
/*
 * Wake up threads sleeping on the specified address.
 *
 * NOTE: the number of threads actually woken is computed into 'ret'
 * but not returned; the function reports only key-lookup failure
 * (0 or EFAULT).
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1037 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * 'mode' selects the behavior: _UMUTEX_WAIT only waits until the
 * mutex looks acquirable (never takes ownership); _UMUTEX_TRY fails
 * with EBUSY instead of sleeping; otherwise the mutex is acquired,
 * sleeping as needed for up to 'timo' ticks (0 = forever).  Returns
 * 0, EFAULT, EDEADLK (error-check mutex already owned by caller),
 * EBUSY, ETIMEDOUT or a signal error.
 *
 * Unlike the old-style umtx path, the chain is kept busy from the
 * queue insert across the contested-bit CAS and only unbusied just
 * before sleeping; this serializes with do_unlock_normal(), which
 * also busies the chain around its own CAS/wakeup.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1149 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested fast path: clear the word and we are done. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the CAS below is serialized with lockers. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1212 
1213 /*
1214  * Check if the mutex is available and wake up a waiter,
1215  * only for simple mutex.
1216  */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Nothing to do while the mutex is still held by some thread. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);	/* Serialize with lockers/unlockers. */
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter left, try to clear the contested bit so
	 * that subsequent unlocks can be done entirely in userland.
	 */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	/* Wake one waiter only if the mutex is (still) unowned. */
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1256 
1257 static inline struct umtx_pi *
1258 umtx_pi_alloc(int flags)
1259 {
1260 	struct umtx_pi *pi;
1261 
1262 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1263 	TAILQ_INIT(&pi->pi_blocked);
1264 	atomic_add_int(&umtx_pi_allocated, 1);
1265 	return (pi);
1266 }
1267 
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	/* Return the record to the zone and update the statistic. */
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}
1274 
1275 /*
1276  * Adjust the thread's position on a pi_state after its priority has been
1277  * changed.
1278  */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	/* Not blocked on any PI mutex; nothing to resort. */
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		/* Find the first entry with a weaker (larger) priority. */
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	/* Non-zero: the thread is (still) on a pi_blocked list. */
	return (1);
}
1319 
1320 /*
1321  * Propagate priority when a thread is blocked on POSIX
1322  * PI mutex.
1323  */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	/*
	 * Walk the chain of blocked-on PI mutexes, lending our priority
	 * to each owner whose priority is weaker (numerically larger),
	 * until the chain ends or a stronger owner is reached.
	 */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;		/* Ownerless mutex ends the chain. */

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at this priority or better; done. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;		/* Owner is not blocked; chain ends. */
	}
}
1363 
1364 /*
1365  * Unpropagate priority for a PI mutex when a thread blocked on
1366  * it is interrupted by signal or resumed by others.
1367  */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/* Walk up the ownership chain, recomputing each owner's lent priority. */
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * The owner's effective priority is the strongest priority
		 * among the head waiters of all PI mutexes it still holds.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Never drop below the owner's own inherited ceiling. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Requeue the owner on whatever it is itself blocked on. */
		umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}
1399 
1400 /*
1401  * Insert a PI mutex into owned list.
1402  */
1403 static void
1404 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1405 {
1406 	struct umtx_q *uq_owner;
1407 
1408 	uq_owner = owner->td_umtxq;
1409 	mtx_assert(&umtx_lock, MA_OWNED);
1410 	if (pi->pi_owner != NULL)
1411 		panic("pi_ower != NULL");
1412 	pi->pi_owner = owner;
1413 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1414 }
1415 
1416 /*
1417  * Claim ownership of a PI mutex.
1418  */
1419 static int
1420 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1421 {
1422 	struct umtx_q *uq, *uq_owner;
1423 
1424 	uq_owner = owner->td_umtxq;
1425 	mtx_lock_spin(&umtx_lock);
1426 	if (pi->pi_owner == owner) {
1427 		mtx_unlock_spin(&umtx_lock);
1428 		return (0);
1429 	}
1430 
1431 	if (pi->pi_owner != NULL) {
1432 		/*
1433 		 * userland may have already messed the mutex, sigh.
1434 		 */
1435 		mtx_unlock_spin(&umtx_lock);
1436 		return (EPERM);
1437 	}
1438 	umtx_pi_setowner(pi, owner);
1439 	uq = TAILQ_FIRST(&pi->pi_blocked);
1440 	if (uq != NULL) {
1441 		int pri;
1442 
1443 		pri = UPRI(uq->uq_thread);
1444 		thread_lock(owner);
1445 		if (pri < UPRI(owner))
1446 			sched_lend_user_prio(owner, pri);
1447 		thread_unlock(owner);
1448 	}
1449 	mtx_unlock_spin(&umtx_lock);
1450 	return (0);
1451 }
1452 
/*
 * Resort 'td' on the blocked queue of the PI mutex it sleeps on and,
 * if its priority improved, propagate it up the ownership chain.
 * Caller must hold umtx_lock, and td must be blocked on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1477 
1478 /*
1479  * Adjust a thread's order position in its blocked PI mutex,
1480  * this may result new priority propagating process.
1481  */
1482 void
1483 umtx_pi_adjust(struct thread *td, u_char oldpri)
1484 {
1485 	struct umtx_q *uq;
1486 	struct umtx_pi *pi;
1487 
1488 	uq = td->td_umtxq;
1489 	mtx_lock_spin(&umtx_lock);
1490 	/*
1491 	 * Pick up the lock that td is blocked on.
1492 	 */
1493 	pi = uq->uq_pi_blocked;
1494 	if (pi != NULL)
1495 		umtx_pi_adjust_locked(td, oldpri);
1496 	mtx_unlock_spin(&umtx_lock);
1497 }
1498 
1499 /*
1500  * Sleep on a PI mutex.
1501  */
1502 static int
1503 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1504 	uint32_t owner, const char *wmesg, int timo)
1505 {
1506 	struct umtxq_chain *uc;
1507 	struct thread *td, *td1;
1508 	struct umtx_q *uq1;
1509 	int pri;
1510 	int error = 0;
1511 
1512 	td = uq->uq_thread;
1513 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1514 	uc = umtxq_getchain(&uq->uq_key);
1515 	UMTXQ_LOCKED_ASSERT(uc);
1516 	umtxq_insert(uq);
1517 	if (pi->pi_owner == NULL) {
1518 		/* XXX
1519 		 * Current, We only support process private PI-mutex,
1520 		 * non-contended PI-mutexes are locked in userland.
1521 		 * Process shared PI-mutex should always be initialized
1522 		 * by kernel and be registered in kernel, locking should
1523 		 * always be done by kernel to avoid security problems.
1524 		 * For process private PI-mutex, we can find owner
1525 		 * thread and boost its priority safely.
1526 		 */
1527 		PROC_LOCK(curproc);
1528 		td1 = thread_find(curproc, owner);
1529 		mtx_lock_spin(&umtx_lock);
1530 		if (td1 != NULL && pi->pi_owner == NULL) {
1531 			uq1 = td1->td_umtxq;
1532 			umtx_pi_setowner(pi, td1);
1533 		}
1534 		PROC_UNLOCK(curproc);
1535 	} else {
1536 		mtx_lock_spin(&umtx_lock);
1537 	}
1538 
1539 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1540 		pri = UPRI(uq1->uq_thread);
1541 		if (pri > UPRI(td))
1542 			break;
1543 	}
1544 
1545 	if (uq1 != NULL)
1546 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1547 	else
1548 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1549 
1550 	uq->uq_pi_blocked = pi;
1551 	thread_lock(td);
1552 	td->td_flags |= TDF_UPIBLOCKED;
1553 	thread_unlock(td);
1554 	mtx_unlock_spin(&umtx_lock);
1555 	umtxq_unlock(&uq->uq_key);
1556 
1557 	mtx_lock_spin(&umtx_lock);
1558 	umtx_propagate_priority(td);
1559 	mtx_unlock_spin(&umtx_lock);
1560 
1561 	umtxq_lock(&uq->uq_key);
1562 	if (uq->uq_flags & UQF_UMTXQ) {
1563 		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1564 		if (error == EWOULDBLOCK)
1565 			error = ETIMEDOUT;
1566 		if (uq->uq_flags & UQF_UMTXQ) {
1567 			umtxq_busy(&uq->uq_key);
1568 			umtxq_remove(uq);
1569 			umtxq_unbusy(&uq->uq_key);
1570 		}
1571 	}
1572 	umtxq_unlock(&uq->uq_key);
1573 
1574 	mtx_lock_spin(&umtx_lock);
1575 	uq->uq_pi_blocked = NULL;
1576 	thread_lock(td);
1577 	td->td_flags &= ~TDF_UPIBLOCKED;
1578 	thread_unlock(td);
1579 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1580 	umtx_unpropagate_priority(pi);
1581 	mtx_unlock_spin(&umtx_lock);
1582 
1583 	umtxq_lock(&uq->uq_key);
1584 
1585 	return (error);
1586 }
1587 
1588 /*
1589  * Add reference count for a PI mutex.
1590  */
1591 static void
1592 umtx_pi_ref(struct umtx_pi *pi)
1593 {
1594 	struct umtxq_chain *uc;
1595 
1596 	uc = umtxq_getchain(&pi->pi_key);
1597 	UMTXQ_LOCKED_ASSERT(uc);
1598 	pi->pi_refcount++;
1599 }
1600 
1601 /*
1602  * Decrease reference count for a PI mutex, if the counter
1603  * is decreased to zero, its memory space is freed.
1604  */
1605 static void
1606 umtx_pi_unref(struct umtx_pi *pi)
1607 {
1608 	struct umtxq_chain *uc;
1609 	int free = 0;
1610 
1611 	uc = umtxq_getchain(&pi->pi_key);
1612 	UMTXQ_LOCKED_ASSERT(uc);
1613 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1614 	if (--pi->pi_refcount == 0) {
1615 		mtx_lock_spin(&umtx_lock);
1616 		if (pi->pi_owner != NULL) {
1617 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1618 				pi, pi_link);
1619 			pi->pi_owner = NULL;
1620 		}
1621 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1622 			("blocked queue not empty"));
1623 		mtx_unlock_spin(&umtx_lock);
1624 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1625 		free = 1;
1626 	}
1627 	if (free)
1628 		umtx_pi_free(pi);
1629 }
1630 
1631 /*
1632  * Find a PI mutex in hash table.
1633  */
1634 static struct umtx_pi *
1635 umtx_pi_lookup(struct umtx_key *key)
1636 {
1637 	struct umtxq_chain *uc;
1638 	struct umtx_pi *pi;
1639 
1640 	uc = umtxq_getchain(key);
1641 	UMTXQ_LOCKED_ASSERT(uc);
1642 
1643 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1644 		if (umtx_key_match(&pi->pi_key, key)) {
1645 			return (pi);
1646 		}
1647 	}
1648 	return (NULL);
1649 }
1650 
1651 /*
1652  * Insert a PI mutex into hash table.
1653  */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	/* Hash the record on its key's chain list. */
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1663 
1664 /*
1665  * Lock a PI mutex.
1666  */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* Find or create the in-kernel PI record for this mutex. */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			/*
			 * Drop the chain lock to allocate with M_WAITOK,
			 * then re-check for a record raced in meanwhile.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);	/* Hold the record across the retry loop. */
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* Record ourselves as the PI owner. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Error-check mutexes detect relocking by the owner. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	/* Drop the reference taken before entering the loop. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1809 
1810 /*
1811  * Unlock a PI mutex.
1812  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The CAS failed; fall through to the contested path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/*
		 * Give up ownership of the PI record and recompute our
		 * own priority from the remaining contested mutexes.
		 */
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&umtx_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		/* Strongest priority still demanded by waiters we hold. */
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	/* Wake the head waiter saved above, if any. */
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1901 
1902 /*
1903  * Lock a PP mutex.
1904  */
1905 static int
1906 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1907 	int try)
1908 {
1909 	struct umtx_q *uq, *uq2;
1910 	struct umtx_pi *pi;
1911 	uint32_t ceiling;
1912 	uint32_t owner, id;
1913 	int error, pri, old_inherited_pri, su;
1914 
1915 	id = td->td_tid;
1916 	uq = td->td_umtxq;
1917 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1918 	    &uq->uq_key)) != 0)
1919 		return (error);
1920 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1921 	for (;;) {
1922 		old_inherited_pri = uq->uq_inherited_pri;
1923 		umtxq_lock(&uq->uq_key);
1924 		umtxq_busy(&uq->uq_key);
1925 		umtxq_unlock(&uq->uq_key);
1926 
1927 		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1928 		if (ceiling > RTP_PRIO_MAX) {
1929 			error = EINVAL;
1930 			goto out;
1931 		}
1932 
1933 		mtx_lock_spin(&umtx_lock);
1934 		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1935 			mtx_unlock_spin(&umtx_lock);
1936 			error = EINVAL;
1937 			goto out;
1938 		}
1939 		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1940 			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1941 			thread_lock(td);
1942 			if (uq->uq_inherited_pri < UPRI(td))
1943 				sched_lend_user_prio(td, uq->uq_inherited_pri);
1944 			thread_unlock(td);
1945 		}
1946 		mtx_unlock_spin(&umtx_lock);
1947 
1948 		owner = casuword32(&m->m_owner,
1949 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1950 
1951 		if (owner == UMUTEX_CONTESTED) {
1952 			error = 0;
1953 			break;
1954 		}
1955 
1956 		/* The address was invalid. */
1957 		if (owner == -1) {
1958 			error = EFAULT;
1959 			break;
1960 		}
1961 
1962 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1963 		    (owner & ~UMUTEX_CONTESTED) == id) {
1964 			error = EDEADLK;
1965 			break;
1966 		}
1967 
1968 		if (try != 0) {
1969 			error = EBUSY;
1970 			break;
1971 		}
1972 
1973 		/*
1974 		 * If we caught a signal, we have retried and now
1975 		 * exit immediately.
1976 		 */
1977 		if (error != 0)
1978 			break;
1979 
1980 		umtxq_lock(&uq->uq_key);
1981 		umtxq_insert(uq);
1982 		umtxq_unbusy(&uq->uq_key);
1983 		error = umtxq_sleep(uq, "umtxpp", timo);
1984 		umtxq_remove(uq);
1985 		umtxq_unlock(&uq->uq_key);
1986 
1987 		mtx_lock_spin(&umtx_lock);
1988 		uq->uq_inherited_pri = old_inherited_pri;
1989 		pri = PRI_MAX;
1990 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1991 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1992 			if (uq2 != NULL) {
1993 				if (pri > UPRI(uq2->uq_thread))
1994 					pri = UPRI(uq2->uq_thread);
1995 			}
1996 		}
1997 		if (pri > uq->uq_inherited_pri)
1998 			pri = uq->uq_inherited_pri;
1999 		thread_lock(td);
2000 		sched_unlend_user_prio(td, pri);
2001 		thread_unlock(td);
2002 		mtx_unlock_spin(&umtx_lock);
2003 	}
2004 
2005 	if (error != 0) {
2006 		mtx_lock_spin(&umtx_lock);
2007 		uq->uq_inherited_pri = old_inherited_pri;
2008 		pri = PRI_MAX;
2009 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2010 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2011 			if (uq2 != NULL) {
2012 				if (pri > UPRI(uq2->uq_thread))
2013 					pri = UPRI(uq2->uq_thread);
2014 			}
2015 		}
2016 		if (pri > uq->uq_inherited_pri)
2017 			pri = uq->uq_inherited_pri;
2018 		thread_lock(td);
2019 		sched_unlend_user_prio(td, pri);
2020 		thread_unlock(td);
2021 		mtx_unlock_spin(&umtx_lock);
2022 	}
2023 
2024 out:
2025 	umtxq_lock(&uq->uq_key);
2026 	umtxq_unbusy(&uq->uq_key);
2027 	umtxq_unlock(&uq->uq_key);
2028 	umtx_key_release(&uq->uq_key);
2029 	return (error);
2030 }
2031 
2032 /*
2033  * Unlock a PP mutex.
2034  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore after unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;	/* No ceiling to keep. */
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);	/* Wake one waiting locker. */
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Recompute our priority: the strongest among the new
		 * inherited ceiling and the head waiters of all PI/PP
		 * mutexes we still hold.
		 */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2117 
/*
 * Change the priority ceiling of a PP mutex; the mutex is briefly
 * acquired (or must already be held by the caller) while the ceiling
 * is updated.  The previous ceiling is copied out via old_ceiling.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);	/* Only PP mutexes have a ceiling. */
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the (unowned, contested) mutex. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: update the ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already hold the mutex; just update the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);	/* Ceiling changed; wake all. */
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2197 
2198 static int
2199 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2200 	int mode)
2201 {
2202 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2203 	case 0:
2204 		return (_do_lock_normal(td, m, flags, timo, mode));
2205 	case UMUTEX_PRIO_INHERIT:
2206 		return (_do_lock_pi(td, m, flags, timo, mode));
2207 	case UMUTEX_PRIO_PROTECT:
2208 		return (_do_lock_pp(td, m, flags, timo, mode));
2209 	}
2210 	return (EINVAL);
2211 }
2212 
2213 /*
2214  * Lock a userland POSIX mutex.
2215  */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Compute the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		/* Retry with the remaining time until the deadline passes. */
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2257 
2258 /*
2259  * Unlock a userland POSIX mutex.
2260  */
2261 static int
2262 do_unlock_umutex(struct thread *td, struct umutex *m)
2263 {
2264 	uint32_t flags;
2265 
2266 	flags = fuword32(&m->m_flags);
2267 	if (flags == -1)
2268 		return (EFAULT);
2269 
2270 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2271 	case 0:
2272 		return (do_unlock_normal(td, m, flags));
2273 	case UMUTEX_PRIO_INHERIT:
2274 		return (do_unlock_pi(td, m, flags));
2275 	case UMUTEX_PRIO_PROTECT:
2276 		return (do_unlock_pp(td, m, flags));
2277 	}
2278 
2279 	return (EINVAL);
2280 }
2281 
/*
 * Wait on a userland condition variable: queue, release the
 * associated mutex, then sleep until signalled, woken, interrupted,
 * or timed out.  The mutex is NOT reacquired here; userland relocks.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);	/* Queue before dropping the mutex. */
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* Consume a pending unpark instead of sleeping. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Sleep in slices until the absolute deadline. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2365 
2366 /*
2367  * Signal a userland condition variable.
2368  */
2369 static int
2370 do_cv_signal(struct thread *td, struct ucond *cv)
2371 {
2372 	struct umtx_key key;
2373 	int error, cnt, nwake;
2374 	uint32_t flags;
2375 
2376 	flags = fuword32(&cv->c_flags);
2377 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2378 		return (error);
2379 	umtxq_lock(&key);
2380 	umtxq_busy(&key);
2381 	cnt = umtxq_count(&key);
2382 	nwake = umtxq_signal(&key, 1);
2383 	if (cnt <= nwake) {
2384 		umtxq_unlock(&key);
2385 		error = suword32(
2386 		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2387 		umtxq_lock(&key);
2388 	}
2389 	umtxq_unbusy(&key);
2390 	umtxq_unlock(&key);
2391 	umtx_key_release(&key);
2392 	return (error);
2393 }
2394 
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	/* Wake everyone queued on this key. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/*
	 * Store to userland with the queue unlocked (the store may
	 * fault); the busy flag keeps other operations on this key out.
	 * NOTE(review): suword32() returns -1 on fault, not an errno
	 * value -- confirm whether this should be mapped to EFAULT.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2420 
/*
 * Acquire a userland rwlock for reading.  The fast path bumps the
 * reader count with a CAS; on contention the thread publishes the
 * URWLOCK_READ_WAITERS bit and sleeps in the kernel.  timo is passed
 * through to umtxq_sleep().
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/*
	 * Unless reader preference was requested (per-call or in the
	 * lock's flags), waiting writers also block new readers so that
	 * writers are not starved.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			/* Reader count lives in the low bits: add one. */
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* A failed sleep (timeout/signal) ends the retry loop. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: clear the bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
2521 
2522 static int
2523 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2524 {
2525 	struct timespec ts, ts2, ts3;
2526 	struct timeval tv;
2527 	int error;
2528 
2529 	getnanouptime(&ts);
2530 	timespecadd(&ts, timeout);
2531 	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2532 	for (;;) {
2533 		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2534 		if (error != ETIMEDOUT)
2535 			break;
2536 		getnanouptime(&ts2);
2537 		if (timespeccmp(&ts2, &ts, >=)) {
2538 			error = ETIMEDOUT;
2539 			break;
2540 		}
2541 		ts3 = ts;
2542 		timespecsub(&ts3, &ts2);
2543 		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2544 	}
2545 	if (error == ERESTART)
2546 		error = EINTR;
2547 	return (error);
2548 }
2549 
/*
 * Acquire a userland rwlock for writing.  The fast path CASes in
 * URWLOCK_WRITE_OWNER when the lock is idle; on contention the thread
 * publishes URWLOCK_WRITE_WAITERS and sleeps in the kernel.  timo is
 * passed through to umtxq_sleep().
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Lock is free (no owner, no readers): try to take it. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* A failed sleep (timeout/signal) ends the retry loop. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Publish the write-waiters bit while the lock is held. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* Lock became free while setting the bit: restart. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		/* Count ourselves among the blocked writers, then sleep. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* Last blocked writer clears the write-waiters bit. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
2638 
2639 static int
2640 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2641 {
2642 	struct timespec ts, ts2, ts3;
2643 	struct timeval tv;
2644 	int error;
2645 
2646 	getnanouptime(&ts);
2647 	timespecadd(&ts, timeout);
2648 	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2649 	for (;;) {
2650 		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2651 		if (error != ETIMEDOUT)
2652 			break;
2653 		getnanouptime(&ts2);
2654 		if (timespeccmp(&ts2, &ts, >=)) {
2655 			error = ETIMEDOUT;
2656 			break;
2657 		}
2658 		ts3 = ts;
2659 		timespecsub(&ts3, &ts2);
2660 		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2661 	}
2662 	if (error == ERESTART)
2663 		error = EINTR;
2664 	return (error);
2665 }
2666 
/*
 * Release a userland rwlock held by the caller and wake waiters:
 * one writer, or all readers, ordered by the lock's preference flag.
 * Returns EPERM when the lock is not held in the expected mode.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: CAS the owner bit away. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				/* Owner bit vanished underneath us. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: CAS the reader count down by one. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				/* Reader count hit zero underneath us. */
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Not locked at all. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick which queue to wake: writers first by default, readers
	 * first when URWLOCK_PREFER_READER is set.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2745 
2746 int
2747 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2748     /* struct umtx *umtx */
2749 {
2750 	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2751 }
2752 
2753 int
2754 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2755     /* struct umtx *umtx */
2756 {
2757 	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2758 }
2759 
2760 static int
2761 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2762 {
2763 	struct timespec *ts, timeout;
2764 	int error;
2765 
2766 	/* Allow a null timespec (wait forever). */
2767 	if (uap->uaddr2 == NULL)
2768 		ts = NULL;
2769 	else {
2770 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2771 		if (error != 0)
2772 			return (error);
2773 		if (timeout.tv_nsec >= 1000000000 ||
2774 		    timeout.tv_nsec < 0) {
2775 			return (EINVAL);
2776 		}
2777 		ts = &timeout;
2778 	}
2779 	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2780 }
2781 
2782 static int
2783 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2784 {
2785 	return (do_unlock_umtx(td, uap->obj, uap->val));
2786 }
2787 
2788 static int
2789 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2790 {
2791 	struct timespec *ts, timeout;
2792 	int error;
2793 
2794 	if (uap->uaddr2 == NULL)
2795 		ts = NULL;
2796 	else {
2797 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2798 		if (error != 0)
2799 			return (error);
2800 		if (timeout.tv_nsec >= 1000000000 ||
2801 		    timeout.tv_nsec < 0)
2802 			return (EINVAL);
2803 		ts = &timeout;
2804 	}
2805 	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2806 }
2807 
2808 static int
2809 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2810 {
2811 	struct timespec *ts, timeout;
2812 	int error;
2813 
2814 	if (uap->uaddr2 == NULL)
2815 		ts = NULL;
2816 	else {
2817 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2818 		if (error != 0)
2819 			return (error);
2820 		if (timeout.tv_nsec >= 1000000000 ||
2821 		    timeout.tv_nsec < 0)
2822 			return (EINVAL);
2823 		ts = &timeout;
2824 	}
2825 	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2826 }
2827 
2828 static int
2829 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2830 {
2831 	struct timespec *ts, timeout;
2832 	int error;
2833 
2834 	if (uap->uaddr2 == NULL)
2835 		ts = NULL;
2836 	else {
2837 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2838 		if (error != 0)
2839 			return (error);
2840 		if (timeout.tv_nsec >= 1000000000 ||
2841 		    timeout.tv_nsec < 0)
2842 			return (EINVAL);
2843 		ts = &timeout;
2844 	}
2845 	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2846 }
2847 
2848 static int
2849 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2850 {
2851 	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2852 }
2853 
2854 static int
2855 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
2856 {
2857 	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
2858 }
2859 
2860 static int
2861 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2862 {
2863 	struct timespec *ts, timeout;
2864 	int error;
2865 
2866 	/* Allow a null timespec (wait forever). */
2867 	if (uap->uaddr2 == NULL)
2868 		ts = NULL;
2869 	else {
2870 		error = copyin(uap->uaddr2, &timeout,
2871 		    sizeof(timeout));
2872 		if (error != 0)
2873 			return (error);
2874 		if (timeout.tv_nsec >= 1000000000 ||
2875 		    timeout.tv_nsec < 0) {
2876 			return (EINVAL);
2877 		}
2878 		ts = &timeout;
2879 	}
2880 	return do_lock_umutex(td, uap->obj, ts, 0);
2881 }
2882 
2883 static int
2884 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2885 {
2886 	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
2887 }
2888 
2889 static int
2890 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2891 {
2892 	struct timespec *ts, timeout;
2893 	int error;
2894 
2895 	/* Allow a null timespec (wait forever). */
2896 	if (uap->uaddr2 == NULL)
2897 		ts = NULL;
2898 	else {
2899 		error = copyin(uap->uaddr2, &timeout,
2900 		    sizeof(timeout));
2901 		if (error != 0)
2902 			return (error);
2903 		if (timeout.tv_nsec >= 1000000000 ||
2904 		    timeout.tv_nsec < 0) {
2905 			return (EINVAL);
2906 		}
2907 		ts = &timeout;
2908 	}
2909 	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
2910 }
2911 
2912 static int
2913 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
2914 {
2915 	return do_wake_umutex(td, uap->obj);
2916 }
2917 
2918 static int
2919 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2920 {
2921 	return do_unlock_umutex(td, uap->obj);
2922 }
2923 
2924 static int
2925 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2926 {
2927 	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2928 }
2929 
2930 static int
2931 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2932 {
2933 	struct timespec *ts, timeout;
2934 	int error;
2935 
2936 	/* Allow a null timespec (wait forever). */
2937 	if (uap->uaddr2 == NULL)
2938 		ts = NULL;
2939 	else {
2940 		error = copyin(uap->uaddr2, &timeout,
2941 		    sizeof(timeout));
2942 		if (error != 0)
2943 			return (error);
2944 		if (timeout.tv_nsec >= 1000000000 ||
2945 		    timeout.tv_nsec < 0) {
2946 			return (EINVAL);
2947 		}
2948 		ts = &timeout;
2949 	}
2950 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2951 }
2952 
2953 static int
2954 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2955 {
2956 	return do_cv_signal(td, uap->obj);
2957 }
2958 
2959 static int
2960 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2961 {
2962 	return do_cv_broadcast(td, uap->obj);
2963 }
2964 
2965 static int
2966 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2967 {
2968 	struct timespec timeout;
2969 	int error;
2970 
2971 	/* Allow a null timespec (wait forever). */
2972 	if (uap->uaddr2 == NULL) {
2973 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2974 	} else {
2975 		error = copyin(uap->uaddr2, &timeout,
2976 		    sizeof(timeout));
2977 		if (error != 0)
2978 			return (error);
2979 		if (timeout.tv_nsec >= 1000000000 ||
2980 		    timeout.tv_nsec < 0) {
2981 			return (EINVAL);
2982 		}
2983 		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
2984 	}
2985 	return (error);
2986 }
2987 
2988 static int
2989 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
2990 {
2991 	struct timespec timeout;
2992 	int error;
2993 
2994 	/* Allow a null timespec (wait forever). */
2995 	if (uap->uaddr2 == NULL) {
2996 		error = do_rw_wrlock(td, uap->obj, 0);
2997 	} else {
2998 		error = copyin(uap->uaddr2, &timeout,
2999 		    sizeof(timeout));
3000 		if (error != 0)
3001 			return (error);
3002 		if (timeout.tv_nsec >= 1000000000 ||
3003 		    timeout.tv_nsec < 0) {
3004 			return (EINVAL);
3005 		}
3006 
3007 		error = do_rw_wrlock2(td, uap->obj, &timeout);
3008 	}
3009 	return (error);
3010 }
3011 
3012 static int
3013 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3014 {
3015 	return do_rw_unlock(td, uap->obj);
3016 }
3017 
/* Signature shared by every UMTX_OP_* handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/* Native dispatch table, indexed by the UMTX_OP_* code in uap->op. */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3041 
3042 int
3043 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
3044 {
3045 	if ((unsigned)uap->op < UMTX_OP_MAX)
3046 		return (*op_table[uap->op])(td, uap);
3047 	return (EINVAL);
3048 }
3049 
3050 #ifdef COMPAT_IA32
3051 int
3052 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3053     /* struct umtx *umtx */
3054 {
3055 	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3056 }
3057 
3058 int
3059 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3060     /* struct umtx *umtx */
3061 {
3062 	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3063 }
3064 
/*
 * 32-bit ABI layout of struct timespec.
 * NOTE(review): both fields are unsigned, so the "tv_nsec < 0" checks
 * in the compat32 entry points can never fire after widening -- confirm
 * whether these should be int32_t.
 */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
3069 
3070 static inline int
3071 copyin_timeout32(void *addr, struct timespec *tsp)
3072 {
3073 	struct timespec32 ts32;
3074 	int error;
3075 
3076 	error = copyin(addr, &ts32, sizeof(struct timespec32));
3077 	if (error == 0) {
3078 		tsp->tv_sec = ts32.tv_sec;
3079 		tsp->tv_nsec = ts32.tv_nsec;
3080 	}
3081 	return (error);
3082 }
3083 
3084 static int
3085 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3086 {
3087 	struct timespec *ts, timeout;
3088 	int error;
3089 
3090 	/* Allow a null timespec (wait forever). */
3091 	if (uap->uaddr2 == NULL)
3092 		ts = NULL;
3093 	else {
3094 		error = copyin_timeout32(uap->uaddr2, &timeout);
3095 		if (error != 0)
3096 			return (error);
3097 		if (timeout.tv_nsec >= 1000000000 ||
3098 		    timeout.tv_nsec < 0) {
3099 			return (EINVAL);
3100 		}
3101 		ts = &timeout;
3102 	}
3103 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3104 }
3105 
3106 static int
3107 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3108 {
3109 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3110 }
3111 
3112 static int
3113 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3114 {
3115 	struct timespec *ts, timeout;
3116 	int error;
3117 
3118 	if (uap->uaddr2 == NULL)
3119 		ts = NULL;
3120 	else {
3121 		error = copyin_timeout32(uap->uaddr2, &timeout);
3122 		if (error != 0)
3123 			return (error);
3124 		if (timeout.tv_nsec >= 1000000000 ||
3125 		    timeout.tv_nsec < 0)
3126 			return (EINVAL);
3127 		ts = &timeout;
3128 	}
3129 	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3130 }
3131 
3132 static int
3133 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3134 {
3135 	struct timespec *ts, timeout;
3136 	int error;
3137 
3138 	/* Allow a null timespec (wait forever). */
3139 	if (uap->uaddr2 == NULL)
3140 		ts = NULL;
3141 	else {
3142 		error = copyin_timeout32(uap->uaddr2, &timeout);
3143 		if (error != 0)
3144 			return (error);
3145 		if (timeout.tv_nsec >= 1000000000 ||
3146 		    timeout.tv_nsec < 0)
3147 			return (EINVAL);
3148 		ts = &timeout;
3149 	}
3150 	return do_lock_umutex(td, uap->obj, ts, 0);
3151 }
3152 
3153 static int
3154 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3155 {
3156 	struct timespec *ts, timeout;
3157 	int error;
3158 
3159 	/* Allow a null timespec (wait forever). */
3160 	if (uap->uaddr2 == NULL)
3161 		ts = NULL;
3162 	else {
3163 		error = copyin_timeout32(uap->uaddr2, &timeout);
3164 		if (error != 0)
3165 			return (error);
3166 		if (timeout.tv_nsec >= 1000000000 ||
3167 		    timeout.tv_nsec < 0)
3168 			return (EINVAL);
3169 		ts = &timeout;
3170 	}
3171 	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3172 }
3173 
3174 static int
3175 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3176 {
3177 	struct timespec *ts, timeout;
3178 	int error;
3179 
3180 	/* Allow a null timespec (wait forever). */
3181 	if (uap->uaddr2 == NULL)
3182 		ts = NULL;
3183 	else {
3184 		error = copyin_timeout32(uap->uaddr2, &timeout);
3185 		if (error != 0)
3186 			return (error);
3187 		if (timeout.tv_nsec >= 1000000000 ||
3188 		    timeout.tv_nsec < 0)
3189 			return (EINVAL);
3190 		ts = &timeout;
3191 	}
3192 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3193 }
3194 
3195 static int
3196 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3197 {
3198 	struct timespec timeout;
3199 	int error;
3200 
3201 	/* Allow a null timespec (wait forever). */
3202 	if (uap->uaddr2 == NULL) {
3203 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3204 	} else {
3205 		error = copyin(uap->uaddr2, &timeout,
3206 		    sizeof(timeout));
3207 		if (error != 0)
3208 			return (error);
3209 		if (timeout.tv_nsec >= 1000000000 ||
3210 		    timeout.tv_nsec < 0) {
3211 			return (EINVAL);
3212 		}
3213 		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3214 	}
3215 	return (error);
3216 }
3217 
3218 static int
3219 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3220 {
3221 	struct timespec timeout;
3222 	int error;
3223 
3224 	/* Allow a null timespec (wait forever). */
3225 	if (uap->uaddr2 == NULL) {
3226 		error = do_rw_wrlock(td, uap->obj, 0);
3227 	} else {
3228 		error = copyin_timeout32(uap->uaddr2, &timeout);
3229 		if (error != 0)
3230 			return (error);
3231 		if (timeout.tv_nsec >= 1000000000 ||
3232 		    timeout.tv_nsec < 0) {
3233 			return (EINVAL);
3234 		}
3235 
3236 		error = do_rw_wrlock2(td, uap->obj, &timeout);
3237 	}
3238 	return (error);
3239 }
3240 
3241 static int
3242 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3243 {
3244 	struct timespec *ts, timeout;
3245 	int error;
3246 
3247 	if (uap->uaddr2 == NULL)
3248 		ts = NULL;
3249 	else {
3250 		error = copyin_timeout32(uap->uaddr2, &timeout);
3251 		if (error != 0)
3252 			return (error);
3253 		if (timeout.tv_nsec >= 1000000000 ||
3254 		    timeout.tv_nsec < 0)
3255 			return (EINVAL);
3256 		ts = &timeout;
3257 	}
3258 	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3259 }
3260 
/*
 * 32-bit compat dispatch table, indexed by UMTX_OP_* just like
 * op_table above.  (The MUTEX_TRYLOCK/MUTEX_LOCK comments were
 * previously swapped; the handlers themselves are in the right slots.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
};
3282 
3283 int
3284 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3285 {
3286 	if ((unsigned)uap->op < UMTX_OP_MAX)
3287 		return (*op_table_compat32[uap->op])(td,
3288 			(struct _umtx_op_args *)uap);
3289 	return (EINVAL);
3290 }
3291 #endif
3292 
3293 void
3294 umtx_thread_init(struct thread *td)
3295 {
3296 	td->td_umtxq = umtxq_alloc();
3297 	td->td_umtxq->uq_thread = td;
3298 }
3299 
/*
 * Free the per-thread umtx queue entry when the thread is destroyed.
 */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
3305 
3306 /*
3307  * It will be called when new thread is created, e.g fork().
3308  */
3309 void
3310 umtx_thread_alloc(struct thread *td)
3311 {
3312 	struct umtx_q *uq;
3313 
3314 	uq = td->td_umtxq;
3315 	uq->uq_inherited_pri = PRI_MAX;
3316 
3317 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3318 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3319 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3320 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3321 }
3322 
3323 /*
3324  * exec() hook.
3325  */
3326 static void
3327 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3328 	struct image_params *imgp __unused)
3329 {
3330 	umtx_thread_cleanup(curthread);
3331 }
3332 
3333 /*
3334  * thread_exit() hook.
3335  */
3336 void
3337 umtx_thread_exit(struct thread *td)
3338 {
3339 	umtx_thread_cleanup(td);
3340 }
3341 
3342 /*
3343  * clean up umtx data.
3344  */
3345 static void
3346 umtx_thread_cleanup(struct thread *td)
3347 {
3348 	struct umtx_q *uq;
3349 	struct umtx_pi *pi;
3350 
3351 	if ((uq = td->td_umtxq) == NULL)
3352 		return;
3353 
3354 	mtx_lock_spin(&umtx_lock);
3355 	uq->uq_inherited_pri = PRI_MAX;
3356 	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3357 		pi->pi_owner = NULL;
3358 		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3359 	}
3360 	thread_lock(td);
3361 	td->td_flags &= ~TDF_UBORROWING;
3362 	thread_unlock(td);
3363 	mtx_unlock_spin(&umtx_lock);
3364 }
3365