xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision 4bc52338)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "namespace.h"
45 #include <stdlib.h>
46 #include <errno.h>
47 #include <string.h>
48 #include <sys/param.h>
49 #include <sys/queue.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include "un-namespace.h"
53 
54 #include "thr_private.h"
55 
56 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
57     "pthread_mutex is too large for off-page");
58 
59 /*
60  * For adaptive mutexes, how many times to spin doing trylock2
61  * before entering the kernel to block
62  */
63 #define MUTEX_ADAPTIVE_SPINS	2000
64 
/*
 * Prototypes
 *
 * Entry points with external linkage.  The public pthread_* names are
 * bound to them by the weak/strong references that follow.
 */
int	__pthread_mutex_consistent(pthread_mutex_t *mutex);
int	__pthread_mutex_init(pthread_mutex_t * __restrict mutex,
		const pthread_mutexattr_t * __restrict mutex_attr);
int	__pthread_mutex_lock(pthread_mutex_t *mutex);
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);
int	__pthread_mutex_trylock(pthread_mutex_t *mutex);

/* Non-portable accessors for the per-mutex spin and yield loop counts. */
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

/* File-local helpers that are used before they are defined. */
static bool	is_pshared_mutex(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static int	mutex_qidx(struct pthread_mutex *m);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
92 
/*
 * Functions implemented under a double-underscore name: the public
 * pthread_* name is a weak reference and the single-underscore name a
 * strong reference to the same implementation.
 */
__weak_reference(__pthread_mutex_init, pthread_mutex_init);
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
__weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
__strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
__strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);

/* Implemented as _pthread_mutex_consistent; the other names refer to it. */
__weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
__strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);

/*
 * Single-underscore implementations exported directly under the public
 * name; libc-internal and application callers use the same function.
 */
__weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);

/* Priority ceiling accessors. */
__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

/* Non-portable spin loop count accessors. */
__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

/* Non-portable yield loop count accessors and the ownership query. */
__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
120 
/*
 * Reset the queue linkage of a mutex.
 *
 * With _PTHREADS_INVARIANTS defined, the next/prev pointers of both
 * the m_qe and the m_pqe entries are set to NULL; the assertion
 * helpers in this file test those pointers.  Without it this function
 * does nothing.
 */
static void
mutex_init_link(struct pthread_mutex *m)
{
#if defined(_PTHREADS_INVARIANTS)
	m->m_qe.tqe_next = NULL;
	m->m_qe.tqe_prev = NULL;
	m->m_pqe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
#endif
}
132 
133 static void
134 mutex_assert_is_owned(struct pthread_mutex *m __unused)
135 {
136 
137 #if defined(_PTHREADS_INVARIANTS)
138 	if (__predict_false(m->m_qe.tqe_prev == NULL))
139 		PANIC("mutex %p own %#x is not on list %p %p",
140 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
141 #endif
142 }
143 
144 static void
145 mutex_assert_not_owned(struct pthread *curthread __unused,
146     struct pthread_mutex *m __unused)
147 {
148 
149 #if defined(_PTHREADS_INVARIANTS)
150 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
151 	    m->m_qe.tqe_next != NULL))
152 		PANIC("mutex %p own %#x is on list %p %p",
153 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
154 	if (__predict_false(is_robust_mutex(m) &&
155 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
156 	    (is_pshared_mutex(m) && curthread->robust_list ==
157 	    (uintptr_t)&m->m_lock) ||
158 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
159 	    (uintptr_t)&m->m_lock))))
160 		PANIC(
161     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
162 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
163 		    m->m_rb_prev, (void *)curthread->robust_list,
164 		    (void *)curthread->priv_robust_list);
165 #endif
166 }
167 
168 static bool
169 is_pshared_mutex(struct pthread_mutex *m)
170 {
171 
172 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
173 }
174 
175 static bool
176 is_robust_mutex(struct pthread_mutex *m)
177 {
178 
179 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
180 }
181 
182 int
183 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
184 {
185 
186 #if defined(_PTHREADS_INVARIANTS)
187 	if (__predict_false(curthread->inact_mtx != 0))
188 		PANIC("inact_mtx enter");
189 #endif
190 	if (!is_robust_mutex(m))
191 		return (0);
192 
193 	mutex_init_robust(curthread);
194 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
195 	return (1);
196 }
197 
198 void
199 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
200 {
201 
202 #if defined(_PTHREADS_INVARIANTS)
203 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
204 		PANIC("inact_mtx leave");
205 #endif
206 	curthread->inact_mtx = 0;
207 }
208 
209 static int
210 mutex_check_attr(const struct pthread_mutex_attr *attr)
211 {
212 
213 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
214 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
215 		return (EINVAL);
216 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
217 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
218 		return (EINVAL);
219 	return (0);
220 }
221 
222 static void
223 mutex_init_robust(struct pthread *curthread)
224 {
225 	struct umtx_robust_lists_params rb;
226 
227 	if (curthread == NULL)
228 		curthread = _get_curthread();
229 	if (curthread->robust_inited)
230 		return;
231 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
232 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
233 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
234 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
235 	curthread->robust_inited = 1;
236 }
237 
238 static void
239 mutex_init_body(struct pthread_mutex *pmutex,
240     const struct pthread_mutex_attr *attr)
241 {
242 
243 	pmutex->m_flags = attr->m_type;
244 	pmutex->m_count = 0;
245 	pmutex->m_spinloops = 0;
246 	pmutex->m_yieldloops = 0;
247 	mutex_init_link(pmutex);
248 	switch (attr->m_protocol) {
249 	case PTHREAD_PRIO_NONE:
250 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
251 		pmutex->m_lock.m_flags = 0;
252 		break;
253 	case PTHREAD_PRIO_INHERIT:
254 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
255 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
256 		break;
257 	case PTHREAD_PRIO_PROTECT:
258 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
259 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
260 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
261 		break;
262 	}
263 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
264 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
265 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
266 		mutex_init_robust(NULL);
267 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
268 	}
269 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
270 		pmutex->m_spinloops =
271 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
272 		pmutex->m_yieldloops = _thr_yieldloops;
273 	}
274 }
275 
276 static int
277 mutex_init(pthread_mutex_t *mutex,
278     const struct pthread_mutex_attr *mutex_attr,
279     void *(calloc_cb)(size_t, size_t))
280 {
281 	const struct pthread_mutex_attr *attr;
282 	struct pthread_mutex *pmutex;
283 	int error;
284 
285 	if (mutex_attr == NULL) {
286 		attr = &_pthread_mutexattr_default;
287 	} else {
288 		attr = mutex_attr;
289 		error = mutex_check_attr(attr);
290 		if (error != 0)
291 			return (error);
292 	}
293 	if ((pmutex = (pthread_mutex_t)
294 		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
295 		return (ENOMEM);
296 	mutex_init_body(pmutex, attr);
297 	*mutex = pmutex;
298 	return (0);
299 }
300 
301 static int
302 init_static(struct pthread *thread, pthread_mutex_t *mutex)
303 {
304 	int ret;
305 
306 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
307 
308 	if (*mutex == THR_MUTEX_INITIALIZER)
309 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
310 		    __thr_calloc);
311 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
312 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
313 		    __thr_calloc);
314 	else
315 		ret = 0;
316 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
317 
318 	return (ret);
319 }
320 
321 static void
322 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
323 {
324 	struct pthread_mutex *m2;
325 
326 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
327 	if (m2 != NULL)
328 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
329 	else
330 		m->m_lock.m_ceilings[1] = -1;
331 }
332 
333 static void
334 shared_mutex_init(struct pthread_mutex *pmtx, const struct
335     pthread_mutex_attr *mutex_attr)
336 {
337 	static const struct pthread_mutex_attr foobar_mutex_attr = {
338 		.m_type = PTHREAD_MUTEX_DEFAULT,
339 		.m_protocol = PTHREAD_PRIO_NONE,
340 		.m_ceiling = 0,
341 		.m_pshared = PTHREAD_PROCESS_SHARED,
342 		.m_robust = PTHREAD_MUTEX_STALLED,
343 	};
344 	bool done;
345 
346 	/*
347 	 * Hack to allow multiple pthread_mutex_init() calls on the
348 	 * same process-shared mutex.  We rely on kernel allocating
349 	 * zeroed offpage for the mutex, i.e. the
350 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
351 	 */
352 	for (done = false; !done;) {
353 		switch (pmtx->m_ps) {
354 		case PMUTEX_INITSTAGE_DONE:
355 			atomic_thread_fence_acq();
356 			done = true;
357 			break;
358 		case PMUTEX_INITSTAGE_ALLOC:
359 			if (atomic_cmpset_int(&pmtx->m_ps,
360 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
361 				if (mutex_attr == NULL)
362 					mutex_attr = &foobar_mutex_attr;
363 				mutex_init_body(pmtx, mutex_attr);
364 				atomic_store_rel_int(&pmtx->m_ps,
365 				    PMUTEX_INITSTAGE_DONE);
366 				done = true;
367 			}
368 			break;
369 		case PMUTEX_INITSTAGE_BUSY:
370 			_pthread_yield();
371 			break;
372 		default:
373 			PANIC("corrupted offpage");
374 			break;
375 		}
376 	}
377 }
378 
379 int
380 __pthread_mutex_init(pthread_mutex_t * __restrict mutex,
381     const pthread_mutexattr_t * __restrict mutex_attr)
382 {
383 	struct pthread_mutex *pmtx;
384 	int ret;
385 
386 	if (mutex_attr != NULL) {
387 		ret = mutex_check_attr(*mutex_attr);
388 		if (ret != 0)
389 			return (ret);
390 	}
391 	if (mutex_attr == NULL ||
392 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
393 		__thr_malloc_init();
394 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
395 		    __thr_calloc));
396 	}
397 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
398 	if (pmtx == NULL)
399 		return (EFAULT);
400 	*mutex = THR_PSHARED_PTR;
401 	shared_mutex_init(pmtx, *mutex_attr);
402 	return (0);
403 }
404 
405 /* This function is used internally by malloc. */
406 int
407 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
408     void *(calloc_cb)(size_t, size_t))
409 {
410 	static const struct pthread_mutex_attr attr = {
411 		.m_type = PTHREAD_MUTEX_NORMAL,
412 		.m_protocol = PTHREAD_PRIO_NONE,
413 		.m_ceiling = 0,
414 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
415 		.m_robust = PTHREAD_MUTEX_STALLED,
416 	};
417 	int ret;
418 
419 	ret = mutex_init(mutex, &attr, calloc_cb);
420 	if (ret == 0)
421 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
422 	return (ret);
423 }
424 
425 /*
426  * Fix mutex ownership for child process.
427  *
428  * Process private mutex ownership is transmitted from the forking
429  * thread to the child process.
430  *
431  * Process shared mutex should not be inherited because owner is
432  * forking thread which is in parent process, they are removed from
433  * the owned mutex list.
434  */
435 static void
436 queue_fork(struct pthread *curthread, struct mutex_queue *q,
437     struct mutex_queue *qp, uint bit)
438 {
439 	struct pthread_mutex *m;
440 
441 	TAILQ_INIT(q);
442 	TAILQ_FOREACH(m, qp, m_pqe) {
443 		TAILQ_INSERT_TAIL(q, m, m_qe);
444 		m->m_lock.m_owner = TID(curthread) | bit;
445 	}
446 }
447 
448 void
449 _mutex_fork(struct pthread *curthread)
450 {
451 
452 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
453 	    &curthread->mq[TMQ_NORM_PRIV], 0);
454 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
455 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
456 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
457 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
458 	curthread->robust_list = 0;
459 }
460 
461 int
462 _pthread_mutex_destroy(pthread_mutex_t *mutex)
463 {
464 	pthread_mutex_t m, m1;
465 	int ret;
466 
467 	m = *mutex;
468 	if (m < THR_MUTEX_DESTROYED) {
469 		ret = 0;
470 	} else if (m == THR_MUTEX_DESTROYED) {
471 		ret = EINVAL;
472 	} else {
473 		if (m == THR_PSHARED_PTR) {
474 			m1 = __thr_pshared_offpage(mutex, 0);
475 			if (m1 != NULL) {
476 				mutex_assert_not_owned(_get_curthread(), m1);
477 				__thr_pshared_destroy(mutex);
478 			}
479 			*mutex = THR_MUTEX_DESTROYED;
480 			return (0);
481 		}
482 		if (PMUTEX_OWNER_ID(m) != 0 &&
483 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
484 			ret = EBUSY;
485 		} else {
486 			*mutex = THR_MUTEX_DESTROYED;
487 			mutex_assert_not_owned(_get_curthread(), m);
488 			__thr_free(m);
489 			ret = 0;
490 		}
491 	}
492 
493 	return (ret);
494 }
495 
496 static int
497 mutex_qidx(struct pthread_mutex *m)
498 {
499 
500 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
501 		return (TMQ_NORM);
502 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
503 }
504 
505 /*
506  * Both enqueue_mutex() and dequeue_mutex() operate on the
507  * thread-private linkage of the locked mutexes and on the robust
508  * linkage.
509  *
510  * Robust list, as seen by kernel, must be consistent even in the case
511  * of thread termination at arbitrary moment.  Since either enqueue or
512  * dequeue for list walked by kernel consists of rewriting a single
513  * forward pointer, it is safe.  On the other hand, rewrite of the
514  * back pointer is not atomic WRT the forward one, but kernel does not
515  * care.
516  */
517 static void
518 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
519     int error)
520 {
521 	struct pthread_mutex *m1;
522 	uintptr_t *rl;
523 	int qidx;
524 
525 	/* Add to the list of owned mutexes: */
526 	if (error != EOWNERDEAD)
527 		mutex_assert_not_owned(curthread, m);
528 	qidx = mutex_qidx(m);
529 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
530 	if (!is_pshared_mutex(m))
531 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
532 	if (is_robust_mutex(m)) {
533 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
534 		    &curthread->priv_robust_list;
535 		m->m_rb_prev = NULL;
536 		if (*rl != 0) {
537 			m1 = __containerof((void *)*rl,
538 			    struct pthread_mutex, m_lock);
539 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
540 			m1->m_rb_prev = m;
541 		} else {
542 			m1 = NULL;
543 			m->m_lock.m_rb_lnk = 0;
544 		}
545 		*rl = (uintptr_t)&m->m_lock;
546 	}
547 }
548 
549 static void
550 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
551 {
552 	struct pthread_mutex *mp, *mn;
553 	int qidx;
554 
555 	mutex_assert_is_owned(m);
556 	qidx = mutex_qidx(m);
557 	if (is_robust_mutex(m)) {
558 		mp = m->m_rb_prev;
559 		if (mp == NULL) {
560 			if (is_pshared_mutex(m)) {
561 				curthread->robust_list = m->m_lock.m_rb_lnk;
562 			} else {
563 				curthread->priv_robust_list =
564 				    m->m_lock.m_rb_lnk;
565 			}
566 		} else {
567 			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
568 		}
569 		if (m->m_lock.m_rb_lnk != 0) {
570 			mn = __containerof((void *)m->m_lock.m_rb_lnk,
571 			    struct pthread_mutex, m_lock);
572 			mn->m_rb_prev = m->m_rb_prev;
573 		}
574 		m->m_lock.m_rb_lnk = 0;
575 		m->m_rb_prev = NULL;
576 	}
577 	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
578 	if (!is_pshared_mutex(m))
579 		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
580 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
581 		set_inherited_priority(curthread, m);
582 	mutex_init_link(m);
583 }
584 
585 static int
586 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
587 {
588 	int ret;
589 
590 	*m = *mutex;
591 	ret = 0;
592 	if (*m == THR_PSHARED_PTR) {
593 		*m = __thr_pshared_offpage(mutex, 0);
594 		if (*m == NULL)
595 			ret = EINVAL;
596 		else
597 			shared_mutex_init(*m, NULL);
598 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
599 		if (*m == THR_MUTEX_DESTROYED) {
600 			ret = EINVAL;
601 		} else {
602 			ret = init_static(_get_curthread(), mutex);
603 			if (ret == 0)
604 				*m = *mutex;
605 		}
606 	}
607 	return (ret);
608 }
609 
610 int
611 __pthread_mutex_trylock(pthread_mutex_t *mutex)
612 {
613 	struct pthread *curthread;
614 	struct pthread_mutex *m;
615 	uint32_t id;
616 	int ret, robust;
617 
618 	ret = check_and_init_mutex(mutex, &m);
619 	if (ret != 0)
620 		return (ret);
621 	curthread = _get_curthread();
622 	id = TID(curthread);
623 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
624 		THR_CRITICAL_ENTER(curthread);
625 	robust = _mutex_enter_robust(curthread, m);
626 	ret = _thr_umutex_trylock(&m->m_lock, id);
627 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
628 		enqueue_mutex(curthread, m, ret);
629 		if (ret == EOWNERDEAD)
630 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
631 	} else if (PMUTEX_OWNER_ID(m) == id) {
632 		ret = mutex_self_trylock(m);
633 	} /* else {} */
634 	if (robust)
635 		_mutex_leave_robust(curthread, m);
636 	if (ret != 0 && ret != EOWNERDEAD &&
637 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
638 		THR_CRITICAL_LEAVE(curthread);
639 	return (ret);
640 }
641 
642 static int
643 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
644     const struct timespec *abstime)
645 {
646 	uint32_t id, owner;
647 	int count, ret;
648 
649 	id = TID(curthread);
650 	if (PMUTEX_OWNER_ID(m) == id)
651 		return (mutex_self_lock(m, abstime));
652 
653 	/*
654 	 * For adaptive mutexes, spin for a bit in the expectation
655 	 * that if the application requests this mutex type then
656 	 * the lock is likely to be released quickly and it is
657 	 * faster than entering the kernel
658 	 */
659 	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
660 	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
661 		goto sleep_in_kernel;
662 
663 	if (!_thr_is_smp)
664 		goto yield_loop;
665 
666 	count = m->m_spinloops;
667 	while (count--) {
668 		owner = m->m_lock.m_owner;
669 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
670 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
671 			    id | owner)) {
672 				ret = 0;
673 				goto done;
674 			}
675 		}
676 		CPU_SPINWAIT;
677 	}
678 
679 yield_loop:
680 	count = m->m_yieldloops;
681 	while (count--) {
682 		_sched_yield();
683 		owner = m->m_lock.m_owner;
684 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
685 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
686 			    id | owner)) {
687 				ret = 0;
688 				goto done;
689 			}
690 		}
691 	}
692 
693 sleep_in_kernel:
694 	if (abstime == NULL)
695 		ret = __thr_umutex_lock(&m->m_lock, id);
696 	else if (__predict_false(abstime->tv_nsec < 0 ||
697 	    abstime->tv_nsec >= 1000000000))
698 		ret = EINVAL;
699 	else
700 		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
701 done:
702 	if (ret == 0 || ret == EOWNERDEAD) {
703 		enqueue_mutex(curthread, m, ret);
704 		if (ret == EOWNERDEAD)
705 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
706 	}
707 	return (ret);
708 }
709 
710 static inline int
711 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
712     bool cvattach, bool rb_onlist)
713 {
714 	struct pthread *curthread;
715 	int ret, robust;
716 
717 	robust = 0;  /* pacify gcc */
718 	curthread  = _get_curthread();
719 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
720 		THR_CRITICAL_ENTER(curthread);
721 	if (!rb_onlist)
722 		robust = _mutex_enter_robust(curthread, m);
723 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
724 	if (ret == 0 || ret == EOWNERDEAD) {
725 		enqueue_mutex(curthread, m, ret);
726 		if (ret == EOWNERDEAD)
727 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
728 	} else {
729 		ret = mutex_lock_sleep(curthread, m, abstime);
730 	}
731 	if (!rb_onlist && robust)
732 		_mutex_leave_robust(curthread, m);
733 	if (ret != 0 && ret != EOWNERDEAD &&
734 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
735 		THR_CRITICAL_LEAVE(curthread);
736 	return (ret);
737 }
738 
/*
 * pthread_mutex_lock() implementation: make sure the library is
 * initialized, resolve (and if needed lazily initialize) the mutex,
 * then lock it without a timeout.  Returns the first error met, or
 * the result of mutex_lock_common().
 */
int
__pthread_mutex_lock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, NULL, false, false));
}
751 
/*
 * pthread_mutex_timedlock() implementation: same as
 * __pthread_mutex_lock() except that abstime is passed down to
 * mutex_lock_common() as the absolute timeout.
 */
int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, abstime, false, false));
}
765 
766 int
767 _pthread_mutex_unlock(pthread_mutex_t *mutex)
768 {
769 	struct pthread_mutex *mp;
770 
771 	if (*mutex == THR_PSHARED_PTR) {
772 		mp = __thr_pshared_offpage(mutex, 0);
773 		if (mp == NULL)
774 			return (EINVAL);
775 		shared_mutex_init(mp, NULL);
776 	} else {
777 		mp = *mutex;
778 	}
779 	return (mutex_unlock_common(mp, false, NULL));
780 }
781 
782 int
783 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
784 {
785 	int error;
786 
787 	error = mutex_lock_common(m, NULL, true, rb_onlist);
788 	if (error == 0 || error == EOWNERDEAD)
789 		m->m_count = count;
790 	return (error);
791 }
792 
793 int
794 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
795 {
796 
797 	/*
798 	 * Clear the count in case this is a recursive mutex.
799 	 */
800 	*count = m->m_count;
801 	m->m_count = 0;
802 	(void)mutex_unlock_common(m, true, defer);
803         return (0);
804 }
805 
806 int
807 _mutex_cv_attach(struct pthread_mutex *m, int count)
808 {
809 	struct pthread *curthread;
810 
811 	curthread = _get_curthread();
812 	enqueue_mutex(curthread, m, 0);
813 	m->m_count = count;
814 	return (0);
815 }
816 
817 int
818 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
819 {
820 	struct pthread *curthread;
821 	int deferred, error;
822 
823 	curthread = _get_curthread();
824 	if ((error = _mutex_owned(curthread, mp)) != 0)
825 		return (error);
826 
827 	/*
828 	 * Clear the count in case this is a recursive mutex.
829 	 */
830 	*recurse = mp->m_count;
831 	mp->m_count = 0;
832 	dequeue_mutex(curthread, mp);
833 
834 	/* Will this happen in real-world ? */
835         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
836 		deferred = 1;
837 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
838 	} else
839 		deferred = 0;
840 
841 	if (deferred)  {
842 		_thr_wake_all(curthread->defer_waiters,
843 		    curthread->nwaiter_defer);
844 		curthread->nwaiter_defer = 0;
845 	}
846 	return (0);
847 }
848 
849 static int
850 mutex_self_trylock(struct pthread_mutex *m)
851 {
852 	int ret;
853 
854 	switch (PMUTEX_TYPE(m->m_flags)) {
855 	case PTHREAD_MUTEX_ERRORCHECK:
856 	case PTHREAD_MUTEX_NORMAL:
857 	case PTHREAD_MUTEX_ADAPTIVE_NP:
858 		ret = EBUSY;
859 		break;
860 
861 	case PTHREAD_MUTEX_RECURSIVE:
862 		/* Increment the lock count: */
863 		if (m->m_count + 1 > 0) {
864 			m->m_count++;
865 			ret = 0;
866 		} else
867 			ret = EAGAIN;
868 		break;
869 
870 	default:
871 		/* Trap invalid mutex types; */
872 		ret = EINVAL;
873 	}
874 
875 	return (ret);
876 }
877 
878 static int
879 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
880 {
881 	struct timespec	ts1, ts2;
882 	int ret;
883 
884 	switch (PMUTEX_TYPE(m->m_flags)) {
885 	case PTHREAD_MUTEX_ERRORCHECK:
886 	case PTHREAD_MUTEX_ADAPTIVE_NP:
887 		if (abstime) {
888 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
889 			    abstime->tv_nsec >= 1000000000) {
890 				ret = EINVAL;
891 			} else {
892 				clock_gettime(CLOCK_REALTIME, &ts1);
893 				TIMESPEC_SUB(&ts2, abstime, &ts1);
894 				__sys_nanosleep(&ts2, NULL);
895 				ret = ETIMEDOUT;
896 			}
897 		} else {
898 			/*
899 			 * POSIX specifies that mutexes should return
900 			 * EDEADLK if a recursive lock is detected.
901 			 */
902 			ret = EDEADLK;
903 		}
904 		break;
905 
906 	case PTHREAD_MUTEX_NORMAL:
907 		/*
908 		 * What SS2 define as a 'normal' mutex.  Intentionally
909 		 * deadlock on attempts to get a lock you already own.
910 		 */
911 		ret = 0;
912 		if (abstime) {
913 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
914 			    abstime->tv_nsec >= 1000000000) {
915 				ret = EINVAL;
916 			} else {
917 				clock_gettime(CLOCK_REALTIME, &ts1);
918 				TIMESPEC_SUB(&ts2, abstime, &ts1);
919 				__sys_nanosleep(&ts2, NULL);
920 				ret = ETIMEDOUT;
921 			}
922 		} else {
923 			ts1.tv_sec = 30;
924 			ts1.tv_nsec = 0;
925 			for (;;)
926 				__sys_nanosleep(&ts1, NULL);
927 		}
928 		break;
929 
930 	case PTHREAD_MUTEX_RECURSIVE:
931 		/* Increment the lock count: */
932 		if (m->m_count + 1 > 0) {
933 			m->m_count++;
934 			ret = 0;
935 		} else
936 			ret = EAGAIN;
937 		break;
938 
939 	default:
940 		/* Trap invalid mutex types; */
941 		ret = EINVAL;
942 	}
943 
944 	return (ret);
945 }
946 
947 static int
948 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
949 {
950 	struct pthread *curthread;
951 	uint32_t id;
952 	int deferred, error, private, robust;
953 
954 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
955 		if (m == THR_MUTEX_DESTROYED)
956 			return (EINVAL);
957 		return (EPERM);
958 	}
959 
960 	curthread = _get_curthread();
961 	id = TID(curthread);
962 
963 	/*
964 	 * Check if the running thread is not the owner of the mutex.
965 	 */
966 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
967 		return (EPERM);
968 
969 	error = 0;
970 	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
971 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
972 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
973 		m->m_count--;
974 	} else {
975 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
976 			deferred = 1;
977 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
978         	} else
979 			deferred = 0;
980 
981 		robust = _mutex_enter_robust(curthread, m);
982 		dequeue_mutex(curthread, m);
983 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
984 		if (deferred)  {
985 			if (mtx_defer == NULL) {
986 				_thr_wake_all(curthread->defer_waiters,
987 				    curthread->nwaiter_defer);
988 				curthread->nwaiter_defer = 0;
989 			} else
990 				*mtx_defer = 1;
991 		}
992 		if (robust)
993 			_mutex_leave_robust(curthread, m);
994 	}
995 	if (!cv && private)
996 		THR_CRITICAL_LEAVE(curthread);
997 	return (error);
998 }
999 
1000 int
1001 _pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
1002     int * __restrict prioceiling)
1003 {
1004 	struct pthread_mutex *m;
1005 
1006 	if (*mutex == THR_PSHARED_PTR) {
1007 		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
1008 		if (m == NULL)
1009 			return (EINVAL);
1010 		shared_mutex_init(m, NULL);
1011 	} else {
1012 		m = *mutex;
1013 		if (m <= THR_MUTEX_DESTROYED)
1014 			return (EINVAL);
1015 	}
1016 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1017 		return (EINVAL);
1018 	*prioceiling = m->m_lock.m_ceilings[0];
1019 	return (0);
1020 }
1021 
1022 int
1023 _pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
1024     int ceiling, int * __restrict old_ceiling)
1025 {
1026 	struct pthread *curthread;
1027 	struct pthread_mutex *m, *m1, *m2;
1028 	struct mutex_queue *q, *qp;
1029 	int qidx, ret;
1030 
1031 	if (*mutex == THR_PSHARED_PTR) {
1032 		m = __thr_pshared_offpage(mutex, 0);
1033 		if (m == NULL)
1034 			return (EINVAL);
1035 		shared_mutex_init(m, NULL);
1036 	} else {
1037 		m = *mutex;
1038 		if (m <= THR_MUTEX_DESTROYED)
1039 			return (EINVAL);
1040 	}
1041 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1042 		return (EINVAL);
1043 
1044 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1045 	if (ret != 0)
1046 		return (ret);
1047 
1048 	curthread = _get_curthread();
1049 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1050 		mutex_assert_is_owned(m);
1051 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1052 		m2 = TAILQ_NEXT(m, m_qe);
1053 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1054 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1055 			qidx = mutex_qidx(m);
1056 			q = &curthread->mq[qidx];
1057 			qp = &curthread->mq[qidx + 1];
1058 			TAILQ_REMOVE(q, m, m_qe);
1059 			if (!is_pshared_mutex(m))
1060 				TAILQ_REMOVE(qp, m, m_pqe);
1061 			TAILQ_FOREACH(m2, q, m_qe) {
1062 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1063 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1064 					if (!is_pshared_mutex(m)) {
1065 						while (m2 != NULL &&
1066 						    is_pshared_mutex(m2)) {
1067 							m2 = TAILQ_PREV(m2,
1068 							    mutex_queue, m_qe);
1069 						}
1070 						if (m2 == NULL) {
1071 							TAILQ_INSERT_HEAD(qp,
1072 							    m, m_pqe);
1073 						} else {
1074 							TAILQ_INSERT_BEFORE(m2,
1075 							    m, m_pqe);
1076 						}
1077 					}
1078 					return (0);
1079 				}
1080 			}
1081 			TAILQ_INSERT_TAIL(q, m, m_qe);
1082 			if (!is_pshared_mutex(m))
1083 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1084 		}
1085 	}
1086 	return (0);
1087 }
1088 
1089 int
1090 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1091 {
1092 	struct pthread_mutex *m;
1093 	int ret;
1094 
1095 	ret = check_and_init_mutex(mutex, &m);
1096 	if (ret == 0)
1097 		*count = m->m_spinloops;
1098 	return (ret);
1099 }
1100 
1101 int
1102 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1103 {
1104 	struct pthread_mutex *m;
1105 	int ret;
1106 
1107 	ret = check_and_init_mutex(mutex, &m);
1108 	if (ret == 0)
1109 		m->m_spinloops = count;
1110 	return (ret);
1111 }
1112 
1113 int
1114 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1115 {
1116 	struct pthread_mutex *m;
1117 	int ret;
1118 
1119 	ret = check_and_init_mutex(mutex, &m);
1120 	if (ret == 0)
1121 		*count = m->m_yieldloops;
1122 	return (ret);
1123 }
1124 
1125 int
1126 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1127 {
1128 	struct pthread_mutex *m;
1129 	int ret;
1130 
1131 	ret = check_and_init_mutex(mutex, &m);
1132 	if (ret == 0)
1133 		m->m_yieldloops = count;
1134 	return (0);
1135 }
1136 
1137 int
1138 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1139 {
1140 	struct pthread_mutex *m;
1141 
1142 	if (*mutex == THR_PSHARED_PTR) {
1143 		m = __thr_pshared_offpage(mutex, 0);
1144 		if (m == NULL)
1145 			return (0);
1146 		shared_mutex_init(m, NULL);
1147 	} else {
1148 		m = *mutex;
1149 		if (m <= THR_MUTEX_DESTROYED)
1150 			return (0);
1151 	}
1152 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1153 }
1154 
1155 int
1156 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1157 {
1158 
1159 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1160 		if (mp == THR_MUTEX_DESTROYED)
1161 			return (EINVAL);
1162 		return (EPERM);
1163 	}
1164 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1165 		return (EPERM);
1166 	return (0);
1167 }
1168 
1169 int
1170 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1171 {
1172 	struct pthread_mutex *m;
1173 	struct pthread *curthread;
1174 
1175 	if (*mutex == THR_PSHARED_PTR) {
1176 		m = __thr_pshared_offpage(mutex, 0);
1177 		if (m == NULL)
1178 			return (EINVAL);
1179 		shared_mutex_init(m, NULL);
1180 	} else {
1181 		m = *mutex;
1182 		if (m <= THR_MUTEX_DESTROYED)
1183 			return (EINVAL);
1184 	}
1185 	curthread = _get_curthread();
1186 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1187 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1188 		return (EINVAL);
1189 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1190 		return (EPERM);
1191 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1192 	return (0);
1193 }
1194