xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision d6b92ffa)
1 /*
2  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
3  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
5  *
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Konstantin Belousov
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by John Birrell.
22  * 4. Neither the name of the author nor the names of any co-contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "namespace.h"
43 #include <stdlib.h>
44 #include <errno.h>
45 #include <string.h>
46 #include <sys/param.h>
47 #include <sys/queue.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include "un-namespace.h"
51 
52 #include "thr_private.h"
53 
54 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
55     "pthread_mutex is too large for off-page");
56 
57 /*
58  * For adaptive mutexes, the number of times to spin doing trylock2
59  * before entering the kernel to block.
60  */
61 #define MUTEX_ADAPTIVE_SPINS	2000
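
/*
 * Illustrative sketch of how an application requests this mutex type
 * through the standard attribute interface (not code from this file):
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t mtx;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
 *	pthread_mutex_init(&mtx, &attr);
 *	pthread_mutexattr_destroy(&attr);
 */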
62 
63 /*
64  * Prototypes
65  */
66 int	__pthread_mutex_consistent(pthread_mutex_t *mutex);
67 int	__pthread_mutex_init(pthread_mutex_t *mutex,
68 		const pthread_mutexattr_t *mutex_attr);
69 int	__pthread_mutex_trylock(pthread_mutex_t *mutex);
70 int	__pthread_mutex_lock(pthread_mutex_t *mutex);
71 int	__pthread_mutex_timedlock(pthread_mutex_t *mutex,
72 		const struct timespec *abstime);
73 int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
74 int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
75 int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
76 int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
77 int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
78 int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
79 
80 static int	mutex_self_trylock(pthread_mutex_t);
81 static int	mutex_self_lock(pthread_mutex_t,
82 				const struct timespec *abstime);
83 static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
84 static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
85 				const struct timespec *);
86 static void	mutex_init_robust(struct pthread *curthread);
87 static int	mutex_qidx(struct pthread_mutex *m);
88 static bool	is_robust_mutex(struct pthread_mutex *m);
89 static bool	is_pshared_mutex(struct pthread_mutex *m);
90 
91 __weak_reference(__pthread_mutex_init, pthread_mutex_init);
92 __strong_reference(__pthread_mutex_init, _pthread_mutex_init);
93 __weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
94 __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
95 __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
96 __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
97 __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
98 __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
99 __weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
100 __strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);
101 
102 /* Single underscore versions provided for libc internal usage: */
103 /* No difference between libc and application usage of these: */
104 __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
105 __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);
106 
107 __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
108 __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);
109 
110 __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
111 __strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
112 __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);
113 
114 __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
115 __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
116 __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
117 __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
118 
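/*
 * Reset the thread-queue linkage of a mutex so that the
 * _PTHREADS_INVARIANTS checks below can detect stale list membership.
 */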
119 static void
120 mutex_init_link(struct pthread_mutex *m)
121 {
122 
123 #if defined(_PTHREADS_INVARIANTS)
124 	m->m_qe.tqe_prev = NULL;
125 	m->m_qe.tqe_next = NULL;
126 	m->m_pqe.tqe_prev = NULL;
127 	m->m_pqe.tqe_next = NULL;
128 #endif
129 }
130 
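/*
 * Invariant checks: when built with _PTHREADS_INVARIANTS, panic if a
 * mutex that is supposed to be on the owner's queue is not there, or
 * if a supposedly free mutex is still on a queue or on the robust
 * linkage.
 */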
131 static void
132 mutex_assert_is_owned(struct pthread_mutex *m __unused)
133 {
134 
135 #if defined(_PTHREADS_INVARIANTS)
136 	if (__predict_false(m->m_qe.tqe_prev == NULL))
137 		PANIC("mutex %p own %#x is not on list %p %p",
138 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
139 #endif
140 }
141 
142 static void
143 mutex_assert_not_owned(struct pthread *curthread __unused,
144     struct pthread_mutex *m __unused)
145 {
146 
147 #if defined(_PTHREADS_INVARIANTS)
148 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
149 	    m->m_qe.tqe_next != NULL))
150 		PANIC("mutex %p own %#x is on list %p %p",
151 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
152 	if (__predict_false(is_robust_mutex(m) &&
153 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
154 	    (is_pshared_mutex(m) && curthread->robust_list ==
155 	    (uintptr_t)&m->m_lock) ||
156 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
157 	    (uintptr_t)&m->m_lock))))
158 		PANIC(
159     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
160 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
161 		    m->m_rb_prev, (void *)curthread->robust_list,
162 		    (void *)curthread->priv_robust_list);
163 #endif
164 }
165 
166 static bool
167 is_pshared_mutex(struct pthread_mutex *m)
168 {
169 
170 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
171 }
172 
173 static bool
174 is_robust_mutex(struct pthread_mutex *m)
175 {
176 
177 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
178 }
179 
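/*
 * Bracket the lock and unlock of a robust mutex.  While the operation
 * is in progress the mutex address is published in
 * curthread->inact_mtx, so the kernel can recognize a half-completed
 * lock or unlock if the thread dies in the middle of it.
 */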
180 int
181 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
182 {
183 
184 #if defined(_PTHREADS_INVARIANTS)
185 	if (__predict_false(curthread->inact_mtx != 0))
186 		PANIC("inact_mtx enter");
187 #endif
188 	if (!is_robust_mutex(m))
189 		return (0);
190 
191 	mutex_init_robust(curthread);
192 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
193 	return (1);
194 }
195 
196 void
197 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
198 {
199 
200 #if defined(_PTHREADS_INVARIANTS)
201 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
202 		PANIC("inact_mtx leave");
203 #endif
204 	curthread->inact_mtx = 0;
205 }
206 
207 static int
208 mutex_check_attr(const struct pthread_mutex_attr *attr)
209 {
210 
211 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
212 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
213 		return (EINVAL);
214 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
215 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
216 		return (EINVAL);
217 	return (0);
218 }
219 
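/*
 * Lazily register this thread's robust mutex lists with the kernel:
 * the offsets of the shared and private list heads and of the
 * "inactive" (in-progress) mutex pointer are passed once via
 * UMTX_OP_ROBUST_LISTS.
 */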
220 static void
221 mutex_init_robust(struct pthread *curthread)
222 {
223 	struct umtx_robust_lists_params rb;
224 
225 	if (curthread == NULL)
226 		curthread = _get_curthread();
227 	if (curthread->robust_inited)
228 		return;
229 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
230 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
231 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
232 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
233 	curthread->robust_inited = 1;
234 }
235 
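/*
 * Initialize the mutex body from the attributes: type, protocol and
 * ceiling, process-shared and robust flags, and the spin/yield counts
 * used by adaptive mutexes.
 */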
236 static void
237 mutex_init_body(struct pthread_mutex *pmutex,
238     const struct pthread_mutex_attr *attr)
239 {
240 
241 	pmutex->m_flags = attr->m_type;
242 	pmutex->m_count = 0;
243 	pmutex->m_spinloops = 0;
244 	pmutex->m_yieldloops = 0;
245 	mutex_init_link(pmutex);
246 	switch (attr->m_protocol) {
247 	case PTHREAD_PRIO_NONE:
248 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
249 		pmutex->m_lock.m_flags = 0;
250 		break;
251 	case PTHREAD_PRIO_INHERIT:
252 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
253 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
254 		break;
255 	case PTHREAD_PRIO_PROTECT:
256 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
257 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
258 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
259 		break;
260 	}
261 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
262 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
263 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
264 		mutex_init_robust(NULL);
265 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
266 	}
267 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
268 		pmutex->m_spinloops =
269 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
270 		pmutex->m_yieldloops = _thr_yieldloops;
271 	}
272 }
273 
274 static int
275 mutex_init(pthread_mutex_t *mutex,
276     const struct pthread_mutex_attr *mutex_attr,
277     void *(calloc_cb)(size_t, size_t))
278 {
279 	const struct pthread_mutex_attr *attr;
280 	struct pthread_mutex *pmutex;
281 	int error;
282 
283 	if (mutex_attr == NULL) {
284 		attr = &_pthread_mutexattr_default;
285 	} else {
286 		attr = mutex_attr;
287 		error = mutex_check_attr(attr);
288 		if (error != 0)
289 			return (error);
290 	}
291 	if ((pmutex = (pthread_mutex_t)
292 		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
293 		return (ENOMEM);
294 	mutex_init_body(pmutex, attr);
295 	*mutex = pmutex;
296 	return (0);
297 }
298 
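/*
 * Replace a statically initialized mutex (normal or adaptive
 * initializer) with a freshly allocated one on first use; the global
 * _mutex_static_lock serializes concurrent first users.
 */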
299 static int
300 init_static(struct pthread *thread, pthread_mutex_t *mutex)
301 {
302 	int ret;
303 
304 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
305 
306 	if (*mutex == THR_MUTEX_INITIALIZER)
307 		ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
308 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
309 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
310 		    calloc);
311 	else
312 		ret = 0;
313 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
314 
315 	return (ret);
316 }
317 
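/*
 * On releasing a priority-protected mutex, refresh its m_ceilings[1]
 * from the last PP mutex remaining on the thread's queue (or -1 if
 * none), so that the thread's priority is restored to the right
 * level.
 */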
318 static void
319 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
320 {
321 	struct pthread_mutex *m2;
322 
323 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
324 	if (m2 != NULL)
325 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
326 	else
327 		m->m_lock.m_ceilings[1] = -1;
328 }
329 
330 static void
331 shared_mutex_init(struct pthread_mutex *pmtx, const struct
332     pthread_mutex_attr *mutex_attr)
333 {
334 	static const struct pthread_mutex_attr foobar_mutex_attr = {
335 		.m_type = PTHREAD_MUTEX_DEFAULT,
336 		.m_protocol = PTHREAD_PRIO_NONE,
337 		.m_ceiling = 0,
338 		.m_pshared = PTHREAD_PROCESS_SHARED,
339 		.m_robust = PTHREAD_MUTEX_STALLED,
340 	};
341 	bool done;
342 
343 	/*
344 	 * Hack to allow multiple pthread_mutex_init() calls on the
345 	 * same process-shared mutex.  We rely on the kernel allocating
346 	 * a zeroed offpage for the mutex, i.e. the
347 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
348 	 */
349 	for (done = false; !done;) {
350 		switch (pmtx->m_ps) {
351 		case PMUTEX_INITSTAGE_DONE:
352 			atomic_thread_fence_acq();
353 			done = true;
354 			break;
355 		case PMUTEX_INITSTAGE_ALLOC:
356 			if (atomic_cmpset_int(&pmtx->m_ps,
357 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
358 				if (mutex_attr == NULL)
359 					mutex_attr = &foobar_mutex_attr;
360 				mutex_init_body(pmtx, mutex_attr);
361 				atomic_store_rel_int(&pmtx->m_ps,
362 				    PMUTEX_INITSTAGE_DONE);
363 				done = true;
364 			}
365 			break;
366 		case PMUTEX_INITSTAGE_BUSY:
367 			_pthread_yield();
368 			break;
369 		default:
370 			PANIC("corrupted offpage");
371 			break;
372 		}
373 	}
374 }
375 
376 int
377 __pthread_mutex_init(pthread_mutex_t *mutex,
378     const pthread_mutexattr_t *mutex_attr)
379 {
380 	struct pthread_mutex *pmtx;
381 	int ret;
382 
383 	if (mutex_attr != NULL) {
384 		ret = mutex_check_attr(*mutex_attr);
385 		if (ret != 0)
386 			return (ret);
387 	}
388 	if (mutex_attr == NULL ||
389 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
390 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
391 		    calloc));
392 	}
393 	pmtx = __thr_pshared_offpage(mutex, 1);
394 	if (pmtx == NULL)
395 		return (EFAULT);
396 	*mutex = THR_PSHARED_PTR;
397 	shared_mutex_init(pmtx, *mutex_attr);
398 	return (0);
399 }
400 
401 /* This function is used internally by malloc. */
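/*
 * The allocator supplies its own calloc-compatible callback so that
 * creating this mutex does not recurse back into malloc(3) while the
 * allocator is still bootstrapping.
 */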
402 int
403 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
404     void *(calloc_cb)(size_t, size_t))
405 {
406 	static const struct pthread_mutex_attr attr = {
407 		.m_type = PTHREAD_MUTEX_NORMAL,
408 		.m_protocol = PTHREAD_PRIO_NONE,
409 		.m_ceiling = 0,
410 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
411 		.m_robust = PTHREAD_MUTEX_STALLED,
412 	};
413 	int ret;
414 
415 	ret = mutex_init(mutex, &attr, calloc_cb);
416 	if (ret == 0)
417 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
418 	return (ret);
419 }
420 
421 /*
422  * Fix mutex ownership for the child process.
423  *
424  * Process-private mutex ownership is transmitted from the forking
425  * thread to the child process.
426  *
427  * Process-shared mutexes should not be inherited: their owner is the
428  * forking thread, which lives in the parent process, so they are
429  * removed from the owned mutex list.
430  */
431 static void
432 queue_fork(struct pthread *curthread, struct mutex_queue *q,
433     struct mutex_queue *qp, uint bit)
434 {
435 	struct pthread_mutex *m;
436 
437 	TAILQ_INIT(q);
438 	TAILQ_FOREACH(m, qp, m_pqe) {
439 		TAILQ_INSERT_TAIL(q, m, m_qe);
440 		m->m_lock.m_owner = TID(curthread) | bit;
441 	}
442 }
443 
444 void
445 _mutex_fork(struct pthread *curthread)
446 {
447 
448 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
449 	    &curthread->mq[TMQ_NORM_PRIV], 0);
450 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
451 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
452 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
453 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
454 	curthread->robust_list = 0;
455 }
456 
457 int
458 _pthread_mutex_destroy(pthread_mutex_t *mutex)
459 {
460 	pthread_mutex_t m, m1;
461 	int ret;
462 
463 	m = *mutex;
464 	if (m < THR_MUTEX_DESTROYED) {
465 		ret = 0;
466 	} else if (m == THR_MUTEX_DESTROYED) {
467 		ret = EINVAL;
468 	} else {
469 		if (m == THR_PSHARED_PTR) {
470 			m1 = __thr_pshared_offpage(mutex, 0);
471 			if (m1 != NULL) {
472 				mutex_assert_not_owned(_get_curthread(), m1);
473 				__thr_pshared_destroy(mutex);
474 			}
475 			*mutex = THR_MUTEX_DESTROYED;
476 			return (0);
477 		}
478 		if (PMUTEX_OWNER_ID(m) != 0 &&
479 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
480 			ret = EBUSY;
481 		} else {
482 			*mutex = THR_MUTEX_DESTROYED;
483 			mutex_assert_not_owned(_get_curthread(), m);
484 			free(m);
485 			ret = 0;
486 		}
487 	}
488 
489 	return (ret);
490 }
491 
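/*
 * Pick the owned-mutex queue for a mutex: TMQ_NORM for ordinary
 * mutexes, TMQ_NORM_PP for priority-protected ones, TMQ_ROBUST_PP for
 * robust priority-protected ones.  The matching private queue for
 * non-pshared mutexes is always at qidx + 1.
 */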
492 static int
493 mutex_qidx(struct pthread_mutex *m)
494 {
495 
496 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
497 		return (TMQ_NORM);
498 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
499 }
500 
501 /*
502  * Both enqueue_mutex() and dequeue_mutex() operate on the
503  * thread-private linkage of the locked mutexes and on the robust
504  * linkage.
505  *
506  * The robust list, as seen by the kernel, must stay consistent even
507  * if a thread terminates at an arbitrary moment.  Since either an
508  * enqueue or a dequeue on a list walked by the kernel consists of
509  * rewriting a single forward pointer, this is safe.  On the other
510  * hand, rewriting the back pointer is not atomic WRT the forward
511  * one, but the kernel does not care.
512  */
513 static void
514 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
515     int error)
516 {
517 	struct pthread_mutex *m1;
518 	uintptr_t *rl;
519 	int qidx;
520 
521 	/* Add to the list of owned mutexes: */
522 	if (error != EOWNERDEAD)
523 		mutex_assert_not_owned(curthread, m);
524 	qidx = mutex_qidx(m);
525 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
526 	if (!is_pshared_mutex(m))
527 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
528 	if (is_robust_mutex(m)) {
529 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
530 		    &curthread->priv_robust_list;
531 		m->m_rb_prev = NULL;
532 		if (*rl != 0) {
533 			m1 = __containerof((void *)*rl,
534 			    struct pthread_mutex, m_lock);
535 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
536 			m1->m_rb_prev = m;
537 		} else {
538 			m1 = NULL;
539 			m->m_lock.m_rb_lnk = 0;
540 		}
541 		*rl = (uintptr_t)&m->m_lock;
542 	}
543 }
544 
545 static void
546 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
547 {
548 	struct pthread_mutex *mp, *mn;
549 	int qidx;
550 
551 	mutex_assert_is_owned(m);
552 	qidx = mutex_qidx(m);
553 	if (is_robust_mutex(m)) {
554 		mp = m->m_rb_prev;
555 		if (mp == NULL) {
556 			if (is_pshared_mutex(m)) {
557 				curthread->robust_list = m->m_lock.m_rb_lnk;
558 			} else {
559 				curthread->priv_robust_list =
560 				    m->m_lock.m_rb_lnk;
561 			}
562 		} else {
563 			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
564 		}
565 		if (m->m_lock.m_rb_lnk != 0) {
566 			mn = __containerof((void *)m->m_lock.m_rb_lnk,
567 			    struct pthread_mutex, m_lock);
568 			mn->m_rb_prev = m->m_rb_prev;
569 		}
570 		m->m_lock.m_rb_lnk = 0;
571 		m->m_rb_prev = NULL;
572 	}
573 	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
574 	if (!is_pshared_mutex(m))
575 		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
576 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
577 		set_inherited_priority(curthread, m);
578 	mutex_init_link(m);
579 }
580 
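/*
 * Translate a pthread_mutex_t to the underlying pthread_mutex,
 * initializing process-shared offpages and statically initialized
 * mutexes on first use.  Returns EINVAL for destroyed or unmapped
 * mutexes.
 */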
581 static int
582 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
583 {
584 	int ret;
585 
586 	*m = *mutex;
587 	ret = 0;
588 	if (*m == THR_PSHARED_PTR) {
589 		*m = __thr_pshared_offpage(mutex, 0);
590 		if (*m == NULL)
591 			ret = EINVAL;
592 		else
593 			shared_mutex_init(*m, NULL);
594 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
595 		if (*m == THR_MUTEX_DESTROYED) {
596 			ret = EINVAL;
597 		} else {
598 			ret = init_static(_get_curthread(), mutex);
599 			if (ret == 0)
600 				*m = *mutex;
601 		}
602 	}
603 	return (ret);
604 }
605 
606 int
607 __pthread_mutex_trylock(pthread_mutex_t *mutex)
608 {
609 	struct pthread *curthread;
610 	struct pthread_mutex *m;
611 	uint32_t id;
612 	int ret, robust;
613 
614 	ret = check_and_init_mutex(mutex, &m);
615 	if (ret != 0)
616 		return (ret);
617 	curthread = _get_curthread();
618 	id = TID(curthread);
619 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
620 		THR_CRITICAL_ENTER(curthread);
621 	robust = _mutex_enter_robust(curthread, m);
622 	ret = _thr_umutex_trylock(&m->m_lock, id);
623 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
624 		enqueue_mutex(curthread, m, ret);
625 		if (ret == EOWNERDEAD)
626 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
627 	} else if (PMUTEX_OWNER_ID(m) == id) {
628 		ret = mutex_self_trylock(m);
629 	} /* else {} */
630 	if (robust)
631 		_mutex_leave_robust(curthread, m);
632 	if (ret != 0 && ret != EOWNERDEAD &&
633 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
634 		THR_CRITICAL_LEAVE(curthread);
635 	return (ret);
636 }
637 
638 static int
639 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
640     const struct timespec *abstime)
641 {
642 	uint32_t id, owner;
643 	int count, ret;
644 
645 	id = TID(curthread);
646 	if (PMUTEX_OWNER_ID(m) == id)
647 		return (mutex_self_lock(m, abstime));
648 
649 	/*
650 	 * For adaptive mutexes, spin for a bit in the expectation
651 	 * that if the application requests this mutex type then
652 	 * the lock is likely to be released quickly, so spinning is
653 	 * faster than entering the kernel.
654 	 */
655 	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
656 	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
657 		goto sleep_in_kernel;
658 
659 	if (!_thr_is_smp)
660 		goto yield_loop;
661 
662 	count = m->m_spinloops;
663 	while (count--) {
664 		owner = m->m_lock.m_owner;
665 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
666 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
667 			    id | owner)) {
668 				ret = 0;
669 				goto done;
670 			}
671 		}
672 		CPU_SPINWAIT;
673 	}
674 
675 yield_loop:
676 	count = m->m_yieldloops;
677 	while (count--) {
678 		_sched_yield();
679 		owner = m->m_lock.m_owner;
680 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
681 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
682 			    id | owner)) {
683 				ret = 0;
684 				goto done;
685 			}
686 		}
687 	}
688 
689 sleep_in_kernel:
690 	if (abstime == NULL)
691 		ret = __thr_umutex_lock(&m->m_lock, id);
692 	else if (__predict_false(abstime->tv_nsec < 0 ||
693 	    abstime->tv_nsec >= 1000000000))
694 		ret = EINVAL;
695 	else
696 		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
697 done:
698 	if (ret == 0 || ret == EOWNERDEAD) {
699 		enqueue_mutex(curthread, m, ret);
700 		if (ret == EOWNERDEAD)
701 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
702 	}
703 	return (ret);
704 }
705 
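/*
 * Common lock path: try a fast userspace acquire and fall back to
 * mutex_lock_sleep() on contention.  'cvattach' suppresses the
 * critical-section accounting when re-locking on behalf of a
 * condition variable; 'rb_onlist' means the caller already handles
 * the robust bookkeeping, so the inactive-mutex window is not entered
 * here.
 */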
706 static inline int
707 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
708     bool cvattach, bool rb_onlist)
709 {
710 	struct pthread *curthread;
711 	int ret, robust;
712 
713 	robust = 0;  /* pacify gcc */
714 	curthread  = _get_curthread();
715 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
716 		THR_CRITICAL_ENTER(curthread);
717 	if (!rb_onlist)
718 		robust = _mutex_enter_robust(curthread, m);
719 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
720 	if (ret == 0 || ret == EOWNERDEAD) {
721 		enqueue_mutex(curthread, m, ret);
722 		if (ret == EOWNERDEAD)
723 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
724 	} else {
725 		ret = mutex_lock_sleep(curthread, m, abstime);
726 	}
727 	if (!rb_onlist && robust)
728 		_mutex_leave_robust(curthread, m);
729 	if (ret != 0 && ret != EOWNERDEAD &&
730 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
731 		THR_CRITICAL_LEAVE(curthread);
732 	return (ret);
733 }
734 
735 int
736 __pthread_mutex_lock(pthread_mutex_t *mutex)
737 {
738 	struct pthread_mutex *m;
739 	int ret;
740 
741 	_thr_check_init();
742 	ret = check_and_init_mutex(mutex, &m);
743 	if (ret == 0)
744 		ret = mutex_lock_common(m, NULL, false, false);
745 	return (ret);
746 }
747 
748 int
749 __pthread_mutex_timedlock(pthread_mutex_t *mutex,
750     const struct timespec *abstime)
751 {
752 	struct pthread_mutex *m;
753 	int ret;
754 
755 	_thr_check_init();
756 	ret = check_and_init_mutex(mutex, &m);
757 	if (ret == 0)
758 		ret = mutex_lock_common(m, abstime, false, false);
759 	return (ret);
760 }
761 
762 int
763 _pthread_mutex_unlock(pthread_mutex_t *mutex)
764 {
765 	struct pthread_mutex *mp;
766 
767 	if (*mutex == THR_PSHARED_PTR) {
768 		mp = __thr_pshared_offpage(mutex, 0);
769 		if (mp == NULL)
770 			return (EINVAL);
771 		shared_mutex_init(mp, NULL);
772 	} else {
773 		mp = *mutex;
774 	}
775 	return (mutex_unlock_common(mp, false, NULL));
776 }
777 
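/*
 * Helpers for the condition variable implementation: re-lock a mutex
 * after sleeping and restore its recursion count, unlock it while
 * saving the count, or attach/detach it to the owned-mutex queues
 * without touching the lock word.
 */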
778 int
779 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
780 {
781 	int error;
782 
783 	error = mutex_lock_common(m, NULL, true, rb_onlist);
784 	if (error == 0 || error == EOWNERDEAD)
785 		m->m_count = count;
786 	return (error);
787 }
788 
789 int
790 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
791 {
792 
793 	/*
794 	 * Clear the count in case this is a recursive mutex.
795 	 */
796 	*count = m->m_count;
797 	m->m_count = 0;
798 	(void)mutex_unlock_common(m, true, defer);
799 	return (0);
800 }
801 
802 int
803 _mutex_cv_attach(struct pthread_mutex *m, int count)
804 {
805 	struct pthread *curthread;
806 
807 	curthread = _get_curthread();
808 	enqueue_mutex(curthread, m, 0);
809 	m->m_count = count;
810 	return (0);
811 }
812 
813 int
814 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
815 {
816 	struct pthread *curthread;
817 	int deferred, error;
818 
819 	curthread = _get_curthread();
820 	if ((error = _mutex_owned(curthread, mp)) != 0)
821 		return (error);
822 
823 	/*
824 	 * Clear the count in case this is a recursive mutex.
825 	 */
826 	*recurse = mp->m_count;
827 	mp->m_count = 0;
828 	dequeue_mutex(curthread, mp);
829 
830 	/* Will this happen in the real world? */
831 	if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
832 		deferred = 1;
833 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
834 	} else
835 		deferred = 0;
836 
837 	if (deferred)  {
838 		_thr_wake_all(curthread->defer_waiters,
839 		    curthread->nwaiter_defer);
840 		curthread->nwaiter_defer = 0;
841 	}
842 	return (0);
843 }
844 
845 static int
846 mutex_self_trylock(struct pthread_mutex *m)
847 {
848 	int ret;
849 
850 	switch (PMUTEX_TYPE(m->m_flags)) {
851 	case PTHREAD_MUTEX_ERRORCHECK:
852 	case PTHREAD_MUTEX_NORMAL:
853 	case PTHREAD_MUTEX_ADAPTIVE_NP:
854 		ret = EBUSY;
855 		break;
856 
857 	case PTHREAD_MUTEX_RECURSIVE:
858 		/* Increment the lock count: */
859 		if (m->m_count + 1 > 0) {
860 			m->m_count++;
861 			ret = 0;
862 		} else
863 			ret = EAGAIN;
864 		break;
865 
866 	default:
867 		/* Trap invalid mutex types. */
868 		ret = EINVAL;
869 	}
870 
871 	return (ret);
872 }
873 
874 static int
875 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
876 {
877 	struct timespec	ts1, ts2;
878 	int ret;
879 
880 	switch (PMUTEX_TYPE(m->m_flags)) {
881 	case PTHREAD_MUTEX_ERRORCHECK:
882 	case PTHREAD_MUTEX_ADAPTIVE_NP:
883 		if (abstime) {
884 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
885 			    abstime->tv_nsec >= 1000000000) {
886 				ret = EINVAL;
887 			} else {
888 				clock_gettime(CLOCK_REALTIME, &ts1);
889 				TIMESPEC_SUB(&ts2, abstime, &ts1);
890 				__sys_nanosleep(&ts2, NULL);
891 				ret = ETIMEDOUT;
892 			}
893 		} else {
894 			/*
895 			 * POSIX specifies that mutexes should return
896 			 * EDEADLK if a recursive lock is detected.
897 			 */
898 			ret = EDEADLK;
899 		}
900 		break;
901 
902 	case PTHREAD_MUTEX_NORMAL:
903 		/*
904 		 * What SS2 defines as a 'normal' mutex.  Intentionally
905 		 * deadlock on attempts to get a lock you already own.
906 		 */
907 		ret = 0;
908 		if (abstime) {
909 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
910 			    abstime->tv_nsec >= 1000000000) {
911 				ret = EINVAL;
912 			} else {
913 				clock_gettime(CLOCK_REALTIME, &ts1);
914 				TIMESPEC_SUB(&ts2, abstime, &ts1);
915 				__sys_nanosleep(&ts2, NULL);
916 				ret = ETIMEDOUT;
917 			}
918 		} else {
919 			ts1.tv_sec = 30;
920 			ts1.tv_nsec = 0;
921 			for (;;)
922 				__sys_nanosleep(&ts1, NULL);
923 		}
924 		break;
925 
926 	case PTHREAD_MUTEX_RECURSIVE:
927 		/* Increment the lock count: */
928 		if (m->m_count + 1 > 0) {
929 			m->m_count++;
930 			ret = 0;
931 		} else
932 			ret = EAGAIN;
933 		break;
934 
935 	default:
936 		/* Trap invalid mutex types. */
937 		ret = EINVAL;
938 	}
939 
940 	return (ret);
941 }
942 
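/*
 * Common unlock path: verify ownership, unwind recursion, dequeue the
 * mutex and release the lock word.  Deferred waiters are woken here
 * unless the caller passed 'mtx_defer' to take over the wakeup.
 */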
943 static int
944 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
945 {
946 	struct pthread *curthread;
947 	uint32_t id;
948 	int deferred, error, robust;
949 
950 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
951 		if (m == THR_MUTEX_DESTROYED)
952 			return (EINVAL);
953 		return (EPERM);
954 	}
955 
956 	curthread = _get_curthread();
957 	id = TID(curthread);
958 
959 	/*
960 	 * Return EPERM if the calling thread is not the owner of the mutex.
961 	 */
962 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
963 		return (EPERM);
964 
965 	error = 0;
966 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
967 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
968 		m->m_count--;
969 	} else {
970 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
971 			deferred = 1;
972 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
973 		} else
974 			deferred = 0;
975 
976 		robust = _mutex_enter_robust(curthread, m);
977 		dequeue_mutex(curthread, m);
978 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
979 		if (deferred)  {
980 			if (mtx_defer == NULL) {
981 				_thr_wake_all(curthread->defer_waiters,
982 				    curthread->nwaiter_defer);
983 				curthread->nwaiter_defer = 0;
984 			} else
985 				*mtx_defer = 1;
986 		}
987 		if (robust)
988 			_mutex_leave_robust(curthread, m);
989 	}
990 	if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
991 		THR_CRITICAL_LEAVE(curthread);
992 	return (error);
993 }
994 
995 int
996 _pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
997     int *prioceiling)
998 {
999 	struct pthread_mutex *m;
1000 
1001 	if (*mutex == THR_PSHARED_PTR) {
1002 		m = __thr_pshared_offpage(mutex, 0);
1003 		if (m == NULL)
1004 			return (EINVAL);
1005 		shared_mutex_init(m, NULL);
1006 	} else {
1007 		m = *mutex;
1008 		if (m <= THR_MUTEX_DESTROYED)
1009 			return (EINVAL);
1010 	}
1011 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1012 		return (EINVAL);
1013 	*prioceiling = m->m_lock.m_ceilings[0];
1014 	return (0);
1015 }
1016 
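/*
 * Change the ceiling of a PTHREAD_PRIO_PROTECT mutex.  If the calling
 * thread owns it, the mutex is also moved to keep the owned-mutex
 * queue ordered by ceiling.
 */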
1017 int
1018 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
1019     int ceiling, int *old_ceiling)
1020 {
1021 	struct pthread *curthread;
1022 	struct pthread_mutex *m, *m1, *m2;
1023 	struct mutex_queue *q, *qp;
1024 	int qidx, ret;
1025 
1026 	if (*mutex == THR_PSHARED_PTR) {
1027 		m = __thr_pshared_offpage(mutex, 0);
1028 		if (m == NULL)
1029 			return (EINVAL);
1030 		shared_mutex_init(m, NULL);
1031 	} else {
1032 		m = *mutex;
1033 		if (m <= THR_MUTEX_DESTROYED)
1034 			return (EINVAL);
1035 	}
1036 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1037 		return (EINVAL);
1038 
1039 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1040 	if (ret != 0)
1041 		return (ret);
1042 
1043 	curthread = _get_curthread();
1044 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1045 		mutex_assert_is_owned(m);
1046 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1047 		m2 = TAILQ_NEXT(m, m_qe);
1048 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1049 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1050 			qidx = mutex_qidx(m);
1051 			q = &curthread->mq[qidx];
1052 			qp = &curthread->mq[qidx + 1];
1053 			TAILQ_REMOVE(q, m, m_qe);
1054 			if (!is_pshared_mutex(m))
1055 				TAILQ_REMOVE(qp, m, m_pqe);
1056 			TAILQ_FOREACH(m2, q, m_qe) {
1057 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1058 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1059 					if (!is_pshared_mutex(m)) {
1060 						while (m2 != NULL &&
1061 						    is_pshared_mutex(m2)) {
1062 							m2 = TAILQ_PREV(m2,
1063 							    mutex_queue, m_qe);
1064 						}
1065 						if (m2 == NULL) {
1066 							TAILQ_INSERT_HEAD(qp,
1067 							    m, m_pqe);
1068 						} else {
1069 							TAILQ_INSERT_BEFORE(m2,
1070 							    m, m_pqe);
1071 						}
1072 					}
1073 					return (0);
1074 				}
1075 			}
1076 			TAILQ_INSERT_TAIL(q, m, m_qe);
1077 			if (!is_pshared_mutex(m))
1078 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1079 		}
1080 	}
1081 	return (0);
1082 }
1083 
1084 int
1085 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1086 {
1087 	struct pthread_mutex *m;
1088 	int ret;
1089 
1090 	ret = check_and_init_mutex(mutex, &m);
1091 	if (ret == 0)
1092 		*count = m->m_spinloops;
1093 	return (ret);
1094 }
1095 
1096 int
1097 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1098 {
1099 	struct pthread_mutex *m;
1100 	int ret;
1101 
1102 	ret = check_and_init_mutex(mutex, &m);
1103 	if (ret == 0)
1104 		m->m_spinloops = count;
1105 	return (ret);
1106 }
1107 
1108 int
1109 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1110 {
1111 	struct pthread_mutex *m;
1112 	int ret;
1113 
1114 	ret = check_and_init_mutex(mutex, &m);
1115 	if (ret == 0)
1116 		*count = m->m_yieldloops;
1117 	return (ret);
1118 }
1119 
1120 int
1121 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1122 {
1123 	struct pthread_mutex *m;
1124 	int ret;
1125 
1126 	ret = check_and_init_mutex(mutex, &m);
1127 	if (ret == 0)
1128 		m->m_yieldloops = count;
1129 	return (0);
1130 	return (ret);
1131 
1132 int
1133 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1134 {
1135 	struct pthread_mutex *m;
1136 
1137 	if (*mutex == THR_PSHARED_PTR) {
1138 		m = __thr_pshared_offpage(mutex, 0);
1139 		if (m == NULL)
1140 			return (0);
1141 		shared_mutex_init(m, NULL);
1142 	} else {
1143 		m = *mutex;
1144 		if (m <= THR_MUTEX_DESTROYED)
1145 			return (0);
1146 	}
1147 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1148 }
1149 
1150 int
1151 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1152 {
1153 
1154 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1155 		if (mp == THR_MUTEX_DESTROYED)
1156 			return (EINVAL);
1157 		return (EPERM);
1158 	}
1159 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1160 		return (EPERM);
1161 	return (0);
1162 }
1163 
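/*
 * Mark a robust mutex consistent again after its previous owner died
 * while holding it.  Only the thread that got EOWNERDEAD and now owns
 * the mutex may call this.  A minimal caller sketch, where
 * repair_state() stands for application-specific recovery and error
 * handling is omitted:
 *
 *	if (pthread_mutex_lock(&m) == EOWNERDEAD) {
 *		repair_state();
 *		pthread_mutex_consistent(&m);
 *	}
 *	pthread_mutex_unlock(&m);
 */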
1164 int
1165 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1166 {
1167 	struct pthread_mutex *m;
1168 	struct pthread *curthread;
1169 
1170 	if (*mutex == THR_PSHARED_PTR) {
1171 		m = __thr_pshared_offpage(mutex, 0);
1172 		if (m == NULL)
1173 			return (EINVAL);
1174 		shared_mutex_init(m, NULL);
1175 	} else {
1176 		m = *mutex;
1177 		if (m <= THR_MUTEX_DESTROYED)
1178 			return (EINVAL);
1179 	}
1180 	curthread = _get_curthread();
1181 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1182 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1183 		return (EINVAL);
1184 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1185 		return (EPERM);
1186 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1187 	return (0);
1188 }
1189