1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 #include <sys/umtxvar.h> 67 68 #include <security/mac/mac_framework.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/pmap.h> 73 #include <vm/vm_map.h> 74 #include <vm/vm_object.h> 75 76 #include <machine/atomic.h> 77 #include <machine/cpu.h> 78 79 #include <compat/freebsd32/freebsd32.h> 80 #ifdef COMPAT_FREEBSD32 81 #include <compat/freebsd32/freebsd32_proto.h> 82 #endif 83 84 #define _UMUTEX_TRY 1 85 #define _UMUTEX_WAIT 2 86 87 #ifdef UMTX_PROFILING 88 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 89 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 90 #endif 91 92 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 93 94 /* 95 * Don't propagate time-sharing priority, there is a security reason, 96 * a user can simply introduce PI-mutex, let thread A lock the mutex, 97 * and let another thread B block on the mutex, because B is 98 * sleeping, its priority will be boosted, this causes A's priority to 99 * be boosted via priority propagating too and will never be lowered even 100 * if it is using 100%CPU, this is unfair to other processes. 101 */ 102 103 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 104 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 105 PRI_MAX_TIMESHARE : (td)->td_user_pri) 106 107 #define GOLDEN_RATIO_PRIME 2654404609U 108 #ifndef UMTX_CHAINS 109 #define UMTX_CHAINS 512 110 #endif 111 #define UMTX_SHIFTS (__WORD_BIT - 9) 112 113 #define GET_SHARE(flags) \ 114 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 115 116 #define BUSY_SPINS 200 117 118 struct umtx_copyops { 119 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 120 int (*copyin_umtx_time)(const void *uaddr, size_t size, 121 struct _umtx_time *tp); 122 int (*copyin_robust_lists)(const void *uaddr, size_t size, 123 struct umtx_robust_lists_params *rbp); 124 int (*copyout_timeout)(void *uaddr, size_t size, 125 struct timespec *tsp); 126 const size_t timespec_sz; 127 const size_t umtx_time_sz; 128 const bool compat32; 129 }; 130 131 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 132 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 133 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 134 135 int umtx_shm_vnobj_persistent = 0; 136 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 137 &umtx_shm_vnobj_persistent, 0, 138 "False forces destruction of umtx attached to file, on last close"); 139 static int umtx_max_rb = 1000; 140 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 141 &umtx_max_rb, 0, 142 "Maximum number of robust mutexes allowed for each thread"); 143 144 static uma_zone_t umtx_pi_zone; 145 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 146 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 147 static int umtx_pi_allocated; 148 149 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 150 "umtx debug"); 151 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 152 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 153 static int umtx_verbose_rb = 1; 154 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 155 &umtx_verbose_rb, 0, 156 ""); 157 158 #ifdef UMTX_PROFILING 159 static long max_length; 160 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 161 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 162 "umtx chain stats"); 163 #endif 164 165 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 166 const struct _umtx_time *umtxtime); 167 static int umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo); 168 static inline void umtx_abs_timeout_update(struct umtx_abs_timeout *timo); 169 170 static void umtx_shm_init(void); 171 static void umtxq_sysinit(void *); 172 static void umtxq_hash(struct umtx_key *key); 173 static struct umtx_pi *umtx_pi_alloc(int); 174 static void umtx_pi_free(struct umtx_pi *pi); 175 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 176 bool rb); 177 static void umtx_thread_cleanup(struct thread *td); 178 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 179 180 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 181 182 static struct mtx umtx_lock; 183 184 #ifdef UMTX_PROFILING 185 static void 186 umtx_init_profiling(void) 187 { 188 struct sysctl_oid *chain_oid; 189 char chain_name[10]; 190 int i; 191 192 for (i = 0; i < UMTX_CHAINS; ++i) { 193 snprintf(chain_name, sizeof(chain_name), "%d", i); 194 chain_oid = SYSCTL_ADD_NODE(NULL, 195 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 196 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 197 "umtx hash stats"); 198 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 199 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 200 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 201 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 202 } 203 } 204 205 static int 206 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 207 { 208 char buf[512]; 209 struct sbuf sb; 210 struct umtxq_chain *uc; 211 u_int fract, i, j, tot, whole; 212 u_int sf0, sf1, sf2, sf3, sf4; 213 u_int si0, si1, si2, si3, si4; 214 u_int sw0, sw1, sw2, sw3, sw4; 215 216 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 217 for (i = 0; i < 2; i++) { 218 tot = 0; 219 for (j = 0; j < UMTX_CHAINS; ++j) { 220 uc = &umtxq_chains[i][j]; 221 mtx_lock(&uc->uc_lock); 222 tot += uc->max_length; 223 mtx_unlock(&uc->uc_lock); 224 } 225 if (tot == 0) 226 sbuf_printf(&sb, "%u) Empty ", i); 227 else { 228 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 229 si0 = si1 = si2 = si3 = si4 = 0; 230 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 231 for (j = 0; j < UMTX_CHAINS; j++) { 232 uc = &umtxq_chains[i][j]; 233 mtx_lock(&uc->uc_lock); 234 whole = uc->max_length * 100; 235 mtx_unlock(&uc->uc_lock); 236 fract = (whole % tot) * 100; 237 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 238 sf0 = fract; 239 si0 = j; 240 sw0 = whole; 241 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 242 sf1)) { 243 sf1 = fract; 244 si1 = j; 245 sw1 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 247 sf2)) { 248 sf2 = fract; 249 si2 = j; 250 sw2 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 252 sf3)) { 253 sf3 = fract; 254 si3 = j; 255 sw3 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 257 sf4)) { 258 sf4 = fract; 259 si4 = j; 260 sw4 = whole; 261 } 262 } 263 sbuf_printf(&sb, "queue %u:\n", i); 264 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 265 sf0 / tot, si0); 266 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 267 sf1 / tot, si1); 268 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 269 sf2 / tot, si2); 270 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 271 sf3 / tot, si3); 272 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 273 sf4 / tot, si4); 274 } 275 } 276 sbuf_trim(&sb); 277 sbuf_finish(&sb); 278 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 279 sbuf_delete(&sb); 280 return (0); 281 } 282 283 static int 284 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 285 { 286 struct umtxq_chain *uc; 287 u_int i, j; 288 int clear, error; 289 290 clear = 0; 291 error = sysctl_handle_int(oidp, &clear, 0, req); 292 if (error != 0 || req->newptr == NULL) 293 return (error); 294 295 if (clear != 0) { 296 for (i = 0; i < 2; ++i) { 297 for (j = 0; j < UMTX_CHAINS; ++j) { 298 uc = &umtxq_chains[i][j]; 299 mtx_lock(&uc->uc_lock); 300 uc->length = 0; 301 uc->max_length = 0; 302 mtx_unlock(&uc->uc_lock); 303 } 304 } 305 } 306 return (0); 307 } 308 309 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 310 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 311 sysctl_debug_umtx_chains_clear, "I", 312 "Clear umtx chains statistics"); 313 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 314 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 315 sysctl_debug_umtx_chains_peaks, "A", 316 "Highest peaks in chains max length"); 317 #endif 318 319 static void 320 umtxq_sysinit(void *arg __unused) 321 { 322 int i, j; 323 324 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 325 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 326 for (i = 0; i < 2; ++i) { 327 for (j = 0; j < UMTX_CHAINS; ++j) { 328 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 329 MTX_DEF | MTX_DUPOK); 330 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 331 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 332 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 333 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 334 umtxq_chains[i][j].uc_busy = 0; 335 umtxq_chains[i][j].uc_waiters = 0; 336 #ifdef UMTX_PROFILING 337 umtxq_chains[i][j].length = 0; 338 umtxq_chains[i][j].max_length = 0; 339 #endif 340 } 341 } 342 #ifdef UMTX_PROFILING 343 umtx_init_profiling(); 344 #endif 345 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 346 umtx_shm_init(); 347 } 348 349 struct umtx_q * 350 umtxq_alloc(void) 351 { 352 struct umtx_q *uq; 353 354 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 355 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 356 M_WAITOK | M_ZERO); 357 TAILQ_INIT(&uq->uq_spare_queue->head); 358 TAILQ_INIT(&uq->uq_pi_contested); 359 uq->uq_inherited_pri = PRI_MAX; 360 return (uq); 361 } 362 363 void 364 umtxq_free(struct umtx_q *uq) 365 { 366 367 MPASS(uq->uq_spare_queue != NULL); 368 free(uq->uq_spare_queue, M_UMTX); 369 free(uq, M_UMTX); 370 } 371 372 static inline void 373 umtxq_hash(struct umtx_key *key) 374 { 375 unsigned n; 376 377 n = (uintptr_t)key->info.both.a + key->info.both.b; 378 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 379 } 380 381 struct umtxq_chain * 382 umtxq_getchain(struct umtx_key *key) 383 { 384 385 if (key->type <= TYPE_SEM) 386 return (&umtxq_chains[1][key->hash]); 387 return (&umtxq_chains[0][key->hash]); 388 } 389 390 /* 391 * Set chain to busy state when following operation 392 * may be blocked (kernel mutex can not be used). 393 */ 394 void 395 umtxq_busy(struct umtx_key *key) 396 { 397 struct umtxq_chain *uc; 398 399 uc = umtxq_getchain(key); 400 mtx_assert(&uc->uc_lock, MA_OWNED); 401 if (uc->uc_busy) { 402 #ifdef SMP 403 if (smp_cpus > 1) { 404 int count = BUSY_SPINS; 405 if (count > 0) { 406 umtxq_unlock(key); 407 while (uc->uc_busy && --count > 0) 408 cpu_spinwait(); 409 umtxq_lock(key); 410 } 411 } 412 #endif 413 while (uc->uc_busy) { 414 uc->uc_waiters++; 415 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 416 uc->uc_waiters--; 417 } 418 } 419 uc->uc_busy = 1; 420 } 421 422 /* 423 * Unbusy a chain. 424 */ 425 void 426 umtxq_unbusy(struct umtx_key *key) 427 { 428 struct umtxq_chain *uc; 429 430 uc = umtxq_getchain(key); 431 mtx_assert(&uc->uc_lock, MA_OWNED); 432 KASSERT(uc->uc_busy != 0, ("not busy")); 433 uc->uc_busy = 0; 434 if (uc->uc_waiters) 435 wakeup_one(uc); 436 } 437 438 static inline void 439 umtxq_unbusy_unlocked(struct umtx_key *key) 440 { 441 442 umtxq_lock(key); 443 umtxq_unbusy(key); 444 umtxq_unlock(key); 445 } 446 447 static struct umtxq_queue * 448 umtxq_queue_lookup(struct umtx_key *key, int q) 449 { 450 struct umtxq_queue *uh; 451 struct umtxq_chain *uc; 452 453 uc = umtxq_getchain(key); 454 UMTXQ_LOCKED_ASSERT(uc); 455 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 456 if (umtx_key_match(&uh->key, key)) 457 return (uh); 458 } 459 460 return (NULL); 461 } 462 463 void 464 umtxq_insert_queue(struct umtx_q *uq, int q) 465 { 466 struct umtxq_queue *uh; 467 struct umtxq_chain *uc; 468 469 uc = umtxq_getchain(&uq->uq_key); 470 UMTXQ_LOCKED_ASSERT(uc); 471 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 472 uh = umtxq_queue_lookup(&uq->uq_key, q); 473 if (uh != NULL) { 474 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 475 } else { 476 uh = uq->uq_spare_queue; 477 uh->key = uq->uq_key; 478 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 479 #ifdef UMTX_PROFILING 480 uc->length++; 481 if (uc->length > uc->max_length) { 482 uc->max_length = uc->length; 483 if (uc->max_length > max_length) 484 max_length = uc->max_length; 485 } 486 #endif 487 } 488 uq->uq_spare_queue = NULL; 489 490 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 491 uh->length++; 492 uq->uq_flags |= UQF_UMTXQ; 493 uq->uq_cur_queue = uh; 494 return; 495 } 496 497 void 498 umtxq_remove_queue(struct umtx_q *uq, int q) 499 { 500 struct umtxq_chain *uc; 501 struct umtxq_queue *uh; 502 503 uc = umtxq_getchain(&uq->uq_key); 504 UMTXQ_LOCKED_ASSERT(uc); 505 if (uq->uq_flags & UQF_UMTXQ) { 506 uh = uq->uq_cur_queue; 507 TAILQ_REMOVE(&uh->head, uq, uq_link); 508 uh->length--; 509 uq->uq_flags &= ~UQF_UMTXQ; 510 if (TAILQ_EMPTY(&uh->head)) { 511 KASSERT(uh->length == 0, 512 ("inconsistent umtxq_queue length")); 513 #ifdef UMTX_PROFILING 514 uc->length--; 515 #endif 516 LIST_REMOVE(uh, link); 517 } else { 518 uh = LIST_FIRST(&uc->uc_spare_queue); 519 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 520 LIST_REMOVE(uh, link); 521 } 522 uq->uq_spare_queue = uh; 523 uq->uq_cur_queue = NULL; 524 } 525 } 526 527 /* 528 * Check if there are multiple waiters 529 */ 530 int 531 umtxq_count(struct umtx_key *key) 532 { 533 struct umtxq_queue *uh; 534 535 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 536 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 537 if (uh != NULL) 538 return (uh->length); 539 return (0); 540 } 541 542 /* 543 * Check if there are multiple PI waiters and returns first 544 * waiter. 545 */ 546 static int 547 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 548 { 549 struct umtxq_queue *uh; 550 551 *first = NULL; 552 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 553 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 554 if (uh != NULL) { 555 *first = TAILQ_FIRST(&uh->head); 556 return (uh->length); 557 } 558 return (0); 559 } 560 561 /* 562 * Wake up threads waiting on an userland object by a bit mask. 563 */ 564 int 565 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 566 { 567 struct umtxq_queue *uh; 568 struct umtx_q *uq, *uq_temp; 569 int ret; 570 571 ret = 0; 572 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 573 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 574 if (uh == NULL) 575 return (0); 576 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 577 if ((uq->uq_bitset & bitset) == 0) 578 continue; 579 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 580 wakeup_one(uq); 581 if (++ret >= n_wake) 582 break; 583 } 584 return (ret); 585 } 586 587 /* 588 * Wake up threads waiting on an userland object. 589 */ 590 591 static int 592 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 593 { 594 struct umtxq_queue *uh; 595 struct umtx_q *uq; 596 int ret; 597 598 ret = 0; 599 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 600 uh = umtxq_queue_lookup(key, q); 601 if (uh != NULL) { 602 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 603 umtxq_remove_queue(uq, q); 604 wakeup(uq); 605 if (++ret >= n_wake) 606 return (ret); 607 } 608 } 609 return (ret); 610 } 611 612 /* 613 * Wake up specified thread. 614 */ 615 static inline void 616 umtxq_signal_thread(struct umtx_q *uq) 617 { 618 619 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 620 umtxq_remove(uq); 621 wakeup(uq); 622 } 623 624 static inline int 625 tstohz(const struct timespec *tsp) 626 { 627 struct timeval tv; 628 629 TIMESPEC_TO_TIMEVAL(&tv, tsp); 630 return tvtohz(&tv); 631 } 632 633 void 634 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 635 int absolute, const struct timespec *timeout) 636 { 637 638 timo->clockid = clockid; 639 if (!absolute) { 640 timo->is_abs_real = false; 641 umtx_abs_timeout_update(timo); 642 timespecadd(&timo->cur, timeout, &timo->end); 643 } else { 644 timo->end = *timeout; 645 timo->is_abs_real = clockid == CLOCK_REALTIME || 646 clockid == CLOCK_REALTIME_FAST || 647 clockid == CLOCK_REALTIME_PRECISE; 648 /* 649 * If is_abs_real, umtxq_sleep will read the clock 650 * after setting td_rtcgen; otherwise, read it here. 651 */ 652 if (!timo->is_abs_real) { 653 umtx_abs_timeout_update(timo); 654 } 655 } 656 } 657 658 static void 659 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 660 const struct _umtx_time *umtxtime) 661 { 662 663 umtx_abs_timeout_init(timo, umtxtime->_clockid, 664 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 665 } 666 667 static void 668 umtx_abs_timeout_update(struct umtx_abs_timeout *timo) 669 { 670 671 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 672 } 673 674 static int 675 umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo) 676 { 677 struct timespec tts; 678 679 if (timespeccmp(&timo->end, &timo->cur, <=)) 680 return (-1); 681 timespecsub(&timo->end, &timo->cur, &tts); 682 return (tstohz(&tts)); 683 } 684 685 static uint32_t 686 umtx_unlock_val(uint32_t flags, bool rb) 687 { 688 689 if (rb) 690 return (UMUTEX_RB_OWNERDEAD); 691 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 692 return (UMUTEX_RB_NOTRECOV); 693 else 694 return (UMUTEX_UNOWNED); 695 696 } 697 698 /* 699 * Put thread into sleep state, before sleeping, check if 700 * thread was removed from umtx queue. 701 */ 702 int 703 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 704 struct umtx_abs_timeout *abstime) 705 { 706 struct umtxq_chain *uc; 707 int error, timo; 708 709 if (abstime != NULL && abstime->is_abs_real) { 710 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); 711 umtx_abs_timeout_update(abstime); 712 } 713 714 uc = umtxq_getchain(&uq->uq_key); 715 UMTXQ_LOCKED_ASSERT(uc); 716 for (;;) { 717 if (!(uq->uq_flags & UQF_UMTXQ)) { 718 error = 0; 719 break; 720 } 721 if (abstime != NULL) { 722 timo = umtx_abs_timeout_gethz(abstime); 723 if (timo < 0) { 724 error = ETIMEDOUT; 725 break; 726 } 727 } else 728 timo = 0; 729 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 730 if (error == EINTR || error == ERESTART) { 731 umtxq_lock(&uq->uq_key); 732 break; 733 } 734 if (abstime != NULL) { 735 if (abstime->is_abs_real) 736 curthread->td_rtcgen = 737 atomic_load_acq_int(&rtc_generation); 738 umtx_abs_timeout_update(abstime); 739 } 740 umtxq_lock(&uq->uq_key); 741 } 742 743 curthread->td_rtcgen = 0; 744 return (error); 745 } 746 747 /* 748 * Convert userspace address into unique logical address. 749 */ 750 int 751 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 752 { 753 struct thread *td = curthread; 754 vm_map_t map; 755 vm_map_entry_t entry; 756 vm_pindex_t pindex; 757 vm_prot_t prot; 758 boolean_t wired; 759 760 key->type = type; 761 if (share == THREAD_SHARE) { 762 key->shared = 0; 763 key->info.private.vs = td->td_proc->p_vmspace; 764 key->info.private.addr = (uintptr_t)addr; 765 } else { 766 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 767 map = &td->td_proc->p_vmspace->vm_map; 768 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 769 &entry, &key->info.shared.object, &pindex, &prot, 770 &wired) != KERN_SUCCESS) { 771 return (EFAULT); 772 } 773 774 if ((share == PROCESS_SHARE) || 775 (share == AUTO_SHARE && 776 VM_INHERIT_SHARE == entry->inheritance)) { 777 key->shared = 1; 778 key->info.shared.offset = (vm_offset_t)addr - 779 entry->start + entry->offset; 780 vm_object_reference(key->info.shared.object); 781 } else { 782 key->shared = 0; 783 key->info.private.vs = td->td_proc->p_vmspace; 784 key->info.private.addr = (uintptr_t)addr; 785 } 786 vm_map_lookup_done(map, entry); 787 } 788 789 umtxq_hash(key); 790 return (0); 791 } 792 793 /* 794 * Release key. 795 */ 796 void 797 umtx_key_release(struct umtx_key *key) 798 { 799 if (key->shared) 800 vm_object_deallocate(key->info.shared.object); 801 } 802 803 #ifdef COMPAT_FREEBSD10 804 /* 805 * Lock a umtx object. 806 */ 807 static int 808 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 809 const struct timespec *timeout) 810 { 811 struct umtx_abs_timeout timo; 812 struct umtx_q *uq; 813 u_long owner; 814 u_long old; 815 int error = 0; 816 817 uq = td->td_umtxq; 818 if (timeout != NULL) 819 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 820 821 /* 822 * Care must be exercised when dealing with umtx structure. It 823 * can fault on any access. 824 */ 825 for (;;) { 826 /* 827 * Try the uncontested case. This should be done in userland. 828 */ 829 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 830 831 /* The acquire succeeded. */ 832 if (owner == UMTX_UNOWNED) 833 return (0); 834 835 /* The address was invalid. */ 836 if (owner == -1) 837 return (EFAULT); 838 839 /* If no one owns it but it is contested try to acquire it. */ 840 if (owner == UMTX_CONTESTED) { 841 owner = casuword(&umtx->u_owner, 842 UMTX_CONTESTED, id | UMTX_CONTESTED); 843 844 if (owner == UMTX_CONTESTED) 845 return (0); 846 847 /* The address was invalid. */ 848 if (owner == -1) 849 return (EFAULT); 850 851 error = thread_check_susp(td, false); 852 if (error != 0) 853 break; 854 855 /* If this failed the lock has changed, restart. */ 856 continue; 857 } 858 859 /* 860 * If we caught a signal, we have retried and now 861 * exit immediately. 862 */ 863 if (error != 0) 864 break; 865 866 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 867 AUTO_SHARE, &uq->uq_key)) != 0) 868 return (error); 869 870 umtxq_lock(&uq->uq_key); 871 umtxq_busy(&uq->uq_key); 872 umtxq_insert(uq); 873 umtxq_unbusy(&uq->uq_key); 874 umtxq_unlock(&uq->uq_key); 875 876 /* 877 * Set the contested bit so that a release in user space 878 * knows to use the system call for unlock. If this fails 879 * either some one else has acquired the lock or it has been 880 * released. 881 */ 882 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 883 884 /* The address was invalid. */ 885 if (old == -1) { 886 umtxq_lock(&uq->uq_key); 887 umtxq_remove(uq); 888 umtxq_unlock(&uq->uq_key); 889 umtx_key_release(&uq->uq_key); 890 return (EFAULT); 891 } 892 893 /* 894 * We set the contested bit, sleep. Otherwise the lock changed 895 * and we need to retry or we lost a race to the thread 896 * unlocking the umtx. 897 */ 898 umtxq_lock(&uq->uq_key); 899 if (old == owner) 900 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 901 &timo); 902 umtxq_remove(uq); 903 umtxq_unlock(&uq->uq_key); 904 umtx_key_release(&uq->uq_key); 905 906 if (error == 0) 907 error = thread_check_susp(td, false); 908 } 909 910 if (timeout == NULL) { 911 /* Mutex locking is restarted if it is interrupted. */ 912 if (error == EINTR) 913 error = ERESTART; 914 } else { 915 /* Timed-locking is not restarted. */ 916 if (error == ERESTART) 917 error = EINTR; 918 } 919 return (error); 920 } 921 922 /* 923 * Unlock a umtx object. 924 */ 925 static int 926 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 927 { 928 struct umtx_key key; 929 u_long owner; 930 u_long old; 931 int error; 932 int count; 933 934 /* 935 * Make sure we own this mtx. 936 */ 937 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 938 if (owner == -1) 939 return (EFAULT); 940 941 if ((owner & ~UMTX_CONTESTED) != id) 942 return (EPERM); 943 944 /* This should be done in userland */ 945 if ((owner & UMTX_CONTESTED) == 0) { 946 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 947 if (old == -1) 948 return (EFAULT); 949 if (old == owner) 950 return (0); 951 owner = old; 952 } 953 954 /* We should only ever be in here for contested locks */ 955 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 956 &key)) != 0) 957 return (error); 958 959 umtxq_lock(&key); 960 umtxq_busy(&key); 961 count = umtxq_count(&key); 962 umtxq_unlock(&key); 963 964 /* 965 * When unlocking the umtx, it must be marked as unowned if 966 * there is zero or one thread only waiting for it. 967 * Otherwise, it must be marked as contested. 968 */ 969 old = casuword(&umtx->u_owner, owner, 970 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 971 umtxq_lock(&key); 972 umtxq_signal(&key,1); 973 umtxq_unbusy(&key); 974 umtxq_unlock(&key); 975 umtx_key_release(&key); 976 if (old == -1) 977 return (EFAULT); 978 if (old != owner) 979 return (EINVAL); 980 return (0); 981 } 982 983 #ifdef COMPAT_FREEBSD32 984 985 /* 986 * Lock a umtx object. 987 */ 988 static int 989 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 990 const struct timespec *timeout) 991 { 992 struct umtx_abs_timeout timo; 993 struct umtx_q *uq; 994 uint32_t owner; 995 uint32_t old; 996 int error = 0; 997 998 uq = td->td_umtxq; 999 1000 if (timeout != NULL) 1001 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1002 1003 /* 1004 * Care must be exercised when dealing with umtx structure. It 1005 * can fault on any access. 1006 */ 1007 for (;;) { 1008 /* 1009 * Try the uncontested case. This should be done in userland. 1010 */ 1011 owner = casuword32(m, UMUTEX_UNOWNED, id); 1012 1013 /* The acquire succeeded. */ 1014 if (owner == UMUTEX_UNOWNED) 1015 return (0); 1016 1017 /* The address was invalid. */ 1018 if (owner == -1) 1019 return (EFAULT); 1020 1021 /* If no one owns it but it is contested try to acquire it. */ 1022 if (owner == UMUTEX_CONTESTED) { 1023 owner = casuword32(m, 1024 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1025 if (owner == UMUTEX_CONTESTED) 1026 return (0); 1027 1028 /* The address was invalid. */ 1029 if (owner == -1) 1030 return (EFAULT); 1031 1032 error = thread_check_susp(td, false); 1033 if (error != 0) 1034 break; 1035 1036 /* If this failed the lock has changed, restart. */ 1037 continue; 1038 } 1039 1040 /* 1041 * If we caught a signal, we have retried and now 1042 * exit immediately. 1043 */ 1044 if (error != 0) 1045 return (error); 1046 1047 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1048 AUTO_SHARE, &uq->uq_key)) != 0) 1049 return (error); 1050 1051 umtxq_lock(&uq->uq_key); 1052 umtxq_busy(&uq->uq_key); 1053 umtxq_insert(uq); 1054 umtxq_unbusy(&uq->uq_key); 1055 umtxq_unlock(&uq->uq_key); 1056 1057 /* 1058 * Set the contested bit so that a release in user space 1059 * knows to use the system call for unlock. If this fails 1060 * either some one else has acquired the lock or it has been 1061 * released. 1062 */ 1063 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1064 1065 /* The address was invalid. */ 1066 if (old == -1) { 1067 umtxq_lock(&uq->uq_key); 1068 umtxq_remove(uq); 1069 umtxq_unlock(&uq->uq_key); 1070 umtx_key_release(&uq->uq_key); 1071 return (EFAULT); 1072 } 1073 1074 /* 1075 * We set the contested bit, sleep. Otherwise the lock changed 1076 * and we need to retry or we lost a race to the thread 1077 * unlocking the umtx. 1078 */ 1079 umtxq_lock(&uq->uq_key); 1080 if (old == owner) 1081 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1082 NULL : &timo); 1083 umtxq_remove(uq); 1084 umtxq_unlock(&uq->uq_key); 1085 umtx_key_release(&uq->uq_key); 1086 1087 if (error == 0) 1088 error = thread_check_susp(td, false); 1089 } 1090 1091 if (timeout == NULL) { 1092 /* Mutex locking is restarted if it is interrupted. */ 1093 if (error == EINTR) 1094 error = ERESTART; 1095 } else { 1096 /* Timed-locking is not restarted. */ 1097 if (error == ERESTART) 1098 error = EINTR; 1099 } 1100 return (error); 1101 } 1102 1103 /* 1104 * Unlock a umtx object. 1105 */ 1106 static int 1107 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1108 { 1109 struct umtx_key key; 1110 uint32_t owner; 1111 uint32_t old; 1112 int error; 1113 int count; 1114 1115 /* 1116 * Make sure we own this mtx. 1117 */ 1118 owner = fuword32(m); 1119 if (owner == -1) 1120 return (EFAULT); 1121 1122 if ((owner & ~UMUTEX_CONTESTED) != id) 1123 return (EPERM); 1124 1125 /* This should be done in userland */ 1126 if ((owner & UMUTEX_CONTESTED) == 0) { 1127 old = casuword32(m, owner, UMUTEX_UNOWNED); 1128 if (old == -1) 1129 return (EFAULT); 1130 if (old == owner) 1131 return (0); 1132 owner = old; 1133 } 1134 1135 /* We should only ever be in here for contested locks */ 1136 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1137 &key)) != 0) 1138 return (error); 1139 1140 umtxq_lock(&key); 1141 umtxq_busy(&key); 1142 count = umtxq_count(&key); 1143 umtxq_unlock(&key); 1144 1145 /* 1146 * When unlocking the umtx, it must be marked as unowned if 1147 * there is zero or one thread only waiting for it. 1148 * Otherwise, it must be marked as contested. 1149 */ 1150 old = casuword32(m, owner, 1151 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1152 umtxq_lock(&key); 1153 umtxq_signal(&key,1); 1154 umtxq_unbusy(&key); 1155 umtxq_unlock(&key); 1156 umtx_key_release(&key); 1157 if (old == -1) 1158 return (EFAULT); 1159 if (old != owner) 1160 return (EINVAL); 1161 return (0); 1162 } 1163 #endif /* COMPAT_FREEBSD32 */ 1164 #endif /* COMPAT_FREEBSD10 */ 1165 1166 /* 1167 * Fetch and compare value, sleep on the address if value is not changed. 1168 */ 1169 static int 1170 do_wait(struct thread *td, void *addr, u_long id, 1171 struct _umtx_time *timeout, int compat32, int is_private) 1172 { 1173 struct umtx_abs_timeout timo; 1174 struct umtx_q *uq; 1175 u_long tmp; 1176 uint32_t tmp32; 1177 int error = 0; 1178 1179 uq = td->td_umtxq; 1180 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1181 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1182 return (error); 1183 1184 if (timeout != NULL) 1185 umtx_abs_timeout_init2(&timo, timeout); 1186 1187 umtxq_lock(&uq->uq_key); 1188 umtxq_insert(uq); 1189 umtxq_unlock(&uq->uq_key); 1190 if (compat32 == 0) { 1191 error = fueword(addr, &tmp); 1192 if (error != 0) 1193 error = EFAULT; 1194 } else { 1195 error = fueword32(addr, &tmp32); 1196 if (error == 0) 1197 tmp = tmp32; 1198 else 1199 error = EFAULT; 1200 } 1201 umtxq_lock(&uq->uq_key); 1202 if (error == 0) { 1203 if (tmp == id) 1204 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1205 NULL : &timo); 1206 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1207 error = 0; 1208 else 1209 umtxq_remove(uq); 1210 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1211 umtxq_remove(uq); 1212 } 1213 umtxq_unlock(&uq->uq_key); 1214 umtx_key_release(&uq->uq_key); 1215 if (error == ERESTART) 1216 error = EINTR; 1217 return (error); 1218 } 1219 1220 /* 1221 * Wake up threads sleeping on the specified address. 1222 */ 1223 int 1224 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1225 { 1226 struct umtx_key key; 1227 int ret; 1228 1229 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1230 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1231 return (ret); 1232 umtxq_lock(&key); 1233 umtxq_signal(&key, n_wake); 1234 umtxq_unlock(&key); 1235 umtx_key_release(&key); 1236 return (0); 1237 } 1238 1239 /* 1240 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1241 */ 1242 static int 1243 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1244 struct _umtx_time *timeout, int mode) 1245 { 1246 struct umtx_abs_timeout timo; 1247 struct umtx_q *uq; 1248 uint32_t owner, old, id; 1249 int error, rv; 1250 1251 id = td->td_tid; 1252 uq = td->td_umtxq; 1253 error = 0; 1254 if (timeout != NULL) 1255 umtx_abs_timeout_init2(&timo, timeout); 1256 1257 /* 1258 * Care must be exercised when dealing with umtx structure. It 1259 * can fault on any access. 1260 */ 1261 for (;;) { 1262 rv = fueword32(&m->m_owner, &owner); 1263 if (rv == -1) 1264 return (EFAULT); 1265 if (mode == _UMUTEX_WAIT) { 1266 if (owner == UMUTEX_UNOWNED || 1267 owner == UMUTEX_CONTESTED || 1268 owner == UMUTEX_RB_OWNERDEAD || 1269 owner == UMUTEX_RB_NOTRECOV) 1270 return (0); 1271 } else { 1272 /* 1273 * Robust mutex terminated. Kernel duty is to 1274 * return EOWNERDEAD to the userspace. The 1275 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1276 * by the common userspace code. 1277 */ 1278 if (owner == UMUTEX_RB_OWNERDEAD) { 1279 rv = casueword32(&m->m_owner, 1280 UMUTEX_RB_OWNERDEAD, &owner, 1281 id | UMUTEX_CONTESTED); 1282 if (rv == -1) 1283 return (EFAULT); 1284 if (rv == 0) { 1285 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1286 return (EOWNERDEAD); /* success */ 1287 } 1288 MPASS(rv == 1); 1289 rv = thread_check_susp(td, false); 1290 if (rv != 0) 1291 return (rv); 1292 continue; 1293 } 1294 if (owner == UMUTEX_RB_NOTRECOV) 1295 return (ENOTRECOVERABLE); 1296 1297 /* 1298 * Try the uncontested case. This should be 1299 * done in userland. 1300 */ 1301 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1302 &owner, id); 1303 /* The address was invalid. */ 1304 if (rv == -1) 1305 return (EFAULT); 1306 1307 /* The acquire succeeded. */ 1308 if (rv == 0) { 1309 MPASS(owner == UMUTEX_UNOWNED); 1310 return (0); 1311 } 1312 1313 /* 1314 * If no one owns it but it is contested try 1315 * to acquire it. 1316 */ 1317 MPASS(rv == 1); 1318 if (owner == UMUTEX_CONTESTED) { 1319 rv = casueword32(&m->m_owner, 1320 UMUTEX_CONTESTED, &owner, 1321 id | UMUTEX_CONTESTED); 1322 /* The address was invalid. */ 1323 if (rv == -1) 1324 return (EFAULT); 1325 if (rv == 0) { 1326 MPASS(owner == UMUTEX_CONTESTED); 1327 return (0); 1328 } 1329 if (rv == 1) { 1330 rv = thread_check_susp(td, false); 1331 if (rv != 0) 1332 return (rv); 1333 } 1334 1335 /* 1336 * If this failed the lock has 1337 * changed, restart. 1338 */ 1339 continue; 1340 } 1341 1342 /* rv == 1 but not contested, likely store failure */ 1343 rv = thread_check_susp(td, false); 1344 if (rv != 0) 1345 return (rv); 1346 } 1347 1348 if (mode == _UMUTEX_TRY) 1349 return (EBUSY); 1350 1351 /* 1352 * If we caught a signal, we have retried and now 1353 * exit immediately. 1354 */ 1355 if (error != 0) 1356 return (error); 1357 1358 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1359 GET_SHARE(flags), &uq->uq_key)) != 0) 1360 return (error); 1361 1362 umtxq_lock(&uq->uq_key); 1363 umtxq_busy(&uq->uq_key); 1364 umtxq_insert(uq); 1365 umtxq_unlock(&uq->uq_key); 1366 1367 /* 1368 * Set the contested bit so that a release in user space 1369 * knows to use the system call for unlock. If this fails 1370 * either some one else has acquired the lock or it has been 1371 * released. 1372 */ 1373 rv = casueword32(&m->m_owner, owner, &old, 1374 owner | UMUTEX_CONTESTED); 1375 1376 /* The address was invalid or casueword failed to store. */ 1377 if (rv == -1 || rv == 1) { 1378 umtxq_lock(&uq->uq_key); 1379 umtxq_remove(uq); 1380 umtxq_unbusy(&uq->uq_key); 1381 umtxq_unlock(&uq->uq_key); 1382 umtx_key_release(&uq->uq_key); 1383 if (rv == -1) 1384 return (EFAULT); 1385 if (rv == 1) { 1386 rv = thread_check_susp(td, false); 1387 if (rv != 0) 1388 return (rv); 1389 } 1390 continue; 1391 } 1392 1393 /* 1394 * We set the contested bit, sleep. Otherwise the lock changed 1395 * and we need to retry or we lost a race to the thread 1396 * unlocking the umtx. 1397 */ 1398 umtxq_lock(&uq->uq_key); 1399 umtxq_unbusy(&uq->uq_key); 1400 MPASS(old == owner); 1401 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1402 NULL : &timo); 1403 umtxq_remove(uq); 1404 umtxq_unlock(&uq->uq_key); 1405 umtx_key_release(&uq->uq_key); 1406 1407 if (error == 0) 1408 error = thread_check_susp(td, false); 1409 } 1410 1411 return (0); 1412 } 1413 1414 /* 1415 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1416 */ 1417 static int 1418 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1419 { 1420 struct umtx_key key; 1421 uint32_t owner, old, id, newlock; 1422 int error, count; 1423 1424 id = td->td_tid; 1425 1426 again: 1427 /* 1428 * Make sure we own this mtx. 1429 */ 1430 error = fueword32(&m->m_owner, &owner); 1431 if (error == -1) 1432 return (EFAULT); 1433 1434 if ((owner & ~UMUTEX_CONTESTED) != id) 1435 return (EPERM); 1436 1437 newlock = umtx_unlock_val(flags, rb); 1438 if ((owner & UMUTEX_CONTESTED) == 0) { 1439 error = casueword32(&m->m_owner, owner, &old, newlock); 1440 if (error == -1) 1441 return (EFAULT); 1442 if (error == 1) { 1443 error = thread_check_susp(td, false); 1444 if (error != 0) 1445 return (error); 1446 goto again; 1447 } 1448 MPASS(old == owner); 1449 return (0); 1450 } 1451 1452 /* We should only ever be in here for contested locks */ 1453 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1454 &key)) != 0) 1455 return (error); 1456 1457 umtxq_lock(&key); 1458 umtxq_busy(&key); 1459 count = umtxq_count(&key); 1460 umtxq_unlock(&key); 1461 1462 /* 1463 * When unlocking the umtx, it must be marked as unowned if 1464 * there is zero or one thread only waiting for it. 1465 * Otherwise, it must be marked as contested. 1466 */ 1467 if (count > 1) 1468 newlock |= UMUTEX_CONTESTED; 1469 error = casueword32(&m->m_owner, owner, &old, newlock); 1470 umtxq_lock(&key); 1471 umtxq_signal(&key, 1); 1472 umtxq_unbusy(&key); 1473 umtxq_unlock(&key); 1474 umtx_key_release(&key); 1475 if (error == -1) 1476 return (EFAULT); 1477 if (error == 1) { 1478 if (old != owner) 1479 return (EINVAL); 1480 error = thread_check_susp(td, false); 1481 if (error != 0) 1482 return (error); 1483 goto again; 1484 } 1485 return (0); 1486 } 1487 1488 /* 1489 * Check if the mutex is available and wake up a waiter, 1490 * only for simple mutex. 1491 */ 1492 static int 1493 do_wake_umutex(struct thread *td, struct umutex *m) 1494 { 1495 struct umtx_key key; 1496 uint32_t owner; 1497 uint32_t flags; 1498 int error; 1499 int count; 1500 1501 again: 1502 error = fueword32(&m->m_owner, &owner); 1503 if (error == -1) 1504 return (EFAULT); 1505 1506 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1507 owner != UMUTEX_RB_NOTRECOV) 1508 return (0); 1509 1510 error = fueword32(&m->m_flags, &flags); 1511 if (error == -1) 1512 return (EFAULT); 1513 1514 /* We should only ever be in here for contested locks */ 1515 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1516 &key)) != 0) 1517 return (error); 1518 1519 umtxq_lock(&key); 1520 umtxq_busy(&key); 1521 count = umtxq_count(&key); 1522 umtxq_unlock(&key); 1523 1524 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1525 owner != UMUTEX_RB_NOTRECOV) { 1526 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1527 UMUTEX_UNOWNED); 1528 if (error == -1) { 1529 error = EFAULT; 1530 } else if (error == 1) { 1531 umtxq_lock(&key); 1532 umtxq_unbusy(&key); 1533 umtxq_unlock(&key); 1534 umtx_key_release(&key); 1535 error = thread_check_susp(td, false); 1536 if (error != 0) 1537 return (error); 1538 goto again; 1539 } 1540 } 1541 1542 umtxq_lock(&key); 1543 if (error == 0 && count != 0) { 1544 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1545 owner == UMUTEX_RB_OWNERDEAD || 1546 owner == UMUTEX_RB_NOTRECOV); 1547 umtxq_signal(&key, 1); 1548 } 1549 umtxq_unbusy(&key); 1550 umtxq_unlock(&key); 1551 umtx_key_release(&key); 1552 return (error); 1553 } 1554 1555 /* 1556 * Check if the mutex has waiters and tries to fix contention bit. 1557 */ 1558 static int 1559 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1560 { 1561 struct umtx_key key; 1562 uint32_t owner, old; 1563 int type; 1564 int error; 1565 int count; 1566 1567 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1568 UMUTEX_ROBUST)) { 1569 case 0: 1570 case UMUTEX_ROBUST: 1571 type = TYPE_NORMAL_UMUTEX; 1572 break; 1573 case UMUTEX_PRIO_INHERIT: 1574 type = TYPE_PI_UMUTEX; 1575 break; 1576 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1577 type = TYPE_PI_ROBUST_UMUTEX; 1578 break; 1579 case UMUTEX_PRIO_PROTECT: 1580 type = TYPE_PP_UMUTEX; 1581 break; 1582 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1583 type = TYPE_PP_ROBUST_UMUTEX; 1584 break; 1585 default: 1586 return (EINVAL); 1587 } 1588 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1589 return (error); 1590 1591 owner = 0; 1592 umtxq_lock(&key); 1593 umtxq_busy(&key); 1594 count = umtxq_count(&key); 1595 umtxq_unlock(&key); 1596 1597 error = fueword32(&m->m_owner, &owner); 1598 if (error == -1) 1599 error = EFAULT; 1600 1601 /* 1602 * Only repair contention bit if there is a waiter, this means 1603 * the mutex is still being referenced by userland code, 1604 * otherwise don't update any memory. 1605 */ 1606 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1607 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1608 error = casueword32(&m->m_owner, owner, &old, 1609 owner | UMUTEX_CONTESTED); 1610 if (error == -1) { 1611 error = EFAULT; 1612 break; 1613 } 1614 if (error == 0) { 1615 MPASS(old == owner); 1616 break; 1617 } 1618 owner = old; 1619 error = thread_check_susp(td, false); 1620 } 1621 1622 umtxq_lock(&key); 1623 if (error == EFAULT) { 1624 umtxq_signal(&key, INT_MAX); 1625 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1626 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1627 umtxq_signal(&key, 1); 1628 umtxq_unbusy(&key); 1629 umtxq_unlock(&key); 1630 umtx_key_release(&key); 1631 return (error); 1632 } 1633 1634 static inline struct umtx_pi * 1635 umtx_pi_alloc(int flags) 1636 { 1637 struct umtx_pi *pi; 1638 1639 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1640 TAILQ_INIT(&pi->pi_blocked); 1641 atomic_add_int(&umtx_pi_allocated, 1); 1642 return (pi); 1643 } 1644 1645 static inline void 1646 umtx_pi_free(struct umtx_pi *pi) 1647 { 1648 uma_zfree(umtx_pi_zone, pi); 1649 atomic_add_int(&umtx_pi_allocated, -1); 1650 } 1651 1652 /* 1653 * Adjust the thread's position on a pi_state after its priority has been 1654 * changed. 1655 */ 1656 static int 1657 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1658 { 1659 struct umtx_q *uq, *uq1, *uq2; 1660 struct thread *td1; 1661 1662 mtx_assert(&umtx_lock, MA_OWNED); 1663 if (pi == NULL) 1664 return (0); 1665 1666 uq = td->td_umtxq; 1667 1668 /* 1669 * Check if the thread needs to be moved on the blocked chain. 1670 * It needs to be moved if either its priority is lower than 1671 * the previous thread or higher than the next thread. 1672 */ 1673 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1674 uq2 = TAILQ_NEXT(uq, uq_lockq); 1675 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1676 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1677 /* 1678 * Remove thread from blocked chain and determine where 1679 * it should be moved to. 1680 */ 1681 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1682 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1683 td1 = uq1->uq_thread; 1684 MPASS(td1->td_proc->p_magic == P_MAGIC); 1685 if (UPRI(td1) > UPRI(td)) 1686 break; 1687 } 1688 1689 if (uq1 == NULL) 1690 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1691 else 1692 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1693 } 1694 return (1); 1695 } 1696 1697 static struct umtx_pi * 1698 umtx_pi_next(struct umtx_pi *pi) 1699 { 1700 struct umtx_q *uq_owner; 1701 1702 if (pi->pi_owner == NULL) 1703 return (NULL); 1704 uq_owner = pi->pi_owner->td_umtxq; 1705 if (uq_owner == NULL) 1706 return (NULL); 1707 return (uq_owner->uq_pi_blocked); 1708 } 1709 1710 /* 1711 * Floyd's Cycle-Finding Algorithm. 1712 */ 1713 static bool 1714 umtx_pi_check_loop(struct umtx_pi *pi) 1715 { 1716 struct umtx_pi *pi1; /* fast iterator */ 1717 1718 mtx_assert(&umtx_lock, MA_OWNED); 1719 if (pi == NULL) 1720 return (false); 1721 pi1 = pi; 1722 for (;;) { 1723 pi = umtx_pi_next(pi); 1724 if (pi == NULL) 1725 break; 1726 pi1 = umtx_pi_next(pi1); 1727 if (pi1 == NULL) 1728 break; 1729 pi1 = umtx_pi_next(pi1); 1730 if (pi1 == NULL) 1731 break; 1732 if (pi == pi1) 1733 return (true); 1734 } 1735 return (false); 1736 } 1737 1738 /* 1739 * Propagate priority when a thread is blocked on POSIX 1740 * PI mutex. 1741 */ 1742 static void 1743 umtx_propagate_priority(struct thread *td) 1744 { 1745 struct umtx_q *uq; 1746 struct umtx_pi *pi; 1747 int pri; 1748 1749 mtx_assert(&umtx_lock, MA_OWNED); 1750 pri = UPRI(td); 1751 uq = td->td_umtxq; 1752 pi = uq->uq_pi_blocked; 1753 if (pi == NULL) 1754 return; 1755 if (umtx_pi_check_loop(pi)) 1756 return; 1757 1758 for (;;) { 1759 td = pi->pi_owner; 1760 if (td == NULL || td == curthread) 1761 return; 1762 1763 MPASS(td->td_proc != NULL); 1764 MPASS(td->td_proc->p_magic == P_MAGIC); 1765 1766 thread_lock(td); 1767 if (td->td_lend_user_pri > pri) 1768 sched_lend_user_prio(td, pri); 1769 else { 1770 thread_unlock(td); 1771 break; 1772 } 1773 thread_unlock(td); 1774 1775 /* 1776 * Pick up the lock that td is blocked on. 1777 */ 1778 uq = td->td_umtxq; 1779 pi = uq->uq_pi_blocked; 1780 if (pi == NULL) 1781 break; 1782 /* Resort td on the list if needed. */ 1783 umtx_pi_adjust_thread(pi, td); 1784 } 1785 } 1786 1787 /* 1788 * Unpropagate priority for a PI mutex when a thread blocked on 1789 * it is interrupted by signal or resumed by others. 1790 */ 1791 static void 1792 umtx_repropagate_priority(struct umtx_pi *pi) 1793 { 1794 struct umtx_q *uq, *uq_owner; 1795 struct umtx_pi *pi2; 1796 int pri; 1797 1798 mtx_assert(&umtx_lock, MA_OWNED); 1799 1800 if (umtx_pi_check_loop(pi)) 1801 return; 1802 while (pi != NULL && pi->pi_owner != NULL) { 1803 pri = PRI_MAX; 1804 uq_owner = pi->pi_owner->td_umtxq; 1805 1806 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1807 uq = TAILQ_FIRST(&pi2->pi_blocked); 1808 if (uq != NULL) { 1809 if (pri > UPRI(uq->uq_thread)) 1810 pri = UPRI(uq->uq_thread); 1811 } 1812 } 1813 1814 if (pri > uq_owner->uq_inherited_pri) 1815 pri = uq_owner->uq_inherited_pri; 1816 thread_lock(pi->pi_owner); 1817 sched_lend_user_prio(pi->pi_owner, pri); 1818 thread_unlock(pi->pi_owner); 1819 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1820 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1821 } 1822 } 1823 1824 /* 1825 * Insert a PI mutex into owned list. 1826 */ 1827 static void 1828 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1829 { 1830 struct umtx_q *uq_owner; 1831 1832 uq_owner = owner->td_umtxq; 1833 mtx_assert(&umtx_lock, MA_OWNED); 1834 MPASS(pi->pi_owner == NULL); 1835 pi->pi_owner = owner; 1836 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1837 } 1838 1839 /* 1840 * Disown a PI mutex, and remove it from the owned list. 1841 */ 1842 static void 1843 umtx_pi_disown(struct umtx_pi *pi) 1844 { 1845 1846 mtx_assert(&umtx_lock, MA_OWNED); 1847 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1848 pi->pi_owner = NULL; 1849 } 1850 1851 /* 1852 * Claim ownership of a PI mutex. 1853 */ 1854 static int 1855 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1856 { 1857 struct umtx_q *uq; 1858 int pri; 1859 1860 mtx_lock(&umtx_lock); 1861 if (pi->pi_owner == owner) { 1862 mtx_unlock(&umtx_lock); 1863 return (0); 1864 } 1865 1866 if (pi->pi_owner != NULL) { 1867 /* 1868 * userland may have already messed the mutex, sigh. 1869 */ 1870 mtx_unlock(&umtx_lock); 1871 return (EPERM); 1872 } 1873 umtx_pi_setowner(pi, owner); 1874 uq = TAILQ_FIRST(&pi->pi_blocked); 1875 if (uq != NULL) { 1876 pri = UPRI(uq->uq_thread); 1877 thread_lock(owner); 1878 if (pri < UPRI(owner)) 1879 sched_lend_user_prio(owner, pri); 1880 thread_unlock(owner); 1881 } 1882 mtx_unlock(&umtx_lock); 1883 return (0); 1884 } 1885 1886 /* 1887 * Adjust a thread's order position in its blocked PI mutex, 1888 * this may result new priority propagating process. 1889 */ 1890 void 1891 umtx_pi_adjust(struct thread *td, u_char oldpri) 1892 { 1893 struct umtx_q *uq; 1894 struct umtx_pi *pi; 1895 1896 uq = td->td_umtxq; 1897 mtx_lock(&umtx_lock); 1898 /* 1899 * Pick up the lock that td is blocked on. 1900 */ 1901 pi = uq->uq_pi_blocked; 1902 if (pi != NULL) { 1903 umtx_pi_adjust_thread(pi, td); 1904 umtx_repropagate_priority(pi); 1905 } 1906 mtx_unlock(&umtx_lock); 1907 } 1908 1909 /* 1910 * Sleep on a PI mutex. 1911 */ 1912 static int 1913 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1914 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 1915 { 1916 struct thread *td, *td1; 1917 struct umtx_q *uq1; 1918 int error, pri; 1919 #ifdef INVARIANTS 1920 struct umtxq_chain *uc; 1921 1922 uc = umtxq_getchain(&pi->pi_key); 1923 #endif 1924 error = 0; 1925 td = uq->uq_thread; 1926 KASSERT(td == curthread, ("inconsistent uq_thread")); 1927 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 1928 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1929 umtxq_insert(uq); 1930 mtx_lock(&umtx_lock); 1931 if (pi->pi_owner == NULL) { 1932 mtx_unlock(&umtx_lock); 1933 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 1934 mtx_lock(&umtx_lock); 1935 if (td1 != NULL) { 1936 if (pi->pi_owner == NULL) 1937 umtx_pi_setowner(pi, td1); 1938 PROC_UNLOCK(td1->td_proc); 1939 } 1940 } 1941 1942 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1943 pri = UPRI(uq1->uq_thread); 1944 if (pri > UPRI(td)) 1945 break; 1946 } 1947 1948 if (uq1 != NULL) 1949 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1950 else 1951 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1952 1953 uq->uq_pi_blocked = pi; 1954 thread_lock(td); 1955 td->td_flags |= TDF_UPIBLOCKED; 1956 thread_unlock(td); 1957 umtx_propagate_priority(td); 1958 mtx_unlock(&umtx_lock); 1959 umtxq_unbusy(&uq->uq_key); 1960 1961 error = umtxq_sleep(uq, wmesg, timo); 1962 umtxq_remove(uq); 1963 1964 mtx_lock(&umtx_lock); 1965 uq->uq_pi_blocked = NULL; 1966 thread_lock(td); 1967 td->td_flags &= ~TDF_UPIBLOCKED; 1968 thread_unlock(td); 1969 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1970 umtx_repropagate_priority(pi); 1971 mtx_unlock(&umtx_lock); 1972 umtxq_unlock(&uq->uq_key); 1973 1974 return (error); 1975 } 1976 1977 /* 1978 * Add reference count for a PI mutex. 1979 */ 1980 static void 1981 umtx_pi_ref(struct umtx_pi *pi) 1982 { 1983 1984 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 1985 pi->pi_refcount++; 1986 } 1987 1988 /* 1989 * Decrease reference count for a PI mutex, if the counter 1990 * is decreased to zero, its memory space is freed. 1991 */ 1992 static void 1993 umtx_pi_unref(struct umtx_pi *pi) 1994 { 1995 struct umtxq_chain *uc; 1996 1997 uc = umtxq_getchain(&pi->pi_key); 1998 UMTXQ_LOCKED_ASSERT(uc); 1999 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2000 if (--pi->pi_refcount == 0) { 2001 mtx_lock(&umtx_lock); 2002 if (pi->pi_owner != NULL) 2003 umtx_pi_disown(pi); 2004 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2005 ("blocked queue not empty")); 2006 mtx_unlock(&umtx_lock); 2007 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2008 umtx_pi_free(pi); 2009 } 2010 } 2011 2012 /* 2013 * Find a PI mutex in hash table. 2014 */ 2015 static struct umtx_pi * 2016 umtx_pi_lookup(struct umtx_key *key) 2017 { 2018 struct umtxq_chain *uc; 2019 struct umtx_pi *pi; 2020 2021 uc = umtxq_getchain(key); 2022 UMTXQ_LOCKED_ASSERT(uc); 2023 2024 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2025 if (umtx_key_match(&pi->pi_key, key)) { 2026 return (pi); 2027 } 2028 } 2029 return (NULL); 2030 } 2031 2032 /* 2033 * Insert a PI mutex into hash table. 2034 */ 2035 static inline void 2036 umtx_pi_insert(struct umtx_pi *pi) 2037 { 2038 struct umtxq_chain *uc; 2039 2040 uc = umtxq_getchain(&pi->pi_key); 2041 UMTXQ_LOCKED_ASSERT(uc); 2042 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2043 } 2044 2045 /* 2046 * Lock a PI mutex. 2047 */ 2048 static int 2049 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2050 struct _umtx_time *timeout, int try) 2051 { 2052 struct umtx_abs_timeout timo; 2053 struct umtx_q *uq; 2054 struct umtx_pi *pi, *new_pi; 2055 uint32_t id, old_owner, owner, old; 2056 int error, rv; 2057 2058 id = td->td_tid; 2059 uq = td->td_umtxq; 2060 2061 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2062 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2063 &uq->uq_key)) != 0) 2064 return (error); 2065 2066 if (timeout != NULL) 2067 umtx_abs_timeout_init2(&timo, timeout); 2068 2069 umtxq_lock(&uq->uq_key); 2070 pi = umtx_pi_lookup(&uq->uq_key); 2071 if (pi == NULL) { 2072 new_pi = umtx_pi_alloc(M_NOWAIT); 2073 if (new_pi == NULL) { 2074 umtxq_unlock(&uq->uq_key); 2075 new_pi = umtx_pi_alloc(M_WAITOK); 2076 umtxq_lock(&uq->uq_key); 2077 pi = umtx_pi_lookup(&uq->uq_key); 2078 if (pi != NULL) { 2079 umtx_pi_free(new_pi); 2080 new_pi = NULL; 2081 } 2082 } 2083 if (new_pi != NULL) { 2084 new_pi->pi_key = uq->uq_key; 2085 umtx_pi_insert(new_pi); 2086 pi = new_pi; 2087 } 2088 } 2089 umtx_pi_ref(pi); 2090 umtxq_unlock(&uq->uq_key); 2091 2092 /* 2093 * Care must be exercised when dealing with umtx structure. It 2094 * can fault on any access. 2095 */ 2096 for (;;) { 2097 /* 2098 * Try the uncontested case. This should be done in userland. 2099 */ 2100 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2101 /* The address was invalid. */ 2102 if (rv == -1) { 2103 error = EFAULT; 2104 break; 2105 } 2106 /* The acquire succeeded. */ 2107 if (rv == 0) { 2108 MPASS(owner == UMUTEX_UNOWNED); 2109 error = 0; 2110 break; 2111 } 2112 2113 if (owner == UMUTEX_RB_NOTRECOV) { 2114 error = ENOTRECOVERABLE; 2115 break; 2116 } 2117 2118 /* 2119 * Avoid overwriting a possible error from sleep due 2120 * to the pending signal with suspension check result. 2121 */ 2122 if (error == 0) { 2123 error = thread_check_susp(td, true); 2124 if (error != 0) 2125 break; 2126 } 2127 2128 /* If no one owns it but it is contested try to acquire it. */ 2129 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2130 old_owner = owner; 2131 rv = casueword32(&m->m_owner, owner, &owner, 2132 id | UMUTEX_CONTESTED); 2133 /* The address was invalid. */ 2134 if (rv == -1) { 2135 error = EFAULT; 2136 break; 2137 } 2138 if (rv == 1) { 2139 if (error == 0) { 2140 error = thread_check_susp(td, true); 2141 if (error != 0) 2142 break; 2143 } 2144 2145 /* 2146 * If this failed the lock could 2147 * changed, restart. 2148 */ 2149 continue; 2150 } 2151 2152 MPASS(rv == 0); 2153 MPASS(owner == old_owner); 2154 umtxq_lock(&uq->uq_key); 2155 umtxq_busy(&uq->uq_key); 2156 error = umtx_pi_claim(pi, td); 2157 umtxq_unbusy(&uq->uq_key); 2158 umtxq_unlock(&uq->uq_key); 2159 if (error != 0) { 2160 /* 2161 * Since we're going to return an 2162 * error, restore the m_owner to its 2163 * previous, unowned state to avoid 2164 * compounding the problem. 2165 */ 2166 (void)casuword32(&m->m_owner, 2167 id | UMUTEX_CONTESTED, old_owner); 2168 } 2169 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2170 error = EOWNERDEAD; 2171 break; 2172 } 2173 2174 if ((owner & ~UMUTEX_CONTESTED) == id) { 2175 error = EDEADLK; 2176 break; 2177 } 2178 2179 if (try != 0) { 2180 error = EBUSY; 2181 break; 2182 } 2183 2184 /* 2185 * If we caught a signal, we have retried and now 2186 * exit immediately. 2187 */ 2188 if (error != 0) 2189 break; 2190 2191 umtxq_lock(&uq->uq_key); 2192 umtxq_busy(&uq->uq_key); 2193 umtxq_unlock(&uq->uq_key); 2194 2195 /* 2196 * Set the contested bit so that a release in user space 2197 * knows to use the system call for unlock. If this fails 2198 * either some one else has acquired the lock or it has been 2199 * released. 2200 */ 2201 rv = casueword32(&m->m_owner, owner, &old, owner | 2202 UMUTEX_CONTESTED); 2203 2204 /* The address was invalid. */ 2205 if (rv == -1) { 2206 umtxq_unbusy_unlocked(&uq->uq_key); 2207 error = EFAULT; 2208 break; 2209 } 2210 if (rv == 1) { 2211 umtxq_unbusy_unlocked(&uq->uq_key); 2212 error = thread_check_susp(td, true); 2213 if (error != 0) 2214 break; 2215 2216 /* 2217 * The lock changed and we need to retry or we 2218 * lost a race to the thread unlocking the 2219 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2220 * value for owner is impossible there. 2221 */ 2222 continue; 2223 } 2224 2225 umtxq_lock(&uq->uq_key); 2226 2227 /* We set the contested bit, sleep. */ 2228 MPASS(old == owner); 2229 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2230 "umtxpi", timeout == NULL ? NULL : &timo, 2231 (flags & USYNC_PROCESS_SHARED) != 0); 2232 if (error != 0) 2233 continue; 2234 2235 error = thread_check_susp(td, false); 2236 if (error != 0) 2237 break; 2238 } 2239 2240 umtxq_lock(&uq->uq_key); 2241 umtx_pi_unref(pi); 2242 umtxq_unlock(&uq->uq_key); 2243 2244 umtx_key_release(&uq->uq_key); 2245 return (error); 2246 } 2247 2248 /* 2249 * Unlock a PI mutex. 2250 */ 2251 static int 2252 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2253 { 2254 struct umtx_key key; 2255 struct umtx_q *uq_first, *uq_first2, *uq_me; 2256 struct umtx_pi *pi, *pi2; 2257 uint32_t id, new_owner, old, owner; 2258 int count, error, pri; 2259 2260 id = td->td_tid; 2261 2262 usrloop: 2263 /* 2264 * Make sure we own this mtx. 2265 */ 2266 error = fueword32(&m->m_owner, &owner); 2267 if (error == -1) 2268 return (EFAULT); 2269 2270 if ((owner & ~UMUTEX_CONTESTED) != id) 2271 return (EPERM); 2272 2273 new_owner = umtx_unlock_val(flags, rb); 2274 2275 /* This should be done in userland */ 2276 if ((owner & UMUTEX_CONTESTED) == 0) { 2277 error = casueword32(&m->m_owner, owner, &old, new_owner); 2278 if (error == -1) 2279 return (EFAULT); 2280 if (error == 1) { 2281 error = thread_check_susp(td, true); 2282 if (error != 0) 2283 return (error); 2284 goto usrloop; 2285 } 2286 if (old == owner) 2287 return (0); 2288 owner = old; 2289 } 2290 2291 /* We should only ever be in here for contested locks */ 2292 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2293 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2294 &key)) != 0) 2295 return (error); 2296 2297 umtxq_lock(&key); 2298 umtxq_busy(&key); 2299 count = umtxq_count_pi(&key, &uq_first); 2300 if (uq_first != NULL) { 2301 mtx_lock(&umtx_lock); 2302 pi = uq_first->uq_pi_blocked; 2303 KASSERT(pi != NULL, ("pi == NULL?")); 2304 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2305 mtx_unlock(&umtx_lock); 2306 umtxq_unbusy(&key); 2307 umtxq_unlock(&key); 2308 umtx_key_release(&key); 2309 /* userland messed the mutex */ 2310 return (EPERM); 2311 } 2312 uq_me = td->td_umtxq; 2313 if (pi->pi_owner == td) 2314 umtx_pi_disown(pi); 2315 /* get highest priority thread which is still sleeping. */ 2316 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2317 while (uq_first != NULL && 2318 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2319 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2320 } 2321 pri = PRI_MAX; 2322 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2323 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2324 if (uq_first2 != NULL) { 2325 if (pri > UPRI(uq_first2->uq_thread)) 2326 pri = UPRI(uq_first2->uq_thread); 2327 } 2328 } 2329 thread_lock(td); 2330 sched_lend_user_prio(td, pri); 2331 thread_unlock(td); 2332 mtx_unlock(&umtx_lock); 2333 if (uq_first) 2334 umtxq_signal_thread(uq_first); 2335 } else { 2336 pi = umtx_pi_lookup(&key); 2337 /* 2338 * A umtx_pi can exist if a signal or timeout removed the 2339 * last waiter from the umtxq, but there is still 2340 * a thread in do_lock_pi() holding the umtx_pi. 2341 */ 2342 if (pi != NULL) { 2343 /* 2344 * The umtx_pi can be unowned, such as when a thread 2345 * has just entered do_lock_pi(), allocated the 2346 * umtx_pi, and unlocked the umtxq. 2347 * If the current thread owns it, it must disown it. 2348 */ 2349 mtx_lock(&umtx_lock); 2350 if (pi->pi_owner == td) 2351 umtx_pi_disown(pi); 2352 mtx_unlock(&umtx_lock); 2353 } 2354 } 2355 umtxq_unlock(&key); 2356 2357 /* 2358 * When unlocking the umtx, it must be marked as unowned if 2359 * there is zero or one thread only waiting for it. 2360 * Otherwise, it must be marked as contested. 2361 */ 2362 2363 if (count > 1) 2364 new_owner |= UMUTEX_CONTESTED; 2365 again: 2366 error = casueword32(&m->m_owner, owner, &old, new_owner); 2367 if (error == 1) { 2368 error = thread_check_susp(td, false); 2369 if (error == 0) 2370 goto again; 2371 } 2372 umtxq_unbusy_unlocked(&key); 2373 umtx_key_release(&key); 2374 if (error == -1) 2375 return (EFAULT); 2376 if (error == 0 && old != owner) 2377 return (EINVAL); 2378 return (error); 2379 } 2380 2381 /* 2382 * Lock a PP mutex. 2383 */ 2384 static int 2385 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2386 struct _umtx_time *timeout, int try) 2387 { 2388 struct umtx_abs_timeout timo; 2389 struct umtx_q *uq, *uq2; 2390 struct umtx_pi *pi; 2391 uint32_t ceiling; 2392 uint32_t owner, id; 2393 int error, pri, old_inherited_pri, su, rv; 2394 2395 id = td->td_tid; 2396 uq = td->td_umtxq; 2397 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2398 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2399 &uq->uq_key)) != 0) 2400 return (error); 2401 2402 if (timeout != NULL) 2403 umtx_abs_timeout_init2(&timo, timeout); 2404 2405 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2406 for (;;) { 2407 old_inherited_pri = uq->uq_inherited_pri; 2408 umtxq_lock(&uq->uq_key); 2409 umtxq_busy(&uq->uq_key); 2410 umtxq_unlock(&uq->uq_key); 2411 2412 rv = fueword32(&m->m_ceilings[0], &ceiling); 2413 if (rv == -1) { 2414 error = EFAULT; 2415 goto out; 2416 } 2417 ceiling = RTP_PRIO_MAX - ceiling; 2418 if (ceiling > RTP_PRIO_MAX) { 2419 error = EINVAL; 2420 goto out; 2421 } 2422 2423 mtx_lock(&umtx_lock); 2424 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2425 mtx_unlock(&umtx_lock); 2426 error = EINVAL; 2427 goto out; 2428 } 2429 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2430 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2431 thread_lock(td); 2432 if (uq->uq_inherited_pri < UPRI(td)) 2433 sched_lend_user_prio(td, uq->uq_inherited_pri); 2434 thread_unlock(td); 2435 } 2436 mtx_unlock(&umtx_lock); 2437 2438 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2439 id | UMUTEX_CONTESTED); 2440 /* The address was invalid. */ 2441 if (rv == -1) { 2442 error = EFAULT; 2443 break; 2444 } 2445 if (rv == 0) { 2446 MPASS(owner == UMUTEX_CONTESTED); 2447 error = 0; 2448 break; 2449 } 2450 /* rv == 1 */ 2451 if (owner == UMUTEX_RB_OWNERDEAD) { 2452 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2453 &owner, id | UMUTEX_CONTESTED); 2454 if (rv == -1) { 2455 error = EFAULT; 2456 break; 2457 } 2458 if (rv == 0) { 2459 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2460 error = EOWNERDEAD; /* success */ 2461 break; 2462 } 2463 2464 /* 2465 * rv == 1, only check for suspension if we 2466 * did not already catched a signal. If we 2467 * get an error from the check, the same 2468 * condition is checked by the umtxq_sleep() 2469 * call below, so we should obliterate the 2470 * error to not skip the last loop iteration. 2471 */ 2472 if (error == 0) { 2473 error = thread_check_susp(td, false); 2474 if (error == 0) { 2475 if (try != 0) 2476 error = EBUSY; 2477 else 2478 continue; 2479 } 2480 error = 0; 2481 } 2482 } else if (owner == UMUTEX_RB_NOTRECOV) { 2483 error = ENOTRECOVERABLE; 2484 } 2485 2486 if (try != 0) 2487 error = EBUSY; 2488 2489 /* 2490 * If we caught a signal, we have retried and now 2491 * exit immediately. 2492 */ 2493 if (error != 0) 2494 break; 2495 2496 umtxq_lock(&uq->uq_key); 2497 umtxq_insert(uq); 2498 umtxq_unbusy(&uq->uq_key); 2499 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2500 NULL : &timo); 2501 umtxq_remove(uq); 2502 umtxq_unlock(&uq->uq_key); 2503 2504 mtx_lock(&umtx_lock); 2505 uq->uq_inherited_pri = old_inherited_pri; 2506 pri = PRI_MAX; 2507 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2508 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2509 if (uq2 != NULL) { 2510 if (pri > UPRI(uq2->uq_thread)) 2511 pri = UPRI(uq2->uq_thread); 2512 } 2513 } 2514 if (pri > uq->uq_inherited_pri) 2515 pri = uq->uq_inherited_pri; 2516 thread_lock(td); 2517 sched_lend_user_prio(td, pri); 2518 thread_unlock(td); 2519 mtx_unlock(&umtx_lock); 2520 } 2521 2522 if (error != 0 && error != EOWNERDEAD) { 2523 mtx_lock(&umtx_lock); 2524 uq->uq_inherited_pri = old_inherited_pri; 2525 pri = PRI_MAX; 2526 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2527 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2528 if (uq2 != NULL) { 2529 if (pri > UPRI(uq2->uq_thread)) 2530 pri = UPRI(uq2->uq_thread); 2531 } 2532 } 2533 if (pri > uq->uq_inherited_pri) 2534 pri = uq->uq_inherited_pri; 2535 thread_lock(td); 2536 sched_lend_user_prio(td, pri); 2537 thread_unlock(td); 2538 mtx_unlock(&umtx_lock); 2539 } 2540 2541 out: 2542 umtxq_unbusy_unlocked(&uq->uq_key); 2543 umtx_key_release(&uq->uq_key); 2544 return (error); 2545 } 2546 2547 /* 2548 * Unlock a PP mutex. 2549 */ 2550 static int 2551 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2552 { 2553 struct umtx_key key; 2554 struct umtx_q *uq, *uq2; 2555 struct umtx_pi *pi; 2556 uint32_t id, owner, rceiling; 2557 int error, pri, new_inherited_pri, su; 2558 2559 id = td->td_tid; 2560 uq = td->td_umtxq; 2561 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2562 2563 /* 2564 * Make sure we own this mtx. 2565 */ 2566 error = fueword32(&m->m_owner, &owner); 2567 if (error == -1) 2568 return (EFAULT); 2569 2570 if ((owner & ~UMUTEX_CONTESTED) != id) 2571 return (EPERM); 2572 2573 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2574 if (error != 0) 2575 return (error); 2576 2577 if (rceiling == -1) 2578 new_inherited_pri = PRI_MAX; 2579 else { 2580 rceiling = RTP_PRIO_MAX - rceiling; 2581 if (rceiling > RTP_PRIO_MAX) 2582 return (EINVAL); 2583 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2584 } 2585 2586 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2587 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2588 &key)) != 0) 2589 return (error); 2590 umtxq_lock(&key); 2591 umtxq_busy(&key); 2592 umtxq_unlock(&key); 2593 /* 2594 * For priority protected mutex, always set unlocked state 2595 * to UMUTEX_CONTESTED, so that userland always enters kernel 2596 * to lock the mutex, it is necessary because thread priority 2597 * has to be adjusted for such mutex. 2598 */ 2599 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2600 UMUTEX_CONTESTED); 2601 2602 umtxq_lock(&key); 2603 if (error == 0) 2604 umtxq_signal(&key, 1); 2605 umtxq_unbusy(&key); 2606 umtxq_unlock(&key); 2607 2608 if (error == -1) 2609 error = EFAULT; 2610 else { 2611 mtx_lock(&umtx_lock); 2612 if (su != 0) 2613 uq->uq_inherited_pri = new_inherited_pri; 2614 pri = PRI_MAX; 2615 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2616 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2617 if (uq2 != NULL) { 2618 if (pri > UPRI(uq2->uq_thread)) 2619 pri = UPRI(uq2->uq_thread); 2620 } 2621 } 2622 if (pri > uq->uq_inherited_pri) 2623 pri = uq->uq_inherited_pri; 2624 thread_lock(td); 2625 sched_lend_user_prio(td, pri); 2626 thread_unlock(td); 2627 mtx_unlock(&umtx_lock); 2628 } 2629 umtx_key_release(&key); 2630 return (error); 2631 } 2632 2633 static int 2634 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2635 uint32_t *old_ceiling) 2636 { 2637 struct umtx_q *uq; 2638 uint32_t flags, id, owner, save_ceiling; 2639 int error, rv, rv1; 2640 2641 error = fueword32(&m->m_flags, &flags); 2642 if (error == -1) 2643 return (EFAULT); 2644 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2645 return (EINVAL); 2646 if (ceiling > RTP_PRIO_MAX) 2647 return (EINVAL); 2648 id = td->td_tid; 2649 uq = td->td_umtxq; 2650 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2651 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2652 &uq->uq_key)) != 0) 2653 return (error); 2654 for (;;) { 2655 umtxq_lock(&uq->uq_key); 2656 umtxq_busy(&uq->uq_key); 2657 umtxq_unlock(&uq->uq_key); 2658 2659 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2660 if (rv == -1) { 2661 error = EFAULT; 2662 break; 2663 } 2664 2665 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2666 id | UMUTEX_CONTESTED); 2667 if (rv == -1) { 2668 error = EFAULT; 2669 break; 2670 } 2671 2672 if (rv == 0) { 2673 MPASS(owner == UMUTEX_CONTESTED); 2674 rv = suword32(&m->m_ceilings[0], ceiling); 2675 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2676 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2677 break; 2678 } 2679 2680 if ((owner & ~UMUTEX_CONTESTED) == id) { 2681 rv = suword32(&m->m_ceilings[0], ceiling); 2682 error = rv == 0 ? 0 : EFAULT; 2683 break; 2684 } 2685 2686 if (owner == UMUTEX_RB_OWNERDEAD) { 2687 error = EOWNERDEAD; 2688 break; 2689 } else if (owner == UMUTEX_RB_NOTRECOV) { 2690 error = ENOTRECOVERABLE; 2691 break; 2692 } 2693 2694 /* 2695 * If we caught a signal, we have retried and now 2696 * exit immediately. 2697 */ 2698 if (error != 0) 2699 break; 2700 2701 /* 2702 * We set the contested bit, sleep. Otherwise the lock changed 2703 * and we need to retry or we lost a race to the thread 2704 * unlocking the umtx. 2705 */ 2706 umtxq_lock(&uq->uq_key); 2707 umtxq_insert(uq); 2708 umtxq_unbusy(&uq->uq_key); 2709 error = umtxq_sleep(uq, "umtxpp", NULL); 2710 umtxq_remove(uq); 2711 umtxq_unlock(&uq->uq_key); 2712 } 2713 umtxq_lock(&uq->uq_key); 2714 if (error == 0) 2715 umtxq_signal(&uq->uq_key, INT_MAX); 2716 umtxq_unbusy(&uq->uq_key); 2717 umtxq_unlock(&uq->uq_key); 2718 umtx_key_release(&uq->uq_key); 2719 if (error == 0 && old_ceiling != NULL) { 2720 rv = suword32(old_ceiling, save_ceiling); 2721 error = rv == 0 ? 0 : EFAULT; 2722 } 2723 return (error); 2724 } 2725 2726 /* 2727 * Lock a userland POSIX mutex. 2728 */ 2729 static int 2730 do_lock_umutex(struct thread *td, struct umutex *m, 2731 struct _umtx_time *timeout, int mode) 2732 { 2733 uint32_t flags; 2734 int error; 2735 2736 error = fueword32(&m->m_flags, &flags); 2737 if (error == -1) 2738 return (EFAULT); 2739 2740 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2741 case 0: 2742 error = do_lock_normal(td, m, flags, timeout, mode); 2743 break; 2744 case UMUTEX_PRIO_INHERIT: 2745 error = do_lock_pi(td, m, flags, timeout, mode); 2746 break; 2747 case UMUTEX_PRIO_PROTECT: 2748 error = do_lock_pp(td, m, flags, timeout, mode); 2749 break; 2750 default: 2751 return (EINVAL); 2752 } 2753 if (timeout == NULL) { 2754 if (error == EINTR && mode != _UMUTEX_WAIT) 2755 error = ERESTART; 2756 } else { 2757 /* Timed-locking is not restarted. */ 2758 if (error == ERESTART) 2759 error = EINTR; 2760 } 2761 return (error); 2762 } 2763 2764 /* 2765 * Unlock a userland POSIX mutex. 2766 */ 2767 static int 2768 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2769 { 2770 uint32_t flags; 2771 int error; 2772 2773 error = fueword32(&m->m_flags, &flags); 2774 if (error == -1) 2775 return (EFAULT); 2776 2777 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2778 case 0: 2779 return (do_unlock_normal(td, m, flags, rb)); 2780 case UMUTEX_PRIO_INHERIT: 2781 return (do_unlock_pi(td, m, flags, rb)); 2782 case UMUTEX_PRIO_PROTECT: 2783 return (do_unlock_pp(td, m, flags, rb)); 2784 } 2785 2786 return (EINVAL); 2787 } 2788 2789 static int 2790 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2791 struct timespec *timeout, u_long wflags) 2792 { 2793 struct umtx_abs_timeout timo; 2794 struct umtx_q *uq; 2795 uint32_t flags, clockid, hasw; 2796 int error; 2797 2798 uq = td->td_umtxq; 2799 error = fueword32(&cv->c_flags, &flags); 2800 if (error == -1) 2801 return (EFAULT); 2802 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2803 if (error != 0) 2804 return (error); 2805 2806 if ((wflags & CVWAIT_CLOCKID) != 0) { 2807 error = fueword32(&cv->c_clockid, &clockid); 2808 if (error == -1) { 2809 umtx_key_release(&uq->uq_key); 2810 return (EFAULT); 2811 } 2812 if (clockid < CLOCK_REALTIME || 2813 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2814 /* hmm, only HW clock id will work. */ 2815 umtx_key_release(&uq->uq_key); 2816 return (EINVAL); 2817 } 2818 } else { 2819 clockid = CLOCK_REALTIME; 2820 } 2821 2822 umtxq_lock(&uq->uq_key); 2823 umtxq_busy(&uq->uq_key); 2824 umtxq_insert(uq); 2825 umtxq_unlock(&uq->uq_key); 2826 2827 /* 2828 * Set c_has_waiters to 1 before releasing user mutex, also 2829 * don't modify cache line when unnecessary. 2830 */ 2831 error = fueword32(&cv->c_has_waiters, &hasw); 2832 if (error == 0 && hasw == 0) 2833 suword32(&cv->c_has_waiters, 1); 2834 2835 umtxq_unbusy_unlocked(&uq->uq_key); 2836 2837 error = do_unlock_umutex(td, m, false); 2838 2839 if (timeout != NULL) 2840 umtx_abs_timeout_init(&timo, clockid, 2841 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2842 2843 umtxq_lock(&uq->uq_key); 2844 if (error == 0) { 2845 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2846 NULL : &timo); 2847 } 2848 2849 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2850 error = 0; 2851 else { 2852 /* 2853 * This must be timeout,interrupted by signal or 2854 * surprious wakeup, clear c_has_waiter flag when 2855 * necessary. 2856 */ 2857 umtxq_busy(&uq->uq_key); 2858 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2859 int oldlen = uq->uq_cur_queue->length; 2860 umtxq_remove(uq); 2861 if (oldlen == 1) { 2862 umtxq_unlock(&uq->uq_key); 2863 suword32(&cv->c_has_waiters, 0); 2864 umtxq_lock(&uq->uq_key); 2865 } 2866 } 2867 umtxq_unbusy(&uq->uq_key); 2868 if (error == ERESTART) 2869 error = EINTR; 2870 } 2871 2872 umtxq_unlock(&uq->uq_key); 2873 umtx_key_release(&uq->uq_key); 2874 return (error); 2875 } 2876 2877 /* 2878 * Signal a userland condition variable. 2879 */ 2880 static int 2881 do_cv_signal(struct thread *td, struct ucond *cv) 2882 { 2883 struct umtx_key key; 2884 int error, cnt, nwake; 2885 uint32_t flags; 2886 2887 error = fueword32(&cv->c_flags, &flags); 2888 if (error == -1) 2889 return (EFAULT); 2890 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2891 return (error); 2892 umtxq_lock(&key); 2893 umtxq_busy(&key); 2894 cnt = umtxq_count(&key); 2895 nwake = umtxq_signal(&key, 1); 2896 if (cnt <= nwake) { 2897 umtxq_unlock(&key); 2898 error = suword32(&cv->c_has_waiters, 0); 2899 if (error == -1) 2900 error = EFAULT; 2901 umtxq_lock(&key); 2902 } 2903 umtxq_unbusy(&key); 2904 umtxq_unlock(&key); 2905 umtx_key_release(&key); 2906 return (error); 2907 } 2908 2909 static int 2910 do_cv_broadcast(struct thread *td, struct ucond *cv) 2911 { 2912 struct umtx_key key; 2913 int error; 2914 uint32_t flags; 2915 2916 error = fueword32(&cv->c_flags, &flags); 2917 if (error == -1) 2918 return (EFAULT); 2919 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2920 return (error); 2921 2922 umtxq_lock(&key); 2923 umtxq_busy(&key); 2924 umtxq_signal(&key, INT_MAX); 2925 umtxq_unlock(&key); 2926 2927 error = suword32(&cv->c_has_waiters, 0); 2928 if (error == -1) 2929 error = EFAULT; 2930 2931 umtxq_unbusy_unlocked(&key); 2932 2933 umtx_key_release(&key); 2934 return (error); 2935 } 2936 2937 static int 2938 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2939 struct _umtx_time *timeout) 2940 { 2941 struct umtx_abs_timeout timo; 2942 struct umtx_q *uq; 2943 uint32_t flags, wrflags; 2944 int32_t state, oldstate; 2945 int32_t blocked_readers; 2946 int error, error1, rv; 2947 2948 uq = td->td_umtxq; 2949 error = fueword32(&rwlock->rw_flags, &flags); 2950 if (error == -1) 2951 return (EFAULT); 2952 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2953 if (error != 0) 2954 return (error); 2955 2956 if (timeout != NULL) 2957 umtx_abs_timeout_init2(&timo, timeout); 2958 2959 wrflags = URWLOCK_WRITE_OWNER; 2960 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2961 wrflags |= URWLOCK_WRITE_WAITERS; 2962 2963 for (;;) { 2964 rv = fueword32(&rwlock->rw_state, &state); 2965 if (rv == -1) { 2966 umtx_key_release(&uq->uq_key); 2967 return (EFAULT); 2968 } 2969 2970 /* try to lock it */ 2971 while (!(state & wrflags)) { 2972 if (__predict_false(URWLOCK_READER_COUNT(state) == 2973 URWLOCK_MAX_READERS)) { 2974 umtx_key_release(&uq->uq_key); 2975 return (EAGAIN); 2976 } 2977 rv = casueword32(&rwlock->rw_state, state, 2978 &oldstate, state + 1); 2979 if (rv == -1) { 2980 umtx_key_release(&uq->uq_key); 2981 return (EFAULT); 2982 } 2983 if (rv == 0) { 2984 MPASS(oldstate == state); 2985 umtx_key_release(&uq->uq_key); 2986 return (0); 2987 } 2988 error = thread_check_susp(td, true); 2989 if (error != 0) 2990 break; 2991 state = oldstate; 2992 } 2993 2994 if (error) 2995 break; 2996 2997 /* grab monitor lock */ 2998 umtxq_lock(&uq->uq_key); 2999 umtxq_busy(&uq->uq_key); 3000 umtxq_unlock(&uq->uq_key); 3001 3002 /* 3003 * re-read the state, in case it changed between the try-lock above 3004 * and the check below 3005 */ 3006 rv = fueword32(&rwlock->rw_state, &state); 3007 if (rv == -1) 3008 error = EFAULT; 3009 3010 /* set read contention bit */ 3011 while (error == 0 && (state & wrflags) && 3012 !(state & URWLOCK_READ_WAITERS)) { 3013 rv = casueword32(&rwlock->rw_state, state, 3014 &oldstate, state | URWLOCK_READ_WAITERS); 3015 if (rv == -1) { 3016 error = EFAULT; 3017 break; 3018 } 3019 if (rv == 0) { 3020 MPASS(oldstate == state); 3021 goto sleep; 3022 } 3023 state = oldstate; 3024 error = thread_check_susp(td, false); 3025 if (error != 0) 3026 break; 3027 } 3028 if (error != 0) { 3029 umtxq_unbusy_unlocked(&uq->uq_key); 3030 break; 3031 } 3032 3033 /* state is changed while setting flags, restart */ 3034 if (!(state & wrflags)) { 3035 umtxq_unbusy_unlocked(&uq->uq_key); 3036 error = thread_check_susp(td, true); 3037 if (error != 0) 3038 break; 3039 continue; 3040 } 3041 3042 sleep: 3043 /* 3044 * Contention bit is set, before sleeping, increase 3045 * read waiter count. 3046 */ 3047 rv = fueword32(&rwlock->rw_blocked_readers, 3048 &blocked_readers); 3049 if (rv == -1) { 3050 umtxq_unbusy_unlocked(&uq->uq_key); 3051 error = EFAULT; 3052 break; 3053 } 3054 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3055 3056 while (state & wrflags) { 3057 umtxq_lock(&uq->uq_key); 3058 umtxq_insert(uq); 3059 umtxq_unbusy(&uq->uq_key); 3060 3061 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3062 NULL : &timo); 3063 3064 umtxq_busy(&uq->uq_key); 3065 umtxq_remove(uq); 3066 umtxq_unlock(&uq->uq_key); 3067 if (error) 3068 break; 3069 rv = fueword32(&rwlock->rw_state, &state); 3070 if (rv == -1) { 3071 error = EFAULT; 3072 break; 3073 } 3074 } 3075 3076 /* decrease read waiter count, and may clear read contention bit */ 3077 rv = fueword32(&rwlock->rw_blocked_readers, 3078 &blocked_readers); 3079 if (rv == -1) { 3080 umtxq_unbusy_unlocked(&uq->uq_key); 3081 error = EFAULT; 3082 break; 3083 } 3084 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3085 if (blocked_readers == 1) { 3086 rv = fueword32(&rwlock->rw_state, &state); 3087 if (rv == -1) { 3088 umtxq_unbusy_unlocked(&uq->uq_key); 3089 error = EFAULT; 3090 break; 3091 } 3092 for (;;) { 3093 rv = casueword32(&rwlock->rw_state, state, 3094 &oldstate, state & ~URWLOCK_READ_WAITERS); 3095 if (rv == -1) { 3096 error = EFAULT; 3097 break; 3098 } 3099 if (rv == 0) { 3100 MPASS(oldstate == state); 3101 break; 3102 } 3103 state = oldstate; 3104 error1 = thread_check_susp(td, false); 3105 if (error1 != 0) { 3106 if (error == 0) 3107 error = error1; 3108 break; 3109 } 3110 } 3111 } 3112 3113 umtxq_unbusy_unlocked(&uq->uq_key); 3114 if (error != 0) 3115 break; 3116 } 3117 umtx_key_release(&uq->uq_key); 3118 if (error == ERESTART) 3119 error = EINTR; 3120 return (error); 3121 } 3122 3123 static int 3124 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3125 { 3126 struct umtx_abs_timeout timo; 3127 struct umtx_q *uq; 3128 uint32_t flags; 3129 int32_t state, oldstate; 3130 int32_t blocked_writers; 3131 int32_t blocked_readers; 3132 int error, error1, rv; 3133 3134 uq = td->td_umtxq; 3135 error = fueword32(&rwlock->rw_flags, &flags); 3136 if (error == -1) 3137 return (EFAULT); 3138 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3139 if (error != 0) 3140 return (error); 3141 3142 if (timeout != NULL) 3143 umtx_abs_timeout_init2(&timo, timeout); 3144 3145 blocked_readers = 0; 3146 for (;;) { 3147 rv = fueword32(&rwlock->rw_state, &state); 3148 if (rv == -1) { 3149 umtx_key_release(&uq->uq_key); 3150 return (EFAULT); 3151 } 3152 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3153 URWLOCK_READER_COUNT(state) == 0) { 3154 rv = casueword32(&rwlock->rw_state, state, 3155 &oldstate, state | URWLOCK_WRITE_OWNER); 3156 if (rv == -1) { 3157 umtx_key_release(&uq->uq_key); 3158 return (EFAULT); 3159 } 3160 if (rv == 0) { 3161 MPASS(oldstate == state); 3162 umtx_key_release(&uq->uq_key); 3163 return (0); 3164 } 3165 state = oldstate; 3166 error = thread_check_susp(td, true); 3167 if (error != 0) 3168 break; 3169 } 3170 3171 if (error) { 3172 if ((state & (URWLOCK_WRITE_OWNER | 3173 URWLOCK_WRITE_WAITERS)) == 0 && 3174 blocked_readers != 0) { 3175 umtxq_lock(&uq->uq_key); 3176 umtxq_busy(&uq->uq_key); 3177 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3178 UMTX_SHARED_QUEUE); 3179 umtxq_unbusy(&uq->uq_key); 3180 umtxq_unlock(&uq->uq_key); 3181 } 3182 3183 break; 3184 } 3185 3186 /* grab monitor lock */ 3187 umtxq_lock(&uq->uq_key); 3188 umtxq_busy(&uq->uq_key); 3189 umtxq_unlock(&uq->uq_key); 3190 3191 /* 3192 * Re-read the state, in case it changed between the 3193 * try-lock above and the check below. 3194 */ 3195 rv = fueword32(&rwlock->rw_state, &state); 3196 if (rv == -1) 3197 error = EFAULT; 3198 3199 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3200 URWLOCK_READER_COUNT(state) != 0) && 3201 (state & URWLOCK_WRITE_WAITERS) == 0) { 3202 rv = casueword32(&rwlock->rw_state, state, 3203 &oldstate, state | URWLOCK_WRITE_WAITERS); 3204 if (rv == -1) { 3205 error = EFAULT; 3206 break; 3207 } 3208 if (rv == 0) { 3209 MPASS(oldstate == state); 3210 goto sleep; 3211 } 3212 state = oldstate; 3213 error = thread_check_susp(td, false); 3214 if (error != 0) 3215 break; 3216 } 3217 if (error != 0) { 3218 umtxq_unbusy_unlocked(&uq->uq_key); 3219 break; 3220 } 3221 3222 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3223 URWLOCK_READER_COUNT(state) == 0) { 3224 umtxq_unbusy_unlocked(&uq->uq_key); 3225 error = thread_check_susp(td, false); 3226 if (error != 0) 3227 break; 3228 continue; 3229 } 3230 sleep: 3231 rv = fueword32(&rwlock->rw_blocked_writers, 3232 &blocked_writers); 3233 if (rv == -1) { 3234 umtxq_unbusy_unlocked(&uq->uq_key); 3235 error = EFAULT; 3236 break; 3237 } 3238 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3239 3240 while ((state & URWLOCK_WRITE_OWNER) || 3241 URWLOCK_READER_COUNT(state) != 0) { 3242 umtxq_lock(&uq->uq_key); 3243 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3244 umtxq_unbusy(&uq->uq_key); 3245 3246 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 3247 NULL : &timo); 3248 3249 umtxq_busy(&uq->uq_key); 3250 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3251 umtxq_unlock(&uq->uq_key); 3252 if (error) 3253 break; 3254 rv = fueword32(&rwlock->rw_state, &state); 3255 if (rv == -1) { 3256 error = EFAULT; 3257 break; 3258 } 3259 } 3260 3261 rv = fueword32(&rwlock->rw_blocked_writers, 3262 &blocked_writers); 3263 if (rv == -1) { 3264 umtxq_unbusy_unlocked(&uq->uq_key); 3265 error = EFAULT; 3266 break; 3267 } 3268 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3269 if (blocked_writers == 1) { 3270 rv = fueword32(&rwlock->rw_state, &state); 3271 if (rv == -1) { 3272 umtxq_unbusy_unlocked(&uq->uq_key); 3273 error = EFAULT; 3274 break; 3275 } 3276 for (;;) { 3277 rv = casueword32(&rwlock->rw_state, state, 3278 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3279 if (rv == -1) { 3280 error = EFAULT; 3281 break; 3282 } 3283 if (rv == 0) { 3284 MPASS(oldstate == state); 3285 break; 3286 } 3287 state = oldstate; 3288 error1 = thread_check_susp(td, false); 3289 /* 3290 * We are leaving the URWLOCK_WRITE_WAITERS 3291 * behind, but this should not harm the 3292 * correctness. 3293 */ 3294 if (error1 != 0) { 3295 if (error == 0) 3296 error = error1; 3297 break; 3298 } 3299 } 3300 rv = fueword32(&rwlock->rw_blocked_readers, 3301 &blocked_readers); 3302 if (rv == -1) { 3303 umtxq_unbusy_unlocked(&uq->uq_key); 3304 error = EFAULT; 3305 break; 3306 } 3307 } else 3308 blocked_readers = 0; 3309 3310 umtxq_unbusy_unlocked(&uq->uq_key); 3311 } 3312 3313 umtx_key_release(&uq->uq_key); 3314 if (error == ERESTART) 3315 error = EINTR; 3316 return (error); 3317 } 3318 3319 static int 3320 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3321 { 3322 struct umtx_q *uq; 3323 uint32_t flags; 3324 int32_t state, oldstate; 3325 int error, rv, q, count; 3326 3327 uq = td->td_umtxq; 3328 error = fueword32(&rwlock->rw_flags, &flags); 3329 if (error == -1) 3330 return (EFAULT); 3331 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3332 if (error != 0) 3333 return (error); 3334 3335 error = fueword32(&rwlock->rw_state, &state); 3336 if (error == -1) { 3337 error = EFAULT; 3338 goto out; 3339 } 3340 if (state & URWLOCK_WRITE_OWNER) { 3341 for (;;) { 3342 rv = casueword32(&rwlock->rw_state, state, 3343 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3344 if (rv == -1) { 3345 error = EFAULT; 3346 goto out; 3347 } 3348 if (rv == 1) { 3349 state = oldstate; 3350 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3351 error = EPERM; 3352 goto out; 3353 } 3354 error = thread_check_susp(td, true); 3355 if (error != 0) 3356 goto out; 3357 } else 3358 break; 3359 } 3360 } else if (URWLOCK_READER_COUNT(state) != 0) { 3361 for (;;) { 3362 rv = casueword32(&rwlock->rw_state, state, 3363 &oldstate, state - 1); 3364 if (rv == -1) { 3365 error = EFAULT; 3366 goto out; 3367 } 3368 if (rv == 1) { 3369 state = oldstate; 3370 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3371 error = EPERM; 3372 goto out; 3373 } 3374 error = thread_check_susp(td, true); 3375 if (error != 0) 3376 goto out; 3377 } else 3378 break; 3379 } 3380 } else { 3381 error = EPERM; 3382 goto out; 3383 } 3384 3385 count = 0; 3386 3387 if (!(flags & URWLOCK_PREFER_READER)) { 3388 if (state & URWLOCK_WRITE_WAITERS) { 3389 count = 1; 3390 q = UMTX_EXCLUSIVE_QUEUE; 3391 } else if (state & URWLOCK_READ_WAITERS) { 3392 count = INT_MAX; 3393 q = UMTX_SHARED_QUEUE; 3394 } 3395 } else { 3396 if (state & URWLOCK_READ_WAITERS) { 3397 count = INT_MAX; 3398 q = UMTX_SHARED_QUEUE; 3399 } else if (state & URWLOCK_WRITE_WAITERS) { 3400 count = 1; 3401 q = UMTX_EXCLUSIVE_QUEUE; 3402 } 3403 } 3404 3405 if (count) { 3406 umtxq_lock(&uq->uq_key); 3407 umtxq_busy(&uq->uq_key); 3408 umtxq_signal_queue(&uq->uq_key, count, q); 3409 umtxq_unbusy(&uq->uq_key); 3410 umtxq_unlock(&uq->uq_key); 3411 } 3412 out: 3413 umtx_key_release(&uq->uq_key); 3414 return (error); 3415 } 3416 3417 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3418 static int 3419 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3420 { 3421 struct umtx_abs_timeout timo; 3422 struct umtx_q *uq; 3423 uint32_t flags, count, count1; 3424 int error, rv, rv1; 3425 3426 uq = td->td_umtxq; 3427 error = fueword32(&sem->_flags, &flags); 3428 if (error == -1) 3429 return (EFAULT); 3430 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3431 if (error != 0) 3432 return (error); 3433 3434 if (timeout != NULL) 3435 umtx_abs_timeout_init2(&timo, timeout); 3436 3437 again: 3438 umtxq_lock(&uq->uq_key); 3439 umtxq_busy(&uq->uq_key); 3440 umtxq_insert(uq); 3441 umtxq_unlock(&uq->uq_key); 3442 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3443 if (rv == 0) 3444 rv1 = fueword32(&sem->_count, &count); 3445 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3446 (rv == 1 && count1 == 0)) { 3447 umtxq_lock(&uq->uq_key); 3448 umtxq_unbusy(&uq->uq_key); 3449 umtxq_remove(uq); 3450 umtxq_unlock(&uq->uq_key); 3451 if (rv == 1) { 3452 rv = thread_check_susp(td, true); 3453 if (rv == 0) 3454 goto again; 3455 error = rv; 3456 goto out; 3457 } 3458 if (rv == 0) 3459 rv = rv1; 3460 error = rv == -1 ? EFAULT : 0; 3461 goto out; 3462 } 3463 umtxq_lock(&uq->uq_key); 3464 umtxq_unbusy(&uq->uq_key); 3465 3466 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3467 3468 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3469 error = 0; 3470 else { 3471 umtxq_remove(uq); 3472 /* A relative timeout cannot be restarted. */ 3473 if (error == ERESTART && timeout != NULL && 3474 (timeout->_flags & UMTX_ABSTIME) == 0) 3475 error = EINTR; 3476 } 3477 umtxq_unlock(&uq->uq_key); 3478 out: 3479 umtx_key_release(&uq->uq_key); 3480 return (error); 3481 } 3482 3483 /* 3484 * Signal a userland semaphore. 3485 */ 3486 static int 3487 do_sem_wake(struct thread *td, struct _usem *sem) 3488 { 3489 struct umtx_key key; 3490 int error, cnt; 3491 uint32_t flags; 3492 3493 error = fueword32(&sem->_flags, &flags); 3494 if (error == -1) 3495 return (EFAULT); 3496 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3497 return (error); 3498 umtxq_lock(&key); 3499 umtxq_busy(&key); 3500 cnt = umtxq_count(&key); 3501 if (cnt > 0) { 3502 /* 3503 * Check if count is greater than 0, this means the memory is 3504 * still being referenced by user code, so we can safely 3505 * update _has_waiters flag. 3506 */ 3507 if (cnt == 1) { 3508 umtxq_unlock(&key); 3509 error = suword32(&sem->_has_waiters, 0); 3510 umtxq_lock(&key); 3511 if (error == -1) 3512 error = EFAULT; 3513 } 3514 umtxq_signal(&key, 1); 3515 } 3516 umtxq_unbusy(&key); 3517 umtxq_unlock(&key); 3518 umtx_key_release(&key); 3519 return (error); 3520 } 3521 #endif 3522 3523 static int 3524 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3525 { 3526 struct umtx_abs_timeout timo; 3527 struct umtx_q *uq; 3528 uint32_t count, flags; 3529 int error, rv; 3530 3531 uq = td->td_umtxq; 3532 flags = fuword32(&sem->_flags); 3533 if (timeout != NULL) 3534 umtx_abs_timeout_init2(&timo, timeout); 3535 3536 again: 3537 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3538 if (error != 0) 3539 return (error); 3540 umtxq_lock(&uq->uq_key); 3541 umtxq_busy(&uq->uq_key); 3542 umtxq_insert(uq); 3543 umtxq_unlock(&uq->uq_key); 3544 rv = fueword32(&sem->_count, &count); 3545 if (rv == -1) { 3546 umtxq_lock(&uq->uq_key); 3547 umtxq_unbusy(&uq->uq_key); 3548 umtxq_remove(uq); 3549 umtxq_unlock(&uq->uq_key); 3550 umtx_key_release(&uq->uq_key); 3551 return (EFAULT); 3552 } 3553 for (;;) { 3554 if (USEM_COUNT(count) != 0) { 3555 umtxq_lock(&uq->uq_key); 3556 umtxq_unbusy(&uq->uq_key); 3557 umtxq_remove(uq); 3558 umtxq_unlock(&uq->uq_key); 3559 umtx_key_release(&uq->uq_key); 3560 return (0); 3561 } 3562 if (count == USEM_HAS_WAITERS) 3563 break; 3564 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3565 if (rv == 0) 3566 break; 3567 umtxq_lock(&uq->uq_key); 3568 umtxq_unbusy(&uq->uq_key); 3569 umtxq_remove(uq); 3570 umtxq_unlock(&uq->uq_key); 3571 umtx_key_release(&uq->uq_key); 3572 if (rv == -1) 3573 return (EFAULT); 3574 rv = thread_check_susp(td, true); 3575 if (rv != 0) 3576 return (rv); 3577 goto again; 3578 } 3579 umtxq_lock(&uq->uq_key); 3580 umtxq_unbusy(&uq->uq_key); 3581 3582 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3583 3584 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3585 error = 0; 3586 else { 3587 umtxq_remove(uq); 3588 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3589 /* A relative timeout cannot be restarted. */ 3590 if (error == ERESTART) 3591 error = EINTR; 3592 if (error == EINTR) { 3593 umtx_abs_timeout_update(&timo); 3594 timespecsub(&timo.end, &timo.cur, 3595 &timeout->_timeout); 3596 } 3597 } 3598 } 3599 umtxq_unlock(&uq->uq_key); 3600 umtx_key_release(&uq->uq_key); 3601 return (error); 3602 } 3603 3604 /* 3605 * Signal a userland semaphore. 3606 */ 3607 static int 3608 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3609 { 3610 struct umtx_key key; 3611 int error, cnt, rv; 3612 uint32_t count, flags; 3613 3614 rv = fueword32(&sem->_flags, &flags); 3615 if (rv == -1) 3616 return (EFAULT); 3617 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3618 return (error); 3619 umtxq_lock(&key); 3620 umtxq_busy(&key); 3621 cnt = umtxq_count(&key); 3622 if (cnt > 0) { 3623 /* 3624 * If this was the last sleeping thread, clear the waiters 3625 * flag in _count. 3626 */ 3627 if (cnt == 1) { 3628 umtxq_unlock(&key); 3629 rv = fueword32(&sem->_count, &count); 3630 while (rv != -1 && count & USEM_HAS_WAITERS) { 3631 rv = casueword32(&sem->_count, count, &count, 3632 count & ~USEM_HAS_WAITERS); 3633 if (rv == 1) { 3634 rv = thread_check_susp(td, true); 3635 if (rv != 0) 3636 break; 3637 } 3638 } 3639 if (rv == -1) 3640 error = EFAULT; 3641 else if (rv > 0) { 3642 error = rv; 3643 } 3644 umtxq_lock(&key); 3645 } 3646 3647 umtxq_signal(&key, 1); 3648 } 3649 umtxq_unbusy(&key); 3650 umtxq_unlock(&key); 3651 umtx_key_release(&key); 3652 return (error); 3653 } 3654 3655 #ifdef COMPAT_FREEBSD10 3656 int 3657 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3658 { 3659 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3660 } 3661 3662 int 3663 freebsd10__umtx_unlock(struct thread *td, 3664 struct freebsd10__umtx_unlock_args *uap) 3665 { 3666 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3667 } 3668 #endif 3669 3670 inline int 3671 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3672 { 3673 int error; 3674 3675 error = copyin(uaddr, tsp, sizeof(*tsp)); 3676 if (error == 0) { 3677 if (tsp->tv_sec < 0 || 3678 tsp->tv_nsec >= 1000000000 || 3679 tsp->tv_nsec < 0) 3680 error = EINVAL; 3681 } 3682 return (error); 3683 } 3684 3685 static inline int 3686 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3687 { 3688 int error; 3689 3690 if (size <= sizeof(tp->_timeout)) { 3691 tp->_clockid = CLOCK_REALTIME; 3692 tp->_flags = 0; 3693 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3694 } else 3695 error = copyin(uaddr, tp, sizeof(*tp)); 3696 if (error != 0) 3697 return (error); 3698 if (tp->_timeout.tv_sec < 0 || 3699 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3700 return (EINVAL); 3701 return (0); 3702 } 3703 3704 static int 3705 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3706 struct umtx_robust_lists_params *rb) 3707 { 3708 3709 if (size > sizeof(*rb)) 3710 return (EINVAL); 3711 return (copyin(uaddr, rb, size)); 3712 } 3713 3714 static int 3715 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3716 { 3717 3718 /* 3719 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3720 * and we're only called if sz >= sizeof(timespec) as supplied in the 3721 * copyops. 3722 */ 3723 KASSERT(sz >= sizeof(*tsp), 3724 ("umtx_copyops specifies incorrect sizes")); 3725 3726 return (copyout(tsp, uaddr, sizeof(*tsp))); 3727 } 3728 3729 #ifdef COMPAT_FREEBSD10 3730 static int 3731 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3732 const struct umtx_copyops *ops) 3733 { 3734 struct timespec *ts, timeout; 3735 int error; 3736 3737 /* Allow a null timespec (wait forever). */ 3738 if (uap->uaddr2 == NULL) 3739 ts = NULL; 3740 else { 3741 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3742 if (error != 0) 3743 return (error); 3744 ts = &timeout; 3745 } 3746 #ifdef COMPAT_FREEBSD32 3747 if (ops->compat32) 3748 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3749 #endif 3750 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3751 } 3752 3753 static int 3754 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3755 const struct umtx_copyops *ops) 3756 { 3757 #ifdef COMPAT_FREEBSD32 3758 if (ops->compat32) 3759 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3760 #endif 3761 return (do_unlock_umtx(td, uap->obj, uap->val)); 3762 } 3763 #endif /* COMPAT_FREEBSD10 */ 3764 3765 #if !defined(COMPAT_FREEBSD10) 3766 static int 3767 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3768 const struct umtx_copyops *ops __unused) 3769 { 3770 return (EOPNOTSUPP); 3771 } 3772 #endif /* COMPAT_FREEBSD10 */ 3773 3774 static int 3775 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3776 const struct umtx_copyops *ops) 3777 { 3778 struct _umtx_time timeout, *tm_p; 3779 int error; 3780 3781 if (uap->uaddr2 == NULL) 3782 tm_p = NULL; 3783 else { 3784 error = ops->copyin_umtx_time( 3785 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3786 if (error != 0) 3787 return (error); 3788 tm_p = &timeout; 3789 } 3790 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3791 } 3792 3793 static int 3794 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3795 const struct umtx_copyops *ops) 3796 { 3797 struct _umtx_time timeout, *tm_p; 3798 int error; 3799 3800 if (uap->uaddr2 == NULL) 3801 tm_p = NULL; 3802 else { 3803 error = ops->copyin_umtx_time( 3804 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3805 if (error != 0) 3806 return (error); 3807 tm_p = &timeout; 3808 } 3809 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3810 } 3811 3812 static int 3813 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3814 const struct umtx_copyops *ops) 3815 { 3816 struct _umtx_time *tm_p, timeout; 3817 int error; 3818 3819 if (uap->uaddr2 == NULL) 3820 tm_p = NULL; 3821 else { 3822 error = ops->copyin_umtx_time( 3823 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3824 if (error != 0) 3825 return (error); 3826 tm_p = &timeout; 3827 } 3828 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3829 } 3830 3831 static int 3832 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3833 const struct umtx_copyops *ops __unused) 3834 { 3835 3836 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3837 } 3838 3839 #define BATCH_SIZE 128 3840 static int 3841 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3842 { 3843 char *uaddrs[BATCH_SIZE], **upp; 3844 int count, error, i, pos, tocopy; 3845 3846 upp = (char **)uap->obj; 3847 error = 0; 3848 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3849 pos += tocopy) { 3850 tocopy = MIN(count, BATCH_SIZE); 3851 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3852 if (error != 0) 3853 break; 3854 for (i = 0; i < tocopy; ++i) { 3855 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3856 } 3857 maybe_yield(); 3858 } 3859 return (error); 3860 } 3861 3862 static int 3863 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3864 { 3865 uint32_t uaddrs[BATCH_SIZE], *upp; 3866 int count, error, i, pos, tocopy; 3867 3868 upp = (uint32_t *)uap->obj; 3869 error = 0; 3870 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3871 pos += tocopy) { 3872 tocopy = MIN(count, BATCH_SIZE); 3873 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3874 if (error != 0) 3875 break; 3876 for (i = 0; i < tocopy; ++i) { 3877 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3878 INT_MAX, 1); 3879 } 3880 maybe_yield(); 3881 } 3882 return (error); 3883 } 3884 3885 static int 3886 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3887 const struct umtx_copyops *ops) 3888 { 3889 3890 if (ops->compat32) 3891 return (__umtx_op_nwake_private_compat32(td, uap)); 3892 return (__umtx_op_nwake_private_native(td, uap)); 3893 } 3894 3895 static int 3896 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3897 const struct umtx_copyops *ops __unused) 3898 { 3899 3900 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3901 } 3902 3903 static int 3904 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3905 const struct umtx_copyops *ops) 3906 { 3907 struct _umtx_time *tm_p, timeout; 3908 int error; 3909 3910 /* Allow a null timespec (wait forever). */ 3911 if (uap->uaddr2 == NULL) 3912 tm_p = NULL; 3913 else { 3914 error = ops->copyin_umtx_time( 3915 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3916 if (error != 0) 3917 return (error); 3918 tm_p = &timeout; 3919 } 3920 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3921 } 3922 3923 static int 3924 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3925 const struct umtx_copyops *ops __unused) 3926 { 3927 3928 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3929 } 3930 3931 static int 3932 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3933 const struct umtx_copyops *ops) 3934 { 3935 struct _umtx_time *tm_p, timeout; 3936 int error; 3937 3938 /* Allow a null timespec (wait forever). */ 3939 if (uap->uaddr2 == NULL) 3940 tm_p = NULL; 3941 else { 3942 error = ops->copyin_umtx_time( 3943 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3944 if (error != 0) 3945 return (error); 3946 tm_p = &timeout; 3947 } 3948 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3949 } 3950 3951 static int 3952 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 3953 const struct umtx_copyops *ops __unused) 3954 { 3955 3956 return (do_wake_umutex(td, uap->obj)); 3957 } 3958 3959 static int 3960 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 3961 const struct umtx_copyops *ops __unused) 3962 { 3963 3964 return (do_unlock_umutex(td, uap->obj, false)); 3965 } 3966 3967 static int 3968 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 3969 const struct umtx_copyops *ops __unused) 3970 { 3971 3972 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3973 } 3974 3975 static int 3976 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 3977 const struct umtx_copyops *ops) 3978 { 3979 struct timespec *ts, timeout; 3980 int error; 3981 3982 /* Allow a null timespec (wait forever). */ 3983 if (uap->uaddr2 == NULL) 3984 ts = NULL; 3985 else { 3986 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3987 if (error != 0) 3988 return (error); 3989 ts = &timeout; 3990 } 3991 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3992 } 3993 3994 static int 3995 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 3996 const struct umtx_copyops *ops __unused) 3997 { 3998 3999 return (do_cv_signal(td, uap->obj)); 4000 } 4001 4002 static int 4003 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4004 const struct umtx_copyops *ops __unused) 4005 { 4006 4007 return (do_cv_broadcast(td, uap->obj)); 4008 } 4009 4010 static int 4011 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4012 const struct umtx_copyops *ops) 4013 { 4014 struct _umtx_time timeout; 4015 int error; 4016 4017 /* Allow a null timespec (wait forever). */ 4018 if (uap->uaddr2 == NULL) { 4019 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4020 } else { 4021 error = ops->copyin_umtx_time(uap->uaddr2, 4022 (size_t)uap->uaddr1, &timeout); 4023 if (error != 0) 4024 return (error); 4025 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4026 } 4027 return (error); 4028 } 4029 4030 static int 4031 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4032 const struct umtx_copyops *ops) 4033 { 4034 struct _umtx_time timeout; 4035 int error; 4036 4037 /* Allow a null timespec (wait forever). */ 4038 if (uap->uaddr2 == NULL) { 4039 error = do_rw_wrlock(td, uap->obj, 0); 4040 } else { 4041 error = ops->copyin_umtx_time(uap->uaddr2, 4042 (size_t)uap->uaddr1, &timeout); 4043 if (error != 0) 4044 return (error); 4045 4046 error = do_rw_wrlock(td, uap->obj, &timeout); 4047 } 4048 return (error); 4049 } 4050 4051 static int 4052 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4053 const struct umtx_copyops *ops __unused) 4054 { 4055 4056 return (do_rw_unlock(td, uap->obj)); 4057 } 4058 4059 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4060 static int 4061 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4062 const struct umtx_copyops *ops) 4063 { 4064 struct _umtx_time *tm_p, timeout; 4065 int error; 4066 4067 /* Allow a null timespec (wait forever). */ 4068 if (uap->uaddr2 == NULL) 4069 tm_p = NULL; 4070 else { 4071 error = ops->copyin_umtx_time( 4072 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4073 if (error != 0) 4074 return (error); 4075 tm_p = &timeout; 4076 } 4077 return (do_sem_wait(td, uap->obj, tm_p)); 4078 } 4079 4080 static int 4081 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4082 const struct umtx_copyops *ops __unused) 4083 { 4084 4085 return (do_sem_wake(td, uap->obj)); 4086 } 4087 #endif 4088 4089 static int 4090 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4091 const struct umtx_copyops *ops __unused) 4092 { 4093 4094 return (do_wake2_umutex(td, uap->obj, uap->val)); 4095 } 4096 4097 static int 4098 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4099 const struct umtx_copyops *ops) 4100 { 4101 struct _umtx_time *tm_p, timeout; 4102 size_t uasize; 4103 int error; 4104 4105 /* Allow a null timespec (wait forever). */ 4106 if (uap->uaddr2 == NULL) { 4107 uasize = 0; 4108 tm_p = NULL; 4109 } else { 4110 uasize = (size_t)uap->uaddr1; 4111 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4112 if (error != 0) 4113 return (error); 4114 tm_p = &timeout; 4115 } 4116 error = do_sem2_wait(td, uap->obj, tm_p); 4117 if (error == EINTR && uap->uaddr2 != NULL && 4118 (timeout._flags & UMTX_ABSTIME) == 0 && 4119 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4120 error = ops->copyout_timeout( 4121 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4122 uasize - ops->umtx_time_sz, &timeout._timeout); 4123 if (error == 0) { 4124 error = EINTR; 4125 } 4126 } 4127 4128 return (error); 4129 } 4130 4131 static int 4132 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4133 const struct umtx_copyops *ops __unused) 4134 { 4135 4136 return (do_sem2_wake(td, uap->obj)); 4137 } 4138 4139 #define USHM_OBJ_UMTX(o) \ 4140 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4141 4142 #define USHMF_REG_LINKED 0x0001 4143 #define USHMF_OBJ_LINKED 0x0002 4144 struct umtx_shm_reg { 4145 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4146 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4147 struct umtx_key ushm_key; 4148 struct ucred *ushm_cred; 4149 struct shmfd *ushm_obj; 4150 u_int ushm_refcnt; 4151 u_int ushm_flags; 4152 }; 4153 4154 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4155 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4156 4157 static uma_zone_t umtx_shm_reg_zone; 4158 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4159 static struct mtx umtx_shm_lock; 4160 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4161 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4162 4163 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4164 4165 static void 4166 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4167 { 4168 struct umtx_shm_reg_head d; 4169 struct umtx_shm_reg *reg, *reg1; 4170 4171 TAILQ_INIT(&d); 4172 mtx_lock(&umtx_shm_lock); 4173 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4174 mtx_unlock(&umtx_shm_lock); 4175 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4176 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4177 umtx_shm_free_reg(reg); 4178 } 4179 } 4180 4181 static struct task umtx_shm_reg_delfree_task = 4182 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4183 4184 static struct umtx_shm_reg * 4185 umtx_shm_find_reg_locked(const struct umtx_key *key) 4186 { 4187 struct umtx_shm_reg *reg; 4188 struct umtx_shm_reg_head *reg_head; 4189 4190 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4191 mtx_assert(&umtx_shm_lock, MA_OWNED); 4192 reg_head = &umtx_shm_registry[key->hash]; 4193 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4194 KASSERT(reg->ushm_key.shared, 4195 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4196 if (reg->ushm_key.info.shared.object == 4197 key->info.shared.object && 4198 reg->ushm_key.info.shared.offset == 4199 key->info.shared.offset) { 4200 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4201 KASSERT(reg->ushm_refcnt > 0, 4202 ("reg %p refcnt 0 onlist", reg)); 4203 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4204 ("reg %p not linked", reg)); 4205 reg->ushm_refcnt++; 4206 return (reg); 4207 } 4208 } 4209 return (NULL); 4210 } 4211 4212 static struct umtx_shm_reg * 4213 umtx_shm_find_reg(const struct umtx_key *key) 4214 { 4215 struct umtx_shm_reg *reg; 4216 4217 mtx_lock(&umtx_shm_lock); 4218 reg = umtx_shm_find_reg_locked(key); 4219 mtx_unlock(&umtx_shm_lock); 4220 return (reg); 4221 } 4222 4223 static void 4224 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4225 { 4226 4227 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4228 crfree(reg->ushm_cred); 4229 shm_drop(reg->ushm_obj); 4230 uma_zfree(umtx_shm_reg_zone, reg); 4231 } 4232 4233 static bool 4234 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4235 { 4236 bool res; 4237 4238 mtx_assert(&umtx_shm_lock, MA_OWNED); 4239 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 4240 reg->ushm_refcnt--; 4241 res = reg->ushm_refcnt == 0; 4242 if (res || force) { 4243 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4244 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4245 reg, ushm_reg_link); 4246 reg->ushm_flags &= ~USHMF_REG_LINKED; 4247 } 4248 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4249 LIST_REMOVE(reg, ushm_obj_link); 4250 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4251 } 4252 } 4253 return (res); 4254 } 4255 4256 static void 4257 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4258 { 4259 vm_object_t object; 4260 bool dofree; 4261 4262 if (force) { 4263 object = reg->ushm_obj->shm_object; 4264 VM_OBJECT_WLOCK(object); 4265 object->flags |= OBJ_UMTXDEAD; 4266 VM_OBJECT_WUNLOCK(object); 4267 } 4268 mtx_lock(&umtx_shm_lock); 4269 dofree = umtx_shm_unref_reg_locked(reg, force); 4270 mtx_unlock(&umtx_shm_lock); 4271 if (dofree) 4272 umtx_shm_free_reg(reg); 4273 } 4274 4275 void 4276 umtx_shm_object_init(vm_object_t object) 4277 { 4278 4279 LIST_INIT(USHM_OBJ_UMTX(object)); 4280 } 4281 4282 void 4283 umtx_shm_object_terminated(vm_object_t object) 4284 { 4285 struct umtx_shm_reg *reg, *reg1; 4286 bool dofree; 4287 4288 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4289 return; 4290 4291 dofree = false; 4292 mtx_lock(&umtx_shm_lock); 4293 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4294 if (umtx_shm_unref_reg_locked(reg, true)) { 4295 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4296 ushm_reg_link); 4297 dofree = true; 4298 } 4299 } 4300 mtx_unlock(&umtx_shm_lock); 4301 if (dofree) 4302 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4303 } 4304 4305 static int 4306 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4307 struct umtx_shm_reg **res) 4308 { 4309 struct umtx_shm_reg *reg, *reg1; 4310 struct ucred *cred; 4311 int error; 4312 4313 reg = umtx_shm_find_reg(key); 4314 if (reg != NULL) { 4315 *res = reg; 4316 return (0); 4317 } 4318 cred = td->td_ucred; 4319 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4320 return (ENOMEM); 4321 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4322 reg->ushm_refcnt = 1; 4323 bcopy(key, ®->ushm_key, sizeof(*key)); 4324 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4325 reg->ushm_cred = crhold(cred); 4326 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4327 if (error != 0) { 4328 umtx_shm_free_reg(reg); 4329 return (error); 4330 } 4331 mtx_lock(&umtx_shm_lock); 4332 reg1 = umtx_shm_find_reg_locked(key); 4333 if (reg1 != NULL) { 4334 mtx_unlock(&umtx_shm_lock); 4335 umtx_shm_free_reg(reg); 4336 *res = reg1; 4337 return (0); 4338 } 4339 reg->ushm_refcnt++; 4340 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4341 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4342 ushm_obj_link); 4343 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4344 mtx_unlock(&umtx_shm_lock); 4345 *res = reg; 4346 return (0); 4347 } 4348 4349 static int 4350 umtx_shm_alive(struct thread *td, void *addr) 4351 { 4352 vm_map_t map; 4353 vm_map_entry_t entry; 4354 vm_object_t object; 4355 vm_pindex_t pindex; 4356 vm_prot_t prot; 4357 int res, ret; 4358 boolean_t wired; 4359 4360 map = &td->td_proc->p_vmspace->vm_map; 4361 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4362 &object, &pindex, &prot, &wired); 4363 if (res != KERN_SUCCESS) 4364 return (EFAULT); 4365 if (object == NULL) 4366 ret = EINVAL; 4367 else 4368 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4369 vm_map_lookup_done(map, entry); 4370 return (ret); 4371 } 4372 4373 static void 4374 umtx_shm_init(void) 4375 { 4376 int i; 4377 4378 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4379 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4380 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4381 for (i = 0; i < nitems(umtx_shm_registry); i++) 4382 TAILQ_INIT(&umtx_shm_registry[i]); 4383 } 4384 4385 static int 4386 umtx_shm(struct thread *td, void *addr, u_int flags) 4387 { 4388 struct umtx_key key; 4389 struct umtx_shm_reg *reg; 4390 struct file *fp; 4391 int error, fd; 4392 4393 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4394 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4395 return (EINVAL); 4396 if ((flags & UMTX_SHM_ALIVE) != 0) 4397 return (umtx_shm_alive(td, addr)); 4398 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4399 if (error != 0) 4400 return (error); 4401 KASSERT(key.shared == 1, ("non-shared key")); 4402 if ((flags & UMTX_SHM_CREAT) != 0) { 4403 error = umtx_shm_create_reg(td, &key, ®); 4404 } else { 4405 reg = umtx_shm_find_reg(&key); 4406 if (reg == NULL) 4407 error = ESRCH; 4408 } 4409 umtx_key_release(&key); 4410 if (error != 0) 4411 return (error); 4412 KASSERT(reg != NULL, ("no reg")); 4413 if ((flags & UMTX_SHM_DESTROY) != 0) { 4414 umtx_shm_unref_reg(reg, true); 4415 } else { 4416 #if 0 4417 #ifdef MAC 4418 error = mac_posixshm_check_open(td->td_ucred, 4419 reg->ushm_obj, FFLAGS(O_RDWR)); 4420 if (error == 0) 4421 #endif 4422 error = shm_access(reg->ushm_obj, td->td_ucred, 4423 FFLAGS(O_RDWR)); 4424 if (error == 0) 4425 #endif 4426 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4427 if (error == 0) { 4428 shm_hold(reg->ushm_obj); 4429 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4430 &shm_ops); 4431 td->td_retval[0] = fd; 4432 fdrop(fp, td); 4433 } 4434 } 4435 umtx_shm_unref_reg(reg, false); 4436 return (error); 4437 } 4438 4439 static int 4440 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4441 const struct umtx_copyops *ops __unused) 4442 { 4443 4444 return (umtx_shm(td, uap->uaddr1, uap->val)); 4445 } 4446 4447 static int 4448 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4449 const struct umtx_copyops *ops) 4450 { 4451 struct umtx_robust_lists_params rb; 4452 int error; 4453 4454 if (ops->compat32) { 4455 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4456 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4457 td->td_rb_inact != 0)) 4458 return (EBUSY); 4459 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4460 return (EBUSY); 4461 } 4462 4463 bzero(&rb, sizeof(rb)); 4464 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4465 if (error != 0) 4466 return (error); 4467 4468 if (ops->compat32) 4469 td->td_pflags2 |= TDP2_COMPAT32RB; 4470 4471 td->td_rb_list = rb.robust_list_offset; 4472 td->td_rbp_list = rb.robust_priv_list_offset; 4473 td->td_rb_inact = rb.robust_inact_offset; 4474 return (0); 4475 } 4476 4477 #if defined(__i386__) || defined(__amd64__) 4478 /* 4479 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4480 * 32-bit time_t there. Other architectures just need the i386 definitions 4481 * along with their standard compat32. 4482 */ 4483 struct timespecx32 { 4484 int64_t tv_sec; 4485 int32_t tv_nsec; 4486 }; 4487 4488 struct umtx_timex32 { 4489 struct timespecx32 _timeout; 4490 uint32_t _flags; 4491 uint32_t _clockid; 4492 }; 4493 4494 #ifndef __i386__ 4495 #define timespeci386 timespec32 4496 #define umtx_timei386 umtx_time32 4497 #endif 4498 #else /* !__i386__ && !__amd64__ */ 4499 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4500 struct timespeci386 { 4501 int32_t tv_sec; 4502 int32_t tv_nsec; 4503 }; 4504 4505 struct umtx_timei386 { 4506 struct timespeci386 _timeout; 4507 uint32_t _flags; 4508 uint32_t _clockid; 4509 }; 4510 4511 #if defined(__LP64__) 4512 #define timespecx32 timespec32 4513 #define umtx_timex32 umtx_time32 4514 #endif 4515 #endif 4516 4517 static int 4518 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4519 struct umtx_robust_lists_params *rbp) 4520 { 4521 struct umtx_robust_lists_params_compat32 rb32; 4522 int error; 4523 4524 if (size > sizeof(rb32)) 4525 return (EINVAL); 4526 bzero(&rb32, sizeof(rb32)); 4527 error = copyin(uaddr, &rb32, size); 4528 if (error != 0) 4529 return (error); 4530 CP(rb32, *rbp, robust_list_offset); 4531 CP(rb32, *rbp, robust_priv_list_offset); 4532 CP(rb32, *rbp, robust_inact_offset); 4533 return (0); 4534 } 4535 4536 #ifndef __i386__ 4537 static inline int 4538 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4539 { 4540 struct timespeci386 ts32; 4541 int error; 4542 4543 error = copyin(uaddr, &ts32, sizeof(ts32)); 4544 if (error == 0) { 4545 if (ts32.tv_sec < 0 || 4546 ts32.tv_nsec >= 1000000000 || 4547 ts32.tv_nsec < 0) 4548 error = EINVAL; 4549 else { 4550 CP(ts32, *tsp, tv_sec); 4551 CP(ts32, *tsp, tv_nsec); 4552 } 4553 } 4554 return (error); 4555 } 4556 4557 static inline int 4558 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4559 { 4560 struct umtx_timei386 t32; 4561 int error; 4562 4563 t32._clockid = CLOCK_REALTIME; 4564 t32._flags = 0; 4565 if (size <= sizeof(t32._timeout)) 4566 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4567 else 4568 error = copyin(uaddr, &t32, sizeof(t32)); 4569 if (error != 0) 4570 return (error); 4571 if (t32._timeout.tv_sec < 0 || 4572 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4573 return (EINVAL); 4574 TS_CP(t32, *tp, _timeout); 4575 CP(t32, *tp, _flags); 4576 CP(t32, *tp, _clockid); 4577 return (0); 4578 } 4579 4580 static int 4581 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4582 { 4583 struct timespeci386 remain32 = { 4584 .tv_sec = tsp->tv_sec, 4585 .tv_nsec = tsp->tv_nsec, 4586 }; 4587 4588 /* 4589 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4590 * and we're only called if sz >= sizeof(timespec) as supplied in the 4591 * copyops. 4592 */ 4593 KASSERT(sz >= sizeof(remain32), 4594 ("umtx_copyops specifies incorrect sizes")); 4595 4596 return (copyout(&remain32, uaddr, sizeof(remain32))); 4597 } 4598 #endif /* !__i386__ */ 4599 4600 #if defined(__i386__) || defined(__LP64__) 4601 static inline int 4602 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4603 { 4604 struct timespecx32 ts32; 4605 int error; 4606 4607 error = copyin(uaddr, &ts32, sizeof(ts32)); 4608 if (error == 0) { 4609 if (ts32.tv_sec < 0 || 4610 ts32.tv_nsec >= 1000000000 || 4611 ts32.tv_nsec < 0) 4612 error = EINVAL; 4613 else { 4614 CP(ts32, *tsp, tv_sec); 4615 CP(ts32, *tsp, tv_nsec); 4616 } 4617 } 4618 return (error); 4619 } 4620 4621 static inline int 4622 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4623 { 4624 struct umtx_timex32 t32; 4625 int error; 4626 4627 t32._clockid = CLOCK_REALTIME; 4628 t32._flags = 0; 4629 if (size <= sizeof(t32._timeout)) 4630 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4631 else 4632 error = copyin(uaddr, &t32, sizeof(t32)); 4633 if (error != 0) 4634 return (error); 4635 if (t32._timeout.tv_sec < 0 || 4636 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4637 return (EINVAL); 4638 TS_CP(t32, *tp, _timeout); 4639 CP(t32, *tp, _flags); 4640 CP(t32, *tp, _clockid); 4641 return (0); 4642 } 4643 4644 static int 4645 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4646 { 4647 struct timespecx32 remain32 = { 4648 .tv_sec = tsp->tv_sec, 4649 .tv_nsec = tsp->tv_nsec, 4650 }; 4651 4652 /* 4653 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4654 * and we're only called if sz >= sizeof(timespec) as supplied in the 4655 * copyops. 4656 */ 4657 KASSERT(sz >= sizeof(remain32), 4658 ("umtx_copyops specifies incorrect sizes")); 4659 4660 return (copyout(&remain32, uaddr, sizeof(remain32))); 4661 } 4662 #endif /* __i386__ || __LP64__ */ 4663 4664 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4665 const struct umtx_copyops *umtx_ops); 4666 4667 static const _umtx_op_func op_table[] = { 4668 #ifdef COMPAT_FREEBSD10 4669 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4670 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4671 #else 4672 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4673 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4674 #endif 4675 [UMTX_OP_WAIT] = __umtx_op_wait, 4676 [UMTX_OP_WAKE] = __umtx_op_wake, 4677 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4678 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4679 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4680 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4681 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4682 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4683 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4684 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4685 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4686 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4687 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4688 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4689 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4690 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4691 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4692 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4693 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4694 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4695 #else 4696 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4697 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4698 #endif 4699 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4700 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4701 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4702 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4703 [UMTX_OP_SHM] = __umtx_op_shm, 4704 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4705 }; 4706 4707 static const struct umtx_copyops umtx_native_ops = { 4708 .copyin_timeout = umtx_copyin_timeout, 4709 .copyin_umtx_time = umtx_copyin_umtx_time, 4710 .copyin_robust_lists = umtx_copyin_robust_lists, 4711 .copyout_timeout = umtx_copyout_timeout, 4712 .timespec_sz = sizeof(struct timespec), 4713 .umtx_time_sz = sizeof(struct _umtx_time), 4714 }; 4715 4716 #ifndef __i386__ 4717 static const struct umtx_copyops umtx_native_opsi386 = { 4718 .copyin_timeout = umtx_copyin_timeouti386, 4719 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4720 .copyin_robust_lists = umtx_copyin_robust_lists32, 4721 .copyout_timeout = umtx_copyout_timeouti386, 4722 .timespec_sz = sizeof(struct timespeci386), 4723 .umtx_time_sz = sizeof(struct umtx_timei386), 4724 .compat32 = true, 4725 }; 4726 #endif 4727 4728 #if defined(__i386__) || defined(__LP64__) 4729 /* i386 can emulate other 32-bit archs, too! */ 4730 static const struct umtx_copyops umtx_native_opsx32 = { 4731 .copyin_timeout = umtx_copyin_timeoutx32, 4732 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4733 .copyin_robust_lists = umtx_copyin_robust_lists32, 4734 .copyout_timeout = umtx_copyout_timeoutx32, 4735 .timespec_sz = sizeof(struct timespecx32), 4736 .umtx_time_sz = sizeof(struct umtx_timex32), 4737 .compat32 = true, 4738 }; 4739 4740 #ifdef COMPAT_FREEBSD32 4741 #ifdef __amd64__ 4742 #define umtx_native_ops32 umtx_native_opsi386 4743 #else 4744 #define umtx_native_ops32 umtx_native_opsx32 4745 #endif 4746 #endif /* COMPAT_FREEBSD32 */ 4747 #endif /* __i386__ || __LP64__ */ 4748 4749 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4750 4751 static int 4752 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4753 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4754 { 4755 struct _umtx_op_args uap = { 4756 .obj = obj, 4757 .op = op & ~UMTX_OP__FLAGS, 4758 .val = val, 4759 .uaddr1 = uaddr1, 4760 .uaddr2 = uaddr2 4761 }; 4762 4763 if ((uap.op >= nitems(op_table))) 4764 return (EINVAL); 4765 return ((*op_table[uap.op])(td, &uap, ops)); 4766 } 4767 4768 int 4769 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4770 { 4771 static const struct umtx_copyops *umtx_ops; 4772 4773 umtx_ops = &umtx_native_ops; 4774 #ifdef __LP64__ 4775 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4776 if ((uap->op & UMTX_OP__I386) != 0) 4777 umtx_ops = &umtx_native_opsi386; 4778 else 4779 umtx_ops = &umtx_native_opsx32; 4780 } 4781 #elif !defined(__i386__) 4782 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4783 if ((uap->op & UMTX_OP__I386) != 0) 4784 umtx_ops = &umtx_native_opsi386; 4785 #else 4786 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 4787 if ((uap->op & UMTX_OP__32BIT) != 0) 4788 umtx_ops = &umtx_native_opsx32; 4789 #endif 4790 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4791 uap->uaddr2, umtx_ops)); 4792 } 4793 4794 #ifdef COMPAT_FREEBSD32 4795 #ifdef COMPAT_FREEBSD10 4796 int 4797 freebsd10_freebsd32_umtx_lock(struct thread *td, 4798 struct freebsd10_freebsd32_umtx_lock_args *uap) 4799 { 4800 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4801 } 4802 4803 int 4804 freebsd10_freebsd32_umtx_unlock(struct thread *td, 4805 struct freebsd10_freebsd32_umtx_unlock_args *uap) 4806 { 4807 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4808 } 4809 #endif /* COMPAT_FREEBSD10 */ 4810 4811 int 4812 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4813 { 4814 4815 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr, 4816 uap->uaddr2, &umtx_native_ops32)); 4817 } 4818 #endif /* COMPAT_FREEBSD32 */ 4819 4820 void 4821 umtx_thread_init(struct thread *td) 4822 { 4823 4824 td->td_umtxq = umtxq_alloc(); 4825 td->td_umtxq->uq_thread = td; 4826 } 4827 4828 void 4829 umtx_thread_fini(struct thread *td) 4830 { 4831 4832 umtxq_free(td->td_umtxq); 4833 } 4834 4835 /* 4836 * It will be called when new thread is created, e.g fork(). 4837 */ 4838 void 4839 umtx_thread_alloc(struct thread *td) 4840 { 4841 struct umtx_q *uq; 4842 4843 uq = td->td_umtxq; 4844 uq->uq_inherited_pri = PRI_MAX; 4845 4846 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4847 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4848 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4849 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4850 } 4851 4852 /* 4853 * exec() hook. 4854 * 4855 * Clear robust lists for all process' threads, not delaying the 4856 * cleanup to thread exit, since the relevant address space is 4857 * destroyed right now. 4858 */ 4859 void 4860 umtx_exec(struct proc *p) 4861 { 4862 struct thread *td; 4863 4864 KASSERT(p == curproc, ("need curproc")); 4865 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4866 (p->p_flag & P_STOPPED_SINGLE) != 0, 4867 ("curproc must be single-threaded")); 4868 /* 4869 * There is no need to lock the list as only this thread can be 4870 * running. 4871 */ 4872 FOREACH_THREAD_IN_PROC(p, td) { 4873 KASSERT(td == curthread || 4874 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4875 ("running thread %p %p", p, td)); 4876 umtx_thread_cleanup(td); 4877 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4878 } 4879 } 4880 4881 /* 4882 * thread exit hook. 4883 */ 4884 void 4885 umtx_thread_exit(struct thread *td) 4886 { 4887 4888 umtx_thread_cleanup(td); 4889 } 4890 4891 static int 4892 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4893 { 4894 u_long res1; 4895 uint32_t res32; 4896 int error; 4897 4898 if (compat32) { 4899 error = fueword32((void *)ptr, &res32); 4900 if (error == 0) 4901 res1 = res32; 4902 } else { 4903 error = fueword((void *)ptr, &res1); 4904 } 4905 if (error == 0) 4906 *res = res1; 4907 else 4908 error = EFAULT; 4909 return (error); 4910 } 4911 4912 static void 4913 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 4914 bool compat32) 4915 { 4916 struct umutex32 m32; 4917 4918 if (compat32) { 4919 memcpy(&m32, m, sizeof(m32)); 4920 *rb_list = m32.m_rb_lnk; 4921 } else { 4922 *rb_list = m->m_rb_lnk; 4923 } 4924 } 4925 4926 static int 4927 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 4928 bool compat32) 4929 { 4930 struct umutex m; 4931 int error; 4932 4933 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4934 error = copyin((void *)rbp, &m, sizeof(m)); 4935 if (error != 0) 4936 return (error); 4937 if (rb_list != NULL) 4938 umtx_read_rb_list(td, &m, rb_list, compat32); 4939 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4940 return (EINVAL); 4941 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4942 /* inact is cleared after unlock, allow the inconsistency */ 4943 return (inact ? 0 : EINVAL); 4944 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4945 } 4946 4947 static void 4948 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4949 const char *name, bool compat32) 4950 { 4951 int error, i; 4952 uintptr_t rbp; 4953 bool inact; 4954 4955 if (rb_list == 0) 4956 return; 4957 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 4958 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4959 if (rbp == *rb_inact) { 4960 inact = true; 4961 *rb_inact = 0; 4962 } else 4963 inact = false; 4964 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 4965 } 4966 if (i == umtx_max_rb && umtx_verbose_rb) { 4967 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4968 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4969 } 4970 if (error != 0 && umtx_verbose_rb) { 4971 uprintf("comm %s pid %d: handling %srb error %d\n", 4972 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4973 } 4974 } 4975 4976 /* 4977 * Clean up umtx data. 4978 */ 4979 static void 4980 umtx_thread_cleanup(struct thread *td) 4981 { 4982 struct umtx_q *uq; 4983 struct umtx_pi *pi; 4984 uintptr_t rb_inact; 4985 bool compat32; 4986 4987 /* 4988 * Disown pi mutexes. 4989 */ 4990 uq = td->td_umtxq; 4991 if (uq != NULL) { 4992 if (uq->uq_inherited_pri != PRI_MAX || 4993 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4994 mtx_lock(&umtx_lock); 4995 uq->uq_inherited_pri = PRI_MAX; 4996 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4997 pi->pi_owner = NULL; 4998 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4999 } 5000 mtx_unlock(&umtx_lock); 5001 } 5002 sched_lend_user_prio_cond(td, PRI_MAX); 5003 } 5004 5005 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5006 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5007 5008 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5009 return; 5010 5011 /* 5012 * Handle terminated robust mutexes. Must be done after 5013 * robust pi disown, otherwise unlock could see unowned 5014 * entries. 5015 */ 5016 rb_inact = td->td_rb_inact; 5017 if (rb_inact != 0) 5018 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5019 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5020 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5021 if (rb_inact != 0) 5022 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5023 } 5024