1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 #include <sys/umtxvar.h> 67 68 #include <security/mac/mac_framework.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/pmap.h> 73 #include <vm/vm_map.h> 74 #include <vm/vm_object.h> 75 76 #include <machine/atomic.h> 77 #include <machine/cpu.h> 78 79 #include <compat/freebsd32/freebsd32.h> 80 #ifdef COMPAT_FREEBSD32 81 #include <compat/freebsd32/freebsd32_proto.h> 82 #endif 83 84 #define _UMUTEX_TRY 1 85 #define _UMUTEX_WAIT 2 86 87 #ifdef UMTX_PROFILING 88 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 89 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 90 #endif 91 92 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 93 94 /* 95 * Don't propagate time-sharing priority, there is a security reason, 96 * a user can simply introduce PI-mutex, let thread A lock the mutex, 97 * and let another thread B block on the mutex, because B is 98 * sleeping, its priority will be boosted, this causes A's priority to 99 * be boosted via priority propagating too and will never be lowered even 100 * if it is using 100%CPU, this is unfair to other processes. 
101 */ 102 103 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 104 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 105 PRI_MAX_TIMESHARE : (td)->td_user_pri) 106 107 #define GOLDEN_RATIO_PRIME 2654404609U 108 #ifndef UMTX_CHAINS 109 #define UMTX_CHAINS 512 110 #endif 111 #define UMTX_SHIFTS (__WORD_BIT - 9) 112 113 #define GET_SHARE(flags) \ 114 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 115 116 #define BUSY_SPINS 200 117 118 struct umtx_copyops { 119 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 120 int (*copyin_umtx_time)(const void *uaddr, size_t size, 121 struct _umtx_time *tp); 122 int (*copyin_robust_lists)(const void *uaddr, size_t size, 123 struct umtx_robust_lists_params *rbp); 124 int (*copyout_timeout)(void *uaddr, size_t size, 125 struct timespec *tsp); 126 const size_t timespec_sz; 127 const size_t umtx_time_sz; 128 const bool compat32; 129 }; 130 131 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 132 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 133 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 134 135 int umtx_shm_vnobj_persistent = 0; 136 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 137 &umtx_shm_vnobj_persistent, 0, 138 "False forces destruction of umtx attached to file, on last close"); 139 static int umtx_max_rb = 1000; 140 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 141 &umtx_max_rb, 0, 142 "Maximum number of robust mutexes allowed for each thread"); 143 144 static uma_zone_t umtx_pi_zone; 145 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 146 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 147 static int umtx_pi_allocated; 148 149 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 150 "umtx debug"); 151 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 152 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 153 static int umtx_verbose_rb = 1; 154 
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 155 &umtx_verbose_rb, 0, 156 ""); 157 158 #ifdef UMTX_PROFILING 159 static long max_length; 160 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 161 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 162 "umtx chain stats"); 163 #endif 164 165 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 166 const struct _umtx_time *umtxtime); 167 static int umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo); 168 static inline void umtx_abs_timeout_update(struct umtx_abs_timeout *timo); 169 170 static void umtx_shm_init(void); 171 static void umtxq_sysinit(void *); 172 static void umtxq_hash(struct umtx_key *key); 173 static struct umtx_pi *umtx_pi_alloc(int); 174 static void umtx_pi_free(struct umtx_pi *pi); 175 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 176 bool rb); 177 static void umtx_thread_cleanup(struct thread *td); 178 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 179 180 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 181 182 static struct mtx umtx_lock; 183 184 #ifdef UMTX_PROFILING 185 static void 186 umtx_init_profiling(void) 187 { 188 struct sysctl_oid *chain_oid; 189 char chain_name[10]; 190 int i; 191 192 for (i = 0; i < UMTX_CHAINS; ++i) { 193 snprintf(chain_name, sizeof(chain_name), "%d", i); 194 chain_oid = SYSCTL_ADD_NODE(NULL, 195 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 196 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 197 "umtx hash stats"); 198 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 199 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 200 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 201 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 202 } 203 } 204 205 static int 206 
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 207 { 208 char buf[512]; 209 struct sbuf sb; 210 struct umtxq_chain *uc; 211 u_int fract, i, j, tot, whole; 212 u_int sf0, sf1, sf2, sf3, sf4; 213 u_int si0, si1, si2, si3, si4; 214 u_int sw0, sw1, sw2, sw3, sw4; 215 216 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 217 for (i = 0; i < 2; i++) { 218 tot = 0; 219 for (j = 0; j < UMTX_CHAINS; ++j) { 220 uc = &umtxq_chains[i][j]; 221 mtx_lock(&uc->uc_lock); 222 tot += uc->max_length; 223 mtx_unlock(&uc->uc_lock); 224 } 225 if (tot == 0) 226 sbuf_printf(&sb, "%u) Empty ", i); 227 else { 228 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 229 si0 = si1 = si2 = si3 = si4 = 0; 230 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 231 for (j = 0; j < UMTX_CHAINS; j++) { 232 uc = &umtxq_chains[i][j]; 233 mtx_lock(&uc->uc_lock); 234 whole = uc->max_length * 100; 235 mtx_unlock(&uc->uc_lock); 236 fract = (whole % tot) * 100; 237 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 238 sf0 = fract; 239 si0 = j; 240 sw0 = whole; 241 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 242 sf1)) { 243 sf1 = fract; 244 si1 = j; 245 sw1 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 247 sf2)) { 248 sf2 = fract; 249 si2 = j; 250 sw2 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 252 sf3)) { 253 sf3 = fract; 254 si3 = j; 255 sw3 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 257 sf4)) { 258 sf4 = fract; 259 si4 = j; 260 sw4 = whole; 261 } 262 } 263 sbuf_printf(&sb, "queue %u:\n", i); 264 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 265 sf0 / tot, si0); 266 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 267 sf1 / tot, si1); 268 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 269 sf2 / tot, si2); 270 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 271 sf3 / tot, si3); 272 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 273 sf4 / tot, si4); 274 } 275 } 276 sbuf_trim(&sb); 277 sbuf_finish(&sb); 278 sysctl_handle_string(oidp, sbuf_data(&sb), 
sbuf_len(&sb), req); 279 sbuf_delete(&sb); 280 return (0); 281 } 282 283 static int 284 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 285 { 286 struct umtxq_chain *uc; 287 u_int i, j; 288 int clear, error; 289 290 clear = 0; 291 error = sysctl_handle_int(oidp, &clear, 0, req); 292 if (error != 0 || req->newptr == NULL) 293 return (error); 294 295 if (clear != 0) { 296 for (i = 0; i < 2; ++i) { 297 for (j = 0; j < UMTX_CHAINS; ++j) { 298 uc = &umtxq_chains[i][j]; 299 mtx_lock(&uc->uc_lock); 300 uc->length = 0; 301 uc->max_length = 0; 302 mtx_unlock(&uc->uc_lock); 303 } 304 } 305 } 306 return (0); 307 } 308 309 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 310 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 311 sysctl_debug_umtx_chains_clear, "I", 312 "Clear umtx chains statistics"); 313 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 314 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 315 sysctl_debug_umtx_chains_peaks, "A", 316 "Highest peaks in chains max length"); 317 #endif 318 319 static void 320 umtxq_sysinit(void *arg __unused) 321 { 322 int i, j; 323 324 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 325 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 326 for (i = 0; i < 2; ++i) { 327 for (j = 0; j < UMTX_CHAINS; ++j) { 328 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 329 MTX_DEF | MTX_DUPOK); 330 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 331 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 332 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 333 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 334 umtxq_chains[i][j].uc_busy = 0; 335 umtxq_chains[i][j].uc_waiters = 0; 336 #ifdef UMTX_PROFILING 337 umtxq_chains[i][j].length = 0; 338 umtxq_chains[i][j].max_length = 0; 339 #endif 340 } 341 } 342 #ifdef UMTX_PROFILING 343 umtx_init_profiling(); 344 #endif 345 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 346 umtx_shm_init(); 347 } 348 349 struct umtx_q * 350 umtxq_alloc(void) 351 { 352 struct umtx_q *uq; 353 354 uq = 
malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 355 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 356 M_WAITOK | M_ZERO); 357 TAILQ_INIT(&uq->uq_spare_queue->head); 358 TAILQ_INIT(&uq->uq_pi_contested); 359 uq->uq_inherited_pri = PRI_MAX; 360 return (uq); 361 } 362 363 void 364 umtxq_free(struct umtx_q *uq) 365 { 366 367 MPASS(uq->uq_spare_queue != NULL); 368 free(uq->uq_spare_queue, M_UMTX); 369 free(uq, M_UMTX); 370 } 371 372 static inline void 373 umtxq_hash(struct umtx_key *key) 374 { 375 unsigned n; 376 377 n = (uintptr_t)key->info.both.a + key->info.both.b; 378 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 379 } 380 381 struct umtxq_chain * 382 umtxq_getchain(struct umtx_key *key) 383 { 384 385 if (key->type <= TYPE_SEM) 386 return (&umtxq_chains[1][key->hash]); 387 return (&umtxq_chains[0][key->hash]); 388 } 389 390 /* 391 * Set chain to busy state when following operation 392 * may be blocked (kernel mutex can not be used). 393 */ 394 void 395 umtxq_busy(struct umtx_key *key) 396 { 397 struct umtxq_chain *uc; 398 399 uc = umtxq_getchain(key); 400 mtx_assert(&uc->uc_lock, MA_OWNED); 401 if (uc->uc_busy) { 402 #ifdef SMP 403 if (smp_cpus > 1) { 404 int count = BUSY_SPINS; 405 if (count > 0) { 406 umtxq_unlock(key); 407 while (uc->uc_busy && --count > 0) 408 cpu_spinwait(); 409 umtxq_lock(key); 410 } 411 } 412 #endif 413 while (uc->uc_busy) { 414 uc->uc_waiters++; 415 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 416 uc->uc_waiters--; 417 } 418 } 419 uc->uc_busy = 1; 420 } 421 422 /* 423 * Unbusy a chain. 
424 */ 425 void 426 umtxq_unbusy(struct umtx_key *key) 427 { 428 struct umtxq_chain *uc; 429 430 uc = umtxq_getchain(key); 431 mtx_assert(&uc->uc_lock, MA_OWNED); 432 KASSERT(uc->uc_busy != 0, ("not busy")); 433 uc->uc_busy = 0; 434 if (uc->uc_waiters) 435 wakeup_one(uc); 436 } 437 438 static inline void 439 umtxq_unbusy_unlocked(struct umtx_key *key) 440 { 441 442 umtxq_lock(key); 443 umtxq_unbusy(key); 444 umtxq_unlock(key); 445 } 446 447 static struct umtxq_queue * 448 umtxq_queue_lookup(struct umtx_key *key, int q) 449 { 450 struct umtxq_queue *uh; 451 struct umtxq_chain *uc; 452 453 uc = umtxq_getchain(key); 454 UMTXQ_LOCKED_ASSERT(uc); 455 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 456 if (umtx_key_match(&uh->key, key)) 457 return (uh); 458 } 459 460 return (NULL); 461 } 462 463 void 464 umtxq_insert_queue(struct umtx_q *uq, int q) 465 { 466 struct umtxq_queue *uh; 467 struct umtxq_chain *uc; 468 469 uc = umtxq_getchain(&uq->uq_key); 470 UMTXQ_LOCKED_ASSERT(uc); 471 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 472 uh = umtxq_queue_lookup(&uq->uq_key, q); 473 if (uh != NULL) { 474 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 475 } else { 476 uh = uq->uq_spare_queue; 477 uh->key = uq->uq_key; 478 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 479 #ifdef UMTX_PROFILING 480 uc->length++; 481 if (uc->length > uc->max_length) { 482 uc->max_length = uc->length; 483 if (uc->max_length > max_length) 484 max_length = uc->max_length; 485 } 486 #endif 487 } 488 uq->uq_spare_queue = NULL; 489 490 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 491 uh->length++; 492 uq->uq_flags |= UQF_UMTXQ; 493 uq->uq_cur_queue = uh; 494 return; 495 } 496 497 void 498 umtxq_remove_queue(struct umtx_q *uq, int q) 499 { 500 struct umtxq_chain *uc; 501 struct umtxq_queue *uh; 502 503 uc = umtxq_getchain(&uq->uq_key); 504 UMTXQ_LOCKED_ASSERT(uc); 505 if (uq->uq_flags & UQF_UMTXQ) { 506 uh = uq->uq_cur_queue; 507 TAILQ_REMOVE(&uh->head, uq, uq_link); 
508 uh->length--; 509 uq->uq_flags &= ~UQF_UMTXQ; 510 if (TAILQ_EMPTY(&uh->head)) { 511 KASSERT(uh->length == 0, 512 ("inconsistent umtxq_queue length")); 513 #ifdef UMTX_PROFILING 514 uc->length--; 515 #endif 516 LIST_REMOVE(uh, link); 517 } else { 518 uh = LIST_FIRST(&uc->uc_spare_queue); 519 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 520 LIST_REMOVE(uh, link); 521 } 522 uq->uq_spare_queue = uh; 523 uq->uq_cur_queue = NULL; 524 } 525 } 526 527 /* 528 * Check if there are multiple waiters 529 */ 530 int 531 umtxq_count(struct umtx_key *key) 532 { 533 struct umtxq_queue *uh; 534 535 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 536 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 537 if (uh != NULL) 538 return (uh->length); 539 return (0); 540 } 541 542 /* 543 * Check if there are multiple PI waiters and returns first 544 * waiter. 545 */ 546 static int 547 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 548 { 549 struct umtxq_queue *uh; 550 551 *first = NULL; 552 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 553 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 554 if (uh != NULL) { 555 *first = TAILQ_FIRST(&uh->head); 556 return (uh->length); 557 } 558 return (0); 559 } 560 561 /* 562 * Wake up threads waiting on an userland object by a bit mask. 563 */ 564 int 565 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 566 { 567 struct umtxq_queue *uh; 568 struct umtx_q *uq, *uq_temp; 569 int ret; 570 571 ret = 0; 572 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 573 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 574 if (uh == NULL) 575 return (0); 576 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 577 if ((uq->uq_bitset & bitset) == 0) 578 continue; 579 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 580 wakeup_one(uq); 581 if (++ret >= n_wake) 582 break; 583 } 584 return (ret); 585 } 586 587 /* 588 * Wake up threads waiting on an userland object. 
589 */ 590 591 static int 592 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 593 { 594 struct umtxq_queue *uh; 595 struct umtx_q *uq; 596 int ret; 597 598 ret = 0; 599 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 600 uh = umtxq_queue_lookup(key, q); 601 if (uh != NULL) { 602 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 603 umtxq_remove_queue(uq, q); 604 wakeup(uq); 605 if (++ret >= n_wake) 606 return (ret); 607 } 608 } 609 return (ret); 610 } 611 612 /* 613 * Wake up specified thread. 614 */ 615 static inline void 616 umtxq_signal_thread(struct umtx_q *uq) 617 { 618 619 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 620 umtxq_remove(uq); 621 wakeup(uq); 622 } 623 624 /* 625 * Wake up a maximum of n_wake threads that are waiting on an userland 626 * object identified by key. The remaining threads are removed from queue 627 * identified by key and added to the queue identified by key2 (requeued). 628 * The n_requeue specifies an upper limit on the number of threads that 629 * are requeued to the second queue. 
630 */ 631 int 632 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 633 int n_requeue) 634 { 635 struct umtxq_queue *uh, *uh2; 636 struct umtx_q *uq, *uq_temp; 637 int ret; 638 639 ret = 0; 640 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 641 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 642 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 643 uh2 = umtxq_queue_lookup(key2, UMTX_SHARED_QUEUE); 644 if (uh == NULL) 645 return (0); 646 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 647 if (++ret <= n_wake) { 648 umtxq_remove(uq); 649 wakeup_one(uq); 650 } else { 651 umtxq_remove(uq); 652 uq->uq_key = *key2; 653 umtxq_insert(uq); 654 if (ret - n_wake == n_requeue) 655 break; 656 } 657 } 658 return (ret); 659 } 660 661 static inline int 662 tstohz(const struct timespec *tsp) 663 { 664 struct timeval tv; 665 666 TIMESPEC_TO_TIMEVAL(&tv, tsp); 667 return tvtohz(&tv); 668 } 669 670 void 671 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 672 int absolute, const struct timespec *timeout) 673 { 674 675 timo->clockid = clockid; 676 if (!absolute) { 677 timo->is_abs_real = false; 678 umtx_abs_timeout_update(timo); 679 timespecadd(&timo->cur, timeout, &timo->end); 680 } else { 681 timo->end = *timeout; 682 timo->is_abs_real = clockid == CLOCK_REALTIME || 683 clockid == CLOCK_REALTIME_FAST || 684 clockid == CLOCK_REALTIME_PRECISE; 685 /* 686 * If is_abs_real, umtxq_sleep will read the clock 687 * after setting td_rtcgen; otherwise, read it here. 
688 */ 689 if (!timo->is_abs_real) { 690 umtx_abs_timeout_update(timo); 691 } 692 } 693 } 694 695 static void 696 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 697 const struct _umtx_time *umtxtime) 698 { 699 700 umtx_abs_timeout_init(timo, umtxtime->_clockid, 701 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 702 } 703 704 static void 705 umtx_abs_timeout_update(struct umtx_abs_timeout *timo) 706 { 707 708 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 709 } 710 711 static int 712 umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo) 713 { 714 struct timespec tts; 715 716 if (timespeccmp(&timo->end, &timo->cur, <=)) 717 return (-1); 718 timespecsub(&timo->end, &timo->cur, &tts); 719 return (tstohz(&tts)); 720 } 721 722 static uint32_t 723 umtx_unlock_val(uint32_t flags, bool rb) 724 { 725 726 if (rb) 727 return (UMUTEX_RB_OWNERDEAD); 728 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 729 return (UMUTEX_RB_NOTRECOV); 730 else 731 return (UMUTEX_UNOWNED); 732 733 } 734 735 /* 736 * Put thread into sleep state, before sleeping, check if 737 * thread was removed from umtx queue. 
 */
/*
 * Parameters: uq is the queue entry to sleep on, wmesg is the wait
 * message handed to msleep(), abstime is an optional deadline (NULL
 * sleeps without a timeout).
 *
 * Returns 0 once uq is no longer flagged UQF_UMTXQ, ETIMEDOUT when the
 * deadline has passed, or EINTR/ERESTART as reported by msleep().  The
 * chain lock must be held on entry and is held again on return.
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	/*
	 * For absolute real-time deadlines, record the RTC generation
	 * before sampling the clock.
	 */
	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		umtx_abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Already removed from the queue: we were woken up. */
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = umtx_abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		/* PDROP: msleep() returns with the chain lock released. */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		/* Re-sample the clock while the chain lock is not held. */
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			umtx_abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
786 */ 787 int 788 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 789 { 790 struct thread *td = curthread; 791 vm_map_t map; 792 vm_map_entry_t entry; 793 vm_pindex_t pindex; 794 vm_prot_t prot; 795 boolean_t wired; 796 797 key->type = type; 798 if (share == THREAD_SHARE) { 799 key->shared = 0; 800 key->info.private.vs = td->td_proc->p_vmspace; 801 key->info.private.addr = (uintptr_t)addr; 802 } else { 803 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 804 map = &td->td_proc->p_vmspace->vm_map; 805 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 806 &entry, &key->info.shared.object, &pindex, &prot, 807 &wired) != KERN_SUCCESS) { 808 return (EFAULT); 809 } 810 811 if ((share == PROCESS_SHARE) || 812 (share == AUTO_SHARE && 813 VM_INHERIT_SHARE == entry->inheritance)) { 814 key->shared = 1; 815 key->info.shared.offset = (vm_offset_t)addr - 816 entry->start + entry->offset; 817 vm_object_reference(key->info.shared.object); 818 } else { 819 key->shared = 0; 820 key->info.private.vs = td->td_proc->p_vmspace; 821 key->info.private.addr = (uintptr_t)addr; 822 } 823 vm_map_lookup_done(map, entry); 824 } 825 826 umtxq_hash(key); 827 return (0); 828 } 829 830 /* 831 * Release key. 832 */ 833 void 834 umtx_key_release(struct umtx_key *key) 835 { 836 if (key->shared) 837 vm_object_deallocate(key->info.shared.object); 838 } 839 840 #ifdef COMPAT_FREEBSD10 841 /* 842 * Lock a umtx object. 843 */ 844 static int 845 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 846 const struct timespec *timeout) 847 { 848 struct umtx_abs_timeout timo; 849 struct umtx_q *uq; 850 u_long owner; 851 u_long old; 852 int error = 0; 853 854 uq = td->td_umtxq; 855 if (timeout != NULL) 856 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 857 858 /* 859 * Care must be exercised when dealing with umtx structure. It 860 * can fault on any access. 861 */ 862 for (;;) { 863 /* 864 * Try the uncontested case. This should be done in userland. 
865 */ 866 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 867 868 /* The acquire succeeded. */ 869 if (owner == UMTX_UNOWNED) 870 return (0); 871 872 /* The address was invalid. */ 873 if (owner == -1) 874 return (EFAULT); 875 876 /* If no one owns it but it is contested try to acquire it. */ 877 if (owner == UMTX_CONTESTED) { 878 owner = casuword(&umtx->u_owner, 879 UMTX_CONTESTED, id | UMTX_CONTESTED); 880 881 if (owner == UMTX_CONTESTED) 882 return (0); 883 884 /* The address was invalid. */ 885 if (owner == -1) 886 return (EFAULT); 887 888 error = thread_check_susp(td, false); 889 if (error != 0) 890 break; 891 892 /* If this failed the lock has changed, restart. */ 893 continue; 894 } 895 896 /* 897 * If we caught a signal, we have retried and now 898 * exit immediately. 899 */ 900 if (error != 0) 901 break; 902 903 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 904 AUTO_SHARE, &uq->uq_key)) != 0) 905 return (error); 906 907 umtxq_lock(&uq->uq_key); 908 umtxq_busy(&uq->uq_key); 909 umtxq_insert(uq); 910 umtxq_unbusy(&uq->uq_key); 911 umtxq_unlock(&uq->uq_key); 912 913 /* 914 * Set the contested bit so that a release in user space 915 * knows to use the system call for unlock. If this fails 916 * either some one else has acquired the lock or it has been 917 * released. 918 */ 919 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 920 921 /* The address was invalid. */ 922 if (old == -1) { 923 umtxq_lock(&uq->uq_key); 924 umtxq_remove(uq); 925 umtxq_unlock(&uq->uq_key); 926 umtx_key_release(&uq->uq_key); 927 return (EFAULT); 928 } 929 930 /* 931 * We set the contested bit, sleep. Otherwise the lock changed 932 * and we need to retry or we lost a race to the thread 933 * unlocking the umtx. 934 */ 935 umtxq_lock(&uq->uq_key); 936 if (old == owner) 937 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 
NULL : 938 &timo); 939 umtxq_remove(uq); 940 umtxq_unlock(&uq->uq_key); 941 umtx_key_release(&uq->uq_key); 942 943 if (error == 0) 944 error = thread_check_susp(td, false); 945 } 946 947 if (timeout == NULL) { 948 /* Mutex locking is restarted if it is interrupted. */ 949 if (error == EINTR) 950 error = ERESTART; 951 } else { 952 /* Timed-locking is not restarted. */ 953 if (error == ERESTART) 954 error = EINTR; 955 } 956 return (error); 957 } 958 959 /* 960 * Unlock a umtx object. 961 */ 962 static int 963 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 964 { 965 struct umtx_key key; 966 u_long owner; 967 u_long old; 968 int error; 969 int count; 970 971 /* 972 * Make sure we own this mtx. 973 */ 974 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 975 if (owner == -1) 976 return (EFAULT); 977 978 if ((owner & ~UMTX_CONTESTED) != id) 979 return (EPERM); 980 981 /* This should be done in userland */ 982 if ((owner & UMTX_CONTESTED) == 0) { 983 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 984 if (old == -1) 985 return (EFAULT); 986 if (old == owner) 987 return (0); 988 owner = old; 989 } 990 991 /* We should only ever be in here for contested locks */ 992 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 993 &key)) != 0) 994 return (error); 995 996 umtxq_lock(&key); 997 umtxq_busy(&key); 998 count = umtxq_count(&key); 999 umtxq_unlock(&key); 1000 1001 /* 1002 * When unlocking the umtx, it must be marked as unowned if 1003 * there is zero or one thread only waiting for it. 1004 * Otherwise, it must be marked as contested. 1005 */ 1006 old = casuword(&umtx->u_owner, owner, 1007 count <= 1 ? 
UMTX_UNOWNED : UMTX_CONTESTED); 1008 umtxq_lock(&key); 1009 umtxq_signal(&key,1); 1010 umtxq_unbusy(&key); 1011 umtxq_unlock(&key); 1012 umtx_key_release(&key); 1013 if (old == -1) 1014 return (EFAULT); 1015 if (old != owner) 1016 return (EINVAL); 1017 return (0); 1018 } 1019 1020 #ifdef COMPAT_FREEBSD32 1021 1022 /* 1023 * Lock a umtx object. 1024 */ 1025 static int 1026 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1027 const struct timespec *timeout) 1028 { 1029 struct umtx_abs_timeout timo; 1030 struct umtx_q *uq; 1031 uint32_t owner; 1032 uint32_t old; 1033 int error = 0; 1034 1035 uq = td->td_umtxq; 1036 1037 if (timeout != NULL) 1038 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1039 1040 /* 1041 * Care must be exercised when dealing with umtx structure. It 1042 * can fault on any access. 1043 */ 1044 for (;;) { 1045 /* 1046 * Try the uncontested case. This should be done in userland. 1047 */ 1048 owner = casuword32(m, UMUTEX_UNOWNED, id); 1049 1050 /* The acquire succeeded. */ 1051 if (owner == UMUTEX_UNOWNED) 1052 return (0); 1053 1054 /* The address was invalid. */ 1055 if (owner == -1) 1056 return (EFAULT); 1057 1058 /* If no one owns it but it is contested try to acquire it. */ 1059 if (owner == UMUTEX_CONTESTED) { 1060 owner = casuword32(m, 1061 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1062 if (owner == UMUTEX_CONTESTED) 1063 return (0); 1064 1065 /* The address was invalid. */ 1066 if (owner == -1) 1067 return (EFAULT); 1068 1069 error = thread_check_susp(td, false); 1070 if (error != 0) 1071 break; 1072 1073 /* If this failed the lock has changed, restart. */ 1074 continue; 1075 } 1076 1077 /* 1078 * If we caught a signal, we have retried and now 1079 * exit immediately. 
1080 */ 1081 if (error != 0) 1082 return (error); 1083 1084 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1085 AUTO_SHARE, &uq->uq_key)) != 0) 1086 return (error); 1087 1088 umtxq_lock(&uq->uq_key); 1089 umtxq_busy(&uq->uq_key); 1090 umtxq_insert(uq); 1091 umtxq_unbusy(&uq->uq_key); 1092 umtxq_unlock(&uq->uq_key); 1093 1094 /* 1095 * Set the contested bit so that a release in user space 1096 * knows to use the system call for unlock. If this fails 1097 * either some one else has acquired the lock or it has been 1098 * released. 1099 */ 1100 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1101 1102 /* The address was invalid. */ 1103 if (old == -1) { 1104 umtxq_lock(&uq->uq_key); 1105 umtxq_remove(uq); 1106 umtxq_unlock(&uq->uq_key); 1107 umtx_key_release(&uq->uq_key); 1108 return (EFAULT); 1109 } 1110 1111 /* 1112 * We set the contested bit, sleep. Otherwise the lock changed 1113 * and we need to retry or we lost a race to the thread 1114 * unlocking the umtx. 1115 */ 1116 umtxq_lock(&uq->uq_key); 1117 if (old == owner) 1118 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1119 NULL : &timo); 1120 umtxq_remove(uq); 1121 umtxq_unlock(&uq->uq_key); 1122 umtx_key_release(&uq->uq_key); 1123 1124 if (error == 0) 1125 error = thread_check_susp(td, false); 1126 } 1127 1128 if (timeout == NULL) { 1129 /* Mutex locking is restarted if it is interrupted. */ 1130 if (error == EINTR) 1131 error = ERESTART; 1132 } else { 1133 /* Timed-locking is not restarted. */ 1134 if (error == ERESTART) 1135 error = EINTR; 1136 } 1137 return (error); 1138 } 1139 1140 /* 1141 * Unlock a umtx object. 1142 */ 1143 static int 1144 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1145 { 1146 struct umtx_key key; 1147 uint32_t owner; 1148 uint32_t old; 1149 int error; 1150 int count; 1151 1152 /* 1153 * Make sure we own this mtx. 
1154 */ 1155 owner = fuword32(m); 1156 if (owner == -1) 1157 return (EFAULT); 1158 1159 if ((owner & ~UMUTEX_CONTESTED) != id) 1160 return (EPERM); 1161 1162 /* This should be done in userland */ 1163 if ((owner & UMUTEX_CONTESTED) == 0) { 1164 old = casuword32(m, owner, UMUTEX_UNOWNED); 1165 if (old == -1) 1166 return (EFAULT); 1167 if (old == owner) 1168 return (0); 1169 owner = old; 1170 } 1171 1172 /* We should only ever be in here for contested locks */ 1173 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1174 &key)) != 0) 1175 return (error); 1176 1177 umtxq_lock(&key); 1178 umtxq_busy(&key); 1179 count = umtxq_count(&key); 1180 umtxq_unlock(&key); 1181 1182 /* 1183 * When unlocking the umtx, it must be marked as unowned if 1184 * there is zero or one thread only waiting for it. 1185 * Otherwise, it must be marked as contested. 1186 */ 1187 old = casuword32(m, owner, 1188 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1189 umtxq_lock(&key); 1190 umtxq_signal(&key,1); 1191 umtxq_unbusy(&key); 1192 umtxq_unlock(&key); 1193 umtx_key_release(&key); 1194 if (old == -1) 1195 return (EFAULT); 1196 if (old != owner) 1197 return (EINVAL); 1198 return (0); 1199 } 1200 #endif /* COMPAT_FREEBSD32 */ 1201 #endif /* COMPAT_FREEBSD10 */ 1202 1203 /* 1204 * Fetch and compare value, sleep on the address if value is not changed. 1205 */ 1206 static int 1207 do_wait(struct thread *td, void *addr, u_long id, 1208 struct _umtx_time *timeout, int compat32, int is_private) 1209 { 1210 struct umtx_abs_timeout timo; 1211 struct umtx_q *uq; 1212 u_long tmp; 1213 uint32_t tmp32; 1214 int error = 0; 1215 1216 uq = td->td_umtxq; 1217 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1218 is_private ? 
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1219 return (error); 1220 1221 if (timeout != NULL) 1222 umtx_abs_timeout_init2(&timo, timeout); 1223 1224 umtxq_lock(&uq->uq_key); 1225 umtxq_insert(uq); 1226 umtxq_unlock(&uq->uq_key); 1227 if (compat32 == 0) { 1228 error = fueword(addr, &tmp); 1229 if (error != 0) 1230 error = EFAULT; 1231 } else { 1232 error = fueword32(addr, &tmp32); 1233 if (error == 0) 1234 tmp = tmp32; 1235 else 1236 error = EFAULT; 1237 } 1238 umtxq_lock(&uq->uq_key); 1239 if (error == 0) { 1240 if (tmp == id) 1241 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1242 NULL : &timo); 1243 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1244 error = 0; 1245 else 1246 umtxq_remove(uq); 1247 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1248 umtxq_remove(uq); 1249 } 1250 umtxq_unlock(&uq->uq_key); 1251 umtx_key_release(&uq->uq_key); 1252 if (error == ERESTART) 1253 error = EINTR; 1254 return (error); 1255 } 1256 1257 /* 1258 * Wake up threads sleeping on the specified address. 1259 */ 1260 int 1261 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1262 { 1263 struct umtx_key key; 1264 int ret; 1265 1266 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1267 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1268 return (ret); 1269 umtxq_lock(&key); 1270 umtxq_signal(&key, n_wake); 1271 umtxq_unlock(&key); 1272 umtx_key_release(&key); 1273 return (0); 1274 } 1275 1276 /* 1277 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1278 */ 1279 static int 1280 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1281 struct _umtx_time *timeout, int mode) 1282 { 1283 struct umtx_abs_timeout timo; 1284 struct umtx_q *uq; 1285 uint32_t owner, old, id; 1286 int error, rv; 1287 1288 id = td->td_tid; 1289 uq = td->td_umtxq; 1290 error = 0; 1291 if (timeout != NULL) 1292 umtx_abs_timeout_init2(&timo, timeout); 1293 1294 /* 1295 * Care must be exercised when dealing with umtx structure. It 1296 * can fault on any access. 
1297 */ 1298 for (;;) { 1299 rv = fueword32(&m->m_owner, &owner); 1300 if (rv == -1) 1301 return (EFAULT); 1302 if (mode == _UMUTEX_WAIT) { 1303 if (owner == UMUTEX_UNOWNED || 1304 owner == UMUTEX_CONTESTED || 1305 owner == UMUTEX_RB_OWNERDEAD || 1306 owner == UMUTEX_RB_NOTRECOV) 1307 return (0); 1308 } else { 1309 /* 1310 * Robust mutex terminated. Kernel duty is to 1311 * return EOWNERDEAD to the userspace. The 1312 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1313 * by the common userspace code. 1314 */ 1315 if (owner == UMUTEX_RB_OWNERDEAD) { 1316 rv = casueword32(&m->m_owner, 1317 UMUTEX_RB_OWNERDEAD, &owner, 1318 id | UMUTEX_CONTESTED); 1319 if (rv == -1) 1320 return (EFAULT); 1321 if (rv == 0) { 1322 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1323 return (EOWNERDEAD); /* success */ 1324 } 1325 MPASS(rv == 1); 1326 rv = thread_check_susp(td, false); 1327 if (rv != 0) 1328 return (rv); 1329 continue; 1330 } 1331 if (owner == UMUTEX_RB_NOTRECOV) 1332 return (ENOTRECOVERABLE); 1333 1334 /* 1335 * Try the uncontested case. This should be 1336 * done in userland. 1337 */ 1338 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1339 &owner, id); 1340 /* The address was invalid. */ 1341 if (rv == -1) 1342 return (EFAULT); 1343 1344 /* The acquire succeeded. */ 1345 if (rv == 0) { 1346 MPASS(owner == UMUTEX_UNOWNED); 1347 return (0); 1348 } 1349 1350 /* 1351 * If no one owns it but it is contested try 1352 * to acquire it. 1353 */ 1354 MPASS(rv == 1); 1355 if (owner == UMUTEX_CONTESTED) { 1356 rv = casueword32(&m->m_owner, 1357 UMUTEX_CONTESTED, &owner, 1358 id | UMUTEX_CONTESTED); 1359 /* The address was invalid. */ 1360 if (rv == -1) 1361 return (EFAULT); 1362 if (rv == 0) { 1363 MPASS(owner == UMUTEX_CONTESTED); 1364 return (0); 1365 } 1366 if (rv == 1) { 1367 rv = thread_check_susp(td, false); 1368 if (rv != 0) 1369 return (rv); 1370 } 1371 1372 /* 1373 * If this failed the lock has 1374 * changed, restart. 
1375 */ 1376 continue; 1377 } 1378 1379 /* rv == 1 but not contested, likely store failure */ 1380 rv = thread_check_susp(td, false); 1381 if (rv != 0) 1382 return (rv); 1383 } 1384 1385 if (mode == _UMUTEX_TRY) 1386 return (EBUSY); 1387 1388 /* 1389 * If we caught a signal, we have retried and now 1390 * exit immediately. 1391 */ 1392 if (error != 0) 1393 return (error); 1394 1395 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1396 GET_SHARE(flags), &uq->uq_key)) != 0) 1397 return (error); 1398 1399 umtxq_lock(&uq->uq_key); 1400 umtxq_busy(&uq->uq_key); 1401 umtxq_insert(uq); 1402 umtxq_unlock(&uq->uq_key); 1403 1404 /* 1405 * Set the contested bit so that a release in user space 1406 * knows to use the system call for unlock. If this fails 1407 * either some one else has acquired the lock or it has been 1408 * released. 1409 */ 1410 rv = casueword32(&m->m_owner, owner, &old, 1411 owner | UMUTEX_CONTESTED); 1412 1413 /* The address was invalid or casueword failed to store. */ 1414 if (rv == -1 || rv == 1) { 1415 umtxq_lock(&uq->uq_key); 1416 umtxq_remove(uq); 1417 umtxq_unbusy(&uq->uq_key); 1418 umtxq_unlock(&uq->uq_key); 1419 umtx_key_release(&uq->uq_key); 1420 if (rv == -1) 1421 return (EFAULT); 1422 if (rv == 1) { 1423 rv = thread_check_susp(td, false); 1424 if (rv != 0) 1425 return (rv); 1426 } 1427 continue; 1428 } 1429 1430 /* 1431 * We set the contested bit, sleep. Otherwise the lock changed 1432 * and we need to retry or we lost a race to the thread 1433 * unlocking the umtx. 1434 */ 1435 umtxq_lock(&uq->uq_key); 1436 umtxq_unbusy(&uq->uq_key); 1437 MPASS(old == owner); 1438 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1439 NULL : &timo); 1440 umtxq_remove(uq); 1441 umtxq_unlock(&uq->uq_key); 1442 umtx_key_release(&uq->uq_key); 1443 1444 if (error == 0) 1445 error = thread_check_susp(td, false); 1446 } 1447 1448 return (0); 1449 } 1450 1451 /* 1452 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
1453 */ 1454 static int 1455 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1456 { 1457 struct umtx_key key; 1458 uint32_t owner, old, id, newlock; 1459 int error, count; 1460 1461 id = td->td_tid; 1462 1463 again: 1464 /* 1465 * Make sure we own this mtx. 1466 */ 1467 error = fueword32(&m->m_owner, &owner); 1468 if (error == -1) 1469 return (EFAULT); 1470 1471 if ((owner & ~UMUTEX_CONTESTED) != id) 1472 return (EPERM); 1473 1474 newlock = umtx_unlock_val(flags, rb); 1475 if ((owner & UMUTEX_CONTESTED) == 0) { 1476 error = casueword32(&m->m_owner, owner, &old, newlock); 1477 if (error == -1) 1478 return (EFAULT); 1479 if (error == 1) { 1480 error = thread_check_susp(td, false); 1481 if (error != 0) 1482 return (error); 1483 goto again; 1484 } 1485 MPASS(old == owner); 1486 return (0); 1487 } 1488 1489 /* We should only ever be in here for contested locks */ 1490 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1491 &key)) != 0) 1492 return (error); 1493 1494 umtxq_lock(&key); 1495 umtxq_busy(&key); 1496 count = umtxq_count(&key); 1497 umtxq_unlock(&key); 1498 1499 /* 1500 * When unlocking the umtx, it must be marked as unowned if 1501 * there is zero or one thread only waiting for it. 1502 * Otherwise, it must be marked as contested. 1503 */ 1504 if (count > 1) 1505 newlock |= UMUTEX_CONTESTED; 1506 error = casueword32(&m->m_owner, owner, &old, newlock); 1507 umtxq_lock(&key); 1508 umtxq_signal(&key, 1); 1509 umtxq_unbusy(&key); 1510 umtxq_unlock(&key); 1511 umtx_key_release(&key); 1512 if (error == -1) 1513 return (EFAULT); 1514 if (error == 1) { 1515 if (old != owner) 1516 return (EINVAL); 1517 error = thread_check_susp(td, false); 1518 if (error != 0) 1519 return (error); 1520 goto again; 1521 } 1522 return (0); 1523 } 1524 1525 /* 1526 * Check if the mutex is available and wake up a waiter, 1527 * only for simple mutex. 
1528 */ 1529 static int 1530 do_wake_umutex(struct thread *td, struct umutex *m) 1531 { 1532 struct umtx_key key; 1533 uint32_t owner; 1534 uint32_t flags; 1535 int error; 1536 int count; 1537 1538 again: 1539 error = fueword32(&m->m_owner, &owner); 1540 if (error == -1) 1541 return (EFAULT); 1542 1543 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1544 owner != UMUTEX_RB_NOTRECOV) 1545 return (0); 1546 1547 error = fueword32(&m->m_flags, &flags); 1548 if (error == -1) 1549 return (EFAULT); 1550 1551 /* We should only ever be in here for contested locks */ 1552 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1553 &key)) != 0) 1554 return (error); 1555 1556 umtxq_lock(&key); 1557 umtxq_busy(&key); 1558 count = umtxq_count(&key); 1559 umtxq_unlock(&key); 1560 1561 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1562 owner != UMUTEX_RB_NOTRECOV) { 1563 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1564 UMUTEX_UNOWNED); 1565 if (error == -1) { 1566 error = EFAULT; 1567 } else if (error == 1) { 1568 umtxq_lock(&key); 1569 umtxq_unbusy(&key); 1570 umtxq_unlock(&key); 1571 umtx_key_release(&key); 1572 error = thread_check_susp(td, false); 1573 if (error != 0) 1574 return (error); 1575 goto again; 1576 } 1577 } 1578 1579 umtxq_lock(&key); 1580 if (error == 0 && count != 0) { 1581 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1582 owner == UMUTEX_RB_OWNERDEAD || 1583 owner == UMUTEX_RB_NOTRECOV); 1584 umtxq_signal(&key, 1); 1585 } 1586 umtxq_unbusy(&key); 1587 umtxq_unlock(&key); 1588 umtx_key_release(&key); 1589 return (error); 1590 } 1591 1592 /* 1593 * Check if the mutex has waiters and tries to fix contention bit. 
1594 */ 1595 static int 1596 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1597 { 1598 struct umtx_key key; 1599 uint32_t owner, old; 1600 int type; 1601 int error; 1602 int count; 1603 1604 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1605 UMUTEX_ROBUST)) { 1606 case 0: 1607 case UMUTEX_ROBUST: 1608 type = TYPE_NORMAL_UMUTEX; 1609 break; 1610 case UMUTEX_PRIO_INHERIT: 1611 type = TYPE_PI_UMUTEX; 1612 break; 1613 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1614 type = TYPE_PI_ROBUST_UMUTEX; 1615 break; 1616 case UMUTEX_PRIO_PROTECT: 1617 type = TYPE_PP_UMUTEX; 1618 break; 1619 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1620 type = TYPE_PP_ROBUST_UMUTEX; 1621 break; 1622 default: 1623 return (EINVAL); 1624 } 1625 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1626 return (error); 1627 1628 owner = 0; 1629 umtxq_lock(&key); 1630 umtxq_busy(&key); 1631 count = umtxq_count(&key); 1632 umtxq_unlock(&key); 1633 1634 error = fueword32(&m->m_owner, &owner); 1635 if (error == -1) 1636 error = EFAULT; 1637 1638 /* 1639 * Only repair contention bit if there is a waiter, this means 1640 * the mutex is still being referenced by userland code, 1641 * otherwise don't update any memory. 
1642 */ 1643 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1644 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1645 error = casueword32(&m->m_owner, owner, &old, 1646 owner | UMUTEX_CONTESTED); 1647 if (error == -1) { 1648 error = EFAULT; 1649 break; 1650 } 1651 if (error == 0) { 1652 MPASS(old == owner); 1653 break; 1654 } 1655 owner = old; 1656 error = thread_check_susp(td, false); 1657 } 1658 1659 umtxq_lock(&key); 1660 if (error == EFAULT) { 1661 umtxq_signal(&key, INT_MAX); 1662 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1663 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1664 umtxq_signal(&key, 1); 1665 umtxq_unbusy(&key); 1666 umtxq_unlock(&key); 1667 umtx_key_release(&key); 1668 return (error); 1669 } 1670 1671 static inline struct umtx_pi * 1672 umtx_pi_alloc(int flags) 1673 { 1674 struct umtx_pi *pi; 1675 1676 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1677 TAILQ_INIT(&pi->pi_blocked); 1678 atomic_add_int(&umtx_pi_allocated, 1); 1679 return (pi); 1680 } 1681 1682 static inline void 1683 umtx_pi_free(struct umtx_pi *pi) 1684 { 1685 uma_zfree(umtx_pi_zone, pi); 1686 atomic_add_int(&umtx_pi_allocated, -1); 1687 } 1688 1689 /* 1690 * Adjust the thread's position on a pi_state after its priority has been 1691 * changed. 1692 */ 1693 static int 1694 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1695 { 1696 struct umtx_q *uq, *uq1, *uq2; 1697 struct thread *td1; 1698 1699 mtx_assert(&umtx_lock, MA_OWNED); 1700 if (pi == NULL) 1701 return (0); 1702 1703 uq = td->td_umtxq; 1704 1705 /* 1706 * Check if the thread needs to be moved on the blocked chain. 1707 * It needs to be moved if either its priority is lower than 1708 * the previous thread or higher than the next thread. 
1709 */ 1710 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1711 uq2 = TAILQ_NEXT(uq, uq_lockq); 1712 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1713 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1714 /* 1715 * Remove thread from blocked chain and determine where 1716 * it should be moved to. 1717 */ 1718 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1719 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1720 td1 = uq1->uq_thread; 1721 MPASS(td1->td_proc->p_magic == P_MAGIC); 1722 if (UPRI(td1) > UPRI(td)) 1723 break; 1724 } 1725 1726 if (uq1 == NULL) 1727 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1728 else 1729 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1730 } 1731 return (1); 1732 } 1733 1734 static struct umtx_pi * 1735 umtx_pi_next(struct umtx_pi *pi) 1736 { 1737 struct umtx_q *uq_owner; 1738 1739 if (pi->pi_owner == NULL) 1740 return (NULL); 1741 uq_owner = pi->pi_owner->td_umtxq; 1742 if (uq_owner == NULL) 1743 return (NULL); 1744 return (uq_owner->uq_pi_blocked); 1745 } 1746 1747 /* 1748 * Floyd's Cycle-Finding Algorithm. 1749 */ 1750 static bool 1751 umtx_pi_check_loop(struct umtx_pi *pi) 1752 { 1753 struct umtx_pi *pi1; /* fast iterator */ 1754 1755 mtx_assert(&umtx_lock, MA_OWNED); 1756 if (pi == NULL) 1757 return (false); 1758 pi1 = pi; 1759 for (;;) { 1760 pi = umtx_pi_next(pi); 1761 if (pi == NULL) 1762 break; 1763 pi1 = umtx_pi_next(pi1); 1764 if (pi1 == NULL) 1765 break; 1766 pi1 = umtx_pi_next(pi1); 1767 if (pi1 == NULL) 1768 break; 1769 if (pi == pi1) 1770 return (true); 1771 } 1772 return (false); 1773 } 1774 1775 /* 1776 * Propagate priority when a thread is blocked on POSIX 1777 * PI mutex. 
1778 */ 1779 static void 1780 umtx_propagate_priority(struct thread *td) 1781 { 1782 struct umtx_q *uq; 1783 struct umtx_pi *pi; 1784 int pri; 1785 1786 mtx_assert(&umtx_lock, MA_OWNED); 1787 pri = UPRI(td); 1788 uq = td->td_umtxq; 1789 pi = uq->uq_pi_blocked; 1790 if (pi == NULL) 1791 return; 1792 if (umtx_pi_check_loop(pi)) 1793 return; 1794 1795 for (;;) { 1796 td = pi->pi_owner; 1797 if (td == NULL || td == curthread) 1798 return; 1799 1800 MPASS(td->td_proc != NULL); 1801 MPASS(td->td_proc->p_magic == P_MAGIC); 1802 1803 thread_lock(td); 1804 if (td->td_lend_user_pri > pri) 1805 sched_lend_user_prio(td, pri); 1806 else { 1807 thread_unlock(td); 1808 break; 1809 } 1810 thread_unlock(td); 1811 1812 /* 1813 * Pick up the lock that td is blocked on. 1814 */ 1815 uq = td->td_umtxq; 1816 pi = uq->uq_pi_blocked; 1817 if (pi == NULL) 1818 break; 1819 /* Resort td on the list if needed. */ 1820 umtx_pi_adjust_thread(pi, td); 1821 } 1822 } 1823 1824 /* 1825 * Unpropagate priority for a PI mutex when a thread blocked on 1826 * it is interrupted by signal or resumed by others. 
1827 */ 1828 static void 1829 umtx_repropagate_priority(struct umtx_pi *pi) 1830 { 1831 struct umtx_q *uq, *uq_owner; 1832 struct umtx_pi *pi2; 1833 int pri; 1834 1835 mtx_assert(&umtx_lock, MA_OWNED); 1836 1837 if (umtx_pi_check_loop(pi)) 1838 return; 1839 while (pi != NULL && pi->pi_owner != NULL) { 1840 pri = PRI_MAX; 1841 uq_owner = pi->pi_owner->td_umtxq; 1842 1843 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1844 uq = TAILQ_FIRST(&pi2->pi_blocked); 1845 if (uq != NULL) { 1846 if (pri > UPRI(uq->uq_thread)) 1847 pri = UPRI(uq->uq_thread); 1848 } 1849 } 1850 1851 if (pri > uq_owner->uq_inherited_pri) 1852 pri = uq_owner->uq_inherited_pri; 1853 thread_lock(pi->pi_owner); 1854 sched_lend_user_prio(pi->pi_owner, pri); 1855 thread_unlock(pi->pi_owner); 1856 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1857 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1858 } 1859 } 1860 1861 /* 1862 * Insert a PI mutex into owned list. 1863 */ 1864 static void 1865 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1866 { 1867 struct umtx_q *uq_owner; 1868 1869 uq_owner = owner->td_umtxq; 1870 mtx_assert(&umtx_lock, MA_OWNED); 1871 MPASS(pi->pi_owner == NULL); 1872 pi->pi_owner = owner; 1873 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1874 } 1875 1876 /* 1877 * Disown a PI mutex, and remove it from the owned list. 1878 */ 1879 static void 1880 umtx_pi_disown(struct umtx_pi *pi) 1881 { 1882 1883 mtx_assert(&umtx_lock, MA_OWNED); 1884 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1885 pi->pi_owner = NULL; 1886 } 1887 1888 /* 1889 * Claim ownership of a PI mutex. 1890 */ 1891 static int 1892 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1893 { 1894 struct umtx_q *uq; 1895 int pri; 1896 1897 mtx_lock(&umtx_lock); 1898 if (pi->pi_owner == owner) { 1899 mtx_unlock(&umtx_lock); 1900 return (0); 1901 } 1902 1903 if (pi->pi_owner != NULL) { 1904 /* 1905 * userland may have already messed the mutex, sigh. 
1906 */ 1907 mtx_unlock(&umtx_lock); 1908 return (EPERM); 1909 } 1910 umtx_pi_setowner(pi, owner); 1911 uq = TAILQ_FIRST(&pi->pi_blocked); 1912 if (uq != NULL) { 1913 pri = UPRI(uq->uq_thread); 1914 thread_lock(owner); 1915 if (pri < UPRI(owner)) 1916 sched_lend_user_prio(owner, pri); 1917 thread_unlock(owner); 1918 } 1919 mtx_unlock(&umtx_lock); 1920 return (0); 1921 } 1922 1923 /* 1924 * Adjust a thread's order position in its blocked PI mutex, 1925 * this may result new priority propagating process. 1926 */ 1927 void 1928 umtx_pi_adjust(struct thread *td, u_char oldpri) 1929 { 1930 struct umtx_q *uq; 1931 struct umtx_pi *pi; 1932 1933 uq = td->td_umtxq; 1934 mtx_lock(&umtx_lock); 1935 /* 1936 * Pick up the lock that td is blocked on. 1937 */ 1938 pi = uq->uq_pi_blocked; 1939 if (pi != NULL) { 1940 umtx_pi_adjust_thread(pi, td); 1941 umtx_repropagate_priority(pi); 1942 } 1943 mtx_unlock(&umtx_lock); 1944 } 1945 1946 /* 1947 * Sleep on a PI mutex. 1948 */ 1949 static int 1950 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1951 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 1952 { 1953 struct thread *td, *td1; 1954 struct umtx_q *uq1; 1955 int error, pri; 1956 #ifdef INVARIANTS 1957 struct umtxq_chain *uc; 1958 1959 uc = umtxq_getchain(&pi->pi_key); 1960 #endif 1961 error = 0; 1962 td = uq->uq_thread; 1963 KASSERT(td == curthread, ("inconsistent uq_thread")); 1964 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 1965 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1966 umtxq_insert(uq); 1967 mtx_lock(&umtx_lock); 1968 if (pi->pi_owner == NULL) { 1969 mtx_unlock(&umtx_lock); 1970 td1 = tdfind(owner, shared ? 
-1 : td->td_proc->p_pid); 1971 mtx_lock(&umtx_lock); 1972 if (td1 != NULL) { 1973 if (pi->pi_owner == NULL) 1974 umtx_pi_setowner(pi, td1); 1975 PROC_UNLOCK(td1->td_proc); 1976 } 1977 } 1978 1979 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1980 pri = UPRI(uq1->uq_thread); 1981 if (pri > UPRI(td)) 1982 break; 1983 } 1984 1985 if (uq1 != NULL) 1986 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1987 else 1988 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1989 1990 uq->uq_pi_blocked = pi; 1991 thread_lock(td); 1992 td->td_flags |= TDF_UPIBLOCKED; 1993 thread_unlock(td); 1994 umtx_propagate_priority(td); 1995 mtx_unlock(&umtx_lock); 1996 umtxq_unbusy(&uq->uq_key); 1997 1998 error = umtxq_sleep(uq, wmesg, timo); 1999 umtxq_remove(uq); 2000 2001 mtx_lock(&umtx_lock); 2002 uq->uq_pi_blocked = NULL; 2003 thread_lock(td); 2004 td->td_flags &= ~TDF_UPIBLOCKED; 2005 thread_unlock(td); 2006 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 2007 umtx_repropagate_priority(pi); 2008 mtx_unlock(&umtx_lock); 2009 umtxq_unlock(&uq->uq_key); 2010 2011 return (error); 2012 } 2013 2014 /* 2015 * Add reference count for a PI mutex. 2016 */ 2017 static void 2018 umtx_pi_ref(struct umtx_pi *pi) 2019 { 2020 2021 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 2022 pi->pi_refcount++; 2023 } 2024 2025 /* 2026 * Decrease reference count for a PI mutex, if the counter 2027 * is decreased to zero, its memory space is freed. 
2028 */ 2029 static void 2030 umtx_pi_unref(struct umtx_pi *pi) 2031 { 2032 struct umtxq_chain *uc; 2033 2034 uc = umtxq_getchain(&pi->pi_key); 2035 UMTXQ_LOCKED_ASSERT(uc); 2036 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2037 if (--pi->pi_refcount == 0) { 2038 mtx_lock(&umtx_lock); 2039 if (pi->pi_owner != NULL) 2040 umtx_pi_disown(pi); 2041 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2042 ("blocked queue not empty")); 2043 mtx_unlock(&umtx_lock); 2044 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2045 umtx_pi_free(pi); 2046 } 2047 } 2048 2049 /* 2050 * Find a PI mutex in hash table. 2051 */ 2052 static struct umtx_pi * 2053 umtx_pi_lookup(struct umtx_key *key) 2054 { 2055 struct umtxq_chain *uc; 2056 struct umtx_pi *pi; 2057 2058 uc = umtxq_getchain(key); 2059 UMTXQ_LOCKED_ASSERT(uc); 2060 2061 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2062 if (umtx_key_match(&pi->pi_key, key)) { 2063 return (pi); 2064 } 2065 } 2066 return (NULL); 2067 } 2068 2069 /* 2070 * Insert a PI mutex into hash table. 2071 */ 2072 static inline void 2073 umtx_pi_insert(struct umtx_pi *pi) 2074 { 2075 struct umtxq_chain *uc; 2076 2077 uc = umtxq_getchain(&pi->pi_key); 2078 UMTXQ_LOCKED_ASSERT(uc); 2079 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2080 } 2081 2082 /* 2083 * Lock a PI mutex. 2084 */ 2085 static int 2086 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2087 struct _umtx_time *timeout, int try) 2088 { 2089 struct umtx_abs_timeout timo; 2090 struct umtx_q *uq; 2091 struct umtx_pi *pi, *new_pi; 2092 uint32_t id, old_owner, owner, old; 2093 int error, rv; 2094 2095 id = td->td_tid; 2096 uq = td->td_umtxq; 2097 2098 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
2099 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2100 &uq->uq_key)) != 0) 2101 return (error); 2102 2103 if (timeout != NULL) 2104 umtx_abs_timeout_init2(&timo, timeout); 2105 2106 umtxq_lock(&uq->uq_key); 2107 pi = umtx_pi_lookup(&uq->uq_key); 2108 if (pi == NULL) { 2109 new_pi = umtx_pi_alloc(M_NOWAIT); 2110 if (new_pi == NULL) { 2111 umtxq_unlock(&uq->uq_key); 2112 new_pi = umtx_pi_alloc(M_WAITOK); 2113 umtxq_lock(&uq->uq_key); 2114 pi = umtx_pi_lookup(&uq->uq_key); 2115 if (pi != NULL) { 2116 umtx_pi_free(new_pi); 2117 new_pi = NULL; 2118 } 2119 } 2120 if (new_pi != NULL) { 2121 new_pi->pi_key = uq->uq_key; 2122 umtx_pi_insert(new_pi); 2123 pi = new_pi; 2124 } 2125 } 2126 umtx_pi_ref(pi); 2127 umtxq_unlock(&uq->uq_key); 2128 2129 /* 2130 * Care must be exercised when dealing with umtx structure. It 2131 * can fault on any access. 2132 */ 2133 for (;;) { 2134 /* 2135 * Try the uncontested case. This should be done in userland. 2136 */ 2137 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2138 /* The address was invalid. */ 2139 if (rv == -1) { 2140 error = EFAULT; 2141 break; 2142 } 2143 /* The acquire succeeded. */ 2144 if (rv == 0) { 2145 MPASS(owner == UMUTEX_UNOWNED); 2146 error = 0; 2147 break; 2148 } 2149 2150 if (owner == UMUTEX_RB_NOTRECOV) { 2151 error = ENOTRECOVERABLE; 2152 break; 2153 } 2154 2155 /* 2156 * Avoid overwriting a possible error from sleep due 2157 * to the pending signal with suspension check result. 2158 */ 2159 if (error == 0) { 2160 error = thread_check_susp(td, true); 2161 if (error != 0) 2162 break; 2163 } 2164 2165 /* If no one owns it but it is contested try to acquire it. */ 2166 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2167 old_owner = owner; 2168 rv = casueword32(&m->m_owner, owner, &owner, 2169 id | UMUTEX_CONTESTED); 2170 /* The address was invalid. 
*/ 2171 if (rv == -1) { 2172 error = EFAULT; 2173 break; 2174 } 2175 if (rv == 1) { 2176 if (error == 0) { 2177 error = thread_check_susp(td, true); 2178 if (error != 0) 2179 break; 2180 } 2181 2182 /* 2183 * If this failed the lock could 2184 * changed, restart. 2185 */ 2186 continue; 2187 } 2188 2189 MPASS(rv == 0); 2190 MPASS(owner == old_owner); 2191 umtxq_lock(&uq->uq_key); 2192 umtxq_busy(&uq->uq_key); 2193 error = umtx_pi_claim(pi, td); 2194 umtxq_unbusy(&uq->uq_key); 2195 umtxq_unlock(&uq->uq_key); 2196 if (error != 0) { 2197 /* 2198 * Since we're going to return an 2199 * error, restore the m_owner to its 2200 * previous, unowned state to avoid 2201 * compounding the problem. 2202 */ 2203 (void)casuword32(&m->m_owner, 2204 id | UMUTEX_CONTESTED, old_owner); 2205 } 2206 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2207 error = EOWNERDEAD; 2208 break; 2209 } 2210 2211 if ((owner & ~UMUTEX_CONTESTED) == id) { 2212 error = EDEADLK; 2213 break; 2214 } 2215 2216 if (try != 0) { 2217 error = EBUSY; 2218 break; 2219 } 2220 2221 /* 2222 * If we caught a signal, we have retried and now 2223 * exit immediately. 2224 */ 2225 if (error != 0) 2226 break; 2227 2228 umtxq_lock(&uq->uq_key); 2229 umtxq_busy(&uq->uq_key); 2230 umtxq_unlock(&uq->uq_key); 2231 2232 /* 2233 * Set the contested bit so that a release in user space 2234 * knows to use the system call for unlock. If this fails 2235 * either some one else has acquired the lock or it has been 2236 * released. 2237 */ 2238 rv = casueword32(&m->m_owner, owner, &old, owner | 2239 UMUTEX_CONTESTED); 2240 2241 /* The address was invalid. */ 2242 if (rv == -1) { 2243 umtxq_unbusy_unlocked(&uq->uq_key); 2244 error = EFAULT; 2245 break; 2246 } 2247 if (rv == 1) { 2248 umtxq_unbusy_unlocked(&uq->uq_key); 2249 error = thread_check_susp(td, true); 2250 if (error != 0) 2251 break; 2252 2253 /* 2254 * The lock changed and we need to retry or we 2255 * lost a race to the thread unlocking the 2256 * umtx. 
Note that the UMUTEX_RB_OWNERDEAD 2257 * value for owner is impossible there. 2258 */ 2259 continue; 2260 } 2261 2262 umtxq_lock(&uq->uq_key); 2263 2264 /* We set the contested bit, sleep. */ 2265 MPASS(old == owner); 2266 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2267 "umtxpi", timeout == NULL ? NULL : &timo, 2268 (flags & USYNC_PROCESS_SHARED) != 0); 2269 if (error != 0) 2270 continue; 2271 2272 error = thread_check_susp(td, false); 2273 if (error != 0) 2274 break; 2275 } 2276 2277 umtxq_lock(&uq->uq_key); 2278 umtx_pi_unref(pi); 2279 umtxq_unlock(&uq->uq_key); 2280 2281 umtx_key_release(&uq->uq_key); 2282 return (error); 2283 } 2284 2285 /* 2286 * Unlock a PI mutex. 2287 */ 2288 static int 2289 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2290 { 2291 struct umtx_key key; 2292 struct umtx_q *uq_first, *uq_first2, *uq_me; 2293 struct umtx_pi *pi, *pi2; 2294 uint32_t id, new_owner, old, owner; 2295 int count, error, pri; 2296 2297 id = td->td_tid; 2298 2299 usrloop: 2300 /* 2301 * Make sure we own this mtx. 2302 */ 2303 error = fueword32(&m->m_owner, &owner); 2304 if (error == -1) 2305 return (EFAULT); 2306 2307 if ((owner & ~UMUTEX_CONTESTED) != id) 2308 return (EPERM); 2309 2310 new_owner = umtx_unlock_val(flags, rb); 2311 2312 /* This should be done in userland */ 2313 if ((owner & UMUTEX_CONTESTED) == 0) { 2314 error = casueword32(&m->m_owner, owner, &old, new_owner); 2315 if (error == -1) 2316 return (EFAULT); 2317 if (error == 1) { 2318 error = thread_check_susp(td, true); 2319 if (error != 0) 2320 return (error); 2321 goto usrloop; 2322 } 2323 if (old == owner) 2324 return (0); 2325 owner = old; 2326 } 2327 2328 /* We should only ever be in here for contested locks */ 2329 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
2330 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2331 &key)) != 0) 2332 return (error); 2333 2334 umtxq_lock(&key); 2335 umtxq_busy(&key); 2336 count = umtxq_count_pi(&key, &uq_first); 2337 if (uq_first != NULL) { 2338 mtx_lock(&umtx_lock); 2339 pi = uq_first->uq_pi_blocked; 2340 KASSERT(pi != NULL, ("pi == NULL?")); 2341 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2342 mtx_unlock(&umtx_lock); 2343 umtxq_unbusy(&key); 2344 umtxq_unlock(&key); 2345 umtx_key_release(&key); 2346 /* userland messed the mutex */ 2347 return (EPERM); 2348 } 2349 uq_me = td->td_umtxq; 2350 if (pi->pi_owner == td) 2351 umtx_pi_disown(pi); 2352 /* get highest priority thread which is still sleeping. */ 2353 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2354 while (uq_first != NULL && 2355 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2356 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2357 } 2358 pri = PRI_MAX; 2359 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2360 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2361 if (uq_first2 != NULL) { 2362 if (pri > UPRI(uq_first2->uq_thread)) 2363 pri = UPRI(uq_first2->uq_thread); 2364 } 2365 } 2366 thread_lock(td); 2367 sched_lend_user_prio(td, pri); 2368 thread_unlock(td); 2369 mtx_unlock(&umtx_lock); 2370 if (uq_first) 2371 umtxq_signal_thread(uq_first); 2372 } else { 2373 pi = umtx_pi_lookup(&key); 2374 /* 2375 * A umtx_pi can exist if a signal or timeout removed the 2376 * last waiter from the umtxq, but there is still 2377 * a thread in do_lock_pi() holding the umtx_pi. 2378 */ 2379 if (pi != NULL) { 2380 /* 2381 * The umtx_pi can be unowned, such as when a thread 2382 * has just entered do_lock_pi(), allocated the 2383 * umtx_pi, and unlocked the umtxq. 2384 * If the current thread owns it, it must disown it. 
2385 */ 2386 mtx_lock(&umtx_lock); 2387 if (pi->pi_owner == td) 2388 umtx_pi_disown(pi); 2389 mtx_unlock(&umtx_lock); 2390 } 2391 } 2392 umtxq_unlock(&key); 2393 2394 /* 2395 * When unlocking the umtx, it must be marked as unowned if 2396 * there is zero or one thread only waiting for it. 2397 * Otherwise, it must be marked as contested. 2398 */ 2399 2400 if (count > 1) 2401 new_owner |= UMUTEX_CONTESTED; 2402 again: 2403 error = casueword32(&m->m_owner, owner, &old, new_owner); 2404 if (error == 1) { 2405 error = thread_check_susp(td, false); 2406 if (error == 0) 2407 goto again; 2408 } 2409 umtxq_unbusy_unlocked(&key); 2410 umtx_key_release(&key); 2411 if (error == -1) 2412 return (EFAULT); 2413 if (error == 0 && old != owner) 2414 return (EINVAL); 2415 return (error); 2416 } 2417 2418 /* 2419 * Lock a PP mutex. 2420 */ 2421 static int 2422 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2423 struct _umtx_time *timeout, int try) 2424 { 2425 struct umtx_abs_timeout timo; 2426 struct umtx_q *uq, *uq2; 2427 struct umtx_pi *pi; 2428 uint32_t ceiling; 2429 uint32_t owner, id; 2430 int error, pri, old_inherited_pri, su, rv; 2431 2432 id = td->td_tid; 2433 uq = td->td_umtxq; 2434 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
2435 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2436 &uq->uq_key)) != 0) 2437 return (error); 2438 2439 if (timeout != NULL) 2440 umtx_abs_timeout_init2(&timo, timeout); 2441 2442 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2443 for (;;) { 2444 old_inherited_pri = uq->uq_inherited_pri; 2445 umtxq_lock(&uq->uq_key); 2446 umtxq_busy(&uq->uq_key); 2447 umtxq_unlock(&uq->uq_key); 2448 2449 rv = fueword32(&m->m_ceilings[0], &ceiling); 2450 if (rv == -1) { 2451 error = EFAULT; 2452 goto out; 2453 } 2454 ceiling = RTP_PRIO_MAX - ceiling; 2455 if (ceiling > RTP_PRIO_MAX) { 2456 error = EINVAL; 2457 goto out; 2458 } 2459 2460 mtx_lock(&umtx_lock); 2461 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2462 mtx_unlock(&umtx_lock); 2463 error = EINVAL; 2464 goto out; 2465 } 2466 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2467 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2468 thread_lock(td); 2469 if (uq->uq_inherited_pri < UPRI(td)) 2470 sched_lend_user_prio(td, uq->uq_inherited_pri); 2471 thread_unlock(td); 2472 } 2473 mtx_unlock(&umtx_lock); 2474 2475 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2476 id | UMUTEX_CONTESTED); 2477 /* The address was invalid. */ 2478 if (rv == -1) { 2479 error = EFAULT; 2480 break; 2481 } 2482 if (rv == 0) { 2483 MPASS(owner == UMUTEX_CONTESTED); 2484 error = 0; 2485 break; 2486 } 2487 /* rv == 1 */ 2488 if (owner == UMUTEX_RB_OWNERDEAD) { 2489 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2490 &owner, id | UMUTEX_CONTESTED); 2491 if (rv == -1) { 2492 error = EFAULT; 2493 break; 2494 } 2495 if (rv == 0) { 2496 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2497 error = EOWNERDEAD; /* success */ 2498 break; 2499 } 2500 2501 /* 2502 * rv == 1, only check for suspension if we 2503 * did not already catched a signal. 
If we 2504 * get an error from the check, the same 2505 * condition is checked by the umtxq_sleep() 2506 * call below, so we should obliterate the 2507 * error to not skip the last loop iteration. 2508 */ 2509 if (error == 0) { 2510 error = thread_check_susp(td, false); 2511 if (error == 0) { 2512 if (try != 0) 2513 error = EBUSY; 2514 else 2515 continue; 2516 } 2517 error = 0; 2518 } 2519 } else if (owner == UMUTEX_RB_NOTRECOV) { 2520 error = ENOTRECOVERABLE; 2521 } 2522 2523 if (try != 0) 2524 error = EBUSY; 2525 2526 /* 2527 * If we caught a signal, we have retried and now 2528 * exit immediately. 2529 */ 2530 if (error != 0) 2531 break; 2532 2533 umtxq_lock(&uq->uq_key); 2534 umtxq_insert(uq); 2535 umtxq_unbusy(&uq->uq_key); 2536 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2537 NULL : &timo); 2538 umtxq_remove(uq); 2539 umtxq_unlock(&uq->uq_key); 2540 2541 mtx_lock(&umtx_lock); 2542 uq->uq_inherited_pri = old_inherited_pri; 2543 pri = PRI_MAX; 2544 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2545 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2546 if (uq2 != NULL) { 2547 if (pri > UPRI(uq2->uq_thread)) 2548 pri = UPRI(uq2->uq_thread); 2549 } 2550 } 2551 if (pri > uq->uq_inherited_pri) 2552 pri = uq->uq_inherited_pri; 2553 thread_lock(td); 2554 sched_lend_user_prio(td, pri); 2555 thread_unlock(td); 2556 mtx_unlock(&umtx_lock); 2557 } 2558 2559 if (error != 0 && error != EOWNERDEAD) { 2560 mtx_lock(&umtx_lock); 2561 uq->uq_inherited_pri = old_inherited_pri; 2562 pri = PRI_MAX; 2563 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2564 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2565 if (uq2 != NULL) { 2566 if (pri > UPRI(uq2->uq_thread)) 2567 pri = UPRI(uq2->uq_thread); 2568 } 2569 } 2570 if (pri > uq->uq_inherited_pri) 2571 pri = uq->uq_inherited_pri; 2572 thread_lock(td); 2573 sched_lend_user_prio(td, pri); 2574 thread_unlock(td); 2575 mtx_unlock(&umtx_lock); 2576 } 2577 2578 out: 2579 umtxq_unbusy_unlocked(&uq->uq_key); 2580 umtx_key_release(&uq->uq_key); 2581 
return (error); 2582 } 2583 2584 /* 2585 * Unlock a PP mutex. 2586 */ 2587 static int 2588 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2589 { 2590 struct umtx_key key; 2591 struct umtx_q *uq, *uq2; 2592 struct umtx_pi *pi; 2593 uint32_t id, owner, rceiling; 2594 int error, pri, new_inherited_pri, su; 2595 2596 id = td->td_tid; 2597 uq = td->td_umtxq; 2598 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2599 2600 /* 2601 * Make sure we own this mtx. 2602 */ 2603 error = fueword32(&m->m_owner, &owner); 2604 if (error == -1) 2605 return (EFAULT); 2606 2607 if ((owner & ~UMUTEX_CONTESTED) != id) 2608 return (EPERM); 2609 2610 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2611 if (error != 0) 2612 return (error); 2613 2614 if (rceiling == -1) 2615 new_inherited_pri = PRI_MAX; 2616 else { 2617 rceiling = RTP_PRIO_MAX - rceiling; 2618 if (rceiling > RTP_PRIO_MAX) 2619 return (EINVAL); 2620 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2621 } 2622 2623 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2624 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2625 &key)) != 0) 2626 return (error); 2627 umtxq_lock(&key); 2628 umtxq_busy(&key); 2629 umtxq_unlock(&key); 2630 /* 2631 * For priority protected mutex, always set unlocked state 2632 * to UMUTEX_CONTESTED, so that userland always enters kernel 2633 * to lock the mutex, it is necessary because thread priority 2634 * has to be adjusted for such mutex. 
2635 */ 2636 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2637 UMUTEX_CONTESTED); 2638 2639 umtxq_lock(&key); 2640 if (error == 0) 2641 umtxq_signal(&key, 1); 2642 umtxq_unbusy(&key); 2643 umtxq_unlock(&key); 2644 2645 if (error == -1) 2646 error = EFAULT; 2647 else { 2648 mtx_lock(&umtx_lock); 2649 if (su != 0) 2650 uq->uq_inherited_pri = new_inherited_pri; 2651 pri = PRI_MAX; 2652 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2653 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2654 if (uq2 != NULL) { 2655 if (pri > UPRI(uq2->uq_thread)) 2656 pri = UPRI(uq2->uq_thread); 2657 } 2658 } 2659 if (pri > uq->uq_inherited_pri) 2660 pri = uq->uq_inherited_pri; 2661 thread_lock(td); 2662 sched_lend_user_prio(td, pri); 2663 thread_unlock(td); 2664 mtx_unlock(&umtx_lock); 2665 } 2666 umtx_key_release(&key); 2667 return (error); 2668 } 2669 2670 static int 2671 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2672 uint32_t *old_ceiling) 2673 { 2674 struct umtx_q *uq; 2675 uint32_t flags, id, owner, save_ceiling; 2676 int error, rv, rv1; 2677 2678 error = fueword32(&m->m_flags, &flags); 2679 if (error == -1) 2680 return (EFAULT); 2681 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2682 return (EINVAL); 2683 if (ceiling > RTP_PRIO_MAX) 2684 return (EINVAL); 2685 id = td->td_tid; 2686 uq = td->td_umtxq; 2687 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
2688 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2689 &uq->uq_key)) != 0) 2690 return (error); 2691 for (;;) { 2692 umtxq_lock(&uq->uq_key); 2693 umtxq_busy(&uq->uq_key); 2694 umtxq_unlock(&uq->uq_key); 2695 2696 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2697 if (rv == -1) { 2698 error = EFAULT; 2699 break; 2700 } 2701 2702 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2703 id | UMUTEX_CONTESTED); 2704 if (rv == -1) { 2705 error = EFAULT; 2706 break; 2707 } 2708 2709 if (rv == 0) { 2710 MPASS(owner == UMUTEX_CONTESTED); 2711 rv = suword32(&m->m_ceilings[0], ceiling); 2712 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2713 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2714 break; 2715 } 2716 2717 if ((owner & ~UMUTEX_CONTESTED) == id) { 2718 rv = suword32(&m->m_ceilings[0], ceiling); 2719 error = rv == 0 ? 0 : EFAULT; 2720 break; 2721 } 2722 2723 if (owner == UMUTEX_RB_OWNERDEAD) { 2724 error = EOWNERDEAD; 2725 break; 2726 } else if (owner == UMUTEX_RB_NOTRECOV) { 2727 error = ENOTRECOVERABLE; 2728 break; 2729 } 2730 2731 /* 2732 * If we caught a signal, we have retried and now 2733 * exit immediately. 2734 */ 2735 if (error != 0) 2736 break; 2737 2738 /* 2739 * We set the contested bit, sleep. Otherwise the lock changed 2740 * and we need to retry or we lost a race to the thread 2741 * unlocking the umtx. 2742 */ 2743 umtxq_lock(&uq->uq_key); 2744 umtxq_insert(uq); 2745 umtxq_unbusy(&uq->uq_key); 2746 error = umtxq_sleep(uq, "umtxpp", NULL); 2747 umtxq_remove(uq); 2748 umtxq_unlock(&uq->uq_key); 2749 } 2750 umtxq_lock(&uq->uq_key); 2751 if (error == 0) 2752 umtxq_signal(&uq->uq_key, INT_MAX); 2753 umtxq_unbusy(&uq->uq_key); 2754 umtxq_unlock(&uq->uq_key); 2755 umtx_key_release(&uq->uq_key); 2756 if (error == 0 && old_ceiling != NULL) { 2757 rv = suword32(old_ceiling, save_ceiling); 2758 error = rv == 0 ? 0 : EFAULT; 2759 } 2760 return (error); 2761 } 2762 2763 /* 2764 * Lock a userland POSIX mutex. 
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* Dispatch on the protocol bits; both bits set is invalid. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		/* Untimed lock: turn EINTR into ERESTART, except for waits. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 *
 * Dispatches on the protocol bits of m_flags in the same way as
 * do_lock_umutex(); returns EINVAL when both bits are set.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable.
 *
 * The thread is put on the condvar's queue, c_has_waiters is set,
 * the user mutex m is unlocked and the thread sleeps, optionally
 * with a timeout.  wflags may carry CVWAIT_CLOCKID (take the clock
 * from cv->c_clockid instead of CLOCK_REALTIME) and CVWAIT_ABSTIME.
 *
 * Returns 0 when the thread was removed from the queue by somebody
 * else, otherwise the error from unlocking the mutex or from
 * umtxq_sleep(), with ERESTART mapped to EINTR.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Queue ourselves before the mutex is released below. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	/* Sleep only if the mutex was released successfully. */
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* No longer on the queue: somebody removed us, report success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal
		 * or a spurious wakeup; clear the c_has_waiters flag
		 * when necessary.
		 */
		umtxq_busy(&uq->uq_key);
		/* Re-check the queue flag once the chain is marked busy. */
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			/* We were the only queued entry: clear the flag. */
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	/* Nobody is left waiting: clear the userland waiters flag. */
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Broadcast a userland condition variable: signal up to INT_MAX
 * waiters and clear c_has_waiters.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

/*
 * Acquire a userland rwlock for reading.
 *
 * fflag	per-call flags; URWLOCK_PREFER_READER here or in
 *		rw_flags makes readers ignore waiting writers
 * timeout	optional timeout, NULL to wait without one
 *
 * Returns 0 on success, EAGAIN when the reader count is already
 * URWLOCK_MAX_READERS, EFAULT on inaccessible user memory, or the
 * error from thread_check_susp()/umtxq_sleep() with ERESTART mapped
 * to EINTR.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* State bits that prevent a reader from taking the lock. */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		/* Sleep until the blocking bits clear or an error occurs. */
		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		/* We were the last blocked reader: clear READ_WAITERS. */
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				/* Keep an earlier error; only record a new one. */
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

static int 3161 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3162 { 3163 struct umtx_abs_timeout timo; 3164 struct umtx_q *uq; 3165 uint32_t flags; 3166 int32_t state, oldstate; 3167 int32_t blocked_writers; 3168 int32_t blocked_readers; 3169 int error, error1, rv; 3170 3171 uq = td->td_umtxq; 3172 error = fueword32(&rwlock->rw_flags, &flags); 3173 if (error == -1) 3174 return (EFAULT); 3175 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3176 if (error != 0) 3177 return (error); 3178 3179 if (timeout != NULL) 3180 umtx_abs_timeout_init2(&timo, timeout); 3181 3182 blocked_readers = 0; 3183 for (;;) { 3184 rv = fueword32(&rwlock->rw_state, &state); 3185 if (rv == -1) { 3186 umtx_key_release(&uq->uq_key); 3187 return (EFAULT); 3188 } 3189 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3190 URWLOCK_READER_COUNT(state) == 0) { 3191 rv = casueword32(&rwlock->rw_state, state, 3192 &oldstate, state | URWLOCK_WRITE_OWNER); 3193 if (rv == -1) { 3194 umtx_key_release(&uq->uq_key); 3195 return (EFAULT); 3196 } 3197 if (rv == 0) { 3198 MPASS(oldstate == state); 3199 umtx_key_release(&uq->uq_key); 3200 return (0); 3201 } 3202 state = oldstate; 3203 error = thread_check_susp(td, true); 3204 if (error != 0) 3205 break; 3206 } 3207 3208 if (error) { 3209 if ((state & (URWLOCK_WRITE_OWNER | 3210 URWLOCK_WRITE_WAITERS)) == 0 && 3211 blocked_readers != 0) { 3212 umtxq_lock(&uq->uq_key); 3213 umtxq_busy(&uq->uq_key); 3214 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3215 UMTX_SHARED_QUEUE); 3216 umtxq_unbusy(&uq->uq_key); 3217 umtxq_unlock(&uq->uq_key); 3218 } 3219 3220 break; 3221 } 3222 3223 /* grab monitor lock */ 3224 umtxq_lock(&uq->uq_key); 3225 umtxq_busy(&uq->uq_key); 3226 umtxq_unlock(&uq->uq_key); 3227 3228 /* 3229 * Re-read the state, in case it changed between the 3230 * try-lock above and the check below. 
3231 */ 3232 rv = fueword32(&rwlock->rw_state, &state); 3233 if (rv == -1) 3234 error = EFAULT; 3235 3236 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3237 URWLOCK_READER_COUNT(state) != 0) && 3238 (state & URWLOCK_WRITE_WAITERS) == 0) { 3239 rv = casueword32(&rwlock->rw_state, state, 3240 &oldstate, state | URWLOCK_WRITE_WAITERS); 3241 if (rv == -1) { 3242 error = EFAULT; 3243 break; 3244 } 3245 if (rv == 0) { 3246 MPASS(oldstate == state); 3247 goto sleep; 3248 } 3249 state = oldstate; 3250 error = thread_check_susp(td, false); 3251 if (error != 0) 3252 break; 3253 } 3254 if (error != 0) { 3255 umtxq_unbusy_unlocked(&uq->uq_key); 3256 break; 3257 } 3258 3259 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3260 URWLOCK_READER_COUNT(state) == 0) { 3261 umtxq_unbusy_unlocked(&uq->uq_key); 3262 error = thread_check_susp(td, false); 3263 if (error != 0) 3264 break; 3265 continue; 3266 } 3267 sleep: 3268 rv = fueword32(&rwlock->rw_blocked_writers, 3269 &blocked_writers); 3270 if (rv == -1) { 3271 umtxq_unbusy_unlocked(&uq->uq_key); 3272 error = EFAULT; 3273 break; 3274 } 3275 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3276 3277 while ((state & URWLOCK_WRITE_OWNER) || 3278 URWLOCK_READER_COUNT(state) != 0) { 3279 umtxq_lock(&uq->uq_key); 3280 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3281 umtxq_unbusy(&uq->uq_key); 3282 3283 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3284 NULL : &timo); 3285 3286 umtxq_busy(&uq->uq_key); 3287 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3288 umtxq_unlock(&uq->uq_key); 3289 if (error) 3290 break; 3291 rv = fueword32(&rwlock->rw_state, &state); 3292 if (rv == -1) { 3293 error = EFAULT; 3294 break; 3295 } 3296 } 3297 3298 rv = fueword32(&rwlock->rw_blocked_writers, 3299 &blocked_writers); 3300 if (rv == -1) { 3301 umtxq_unbusy_unlocked(&uq->uq_key); 3302 error = EFAULT; 3303 break; 3304 } 3305 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3306 if (blocked_writers == 1) { 3307 rv = fueword32(&rwlock->rw_state, &state); 3308 if (rv == -1) { 3309 umtxq_unbusy_unlocked(&uq->uq_key); 3310 error = EFAULT; 3311 break; 3312 } 3313 for (;;) { 3314 rv = casueword32(&rwlock->rw_state, state, 3315 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3316 if (rv == -1) { 3317 error = EFAULT; 3318 break; 3319 } 3320 if (rv == 0) { 3321 MPASS(oldstate == state); 3322 break; 3323 } 3324 state = oldstate; 3325 error1 = thread_check_susp(td, false); 3326 /* 3327 * We are leaving the URWLOCK_WRITE_WAITERS 3328 * behind, but this should not harm the 3329 * correctness. 
3330 */ 3331 if (error1 != 0) { 3332 if (error == 0) 3333 error = error1; 3334 break; 3335 } 3336 } 3337 rv = fueword32(&rwlock->rw_blocked_readers, 3338 &blocked_readers); 3339 if (rv == -1) { 3340 umtxq_unbusy_unlocked(&uq->uq_key); 3341 error = EFAULT; 3342 break; 3343 } 3344 } else 3345 blocked_readers = 0; 3346 3347 umtxq_unbusy_unlocked(&uq->uq_key); 3348 } 3349 3350 umtx_key_release(&uq->uq_key); 3351 if (error == ERESTART) 3352 error = EINTR; 3353 return (error); 3354 } 3355 3356 static int 3357 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3358 { 3359 struct umtx_q *uq; 3360 uint32_t flags; 3361 int32_t state, oldstate; 3362 int error, rv, q, count; 3363 3364 uq = td->td_umtxq; 3365 error = fueword32(&rwlock->rw_flags, &flags); 3366 if (error == -1) 3367 return (EFAULT); 3368 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3369 if (error != 0) 3370 return (error); 3371 3372 error = fueword32(&rwlock->rw_state, &state); 3373 if (error == -1) { 3374 error = EFAULT; 3375 goto out; 3376 } 3377 if (state & URWLOCK_WRITE_OWNER) { 3378 for (;;) { 3379 rv = casueword32(&rwlock->rw_state, state, 3380 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3381 if (rv == -1) { 3382 error = EFAULT; 3383 goto out; 3384 } 3385 if (rv == 1) { 3386 state = oldstate; 3387 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3388 error = EPERM; 3389 goto out; 3390 } 3391 error = thread_check_susp(td, true); 3392 if (error != 0) 3393 goto out; 3394 } else 3395 break; 3396 } 3397 } else if (URWLOCK_READER_COUNT(state) != 0) { 3398 for (;;) { 3399 rv = casueword32(&rwlock->rw_state, state, 3400 &oldstate, state - 1); 3401 if (rv == -1) { 3402 error = EFAULT; 3403 goto out; 3404 } 3405 if (rv == 1) { 3406 state = oldstate; 3407 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3408 error = EPERM; 3409 goto out; 3410 } 3411 error = thread_check_susp(td, true); 3412 if (error != 0) 3413 goto out; 3414 } else 3415 break; 3416 } 3417 } else { 3418 error = EPERM; 3419 goto out; 
3420 } 3421 3422 count = 0; 3423 3424 if (!(flags & URWLOCK_PREFER_READER)) { 3425 if (state & URWLOCK_WRITE_WAITERS) { 3426 count = 1; 3427 q = UMTX_EXCLUSIVE_QUEUE; 3428 } else if (state & URWLOCK_READ_WAITERS) { 3429 count = INT_MAX; 3430 q = UMTX_SHARED_QUEUE; 3431 } 3432 } else { 3433 if (state & URWLOCK_READ_WAITERS) { 3434 count = INT_MAX; 3435 q = UMTX_SHARED_QUEUE; 3436 } else if (state & URWLOCK_WRITE_WAITERS) { 3437 count = 1; 3438 q = UMTX_EXCLUSIVE_QUEUE; 3439 } 3440 } 3441 3442 if (count) { 3443 umtxq_lock(&uq->uq_key); 3444 umtxq_busy(&uq->uq_key); 3445 umtxq_signal_queue(&uq->uq_key, count, q); 3446 umtxq_unbusy(&uq->uq_key); 3447 umtxq_unlock(&uq->uq_key); 3448 } 3449 out: 3450 umtx_key_release(&uq->uq_key); 3451 return (error); 3452 } 3453 3454 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3455 static int 3456 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3457 { 3458 struct umtx_abs_timeout timo; 3459 struct umtx_q *uq; 3460 uint32_t flags, count, count1; 3461 int error, rv, rv1; 3462 3463 uq = td->td_umtxq; 3464 error = fueword32(&sem->_flags, &flags); 3465 if (error == -1) 3466 return (EFAULT); 3467 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3468 if (error != 0) 3469 return (error); 3470 3471 if (timeout != NULL) 3472 umtx_abs_timeout_init2(&timo, timeout); 3473 3474 again: 3475 umtxq_lock(&uq->uq_key); 3476 umtxq_busy(&uq->uq_key); 3477 umtxq_insert(uq); 3478 umtxq_unlock(&uq->uq_key); 3479 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3480 if (rv == 0) 3481 rv1 = fueword32(&sem->_count, &count); 3482 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3483 (rv == 1 && count1 == 0)) { 3484 umtxq_lock(&uq->uq_key); 3485 umtxq_unbusy(&uq->uq_key); 3486 umtxq_remove(uq); 3487 umtxq_unlock(&uq->uq_key); 3488 if (rv == 1) { 3489 rv = thread_check_susp(td, true); 3490 if (rv == 0) 3491 goto again; 3492 error = rv; 3493 goto out; 3494 } 3495 if (rv == 0) 3496 rv = rv1; 
3497 error = rv == -1 ? EFAULT : 0; 3498 goto out; 3499 } 3500 umtxq_lock(&uq->uq_key); 3501 umtxq_unbusy(&uq->uq_key); 3502 3503 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3504 3505 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3506 error = 0; 3507 else { 3508 umtxq_remove(uq); 3509 /* A relative timeout cannot be restarted. */ 3510 if (error == ERESTART && timeout != NULL && 3511 (timeout->_flags & UMTX_ABSTIME) == 0) 3512 error = EINTR; 3513 } 3514 umtxq_unlock(&uq->uq_key); 3515 out: 3516 umtx_key_release(&uq->uq_key); 3517 return (error); 3518 } 3519 3520 /* 3521 * Signal a userland semaphore. 3522 */ 3523 static int 3524 do_sem_wake(struct thread *td, struct _usem *sem) 3525 { 3526 struct umtx_key key; 3527 int error, cnt; 3528 uint32_t flags; 3529 3530 error = fueword32(&sem->_flags, &flags); 3531 if (error == -1) 3532 return (EFAULT); 3533 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3534 return (error); 3535 umtxq_lock(&key); 3536 umtxq_busy(&key); 3537 cnt = umtxq_count(&key); 3538 if (cnt > 0) { 3539 /* 3540 * Check if count is greater than 0, this means the memory is 3541 * still being referenced by user code, so we can safely 3542 * update _has_waiters flag. 
3543 */ 3544 if (cnt == 1) { 3545 umtxq_unlock(&key); 3546 error = suword32(&sem->_has_waiters, 0); 3547 umtxq_lock(&key); 3548 if (error == -1) 3549 error = EFAULT; 3550 } 3551 umtxq_signal(&key, 1); 3552 } 3553 umtxq_unbusy(&key); 3554 umtxq_unlock(&key); 3555 umtx_key_release(&key); 3556 return (error); 3557 } 3558 #endif 3559 3560 static int 3561 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3562 { 3563 struct umtx_abs_timeout timo; 3564 struct umtx_q *uq; 3565 uint32_t count, flags; 3566 int error, rv; 3567 3568 uq = td->td_umtxq; 3569 flags = fuword32(&sem->_flags); 3570 if (timeout != NULL) 3571 umtx_abs_timeout_init2(&timo, timeout); 3572 3573 again: 3574 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3575 if (error != 0) 3576 return (error); 3577 umtxq_lock(&uq->uq_key); 3578 umtxq_busy(&uq->uq_key); 3579 umtxq_insert(uq); 3580 umtxq_unlock(&uq->uq_key); 3581 rv = fueword32(&sem->_count, &count); 3582 if (rv == -1) { 3583 umtxq_lock(&uq->uq_key); 3584 umtxq_unbusy(&uq->uq_key); 3585 umtxq_remove(uq); 3586 umtxq_unlock(&uq->uq_key); 3587 umtx_key_release(&uq->uq_key); 3588 return (EFAULT); 3589 } 3590 for (;;) { 3591 if (USEM_COUNT(count) != 0) { 3592 umtxq_lock(&uq->uq_key); 3593 umtxq_unbusy(&uq->uq_key); 3594 umtxq_remove(uq); 3595 umtxq_unlock(&uq->uq_key); 3596 umtx_key_release(&uq->uq_key); 3597 return (0); 3598 } 3599 if (count == USEM_HAS_WAITERS) 3600 break; 3601 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3602 if (rv == 0) 3603 break; 3604 umtxq_lock(&uq->uq_key); 3605 umtxq_unbusy(&uq->uq_key); 3606 umtxq_remove(uq); 3607 umtxq_unlock(&uq->uq_key); 3608 umtx_key_release(&uq->uq_key); 3609 if (rv == -1) 3610 return (EFAULT); 3611 rv = thread_check_susp(td, true); 3612 if (rv != 0) 3613 return (rv); 3614 goto again; 3615 } 3616 umtxq_lock(&uq->uq_key); 3617 umtxq_unbusy(&uq->uq_key); 3618 3619 error = umtxq_sleep(uq, "usem", timeout == NULL ? 
NULL : &timo); 3620 3621 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3622 error = 0; 3623 else { 3624 umtxq_remove(uq); 3625 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3626 /* A relative timeout cannot be restarted. */ 3627 if (error == ERESTART) 3628 error = EINTR; 3629 if (error == EINTR) { 3630 umtx_abs_timeout_update(&timo); 3631 timespecsub(&timo.end, &timo.cur, 3632 &timeout->_timeout); 3633 } 3634 } 3635 } 3636 umtxq_unlock(&uq->uq_key); 3637 umtx_key_release(&uq->uq_key); 3638 return (error); 3639 } 3640 3641 /* 3642 * Signal a userland semaphore. 3643 */ 3644 static int 3645 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3646 { 3647 struct umtx_key key; 3648 int error, cnt, rv; 3649 uint32_t count, flags; 3650 3651 rv = fueword32(&sem->_flags, &flags); 3652 if (rv == -1) 3653 return (EFAULT); 3654 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3655 return (error); 3656 umtxq_lock(&key); 3657 umtxq_busy(&key); 3658 cnt = umtxq_count(&key); 3659 if (cnt > 0) { 3660 /* 3661 * If this was the last sleeping thread, clear the waiters 3662 * flag in _count. 
3663 */ 3664 if (cnt == 1) { 3665 umtxq_unlock(&key); 3666 rv = fueword32(&sem->_count, &count); 3667 while (rv != -1 && count & USEM_HAS_WAITERS) { 3668 rv = casueword32(&sem->_count, count, &count, 3669 count & ~USEM_HAS_WAITERS); 3670 if (rv == 1) { 3671 rv = thread_check_susp(td, true); 3672 if (rv != 0) 3673 break; 3674 } 3675 } 3676 if (rv == -1) 3677 error = EFAULT; 3678 else if (rv > 0) { 3679 error = rv; 3680 } 3681 umtxq_lock(&key); 3682 } 3683 3684 umtxq_signal(&key, 1); 3685 } 3686 umtxq_unbusy(&key); 3687 umtxq_unlock(&key); 3688 umtx_key_release(&key); 3689 return (error); 3690 } 3691 3692 #ifdef COMPAT_FREEBSD10 3693 int 3694 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3695 { 3696 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3697 } 3698 3699 int 3700 freebsd10__umtx_unlock(struct thread *td, 3701 struct freebsd10__umtx_unlock_args *uap) 3702 { 3703 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3704 } 3705 #endif 3706 3707 inline int 3708 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3709 { 3710 int error; 3711 3712 error = copyin(uaddr, tsp, sizeof(*tsp)); 3713 if (error == 0) { 3714 if (tsp->tv_sec < 0 || 3715 tsp->tv_nsec >= 1000000000 || 3716 tsp->tv_nsec < 0) 3717 error = EINVAL; 3718 } 3719 return (error); 3720 } 3721 3722 static inline int 3723 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3724 { 3725 int error; 3726 3727 if (size <= sizeof(tp->_timeout)) { 3728 tp->_clockid = CLOCK_REALTIME; 3729 tp->_flags = 0; 3730 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3731 } else 3732 error = copyin(uaddr, tp, sizeof(*tp)); 3733 if (error != 0) 3734 return (error); 3735 if (tp->_timeout.tv_sec < 0 || 3736 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3737 return (EINVAL); 3738 return (0); 3739 } 3740 3741 static int 3742 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3743 struct umtx_robust_lists_params *rb) 3744 { 
3745 3746 if (size > sizeof(*rb)) 3747 return (EINVAL); 3748 return (copyin(uaddr, rb, size)); 3749 } 3750 3751 static int 3752 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3753 { 3754 3755 /* 3756 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3757 * and we're only called if sz >= sizeof(timespec) as supplied in the 3758 * copyops. 3759 */ 3760 KASSERT(sz >= sizeof(*tsp), 3761 ("umtx_copyops specifies incorrect sizes")); 3762 3763 return (copyout(tsp, uaddr, sizeof(*tsp))); 3764 } 3765 3766 #ifdef COMPAT_FREEBSD10 3767 static int 3768 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3769 const struct umtx_copyops *ops) 3770 { 3771 struct timespec *ts, timeout; 3772 int error; 3773 3774 /* Allow a null timespec (wait forever). */ 3775 if (uap->uaddr2 == NULL) 3776 ts = NULL; 3777 else { 3778 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3779 if (error != 0) 3780 return (error); 3781 ts = &timeout; 3782 } 3783 #ifdef COMPAT_FREEBSD32 3784 if (ops->compat32) 3785 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3786 #endif 3787 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3788 } 3789 3790 static int 3791 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3792 const struct umtx_copyops *ops) 3793 { 3794 #ifdef COMPAT_FREEBSD32 3795 if (ops->compat32) 3796 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3797 #endif 3798 return (do_unlock_umtx(td, uap->obj, uap->val)); 3799 } 3800 #endif /* COMPAT_FREEBSD10 */ 3801 3802 #if !defined(COMPAT_FREEBSD10) 3803 static int 3804 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3805 const struct umtx_copyops *ops __unused) 3806 { 3807 return (EOPNOTSUPP); 3808 } 3809 #endif /* COMPAT_FREEBSD10 */ 3810 3811 static int 3812 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3813 const struct umtx_copyops *ops) 3814 { 3815 struct _umtx_time timeout, *tm_p; 3816 int error; 3817 3818 if 
(uap->uaddr2 == NULL) 3819 tm_p = NULL; 3820 else { 3821 error = ops->copyin_umtx_time( 3822 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3823 if (error != 0) 3824 return (error); 3825 tm_p = &timeout; 3826 } 3827 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3828 } 3829 3830 static int 3831 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3832 const struct umtx_copyops *ops) 3833 { 3834 struct _umtx_time timeout, *tm_p; 3835 int error; 3836 3837 if (uap->uaddr2 == NULL) 3838 tm_p = NULL; 3839 else { 3840 error = ops->copyin_umtx_time( 3841 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3842 if (error != 0) 3843 return (error); 3844 tm_p = &timeout; 3845 } 3846 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3847 } 3848 3849 static int 3850 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3851 const struct umtx_copyops *ops) 3852 { 3853 struct _umtx_time *tm_p, timeout; 3854 int error; 3855 3856 if (uap->uaddr2 == NULL) 3857 tm_p = NULL; 3858 else { 3859 error = ops->copyin_umtx_time( 3860 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3861 if (error != 0) 3862 return (error); 3863 tm_p = &timeout; 3864 } 3865 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3866 } 3867 3868 static int 3869 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3870 const struct umtx_copyops *ops __unused) 3871 { 3872 3873 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3874 } 3875 3876 #define BATCH_SIZE 128 3877 static int 3878 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3879 { 3880 char *uaddrs[BATCH_SIZE], **upp; 3881 int count, error, i, pos, tocopy; 3882 3883 upp = (char **)uap->obj; 3884 error = 0; 3885 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3886 pos += tocopy) { 3887 tocopy = MIN(count, BATCH_SIZE); 3888 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3889 if (error != 0) 3890 break; 3891 for (i = 0; i < tocopy; ++i) { 3892 
kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3893 } 3894 maybe_yield(); 3895 } 3896 return (error); 3897 } 3898 3899 static int 3900 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3901 { 3902 uint32_t uaddrs[BATCH_SIZE], *upp; 3903 int count, error, i, pos, tocopy; 3904 3905 upp = (uint32_t *)uap->obj; 3906 error = 0; 3907 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3908 pos += tocopy) { 3909 tocopy = MIN(count, BATCH_SIZE); 3910 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3911 if (error != 0) 3912 break; 3913 for (i = 0; i < tocopy; ++i) { 3914 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3915 INT_MAX, 1); 3916 } 3917 maybe_yield(); 3918 } 3919 return (error); 3920 } 3921 3922 static int 3923 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3924 const struct umtx_copyops *ops) 3925 { 3926 3927 if (ops->compat32) 3928 return (__umtx_op_nwake_private_compat32(td, uap)); 3929 return (__umtx_op_nwake_private_native(td, uap)); 3930 } 3931 3932 static int 3933 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3934 const struct umtx_copyops *ops __unused) 3935 { 3936 3937 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3938 } 3939 3940 static int 3941 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3942 const struct umtx_copyops *ops) 3943 { 3944 struct _umtx_time *tm_p, timeout; 3945 int error; 3946 3947 /* Allow a null timespec (wait forever). 
*/ 3948 if (uap->uaddr2 == NULL) 3949 tm_p = NULL; 3950 else { 3951 error = ops->copyin_umtx_time( 3952 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3953 if (error != 0) 3954 return (error); 3955 tm_p = &timeout; 3956 } 3957 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3958 } 3959 3960 static int 3961 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3962 const struct umtx_copyops *ops __unused) 3963 { 3964 3965 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3966 } 3967 3968 static int 3969 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3970 const struct umtx_copyops *ops) 3971 { 3972 struct _umtx_time *tm_p, timeout; 3973 int error; 3974 3975 /* Allow a null timespec (wait forever). */ 3976 if (uap->uaddr2 == NULL) 3977 tm_p = NULL; 3978 else { 3979 error = ops->copyin_umtx_time( 3980 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3981 if (error != 0) 3982 return (error); 3983 tm_p = &timeout; 3984 } 3985 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3986 } 3987 3988 static int 3989 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 3990 const struct umtx_copyops *ops __unused) 3991 { 3992 3993 return (do_wake_umutex(td, uap->obj)); 3994 } 3995 3996 static int 3997 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 3998 const struct umtx_copyops *ops __unused) 3999 { 4000 4001 return (do_unlock_umutex(td, uap->obj, false)); 4002 } 4003 4004 static int 4005 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4006 const struct umtx_copyops *ops __unused) 4007 { 4008 4009 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4010 } 4011 4012 static int 4013 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4014 const struct umtx_copyops *ops) 4015 { 4016 struct timespec *ts, timeout; 4017 int error; 4018 4019 /* Allow a null timespec (wait forever). 
*/ 4020 if (uap->uaddr2 == NULL) 4021 ts = NULL; 4022 else { 4023 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4024 if (error != 0) 4025 return (error); 4026 ts = &timeout; 4027 } 4028 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4029 } 4030 4031 static int 4032 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4033 const struct umtx_copyops *ops __unused) 4034 { 4035 4036 return (do_cv_signal(td, uap->obj)); 4037 } 4038 4039 static int 4040 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4041 const struct umtx_copyops *ops __unused) 4042 { 4043 4044 return (do_cv_broadcast(td, uap->obj)); 4045 } 4046 4047 static int 4048 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4049 const struct umtx_copyops *ops) 4050 { 4051 struct _umtx_time timeout; 4052 int error; 4053 4054 /* Allow a null timespec (wait forever). */ 4055 if (uap->uaddr2 == NULL) { 4056 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4057 } else { 4058 error = ops->copyin_umtx_time(uap->uaddr2, 4059 (size_t)uap->uaddr1, &timeout); 4060 if (error != 0) 4061 return (error); 4062 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4063 } 4064 return (error); 4065 } 4066 4067 static int 4068 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4069 const struct umtx_copyops *ops) 4070 { 4071 struct _umtx_time timeout; 4072 int error; 4073 4074 /* Allow a null timespec (wait forever). 
*/ 4075 if (uap->uaddr2 == NULL) { 4076 error = do_rw_wrlock(td, uap->obj, 0); 4077 } else { 4078 error = ops->copyin_umtx_time(uap->uaddr2, 4079 (size_t)uap->uaddr1, &timeout); 4080 if (error != 0) 4081 return (error); 4082 4083 error = do_rw_wrlock(td, uap->obj, &timeout); 4084 } 4085 return (error); 4086 } 4087 4088 static int 4089 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4090 const struct umtx_copyops *ops __unused) 4091 { 4092 4093 return (do_rw_unlock(td, uap->obj)); 4094 } 4095 4096 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4097 static int 4098 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4099 const struct umtx_copyops *ops) 4100 { 4101 struct _umtx_time *tm_p, timeout; 4102 int error; 4103 4104 /* Allow a null timespec (wait forever). */ 4105 if (uap->uaddr2 == NULL) 4106 tm_p = NULL; 4107 else { 4108 error = ops->copyin_umtx_time( 4109 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4110 if (error != 0) 4111 return (error); 4112 tm_p = &timeout; 4113 } 4114 return (do_sem_wait(td, uap->obj, tm_p)); 4115 } 4116 4117 static int 4118 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4119 const struct umtx_copyops *ops __unused) 4120 { 4121 4122 return (do_sem_wake(td, uap->obj)); 4123 } 4124 #endif 4125 4126 static int 4127 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4128 const struct umtx_copyops *ops __unused) 4129 { 4130 4131 return (do_wake2_umutex(td, uap->obj, uap->val)); 4132 } 4133 4134 static int 4135 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4136 const struct umtx_copyops *ops) 4137 { 4138 struct _umtx_time *tm_p, timeout; 4139 size_t uasize; 4140 int error; 4141 4142 /* Allow a null timespec (wait forever). 
*/ 4143 if (uap->uaddr2 == NULL) { 4144 uasize = 0; 4145 tm_p = NULL; 4146 } else { 4147 uasize = (size_t)uap->uaddr1; 4148 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4149 if (error != 0) 4150 return (error); 4151 tm_p = &timeout; 4152 } 4153 error = do_sem2_wait(td, uap->obj, tm_p); 4154 if (error == EINTR && uap->uaddr2 != NULL && 4155 (timeout._flags & UMTX_ABSTIME) == 0 && 4156 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4157 error = ops->copyout_timeout( 4158 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4159 uasize - ops->umtx_time_sz, &timeout._timeout); 4160 if (error == 0) { 4161 error = EINTR; 4162 } 4163 } 4164 4165 return (error); 4166 } 4167 4168 static int 4169 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4170 const struct umtx_copyops *ops __unused) 4171 { 4172 4173 return (do_sem2_wake(td, uap->obj)); 4174 } 4175 4176 #define USHM_OBJ_UMTX(o) \ 4177 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4178 4179 #define USHMF_REG_LINKED 0x0001 4180 #define USHMF_OBJ_LINKED 0x0002 4181 struct umtx_shm_reg { 4182 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4183 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4184 struct umtx_key ushm_key; 4185 struct ucred *ushm_cred; 4186 struct shmfd *ushm_obj; 4187 u_int ushm_refcnt; 4188 u_int ushm_flags; 4189 }; 4190 4191 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4192 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4193 4194 static uma_zone_t umtx_shm_reg_zone; 4195 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4196 static struct mtx umtx_shm_lock; 4197 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4198 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4199 4200 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4201 4202 static void 4203 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4204 { 4205 struct umtx_shm_reg_head d; 4206 struct umtx_shm_reg *reg, *reg1; 4207 4208 TAILQ_INIT(&d); 4209 mtx_lock(&umtx_shm_lock); 4210 
TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4211 mtx_unlock(&umtx_shm_lock); 4212 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4213 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4214 umtx_shm_free_reg(reg); 4215 } 4216 } 4217 4218 static struct task umtx_shm_reg_delfree_task = 4219 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4220 4221 static struct umtx_shm_reg * 4222 umtx_shm_find_reg_locked(const struct umtx_key *key) 4223 { 4224 struct umtx_shm_reg *reg; 4225 struct umtx_shm_reg_head *reg_head; 4226 4227 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4228 mtx_assert(&umtx_shm_lock, MA_OWNED); 4229 reg_head = &umtx_shm_registry[key->hash]; 4230 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4231 KASSERT(reg->ushm_key.shared, 4232 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4233 if (reg->ushm_key.info.shared.object == 4234 key->info.shared.object && 4235 reg->ushm_key.info.shared.offset == 4236 key->info.shared.offset) { 4237 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4238 KASSERT(reg->ushm_refcnt > 0, 4239 ("reg %p refcnt 0 onlist", reg)); 4240 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4241 ("reg %p not linked", reg)); 4242 reg->ushm_refcnt++; 4243 return (reg); 4244 } 4245 } 4246 return (NULL); 4247 } 4248 4249 static struct umtx_shm_reg * 4250 umtx_shm_find_reg(const struct umtx_key *key) 4251 { 4252 struct umtx_shm_reg *reg; 4253 4254 mtx_lock(&umtx_shm_lock); 4255 reg = umtx_shm_find_reg_locked(key); 4256 mtx_unlock(&umtx_shm_lock); 4257 return (reg); 4258 } 4259 4260 static void 4261 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4262 { 4263 4264 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4265 crfree(reg->ushm_cred); 4266 shm_drop(reg->ushm_obj); 4267 uma_zfree(umtx_shm_reg_zone, reg); 4268 } 4269 4270 static bool 4271 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4272 { 4273 bool res; 4274 4275 mtx_assert(&umtx_shm_lock, MA_OWNED); 4276 KASSERT(reg->ushm_refcnt > 0, 
("ushm_reg %p refcnt 0", reg)); 4277 reg->ushm_refcnt--; 4278 res = reg->ushm_refcnt == 0; 4279 if (res || force) { 4280 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4281 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4282 reg, ushm_reg_link); 4283 reg->ushm_flags &= ~USHMF_REG_LINKED; 4284 } 4285 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4286 LIST_REMOVE(reg, ushm_obj_link); 4287 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4288 } 4289 } 4290 return (res); 4291 } 4292 4293 static void 4294 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4295 { 4296 vm_object_t object; 4297 bool dofree; 4298 4299 if (force) { 4300 object = reg->ushm_obj->shm_object; 4301 VM_OBJECT_WLOCK(object); 4302 object->flags |= OBJ_UMTXDEAD; 4303 VM_OBJECT_WUNLOCK(object); 4304 } 4305 mtx_lock(&umtx_shm_lock); 4306 dofree = umtx_shm_unref_reg_locked(reg, force); 4307 mtx_unlock(&umtx_shm_lock); 4308 if (dofree) 4309 umtx_shm_free_reg(reg); 4310 } 4311 4312 void 4313 umtx_shm_object_init(vm_object_t object) 4314 { 4315 4316 LIST_INIT(USHM_OBJ_UMTX(object)); 4317 } 4318 4319 void 4320 umtx_shm_object_terminated(vm_object_t object) 4321 { 4322 struct umtx_shm_reg *reg, *reg1; 4323 bool dofree; 4324 4325 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4326 return; 4327 4328 dofree = false; 4329 mtx_lock(&umtx_shm_lock); 4330 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4331 if (umtx_shm_unref_reg_locked(reg, true)) { 4332 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4333 ushm_reg_link); 4334 dofree = true; 4335 } 4336 } 4337 mtx_unlock(&umtx_shm_lock); 4338 if (dofree) 4339 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4340 } 4341 4342 static int 4343 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4344 struct umtx_shm_reg **res) 4345 { 4346 struct umtx_shm_reg *reg, *reg1; 4347 struct ucred *cred; 4348 int error; 4349 4350 reg = umtx_shm_find_reg(key); 4351 if (reg != NULL) { 4352 *res = reg; 4353 return (0); 4354 } 4355 
cred = td->td_ucred; 4356 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4357 return (ENOMEM); 4358 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4359 reg->ushm_refcnt = 1; 4360 bcopy(key, ®->ushm_key, sizeof(*key)); 4361 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4362 reg->ushm_cred = crhold(cred); 4363 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4364 if (error != 0) { 4365 umtx_shm_free_reg(reg); 4366 return (error); 4367 } 4368 mtx_lock(&umtx_shm_lock); 4369 reg1 = umtx_shm_find_reg_locked(key); 4370 if (reg1 != NULL) { 4371 mtx_unlock(&umtx_shm_lock); 4372 umtx_shm_free_reg(reg); 4373 *res = reg1; 4374 return (0); 4375 } 4376 reg->ushm_refcnt++; 4377 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4378 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4379 ushm_obj_link); 4380 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4381 mtx_unlock(&umtx_shm_lock); 4382 *res = reg; 4383 return (0); 4384 } 4385 4386 static int 4387 umtx_shm_alive(struct thread *td, void *addr) 4388 { 4389 vm_map_t map; 4390 vm_map_entry_t entry; 4391 vm_object_t object; 4392 vm_pindex_t pindex; 4393 vm_prot_t prot; 4394 int res, ret; 4395 boolean_t wired; 4396 4397 map = &td->td_proc->p_vmspace->vm_map; 4398 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4399 &object, &pindex, &prot, &wired); 4400 if (res != KERN_SUCCESS) 4401 return (EFAULT); 4402 if (object == NULL) 4403 ret = EINVAL; 4404 else 4405 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 4406 vm_map_lookup_done(map, entry); 4407 return (ret); 4408 } 4409 4410 static void 4411 umtx_shm_init(void) 4412 { 4413 int i; 4414 4415 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4416 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4417 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4418 for (i = 0; i < nitems(umtx_shm_registry); i++) 4419 TAILQ_INIT(&umtx_shm_registry[i]); 4420 } 4421 4422 static int 4423 umtx_shm(struct thread *td, void *addr, u_int flags) 4424 { 4425 struct umtx_key key; 4426 struct umtx_shm_reg *reg; 4427 struct file *fp; 4428 int error, fd; 4429 4430 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4431 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4432 return (EINVAL); 4433 if ((flags & UMTX_SHM_ALIVE) != 0) 4434 return (umtx_shm_alive(td, addr)); 4435 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4436 if (error != 0) 4437 return (error); 4438 KASSERT(key.shared == 1, ("non-shared key")); 4439 if ((flags & UMTX_SHM_CREAT) != 0) { 4440 error = umtx_shm_create_reg(td, &key, ®); 4441 } else { 4442 reg = umtx_shm_find_reg(&key); 4443 if (reg == NULL) 4444 error = ESRCH; 4445 } 4446 umtx_key_release(&key); 4447 if (error != 0) 4448 return (error); 4449 KASSERT(reg != NULL, ("no reg")); 4450 if ((flags & UMTX_SHM_DESTROY) != 0) { 4451 umtx_shm_unref_reg(reg, true); 4452 } else { 4453 #if 0 4454 #ifdef MAC 4455 error = mac_posixshm_check_open(td->td_ucred, 4456 reg->ushm_obj, FFLAGS(O_RDWR)); 4457 if (error == 0) 4458 #endif 4459 error = shm_access(reg->ushm_obj, td->td_ucred, 4460 FFLAGS(O_RDWR)); 4461 if (error == 0) 4462 #endif 4463 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4464 if (error == 0) { 4465 shm_hold(reg->ushm_obj); 4466 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4467 &shm_ops); 4468 td->td_retval[0] = fd; 4469 fdrop(fp, td); 4470 } 4471 } 4472 umtx_shm_unref_reg(reg, false); 4473 return (error); 4474 } 4475 4476 static int 4477 __umtx_op_shm(struct thread 
*td, struct _umtx_op_args *uap, 4478 const struct umtx_copyops *ops __unused) 4479 { 4480 4481 return (umtx_shm(td, uap->uaddr1, uap->val)); 4482 } 4483 4484 static int 4485 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4486 const struct umtx_copyops *ops) 4487 { 4488 struct umtx_robust_lists_params rb; 4489 int error; 4490 4491 if (ops->compat32) { 4492 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4493 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4494 td->td_rb_inact != 0)) 4495 return (EBUSY); 4496 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4497 return (EBUSY); 4498 } 4499 4500 bzero(&rb, sizeof(rb)); 4501 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4502 if (error != 0) 4503 return (error); 4504 4505 if (ops->compat32) 4506 td->td_pflags2 |= TDP2_COMPAT32RB; 4507 4508 td->td_rb_list = rb.robust_list_offset; 4509 td->td_rbp_list = rb.robust_priv_list_offset; 4510 td->td_rb_inact = rb.robust_inact_offset; 4511 return (0); 4512 } 4513 4514 #if defined(__i386__) || defined(__amd64__) 4515 /* 4516 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4517 * 32-bit time_t there. Other architectures just need the i386 definitions 4518 * along with their standard compat32. 4519 */ 4520 struct timespecx32 { 4521 int64_t tv_sec; 4522 int32_t tv_nsec; 4523 }; 4524 4525 struct umtx_timex32 { 4526 struct timespecx32 _timeout; 4527 uint32_t _flags; 4528 uint32_t _clockid; 4529 }; 4530 4531 #ifndef __i386__ 4532 #define timespeci386 timespec32 4533 #define umtx_timei386 umtx_time32 4534 #endif 4535 #else /* !__i386__ && !__amd64__ */ 4536 /* 32-bit architectures can emulate i386, so define these almost everywhere. 
*/ 4537 struct timespeci386 { 4538 int32_t tv_sec; 4539 int32_t tv_nsec; 4540 }; 4541 4542 struct umtx_timei386 { 4543 struct timespeci386 _timeout; 4544 uint32_t _flags; 4545 uint32_t _clockid; 4546 }; 4547 4548 #if defined(__LP64__) 4549 #define timespecx32 timespec32 4550 #define umtx_timex32 umtx_time32 4551 #endif 4552 #endif 4553 4554 static int 4555 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4556 struct umtx_robust_lists_params *rbp) 4557 { 4558 struct umtx_robust_lists_params_compat32 rb32; 4559 int error; 4560 4561 if (size > sizeof(rb32)) 4562 return (EINVAL); 4563 bzero(&rb32, sizeof(rb32)); 4564 error = copyin(uaddr, &rb32, size); 4565 if (error != 0) 4566 return (error); 4567 CP(rb32, *rbp, robust_list_offset); 4568 CP(rb32, *rbp, robust_priv_list_offset); 4569 CP(rb32, *rbp, robust_inact_offset); 4570 return (0); 4571 } 4572 4573 #ifndef __i386__ 4574 static inline int 4575 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4576 { 4577 struct timespeci386 ts32; 4578 int error; 4579 4580 error = copyin(uaddr, &ts32, sizeof(ts32)); 4581 if (error == 0) { 4582 if (ts32.tv_sec < 0 || 4583 ts32.tv_nsec >= 1000000000 || 4584 ts32.tv_nsec < 0) 4585 error = EINVAL; 4586 else { 4587 CP(ts32, *tsp, tv_sec); 4588 CP(ts32, *tsp, tv_nsec); 4589 } 4590 } 4591 return (error); 4592 } 4593 4594 static inline int 4595 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4596 { 4597 struct umtx_timei386 t32; 4598 int error; 4599 4600 t32._clockid = CLOCK_REALTIME; 4601 t32._flags = 0; 4602 if (size <= sizeof(t32._timeout)) 4603 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4604 else 4605 error = copyin(uaddr, &t32, sizeof(t32)); 4606 if (error != 0) 4607 return (error); 4608 if (t32._timeout.tv_sec < 0 || 4609 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4610 return (EINVAL); 4611 TS_CP(t32, *tp, _timeout); 4612 CP(t32, *tp, _flags); 4613 CP(t32, *tp, _clockid); 4614 return (0); 
4615 } 4616 4617 static int 4618 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4619 { 4620 struct timespeci386 remain32 = { 4621 .tv_sec = tsp->tv_sec, 4622 .tv_nsec = tsp->tv_nsec, 4623 }; 4624 4625 /* 4626 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4627 * and we're only called if sz >= sizeof(timespec) as supplied in the 4628 * copyops. 4629 */ 4630 KASSERT(sz >= sizeof(remain32), 4631 ("umtx_copyops specifies incorrect sizes")); 4632 4633 return (copyout(&remain32, uaddr, sizeof(remain32))); 4634 } 4635 #endif /* !__i386__ */ 4636 4637 #if defined(__i386__) || defined(__LP64__) 4638 static inline int 4639 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4640 { 4641 struct timespecx32 ts32; 4642 int error; 4643 4644 error = copyin(uaddr, &ts32, sizeof(ts32)); 4645 if (error == 0) { 4646 if (ts32.tv_sec < 0 || 4647 ts32.tv_nsec >= 1000000000 || 4648 ts32.tv_nsec < 0) 4649 error = EINVAL; 4650 else { 4651 CP(ts32, *tsp, tv_sec); 4652 CP(ts32, *tsp, tv_nsec); 4653 } 4654 } 4655 return (error); 4656 } 4657 4658 static inline int 4659 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4660 { 4661 struct umtx_timex32 t32; 4662 int error; 4663 4664 t32._clockid = CLOCK_REALTIME; 4665 t32._flags = 0; 4666 if (size <= sizeof(t32._timeout)) 4667 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4668 else 4669 error = copyin(uaddr, &t32, sizeof(t32)); 4670 if (error != 0) 4671 return (error); 4672 if (t32._timeout.tv_sec < 0 || 4673 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4674 return (EINVAL); 4675 TS_CP(t32, *tp, _timeout); 4676 CP(t32, *tp, _flags); 4677 CP(t32, *tp, _clockid); 4678 return (0); 4679 } 4680 4681 static int 4682 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4683 { 4684 struct timespecx32 remain32 = { 4685 .tv_sec = tsp->tv_sec, 4686 .tv_nsec = tsp->tv_nsec, 4687 }; 4688 4689 /* 4690 * Should be 
/*
 * Dispatch table for _umtx_op(2), indexed by operation number.
 *
 * kern__umtx_op() masks the ABI flag bits out of the operation and
 * rejects values not below nitems(op_table) before indexing this table.
 * Operations whose compat option is not configured are routed to
 * __umtx_op_unimpl, which returns EOPNOTSUPP.
 */
static const _umtx_op_func op_table[] = {
	/* Legacy umtx lock/unlock: only with COMPAT_FREEBSD10. */
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK]		= __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK]	= __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK]		= __umtx_op_unimpl,
	[UMTX_OP_UNLOCK]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
	/* Old-style semaphores: only with COMPAT_FREEBSD9 or COMPAT_FREEBSD10. */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};
*/ 4767 static const struct umtx_copyops umtx_native_opsx32 = { 4768 .copyin_timeout = umtx_copyin_timeoutx32, 4769 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4770 .copyin_robust_lists = umtx_copyin_robust_lists32, 4771 .copyout_timeout = umtx_copyout_timeoutx32, 4772 .timespec_sz = sizeof(struct timespecx32), 4773 .umtx_time_sz = sizeof(struct umtx_timex32), 4774 .compat32 = true, 4775 }; 4776 4777 #ifdef COMPAT_FREEBSD32 4778 #ifdef __amd64__ 4779 #define umtx_native_ops32 umtx_native_opsi386 4780 #else 4781 #define umtx_native_ops32 umtx_native_opsx32 4782 #endif 4783 #endif /* COMPAT_FREEBSD32 */ 4784 #endif /* __i386__ || __LP64__ */ 4785 4786 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4787 4788 static int 4789 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4790 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4791 { 4792 struct _umtx_op_args uap = { 4793 .obj = obj, 4794 .op = op & ~UMTX_OP__FLAGS, 4795 .val = val, 4796 .uaddr1 = uaddr1, 4797 .uaddr2 = uaddr2 4798 }; 4799 4800 if ((uap.op >= nitems(op_table))) 4801 return (EINVAL); 4802 return ((*op_table[uap.op])(td, &uap, ops)); 4803 } 4804 4805 int 4806 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4807 { 4808 static const struct umtx_copyops *umtx_ops; 4809 4810 umtx_ops = &umtx_native_ops; 4811 #ifdef __LP64__ 4812 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4813 if ((uap->op & UMTX_OP__I386) != 0) 4814 umtx_ops = &umtx_native_opsi386; 4815 else 4816 umtx_ops = &umtx_native_opsx32; 4817 } 4818 #elif !defined(__i386__) 4819 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4820 if ((uap->op & UMTX_OP__I386) != 0) 4821 umtx_ops = &umtx_native_opsi386; 4822 #else 4823 /* Likewise, UMTX_OP__I386 is a nop on i386. 
*/ 4824 if ((uap->op & UMTX_OP__32BIT) != 0) 4825 umtx_ops = &umtx_native_opsx32; 4826 #endif 4827 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4828 uap->uaddr2, umtx_ops)); 4829 } 4830 4831 #ifdef COMPAT_FREEBSD32 4832 #ifdef COMPAT_FREEBSD10 4833 int 4834 freebsd10_freebsd32_umtx_lock(struct thread *td, 4835 struct freebsd10_freebsd32_umtx_lock_args *uap) 4836 { 4837 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4838 } 4839 4840 int 4841 freebsd10_freebsd32_umtx_unlock(struct thread *td, 4842 struct freebsd10_freebsd32_umtx_unlock_args *uap) 4843 { 4844 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4845 } 4846 #endif /* COMPAT_FREEBSD10 */ 4847 4848 int 4849 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4850 { 4851 4852 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr, 4853 uap->uaddr2, &umtx_native_ops32)); 4854 } 4855 #endif /* COMPAT_FREEBSD32 */ 4856 4857 void 4858 umtx_thread_init(struct thread *td) 4859 { 4860 4861 td->td_umtxq = umtxq_alloc(); 4862 td->td_umtxq->uq_thread = td; 4863 } 4864 4865 void 4866 umtx_thread_fini(struct thread *td) 4867 { 4868 4869 umtxq_free(td->td_umtxq); 4870 } 4871 4872 /* 4873 * It will be called when new thread is created, e.g fork(). 4874 */ 4875 void 4876 umtx_thread_alloc(struct thread *td) 4877 { 4878 struct umtx_q *uq; 4879 4880 uq = td->td_umtxq; 4881 uq->uq_inherited_pri = PRI_MAX; 4882 4883 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4884 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4885 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4886 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4887 } 4888 4889 /* 4890 * exec() hook. 4891 * 4892 * Clear robust lists for all process' threads, not delaying the 4893 * cleanup to thread exit, since the relevant address space is 4894 * destroyed right now. 
4895 */ 4896 void 4897 umtx_exec(struct proc *p) 4898 { 4899 struct thread *td; 4900 4901 KASSERT(p == curproc, ("need curproc")); 4902 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4903 (p->p_flag & P_STOPPED_SINGLE) != 0, 4904 ("curproc must be single-threaded")); 4905 /* 4906 * There is no need to lock the list as only this thread can be 4907 * running. 4908 */ 4909 FOREACH_THREAD_IN_PROC(p, td) { 4910 KASSERT(td == curthread || 4911 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4912 ("running thread %p %p", p, td)); 4913 umtx_thread_cleanup(td); 4914 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4915 } 4916 } 4917 4918 /* 4919 * thread exit hook. 4920 */ 4921 void 4922 umtx_thread_exit(struct thread *td) 4923 { 4924 4925 umtx_thread_cleanup(td); 4926 } 4927 4928 static int 4929 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4930 { 4931 u_long res1; 4932 uint32_t res32; 4933 int error; 4934 4935 if (compat32) { 4936 error = fueword32((void *)ptr, &res32); 4937 if (error == 0) 4938 res1 = res32; 4939 } else { 4940 error = fueword((void *)ptr, &res1); 4941 } 4942 if (error == 0) 4943 *res = res1; 4944 else 4945 error = EFAULT; 4946 return (error); 4947 } 4948 4949 static void 4950 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 4951 bool compat32) 4952 { 4953 struct umutex32 m32; 4954 4955 if (compat32) { 4956 memcpy(&m32, m, sizeof(m32)); 4957 *rb_list = m32.m_rb_lnk; 4958 } else { 4959 *rb_list = m->m_rb_lnk; 4960 } 4961 } 4962 4963 static int 4964 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 4965 bool compat32) 4966 { 4967 struct umutex m; 4968 int error; 4969 4970 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4971 error = copyin((void *)rbp, &m, sizeof(m)); 4972 if (error != 0) 4973 return (error); 4974 if (rb_list != NULL) 4975 umtx_read_rb_list(td, &m, rb_list, compat32); 4976 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4977 return 
(EINVAL); 4978 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4979 /* inact is cleared after unlock, allow the inconsistency */ 4980 return (inact ? 0 : EINVAL); 4981 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4982 } 4983 4984 static void 4985 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4986 const char *name, bool compat32) 4987 { 4988 int error, i; 4989 uintptr_t rbp; 4990 bool inact; 4991 4992 if (rb_list == 0) 4993 return; 4994 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 4995 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4996 if (rbp == *rb_inact) { 4997 inact = true; 4998 *rb_inact = 0; 4999 } else 5000 inact = false; 5001 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5002 } 5003 if (i == umtx_max_rb && umtx_verbose_rb) { 5004 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5005 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5006 } 5007 if (error != 0 && umtx_verbose_rb) { 5008 uprintf("comm %s pid %d: handling %srb error %d\n", 5009 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5010 } 5011 } 5012 5013 /* 5014 * Clean up umtx data. 5015 */ 5016 static void 5017 umtx_thread_cleanup(struct thread *td) 5018 { 5019 struct umtx_q *uq; 5020 struct umtx_pi *pi; 5021 uintptr_t rb_inact; 5022 bool compat32; 5023 5024 /* 5025 * Disown pi mutexes. 
5026 */ 5027 uq = td->td_umtxq; 5028 if (uq != NULL) { 5029 if (uq->uq_inherited_pri != PRI_MAX || 5030 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5031 mtx_lock(&umtx_lock); 5032 uq->uq_inherited_pri = PRI_MAX; 5033 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5034 pi->pi_owner = NULL; 5035 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5036 } 5037 mtx_unlock(&umtx_lock); 5038 } 5039 sched_lend_user_prio_cond(td, PRI_MAX); 5040 } 5041 5042 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5043 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5044 5045 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5046 return; 5047 5048 /* 5049 * Handle terminated robust mutexes. Must be done after 5050 * robust pi disown, otherwise unlock could see unowned 5051 * entries. 5052 */ 5053 rb_inact = td->td_rb_inact; 5054 if (rb_inact != 0) 5055 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5056 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5057 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5058 if (rb_inact != 0) 5059 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5060 } 5061