1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/limits.h> 35 #include <sys/lock.h> 36 #include <sys/malloc.h> 37 #include <sys/mutex.h> 38 #include <sys/priv.h> 39 #include <sys/proc.h> 40 #include <sys/sched.h> 41 #include <sys/smp.h> 42 #include <sys/sysctl.h> 43 #include <sys/sysent.h> 44 #include <sys/systm.h> 45 #include <sys/sysproto.h> 46 #include <sys/eventhandler.h> 47 #include <sys/umtx.h> 48 49 #include <vm/vm.h> 50 #include <vm/vm_param.h> 51 #include <vm/pmap.h> 52 #include <vm/vm_map.h> 53 #include <vm/vm_object.h> 54 55 #include <machine/cpu.h> 56 57 #ifdef COMPAT_IA32 58 #include <compat/freebsd32/freebsd32_proto.h> 59 #endif 60 61 #define TYPE_SIMPLE_WAIT 0 62 #define TYPE_CV 1 63 #define TYPE_SIMPLE_LOCK 2 64 #define TYPE_NORMAL_UMUTEX 3 65 #define TYPE_PI_UMUTEX 4 66 #define TYPE_PP_UMUTEX 5 67 #define TYPE_RWLOCK 6 68 69 #define _UMUTEX_TRY 1 70 #define _UMUTEX_WAIT 2 71 72 /* Key to represent a unique userland synchronous object */ 73 struct umtx_key { 74 int hash; 75 int type; 76 int shared; 77 union { 78 struct { 79 vm_object_t object; 80 uintptr_t offset; 81 } shared; 82 struct { 83 struct vmspace *vs; 84 uintptr_t addr; 85 } private; 86 struct { 87 void *a; 88 uintptr_t b; 89 } both; 90 } info; 91 }; 92 93 /* Priority inheritance mutex info. */ 94 struct umtx_pi { 95 /* Owner thread */ 96 struct thread *pi_owner; 97 98 /* Reference count */ 99 int pi_refcount; 100 101 /* List entry to link umtx holding by thread */ 102 TAILQ_ENTRY(umtx_pi) pi_link; 103 104 /* List entry in hash */ 105 TAILQ_ENTRY(umtx_pi) pi_hashlink; 106 107 /* List for waiters */ 108 TAILQ_HEAD(,umtx_q) pi_blocked; 109 110 /* Identify a userland lock object */ 111 struct umtx_key pi_key; 112 }; 113 114 /* A userland synchronous object user. */ 115 struct umtx_q { 116 /* Linked list for the hash. */ 117 TAILQ_ENTRY(umtx_q) uq_link; 118 119 /* Umtx key. */ 120 struct umtx_key uq_key; 121 122 /* Umtx flags. 
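	 * UQF_UMTXQ is set by umtxq_insert_queue() while this entry is
	 * linked on a chain's sleep queue and cleared by
	 * umtxq_remove_queue(); umtxq_sleep() checks it to detect a
	 * wakeup that raced with going to sleep.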
	 */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * PI mutex this thread is blocked on.  May be read while holding
	 * either the chain lock or umtx_lock; writing it requires both
	 * locks to be held.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Priority inherited from PP mutexes */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Time-sharing priorities are not propagated, for a security reason:
 * otherwise a user could simply create a PI mutex, let thread A lock it
 * and let thread B block on it; because B is sleeping its priority
 * would be boosted, that boost would propagate to A as well, and A's
 * priority would never be lowered even while it uses 100% CPU, which is
 * unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? \
THREAD_SHARE : PROCESS_SHARE) 193 194 #define BUSY_SPINS 200 195 196 static uma_zone_t umtx_pi_zone; 197 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 198 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 199 static int umtx_pi_allocated; 200 201 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 202 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 203 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 204 205 static void umtxq_sysinit(void *); 206 static void umtxq_hash(struct umtx_key *key); 207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 208 static void umtxq_lock(struct umtx_key *key); 209 static void umtxq_unlock(struct umtx_key *key); 210 static void umtxq_busy(struct umtx_key *key); 211 static void umtxq_unbusy(struct umtx_key *key); 212 static void umtxq_insert_queue(struct umtx_q *uq, int q); 213 static void umtxq_remove_queue(struct umtx_q *uq, int q); 214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); 215 static int umtxq_count(struct umtx_key *key); 216 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); 217 static int umtx_key_get(void *addr, int type, int share, 218 struct umtx_key *key); 219 static void umtx_key_release(struct umtx_key *key); 220 static struct umtx_pi *umtx_pi_alloc(int); 221 static void umtx_pi_free(struct umtx_pi *pi); 222 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); 223 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 224 static void umtx_thread_cleanup(struct thread *td); 225 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 226 struct image_params *imgp __unused); 227 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 228 229 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 230 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 231 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 232 233 static struct mtx umtx_lock; 234 235 static void 236 umtxq_sysinit(void *arg __unused) 237 { 238 int i, j; 239 240 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 241 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 242 for (i = 0; i < 2; ++i) { 243 for (j = 0; j < UMTX_CHAINS; ++j) { 244 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 245 MTX_DEF | MTX_DUPOK); 246 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]); 247 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]); 248 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 249 umtxq_chains[i][j].uc_busy = 0; 250 umtxq_chains[i][j].uc_waiters = 0; 251 } 252 } 253 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 254 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 255 EVENTHANDLER_PRI_ANY); 256 } 257 258 struct umtx_q * 259 umtxq_alloc(void) 260 { 261 struct umtx_q *uq; 262 263 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 264 TAILQ_INIT(&uq->uq_pi_contested); 265 uq->uq_inherited_pri = PRI_MAX; 266 return (uq); 267 } 268 269 void 270 umtxq_free(struct umtx_q *uq) 271 { 272 free(uq, M_UMTX); 273 } 274 275 static inline void 276 umtxq_hash(struct umtx_key *key) 277 { 278 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 279 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 280 } 281 282 static inline int 283 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) 284 { 285 return (k1->type == k2->type && 286 k1->info.both.a == k2->info.both.a && 287 k1->info.both.b == 
k2->info.both.b); 288 } 289 290 static inline struct umtxq_chain * 291 umtxq_getchain(struct umtx_key *key) 292 { 293 if (key->type <= TYPE_CV) 294 return (&umtxq_chains[1][key->hash]); 295 return (&umtxq_chains[0][key->hash]); 296 } 297 298 /* 299 * Lock a chain. 300 */ 301 static inline void 302 umtxq_lock(struct umtx_key *key) 303 { 304 struct umtxq_chain *uc; 305 306 uc = umtxq_getchain(key); 307 mtx_lock(&uc->uc_lock); 308 } 309 310 /* 311 * Unlock a chain. 312 */ 313 static inline void 314 umtxq_unlock(struct umtx_key *key) 315 { 316 struct umtxq_chain *uc; 317 318 uc = umtxq_getchain(key); 319 mtx_unlock(&uc->uc_lock); 320 } 321 322 /* 323 * Set chain to busy state when following operation 324 * may be blocked (kernel mutex can not be used). 325 */ 326 static inline void 327 umtxq_busy(struct umtx_key *key) 328 { 329 struct umtxq_chain *uc; 330 331 uc = umtxq_getchain(key); 332 mtx_assert(&uc->uc_lock, MA_OWNED); 333 if (uc->uc_busy) { 334 #ifdef SMP 335 if (smp_cpus > 1) { 336 int count = BUSY_SPINS; 337 if (count > 0) { 338 umtxq_unlock(key); 339 while (uc->uc_busy && --count > 0) 340 cpu_spinwait(); 341 umtxq_lock(key); 342 } 343 } 344 #endif 345 while (uc->uc_busy) { 346 uc->uc_waiters++; 347 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 348 uc->uc_waiters--; 349 } 350 } 351 uc->uc_busy = 1; 352 } 353 354 /* 355 * Unbusy a chain. 356 */ 357 static inline void 358 umtxq_unbusy(struct umtx_key *key) 359 { 360 struct umtxq_chain *uc; 361 362 uc = umtxq_getchain(key); 363 mtx_assert(&uc->uc_lock, MA_OWNED); 364 KASSERT(uc->uc_busy != 0, ("not busy")); 365 uc->uc_busy = 0; 366 if (uc->uc_waiters) 367 wakeup_one(uc); 368 } 369 370 static inline void 371 umtxq_insert_queue(struct umtx_q *uq, int q) 372 { 373 struct umtxq_chain *uc; 374 375 uc = umtxq_getchain(&uq->uq_key); 376 UMTXQ_LOCKED_ASSERT(uc); 377 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link); 378 uq->uq_flags |= UQF_UMTXQ; 379 } 380 381 static inline void 382 umtxq_remove_queue(struct umtx_q *uq, int q) 383 { 384 struct umtxq_chain *uc; 385 386 uc = umtxq_getchain(&uq->uq_key); 387 UMTXQ_LOCKED_ASSERT(uc); 388 if (uq->uq_flags & UQF_UMTXQ) { 389 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link); 390 uq->uq_flags &= ~UQF_UMTXQ; 391 } 392 } 393 394 /* 395 * Check if there are multiple waiters 396 */ 397 static int 398 umtxq_count(struct umtx_key *key) 399 { 400 struct umtxq_chain *uc; 401 struct umtx_q *uq; 402 int count = 0; 403 404 uc = umtxq_getchain(key); 405 UMTXQ_LOCKED_ASSERT(uc); 406 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 407 if (umtx_key_match(&uq->uq_key, key)) { 408 if (++count > 1) 409 break; 410 } 411 } 412 return (count); 413 } 414 415 /* 416 * Check if there are multiple PI waiters and returns first 417 * waiter. 418 */ 419 static int 420 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 421 { 422 struct umtxq_chain *uc; 423 struct umtx_q *uq; 424 int count = 0; 425 426 *first = NULL; 427 uc = umtxq_getchain(key); 428 UMTXQ_LOCKED_ASSERT(uc); 429 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 430 if (umtx_key_match(&uq->uq_key, key)) { 431 if (++count > 1) 432 break; 433 *first = uq; 434 } 435 } 436 return (count); 437 } 438 439 /* 440 * Wake up threads waiting on an userland object. 
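 *
 * At most n_wake threads whose key matches are removed from the given
 * sleep queue and woken; the number actually woken is returned.  The
 * public wake path, kern_umtx_wake() below, is essentially:
 *
 *	umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, share, &key);
 *	umtxq_lock(&key);
 *	umtxq_signal(&key, n_wake);
 *	umtxq_unlock(&key);
 *	umtx_key_release(&key);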
441 */ 442 443 static int 444 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 445 { 446 struct umtxq_chain *uc; 447 struct umtx_q *uq, *next; 448 int ret; 449 450 ret = 0; 451 uc = umtxq_getchain(key); 452 UMTXQ_LOCKED_ASSERT(uc); 453 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) { 454 if (umtx_key_match(&uq->uq_key, key)) { 455 umtxq_remove_queue(uq, q); 456 wakeup(uq); 457 if (++ret >= n_wake) 458 break; 459 } 460 } 461 return (ret); 462 } 463 464 465 /* 466 * Wake up specified thread. 467 */ 468 static inline void 469 umtxq_signal_thread(struct umtx_q *uq) 470 { 471 struct umtxq_chain *uc; 472 473 uc = umtxq_getchain(&uq->uq_key); 474 UMTXQ_LOCKED_ASSERT(uc); 475 umtxq_remove(uq); 476 wakeup(uq); 477 } 478 479 /* 480 * Put thread into sleep state, before sleeping, check if 481 * thread was removed from umtx queue. 482 */ 483 static inline int 484 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) 485 { 486 struct umtxq_chain *uc; 487 int error; 488 489 uc = umtxq_getchain(&uq->uq_key); 490 UMTXQ_LOCKED_ASSERT(uc); 491 if (!(uq->uq_flags & UQF_UMTXQ)) 492 return (0); 493 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 494 if (error == EWOULDBLOCK) 495 error = ETIMEDOUT; 496 return (error); 497 } 498 499 /* 500 * Convert userspace address into unique logical address. 501 */ 502 static int 503 umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 504 { 505 struct thread *td = curthread; 506 vm_map_t map; 507 vm_map_entry_t entry; 508 vm_pindex_t pindex; 509 vm_prot_t prot; 510 boolean_t wired; 511 512 key->type = type; 513 if (share == THREAD_SHARE) { 514 key->shared = 0; 515 key->info.private.vs = td->td_proc->p_vmspace; 516 key->info.private.addr = (uintptr_t)addr; 517 } else { 518 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 519 map = &td->td_proc->p_vmspace->vm_map; 520 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 521 &entry, &key->info.shared.object, &pindex, &prot, 522 &wired) != KERN_SUCCESS) { 523 return EFAULT; 524 } 525 526 if ((share == PROCESS_SHARE) || 527 (share == AUTO_SHARE && 528 VM_INHERIT_SHARE == entry->inheritance)) { 529 key->shared = 1; 530 key->info.shared.offset = entry->offset + entry->start - 531 (vm_offset_t)addr; 532 vm_object_reference(key->info.shared.object); 533 } else { 534 key->shared = 0; 535 key->info.private.vs = td->td_proc->p_vmspace; 536 key->info.private.addr = (uintptr_t)addr; 537 } 538 vm_map_lookup_done(map, entry); 539 } 540 541 umtxq_hash(key); 542 return (0); 543 } 544 545 /* 546 * Release key. 547 */ 548 static inline void 549 umtx_key_release(struct umtx_key *key) 550 { 551 if (key->shared) 552 vm_object_deallocate(key->info.shared.object); 553 } 554 555 /* 556 * Lock a umtx object. 557 */ 558 static int 559 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) 560 { 561 struct umtx_q *uq; 562 u_long owner; 563 u_long old; 564 int error = 0; 565 566 uq = td->td_umtxq; 567 568 /* 569 * Care must be exercised when dealing with umtx structure. It 570 * can fault on any access. 571 */ 572 for (;;) { 573 /* 574 * Try the uncontested case. This should be done in userland. 575 */ 576 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 577 578 /* The acquire succeeded. */ 579 if (owner == UMTX_UNOWNED) 580 return (0); 581 582 /* The address was invalid. */ 583 if (owner == -1) 584 return (EFAULT); 585 586 /* If no one owns it but it is contested try to acquire it. 
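		 * The new value keeps the CONTESTED bit set: other
		 * waiters may still be queued in the kernel, so the
		 * eventual unlock must go through do_unlock_umtx() to
		 * wake one of them.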
*/ 587 if (owner == UMTX_CONTESTED) { 588 owner = casuword(&umtx->u_owner, 589 UMTX_CONTESTED, id | UMTX_CONTESTED); 590 591 if (owner == UMTX_CONTESTED) 592 return (0); 593 594 /* The address was invalid. */ 595 if (owner == -1) 596 return (EFAULT); 597 598 /* If this failed the lock has changed, restart. */ 599 continue; 600 } 601 602 /* 603 * If we caught a signal, we have retried and now 604 * exit immediately. 605 */ 606 if (error != 0) 607 return (error); 608 609 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 610 AUTO_SHARE, &uq->uq_key)) != 0) 611 return (error); 612 613 umtxq_lock(&uq->uq_key); 614 umtxq_busy(&uq->uq_key); 615 umtxq_insert(uq); 616 umtxq_unbusy(&uq->uq_key); 617 umtxq_unlock(&uq->uq_key); 618 619 /* 620 * Set the contested bit so that a release in user space 621 * knows to use the system call for unlock. If this fails 622 * either some one else has acquired the lock or it has been 623 * released. 624 */ 625 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 626 627 /* The address was invalid. */ 628 if (old == -1) { 629 umtxq_lock(&uq->uq_key); 630 umtxq_remove(uq); 631 umtxq_unlock(&uq->uq_key); 632 umtx_key_release(&uq->uq_key); 633 return (EFAULT); 634 } 635 636 /* 637 * We set the contested bit, sleep. Otherwise the lock changed 638 * and we need to retry or we lost a race to the thread 639 * unlocking the umtx. 640 */ 641 umtxq_lock(&uq->uq_key); 642 if (old == owner) 643 error = umtxq_sleep(uq, "umtx", timo); 644 umtxq_remove(uq); 645 umtxq_unlock(&uq->uq_key); 646 umtx_key_release(&uq->uq_key); 647 } 648 649 return (0); 650 } 651 652 /* 653 * Lock a umtx object. 654 */ 655 static int 656 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 657 struct timespec *timeout) 658 { 659 struct timespec ts, ts2, ts3; 660 struct timeval tv; 661 int error; 662 663 if (timeout == NULL) { 664 error = _do_lock_umtx(td, umtx, id, 0); 665 /* Mutex locking is restarted if it is interrupted. */ 666 if (error == EINTR) 667 error = ERESTART; 668 } else { 669 getnanouptime(&ts); 670 timespecadd(&ts, timeout); 671 TIMESPEC_TO_TIMEVAL(&tv, timeout); 672 for (;;) { 673 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); 674 if (error != ETIMEDOUT) 675 break; 676 getnanouptime(&ts2); 677 if (timespeccmp(&ts2, &ts, >=)) { 678 error = ETIMEDOUT; 679 break; 680 } 681 ts3 = ts; 682 timespecsub(&ts3, &ts2); 683 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 684 } 685 /* Timed-locking is not restarted. */ 686 if (error == ERESTART) 687 error = EINTR; 688 } 689 return (error); 690 } 691 692 /* 693 * Unlock a umtx object. 694 */ 695 static int 696 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 697 { 698 struct umtx_key key; 699 u_long owner; 700 u_long old; 701 int error; 702 int count; 703 704 /* 705 * Make sure we own this mtx. 
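	 * The unlock protocol mirrors the lock side: read the owner word
	 * with fuword(), verify that (owner & ~UMTX_CONTESTED) matches
	 * our id, then CAS the word to UMTX_UNOWNED when at most one
	 * waiter remains, or to UMTX_CONTESTED when more are queued so
	 * that the next owner also releases through the kernel.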
706 */ 707 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 708 if (owner == -1) 709 return (EFAULT); 710 711 if ((owner & ~UMTX_CONTESTED) != id) 712 return (EPERM); 713 714 /* This should be done in userland */ 715 if ((owner & UMTX_CONTESTED) == 0) { 716 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 717 if (old == -1) 718 return (EFAULT); 719 if (old == owner) 720 return (0); 721 owner = old; 722 } 723 724 /* We should only ever be in here for contested locks */ 725 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 726 &key)) != 0) 727 return (error); 728 729 umtxq_lock(&key); 730 umtxq_busy(&key); 731 count = umtxq_count(&key); 732 umtxq_unlock(&key); 733 734 /* 735 * When unlocking the umtx, it must be marked as unowned if 736 * there is zero or one thread only waiting for it. 737 * Otherwise, it must be marked as contested. 738 */ 739 old = casuword(&umtx->u_owner, owner, 740 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 741 umtxq_lock(&key); 742 umtxq_signal(&key,1); 743 umtxq_unbusy(&key); 744 umtxq_unlock(&key); 745 umtx_key_release(&key); 746 if (old == -1) 747 return (EFAULT); 748 if (old != owner) 749 return (EINVAL); 750 return (0); 751 } 752 753 #ifdef COMPAT_IA32 754 755 /* 756 * Lock a umtx object. 757 */ 758 static int 759 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) 760 { 761 struct umtx_q *uq; 762 uint32_t owner; 763 uint32_t old; 764 int error = 0; 765 766 uq = td->td_umtxq; 767 768 /* 769 * Care must be exercised when dealing with umtx structure. It 770 * can fault on any access. 771 */ 772 for (;;) { 773 /* 774 * Try the uncontested case. This should be done in userland. 775 */ 776 owner = casuword32(m, UMUTEX_UNOWNED, id); 777 778 /* The acquire succeeded. */ 779 if (owner == UMUTEX_UNOWNED) 780 return (0); 781 782 /* The address was invalid. */ 783 if (owner == -1) 784 return (EFAULT); 785 786 /* If no one owns it but it is contested try to acquire it. */ 787 if (owner == UMUTEX_CONTESTED) { 788 owner = casuword32(m, 789 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 790 if (owner == UMUTEX_CONTESTED) 791 return (0); 792 793 /* The address was invalid. */ 794 if (owner == -1) 795 return (EFAULT); 796 797 /* If this failed the lock has changed, restart. */ 798 continue; 799 } 800 801 /* 802 * If we caught a signal, we have retried and now 803 * exit immediately. 804 */ 805 if (error != 0) 806 return (error); 807 808 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 809 AUTO_SHARE, &uq->uq_key)) != 0) 810 return (error); 811 812 umtxq_lock(&uq->uq_key); 813 umtxq_busy(&uq->uq_key); 814 umtxq_insert(uq); 815 umtxq_unbusy(&uq->uq_key); 816 umtxq_unlock(&uq->uq_key); 817 818 /* 819 * Set the contested bit so that a release in user space 820 * knows to use the system call for unlock. If this fails 821 * either some one else has acquired the lock or it has been 822 * released. 823 */ 824 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 825 826 /* The address was invalid. */ 827 if (old == -1) { 828 umtxq_lock(&uq->uq_key); 829 umtxq_remove(uq); 830 umtxq_unlock(&uq->uq_key); 831 umtx_key_release(&uq->uq_key); 832 return (EFAULT); 833 } 834 835 /* 836 * We set the contested bit, sleep. Otherwise the lock changed 837 * and we need to retry or we lost a race to the thread 838 * unlocking the umtx. 
839 */ 840 umtxq_lock(&uq->uq_key); 841 if (old == owner) 842 error = umtxq_sleep(uq, "umtx", timo); 843 umtxq_remove(uq); 844 umtxq_unlock(&uq->uq_key); 845 umtx_key_release(&uq->uq_key); 846 } 847 848 return (0); 849 } 850 851 /* 852 * Lock a umtx object. 853 */ 854 static int 855 do_lock_umtx32(struct thread *td, void *m, uint32_t id, 856 struct timespec *timeout) 857 { 858 struct timespec ts, ts2, ts3; 859 struct timeval tv; 860 int error; 861 862 if (timeout == NULL) { 863 error = _do_lock_umtx32(td, m, id, 0); 864 /* Mutex locking is restarted if it is interrupted. */ 865 if (error == EINTR) 866 error = ERESTART; 867 } else { 868 getnanouptime(&ts); 869 timespecadd(&ts, timeout); 870 TIMESPEC_TO_TIMEVAL(&tv, timeout); 871 for (;;) { 872 error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); 873 if (error != ETIMEDOUT) 874 break; 875 getnanouptime(&ts2); 876 if (timespeccmp(&ts2, &ts, >=)) { 877 error = ETIMEDOUT; 878 break; 879 } 880 ts3 = ts; 881 timespecsub(&ts3, &ts2); 882 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 883 } 884 /* Timed-locking is not restarted. */ 885 if (error == ERESTART) 886 error = EINTR; 887 } 888 return (error); 889 } 890 891 /* 892 * Unlock a umtx object. 893 */ 894 static int 895 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 896 { 897 struct umtx_key key; 898 uint32_t owner; 899 uint32_t old; 900 int error; 901 int count; 902 903 /* 904 * Make sure we own this mtx. 905 */ 906 owner = fuword32(m); 907 if (owner == -1) 908 return (EFAULT); 909 910 if ((owner & ~UMUTEX_CONTESTED) != id) 911 return (EPERM); 912 913 /* This should be done in userland */ 914 if ((owner & UMUTEX_CONTESTED) == 0) { 915 old = casuword32(m, owner, UMUTEX_UNOWNED); 916 if (old == -1) 917 return (EFAULT); 918 if (old == owner) 919 return (0); 920 owner = old; 921 } 922 923 /* We should only ever be in here for contested locks */ 924 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 925 &key)) != 0) 926 return (error); 927 928 umtxq_lock(&key); 929 umtxq_busy(&key); 930 count = umtxq_count(&key); 931 umtxq_unlock(&key); 932 933 /* 934 * When unlocking the umtx, it must be marked as unowned if 935 * there is zero or one thread only waiting for it. 936 * Otherwise, it must be marked as contested. 937 */ 938 old = casuword32(m, owner, 939 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 940 umtxq_lock(&key); 941 umtxq_signal(&key,1); 942 umtxq_unbusy(&key); 943 umtxq_unlock(&key); 944 umtx_key_release(&key); 945 if (old == -1) 946 return (EFAULT); 947 if (old != owner) 948 return (EINVAL); 949 return (0); 950 } 951 #endif 952 953 /* 954 * Fetch and compare value, sleep on the address if value is not changed. 955 */ 956 static int 957 do_wait(struct thread *td, void *addr, u_long id, 958 struct timespec *timeout, int compat32, int is_private) 959 { 960 struct umtx_q *uq; 961 struct timespec ts, ts2, ts3; 962 struct timeval tv; 963 u_long tmp; 964 int error = 0; 965 966 uq = td->td_umtxq; 967 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 968 is_private ? 
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 969 return (error); 970 971 umtxq_lock(&uq->uq_key); 972 umtxq_insert(uq); 973 umtxq_unlock(&uq->uq_key); 974 if (compat32 == 0) 975 tmp = fuword(addr); 976 else 977 tmp = fuword32(addr); 978 if (tmp != id) { 979 umtxq_lock(&uq->uq_key); 980 umtxq_remove(uq); 981 umtxq_unlock(&uq->uq_key); 982 } else if (timeout == NULL) { 983 umtxq_lock(&uq->uq_key); 984 error = umtxq_sleep(uq, "uwait", 0); 985 umtxq_remove(uq); 986 umtxq_unlock(&uq->uq_key); 987 } else { 988 getnanouptime(&ts); 989 timespecadd(&ts, timeout); 990 TIMESPEC_TO_TIMEVAL(&tv, timeout); 991 umtxq_lock(&uq->uq_key); 992 for (;;) { 993 error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); 994 if (!(uq->uq_flags & UQF_UMTXQ)) 995 break; 996 if (error != ETIMEDOUT) 997 break; 998 umtxq_unlock(&uq->uq_key); 999 getnanouptime(&ts2); 1000 if (timespeccmp(&ts2, &ts, >=)) { 1001 error = ETIMEDOUT; 1002 umtxq_lock(&uq->uq_key); 1003 break; 1004 } 1005 ts3 = ts; 1006 timespecsub(&ts3, &ts2); 1007 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 1008 umtxq_lock(&uq->uq_key); 1009 } 1010 umtxq_remove(uq); 1011 umtxq_unlock(&uq->uq_key); 1012 } 1013 umtx_key_release(&uq->uq_key); 1014 if (error == ERESTART) 1015 error = EINTR; 1016 return (error); 1017 } 1018 1019 /* 1020 * Wake up threads sleeping on the specified address. 1021 */ 1022 int 1023 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1024 { 1025 struct umtx_key key; 1026 int ret; 1027 1028 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1029 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1030 return (ret); 1031 umtxq_lock(&key); 1032 ret = umtxq_signal(&key, n_wake); 1033 umtxq_unlock(&key); 1034 umtx_key_release(&key); 1035 return (0); 1036 } 1037 1038 /* 1039 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1040 */ 1041 static int 1042 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1043 int mode) 1044 { 1045 struct umtx_q *uq; 1046 uint32_t owner, old, id; 1047 int error = 0; 1048 1049 id = td->td_tid; 1050 uq = td->td_umtxq; 1051 1052 /* 1053 * Care must be exercised when dealing with umtx structure. It 1054 * can fault on any access. 1055 */ 1056 for (;;) { 1057 owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); 1058 if (mode == _UMUTEX_WAIT) { 1059 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) 1060 return (0); 1061 } else { 1062 /* 1063 * Try the uncontested case. This should be done in userland. 1064 */ 1065 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1066 1067 /* The acquire succeeded. */ 1068 if (owner == UMUTEX_UNOWNED) 1069 return (0); 1070 1071 /* The address was invalid. */ 1072 if (owner == -1) 1073 return (EFAULT); 1074 1075 /* If no one owns it but it is contested try to acquire it. */ 1076 if (owner == UMUTEX_CONTESTED) { 1077 owner = casuword32(&m->m_owner, 1078 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1079 1080 if (owner == UMUTEX_CONTESTED) 1081 return (0); 1082 1083 /* The address was invalid. */ 1084 if (owner == -1) 1085 return (EFAULT); 1086 1087 /* If this failed the lock has changed, restart. */ 1088 continue; 1089 } 1090 } 1091 1092 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1093 (owner & ~UMUTEX_CONTESTED) == id) 1094 return (EDEADLK); 1095 1096 if (mode == _UMUTEX_TRY) 1097 return (EBUSY); 1098 1099 /* 1100 * If we caught a signal, we have retried and now 1101 * exit immediately. 
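		 * "error" is whatever the previous iteration's
		 * umtxq_sleep() returned (typically EINTR or ETIMEDOUT);
		 * the acquire is always retried once more before the
		 * error is returned, so a wakeup that raced with the
		 * signal or timeout is not lost.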
1102 */ 1103 if (error != 0) 1104 return (error); 1105 1106 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1107 GET_SHARE(flags), &uq->uq_key)) != 0) 1108 return (error); 1109 1110 umtxq_lock(&uq->uq_key); 1111 umtxq_busy(&uq->uq_key); 1112 umtxq_insert(uq); 1113 umtxq_unlock(&uq->uq_key); 1114 1115 /* 1116 * Set the contested bit so that a release in user space 1117 * knows to use the system call for unlock. If this fails 1118 * either some one else has acquired the lock or it has been 1119 * released. 1120 */ 1121 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1122 1123 /* The address was invalid. */ 1124 if (old == -1) { 1125 umtxq_lock(&uq->uq_key); 1126 umtxq_remove(uq); 1127 umtxq_unbusy(&uq->uq_key); 1128 umtxq_unlock(&uq->uq_key); 1129 umtx_key_release(&uq->uq_key); 1130 return (EFAULT); 1131 } 1132 1133 /* 1134 * We set the contested bit, sleep. Otherwise the lock changed 1135 * and we need to retry or we lost a race to the thread 1136 * unlocking the umtx. 1137 */ 1138 umtxq_lock(&uq->uq_key); 1139 umtxq_unbusy(&uq->uq_key); 1140 if (old == owner) 1141 error = umtxq_sleep(uq, "umtxn", timo); 1142 umtxq_remove(uq); 1143 umtxq_unlock(&uq->uq_key); 1144 umtx_key_release(&uq->uq_key); 1145 } 1146 1147 return (0); 1148 } 1149 1150 /* 1151 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1152 */ 1153 /* 1154 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1155 */ 1156 static int 1157 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1158 { 1159 struct umtx_key key; 1160 uint32_t owner, old, id; 1161 int error; 1162 int count; 1163 1164 id = td->td_tid; 1165 /* 1166 * Make sure we own this mtx. 1167 */ 1168 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1169 if (owner == -1) 1170 return (EFAULT); 1171 1172 if ((owner & ~UMUTEX_CONTESTED) != id) 1173 return (EPERM); 1174 1175 if ((owner & UMUTEX_CONTESTED) == 0) { 1176 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1177 if (old == -1) 1178 return (EFAULT); 1179 if (old == owner) 1180 return (0); 1181 owner = old; 1182 } 1183 1184 /* We should only ever be in here for contested locks */ 1185 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1186 &key)) != 0) 1187 return (error); 1188 1189 umtxq_lock(&key); 1190 umtxq_busy(&key); 1191 count = umtxq_count(&key); 1192 umtxq_unlock(&key); 1193 1194 /* 1195 * When unlocking the umtx, it must be marked as unowned if 1196 * there is zero or one thread only waiting for it. 1197 * Otherwise, it must be marked as contested. 1198 */ 1199 old = casuword32(&m->m_owner, owner, 1200 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1201 umtxq_lock(&key); 1202 umtxq_signal(&key,1); 1203 umtxq_unbusy(&key); 1204 umtxq_unlock(&key); 1205 umtx_key_release(&key); 1206 if (old == -1) 1207 return (EFAULT); 1208 if (old != owner) 1209 return (EINVAL); 1210 return (0); 1211 } 1212 1213 /* 1214 * Check if the mutex is available and wake up a waiter, 1215 * only for simple mutex. 
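 *
 * This is presumably meant to pair with the _UMUTEX_WAIT mode of
 * _do_lock_normal() above: a thread can sleep until the mutex looks
 * free without taking ownership in the kernel, and is woken here once
 * the owner bits have been cleared.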
1216 */ 1217 static int 1218 do_wake_umutex(struct thread *td, struct umutex *m) 1219 { 1220 struct umtx_key key; 1221 uint32_t owner; 1222 uint32_t flags; 1223 int error; 1224 int count; 1225 1226 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1227 if (owner == -1) 1228 return (EFAULT); 1229 1230 if ((owner & ~UMUTEX_CONTESTED) != 0) 1231 return (0); 1232 1233 flags = fuword32(&m->m_flags); 1234 1235 /* We should only ever be in here for contested locks */ 1236 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1237 &key)) != 0) 1238 return (error); 1239 1240 umtxq_lock(&key); 1241 umtxq_busy(&key); 1242 count = umtxq_count(&key); 1243 umtxq_unlock(&key); 1244 1245 if (count <= 1) 1246 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED); 1247 1248 umtxq_lock(&key); 1249 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1250 umtxq_signal(&key, 1); 1251 umtxq_unbusy(&key); 1252 umtxq_unlock(&key); 1253 umtx_key_release(&key); 1254 return (0); 1255 } 1256 1257 static inline struct umtx_pi * 1258 umtx_pi_alloc(int flags) 1259 { 1260 struct umtx_pi *pi; 1261 1262 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1263 TAILQ_INIT(&pi->pi_blocked); 1264 atomic_add_int(&umtx_pi_allocated, 1); 1265 return (pi); 1266 } 1267 1268 static inline void 1269 umtx_pi_free(struct umtx_pi *pi) 1270 { 1271 uma_zfree(umtx_pi_zone, pi); 1272 atomic_add_int(&umtx_pi_allocated, -1); 1273 } 1274 1275 /* 1276 * Adjust the thread's position on a pi_state after its priority has been 1277 * changed. 1278 */ 1279 static int 1280 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1281 { 1282 struct umtx_q *uq, *uq1, *uq2; 1283 struct thread *td1; 1284 1285 mtx_assert(&umtx_lock, MA_OWNED); 1286 if (pi == NULL) 1287 return (0); 1288 1289 uq = td->td_umtxq; 1290 1291 /* 1292 * Check if the thread needs to be moved on the blocked chain. 1293 * It needs to be moved if either its priority is lower than 1294 * the previous thread or higher than the next thread. 1295 */ 1296 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1297 uq2 = TAILQ_NEXT(uq, uq_lockq); 1298 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1299 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1300 /* 1301 * Remove thread from blocked chain and determine where 1302 * it should be moved to. 1303 */ 1304 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1305 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1306 td1 = uq1->uq_thread; 1307 MPASS(td1->td_proc->p_magic == P_MAGIC); 1308 if (UPRI(td1) > UPRI(td)) 1309 break; 1310 } 1311 1312 if (uq1 == NULL) 1313 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1314 else 1315 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1316 } 1317 return (1); 1318 } 1319 1320 /* 1321 * Propagate priority when a thread is blocked on POSIX 1322 * PI mutex. 1323 */ 1324 static void 1325 umtx_propagate_priority(struct thread *td) 1326 { 1327 struct umtx_q *uq; 1328 struct umtx_pi *pi; 1329 int pri; 1330 1331 mtx_assert(&umtx_lock, MA_OWNED); 1332 pri = UPRI(td); 1333 uq = td->td_umtxq; 1334 pi = uq->uq_pi_blocked; 1335 if (pi == NULL) 1336 return; 1337 1338 for (;;) { 1339 td = pi->pi_owner; 1340 if (td == NULL) 1341 return; 1342 1343 MPASS(td->td_proc != NULL); 1344 MPASS(td->td_proc->p_magic == P_MAGIC); 1345 1346 if (UPRI(td) <= pri) 1347 return; 1348 1349 thread_lock(td); 1350 sched_lend_user_prio(td, pri); 1351 thread_unlock(td); 1352 1353 /* 1354 * Pick up the lock that td is blocked on. 
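		 * Example of the chain walk: if T1 blocks on a PI mutex
		 * owned by T2, and T2 is itself blocked on another PI
		 * mutex owned by T3, the first pass lends T1's
		 * (UPRI-clamped) priority to T2, and the next pass
		 * follows T2's uq_pi_blocked to lend it to T3 as well.
		 * The walk stops when an owner already runs at that
		 * priority or better, or is not blocked on a PI mutex.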
1355 */ 1356 uq = td->td_umtxq; 1357 pi = uq->uq_pi_blocked; 1358 /* Resort td on the list if needed. */ 1359 if (!umtx_pi_adjust_thread(pi, td)) 1360 break; 1361 } 1362 } 1363 1364 /* 1365 * Unpropagate priority for a PI mutex when a thread blocked on 1366 * it is interrupted by signal or resumed by others. 1367 */ 1368 static void 1369 umtx_unpropagate_priority(struct umtx_pi *pi) 1370 { 1371 struct umtx_q *uq, *uq_owner; 1372 struct umtx_pi *pi2; 1373 int pri, oldpri; 1374 1375 mtx_assert(&umtx_lock, MA_OWNED); 1376 1377 while (pi != NULL && pi->pi_owner != NULL) { 1378 pri = PRI_MAX; 1379 uq_owner = pi->pi_owner->td_umtxq; 1380 1381 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1382 uq = TAILQ_FIRST(&pi2->pi_blocked); 1383 if (uq != NULL) { 1384 if (pri > UPRI(uq->uq_thread)) 1385 pri = UPRI(uq->uq_thread); 1386 } 1387 } 1388 1389 if (pri > uq_owner->uq_inherited_pri) 1390 pri = uq_owner->uq_inherited_pri; 1391 thread_lock(pi->pi_owner); 1392 oldpri = pi->pi_owner->td_user_pri; 1393 sched_unlend_user_prio(pi->pi_owner, pri); 1394 thread_unlock(pi->pi_owner); 1395 umtx_pi_adjust_locked(pi->pi_owner, oldpri); 1396 pi = uq_owner->uq_pi_blocked; 1397 } 1398 } 1399 1400 /* 1401 * Insert a PI mutex into owned list. 1402 */ 1403 static void 1404 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1405 { 1406 struct umtx_q *uq_owner; 1407 1408 uq_owner = owner->td_umtxq; 1409 mtx_assert(&umtx_lock, MA_OWNED); 1410 if (pi->pi_owner != NULL) 1411 panic("pi_ower != NULL"); 1412 pi->pi_owner = owner; 1413 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1414 } 1415 1416 /* 1417 * Claim ownership of a PI mutex. 1418 */ 1419 static int 1420 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1421 { 1422 struct umtx_q *uq, *uq_owner; 1423 1424 uq_owner = owner->td_umtxq; 1425 mtx_lock_spin(&umtx_lock); 1426 if (pi->pi_owner == owner) { 1427 mtx_unlock_spin(&umtx_lock); 1428 return (0); 1429 } 1430 1431 if (pi->pi_owner != NULL) { 1432 /* 1433 * userland may have already messed the mutex, sigh. 1434 */ 1435 mtx_unlock_spin(&umtx_lock); 1436 return (EPERM); 1437 } 1438 umtx_pi_setowner(pi, owner); 1439 uq = TAILQ_FIRST(&pi->pi_blocked); 1440 if (uq != NULL) { 1441 int pri; 1442 1443 pri = UPRI(uq->uq_thread); 1444 thread_lock(owner); 1445 if (pri < UPRI(owner)) 1446 sched_lend_user_prio(owner, pri); 1447 thread_unlock(owner); 1448 } 1449 mtx_unlock_spin(&umtx_lock); 1450 return (0); 1451 } 1452 1453 static void 1454 umtx_pi_adjust_locked(struct thread *td, u_char oldpri) 1455 { 1456 struct umtx_q *uq; 1457 struct umtx_pi *pi; 1458 1459 uq = td->td_umtxq; 1460 /* 1461 * Pick up the lock that td is blocked on. 1462 */ 1463 pi = uq->uq_pi_blocked; 1464 MPASS(pi != NULL); 1465 1466 /* Resort the turnstile on the list. */ 1467 if (!umtx_pi_adjust_thread(pi, td)) 1468 return; 1469 1470 /* 1471 * If our priority was lowered and we are at the head of the 1472 * turnstile, then propagate our new priority up the chain. 1473 */ 1474 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) 1475 umtx_propagate_priority(td); 1476 } 1477 1478 /* 1479 * Adjust a thread's order position in its blocked PI mutex, 1480 * this may result new priority propagating process. 1481 */ 1482 void 1483 umtx_pi_adjust(struct thread *td, u_char oldpri) 1484 { 1485 struct umtx_q *uq; 1486 struct umtx_pi *pi; 1487 1488 uq = td->td_umtxq; 1489 mtx_lock_spin(&umtx_lock); 1490 /* 1491 * Pick up the lock that td is blocked on. 
1492 */ 1493 pi = uq->uq_pi_blocked; 1494 if (pi != NULL) 1495 umtx_pi_adjust_locked(td, oldpri); 1496 mtx_unlock_spin(&umtx_lock); 1497 } 1498 1499 /* 1500 * Sleep on a PI mutex. 1501 */ 1502 static int 1503 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1504 uint32_t owner, const char *wmesg, int timo) 1505 { 1506 struct umtxq_chain *uc; 1507 struct thread *td, *td1; 1508 struct umtx_q *uq1; 1509 int pri; 1510 int error = 0; 1511 1512 td = uq->uq_thread; 1513 KASSERT(td == curthread, ("inconsistent uq_thread")); 1514 uc = umtxq_getchain(&uq->uq_key); 1515 UMTXQ_LOCKED_ASSERT(uc); 1516 umtxq_insert(uq); 1517 if (pi->pi_owner == NULL) { 1518 /* XXX 1519 * Current, We only support process private PI-mutex, 1520 * non-contended PI-mutexes are locked in userland. 1521 * Process shared PI-mutex should always be initialized 1522 * by kernel and be registered in kernel, locking should 1523 * always be done by kernel to avoid security problems. 1524 * For process private PI-mutex, we can find owner 1525 * thread and boost its priority safely. 1526 */ 1527 PROC_LOCK(curproc); 1528 td1 = thread_find(curproc, owner); 1529 mtx_lock_spin(&umtx_lock); 1530 if (td1 != NULL && pi->pi_owner == NULL) { 1531 uq1 = td1->td_umtxq; 1532 umtx_pi_setowner(pi, td1); 1533 } 1534 PROC_UNLOCK(curproc); 1535 } else { 1536 mtx_lock_spin(&umtx_lock); 1537 } 1538 1539 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1540 pri = UPRI(uq1->uq_thread); 1541 if (pri > UPRI(td)) 1542 break; 1543 } 1544 1545 if (uq1 != NULL) 1546 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1547 else 1548 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1549 1550 uq->uq_pi_blocked = pi; 1551 thread_lock(td); 1552 td->td_flags |= TDF_UPIBLOCKED; 1553 thread_unlock(td); 1554 mtx_unlock_spin(&umtx_lock); 1555 umtxq_unlock(&uq->uq_key); 1556 1557 mtx_lock_spin(&umtx_lock); 1558 umtx_propagate_priority(td); 1559 mtx_unlock_spin(&umtx_lock); 1560 1561 umtxq_lock(&uq->uq_key); 1562 if (uq->uq_flags & UQF_UMTXQ) { 1563 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 1564 if (error == EWOULDBLOCK) 1565 error = ETIMEDOUT; 1566 if (uq->uq_flags & UQF_UMTXQ) { 1567 umtxq_busy(&uq->uq_key); 1568 umtxq_remove(uq); 1569 umtxq_unbusy(&uq->uq_key); 1570 } 1571 } 1572 umtxq_unlock(&uq->uq_key); 1573 1574 mtx_lock_spin(&umtx_lock); 1575 uq->uq_pi_blocked = NULL; 1576 thread_lock(td); 1577 td->td_flags &= ~TDF_UPIBLOCKED; 1578 thread_unlock(td); 1579 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1580 umtx_unpropagate_priority(pi); 1581 mtx_unlock_spin(&umtx_lock); 1582 1583 umtxq_lock(&uq->uq_key); 1584 1585 return (error); 1586 } 1587 1588 /* 1589 * Add reference count for a PI mutex. 1590 */ 1591 static void 1592 umtx_pi_ref(struct umtx_pi *pi) 1593 { 1594 struct umtxq_chain *uc; 1595 1596 uc = umtxq_getchain(&pi->pi_key); 1597 UMTXQ_LOCKED_ASSERT(uc); 1598 pi->pi_refcount++; 1599 } 1600 1601 /* 1602 * Decrease reference count for a PI mutex, if the counter 1603 * is decreased to zero, its memory space is freed. 
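 *
 * References are short-lived: _do_lock_pi() takes one with
 * umtx_pi_ref() right after looking the structure up (or inserting it)
 * and drops it here once the lock attempt is finished, so a umtx_pi
 * only lives while some thread is actively contending for the mutex.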
1604 */ 1605 static void 1606 umtx_pi_unref(struct umtx_pi *pi) 1607 { 1608 struct umtxq_chain *uc; 1609 int free = 0; 1610 1611 uc = umtxq_getchain(&pi->pi_key); 1612 UMTXQ_LOCKED_ASSERT(uc); 1613 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1614 if (--pi->pi_refcount == 0) { 1615 mtx_lock_spin(&umtx_lock); 1616 if (pi->pi_owner != NULL) { 1617 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1618 pi, pi_link); 1619 pi->pi_owner = NULL; 1620 } 1621 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1622 ("blocked queue not empty")); 1623 mtx_unlock_spin(&umtx_lock); 1624 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1625 free = 1; 1626 } 1627 if (free) 1628 umtx_pi_free(pi); 1629 } 1630 1631 /* 1632 * Find a PI mutex in hash table. 1633 */ 1634 static struct umtx_pi * 1635 umtx_pi_lookup(struct umtx_key *key) 1636 { 1637 struct umtxq_chain *uc; 1638 struct umtx_pi *pi; 1639 1640 uc = umtxq_getchain(key); 1641 UMTXQ_LOCKED_ASSERT(uc); 1642 1643 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1644 if (umtx_key_match(&pi->pi_key, key)) { 1645 return (pi); 1646 } 1647 } 1648 return (NULL); 1649 } 1650 1651 /* 1652 * Insert a PI mutex into hash table. 1653 */ 1654 static inline void 1655 umtx_pi_insert(struct umtx_pi *pi) 1656 { 1657 struct umtxq_chain *uc; 1658 1659 uc = umtxq_getchain(&pi->pi_key); 1660 UMTXQ_LOCKED_ASSERT(uc); 1661 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1662 } 1663 1664 /* 1665 * Lock a PI mutex. 1666 */ 1667 static int 1668 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1669 int try) 1670 { 1671 struct umtx_q *uq; 1672 struct umtx_pi *pi, *new_pi; 1673 uint32_t id, owner, old; 1674 int error; 1675 1676 id = td->td_tid; 1677 uq = td->td_umtxq; 1678 1679 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1680 &uq->uq_key)) != 0) 1681 return (error); 1682 umtxq_lock(&uq->uq_key); 1683 pi = umtx_pi_lookup(&uq->uq_key); 1684 if (pi == NULL) { 1685 new_pi = umtx_pi_alloc(M_NOWAIT); 1686 if (new_pi == NULL) { 1687 umtxq_unlock(&uq->uq_key); 1688 new_pi = umtx_pi_alloc(M_WAITOK); 1689 new_pi->pi_key = uq->uq_key; 1690 umtxq_lock(&uq->uq_key); 1691 pi = umtx_pi_lookup(&uq->uq_key); 1692 if (pi != NULL) { 1693 umtx_pi_free(new_pi); 1694 new_pi = NULL; 1695 } 1696 } 1697 if (new_pi != NULL) { 1698 new_pi->pi_key = uq->uq_key; 1699 umtx_pi_insert(new_pi); 1700 pi = new_pi; 1701 } 1702 } 1703 umtx_pi_ref(pi); 1704 umtxq_unlock(&uq->uq_key); 1705 1706 /* 1707 * Care must be exercised when dealing with umtx structure. It 1708 * can fault on any access. 1709 */ 1710 for (;;) { 1711 /* 1712 * Try the uncontested case. This should be done in userland. 1713 */ 1714 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1715 1716 /* The acquire succeeded. */ 1717 if (owner == UMUTEX_UNOWNED) { 1718 error = 0; 1719 break; 1720 } 1721 1722 /* The address was invalid. */ 1723 if (owner == -1) { 1724 error = EFAULT; 1725 break; 1726 } 1727 1728 /* If no one owns it but it is contested try to acquire it. */ 1729 if (owner == UMUTEX_CONTESTED) { 1730 owner = casuword32(&m->m_owner, 1731 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1732 1733 if (owner == UMUTEX_CONTESTED) { 1734 umtxq_lock(&uq->uq_key); 1735 error = umtx_pi_claim(pi, td); 1736 umtxq_unlock(&uq->uq_key); 1737 break; 1738 } 1739 1740 /* The address was invalid. */ 1741 if (owner == -1) { 1742 error = EFAULT; 1743 break; 1744 } 1745 1746 /* If this failed the lock has changed, restart. 
*/ 1747 continue; 1748 } 1749 1750 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1751 (owner & ~UMUTEX_CONTESTED) == id) { 1752 error = EDEADLK; 1753 break; 1754 } 1755 1756 if (try != 0) { 1757 error = EBUSY; 1758 break; 1759 } 1760 1761 /* 1762 * If we caught a signal, we have retried and now 1763 * exit immediately. 1764 */ 1765 if (error != 0) 1766 break; 1767 1768 umtxq_lock(&uq->uq_key); 1769 umtxq_busy(&uq->uq_key); 1770 umtxq_unlock(&uq->uq_key); 1771 1772 /* 1773 * Set the contested bit so that a release in user space 1774 * knows to use the system call for unlock. If this fails 1775 * either some one else has acquired the lock or it has been 1776 * released. 1777 */ 1778 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1779 1780 /* The address was invalid. */ 1781 if (old == -1) { 1782 umtxq_lock(&uq->uq_key); 1783 umtxq_unbusy(&uq->uq_key); 1784 umtxq_unlock(&uq->uq_key); 1785 error = EFAULT; 1786 break; 1787 } 1788 1789 umtxq_lock(&uq->uq_key); 1790 umtxq_unbusy(&uq->uq_key); 1791 /* 1792 * We set the contested bit, sleep. Otherwise the lock changed 1793 * and we need to retry or we lost a race to the thread 1794 * unlocking the umtx. 1795 */ 1796 if (old == owner) 1797 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1798 "umtxpi", timo); 1799 umtxq_unlock(&uq->uq_key); 1800 } 1801 1802 umtxq_lock(&uq->uq_key); 1803 umtx_pi_unref(pi); 1804 umtxq_unlock(&uq->uq_key); 1805 1806 umtx_key_release(&uq->uq_key); 1807 return (error); 1808 } 1809 1810 /* 1811 * Unlock a PI mutex. 1812 */ 1813 static int 1814 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1815 { 1816 struct umtx_key key; 1817 struct umtx_q *uq_first, *uq_first2, *uq_me; 1818 struct umtx_pi *pi, *pi2; 1819 uint32_t owner, old, id; 1820 int error; 1821 int count; 1822 int pri; 1823 1824 id = td->td_tid; 1825 /* 1826 * Make sure we own this mtx. 
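	 * Unlocking a PI mutex also has to undo the priority lending:
	 * under umtx_lock the pi is detached from our uq_pi_contested
	 * list, our user priority is recomputed from the mutexes we
	 * still hold contested, and the first blocked waiter is woken
	 * so that it can take ownership (and any further boosts) itself.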
1827 */ 1828 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1829 if (owner == -1) 1830 return (EFAULT); 1831 1832 if ((owner & ~UMUTEX_CONTESTED) != id) 1833 return (EPERM); 1834 1835 /* This should be done in userland */ 1836 if ((owner & UMUTEX_CONTESTED) == 0) { 1837 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1838 if (old == -1) 1839 return (EFAULT); 1840 if (old == owner) 1841 return (0); 1842 owner = old; 1843 } 1844 1845 /* We should only ever be in here for contested locks */ 1846 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1847 &key)) != 0) 1848 return (error); 1849 1850 umtxq_lock(&key); 1851 umtxq_busy(&key); 1852 count = umtxq_count_pi(&key, &uq_first); 1853 if (uq_first != NULL) { 1854 pi = uq_first->uq_pi_blocked; 1855 if (pi->pi_owner != curthread) { 1856 umtxq_unbusy(&key); 1857 umtxq_unlock(&key); 1858 /* userland messed the mutex */ 1859 return (EPERM); 1860 } 1861 uq_me = curthread->td_umtxq; 1862 mtx_lock_spin(&umtx_lock); 1863 pi->pi_owner = NULL; 1864 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1865 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1866 pri = PRI_MAX; 1867 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1868 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1869 if (uq_first2 != NULL) { 1870 if (pri > UPRI(uq_first2->uq_thread)) 1871 pri = UPRI(uq_first2->uq_thread); 1872 } 1873 } 1874 thread_lock(curthread); 1875 sched_unlend_user_prio(curthread, pri); 1876 thread_unlock(curthread); 1877 mtx_unlock_spin(&umtx_lock); 1878 } 1879 umtxq_unlock(&key); 1880 1881 /* 1882 * When unlocking the umtx, it must be marked as unowned if 1883 * there is zero or one thread only waiting for it. 1884 * Otherwise, it must be marked as contested. 1885 */ 1886 old = casuword32(&m->m_owner, owner, 1887 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1888 1889 umtxq_lock(&key); 1890 if (uq_first != NULL) 1891 umtxq_signal_thread(uq_first); 1892 umtxq_unbusy(&key); 1893 umtxq_unlock(&key); 1894 umtx_key_release(&key); 1895 if (old == -1) 1896 return (EFAULT); 1897 if (old != owner) 1898 return (EINVAL); 1899 return (0); 1900 } 1901 1902 /* 1903 * Lock a PP mutex. 
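 *
 * The ceiling stored in m_ceilings[0] is an rtprio-style value; it is
 * converted below as PRI_MIN_REALTIME + (RTP_PRIO_MAX - m_ceilings[0]),
 * so RTP_PRIO_MAX maps to the strongest realtime user priority and 0 to
 * the weakest.  A thread with PRIV_SCHED_RTPRIO privilege runs at least
 * at that priority (via uq_inherited_pri) while it holds the mutex.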
1904 */ 1905 static int 1906 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1907 int try) 1908 { 1909 struct umtx_q *uq, *uq2; 1910 struct umtx_pi *pi; 1911 uint32_t ceiling; 1912 uint32_t owner, id; 1913 int error, pri, old_inherited_pri, su; 1914 1915 id = td->td_tid; 1916 uq = td->td_umtxq; 1917 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1918 &uq->uq_key)) != 0) 1919 return (error); 1920 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1921 for (;;) { 1922 old_inherited_pri = uq->uq_inherited_pri; 1923 umtxq_lock(&uq->uq_key); 1924 umtxq_busy(&uq->uq_key); 1925 umtxq_unlock(&uq->uq_key); 1926 1927 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 1928 if (ceiling > RTP_PRIO_MAX) { 1929 error = EINVAL; 1930 goto out; 1931 } 1932 1933 mtx_lock_spin(&umtx_lock); 1934 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1935 mtx_unlock_spin(&umtx_lock); 1936 error = EINVAL; 1937 goto out; 1938 } 1939 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1940 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1941 thread_lock(td); 1942 if (uq->uq_inherited_pri < UPRI(td)) 1943 sched_lend_user_prio(td, uq->uq_inherited_pri); 1944 thread_unlock(td); 1945 } 1946 mtx_unlock_spin(&umtx_lock); 1947 1948 owner = casuword32(&m->m_owner, 1949 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1950 1951 if (owner == UMUTEX_CONTESTED) { 1952 error = 0; 1953 break; 1954 } 1955 1956 /* The address was invalid. */ 1957 if (owner == -1) { 1958 error = EFAULT; 1959 break; 1960 } 1961 1962 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1963 (owner & ~UMUTEX_CONTESTED) == id) { 1964 error = EDEADLK; 1965 break; 1966 } 1967 1968 if (try != 0) { 1969 error = EBUSY; 1970 break; 1971 } 1972 1973 /* 1974 * If we caught a signal, we have retried and now 1975 * exit immediately. 1976 */ 1977 if (error != 0) 1978 break; 1979 1980 umtxq_lock(&uq->uq_key); 1981 umtxq_insert(uq); 1982 umtxq_unbusy(&uq->uq_key); 1983 error = umtxq_sleep(uq, "umtxpp", timo); 1984 umtxq_remove(uq); 1985 umtxq_unlock(&uq->uq_key); 1986 1987 mtx_lock_spin(&umtx_lock); 1988 uq->uq_inherited_pri = old_inherited_pri; 1989 pri = PRI_MAX; 1990 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1991 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1992 if (uq2 != NULL) { 1993 if (pri > UPRI(uq2->uq_thread)) 1994 pri = UPRI(uq2->uq_thread); 1995 } 1996 } 1997 if (pri > uq->uq_inherited_pri) 1998 pri = uq->uq_inherited_pri; 1999 thread_lock(td); 2000 sched_unlend_user_prio(td, pri); 2001 thread_unlock(td); 2002 mtx_unlock_spin(&umtx_lock); 2003 } 2004 2005 if (error != 0) { 2006 mtx_lock_spin(&umtx_lock); 2007 uq->uq_inherited_pri = old_inherited_pri; 2008 pri = PRI_MAX; 2009 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2010 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2011 if (uq2 != NULL) { 2012 if (pri > UPRI(uq2->uq_thread)) 2013 pri = UPRI(uq2->uq_thread); 2014 } 2015 } 2016 if (pri > uq->uq_inherited_pri) 2017 pri = uq->uq_inherited_pri; 2018 thread_lock(td); 2019 sched_unlend_user_prio(td, pri); 2020 thread_unlock(td); 2021 mtx_unlock_spin(&umtx_lock); 2022 } 2023 2024 out: 2025 umtxq_lock(&uq->uq_key); 2026 umtxq_unbusy(&uq->uq_key); 2027 umtxq_unlock(&uq->uq_key); 2028 umtx_key_release(&uq->uq_key); 2029 return (error); 2030 } 2031 2032 /* 2033 * Unlock a PP mutex. 
2034 */ 2035 static int 2036 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 2037 { 2038 struct umtx_key key; 2039 struct umtx_q *uq, *uq2; 2040 struct umtx_pi *pi; 2041 uint32_t owner, id; 2042 uint32_t rceiling; 2043 int error, pri, new_inherited_pri, su; 2044 2045 id = td->td_tid; 2046 uq = td->td_umtxq; 2047 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2048 2049 /* 2050 * Make sure we own this mtx. 2051 */ 2052 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2053 if (owner == -1) 2054 return (EFAULT); 2055 2056 if ((owner & ~UMUTEX_CONTESTED) != id) 2057 return (EPERM); 2058 2059 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2060 if (error != 0) 2061 return (error); 2062 2063 if (rceiling == -1) 2064 new_inherited_pri = PRI_MAX; 2065 else { 2066 rceiling = RTP_PRIO_MAX - rceiling; 2067 if (rceiling > RTP_PRIO_MAX) 2068 return (EINVAL); 2069 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2070 } 2071 2072 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2073 &key)) != 0) 2074 return (error); 2075 umtxq_lock(&key); 2076 umtxq_busy(&key); 2077 umtxq_unlock(&key); 2078 /* 2079 * For priority protected mutex, always set unlocked state 2080 * to UMUTEX_CONTESTED, so that userland always enters kernel 2081 * to lock the mutex, it is necessary because thread priority 2082 * has to be adjusted for such mutex. 2083 */ 2084 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2085 UMUTEX_CONTESTED); 2086 2087 umtxq_lock(&key); 2088 if (error == 0) 2089 umtxq_signal(&key, 1); 2090 umtxq_unbusy(&key); 2091 umtxq_unlock(&key); 2092 2093 if (error == -1) 2094 error = EFAULT; 2095 else { 2096 mtx_lock_spin(&umtx_lock); 2097 if (su != 0) 2098 uq->uq_inherited_pri = new_inherited_pri; 2099 pri = PRI_MAX; 2100 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2101 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2102 if (uq2 != NULL) { 2103 if (pri > UPRI(uq2->uq_thread)) 2104 pri = UPRI(uq2->uq_thread); 2105 } 2106 } 2107 if (pri > uq->uq_inherited_pri) 2108 pri = uq->uq_inherited_pri; 2109 thread_lock(td); 2110 sched_unlend_user_prio(td, pri); 2111 thread_unlock(td); 2112 mtx_unlock_spin(&umtx_lock); 2113 } 2114 umtx_key_release(&key); 2115 return (error); 2116 } 2117 2118 static int 2119 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2120 uint32_t *old_ceiling) 2121 { 2122 struct umtx_q *uq; 2123 uint32_t save_ceiling; 2124 uint32_t owner, id; 2125 uint32_t flags; 2126 int error; 2127 2128 flags = fuword32(&m->m_flags); 2129 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2130 return (EINVAL); 2131 if (ceiling > RTP_PRIO_MAX) 2132 return (EINVAL); 2133 id = td->td_tid; 2134 uq = td->td_umtxq; 2135 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2136 &uq->uq_key)) != 0) 2137 return (error); 2138 for (;;) { 2139 umtxq_lock(&uq->uq_key); 2140 umtxq_busy(&uq->uq_key); 2141 umtxq_unlock(&uq->uq_key); 2142 2143 save_ceiling = fuword32(&m->m_ceilings[0]); 2144 2145 owner = casuword32(&m->m_owner, 2146 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2147 2148 if (owner == UMUTEX_CONTESTED) { 2149 suword32(&m->m_ceilings[0], ceiling); 2150 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2151 UMUTEX_CONTESTED); 2152 error = 0; 2153 break; 2154 } 2155 2156 /* The address was invalid. 
*/ 2157 if (owner == -1) { 2158 error = EFAULT; 2159 break; 2160 } 2161 2162 if ((owner & ~UMUTEX_CONTESTED) == id) { 2163 suword32(&m->m_ceilings[0], ceiling); 2164 error = 0; 2165 break; 2166 } 2167 2168 /* 2169 * If we caught a signal, we have retried and now 2170 * exit immediately. 2171 */ 2172 if (error != 0) 2173 break; 2174 2175 /* 2176 * We set the contested bit, sleep. Otherwise the lock changed 2177 * and we need to retry or we lost a race to the thread 2178 * unlocking the umtx. 2179 */ 2180 umtxq_lock(&uq->uq_key); 2181 umtxq_insert(uq); 2182 umtxq_unbusy(&uq->uq_key); 2183 error = umtxq_sleep(uq, "umtxpp", 0); 2184 umtxq_remove(uq); 2185 umtxq_unlock(&uq->uq_key); 2186 } 2187 umtxq_lock(&uq->uq_key); 2188 if (error == 0) 2189 umtxq_signal(&uq->uq_key, INT_MAX); 2190 umtxq_unbusy(&uq->uq_key); 2191 umtxq_unlock(&uq->uq_key); 2192 umtx_key_release(&uq->uq_key); 2193 if (error == 0 && old_ceiling != NULL) 2194 suword32(old_ceiling, save_ceiling); 2195 return (error); 2196 } 2197 2198 static int 2199 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, 2200 int mode) 2201 { 2202 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2203 case 0: 2204 return (_do_lock_normal(td, m, flags, timo, mode)); 2205 case UMUTEX_PRIO_INHERIT: 2206 return (_do_lock_pi(td, m, flags, timo, mode)); 2207 case UMUTEX_PRIO_PROTECT: 2208 return (_do_lock_pp(td, m, flags, timo, mode)); 2209 } 2210 return (EINVAL); 2211 } 2212 2213 /* 2214 * Lock a userland POSIX mutex. 2215 */ 2216 static int 2217 do_lock_umutex(struct thread *td, struct umutex *m, 2218 struct timespec *timeout, int mode) 2219 { 2220 struct timespec ts, ts2, ts3; 2221 struct timeval tv; 2222 uint32_t flags; 2223 int error; 2224 2225 flags = fuword32(&m->m_flags); 2226 if (flags == -1) 2227 return (EFAULT); 2228 2229 if (timeout == NULL) { 2230 error = _do_lock_umutex(td, m, flags, 0, mode); 2231 /* Mutex locking is restarted if it is interrupted. */ 2232 if (error == EINTR && mode != _UMUTEX_WAIT) 2233 error = ERESTART; 2234 } else { 2235 getnanouptime(&ts); 2236 timespecadd(&ts, timeout); 2237 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2238 for (;;) { 2239 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode); 2240 if (error != ETIMEDOUT) 2241 break; 2242 getnanouptime(&ts2); 2243 if (timespeccmp(&ts2, &ts, >=)) { 2244 error = ETIMEDOUT; 2245 break; 2246 } 2247 ts3 = ts; 2248 timespecsub(&ts3, &ts2); 2249 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2250 } 2251 /* Timed-locking is not restarted. */ 2252 if (error == ERESTART) 2253 error = EINTR; 2254 } 2255 return (error); 2256 } 2257 2258 /* 2259 * Unlock a userland POSIX mutex. 
2260  */
2261 static int
2262 do_unlock_umutex(struct thread *td, struct umutex *m)
2263 {
2264 	uint32_t flags;
2265 
2266 	flags = fuword32(&m->m_flags);
2267 	if (flags == -1)
2268 		return (EFAULT);
2269 
2270 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2271 	case 0:
2272 		return (do_unlock_normal(td, m, flags));
2273 	case UMUTEX_PRIO_INHERIT:
2274 		return (do_unlock_pi(td, m, flags));
2275 	case UMUTEX_PRIO_PROTECT:
2276 		return (do_unlock_pp(td, m, flags));
2277 	}
2278 
2279 	return (EINVAL);
2280 }
2281 
2282 static int
2283 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2284 	struct timespec *timeout, u_long wflags)
2285 {
2286 	struct umtx_q *uq;
2287 	struct timeval tv;
2288 	struct timespec cts, ets, tts;
2289 	uint32_t flags;
2290 	int error;
2291 
2292 	uq = td->td_umtxq;
2293 	flags = fuword32(&cv->c_flags);
2294 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2295 	if (error != 0)
2296 		return (error);
2297 	umtxq_lock(&uq->uq_key);
2298 	umtxq_busy(&uq->uq_key);
2299 	umtxq_insert(uq);
2300 	umtxq_unlock(&uq->uq_key);
2301 
2302 	/*
2303 	 * It is critical that c_has_waiters be set to 1 before the
2304 	 * user mutex is released.
2305 	 */
2306 	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2307 
2308 	umtxq_lock(&uq->uq_key);
2309 	umtxq_unbusy(&uq->uq_key);
2310 	umtxq_unlock(&uq->uq_key);
2311 
2312 	error = do_unlock_umutex(td, m);
2313 
2314 	umtxq_lock(&uq->uq_key);
2315 	if (error == 0) {
2316 		if ((wflags & UMTX_CHECK_UNPARKING) &&
2317 		    (td->td_pflags & TDP_WAKEUP)) {
2318 			td->td_pflags &= ~TDP_WAKEUP;
2319 			error = EINTR;
2320 		} else if (timeout == NULL) {
2321 			error = umtxq_sleep(uq, "ucond", 0);
2322 		} else {
2323 			getnanouptime(&ets);
2324 			timespecadd(&ets, timeout);
2325 			TIMESPEC_TO_TIMEVAL(&tv, timeout);
2326 			for (;;) {
2327 				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2328 				if (error != ETIMEDOUT)
2329 					break;
2330 				getnanouptime(&cts);
2331 				if (timespeccmp(&cts, &ets, >=)) {
2332 					error = ETIMEDOUT;
2333 					break;
2334 				}
2335 				tts = ets;
2336 				timespecsub(&tts, &cts);
2337 				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2338 			}
2339 		}
2340 	}
2341 
2342 	if (error != 0) {
2343 		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
2344 			/*
2345 			 * If we were concurrently awakened by do_cv_signal()
2346 			 * but are returning with an error (a UNIX signal or
2347 			 * a timeout), perform another umtxq_signal() so that
2348 			 * the wakeup is not consumed.  This may cause a
2349 			 * spurious wakeup of another thread which was just
2350 			 * queued, but SUSv3 explicitly allows spurious
2351 			 * wakeups to occur, and indeed a kernel based
2352 			 * implementation can not avoid them.
2353 			 */
2354 			if (!umtxq_signal(&uq->uq_key, 1))
2355 				error = 0;
2356 		}
2357 		if (error == ERESTART)
2358 			error = EINTR;
2359 	}
2360 	umtxq_remove(uq);
2361 	umtxq_unlock(&uq->uq_key);
2362 	umtx_key_release(&uq->uq_key);
2363 	return (error);
2364 }
2365 
2366 /*
2367  * Signal a userland condition variable.
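 *
 * At most one waiter is woken; if that empties the queue, c_has_waiters
 * is cleared so userland can skip the kernel on subsequent signals.  An
 * illustrative (not authoritative) userland call would be:
 *
 *	_umtx_op(&cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);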
2368 */ 2369 static int 2370 do_cv_signal(struct thread *td, struct ucond *cv) 2371 { 2372 struct umtx_key key; 2373 int error, cnt, nwake; 2374 uint32_t flags; 2375 2376 flags = fuword32(&cv->c_flags); 2377 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2378 return (error); 2379 umtxq_lock(&key); 2380 umtxq_busy(&key); 2381 cnt = umtxq_count(&key); 2382 nwake = umtxq_signal(&key, 1); 2383 if (cnt <= nwake) { 2384 umtxq_unlock(&key); 2385 error = suword32( 2386 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2387 umtxq_lock(&key); 2388 } 2389 umtxq_unbusy(&key); 2390 umtxq_unlock(&key); 2391 umtx_key_release(&key); 2392 return (error); 2393 } 2394 2395 static int 2396 do_cv_broadcast(struct thread *td, struct ucond *cv) 2397 { 2398 struct umtx_key key; 2399 int error; 2400 uint32_t flags; 2401 2402 flags = fuword32(&cv->c_flags); 2403 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2404 return (error); 2405 2406 umtxq_lock(&key); 2407 umtxq_busy(&key); 2408 umtxq_signal(&key, INT_MAX); 2409 umtxq_unlock(&key); 2410 2411 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2412 2413 umtxq_lock(&key); 2414 umtxq_unbusy(&key); 2415 umtxq_unlock(&key); 2416 2417 umtx_key_release(&key); 2418 return (error); 2419 } 2420 2421 static int 2422 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) 2423 { 2424 struct umtx_q *uq; 2425 uint32_t flags, wrflags; 2426 int32_t state, oldstate; 2427 int32_t blocked_readers; 2428 int error; 2429 2430 uq = td->td_umtxq; 2431 flags = fuword32(&rwlock->rw_flags); 2432 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2433 if (error != 0) 2434 return (error); 2435 2436 wrflags = URWLOCK_WRITE_OWNER; 2437 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2438 wrflags |= URWLOCK_WRITE_WAITERS; 2439 2440 for (;;) { 2441 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2442 /* try to lock it */ 2443 while (!(state & wrflags)) { 2444 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2445 umtx_key_release(&uq->uq_key); 2446 return (EAGAIN); 2447 } 2448 oldstate = casuword32(&rwlock->rw_state, state, state + 1); 2449 if (oldstate == state) { 2450 umtx_key_release(&uq->uq_key); 2451 return (0); 2452 } 2453 state = oldstate; 2454 } 2455 2456 if (error) 2457 break; 2458 2459 /* grab monitor lock */ 2460 umtxq_lock(&uq->uq_key); 2461 umtxq_busy(&uq->uq_key); 2462 umtxq_unlock(&uq->uq_key); 2463 2464 /* set read contention bit */ 2465 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { 2466 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); 2467 if (oldstate == state) 2468 goto sleep; 2469 state = oldstate; 2470 } 2471 2472 /* state is changed while setting flags, restart */ 2473 if (!(state & wrflags)) { 2474 umtxq_lock(&uq->uq_key); 2475 umtxq_unbusy(&uq->uq_key); 2476 umtxq_unlock(&uq->uq_key); 2477 continue; 2478 } 2479 2480 sleep: 2481 /* contention bit is set, before sleeping, increase read waiter count */ 2482 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2483 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2484 2485 while (state & wrflags) { 2486 umtxq_lock(&uq->uq_key); 2487 umtxq_insert(uq); 2488 umtxq_unbusy(&uq->uq_key); 2489 2490 error = umtxq_sleep(uq, "urdlck", timo); 2491 2492 umtxq_busy(&uq->uq_key); 2493 umtxq_remove(uq); 2494 umtxq_unlock(&uq->uq_key); 2495 if (error) 2496 break; 2497 state = fuword32(__DEVOLATILE(int32_t *, 
&rwlock->rw_state)); 2498 } 2499 2500 /* decrease read waiter count, and may clear read contention bit */ 2501 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2502 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2503 if (blocked_readers == 1) { 2504 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2505 for (;;) { 2506 oldstate = casuword32(&rwlock->rw_state, state, 2507 state & ~URWLOCK_READ_WAITERS); 2508 if (oldstate == state) 2509 break; 2510 state = oldstate; 2511 } 2512 } 2513 2514 umtxq_lock(&uq->uq_key); 2515 umtxq_unbusy(&uq->uq_key); 2516 umtxq_unlock(&uq->uq_key); 2517 } 2518 umtx_key_release(&uq->uq_key); 2519 return (error); 2520 } 2521 2522 static int 2523 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) 2524 { 2525 struct timespec ts, ts2, ts3; 2526 struct timeval tv; 2527 int error; 2528 2529 getnanouptime(&ts); 2530 timespecadd(&ts, timeout); 2531 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2532 for (;;) { 2533 error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); 2534 if (error != ETIMEDOUT) 2535 break; 2536 getnanouptime(&ts2); 2537 if (timespeccmp(&ts2, &ts, >=)) { 2538 error = ETIMEDOUT; 2539 break; 2540 } 2541 ts3 = ts; 2542 timespecsub(&ts3, &ts2); 2543 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2544 } 2545 if (error == ERESTART) 2546 error = EINTR; 2547 return (error); 2548 } 2549 2550 static int 2551 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) 2552 { 2553 struct umtx_q *uq; 2554 uint32_t flags; 2555 int32_t state, oldstate; 2556 int32_t blocked_writers; 2557 int error; 2558 2559 uq = td->td_umtxq; 2560 flags = fuword32(&rwlock->rw_flags); 2561 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2562 if (error != 0) 2563 return (error); 2564 2565 for (;;) { 2566 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2567 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2568 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); 2569 if (oldstate == state) { 2570 umtx_key_release(&uq->uq_key); 2571 return (0); 2572 } 2573 state = oldstate; 2574 } 2575 2576 if (error) 2577 break; 2578 2579 /* grab monitor lock */ 2580 umtxq_lock(&uq->uq_key); 2581 umtxq_busy(&uq->uq_key); 2582 umtxq_unlock(&uq->uq_key); 2583 2584 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && 2585 (state & URWLOCK_WRITE_WAITERS) == 0) { 2586 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); 2587 if (oldstate == state) 2588 goto sleep; 2589 state = oldstate; 2590 } 2591 2592 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2593 umtxq_lock(&uq->uq_key); 2594 umtxq_unbusy(&uq->uq_key); 2595 umtxq_unlock(&uq->uq_key); 2596 continue; 2597 } 2598 sleep: 2599 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2600 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2601 2602 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2603 umtxq_lock(&uq->uq_key); 2604 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2605 umtxq_unbusy(&uq->uq_key); 2606 2607 error = umtxq_sleep(uq, "uwrlck", timo); 2608 2609 umtxq_busy(&uq->uq_key); 2610 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2611 umtxq_unlock(&uq->uq_key); 2612 if (error) 2613 break; 2614 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2615 } 2616 2617 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2618 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2619 if 
(blocked_writers == 1) { 2620 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2621 for (;;) { 2622 oldstate = casuword32(&rwlock->rw_state, state, 2623 state & ~URWLOCK_WRITE_WAITERS); 2624 if (oldstate == state) 2625 break; 2626 state = oldstate; 2627 } 2628 } 2629 2630 umtxq_lock(&uq->uq_key); 2631 umtxq_unbusy(&uq->uq_key); 2632 umtxq_unlock(&uq->uq_key); 2633 } 2634 2635 umtx_key_release(&uq->uq_key); 2636 return (error); 2637 } 2638 2639 static int 2640 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout) 2641 { 2642 struct timespec ts, ts2, ts3; 2643 struct timeval tv; 2644 int error; 2645 2646 getnanouptime(&ts); 2647 timespecadd(&ts, timeout); 2648 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2649 for (;;) { 2650 error = do_rw_wrlock(td, obj, tvtohz(&tv)); 2651 if (error != ETIMEDOUT) 2652 break; 2653 getnanouptime(&ts2); 2654 if (timespeccmp(&ts2, &ts, >=)) { 2655 error = ETIMEDOUT; 2656 break; 2657 } 2658 ts3 = ts; 2659 timespecsub(&ts3, &ts2); 2660 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2661 } 2662 if (error == ERESTART) 2663 error = EINTR; 2664 return (error); 2665 } 2666 2667 static int 2668 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2669 { 2670 struct umtx_q *uq; 2671 uint32_t flags; 2672 int32_t state, oldstate; 2673 int error, q, count; 2674 2675 uq = td->td_umtxq; 2676 flags = fuword32(&rwlock->rw_flags); 2677 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2678 if (error != 0) 2679 return (error); 2680 2681 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2682 if (state & URWLOCK_WRITE_OWNER) { 2683 for (;;) { 2684 oldstate = casuword32(&rwlock->rw_state, state, 2685 state & ~URWLOCK_WRITE_OWNER); 2686 if (oldstate != state) { 2687 state = oldstate; 2688 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2689 error = EPERM; 2690 goto out; 2691 } 2692 } else 2693 break; 2694 } 2695 } else if (URWLOCK_READER_COUNT(state) != 0) { 2696 for (;;) { 2697 oldstate = casuword32(&rwlock->rw_state, state, 2698 state - 1); 2699 if (oldstate != state) { 2700 state = oldstate; 2701 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2702 error = EPERM; 2703 goto out; 2704 } 2705 } 2706 else 2707 break; 2708 } 2709 } else { 2710 error = EPERM; 2711 goto out; 2712 } 2713 2714 count = 0; 2715 2716 if (!(flags & URWLOCK_PREFER_READER)) { 2717 if (state & URWLOCK_WRITE_WAITERS) { 2718 count = 1; 2719 q = UMTX_EXCLUSIVE_QUEUE; 2720 } else if (state & URWLOCK_READ_WAITERS) { 2721 count = INT_MAX; 2722 q = UMTX_SHARED_QUEUE; 2723 } 2724 } else { 2725 if (state & URWLOCK_READ_WAITERS) { 2726 count = INT_MAX; 2727 q = UMTX_SHARED_QUEUE; 2728 } else if (state & URWLOCK_WRITE_WAITERS) { 2729 count = 1; 2730 q = UMTX_EXCLUSIVE_QUEUE; 2731 } 2732 } 2733 2734 if (count) { 2735 umtxq_lock(&uq->uq_key); 2736 umtxq_busy(&uq->uq_key); 2737 umtxq_signal_queue(&uq->uq_key, count, q); 2738 umtxq_unbusy(&uq->uq_key); 2739 umtxq_unlock(&uq->uq_key); 2740 } 2741 out: 2742 umtx_key_release(&uq->uq_key); 2743 return (error); 2744 } 2745 2746 int 2747 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 2748 /* struct umtx *umtx */ 2749 { 2750 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0); 2751 } 2752 2753 int 2754 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 2755 /* struct umtx *umtx */ 2756 { 2757 return do_unlock_umtx(td, uap->umtx, td->td_tid); 2758 } 2759 2760 static int 2761 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 2762 { 2763 struct timespec *ts, timeout; 2764 int error; 2765 2766 /* Allow a null timespec 
(wait forever). */ 2767 if (uap->uaddr2 == NULL) 2768 ts = NULL; 2769 else { 2770 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2771 if (error != 0) 2772 return (error); 2773 if (timeout.tv_nsec >= 1000000000 || 2774 timeout.tv_nsec < 0) { 2775 return (EINVAL); 2776 } 2777 ts = &timeout; 2778 } 2779 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 2780 } 2781 2782 static int 2783 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 2784 { 2785 return (do_unlock_umtx(td, uap->obj, uap->val)); 2786 } 2787 2788 static int 2789 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 2790 { 2791 struct timespec *ts, timeout; 2792 int error; 2793 2794 if (uap->uaddr2 == NULL) 2795 ts = NULL; 2796 else { 2797 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2798 if (error != 0) 2799 return (error); 2800 if (timeout.tv_nsec >= 1000000000 || 2801 timeout.tv_nsec < 0) 2802 return (EINVAL); 2803 ts = &timeout; 2804 } 2805 return do_wait(td, uap->obj, uap->val, ts, 0, 0); 2806 } 2807 2808 static int 2809 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 2810 { 2811 struct timespec *ts, timeout; 2812 int error; 2813 2814 if (uap->uaddr2 == NULL) 2815 ts = NULL; 2816 else { 2817 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2818 if (error != 0) 2819 return (error); 2820 if (timeout.tv_nsec >= 1000000000 || 2821 timeout.tv_nsec < 0) 2822 return (EINVAL); 2823 ts = &timeout; 2824 } 2825 return do_wait(td, uap->obj, uap->val, ts, 1, 0); 2826 } 2827 2828 static int 2829 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 2830 { 2831 struct timespec *ts, timeout; 2832 int error; 2833 2834 if (uap->uaddr2 == NULL) 2835 ts = NULL; 2836 else { 2837 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2838 if (error != 0) 2839 return (error); 2840 if (timeout.tv_nsec >= 1000000000 || 2841 timeout.tv_nsec < 0) 2842 return (EINVAL); 2843 ts = &timeout; 2844 } 2845 return do_wait(td, uap->obj, uap->val, ts, 1, 1); 2846 } 2847 2848 static int 2849 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 2850 { 2851 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 2852 } 2853 2854 static int 2855 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 2856 { 2857 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 2858 } 2859 2860 static int 2861 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 2862 { 2863 struct timespec *ts, timeout; 2864 int error; 2865 2866 /* Allow a null timespec (wait forever). */ 2867 if (uap->uaddr2 == NULL) 2868 ts = NULL; 2869 else { 2870 error = copyin(uap->uaddr2, &timeout, 2871 sizeof(timeout)); 2872 if (error != 0) 2873 return (error); 2874 if (timeout.tv_nsec >= 1000000000 || 2875 timeout.tv_nsec < 0) { 2876 return (EINVAL); 2877 } 2878 ts = &timeout; 2879 } 2880 return do_lock_umutex(td, uap->obj, ts, 0); 2881 } 2882 2883 static int 2884 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 2885 { 2886 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 2887 } 2888 2889 static int 2890 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 2891 { 2892 struct timespec *ts, timeout; 2893 int error; 2894 2895 /* Allow a null timespec (wait forever). 
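 * Otherwise uaddr2 points to a relative struct timespec whose tv_nsec
 * must lie in [0, 1000000000); anything else is rejected with EINVAL.
 * A hypothetical 100ms bound (sketch only) would be passed in uaddr2 as:
 *
 *	struct timespec ts = { 0, 100000000 };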
*/ 2896 if (uap->uaddr2 == NULL) 2897 ts = NULL; 2898 else { 2899 error = copyin(uap->uaddr2, &timeout, 2900 sizeof(timeout)); 2901 if (error != 0) 2902 return (error); 2903 if (timeout.tv_nsec >= 1000000000 || 2904 timeout.tv_nsec < 0) { 2905 return (EINVAL); 2906 } 2907 ts = &timeout; 2908 } 2909 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); 2910 } 2911 2912 static int 2913 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 2914 { 2915 return do_wake_umutex(td, uap->obj); 2916 } 2917 2918 static int 2919 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 2920 { 2921 return do_unlock_umutex(td, uap->obj); 2922 } 2923 2924 static int 2925 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 2926 { 2927 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 2928 } 2929 2930 static int 2931 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 2932 { 2933 struct timespec *ts, timeout; 2934 int error; 2935 2936 /* Allow a null timespec (wait forever). */ 2937 if (uap->uaddr2 == NULL) 2938 ts = NULL; 2939 else { 2940 error = copyin(uap->uaddr2, &timeout, 2941 sizeof(timeout)); 2942 if (error != 0) 2943 return (error); 2944 if (timeout.tv_nsec >= 1000000000 || 2945 timeout.tv_nsec < 0) { 2946 return (EINVAL); 2947 } 2948 ts = &timeout; 2949 } 2950 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 2951 } 2952 2953 static int 2954 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 2955 { 2956 return do_cv_signal(td, uap->obj); 2957 } 2958 2959 static int 2960 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 2961 { 2962 return do_cv_broadcast(td, uap->obj); 2963 } 2964 2965 static int 2966 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 2967 { 2968 struct timespec timeout; 2969 int error; 2970 2971 /* Allow a null timespec (wait forever). */ 2972 if (uap->uaddr2 == NULL) { 2973 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 2974 } else { 2975 error = copyin(uap->uaddr2, &timeout, 2976 sizeof(timeout)); 2977 if (error != 0) 2978 return (error); 2979 if (timeout.tv_nsec >= 1000000000 || 2980 timeout.tv_nsec < 0) { 2981 return (EINVAL); 2982 } 2983 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); 2984 } 2985 return (error); 2986 } 2987 2988 static int 2989 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 2990 { 2991 struct timespec timeout; 2992 int error; 2993 2994 /* Allow a null timespec (wait forever). 
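 * With a timeout the request is routed through do_rw_wrlock2(), which
 * turns the relative timespec into an absolute uptime deadline and
 * retries the lock until it is acquired or the deadline passes; an
 * interrupted timed wait reports EINTR instead of restarting.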
*/ 2995 if (uap->uaddr2 == NULL) { 2996 error = do_rw_wrlock(td, uap->obj, 0); 2997 } else { 2998 error = copyin(uap->uaddr2, &timeout, 2999 sizeof(timeout)); 3000 if (error != 0) 3001 return (error); 3002 if (timeout.tv_nsec >= 1000000000 || 3003 timeout.tv_nsec < 0) { 3004 return (EINVAL); 3005 } 3006 3007 error = do_rw_wrlock2(td, uap->obj, &timeout); 3008 } 3009 return (error); 3010 } 3011 3012 static int 3013 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3014 { 3015 return do_rw_unlock(td, uap->obj); 3016 } 3017 3018 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3019 3020 static _umtx_op_func op_table[] = { 3021 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ 3022 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ 3023 __umtx_op_wait, /* UMTX_OP_WAIT */ 3024 __umtx_op_wake, /* UMTX_OP_WAKE */ 3025 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3026 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3027 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3028 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3029 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3030 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3031 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3032 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3033 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3034 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3035 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3036 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3037 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3038 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ 3039 __umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */ 3040 }; 3041 3042 int 3043 _umtx_op(struct thread *td, struct _umtx_op_args *uap) 3044 { 3045 if ((unsigned)uap->op < UMTX_OP_MAX) 3046 return (*op_table[uap->op])(td, uap); 3047 return (EINVAL); 3048 } 3049 3050 #ifdef COMPAT_IA32 3051 int 3052 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) 3053 /* struct umtx *umtx */ 3054 { 3055 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 3056 } 3057 3058 int 3059 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) 3060 /* struct umtx *umtx */ 3061 { 3062 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 3063 } 3064 3065 struct timespec32 { 3066 u_int32_t tv_sec; 3067 u_int32_t tv_nsec; 3068 }; 3069 3070 static inline int 3071 copyin_timeout32(void *addr, struct timespec *tsp) 3072 { 3073 struct timespec32 ts32; 3074 int error; 3075 3076 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3077 if (error == 0) { 3078 tsp->tv_sec = ts32.tv_sec; 3079 tsp->tv_nsec = ts32.tv_nsec; 3080 } 3081 return (error); 3082 } 3083 3084 static int 3085 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) 3086 { 3087 struct timespec *ts, timeout; 3088 int error; 3089 3090 /* Allow a null timespec (wait forever). 
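 * In this 32-bit compat path the timeout arrives as a struct timespec32
 * (two 32-bit fields) and is widened into a native struct timespec by
 * copyin_timeout32() before the usual validation.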
 */
3091 	if (uap->uaddr2 == NULL)
3092 		ts = NULL;
3093 	else {
3094 		error = copyin_timeout32(uap->uaddr2, &timeout);
3095 		if (error != 0)
3096 			return (error);
3097 		if (timeout.tv_nsec >= 1000000000 ||
3098 		    timeout.tv_nsec < 0) {
3099 			return (EINVAL);
3100 		}
3101 		ts = &timeout;
3102 	}
3103 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3104 }
3105 
3106 static int
3107 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3108 {
3109 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3110 }
3111 
3112 static int
3113 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3114 {
3115 	struct timespec *ts, timeout;
3116 	int error;
3117 
3118 	if (uap->uaddr2 == NULL)
3119 		ts = NULL;
3120 	else {
3121 		error = copyin_timeout32(uap->uaddr2, &timeout);
3122 		if (error != 0)
3123 			return (error);
3124 		if (timeout.tv_nsec >= 1000000000 ||
3125 		    timeout.tv_nsec < 0)
3126 			return (EINVAL);
3127 		ts = &timeout;
3128 	}
3129 	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3130 }
3131 
3132 static int
3133 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3134 {
3135 	struct timespec *ts, timeout;
3136 	int error;
3137 
3138 	/* Allow a null timespec (wait forever). */
3139 	if (uap->uaddr2 == NULL)
3140 		ts = NULL;
3141 	else {
3142 		error = copyin_timeout32(uap->uaddr2, &timeout);
3143 		if (error != 0)
3144 			return (error);
3145 		if (timeout.tv_nsec >= 1000000000 ||
3146 		    timeout.tv_nsec < 0)
3147 			return (EINVAL);
3148 		ts = &timeout;
3149 	}
3150 	return do_lock_umutex(td, uap->obj, ts, 0);
3151 }
3152 
3153 static int
3154 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3155 {
3156 	struct timespec *ts, timeout;
3157 	int error;
3158 
3159 	/* Allow a null timespec (wait forever). */
3160 	if (uap->uaddr2 == NULL)
3161 		ts = NULL;
3162 	else {
3163 		error = copyin_timeout32(uap->uaddr2, &timeout);
3164 		if (error != 0)
3165 			return (error);
3166 		if (timeout.tv_nsec >= 1000000000 ||
3167 		    timeout.tv_nsec < 0)
3168 			return (EINVAL);
3169 		ts = &timeout;
3170 	}
3171 	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3172 }
3173 
3174 static int
3175 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3176 {
3177 	struct timespec *ts, timeout;
3178 	int error;
3179 
3180 	/* Allow a null timespec (wait forever). */
3181 	if (uap->uaddr2 == NULL)
3182 		ts = NULL;
3183 	else {
3184 		error = copyin_timeout32(uap->uaddr2, &timeout);
3185 		if (error != 0)
3186 			return (error);
3187 		if (timeout.tv_nsec >= 1000000000 ||
3188 		    timeout.tv_nsec < 0)
3189 			return (EINVAL);
3190 		ts = &timeout;
3191 	}
3192 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3193 }
3194 
3195 static int
3196 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3197 {
3198 	struct timespec timeout;
3199 	int error;
3200 
3201 	/* Allow a null timespec (wait forever). */
3202 	if (uap->uaddr2 == NULL) {
3203 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3204 	} else {
3205 		error = copyin_timeout32(uap->uaddr2,
3206 		    &timeout);
3207 		if (error != 0)
3208 			return (error);
3209 		if (timeout.tv_nsec >= 1000000000 ||
3210 		    timeout.tv_nsec < 0) {
3211 			return (EINVAL);
3212 		}
3213 		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3214 	}
3215 	return (error);
3216 }
3217 
3218 static int
3219 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3220 {
3221 	struct timespec timeout;
3222 	int error;
3223 
3224 	/* Allow a null timespec (wait forever).
 */
3225 	if (uap->uaddr2 == NULL) {
3226 		error = do_rw_wrlock(td, uap->obj, 0);
3227 	} else {
3228 		error = copyin_timeout32(uap->uaddr2, &timeout);
3229 		if (error != 0)
3230 			return (error);
3231 		if (timeout.tv_nsec >= 1000000000 ||
3232 		    timeout.tv_nsec < 0) {
3233 			return (EINVAL);
3234 		}
3235 
3236 		error = do_rw_wrlock2(td, uap->obj, &timeout);
3237 	}
3238 	return (error);
3239 }
3240 
3241 static int
3242 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3243 {
3244 	struct timespec *ts, timeout;
3245 	int error;
3246 
3247 	if (uap->uaddr2 == NULL)
3248 		ts = NULL;
3249 	else {
3250 		error = copyin_timeout32(uap->uaddr2, &timeout);
3251 		if (error != 0)
3252 			return (error);
3253 		if (timeout.tv_nsec >= 1000000000 ||
3254 		    timeout.tv_nsec < 0)
3255 			return (EINVAL);
3256 		ts = &timeout;
3257 	}
3258 	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3259 }
3260 
3261 static _umtx_op_func op_table_compat32[] = {
3262 	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3263 	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3264 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3265 	__umtx_op_wake,			/* UMTX_OP_WAKE */
3266 	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3267 	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3268 	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3269 	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3270 	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3271 	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3272 	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3273 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3274 	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3275 	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3276 	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3277 	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3278 	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3279 	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3280 	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
3281 };
3282 
3283 int
3284 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3285 {
3286 	if ((unsigned)uap->op < UMTX_OP_MAX)
3287 		return (*op_table_compat32[uap->op])(td,
3288 			(struct _umtx_op_args *)uap);
3289 	return (EINVAL);
3290 }
3291 #endif
3292 
3293 void
3294 umtx_thread_init(struct thread *td)
3295 {
3296 	td->td_umtxq = umtxq_alloc();
3297 	td->td_umtxq->uq_thread = td;
3298 }
3299 
3300 void
3301 umtx_thread_fini(struct thread *td)
3302 {
3303 	umtxq_free(td->td_umtxq);
3304 }
3305 
3306 /*
3307  * Called when a new thread is created, e.g. by fork().
3308  */
3309 void
3310 umtx_thread_alloc(struct thread *td)
3311 {
3312 	struct umtx_q *uq;
3313 
3314 	uq = td->td_umtxq;
3315 	uq->uq_inherited_pri = PRI_MAX;
3316 
3317 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3318 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3319 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3320 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3321 }
3322 
3323 /*
3324  * exec() hook.
3325  */
3326 static void
3327 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3328 	struct image_params *imgp __unused)
3329 {
3330 	umtx_thread_cleanup(curthread);
3331 }
3332 
3333 /*
3334  * thread_exit() hook.
3335  */
3336 void
3337 umtx_thread_exit(struct thread *td)
3338 {
3339 	umtx_thread_cleanup(td);
3340 }
3341 
3342 /*
3343  * Clean up umtx data.
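 *
 * Called from the thread-exit and exec() hooks above: any priority-
 * inheritance mutexes still recorded as contested are disowned, the
 * inherited priority is reset and the thread's borrowed user priority
 * (TDF_UBORROWING) is dropped.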
3344 */ 3345 static void 3346 umtx_thread_cleanup(struct thread *td) 3347 { 3348 struct umtx_q *uq; 3349 struct umtx_pi *pi; 3350 3351 if ((uq = td->td_umtxq) == NULL) 3352 return; 3353 3354 mtx_lock_spin(&umtx_lock); 3355 uq->uq_inherited_pri = PRI_MAX; 3356 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3357 pi->pi_owner = NULL; 3358 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3359 } 3360 thread_lock(td); 3361 td->td_flags &= ~TDF_UBORROWING; 3362 thread_unlock(td); 3363 mtx_unlock_spin(&umtx_lock); 3364 } 3365
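
/*
 * Illustrative userland usage (a sketch only, not part of the kernel
 * code above): a minimal wait/wake pairing over a shared integer via
 * the _umtx_op(2) system call, assuming the usual userland prototype
 * int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2).
 * UMTX_OP_WAIT_UINT blocks while the word at obj still equals val;
 * UMTX_OP_WAKE wakes up to val threads queued on that address.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	static u_int flag;
 *
 *	static void
 *	wait_for_flag(void)
 *	{
 *		while (flag == 0)
 *			_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
 *	}
 *
 *	static void
 *	raise_flag(void)
 *	{
 *		flag = 1;
 *		_umtx_op(&flag, UMTX_OP_WAKE, 1, NULL, NULL);
 *	}
 */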