/*
 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space. Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens. Tokens are only held
 * while the thread is running. Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h. We mostly provide backoff
 * functions here.
 */
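
/*
 * Illustrative usage sketch for the link-based interface (not compiled;
 * the non-underscore wrappers mtx_lock_ex_link(), mtx_unlock() and the
 * direct callback/arg assignments below are assumptions based on
 * sys/mutex2.h and the link fields referenced in this file; my_callback
 * and my_arg are hypothetical names):
 *
 *	static void
 *	my_callback(mtx_link_t *link, void *arg, int error)
 *	{
 *		// error is 0 when the lock was granted, ENOLCK if the
 *		// request was aborted via mtx_abort_link().
 *	}
 *
 *	mtx_link_init(&link);
 *	link.callback = my_callback;
 *	link.arg = my_arg;
 *	error = mtx_lock_ex_link(&mtx, &link, 0, 0);
 *	if (error == 0) {
 *		// Synchronous fast path: already exclusively locked,
 *		// no callback was or will be made, link is not queued.
 *		mtx_unlock(&mtx);
 *	} else if (error == EINPROGRESS) {
 *		// Queued; completion is reported through my_callback().
 *		// mtx_abort_link(&mtx, &link) may be used to cancel.
 *	}
 */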

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/indefinite.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>
#include <sys/indefinite2.h>

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);

/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async. If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure. The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int lock;
	u_int nlock;
	int error;
	int isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests. It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions. It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If asynchronous lock request return without
		 * blocking, leave link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, 0, 0));
}

/*
 * Share-lock a mutex, block until acquired. Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int lock;
	u_int nlock;
	int error;
	int isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				error = 0;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}
		if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
		 * pending shared requests. It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions. It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort. Other shared lock requestors
		 * could have sneaked in before we set LINKSPIN so make
		 * sure we undo the state properly.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_shlink) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_SHWANTED);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the shlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_SH;
		if (mtx->mtx_shlink) {
			link->next = mtx->mtx_shlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_shlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If asynchronous lock request return without
		 * blocking, leave link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, 0, 0));
}
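
/*
 * Illustrative blocking-lock sketch (not compiled; the non-underscore
 * mtx_lock_ex()/mtx_unlock() wrappers are assumed to come from
 * sys/mutex2.h). The flags and timeout are passed through to tsleep(),
 * so a PCATCH + tick-count combination behaves as it does for tsleep():
 *
 *	error = mtx_lock_ex(&mtx, PCATCH, hz);
 *	switch (error) {
 *	case 0:			// acquired
 *		mtx_unlock(&mtx);
 *		break;
 *	case EINTR:
 *	case ERESTART:		// interrupted by a signal (PCATCH)
 *	case EWOULDBLOCK:	// timed out after 'hz' ticks
 *	case ENOLCK:		// request aborted
 *		break;
 *	}
 */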

/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int bb = 1;
	int bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

/*
 * Attempt to acquire a spinlock, if we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;
	u_int lock;
	u_int nlock;
	int res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks;
			cpu_ccfence();
			crit_exit_raw(gd->gd_curthread);
			res = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return res;
}

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int bb = 1;
	int bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

/*
 * If the lock is held exclusively it must be owned by the caller. If the
 * lock is already a shared lock this operation is a NOP. A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared. Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
	}
}

/*
 * Upgrade a shared lock to an exclusive lock. The upgrade will fail if
 * the shared lock has a count other than 1. Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything. A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
	}
	return (error);
}
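
/*
 * Illustrative upgrade sketch (not compiled; the mtx_lock_sh(),
 * mtx_upgrade_try(), mtx_lock_ex() and mtx_unlock() wrapper names are
 * assumed to come from sys/mutex2.h). Since the upgrade fails whenever
 * the caller is not the sole shared holder, callers typically fall back
 * to dropping the shared lock and reacquiring it exclusively, after which
 * any state observed under the shared lock must be revalidated:
 *
 *	mtx_lock_sh(&mtx, 0, 0);
 *	...
 *	if (mtx_upgrade_try(&mtx) == EDEADLK) {
 *		mtx_unlock(&mtx);
 *		mtx_lock_ex(&mtx, 0, 0);
 *		// revalidate state observed while only share-locked
 *	}
 */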

/*
 * Unlock a lock. The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	thread_t td __debugvar = curthread;
	u_int lock;
	u_int nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending. Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending. Upgrade this
			 * final shared lock to exclusive and transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}

/*
 * Chain pending links. Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set. mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *	 acquiring LINKSPIN as all other cases will also need to acquire
 *	 LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;	/* upgrade if necc */
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		link = mtx->mtx_exlink;
		KKASSERT(link != NULL);
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock = MTX_LINKSPIN | MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			nlock = MTX_LINKSPIN;			/* to clear */
		}
		KKASSERT(link->state == MTX_LINK_LINKED_EX);
		mtx->mtx_owner = link->owner;
		cpu_sfence();

		/*
		 * WARNING! The callback can only be safely
		 *	    made with LINKSPIN still held
		 *	    and in a critical section.
		 *
		 * WARNING! The link can go away after the
		 *	    state is set, or after the
		 *	    callback.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, 0);
		} else {
			link->state = MTX_LINK_ACQUIRED;
			wakeup(link);
		}
		atomic_clear_int(&mtx->mtx_lock, nlock);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}

/*
 * Flush waiting shared locks. The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *	      consume. We will apply this to the first link, but must add
 *	      additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int addcount;
	u_int nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		/*
		 * It should not be possible for SHWANTED to be set without
		 * any links pending.
		 */
		KKASSERT(mtx->mtx_shlink != NULL);

		/*
		 * We have to process the count for all shared locks before
		 * we process any of the links. Count the additional shared
		 * locks beyond the first link (which is already accounted
		 * for) and associate the full count with the lock
		 * immediately.
		 */
		addcount = 0;
		for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
		     link = link->next) {
			++addcount;
		}
		if (addcount > 0)
			atomic_add_int(&mtx->mtx_lock, addcount);

		/*
		 * We can wakeup all waiting shared locks.
		 */
		while ((link = mtx->mtx_shlink) != NULL) {
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
			} else {
				mtx->mtx_shlink = link->next;
				link->next->prev = link->prev;
				link->prev->next = link->next;
			}
			link->next = NULL;
			link->prev = NULL;
			cpu_sfence();
			if (link->callback) {
				link->state = MTX_LINK_CALLEDBACK;
				link->callback(link, link->arg, 0);
			} else {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				wakeup(link);
			}
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}

/*
 * Delete a link structure after tsleep has failed. This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int lock;
	u_int nlock;

	/*
	 * Acquire MTX_LINKSPIN.
	 *
	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
	 * result in too much cpu cache traffic.
	 */
	crit_enter_raw(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Delete the link and release LINKSPIN.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_LINKED_EX:
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock |= MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	case MTX_LINK_LINKED_SH:
		if (link->next == link) {
			mtx->mtx_shlink = NULL;
			nlock |= MTX_SHWANTED;	/* to clear */
		} else {
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	default:
		/* no change */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	crit_exit_raw(td);
}

/*
 * Wait for async lock completion or abort. Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	indefinite_info_t info;
	int error;

	indefinite_init(&info, mtx->mtx_ident, 1,
			((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M'));

	/*
	 * Sleep. Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted. The LINKED
	 * bit was set by this cpu so we can test it without
	 * fences.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
		if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
			indefinite_check(&info);
	}

	/*
	 * We need at least a lfence (load fence) to ensure our cpu does not
	 * reorder loads (of data outside the lock structure) prior to the
	 * remote cpu's release, since the above test may have run without
	 * any atomic interactions.
	 *
	 * If we do not do this then state updated by the other cpu before
	 * releasing its lock may not be read cleanly by our cpu when this
	 * function returns. Even though the other cpu ordered its stores,
	 * our loads can still be out of order.
	 */
	cpu_mfence();

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
		indefinite_done(&info);

	return error;
}

/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK. This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned. If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously. Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int lock;
	u_int nlock;

	/*
	 * Acquire MTX_LINKSPIN
	 */
	crit_enter_raw(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Do the abort.
	 *
	 * WARNING! Link structure can disappear once link->state is set.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED_EX:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_exlink == link) {
				mtx->mtx_exlink = NULL;
				nlock |= MTX_EXWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_exlink == link)
				mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made. We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_LINKED_SH:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_shlink == link) {
				mtx->mtx_shlink = NULL;
				nlock |= MTX_SHWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_shlink == link)
				mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made. We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		/*
		 * Too late, the lock was acquired. Let it complete.
		 */
		break;
	default:
		/*
		 * link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	crit_exit_raw(td);
}