/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - Locks can be aborted (the async callback, if any, is made with ENOLCK).
 * - Locks can be asynchronous.
 * - Synchronous fast path if no blocking occurs (the async callback is not
 *   made in this case).
 *
 * Generally speaking, any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
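
/*
 * Illustrative usage sketch (documentation only, not compiled).  This
 * calls the backoff functions defined in this file directly; real code
 * normally goes through the inline wrappers in sys/mutex2.h.  mtx_init()
 * is assumed here to take the mutex and an ident string; check
 * sys/mutex2.h for the authoritative initializer.
 */
#if 0

static mtx_t example_mtx;

static void
example_sync_usage(void)
{
	int error;

	mtx_init(&example_mtx, "exmtx");	/* assumed initializer */

	/*
	 * Unlike an LWKT token, the mutex remains held across blocking
	 * conditions.  PCATCH allows the sleep to be interrupted by a
	 * signal, in which case the tsleep() error is returned.
	 */
	error = _mtx_lock_ex(&example_mtx, PCATCH, 0);
	if (error == 0) {
		/* ... blocking operations are safe while held ... */
		_mtx_unlock(&example_mtx);
	}
}

#endif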

/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that the EXWANTED
		 * state which failed the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If this is an asynchronous lock request, return without
		 * blocking and leave the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for the lock.
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, 0, 0));
}
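
/*
 * Illustrative async sketch (documentation only, not compiled).  The
 * callback and arg fields are assumed to be settable directly after
 * mtx_link_init(); sys/mutex2.h may provide a dedicated async
 * initializer.
 */
#if 0

static void
example_lock_cb(mtx_link_t *link, void *arg, int error)
{
	/*
	 * Made with LINKSPIN held and in a critical section, so do not
	 * block here.  error is 0 if the lock was handed to us, ENOLCK
	 * if the request was aborted.
	 */
}

static void
example_async_usage(mtx_t *mtx, mtx_link_t *link)
{
	int error;

	mtx_link_init(link);
	link->callback = example_lock_cb;	/* assumed direct assignment */
	link->arg = NULL;

	error = _mtx_lock_ex_link(mtx, link, 0, 0);
	if (error == 0) {
		/*
		 * Synchronous fast path: the lock was acquired
		 * immediately, the callback was NOT made, and the link
		 * was never linked in.  We own the lock here.
		 */
	} else if (error == EINPROGRESS) {
		/* The callback will be made when the lock chains to us. */
	}
}

#endif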

/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup, so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				error = 0;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}
		if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
		 * pending shared requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that the EXWANTED
		 * state which failed the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort.  Other shared lock requestors
		 * could have sneaked in before we set LINKSPIN, so make
		 * sure we undo the state properly.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_shlink) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_SHWANTED);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the shlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_SH;
		if (mtx->mtx_shlink) {
			link->next = mtx->mtx_shlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_shlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If this is an asynchronous lock request, return without
		 * blocking and leave the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for the lock.
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, 0, 0));
}
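
/*
 * Illustrative reader sketch (documentation only, not compiled).  Any
 * number of readers can hold the lock concurrently, but because
 * exclusive requests have priority (MTX_EXWANTED fails the shared fast
 * path above), new readers queue behind a waiting writer instead of
 * starving it.
 */
#if 0

static void
example_reader(mtx_t *mtx)
{
	/*
	 * With flags 0 and no timeout the quick form cannot return an
	 * error for a plain synchronous request.
	 */
	_mtx_lock_sh_quick(mtx);
	/* ... read-only access; blocking while held is allowed ... */
	_mtx_unlock(mtx);
}

#endif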

/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

/*
 * Attempt to acquire a spinlock.  If we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;
	u_int	lock;
	u_int	nlock;
	int	res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks;
			cpu_ccfence();
			crit_exit_raw(gd->gd_curthread);
			res = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return res;
}

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}
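
/*
 * Illustrative try-lock sketch (documentation only, not compiled).
 * The try forms never block; they return EAGAIN on contention, which
 * makes them usable from contexts that cannot sleep.
 */
#if 0

static int
example_try_usage(mtx_t *mtx)
{
	if (_mtx_lock_ex_try(mtx) == 0) {
		/* ... got the lock ... */
		_mtx_unlock(mtx);
		return 0;
	}
	return EAGAIN;		/* contested; caller defers or retries */
}

#endif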

/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared.  Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
	}
}

/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
	}
	return (error);
}
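
/*
 * Illustrative upgrade/downgrade sketch (documentation only, not
 * compiled).  The upgrade can only succeed while we are the sole
 * shared holder; on EDEADLK the caller must back off (drop the shared
 * lock and reacquire exclusively) rather than spin on the upgrade.
 */
#if 0

static void
example_upgrade_usage(mtx_t *mtx)
{
	_mtx_lock_sh_quick(mtx);
	if (_mtx_upgrade_try(mtx) == 0) {
		/* ... exclusive access ... */
		_mtx_downgrade(mtx);	/* back to shared, wakes waiters */
	} else {
		/* EDEADLK: other shared holders exist */
		_mtx_unlock(mtx);
		_mtx_lock_ex_quick(mtx);
		/* ... exclusive access, but state may have changed ... */
	}
	_mtx_unlock(mtx);
}

#endif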

/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Upgrade this
			 * final shared lock to exclusive and transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}

/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if the caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *	 acquiring LINKSPIN as all other cases will also need to acquire
 *	 LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;	/* upgrade if necessary */
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		link = mtx->mtx_exlink;
		KKASSERT(link != NULL);
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock = MTX_LINKSPIN | MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			nlock = MTX_LINKSPIN;			/* to clear */
		}
		KKASSERT(link->state == MTX_LINK_LINKED_EX);
		mtx->mtx_owner = link->owner;
		cpu_sfence();

		/*
		 * WARNING! The callback can only be safely
		 *	    made with LINKSPIN still held
		 *	    and in a critical section.
		 *
		 * WARNING! The link can go away after the
		 *	    state is set, or after the
		 *	    callback.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, 0);
		} else {
			link->state = MTX_LINK_ACQUIRED;
			wakeup(link);
		}
		atomic_clear_int(&mtx->mtx_lock, nlock);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}
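
/*
 * The exlink/shlink queues manipulated above are circular
 * doubly-linked lists with mtx_exlink/mtx_shlink pointing at the head.
 * A standalone sketch of the idiom (hypothetical struct, mirrors the
 * pointer manipulation used throughout this file):
 */
#if 0

struct clink {
	struct clink *next;
	struct clink *prev;
};

static void
clink_enqueue(struct clink **headp, struct clink *link)
{
	if (*headp) {
		/* insert at the tail, i.e. just before the head */
		link->next = *headp;
		link->prev = (*headp)->prev;
		link->next->prev = link;
		link->prev->next = link;
	} else {
		/* first element points at itself */
		link->next = link;
		link->prev = link;
		*headp = link;
	}
}

static void
clink_dequeue_head(struct clink **headp)
{
	struct clink *link = *headp;

	if (link->next == link) {
		*headp = NULL;			/* last element */
	} else {
		*headp = link->next;
		link->next->prev = link->prev;
		link->prev->next = link->next;
	}
}

#endif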

/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *	      consume.  We will apply this to the first link, but must add
 *	      additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	addcount;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		/*
		 * It should not be possible for SHWANTED to be set without
		 * any links pending.
		 */
		KKASSERT(mtx->mtx_shlink != NULL);

		/*
		 * We have to process the count for all shared locks before
		 * we process any of the links.  Count the additional shared
		 * locks beyond the first link (which is already accounted
		 * for) and associate the full count with the lock
		 * immediately.
		 */
		addcount = 0;
		for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
		     link = link->next) {
			++addcount;
		}
		if (addcount > 0)
			atomic_add_int(&mtx->mtx_lock, addcount);

		/*
		 * We can wakeup all waiting shared locks.
		 */
		while ((link = mtx->mtx_shlink) != NULL) {
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
			} else {
				mtx->mtx_shlink = link->next;
				link->next->prev = link->prev;
				link->prev->next = link->next;
			}
			link->next = NULL;
			link->prev = NULL;
			cpu_sfence();
			if (link->callback) {
				link->state = MTX_LINK_CALLEDBACK;
				link->callback(link, link->arg, 0);
			} else {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				wakeup(link);
			}
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}

/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN.
	 *
	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
	 * result in too much cpu cache traffic.
	 */
	crit_enter_raw(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Delete the link and release LINKSPIN.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_LINKED_EX:
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock |= MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	case MTX_LINK_LINKED_SH:
		if (link->next == link) {
			mtx->mtx_shlink = NULL;
			nlock |= MTX_SHWANTED;	/* to clear */
		} else {
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	default:
		/* no change */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	crit_exit_raw(td);
}
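
/*
 * mtx_wait_link() below relies on the tsleep_interlock()/PINTERLOCKED
 * handshake to close the race between testing the link state and going
 * to sleep.  A minimal sketch of the idiom in isolation ('cond' and
 * 'wchan' are hypothetical):
 */
#if 0

static int
example_wait_for_cond(volatile int *cond, void *wchan)
{
	int error = 0;

	while (*cond == 0) {
		tsleep_interlock(wchan, 0);	/* arm the wakeup first */
		cpu_lfence();
		if (*cond == 0) {		/* re-test under interlock */
			error = tsleep(wchan, PINTERLOCKED, "excond", 0);
			if (error)
				break;
		}
	}
	return error;
}

#endif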

/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	int error;

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.  The LINKED
	 * bit was set by this cpu so we can test it without
	 * fences.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			if (link->state & MTX_LINK_LINKED_SH)
				mycpu->gd_cnt.v_lock_name[0] = 'S';
			else
				mycpu->gd_cnt.v_lock_name[0] = 'X';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				mtx->mtx_ident,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
	}

	/*
	 * We need at least an lfence (load fence) to ensure our cpu does not
	 * reorder loads (of data outside the lock structure) prior to the
	 * remote cpu's release, since the above test may have run without
	 * any atomic interactions.
	 *
	 * If we do not do this then state updated by the other cpu before
	 * releasing its lock may not be read cleanly by our cpu when this
	 * function returns.  Even though the other cpu ordered its stores,
	 * our loads can still be out of order.
	 */
	cpu_mfence();

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case, so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	return error;
}

/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN
	 */
	crit_enter_raw(td);
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Do the abort.
	 *
	 * WARNING! The link structure can disappear once link->state is set.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED_EX:
		/*
		 * De-link, mark aborted, and potentially wake up the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_exlink == link) {
				mtx->mtx_exlink = NULL;
				nlock |= MTX_EXWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_exlink == link)
				mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting, the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_LINKED_SH:
		/*
		 * De-link, mark aborted, and potentially wake up the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_shlink == link) {
				mtx->mtx_shlink = NULL;
				nlock |= MTX_SHWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_shlink == link)
				mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting, the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * Link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	crit_exit_raw(td);
}
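
/*
 * Illustrative abort sketch (documentation only, not compiled).
 * Thread A blocks synchronously on a caller-supplied link; thread B
 * can abort the attempt, causing thread A's lock call to return
 * ENOLCK.
 */
#if 0

static void
example_waiter(mtx_t *mtx, mtx_link_t *link)		/* thread A */
{
	int error;

	mtx_link_init(link);
	error = _mtx_lock_ex_link(mtx, link, 0, 0);
	if (error == 0) {
		/* ... */
		_mtx_unlock(mtx);
	}
	/* error == ENOLCK if example_canceler() aborted us */
}

static void
example_canceler(mtx_t *mtx, mtx_link_t *link)		/* thread B */
{
	mtx_abort_link(mtx, link);
}

#endif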