/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - Locks can be aborted (the async callback, if any, is made w/ENOLCK).
 * - Locks can be asynchronous.
 * - Synchronous fast path if no blocking occurs (the async callback is
 *   not made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
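/*
 * Illustrative only: a minimal sketch of typical synchronous use of the
 * backoff functions in this file.  Callers would normally go through the
 * inline wrappers in sys/mutex[2].h rather than calling the underscored
 * functions directly; the mutex is assumed to have already been
 * initialized via the mutex2.h support.
 */
#if 0
static void
example_sync_usage(mtx_t *mtx)		/* assume mtx already initialized */
{
	/*
	 * Block until the exclusive lock is acquired.  With no flags and
	 * no timeout the return value can only be 0.
	 */
	_mtx_lock_ex(mtx, 0, 0);
	/* ... exclusive critical section ... */
	_mtx_unlock(mtx);
}
#endif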
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If this is an asynchronous lock request return without
		 * blocking, leaving the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, 0, 0));
}
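/*
 * Illustrative only: a minimal sketch of an asynchronous exclusive lock
 * request using a caller-supplied link.  The callback name and its
 * argument are hypothetical; the callback signature (link, arg, error)
 * is taken from the dispatch sites elsewhere in this file, and a non-NULL
 * link->callback is what marks the request asynchronous.
 */
#if 0
static void
example_ex_callback(mtx_link_t *link, void *arg, int error)
{
	/* error is 0 on acquisition, ENOLCK if the request was aborted */
}

static void
example_async_usage(mtx_t *mtx, mtx_link_t *link)
{
	int error;

	mtx_link_init(link);
	link->callback = example_ex_callback;	/* marks request async */
	link->arg = NULL;

	error = _mtx_lock_ex_link(mtx, link, 0, 0);
	if (error == 0) {
		/*
		 * Fast path: acquired immediately.  The callback was NOT
		 * issued and the link structure was never linked in.
		 */
		_mtx_unlock(mtx);
	} else if (error == EINPROGRESS) {
		/*
		 * Queued.  The link must remain valid until the callback
		 * reports acquisition (or abort).
		 */
	}
}
#endif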
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}
		if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
		 * pending shared requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_shlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_SHWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the shlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_SH;
		if (mtx->mtx_shlink) {
			link->next = mtx->mtx_shlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_shlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If this is an asynchronous lock request return without
		 * blocking, leaving the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, 0, 0));
}
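/*
 * Illustrative only: a minimal sketch of a shared lock with a catchable
 * sleep and a timeout.  PCATCH allows the tsleep() to be interrupted by
 * a signal (EINTR/ERESTART) and a non-zero tick count can produce
 * EWOULDBLOCK; both are simply propagated to the caller here.
 */
#if 0
static int
example_sh_usage(mtx_t *mtx)
{
	int error;

	error = _mtx_lock_sh(mtx, PCATCH, hz);	/* ~1 second timeout */
	if (error)
		return (error);	/* EINTR, ERESTART, or EWOULDBLOCK */
	/* ... read-side critical section ... */
	_mtx_unlock(mtx);
	return (0);
}
#endif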
/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

/*
 * Attempt to acquire a spinlock.  If we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition
 * made by the caller.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;
	u_int	lock;
	u_int	nlock;
	int	res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks;
			cpu_ccfence();
			--gd->gd_curthread->td_critcount;
			res = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return res;
}

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}
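/*
 * Illustrative only: a minimal sketch of the non-blocking try pattern.
 * Both try functions return 0 on success and EAGAIN when the lock cannot
 * be obtained without blocking; the fallback work here is hypothetical.
 */
#if 0
static void
example_trylock_usage(mtx_t *mtx)
{
	if (_mtx_lock_ex_try(mtx) == 0) {
		/* ... exclusive critical section ... */
		_mtx_unlock(mtx);
	} else {
		/* EAGAIN: lock busy, defer or do fallback work instead */
	}
}
#endif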
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared.  Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock, 1))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
	}
}

/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
	}
	return (error);
}
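/*
 * Illustrative only: a minimal sketch of the read-mostly upgrade pattern.
 * If the non-blocking upgrade fails (EDEADLK: shared count other than 1,
 * or a WANTED race), the caller drops the shared lock, reacquires
 * exclusively, and re-validates whatever it computed under the shared
 * lock.  The work in the comments is hypothetical.
 */
#if 0
static void
example_upgrade_usage(mtx_t *mtx)
{
	_mtx_lock_sh(mtx, 0, 0);
	/* ... inspect state ... */
	if (_mtx_upgrade_try(mtx) != 0) {
		_mtx_unlock(mtx);
		_mtx_lock_ex(mtx, 0, 0);
		/* ... re-validate, another thread may have run ... */
	}
	/* ... modify state under the exclusive lock ... */
	_mtx_downgrade(mtx);		/* back to shared */
	/* ... continue reading ... */
	_mtx_unlock(mtx);
}
#endif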
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}

/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if the caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *	 acquiring LINKSPIN as all other cases will also need to acquire
 *	 LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		link = mtx->mtx_exlink;
		KKASSERT(link != NULL);
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock = MTX_LINKSPIN | MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			nlock = MTX_LINKSPIN;			/* to clear */
		}
		KKASSERT(link->state == MTX_LINK_LINKED_EX);
		mtx->mtx_owner = link->owner;
		cpu_sfence();

		/*
		 * WARNING! The callback can only be safely
		 *	    made with LINKSPIN still held
		 *	    and in a critical section.
		 *
		 * WARNING! The link can go away after the
		 *	    state is set, or after the
		 *	    callback.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, 0);
		} else {
			link->state = MTX_LINK_ACQUIRED;
			wakeup(link);
		}
		atomic_clear_int(&mtx->mtx_lock, nlock);
		--td->td_critcount;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}
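/*
 * Illustrative only: the circular, doubly-linked list discipline used for
 * mtx_exlink/mtx_shlink, reduced to two hypothetical helpers.  A
 * one-element list points at itself, which is why "link->next == link"
 * identifies the last link in the chain functions above and below.  These
 * helpers are a sketch, not part of the API, and the real code only ever
 * performs these splices while holding MTX_LINKSPIN.
 */
#if 0
static void
example_link_enqueue(mtx_link_t **headp, mtx_link_t *link)
{
	if (*headp) {
		link->next = *headp;
		link->prev = (*headp)->prev;
		link->next->prev = link;
		link->prev->next = link;
	} else {
		link->next = link;
		link->prev = link;
		*headp = link;
	}
}

static void
example_link_dequeue_head(mtx_link_t **headp)
{
	mtx_link_t *link = *headp;

	if (link->next == link) {
		*headp = NULL;			/* last link */
	} else {
		*headp = link->next;
		link->next->prev = link->prev;
		link->prev->next = link->next;
	}
}
#endif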
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches.
 *
 * If addcount is 0, the count for the first shared lock in the chain is
 * assumed to have already been accounted for.
 *
 * If addcount is 1, the count for the first shared lock in the chain has
 * not yet been accounted for.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		KKASSERT(mtx->mtx_shlink != NULL);
		for (;;) {
			link = mtx->mtx_shlink;
			atomic_add_int(&mtx->mtx_lock, addcount);
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
				cpu_sfence();

				/*
				 * WARNING! The callback can only be safely
				 *	    made with LINKSPIN still held
				 *	    and in a critical section.
				 *
				 * WARNING! The link can go away after the
				 *	    state is set, or after the
				 *	    callback.
				 */
				if (link->callback) {
					link->state = MTX_LINK_CALLEDBACK;
					link->callback(link, link->arg, 0);
				} else {
					link->state = MTX_LINK_ACQUIRED;
					wakeup(link);
				}
				break;
			}
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			cpu_sfence();
			link->state = MTX_LINK_ACQUIRED;
			/* link can go away */
			wakeup(link);
			addcount = 1;
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		--td->td_critcount;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}

/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN.
	 *
	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
	 * result in too much cpu cache traffic.
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Delete the link and release LINKSPIN.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_LINKED_EX:
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock |= MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	case MTX_LINK_LINKED_SH:
		if (link->next == link) {
			mtx->mtx_shlink = NULL;
			nlock |= MTX_SHWANTED;	/* to clear */
		} else {
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	default:
		/* no change */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}

/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	int error;

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			if (link->state & MTX_LINK_LINKED_SH)
				mycpu->gd_cnt.v_lock_name[0] = 'S';
			else
				mycpu->gd_cnt.v_lock_name[0] = 'X';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				mtx->mtx_ident,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
	}

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	return error;
}
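/*
 * Illustrative only: the interlocked sleep pattern used by mtx_wait_link()
 * above, reduced to its essentials.  tsleep_interlock() registers the
 * wakeup address before the condition is re-tested, so a wakeup() that
 * fires between the re-test and the tsleep() is not lost; PINTERLOCKED
 * tells tsleep() to consume that pre-registered event.  The condition
 * variable and wmesg here are hypothetical.
 */
#if 0
static void
example_interlocked_sleep(volatile int *cond)
{
	while (*cond == 0) {
		tsleep_interlock(cond, 0);
		cpu_lfence();		/* re-read after registration */
		if (*cond == 0)
			tsleep(cond, PINTERLOCKED, "examp", 0);
	}
}
#endif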
/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that aborting a
 * linked asynchronous request still issues the callback, with ENOLCK;
 * only a request aborted before it was linked skips the callback and
 * instead fails the subsequent lock attempt directly with ENOLCK.
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Do the abort.
	 *
	 * WARNING! Link structure can disappear once link->state is set.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED_EX:
		/*
		 * De-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_exlink == link) {
				mtx->mtx_exlink = NULL;
				nlock |= MTX_EXWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_exlink == link)
				mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_LINKED_SH:
		/*
		 * De-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_shlink == link) {
				mtx->mtx_shlink = NULL;
				nlock |= MTX_SHWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_shlink == link)
				mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * Link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}
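/*
 * Illustrative only: a minimal sketch of aborting an asynchronous lock
 * request and synchronizing with its completion.  The surrounding
 * teardown context is hypothetical.
 */
#if 0
static void
example_abort_usage(mtx_t *mtx, mtx_link_t *link)
{
	/* Assume a prior async _mtx_lock_ex_link() returned EINPROGRESS. */
	mtx_abort_link(mtx, link);

	/*
	 * The abort may race the acquisition; the callback receives the
	 * final disposition (0 if the lock was acquired after all, ENOLCK
	 * if the abort won).  mtx_wait_link() is used here only to
	 * synchronize with completion and reset the link to IDLE before
	 * the link structure is reused or freed.
	 */
	mtx_wait_link(mtx, link, 0, 0);
}
#endif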