/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>

static int64_t mtx_contention_count;
static int64_t mtx_collision_count;
static int64_t mtx_wakeup_count;

SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
            &mtx_contention_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
            &mtx_collision_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
            &mtx_wakeup_count, 0, "");

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
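
/*
 * Illustrative only (not part of the implementation): a minimal sketch of
 * the synchronous lock/unlock pattern described in the comment block above,
 * using the underscored entry points defined in this file.  Callers normally
 * go through the inline wrappers in sys/mutex2.h; the "example_*" names
 * below are made up for the example, and the mutex is assumed to have been
 * initialized through the usual mtx_init() path (signature not shown here).
 */
#if 0
static mtx_t example_mtx;               /* assume initialized elsewhere */

static void
example_sync_usage(void)
{
        /* Exclusive lock, blocking, no timeout; recursion is allowed. */
        if (_mtx_lock_ex(&example_mtx, 0, 0) == 0) {
                /* ... modify the protected structure ... */
                _mtx_unlock(&example_mtx);
        }

        /* Shared lock for readers; multiple holders may coexist. */
        if (_mtx_lock_sh(&example_mtx, 0, 0) == 0) {
                /* ... read the protected structure ... */
                _mtx_unlock(&example_mtx);
        }
}
#endif
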
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, or EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int   lock;
        u_int   nlock;
        int     error;
        int     isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }
                if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }

                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
                 * pending exclusive requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }

                /*
                 * Check for early abort.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_exlink == NULL) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_EXWANTED);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }

                /*
                 * Add our link to the exlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_EX;
                if (mtx->mtx_exlink) {
                        link->next = mtx->mtx_exlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_exlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If this is an asynchronous lock request, return without
                 * blocking and leave the link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for the lock.
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return (__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return (__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return (__mtx_lock_ex(mtx, &link, 0, 0));
}

/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *       do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int   lock;
        u_int   nlock;
        int     error;
        int     isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                error = 0;
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }
                if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }

                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
                 * pending shared requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }

                /*
                 * Check for early abort.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_shlink == NULL) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }

                /*
                 * Add our link to the shlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_SH;
                if (mtx->mtx_shlink) {
                        link->next = mtx->mtx_shlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_shlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If this is an asynchronous lock request, return without
                 * blocking and leave the link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for the lock.
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return (__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return (__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return (__mtx_lock_sh(mtx, &link, 0, 0));
}

/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     bb = 1;
        int     bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                        ++mtx_contention_count;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}
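
/*
 * Illustrative only: a rough sketch of the caller-side "predisposition"
 * that _mtx_spinlock_try() below expects.  The failure path in that
 * function decrements gd_spinlocks and td_critcount, so the inline
 * wrapper in sys/mutex2.h presumably increments them before calling in.
 * The wrapper name and exact layout here are assumptions, not the real
 * header contents.
 */
#if 0
static __inline int
example_spinlock_try(mtx_t *mtx)
{
        globaldata_t gd = mycpu;

        /* Predispose: enter a critical section and account the spinlock. */
        ++gd->gd_curthread->td_critcount;
        cpu_ccfence();
        ++gd->gd_spinlocks;

        /* _mtx_spinlock_try() undoes the predisposition on EAGAIN. */
        return (_mtx_spinlock_try(mtx));
}
#endif
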
/*
 * Attempt to acquire a spinlock; if we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
        globaldata_t gd = mycpu;
        u_int   lock;
        u_int   nlock;
        int     res = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = gd->gd_curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == gd->gd_curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        --gd->gd_spinlocks;
                        cpu_ccfence();
                        --gd->gd_curthread->td_critcount;
                        res = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return res;
}

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     bb = 1;
        int     bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                        ++mtx_contention_count;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                error = 0;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}

/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                /*
                 * NOP if already shared.
                 */
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) > 0);
                        break;
                }

                /*
                 * Transfer count to shared.  Any additional pending shared
                 * waiters must be woken up.
                 */
                if (lock & MTX_SHWANTED) {
                        if (mtx_chain_link_sh(mtx, lock, 1))
                                break;
                        /* retry */
                } else {
                        nlock = lock & ~MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                        /* retry */
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}
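
/*
 * Illustrative only: typical downgrade and upgrade-with-fallback patterns
 * built on the primitives in this file.  The helper name is made up for
 * the example and error handling is elided; with flags and timeout of 0
 * the blocking lock calls here cannot fail.
 */
#if 0
static void
example_downgrade_upgrade(mtx_t *mtx)
{
        /* Build a structure exclusively, then downgrade so readers can enter. */
        if (_mtx_lock_ex(mtx, 0, 0) == 0) {
                /* ... initialize the protected data ... */
                _mtx_downgrade(mtx);
                /* ... continue with read-only access ... */
                _mtx_unlock(mtx);
        }

        /* Opportunistic upgrade; fall back to a full exclusive lock. */
        if (_mtx_lock_sh(mtx, 0, 0) == 0) {
                if (_mtx_upgrade_try(mtx) == EDEADLK) {
                        _mtx_unlock(mtx);
                        _mtx_lock_ex(mtx, 0, 0);
                }
                /* ... exclusive access here ... */
                _mtx_unlock(mtx);
        }
}
#endif
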
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error = 0;

        for (;;) {
                lock = mtx->mtx_lock;

                if ((lock & ~MTX_EXWANTED) == 1) {
                        nlock = lock | MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if (lock & MTX_EXCLUSIVE) {
                        KKASSERT(mtx->mtx_owner == curthread);
                        break;
                } else {
                        error = EDEADLK;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}

/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                switch(lock) {
                case MTX_EXCLUSIVE | 1:
                        /*
                         * Last release, exclusive lock.
                         * No exclusive or shared requests pending.
                         */
                        mtx->mtx_owner = NULL;
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
                case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         * Exclusive requests pending.
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         *
                         * Shared requests are pending.  Transfer our count (1)
                         * to the first shared request, wakeup all shared reqs.
                         */
                        if (mtx_chain_link_sh(mtx, lock, 0))
                                goto done;
                        break;
                case 1:
                        /*
                         * Last release, shared lock.
                         * No exclusive or shared requests pending.
                         */
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXWANTED | 1:
                case MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         *
                         * Exclusive requests are pending.  Transfer our
                         * count (1) to the next exclusive request.
                         *
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         * Shared requests pending.
                         */
                        if (mtx_chain_link_sh(mtx, lock, 0))
                                goto done;
                        break;
                default:
                        /*
                         * We have to loop if this is the last release but
                         * someone is fiddling with LINKSPIN.
                         */
                        if ((lock & MTX_MASK) == 1) {
                                KKASSERT(lock & MTX_LINKSPIN);
                                break;
                        }

                        /*
                         * Not the last release (shared or exclusive)
                         */
                        nlock = lock - 1;
                        KKASSERT((nlock & MTX_MASK) != MTX_MASK);
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                }
                /* loop try again */
                cpu_pause();
                ++mtx_collision_count;
        }
done:
        ;
}

/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *       acquiring LINKSPIN as all other cases will also need to acquire
 *       LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int   nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                link = mtx->mtx_exlink;
                KKASSERT(link != NULL);
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock = MTX_LINKSPIN | MTX_EXWANTED;    /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                        nlock = MTX_LINKSPIN;                   /* to clear */
                }
                KKASSERT(link->state == MTX_LINK_LINKED_EX);
                mtx->mtx_owner = link->owner;
                cpu_sfence();

                /*
                 * WARNING! The callback can only be safely
                 *          made with LINKSPIN still held
                 *          and in a critical section.
                 *
                 * WARNING! The link can go away after the
                 *          state is set, or after the
                 *          callback.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, 0);
                } else {
                        link->state = MTX_LINK_ACQUIRED;
                        wakeup(link);
                }
                atomic_clear_int(&mtx->mtx_lock, nlock);
                --td->td_critcount;
                ++mtx_wakeup_count;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}

/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches.
 *
 * If addcount is 0, the count for the first shared lock in the chain is
 * assumed to have already been accounted for.
 *
 * If addcount is 1, the count for the first shared lock in the chain has
 * not yet been accounted for.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int   nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN;
        nlock &= ~MTX_EXCLUSIVE;
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                KKASSERT(mtx->mtx_shlink != NULL);
                for (;;) {
                        link = mtx->mtx_shlink;
                        atomic_add_int(&mtx->mtx_lock, addcount);
                        KKASSERT(link->state == MTX_LINK_LINKED_SH);
                        if (link->next == link) {
                                mtx->mtx_shlink = NULL;
                                cpu_sfence();

                                /*
                                 * WARNING! The callback can only be safely
                                 *          made with LINKSPIN still held
                                 *          and in a critical section.
                                 *
                                 * WARNING! The link can go away after the
                                 *          state is set, or after the
                                 *          callback.
                                 */
                                if (link->callback) {
                                        link->state = MTX_LINK_CALLEDBACK;
                                        link->callback(link, link->arg, 0);
                                } else {
                                        link->state = MTX_LINK_ACQUIRED;
                                        wakeup(link);
                                }
                                ++mtx_wakeup_count;
                                break;
                        }
                        mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                        cpu_sfence();
                        link->state = MTX_LINK_ACQUIRED;
                        /* link can go away */
                        wakeup(link);
                        ++mtx_wakeup_count;
                        addcount = 1;
                }
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                --td->td_critcount;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}

/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int   lock;
        u_int   nlock;

        /*
         * Acquire MTX_LINKSPIN.
         *
         * Do not use cmpxchg to wait for LINKSPIN to clear as this might
         * result in too much cpu cache traffic.
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
                ++mtx_collision_count;
        }

        /*
         * Delete the link and release LINKSPIN.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_LINKED_EX:
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock |= MTX_EXWANTED;  /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        case MTX_LINK_LINKED_SH:
                if (link->next == link) {
                        mtx->mtx_shlink = NULL;
                        nlock |= MTX_SHWANTED;  /* to clear */
                } else {
                        mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        default:
                /* no change */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}

/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        int error;

        /*
         * Sleep.  Handle false wakeups, interruptions, etc.
         * The link may also have been aborted.
         */
        error = 0;
        while (link->state & MTX_LINK_LINKED) {
                tsleep_interlock(link, 0);
                cpu_lfence();
                if (link->state & MTX_LINK_LINKED) {
                        ++mtx_contention_count;
                        if (link->state & MTX_LINK_LINKED_SH)
                                mycpu->gd_cnt.v_lock_name[0] = 'S';
                        else
                                mycpu->gd_cnt.v_lock_name[0] = 'X';
                        strncpy(mycpu->gd_cnt.v_lock_name + 1,
                                mtx->mtx_ident,
                                sizeof(mycpu->gd_cnt.v_lock_name) - 2);
                        ++mycpu->gd_cnt.v_lock_colls;

                        error = tsleep(link, flags | PINTERLOCKED,
                                       mtx->mtx_ident, to);
                        if (error)
                                break;
                }
        }

        /*
         * We are done, make sure the link structure is unlinked.
         * It may still be on the list due to e.g. EINTR or
         * EWOULDBLOCK.
         *
         * It is possible for the tsleep to race an ABORT and cause
         * error to be 0.
         *
         * The tsleep() can be woken up for numerous reasons and error
         * might be zero in situations where we intend to return an error.
         *
         * (This is the synchronous case so state cannot be CALLEDBACK)
         */
        switch(link->state) {
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                error = 0;
                break;
        case MTX_LINK_ABORTED:
                error = ENOLCK;
                break;
        case MTX_LINK_LINKED_EX:
        case MTX_LINK_LINKED_SH:
                mtx_delete_link(mtx, link);
                /* fall through */
        default:
                if (error == 0)
                        error = EWOULDBLOCK;
                break;
        }

        /*
         * Clear state on status returned.
         */
        link->state = MTX_LINK_IDLE;

        return error;
}

/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int   lock;
        u_int   nlock;

        /*
         * Acquire MTX_LINKSPIN
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
                ++mtx_collision_count;
        }

        /*
         * Do the abort.
         *
         * WARNING! Link structure can disappear once link->state is set.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_IDLE:
                /*
                 * Link not started yet
                 */
                link->state = MTX_LINK_ABORTED;
                break;
        case MTX_LINK_LINKED_EX:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_exlink == link) {
                                mtx->mtx_exlink = NULL;
                                nlock |= MTX_EXWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_exlink == link)
                                mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                ++mtx_wakeup_count;
                break;
        case MTX_LINK_LINKED_SH:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_shlink == link) {
                                mtx->mtx_shlink = NULL;
                                nlock |= MTX_SHWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_shlink == link)
                                mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                ++mtx_wakeup_count;
                break;
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                /*
                 * Too late, the lock was acquired.  Let it complete.
                 */
                break;
        default:
                /*
                 * link already aborted, do nothing.
                 */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}
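
/*
 * Illustrative only: a sketch of the asynchronous lock / abort flow that
 * the comments above describe.  The direct assignment of the link's
 * callback and arg fields and the callback signature are inferred from
 * how this file uses them; real callers normally set the link up through
 * helpers in sys/mutex2.h.  The "example_*" names are made up.
 */
#if 0
static void
example_lock_callback(mtx_link_t *link, void *arg, int error)
{
        /* error is 0 on acquisition, ENOLCK if the request was aborted */
}

static void
example_async_usage(mtx_t *mtx)
{
        mtx_link_t link;
        int error;

        mtx_link_init(&link);
        link.callback = example_lock_callback; /* assumed direct setup */
        link.arg = NULL;

        error = _mtx_lock_ex_link(mtx, &link, 0, 0);
        if (error == 0) {
                /* Fast path: acquired synchronously, no callback was made. */
                _mtx_unlock(mtx);
        } else if (error == EINPROGRESS) {
                /*
                 * Queued; the callback runs when the lock is granted or
                 * the request is aborted.  To cancel, request an abort;
                 * the callback is then made with ENOLCK.
                 */
                mtx_abort_link(mtx, &link);
        }
}
#endif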