/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}
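
/*
 * Usage sketch (illustrative only, not from the original source):
 * hammer_lock_ex_try() returns 0 on success and EAGAIN when the lock is
 * held by another thread, so callers typically defer rather than block.
 * 'node' is a hypothetical structure embedding a hammer_lock on which
 * the caller already holds a ref:
 *
 *	if (hammer_lock_ex_try(&node->lock) == 0) {
 *		...modify the structure...
 *		hammer_unlock(&node->lock);
 *	} else {
 *		...requeue or defer the operation...
 *	}
 */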

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  A 'cont' from the debugger continues
			 * with the exclusive lock held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  A 'cont' from the debugger continues
			 * with the exclusive lock held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->lowner != curthread)
				panic("hammer_lock_upgrade: illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			panic("hammer_lock_upgrade: lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}
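
/*
 * Usage sketch (illustrative only): a common pattern is to take a shared
 * lock for the read path and opportunistically upgrade when a modification
 * turns out to be needed, falling back to a full exclusive acquisition on
 * EDEADLK.  'lock' is a hypothetical hammer_lock the caller has a ref on,
 * and needs_modification is a hypothetical predicate.  Note that
 * hammer_lock_downgrade() below requires a single exclusive count, which
 * holds in both branches here:
 *
 *	hammer_lock_sh(lock);
 *	if (needs_modification) {
 *		if (hammer_lock_upgrade(lock) == EDEADLK) {
 *			hammer_unlock(lock);
 *			hammer_lock_ex(lock);
 *		}
 *		...modify...
 *		hammer_lock_downgrade(lock);
 *	}
 *	...read...
 *	hammer_unlock(lock);
 */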

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | 1));
	KKASSERT(lock->lowner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->lowner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(&lock->lockval);
			break;
		}
	}
}

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	lv = lock->lockval;
	KKASSERT(lv != 0);
	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		KKASSERT(lock->lowner == td);

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			panic("hammer_unlock: lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held
 * exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
			nlv = (lv + 1) | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			nlv = (lv + 1);
			KKASSERT((int)nlv > 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			KKASSERT((int)lv > 0);
			nlv = (lv - 1);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}
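
/*
 * Illustrative note (not from the original source): the routines in this
 * file share a lockless snapshot/compute/compare-and-set idiom.  The loop
 * only retries when another cpu modified the word between the snapshot
 * and the CAS; compute_new() stands in for the per-routine transform:
 *
 *	for (;;) {
 *		lv = lock->refs;			// snapshot
 *		nlv = compute_new(lv);			// hypothetical
 *		if (atomic_cmpset_int(&lock->refs, lv, nlv))
 *			break;				// no race, done
 *	}
 */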

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK flag is found to be
 * set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0	(-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		Check condition and cannot lock
	 * (d) Return 1	(+LOCKED, +CHECK)	Successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1	(+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}
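
/*
 * Usage sketch (illustrative only, following the comments above): a TRUE
 * return makes this thread responsible for validating the structure; the
 * interlock is then dropped with hammer_ref_interlock_done() on success.
 * 'ip' and load_structure() are hypothetical:
 *
 *	if (hammer_ref_interlock(&ip->lock)) {
 *		load_structure(ip);
 *		hammer_ref_interlock_done(&ip->lock);
 *	}
 *	// FALSE: structure already referenced and valid, nothing to do
 */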

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * TRUE is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;

		if (lv) {
			panic("hammer_ref_interlock_true: bad lock %p %08x\n",
			      lock, lock->refs);
		}
		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			lock->rowner = curthread;
			return (1);
		}
	}
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~HAMMER_REFS_FLAGS;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock; the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}
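
/*
 * Usage sketch (illustrative only): dropping the final reference returns
 * TRUE with the interlock held so the structure can be torn down without
 * racing a new ref.  Because refs are not stable while the interlock is
 * held, real callers re-check lock->refs before destroying anything.
 * 'ip' and teardown_structure() are hypothetical:
 *
 *	if (hammer_rel_interlock(&ip->lock, 0)) {
 *		teardown_structure(ip);
 *		hammer_rel_interlock_done(&ip->lock, 0);
 *	}
 */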

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			return(0);
		}
	}
	/* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the ref count is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		else
			nlv |= HAMMER_REFS_CHECK;

		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			return;
		}
	}
}
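
/*
 * Usage sketch (illustrative only): hammer_get_interlock() always
 * acquires the interlock; its return value mirrors the CHECK convention
 * of hammer_ref_interlock(), and the outcome is reported back through
 * hammer_put_interlock().  revalidate_structure() is hypothetical:
 *
 *	error = 0;
 *	if (hammer_get_interlock(&lock))	// TRUE if CHECK was set
 *		error = revalidate_structure(...);
 *	hammer_put_interlock(&lock, error);	// re-flags CHECK on error
 */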

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority
 * over a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}
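
/*
 * Usage sketch for the sync-lock wrappers above (illustrative only):
 * code modifying meta-data brackets the operation so that it becomes
 * part of the current flush group atomically:
 *
 *	hammer_sync_lock_sh(trans);
 *	...modify meta-data...
 *	hammer_sync_unlock(trans);
 */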

/*
 * ALGORITHM VERSION 1:
 *
 * Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 * crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 * to generate a unique key and will scan all entries with the same upper
 * 32 bits when issuing a lookup.
 *
 * 0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 2:
 *
 * The 64 bit hash key is generated from the following components.  The
 * first three characters are encoded as 5-bit quantities, the middle
 * N characters are hashed into a 6 bit quantity, and the last two
 * characters are encoded as 5-bit quantities.  A 32 bit hash of the
 * entire filename is encoded in the low 32 bits.  Bit 0 is set to
 * 0 to guarantee us a 2^24 iteration space.
 *
 * 0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
 *
 * This gives us a domain sort for the first three characters, the last
 * two characters, and breaks the middle space into 64 random domains.
 * The domain sort folds upper case, lower case, digits, and punctuation
 * spaces together, the idea being that filenames tend not to be a mix
 * of those domains.
 *
 * The 64 random domains act as a sub-sort for the middle characters
 * but may cause a random seek.  If the filesystem is being accessed
 * in sorted order we should tend to get very good linearity for most
 * filenames and devolve into more random seeks otherwise.
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	int64_t key;
	int32_t crcx;
	const char *aname = name;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;

		switch(len) {
		default:
			crcx = crc32(aname + 3, len - 5);
			crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
			key |= (int64_t)(crcx & 0x3F) << 42;
			/* fall through */
		case 5:
		case 4:
			/* fall through */
		case 3:
			key |= ((int64_t)(aname[2] & 0x1F) << 48);
			/* fall through */
		case 2:
			key |= ((int64_t)(aname[1] & 0x1F) << 53) |
			       ((int64_t)(aname[len-2] & 0x1F) << 37);
			/* fall through */
		case 1:
			key |= ((int64_t)(aname[0] & 0x1F) << 58) |
			       ((int64_t)(aname[len-1] & 0x1F) << 32);
			/* fall through */
		case 0:
			break;
		}
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				(long long)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
		break;
	}
	return(key);
}
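
/*
 * Usage sketch (illustrative only): a directory lookup computes the hash
 * for a name and then scans the collision space bounded by the returned
 * iteration count, matching the key layouts described above:
 *
 *	u_int32_t max_iterations;
 *	int64_t key;
 *
 *	key = hammer_directory_namekey(dip, "example.txt", 11,
 *				       &max_iterations);
 *	// scan B-Tree keys in [key, key + max_iterations] for the entry
 */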

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *	 to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}
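
/*
 * Usage sketch (illustrative only): a read path verifying record data
 * against the B-Tree leaf it was found under.  'leaf' and 'data' are a
 * hypothetical hammer_btree_leaf_elm_t and its associated data:
 *
 *	if (hammer_crc_test_leaf(data, leaf) == 0)
 *		...report a CRC error and fail the lookup...
 */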

void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return((int)file_offset & HAMMER_BUFMASK);
	else
		return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	panic("hammer_blockdemarc: illegal range %lld %lld\n",
	      (long long)file_offset1, (long long)file_offset2);
}

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
	u_int32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((udev_t)crc);
}
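
/*
 * Worked example (illustrative only), following the functions above:
 * offsets below HAMMER_XDEMARC use small buffers, offsets at or above it
 * use large buffers, and hammer_blockdemarc() clips a range at the
 * crossover point:
 *
 *	hammer_blocksize(0) == HAMMER_BUFSIZE
 *	hammer_blocksize(HAMMER_XDEMARC) == HAMMER_XBUFSIZE
 *	hammer_blockdemarc(0, HAMMER_XDEMARC * 2) == HAMMER_XDEMARC
 */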