/*
 * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER structural locking
 */

#include <sys/dirent.h>

#include "hammer.h"

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if (lv == 0) {
                        nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                break;
                        }
                } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
                           lock->lowner == td) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else {
                        if (hammer_debug_locks) {
                                kprintf("hammer_lock_ex: held by %p\n",
                                        lock->lowner);
                        }
                        nlv = lv | HAMMER_LOCKF_WANTED;
                        ++hammer_contention_count;
                        tsleep_interlock(&lock->lockval, 0);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
                                if (hammer_debug_locks)
                                        kprintf("hammer_lock_ex: try again\n");
                        }
                }
        }
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
        thread_t td = curthread;
        int error;
        u_int lv;
        u_int nlv;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if (lv == 0) {
                        nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                error = 0;
                                break;
                        }
                } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
                           lock->lowner == td) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
        }
        return (error);
}
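
/*
 * Illustrative sketch, not part of the original source: lockval packs a
 * hold count into its low bits together with the HAMMER_LOCKF_EXCLUSIVE
 * and HAMMER_LOCKF_WANTED flag bits, which is why the CAS loops above can
 * recurse an exclusive lock by simply adding 1.  A typical caller pairs
 * the lock with the structural ref count ('lock' is a hypothetical
 * struct hammer_lock pointer and "example" a placeholder ident):
 */
#if 0
        hammer_ref(lock);                       /* structural reference */
        hammer_lock_ex_ident(lock, "example");  /* may recurse if owned */
        /* ... modify the structure ... */
        hammer_unlock(lock);
        hammer_rel(lock);
#endif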

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        const char *ident = "hmrlck";

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else if (lock->lowner == td) {
                        /*
                         * Disallowed case, drop into the kernel debugger
                         * for now.  A "cont" continues with the exclusive
                         * lock still held.
                         */
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (hammer_debug_critical)
                                        Debugger("hammer_lock_sh: holding ex");
                                break;
                        }
                } else {
                        nlv = lv | HAMMER_LOCKF_WANTED;
                        ++hammer_contention_count;
                        tsleep_interlock(&lock->lockval, 0);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
                }
        }
}

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        int error;

        KKASSERT(lock->refs);
        for (;;) {
                lv = lock->lockval;

                if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                error = 0;
                                break;
                        }
                } else if (lock->lowner == td) {
                        /*
                         * Disallowed case, drop into the kernel debugger
                         * for now.  A "cont" continues with the exclusive
                         * lock still held.
                         */
                        nlv = (lv + 1);
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (hammer_debug_critical)
                                        Debugger("hammer_lock_sh: holding ex");
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
        }
        return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
{
        thread_t td = curthread;
        u_int lv;
        u_int nlv;
        int error;

        for (;;) {
                lv = lock->lockval;

                if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
                        nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                lock->lowner = td;
                                error = 0;
                                break;
                        }
                } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
                        if (lock->lowner != curthread)
                                panic("hammer_lock_upgrade: illegal state");
                        error = 0;
                        break;
                } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
                        panic("hammer_lock_upgrade: lock is not held");
                        /* NOT REACHED */
                        error = EDEADLK;
                        break;
                } else {
                        error = EDEADLK;
                        break;
                }
        }
        return (error);
}
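
/*
 * Illustrative sketch, not part of the original source: a single shared
 * holder (shcount == 1) can attempt an in-place upgrade and fall back to
 * a full unlock/relock cycle when other shared holders force EDEADLK.
 * hammer_lock_ex() is the exclusive-lock entry point used elsewhere in
 * HAMMER (see hammer_sync_lock_ex() below).
 */
#if 0
        if (hammer_lock_upgrade(lock, 1) == EDEADLK) {
                hammer_unlock(lock);
                hammer_lock_ex(lock);
                /* state may have changed while unlocked, revalidate */
        }
        /* ... exclusive work ... */
        hammer_lock_downgrade(lock, 1);
#endif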

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
{
        thread_t td __debugvar = curthread;
        u_int lv;
        u_int nlv;

        KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
                 (HAMMER_LOCKF_EXCLUSIVE | shcount));
        KKASSERT(lock->lowner == td);

        /*
         * NOTE: Must clear owner before releasing exclusivity
         */
        lock->lowner = NULL;

        for (;;) {
                lv = lock->lockval;
                nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
                if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                        if (lv & HAMMER_LOCKF_WANTED)
                                wakeup(&lock->lockval);
                        break;
                }
        }
}

void
hammer_unlock(struct hammer_lock *lock)
{
        thread_t td __debugvar = curthread;
        u_int lv;
        u_int nlv;

        lv = lock->lockval;
        KKASSERT(lv != 0);
        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                KKASSERT(lock->lowner == td);

        for (;;) {
                lv = lock->lockval;
                nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
                if (nlv > 1) {
                        nlv = lv - 1;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv))
                                break;
                } else if (nlv == 1) {
                        nlv = 0;
                        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                                lock->lowner = NULL;
                        if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
                                if (lv & HAMMER_LOCKF_WANTED)
                                        wakeup(&lock->lockval);
                                break;
                        }
                } else {
                        panic("hammer_unlock: lock %p is not held", lock);
                }
        }
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held
 * exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
        u_int lv = lock->lockval;

        if (lv & HAMMER_LOCKF_EXCLUSIVE)
                return(1);
        else if (lv)
                return(-1);
        panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
                        nlv = (lv + 1) | HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                } else {
                        nlv = (lv + 1);
                        KKASSERT((int)nlv > 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                }
        }
        /* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
                        nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                } else {
                        KKASSERT((int)lv > 0);
                        nlv = (lv - 1);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return;
                }
        }
        /* not reached */
}
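
/*
 * Illustrative sketch, not part of the original source: hammer_ref() and
 * hammer_rel() manage only the structural ref count; the CHECK flag set
 * on the 0->1 transition is what the interlock routines below consume.
 */
#if 0
        hammer_ref(lock);       /* 0->1 also sets HAMMER_REFS_CHECK */
        /* ... use the structure ... */
        hammer_rel(lock);       /* 1->0 clears HAMMER_REFS_CHECK */
#endif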

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK bit is found to be
 * set.
 *
 * This case will return 1, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * 0 is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * 1 is only returned to one thread and the others will block.
 * Effectively a 1 indicator means 'someone transitioned 0->1
 * and you are the first guy to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        /*
         * Integrated reference count bump, lock, and check, with hot-path.
         *
         * (a) Return 1  (+LOCKED, +CHECK)  0->1 transition
         * (b) Return 0  (-LOCKED, -CHECK)  N->N+1 transition
         * (c) Break out (+CHECK)           Check condition and cannot lock
         * (d) Return 1  (+LOCKED, +CHECK)  Successfully locked
         */
        for (;;) {
                lv = lock->refs;
                if (lv == 0) {
                        nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else {
                        nlv = (lv + 1);
                        if ((lv & ~HAMMER_REFS_FLAGS) == 0)
                                nlv |= HAMMER_REFS_CHECK;
                        if ((nlv & HAMMER_REFS_CHECK) == 0) {
                                if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                        return(0);
                        } else if (lv & HAMMER_REFS_LOCKED) {
                                /* CHECK also set here */
                                if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                        break;
                        } else {
                                /* CHECK also set here */
                                nlv |= HAMMER_REFS_LOCKED;
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        lock->rowner = curthread;
                                        return(1);
                                }
                        }
                }
        }

        /*
         * Deferred check condition because we were unable to acquire the
         * lock.  We must block until the check condition is cleared due
         * to a race with another thread, or we are able to acquire the
         * lock.
         *
         * (a) Return 0  (-CHECK)           Another thread handled it
         * (b) Return 1  (+LOCKED, +CHECK)  We handled it.
         */
        for (;;) {
                lv = lock->refs;
                if ((lv & HAMMER_REFS_CHECK) == 0)
                        return(0);
                if (lv & HAMMER_REFS_LOCKED) {
                        tsleep_interlock(&lock->refs, 0);
                        nlv = (lv | HAMMER_REFS_WANTED);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
                } else {
                        /* CHECK also set here */
                        nlv = lv | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                }
        }
        /* not reached */
}
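
/*
 * Illustrative load-path sketch, not part of the original source: the
 * thread that wins the interlock (return value 1) performs the load or
 * validation and then clears CHECK via hammer_ref_interlock_done();
 * racing threads block inside hammer_ref_interlock() until then.
 */
#if 0
        if (hammer_ref_interlock(lock)) {
                /* we hold the interlock and CHECK is set: load/validate */
                hammer_ref_interlock_done(lock);
        }
        /* in both cases we now hold one additional ref */
#endif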

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * 1 is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;

                if (lv) {
                        panic("hammer_ref_interlock_true: bad lock %p %08x",
                              lock, lock->refs);
                }
                nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        lock->rowner = curthread;
                        return (1);
                }
        }
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                nlv = lv & ~HAMMER_REFS_FLAGS;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        break;
                }
        }
}

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * 1 is returned on 1->0 transitions with the lock held on return
 * and 0 is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock, the 1 indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return 1 after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
        u_int lv;
        u_int nlv;

        /*
         * In locked mode (failure/unload path) we release the
         * ref-count but leave it locked.
         */
        if (locked) {
                hammer_rel(lock);
                return(1);
        }

        /*
         * Integrated reference count drop with LOCKED, plus the hot-path
         * returns.
         */
        for (;;) {
                lv = lock->refs;

                if (lv == 1) {
                        nlv = 0 | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
                        if ((lv & HAMMER_REFS_LOCKED) == 0) {
                                nlv = (lv - 1) | HAMMER_REFS_LOCKED;
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        lock->rowner = curthread;
                                        return(1);
                                }
                        } else {
                                nlv = lv | HAMMER_REFS_WANTED;
                                tsleep_interlock(&lock->refs, 0);
                                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                        tsleep(&lock->refs, PINTERLOCKED,
                                               "h0lk", 0);
                                }
                        }
                } else {
                        nlv = (lv - 1);
                        KKASSERT((int)nlv >= 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                return(0);
                }
        }
        /* not reached */
}
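
/*
 * Illustrative teardown sketch, not part of the original source: the
 * thread that wins the 1->0 transition holds the interlock and can
 * safely disassociate the structure before releasing it with
 * hammer_rel_interlock_done().
 */
#if 0
        if (hammer_rel_interlock(lock, 0)) {
                /* last ref went away: tear down under the interlock */
                hammer_rel_interlock_done(lock, 0);
        }
#endif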

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
                if ((lv & ~HAMMER_REFS_FLAGS) == 0)
                        nlv &= ~HAMMER_REFS_CHECK;
                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        break;
                }
        }
}

/*
 * Acquire the interlock on lock->refs.
 *
 * Return 1 if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if (lv & HAMMER_REFS_LOCKED) {
                        nlv = lv | HAMMER_REFS_WANTED;
                        tsleep_interlock(&lock->refs, 0);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv))
                                tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
                } else {
                        nlv = (lv | HAMMER_REFS_LOCKED);
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
                        }
                }
        }
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                if (lv == 0) {
                        nlv = lv | HAMMER_REFS_LOCKED;
                        if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                                lock->rowner = curthread;
                                return(1);
                        }
                } else {
                        return(0);
                }
        }
        /* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
        u_int lv;
        u_int nlv;

        for (;;) {
                lv = lock->refs;
                KKASSERT(lv & HAMMER_REFS_LOCKED);
                nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

                if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
                        nlv &= ~HAMMER_REFS_CHECK;
                else
                        nlv |= HAMMER_REFS_CHECK;

                if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
                        if (lv & HAMMER_REFS_WANTED)
                                wakeup(&lock->refs);
                        return;
                }
        }
}
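
/*
 * Illustrative sketch, not part of the original source: a buffer cache
 * callback can use hammer_try_interlock_norefs() to lock out new refs
 * while it disassociates a buffer, then drop the interlock with
 * hammer_put_interlock() (error == 0 clears CHECK).
 */
#if 0
        if (hammer_try_interlock_norefs(lock)) {
                /* no refs can appear while LOCKED is held: disassociate */
                hammer_put_interlock(lock, 0);
        }
#endif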

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement, deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority
 * over a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
        ++trans->sync_lock_refs;
        hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
        int error;

        ++trans->sync_lock_refs;
        if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
                --trans->sync_lock_refs;
        return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
        --trans->sync_lock_refs;
        hammer_unlock(&trans->hmp->sync_lock);
}

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
        return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
        bzero(uuid, sizeof(*uuid));
        *(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
        ts->tv_sec = (unsigned long)(xtime / 1000000);
        ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
        u_int64_t xtime;

        xtime = (unsigned)(ts->tv_nsec / 1000) +
                (unsigned long)ts->tv_sec * 1000000ULL;
        return(xtime);
}

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(VDIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(VREG);
        case HAMMER_OBJTYPE_DBFILE:
                return(VDATABASE);
        case HAMMER_OBJTYPE_FIFO:
                return(VFIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(VSOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(VCHR);
        case HAMMER_OBJTYPE_BDEV:
                return(VBLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(VLNK);
        default:
                return(VBAD);
        }
        /* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
        switch(obj_type) {
        case HAMMER_OBJTYPE_DIRECTORY:
                return(DT_DIR);
        case HAMMER_OBJTYPE_REGFILE:
                return(DT_REG);
        case HAMMER_OBJTYPE_DBFILE:
                return(DT_DBF);
        case HAMMER_OBJTYPE_FIFO:
                return(DT_FIFO);
        case HAMMER_OBJTYPE_SOCKET:
                return(DT_SOCK);
        case HAMMER_OBJTYPE_CDEV:
                return(DT_CHR);
        case HAMMER_OBJTYPE_BDEV:
                return(DT_BLK);
        case HAMMER_OBJTYPE_SOFTLINK:
                return(DT_LNK);
        default:
                return(DT_UNKNOWN);
        }
        /* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
        switch(vtype) {
        case VDIR:
                return(HAMMER_OBJTYPE_DIRECTORY);
        case VREG:
                return(HAMMER_OBJTYPE_REGFILE);
        case VDATABASE:
                return(HAMMER_OBJTYPE_DBFILE);
        case VFIFO:
                return(HAMMER_OBJTYPE_FIFO);
        case VSOCK:
                return(HAMMER_OBJTYPE_SOCKET);
        case VCHR:
                return(HAMMER_OBJTYPE_CDEV);
        case VBLK:
                return(HAMMER_OBJTYPE_BDEV);
        case VLNK:
                return(HAMMER_OBJTYPE_SOFTLINK);
        default:
                return(HAMMER_OBJTYPE_UNKNOWN);
        }
        /* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
        if (ip->hmp->hflags & HMNT_NOHISTORY)
                return(HAMMER_DELETE_DESTROY);
        if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
                return(HAMMER_DELETE_DESTROY);
        return(0);
}
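
/*
 * Illustrative sketch, not part of the original source: callers feed the
 * returned flags into the delete-flags argument of
 * hammer_delete_at_cursor(); 'ip' and 'cursor' are hypothetical locals
 * and the remaining arguments are elided.
 */
#if 0
        delete_flags = hammer_nohistory(ip); /* 0 or HAMMER_DELETE_DESTROY */
        error = hammer_delete_at_cursor(cursor, delete_flags, ...);
#endif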

/*
 * ALGORITHM VERSION 0:
 *      Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *      crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *      to generate a unique key and will scan all entries with the same upper
 *      32 bits when issuing a lookup.
 *
 *      0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 1:
 *
 *      This algorithm breaks the filename down into separate 32-bit crcs
 *      for each filename segment separated by a special character (dot,
 *      dash, underscore, or tilde).  The CRCs are then added together.
 *      This allows temporary names (which commonly just append a segment
 *      to a base name) to hash at a consistent offset from the base name.
 *      A full-filename 16 bit crc is also generated to deal with
 *      degenerate conditions.
 *
 *      The algorithm is designed to handle create/rename situations such
 *      that creating a file with an extension and then renaming it without
 *      the extension only shifts the key space rather than randomizing it.
 *
 *      NOTE: The inode allocator cache can only match 10 bits so we do
 *            not really have any room for a partial sorted name, and
 *            numbers don't sort well in that situation anyway.
 *
 *      0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
 *
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * We usually strip bit 0 (set it to 0) in order to provide a consistent
 * iteration space for collisions.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
                         u_int32_t *max_iterationsp)
{
        const char *aname = name;
        int32_t crcx;
        int64_t key;
        int i;
        int j;

        switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
        case HAMMER_INODE_CAP_DIRHASH_ALG0:
                /*
                 * Original algorithm
                 */
                key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
                if (key == 0)
                        key |= 0x100000000LL;
                *max_iterationsp = 0xFFFFFFFFU;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG1:
                /*
                 * Filesystem version 6 or better will create directories
                 * using the ALG1 dirhash.  This hash breaks the filename
                 * up into domains separated by special characters and
                 * hashes each domain independently.
                 *
                 * We also do a simple sub-sort using the first character
                 * of the filename in the top 5-bits.
                 */
                key = 0;

                /*
                 * m32
                 */
                crcx = 0;
                for (i = j = 0; i < len; ++i) {
                        if (aname[i] == '.' ||
                            aname[i] == '-' ||
                            aname[i] == '_' ||
                            aname[i] == '~') {
                                if (i != j)
                                        crcx += crc32(aname + j, i - j);
                                j = i + 1;
                        }
                }
                if (i != j)
                        crcx += crc32(aname + j, i - j);
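
                /*
                 * Illustrative example, not part of the original source:
                 * "foo.tmp~" contributes crc32("foo") + crc32("tmp"),
                 * while "foo" contributes crc32("foo") alone, so the
                 * rename "foo.tmp~" -> "foo" shifts the key by a fixed
                 * crc32("tmp") rather than randomizing it.
                 */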

#if 0
                /*
                 * xor top 5 bits 0mmmm into low bits and steal the top 5
                 * bits as a semi sub sort using the first character of
                 * the filename.  bit 63 is always left as 0 so directory
                 * keys are positive numbers.
                 */
                crcx ^= (uint32_t)crcx >> (32 - 5);
                crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
#endif
                crcx &= 0x7FFFFFFFU;

                key |= (uint64_t)crcx << 32;

                /*
                 * l16 - crc of entire filename
                 *
                 * This crc reduces degenerate hash collision conditions
                 */
                crcx = crc32(aname, len);
                crcx = crcx ^ (crcx << 16);
                key |= crcx & 0xFFFF0000U;

                /*
                 * Cleanup
                 */
                if ((key & 0xFFFFFFFF00000000LL) == 0)
                        key |= 0x100000000LL;
                if (hammer_debug_general & 0x0400) {
                        kprintf("namekey2: 0x%016llx %*.*s\n",
                                (long long)key, len, len, aname);
                }
                *max_iterationsp = 0x00FFFFFF;
                break;
        case HAMMER_INODE_CAP_DIRHASH_ALG2:
        case HAMMER_INODE_CAP_DIRHASH_ALG3:
        default:
                key = 0;                        /* compiler warning */
                *max_iterationsp = 1;           /* sanity */
                panic("hammer_directory_namekey: bad algorithm %p", dip);
                break;
        }
        return(key);
}

/*
 * Convert the string after @@ (@@ not included) to a TID.  Returns 0 on
 * success, EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
                  hammer_tid_t *tidp, u_int32_t *localizationp)
{
        hammer_tid_t tid;
        u_int32_t localization;
        char *ptr;
        int ispfs;
        int n;

        /*
         * Forms allowed for TID:  "0x%016llx" (e.g. "0x0000000000000001")
         *                         "-1"
         */
        tid = strtouq(str, &ptr, 0);
        n = ptr - str;
        if (n == 2 && str[0] == '-' && str[1] == '1') {
                /* ok */
        } else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
                /* ok */
        } else {
                return(EINVAL);
        }

        /*
         * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
         */
        str = ptr;
        if (*str == ':') {
                localization = strtoul(str + 1, &ptr, 10) << 16;
                if (ptr - str != 6)
                        return(EINVAL);
                str = ptr;
                ispfs = 1;
        } else {
                localization = *localizationp;
                ispfs = 0;
        }

        /*
         * Any trailing junk invalidates special extension handling.
         */
        if (*str)
                return(EINVAL);
        *tidp = tid;
        *localizationp = localization;
        *ispfsp = ispfs;
        return(0);
}

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
        blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
        ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
                          crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
        hammer_crc_t crc;

        crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
        return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
              crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
        return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
        hammer_crc_t crc;

        crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
        return (ondisk->crc == crc);
}
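
/*
 * Illustrative note, not part of the original source: the
 * "&ondisk->vol_crc + 1" and "&ondisk->crc + 1" expressions above CRC
 * the region starting immediately after the crc field itself, so a
 * stored CRC never covers its own storage.
 */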

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        hammer_crc_t crc;

        if (leaf->data_len == 0) {
                crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        if (leaf->data_len != sizeof(struct hammer_inode_data))
                                return(0);
                        crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        crc = crc32(data, leaf->data_len);
                        break;
                }
        }
        return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
        if (leaf->data_len == 0) {
                leaf->data_crc = 0;
        } else {
                switch(leaf->base.rec_type) {
                case HAMMER_RECTYPE_INODE:
                        KKASSERT(leaf->data_len ==
                                 sizeof(struct hammer_inode_data));
                        leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
                        break;
                default:
                        leaf->data_crc = crc32(data, leaf->data_len);
                        break;
                }
        }
}

void
hkprintf(const char *ctl, ...)
{
        __va_list va;

        if (hammer_debug_debug) {
                __va_start(va, ctl);
                kvprintf(ctl, va);
                __va_end(va);
        }
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return(HAMMER_BUFSIZE);
        else
                return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
        if (file_offset < HAMMER_XDEMARC)
                return((int)file_offset & HAMMER_BUFMASK);
        else
                return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
        if (file_offset1 < HAMMER_XDEMARC) {
                if (file_offset2 <= HAMMER_XDEMARC)
                        return(file_offset2);
                return(HAMMER_XDEMARC);
        }
        panic("hammer_blockdemarc: illegal range %lld %lld",
              (long long)file_offset1, (long long)file_offset2);
}

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
        u_int32_t crc;

        crc = crc32(uuid, sizeof(*uuid));
        return((udev_t)crc);
}
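
/*
 * Illustrative sketch, not part of the original source: I/O paths can use
 * the blocksize helpers above to clip a transfer so it never crosses the
 * small-buffer/large-buffer demarcation point ('offset' and 'bytes' are
 * hypothetical locals).
 */
#if 0
        int64_t demarc = hammer_blockdemarc(offset, offset + bytes);
        int blksize = hammer_blocksize(offset); /* valid up to demarc */
#endif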