/*
 * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}
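
/*
 * Illustrative sketch (not compiled): typical use of the exclusive lock
 * primitives above.  The structure must already carry a structural ref
 * (lock->refs) before the lock is taken, and a thread may re-acquire its
 * own exclusive lock recursively.  hammer_lock_ex() is the hammer.h
 * wrapper around hammer_lock_ex_ident(); 'node' and modify_structure()
 * are placeholders only.
 */
#if 0
	hammer_ref(&node->lock);			/* structural ref */
	if (hammer_lock_ex_try(&node->lock) == 0) {
		modify_structure(node);			/* fast path */
		hammer_unlock(&node->lock);
	} else {
		hammer_lock_ex(&node->lock);		/* blocking path */
		modify_structure(node);
		hammer_unlock(&node->lock);
	}
	hammer_rel(&node->lock);
#endif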

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  A 'cont' continues with the exclusive
			 * lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  A 'cont' continues with the exclusive
			 * lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->lowner != curthread)
				panic("hammer_lock_upgrade: illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			panic("hammer_lock_upgrade: lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}
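
/*
 * Illustrative sketch (not compiled): a shared holder temporarily upgrading
 * to exclusive.  shcount is the number of shared counts the caller owns
 * (usually 1); if other shared holders exist the upgrade fails with EDEADLK
 * and the lock remains held shared, so the caller must back off rather
 * than block.  'node' is a placeholder.
 */
#if 0
	hammer_lock_sh(&node->lock);
	if (hammer_lock_upgrade(&node->lock, 1) == 0) {
		/* now exclusive, safe to modify */
		hammer_lock_downgrade(&node->lock, 1);
	} else {
		/* EDEADLK: other shared holders exist, retry later */
	}
	hammer_unlock(&node->lock);
#endif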

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
	KKASSERT(lock->lowner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->lowner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(&lock->lockval);
			break;
		}
	}
}

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	lv = lock->lockval;
	KKASSERT(lv != 0);
	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		KKASSERT(lock->lowner == td);

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			panic("hammer_unlock: lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
			nlv = (lv + 1) | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			nlv = (lv + 1);
			KKASSERT((int)nlv > 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			KKASSERT((int)lv > 0);
			nlv = (lv - 1);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}
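
/*
 * Illustrative sketch (not compiled): plain ref/rel usage on a structure
 * embedding a struct hammer_lock ('node' is a placeholder).  The CHECK
 * flag set on the 0->1 transition only matters to callers which later use
 * the interlock variants below; hammer_ref()/hammer_rel() themselves do
 * no serialization.
 */
#if 0
	hammer_ref(&node->lock);	/* may set CHECK on 0->1 */
	/* ... use the structure ... */
	hammer_rel(&node->lock);	/* clears CHECK on 1->0 */
#endif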

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK flag is found to be
 * set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit will also be set.  Other threads attempting to ref will see the
 * CHECK bit and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first guy to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0	(-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		Check condition and Cannot lock
	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1	(+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}
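
/*
 * Illustrative sketch (not compiled): the load-path pattern the interlock
 * supports.  A TRUE return means this thread won the 0->1 (or CHECK) race
 * and is responsible for validating/loading the structure before dropping
 * the interlock; 'node' and load_structure() are placeholders.
 */
#if 0
	if (hammer_ref_interlock(&node->lock)) {
		load_structure(node);			/* we must check/load */
		hammer_ref_interlock_done(&node->lock);	/* clears CHECK, wakes waiters */
	}
	/* a FALSE return means the structure was already resident */
#endif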

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * TRUE is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;

		if (lv) {
			panic("hammer_ref_interlock_true: bad lock %p %08x",
			      lock, lock->refs);
		}
		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			lock->rowner = curthread;
			return (1);
		}
	}
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~HAMMER_REFS_FLAGS;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock; the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}
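
/*
 * Illustrative sketch (not compiled): the unload-path counterpart.  A TRUE
 * return means we made the 1->0 transition and hold the interlock; real
 * callers re-check the ref-count under the interlock because it may have
 * raced back up.  'node' and teardown_structure() are placeholders and the
 * teardown here only disassociates, it does not free the structure.
 */
#if 0
	if (hammer_rel_interlock(&node->lock, 0)) {
		if (node->lock.refs == 0)
			teardown_structure(node);	/* still at 0 refs */
		hammer_rel_interlock_done(&node->lock, 0);
	}
#endif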

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			return(0);
		}
	}
	/* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		else
			nlv |= HAMMER_REFS_CHECK;

		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			return;
		}
	}
}
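
/*
 * Illustrative sketch (not compiled): the buffer-cache callback pattern
 * described above.  If there are no refs we take the interlock, detach the
 * buffer from the HAMMER structure, and release the interlock with error 0
 * so CHECK is left clear; 'io' is a placeholder for whatever structure
 * embeds the struct hammer_lock.
 */
#if 0
	if (hammer_try_interlock_norefs(&io->lock)) {
		/* no refs exist, safe to disassociate the buffer here */
		hammer_put_interlock(&io->lock, 0);
	}
#endif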

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement, deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}
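
/*
 * Worked example (not compiled): HAMMER stores times as 64-bit microsecond
 * counts, so the two conversions above are exact inverses apart from the
 * sub-microsecond part of tv_nsec, which is truncated.
 */
#if 0
	/* xtime = 1234567890123456 us  ->  1234567890 s + 123456000 ns */
	hammer_time_to_timespec(1234567890123456ULL, &ts);
	KKASSERT(ts.tv_sec == 1234567890 && ts.tv_nsec == 123456000);
	KKASSERT(hammer_timespec_to_time(&ts) == 1234567890123456ULL);
#endif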

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 0:
 *	Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 *	to generate a unique key and will scan all entries with the same upper
 *	32 bits when issuing a lookup.
 *
 *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 1:
 *
 *	This algorithm breaks the filename down into separate 32-bit crcs,
 *	one for each filename segment separated by a special character
 *	(dot, dash, underscore, or tilde).  The CRCs are then added together.
 *	This allows temporary names.  A full-filename 16 bit crc is also
 *	generated to deal with degenerate conditions.
 *
 *	The algorithm is designed to handle create/rename situations such
 *	that a create with an extension followed by a rename without the
 *	extension only shifts the key space rather than randomizing it.
 *
 *	NOTE: The inode allocator cache can only match 10 bits so we do
 *	      not really have any room for a partial sorted name, and
 *	      numbers don't sort well in that situation anyway.
 *
 *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * We usually strip bit 0 (set it to 0) in order to provide a consistent
 * iteration space for collisions.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	const char *aname = name;
	int32_t crcx;
	int64_t key;
	int i;
	int j;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		/*
		 * Original algorithm
		 */
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		/*
		 * Filesystem version 6 or better will create directories
		 * using the ALG1 dirhash.  This hash breaks the filename
		 * up into domains separated by special characters and
		 * hashes each domain independently.
		 *
		 * We also do a simple sub-sort using the first character
		 * of the filename in the top 5-bits.
		 */
		key = 0;

		/*
		 * m32
		 */
		crcx = 0;
		for (i = j = 0; i < len; ++i) {
			if (aname[i] == '.' ||
			    aname[i] == '-' ||
			    aname[i] == '_' ||
			    aname[i] == '~') {
				if (i != j)
					crcx += crc32(aname + j, i - j);
				j = i + 1;
			}
		}
		if (i != j)
			crcx += crc32(aname + j, i - j);

#if 0
		/*
		 * xor top 5 bits 0mmmm into low bits and steal the top 5
		 * bits as a semi sub sort using the first character of
		 * the filename.  bit 63 is always left as 0 so directory
		 * keys are positive numbers.
		 */
		crcx ^= (uint32_t)crcx >> (32 - 5);
		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
#endif
		crcx &= 0x7FFFFFFFU;

		key |= (uint64_t)crcx << 32;

		/*
		 * l16 - crc of entire filename
		 *
		 * This crc reduces degenerate hash collision conditions
		 */
		crcx = crc32(aname, len);
		crcx = crcx ^ (crcx << 16);
		key |= crcx & 0xFFFF0000U;

		/*
		 * Cleanup
		 */
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				(long long)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p", dip);
		break;
	}
	return(key);
}
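
/*
 * Worked example (not part of the algorithm above, just an illustration):
 * ALG1 treats '.', '-', '_' and '~' as segment delimiters and skips empty
 * segments, so for the name "foo.bar~1" the m32 portion is the sum
 * crc32("foo") + crc32("bar") + crc32("1") (before masking), while the
 * l16 portion is derived from crc32("foo.bar~1") over the whole name.
 */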

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}
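
/*
 * Illustrative sketch (not compiled): callers typically verify a CRC right
 * after reading an on-disk structure and treat a mismatch as meta-data
 * corruption; how the error is propagated is caller-specific and 'ondisk'
 * is a placeholder here.
 */
#if 0
	if (hammer_crc_test_btree(ondisk) == 0) {
		kprintf("HAMMER: B-Tree node CRC mismatch\n");
		/* flag the node bad / return EIO, caller-specific */
	}
#endif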

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}

void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return((int)file_offset & HAMMER_BUFMASK);
	else
		return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	panic("hammer_blockdemarc: illegal range %lld %lld",
	      (long long)file_offset1, (long long)file_offset2);
}

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
	u_int32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((udev_t)crc);
}
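
/*
 * Illustrative sketch (not compiled): hammer_blockdemarc() is used to clamp
 * a logical file range so it does not cross the point where the block size
 * switches from HAMMER_BUFSIZE to HAMMER_XBUFSIZE.  Note the starting
 * offset must be below HAMMER_XDEMARC or the function panics; 'offset' and
 * 'end_offset' are placeholders.
 */
#if 0
	if (offset < HAMMER_XDEMARC)
		end_offset = hammer_blockdemarc(offset, end_offset);
	/* [offset, end_offset) now uses a single block size:
	 * hammer_blocksize(offset)
	 */
#endif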