1 /* 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>

/*
 * Acquire an exclusive lock, sleeping until it can be granted.  'ident'
 * is the wait-channel identifier displayed while blocked in tsleep().
 *
 * The caller must already hold a reference on the lock (refs > 0).
 * Exclusive locks held by the same thread are recursive: lockcount is
 * simply incremented again.
 */
void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;

	KKASSERT(lock->refs > 0);
	crit_enter();
	if (lock->locktd != td) {
		/*
		 * Lock is held exclusively by another thread
		 * (locktd != NULL) or held shared (lockcount != 0,
		 * negative when shared).  Sleep until released,
		 * advertising ourselves via exwanted/wanted so the
		 * releaser issues a wakeup.
		 */
		while (lock->locktd != NULL || lock->lockcount) {
			++lock->exwanted;
			lock->wanted = 1;
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->locktd);
			}
			++hammer_contention_count;
			tsleep(lock, 0, ident, 0);
			if (hammer_debug_locks)
				kprintf("hammer_lock_ex: try again\n");
			--lock->exwanted;
		}
		lock->locktd = td;	/* we now own it exclusively */
	}
	KKASSERT(lock->lockcount >= 0);
	++lock->lockcount;	/* recursion count for the exclusive hold */
	crit_exit();
}

/*
 * Try to obtain an exclusive lock
 *
 * Non-blocking version of hammer_lock_ex_ident().  Returns 0 on success
 * or EAGAIN if the lock is held shared, or exclusively by another thread.
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;

	KKASSERT(lock->refs > 0);
	crit_enter();
	if (lock->locktd != td) {
		if (lock->locktd != NULL || lock->lockcount) {
			crit_exit();
			return(EAGAIN);
		}
		lock->locktd = td;
	}
	KKASSERT(lock->lockcount >= 0);
	++lock->lockcount;
	crit_exit();
	return(0);
}

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	KKASSERT(lock->refs > 0);
	crit_enter();
	while (lock->locktd != NULL) {
		/*
		 * Lock is held exclusively.  If we are the exclusive
		 * owner, treat the request as a recursive exclusive
		 * acquisition but drop into the debugger -- taking a
		 * shared lock on top of our own exclusive hold is suspect.
		 */
		if (lock->locktd == curthread) {
			Debugger("hammer_lock_sh: lock_sh on exclusive");
			++lock->lockcount;
			crit_exit();
			return;
		}
		lock->wanted = 1;
		tsleep(lock, 0, "hmrlck", 0);
	}
	/* shared holds are counted negatively */
	KKASSERT(lock->lockcount <= 0);
	--lock->lockcount;
	crit_exit();
}

/*
 * Non-blocking shared lock attempt.  Returns 0 on success or EAGAIN if
 * the lock is currently held exclusively.
 */
int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	KKASSERT(lock->refs > 0);
	crit_enter();
	if (lock->locktd) {
		crit_exit();
		return(EAGAIN);
	}
	KKASSERT(lock->lockcount <= 0);
	--lock->lockcount;
	crit_exit();
	return(0);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
	int error;

	crit_enter();
	if (lock->lockcount > 0) {
		/* already held exclusively -- must be our own hold */
		if (lock->locktd != curthread)
			panic("hammer_lock_upgrade: illegal lock state");
		error = 0;
	} else if (lock->lockcount == -1) {
		/* we are the sole shared holder: convert in place */
		lock->lockcount = 1;
		lock->locktd = curthread;
		error = 0;
	} else if (lock->lockcount != 0) {
		/* other shared holders present, cannot upgrade */
		error = EDEADLK;
	} else {
		panic("hammer_lock_upgrade: lock is not held");
		/* NOT REACHED */
		error = 0;
	}
	crit_exit();
	return(error);
}

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
	/* must hold the lock exclusively and non-recursively */
	KKASSERT(lock->lockcount == 1 && lock->locktd == curthread);
	crit_enter();
	lock->lockcount = -1;	/* now a single shared hold */
	lock->locktd = NULL;
	if (lock->wanted) {
		lock->wanted = 0;
		wakeup(lock);
	}
	crit_exit();
	/* XXX memory barrier */
}

/*
 * Release one shared or exclusive hold.  Waiters are woken up when the
 * last hold is released (lockcount reaches 0).
 */
void
hammer_unlock(struct hammer_lock *lock)
{
	crit_enter();
	KKASSERT(lock->lockcount != 0);
	if (lock->lockcount < 0) {
		/* releasing a shared hold */
		if (++lock->lockcount == 0 && lock->wanted) {
			lock->wanted = 0;
			wakeup(lock);
		}
	} else {
		/* releasing an exclusive hold -- must be ours */
		KKASSERT(lock->locktd == curthread);
		if (--lock->lockcount == 0) {
			lock->locktd = NULL;
			if (lock->wanted) {
				lock->wanted = 0;
				wakeup(lock);
			}
		}

	}
	crit_exit();
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if lock is held shared, and > 0 if held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	if (lock->lockcount < 0)
		return(-1);
	if (lock->lockcount > 0)
		return(1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the reference count on a lock structure.
 */
void
hammer_ref(struct hammer_lock *lock)
{
	KKASSERT(lock->refs >= 0);
	crit_enter();
	++lock->refs;
	crit_exit();
}

/*
 * Drop a reference on a lock structure.  The caller must hold at least
 * one reference.
 */
void
hammer_unref(struct hammer_lock *lock)
{
	KKASSERT(lock->refs > 0);
	crit_enter();
	--lock->refs;
	crit_exit();
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data buffers
 * (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release the
 * sync lock, nor can we give pending exclusive sync locks priority over
 * a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	/* ref is bumped first so it is never observed below the hold count */
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

/*
 * Non-blocking shared sync-lock attempt.  The sync_lock_refs bump is
 * undone if the try fails.  Returns 0 on success, EAGAIN otherwise.
 */
int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}

/*
 * Misc
 */

/*
 * Extract the 32 bit xid embedded in bytes 2-5 of the uuid's node field.
 *
 * NOTE(review): reads a u_int32_t through a cast of &node[2]; the access
 * may be unaligned -- presumably acceptable on the supported platforms,
 * verify before porting.
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

/*
 * Synthesize a uuid carrying the 32 bit guid in node bytes 2-5; the
 * inverse of hammer_to_unix_xid().  All other uuid fields are zeroed.
 */
void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

/*
 * Convert a HAMMER 64 bit time (in microseconds) to a struct timespec.
 */
void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

/*
 * Convert a struct timespec to HAMMER 64 bit time (microseconds).
 * Sub-microsecond resolution of tv_nsec is discarded.
 */
u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}


/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

/*
 * Convert a HAMMER filesystem object type to a directory entry d_type.
 */
int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

/*
 * Convert a vnode type to a HAMMER filesystem object type; the inverse
 * of hammer_get_vnode_type().
 */
u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 *
 * HAMMER_DELETE_DESTROY is returned when history retention is disabled,
 * either mount-wide (HMNT_NOHISTORY) or per-inode via the
 * SF_NOHISTORY/UF_NOHISTORY file flags.
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 1:
 * Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 * crc in the MSB and 0 in the LSB.
 * The caller will use the low 32 bits
 * to generate a unique key and will scan all entries with the same upper
 * 32 bits when issuing a lookup.
 *
 *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 2:
 *
 * The 64 bit hash key is generated from the following components.  The
 * first three characters are encoded as 5-bit quantities, the middle
 * N characters are hashed into a 6 bit quantity, and the last two
 * characters are encoded as 5-bit quantities.  A 32 bit hash of the
 * entire filename is encoded in the low 32 bits.  Bit 0 is set to
 * 0 to guarantee us a 2^24 bit iteration space.
 *
 *	0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
 *
 * This gives us a domain sort for the first three characters, the last
 * two characters, and breaks the middle space into 64 random domains.
 * The domain sort folds upper case, lower case, digits, and punctuation
 * spaces together, the idea being the filenames tend to not be a mix
 * of those domains.
 *
 * The 64 random domains act as a sub-sort for the middle characters
 * but may cause a random seek.  If the filesystem is being accessed
 * in sorted order we should tend to get very good linearity for most
 * filenames and devolve into more random seeks otherwise.
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	int64_t key;
	int32_t crcx;
	const char *aname = name;

	/* hash algorithm is selected per-directory via inode cap_flags */
	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		/*
		 * Version 1: 31 bit crc in bits 32-62, zero in the low
		 * 32 bits (the caller's iteration space).
		 */
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;	/* never return 0 */
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		/*
		 * Version 2: full-name crc in the low 32 bits with bit 0
		 * forced to 0; high bits encode the first/last character
		 * domains plus a 6 bit hash of the middle characters.
		 * The switch cases deliberately fall through, each adding
		 * the components available at that name length.
		 */
		key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;

		switch(len) {
		default:
			/* fold the middle (len-5) characters into 6 bits */
			crcx = crc32(aname + 3, len - 5);
			crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
			key |= (int64_t)(crcx & 0x3F) << 42;
			/* fall through */
		case 5:
		case 4:
			/* fall through */
		case 3:
			key |= ((int64_t)(aname[2] & 0x1F) << 48);
			/* fall through */
		case 2:
			key |= ((int64_t)(aname[1] & 0x1F) << 53) |
			       ((int64_t)(aname[len-2] & 0x1F) << 37);
			/* fall through */
		case 1:
			key |= ((int64_t)(aname[0] & 0x1F) << 58) |
			       ((int64_t)(aname[len-1] & 0x1F) << 32);
			/* fall through */
		case 0:
			break;
		}
		/* guarantee a non-zero upper half (function never returns 0) */
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
		break;
	}
	return(key);
}

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfs, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;	/* number of characters consumed by the TID */
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok -- "0x" plus exactly 16 hex digits */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		/* PFS id occupies the upper 16 bits of the localization */
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)	/* ':' plus exactly 5 digits */
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	/* success -- commit all outputs at once */
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}

/*
 * Set the CRC field of an on-disk blockmap structure.
 */
void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Set the CRC field of an on-disk volume header.  The crc covers the
 * regions before and after the vol_crc field itself.
 */
void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

/*
 * Validate the CRC of an on-disk blockmap structure.  Returns non-zero
 * on a match.
 */
int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

/*
 * Validate the CRC of an on-disk volume header.  Returns non-zero on a
 * match.  Must mirror the coverage used by hammer_crc_set_volume().
 */
int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

/*
 * Validate the CRC of an on-disk B-Tree node (coverage starts just past
 * the crc field).  Returns non-zero on a match.
 */
int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *	 to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;	/* no data -> crc must be stored as 0 */
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			/* wrong record size is an automatic failure */
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

/*
 * Set leaf->data_crc; counterpart of hammer_crc_test_leaf() with the
 * same inode-record special case.
 */
void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}

/*
 * kprintf() wrapper conditional on the hammer_debug_debug tunable.
 */
void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	/* small buffers below the demarcation point, large above */
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

/*
 * Return the demarkation point between the two offsets where
 * the block size changes.
685 */ 686 int64_t 687 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2) 688 { 689 if (file_offset1 < HAMMER_XDEMARC) { 690 if (file_offset2 <= HAMMER_XDEMARC) 691 return(file_offset2); 692 return(HAMMER_XDEMARC); 693 } 694 panic("hammer_blockdemarc: illegal range %lld %lld\n", 695 file_offset1, file_offset2); 696 } 697 698 udev_t 699 hammer_fsid_to_udev(uuid_t *uuid) 700 { 701 u_int32_t crc; 702 703 crc = crc32(uuid, sizeof(*uuid)); 704 return((udev_t)crc); 705 } 706 707