1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer_util.h" 36 37 #include <sys/diskslice.h> 38 #include <sys/diskmbr.h> 39 40 static void check_volume(volume_info_t volume); 41 static void get_buffer_readahead(buffer_info_t base); 42 static __inline int readhammervol(volume_info_t volume); 43 static __inline int readhammerbuf(buffer_info_t buffer); 44 static __inline int writehammervol(volume_info_t volume); 45 static __inline int writehammerbuf(buffer_info_t buffer); 46 47 hammer_uuid_t Hammer_FSType; 48 hammer_uuid_t Hammer_FSId; 49 int UseReadBehind = -4; 50 int UseReadAhead = 4; 51 int DebugOpt; 52 uint32_t HammerVersion = -1; 53 54 TAILQ_HEAD(volume_list, volume_info); 55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList); 56 static int valid_hammer_volumes; 57 58 static __inline 59 int 60 buffer_hash(hammer_off_t zone2_offset) 61 { 62 int hi; 63 64 hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK; 65 return(hi); 66 } 67 68 static 69 buffer_info_t 70 find_buffer(hammer_off_t zone2_offset) 71 { 72 volume_info_t volume; 73 buffer_info_t buffer; 74 int hi; 75 76 volume = get_volume(HAMMER_VOL_DECODE(zone2_offset)); 77 assert(volume); 78 79 hi = buffer_hash(zone2_offset); 80 TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) { 81 if (buffer->zone2_offset == zone2_offset) 82 return(buffer); 83 } 84 return(NULL); 85 } 86 87 static 88 volume_info_t 89 __alloc_volume(const char *volname, int oflags) 90 { 91 volume_info_t volume; 92 int i; 93 94 volume = calloc(1, sizeof(*volume)); 95 volume->vol_no = -1; 96 volume->rdonly = (oflags == O_RDONLY); 97 volume->name = strdup(volname); 98 volume->fd = open(volume->name, oflags); 99 if (volume->fd < 0) { 100 err(1, "alloc_volume: Failed to open %s", volume->name); 101 /* not reached */ 102 } 103 check_volume(volume); 104 105 volume->ondisk = calloc(1, HAMMER_BUFSIZE); 106 107 for (i = 0; i < HAMMER_BUFLISTS; ++i) 108 TAILQ_INIT(&volume->buffer_lists[i]); 109 110 return(volume); 111 } 112 113 static 114 void 115 __add_volume(const volume_info_t volume) 116 { 117 volume_info_t scan; 118 struct stat st1, st2; 119 120 if (fstat(volume->fd, &st1) != 0) { 121 errx(1, "add_volume: %s: Failed to stat", volume->name); 122 /* not reached */ 123 } 124 125 TAILQ_FOREACH(scan, &VolList, entry) { 126 if (scan->vol_no == volume->vol_no) { 127 errx(1, "add_volume: %s: Duplicate volume number %d " 128 "against %s", 129 volume->name, volume->vol_no, scan->name); 130 /* not reached */ 131 } 132 if (fstat(scan->fd, &st2) != 0) { 133 errx(1, "add_volume: %s: Failed to stat %s", 134 volume->name, scan->name); 135 /* not reached */ 136 } 137 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) { 138 errx(1, "add_volume: %s: Specified more than once", 139 volume->name); 140 /* not reached */ 141 } 142 } 143 144 TAILQ_INSERT_TAIL(&VolList, volume, entry); 145 } 146 147 static 148 void 149 __verify_volume(const volume_info_t volume) 150 { 151 hammer_volume_ondisk_t ondisk = volume->ondisk; 152 char *fstype; 153 154 if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) { 155 errx(1, "verify_volume: Invalid volume signature %016jx", 156 ondisk->vol_signature); 157 /* not reached */ 158 } 159 if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) { 160 errx(1, "verify_volume: Invalid root volume# %d", 161 ondisk->vol_rootvol); 162 /* not reached */ 163 } 164 hammer_uuid_to_string(&ondisk->vol_fstype, &fstype); 165 if (hammer_uuid_compare(&Hammer_FSType, &ondisk->vol_fstype)) { 166 errx(1, "verify_volume: %s: fstype %s does not indicate " 167 "this is a HAMMER volume", volume->name, fstype); 168 /* not reached */ 169 } 170 free(fstype); 171 if (hammer_uuid_compare(&Hammer_FSId, &ondisk->vol_fsid)) { 172 errx(1, "verify_volume: %s: fsid does not match other volumes!", 173 volume->name); 174 /* not reached */ 175 } 176 if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN || 177 ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) { 178 errx(1, "verify_volume: %s: Invalid volume version %u", 179 volume->name, ondisk->vol_version); 180 /* not reached */ 181 } 182 } 183 184 /* 185 * Initialize a volume structure and ondisk vol_no field. 186 */ 187 volume_info_t 188 init_volume(const char *filename, int oflags, int32_t vol_no) 189 { 190 volume_info_t volume; 191 192 volume = __alloc_volume(filename, oflags); 193 volume->vol_no = volume->ondisk->vol_no = vol_no; 194 195 __add_volume(volume); 196 197 return(volume); 198 } 199 200 /* 201 * Initialize a volume structure and read ondisk volume header. 202 */ 203 volume_info_t 204 load_volume(const char *filename, int oflags, int verify_volume) 205 { 206 volume_info_t volume; 207 int n; 208 209 volume = __alloc_volume(filename, oflags); 210 211 n = readhammervol(volume); 212 if (n == -1) { 213 err(1, "load_volume: %s: Read failed at offset 0", 214 volume->name); 215 /* not reached */ 216 } 217 volume->vol_no = volume->ondisk->vol_no; 218 if (volume->vol_no == HAMMER_ROOT_VOLNO) 219 HammerVersion = volume->ondisk->vol_version; 220 221 if (valid_hammer_volumes++ == 0) 222 Hammer_FSId = volume->ondisk->vol_fsid; 223 if (verify_volume) 224 __verify_volume(volume); 225 226 __add_volume(volume); 227 228 return(volume); 229 } 230 231 /* 232 * Check basic volume characteristics. 233 */ 234 static 235 void 236 check_volume(volume_info_t volume) 237 { 238 struct partinfo pinfo; 239 struct stat st; 240 241 /* 242 * Allow the formatting of block devices or regular files 243 */ 244 if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) { 245 if (fstat(volume->fd, &st) < 0) { 246 err(1, "Unable to stat %s", volume->name); 247 /* not reached */ 248 } 249 if (S_ISREG(st.st_mode)) { 250 volume->size = st.st_size; 251 volume->type = "REGFILE"; 252 } else { 253 errx(1, "Unsupported file type for %s", volume->name); 254 /* not reached */ 255 } 256 } else { 257 /* 258 * When formatting a block device as a HAMMER volume the 259 * sector size must be compatible. HAMMER uses 16384 byte 260 * filesystem buffers. 261 */ 262 if (pinfo.reserved_blocks) { 263 errx(1, "HAMMER cannot be placed in a partition " 264 "which overlaps the disklabel or MBR"); 265 /* not reached */ 266 } 267 if (pinfo.media_blksize > HAMMER_BUFSIZE || 268 HAMMER_BUFSIZE % pinfo.media_blksize) { 269 errx(1, "A media sector size of %d is not supported", 270 pinfo.media_blksize); 271 /* not reached */ 272 } 273 274 volume->size = pinfo.media_size; 275 volume->device_offset = pinfo.media_offset; 276 volume->type = "DEVICE"; 277 } 278 } 279 280 int 281 is_regfile(const volume_info_t volume) 282 { 283 return(strcmp(volume->type, "REGFILE") ? 0 : 1); 284 } 285 286 void 287 assert_volume_offset(const volume_info_t volume) 288 { 289 assert(hammer_is_zone_raw_buffer(volume->vol_free_off)); 290 assert(hammer_is_zone_raw_buffer(volume->vol_free_end)); 291 if (volume->vol_free_off >= volume->vol_free_end) { 292 errx(1, "Ran out of room, filesystem too small"); 293 /* not reached */ 294 } 295 } 296 297 volume_info_t 298 get_volume(int32_t vol_no) 299 { 300 volume_info_t volume; 301 302 TAILQ_FOREACH(volume, &VolList, entry) { 303 if (volume->vol_no == vol_no) 304 break; 305 } 306 307 return(volume); 308 } 309 310 volume_info_t 311 get_root_volume(void) 312 { 313 return(get_volume(HAMMER_ROOT_VOLNO)); 314 } 315 316 static 317 hammer_off_t 318 __blockmap_xlate_to_zone2(hammer_off_t buf_offset) 319 { 320 hammer_off_t zone2_offset; 321 int error = 0; 322 323 if (hammer_is_zone_raw_buffer(buf_offset)) 324 zone2_offset = buf_offset; 325 else 326 zone2_offset = blockmap_lookup(buf_offset, &error); 327 328 if (error) 329 return(HAMMER_OFF_BAD); 330 assert(hammer_is_zone_raw_buffer(zone2_offset)); 331 332 return(zone2_offset); 333 } 334 335 static 336 buffer_info_t 337 __alloc_buffer(hammer_off_t zone2_offset, int isnew) 338 { 339 volume_info_t volume; 340 buffer_info_t buffer; 341 int hi; 342 343 volume = get_volume(HAMMER_VOL_DECODE(zone2_offset)); 344 assert(volume != NULL); 345 346 buffer = calloc(1, sizeof(*buffer)); 347 buffer->zone2_offset = zone2_offset; 348 buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset); 349 buffer->volume = volume; 350 buffer->ondisk = calloc(1, HAMMER_BUFSIZE); 351 352 if (isnew <= 0) { 353 if (readhammerbuf(buffer) == -1) { 354 err(1, "Failed to read %s:%016jx at %016jx", 355 volume->name, 356 (intmax_t)buffer->zone2_offset, 357 (intmax_t)buffer->raw_offset); 358 /* not reached */ 359 } 360 } 361 362 hi = buffer_hash(zone2_offset); 363 TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry); 364 hammer_cache_add(&buffer->cache); 365 366 return(buffer); 367 } 368 369 /* 370 * Acquire the 16KB buffer for specified zone offset. 371 */ 372 static 373 buffer_info_t 374 get_buffer(hammer_off_t buf_offset, int isnew) 375 { 376 buffer_info_t buffer; 377 hammer_off_t zone2_offset; 378 int dora = 0; 379 380 zone2_offset = __blockmap_xlate_to_zone2(buf_offset); 381 if (zone2_offset == HAMMER_OFF_BAD) 382 return(NULL); 383 384 zone2_offset &= ~HAMMER_BUFMASK64; 385 buffer = find_buffer(zone2_offset); 386 387 if (buffer == NULL) { 388 buffer = __alloc_buffer(zone2_offset, isnew); 389 dora = (isnew == 0); 390 } else { 391 assert(isnew != -1); 392 hammer_cache_used(&buffer->cache); 393 } 394 assert(buffer->ondisk != NULL); 395 396 ++buffer->cache.refs; 397 hammer_cache_flush(); 398 399 if (isnew > 0) { 400 assert(buffer->cache.modified == 0); 401 bzero(buffer->ondisk, HAMMER_BUFSIZE); 402 buffer->cache.modified = 1; 403 } 404 if (dora) 405 get_buffer_readahead(buffer); 406 return(buffer); 407 } 408 409 static 410 void 411 get_buffer_readahead(const buffer_info_t base) 412 { 413 buffer_info_t buffer; 414 volume_info_t volume; 415 hammer_off_t zone2_offset; 416 int64_t raw_offset; 417 int ri = UseReadBehind; 418 int re = UseReadAhead; 419 420 raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE; 421 volume = base->volume; 422 423 while (ri < re) { 424 if (raw_offset >= volume->ondisk->vol_buf_end) 425 break; 426 if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) { 427 ++ri; 428 raw_offset += HAMMER_BUFSIZE; 429 continue; 430 } 431 zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 432 raw_offset - volume->ondisk->vol_buf_beg); 433 buffer = find_buffer(zone2_offset); 434 if (buffer == NULL) { 435 /* call with -1 to prevent another readahead */ 436 buffer = get_buffer(zone2_offset, -1); 437 rel_buffer(buffer); 438 } 439 ++ri; 440 raw_offset += HAMMER_BUFSIZE; 441 } 442 } 443 444 void 445 rel_buffer(buffer_info_t buffer) 446 { 447 volume_info_t volume; 448 int hi; 449 450 if (buffer == NULL) 451 return; 452 assert(buffer->cache.refs > 0); 453 if (--buffer->cache.refs == 0) { 454 if (buffer->cache.delete) { 455 hi = buffer_hash(buffer->zone2_offset); 456 volume = buffer->volume; 457 if (buffer->cache.modified) 458 flush_buffer(buffer); 459 TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry); 460 hammer_cache_del(&buffer->cache); 461 free(buffer->ondisk); 462 free(buffer); 463 } 464 } 465 } 466 467 /* 468 * Retrieve a pointer to a buffer data given a zone-X buffer offset. 469 * The underlying bufferp is freed if isnew or the corresponding zone-2 470 * offset is out of range of the cached data. If bufferp is freed, 471 * a referenced buffer is loaded into it. 472 */ 473 void * 474 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew) 475 { 476 hammer_off_t xor = 0; 477 hammer_volume_ondisk_t ondisk; 478 479 if (*bufferp != NULL) { 480 if (hammer_is_zone_undo(buf_offset)) { 481 ondisk = (*bufferp)->volume->ondisk; 482 xor = hammer_xlate_to_undo(ondisk, buf_offset) ^ 483 (*bufferp)->zone2_offset; 484 } else if (hammer_is_zone_direct_xlated(buf_offset)) { 485 xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^ 486 HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset); 487 } else { 488 assert(0); 489 } 490 if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) { 491 rel_buffer(*bufferp); 492 *bufferp = NULL; 493 } else { 494 hammer_cache_used(&(*bufferp)->cache); 495 } 496 } 497 498 if (*bufferp == NULL) { 499 *bufferp = get_buffer(buf_offset, isnew); 500 if (*bufferp == NULL) 501 return(NULL); 502 } 503 504 return((char *)(*bufferp)->ondisk + 505 ((int32_t)buf_offset & HAMMER_BUFMASK)); 506 } 507 508 /* 509 * Allocate HAMMER elements - B-Tree nodes 510 */ 511 hammer_node_ondisk_t 512 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp) 513 { 514 hammer_node_ondisk_t node; 515 516 node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node), 517 offp, data_bufferp); 518 bzero(node, sizeof(*node)); 519 return(node); 520 } 521 522 /* 523 * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc) 524 */ 525 void * 526 alloc_meta_element(hammer_off_t *offp, int32_t data_len, 527 buffer_info_t *data_bufferp) 528 { 529 void *data; 530 531 data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len, 532 offp, data_bufferp); 533 bzero(data, data_len); 534 return(data); 535 } 536 537 /* 538 * Format a new blockmap. This is mostly a degenerate case because 539 * all allocations are now actually done from the freemap. 540 */ 541 void 542 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset) 543 { 544 hammer_blockmap_t blockmap; 545 hammer_off_t zone_base; 546 547 /* Only root volume needs formatting */ 548 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 549 550 assert(hammer_is_index_record(zone)); 551 552 blockmap = &root_vol->ondisk->vol0_blockmap[zone]; 553 zone_base = HAMMER_ZONE_ENCODE(zone, offset); 554 555 bzero(blockmap, sizeof(*blockmap)); 556 blockmap->phys_offset = 0; 557 blockmap->first_offset = zone_base; 558 blockmap->next_offset = zone_base; 559 blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1); 560 hammer_crc_set_blockmap(HammerVersion, blockmap); 561 } 562 563 /* 564 * Format a new freemap. Set all layer1 entries to UNAVAIL. The initialize 565 * code will load each volume's freemap. 566 */ 567 void 568 format_freemap(volume_info_t root_vol) 569 { 570 buffer_info_t buffer = NULL; 571 hammer_off_t layer1_offset; 572 hammer_blockmap_t blockmap; 573 hammer_blockmap_layer1_t layer1; 574 int i, isnew; 575 576 /* Only root volume needs formatting */ 577 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 578 579 layer1_offset = bootstrap_bigblock(root_vol); 580 for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) { 581 isnew = ((i % HAMMER_BUFSIZE) == 0); 582 layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew); 583 bzero(layer1, sizeof(*layer1)); 584 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL; 585 layer1->blocks_free = 0; 586 hammer_crc_set_layer1(HammerVersion, layer1); 587 } 588 assert(i == HAMMER_BIGBLOCK_SIZE); 589 rel_buffer(buffer); 590 591 blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; 592 bzero(blockmap, sizeof(*blockmap)); 593 blockmap->phys_offset = layer1_offset; 594 blockmap->first_offset = 0; 595 blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0); 596 blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1); 597 hammer_crc_set_blockmap(HammerVersion, blockmap); 598 } 599 600 /* 601 * Load the volume's remaining free space into the freemap. 602 * 603 * Returns the number of big-blocks available. 604 */ 605 int64_t 606 initialize_freemap(volume_info_t volume) 607 { 608 volume_info_t root_vol; 609 buffer_info_t buffer1 = NULL; 610 buffer_info_t buffer2 = NULL; 611 hammer_blockmap_layer1_t layer1; 612 hammer_blockmap_layer2_t layer2; 613 hammer_off_t layer1_offset; 614 hammer_off_t layer2_offset; 615 hammer_off_t phys_offset; 616 hammer_off_t block_offset; 617 hammer_off_t aligned_vol_free_end; 618 hammer_blockmap_t freemap; 619 int64_t count = 0; 620 int64_t layer1_count = 0; 621 622 root_vol = get_root_volume(); 623 624 assert_volume_offset(volume); 625 aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end); 626 627 printf("initialize freemap volume %d\n", volume->vol_no); 628 629 /* 630 * Initialize the freemap. First preallocate the big-blocks required 631 * to implement layer2. This preallocation is a bootstrap allocation 632 * using blocks from the target volume. 633 */ 634 freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; 635 636 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 637 phys_offset < aligned_vol_free_end; 638 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 639 layer1_offset = freemap->phys_offset + 640 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); 641 layer1 = get_buffer_data(layer1_offset, &buffer1, 0); 642 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) { 643 layer1->phys_offset = bootstrap_bigblock(volume); 644 layer1->blocks_free = 0; 645 buffer1->cache.modified = 1; 646 hammer_crc_set_layer1(HammerVersion, layer1); 647 } 648 } 649 650 /* 651 * Now fill everything in. 652 */ 653 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 654 phys_offset < aligned_vol_free_end; 655 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 656 layer1_count = 0; 657 layer1_offset = freemap->phys_offset + 658 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); 659 layer1 = get_buffer_data(layer1_offset, &buffer1, 0); 660 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL); 661 662 for (block_offset = 0; 663 block_offset < HAMMER_BLOCKMAP_LAYER2; 664 block_offset += HAMMER_BIGBLOCK_SIZE) { 665 layer2_offset = layer1->phys_offset + 666 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset); 667 layer2 = get_buffer_data(layer2_offset, &buffer2, 0); 668 bzero(layer2, sizeof(*layer2)); 669 670 if (phys_offset + block_offset < volume->vol_free_off) { 671 /* 672 * Big-blocks already allocated as part 673 * of the freemap bootstrap. 674 */ 675 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX; 676 layer2->append_off = HAMMER_BIGBLOCK_SIZE; 677 layer2->bytes_free = 0; 678 } else if (phys_offset + block_offset < volume->vol_free_end) { 679 layer2->zone = 0; 680 layer2->append_off = 0; 681 layer2->bytes_free = HAMMER_BIGBLOCK_SIZE; 682 ++count; 683 ++layer1_count; 684 } else { 685 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX; 686 layer2->append_off = HAMMER_BIGBLOCK_SIZE; 687 layer2->bytes_free = 0; 688 } 689 hammer_crc_set_layer2(HammerVersion, layer2); 690 buffer2->cache.modified = 1; 691 } 692 693 layer1->blocks_free += layer1_count; 694 hammer_crc_set_layer1(HammerVersion, layer1); 695 buffer1->cache.modified = 1; 696 } 697 698 rel_buffer(buffer1); 699 rel_buffer(buffer2); 700 return(count); 701 } 702 703 /* 704 * Returns the number of big-blocks available for filesystem data and undos 705 * without formatting. 706 */ 707 int64_t 708 count_freemap(const volume_info_t volume) 709 { 710 hammer_off_t phys_offset; 711 hammer_off_t vol_free_off; 712 hammer_off_t aligned_vol_free_end; 713 int64_t count = 0; 714 715 vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 716 717 assert_volume_offset(volume); 718 aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end); 719 720 if (volume->vol_no == HAMMER_ROOT_VOLNO) 721 vol_free_off += HAMMER_BIGBLOCK_SIZE; 722 723 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 724 phys_offset < aligned_vol_free_end; 725 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 726 vol_free_off += HAMMER_BIGBLOCK_SIZE; 727 } 728 729 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 730 phys_offset < aligned_vol_free_end; 731 phys_offset += HAMMER_BIGBLOCK_SIZE) { 732 if (phys_offset < vol_free_off) 733 ; 734 else if (phys_offset < volume->vol_free_end) 735 ++count; 736 } 737 738 return(count); 739 } 740 741 /* 742 * Format the undomap for the root volume. 743 */ 744 void 745 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size) 746 { 747 hammer_off_t undo_limit; 748 hammer_blockmap_t blockmap; 749 hammer_volume_ondisk_t ondisk; 750 buffer_info_t buffer = NULL; 751 hammer_off_t scan; 752 int n; 753 int limit_index; 754 uint32_t seqno; 755 756 /* Only root volume needs formatting */ 757 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 758 ondisk = root_vol->ondisk; 759 760 /* 761 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE, 762 * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks. 763 * Size to approximately 0.1% of the disk. 764 * 765 * The minimum UNDO fifo size is 512MB, or approximately 1% of 766 * the recommended 50G disk. 767 * 768 * Changing this minimum is rather dangerous as complex filesystem 769 * operations can cause the UNDO FIFO to fill up otherwise. 770 */ 771 undo_limit = *undo_buffer_size; 772 if (undo_limit == 0) { 773 undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000; 774 if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS) 775 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS; 776 } 777 undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit); 778 if (undo_limit < HAMMER_BIGBLOCK_SIZE) 779 undo_limit = HAMMER_BIGBLOCK_SIZE; 780 if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS) 781 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS; 782 *undo_buffer_size = undo_limit; 783 784 blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 785 bzero(blockmap, sizeof(*blockmap)); 786 blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL; 787 blockmap->first_offset = HAMMER_ENCODE_UNDO(0); 788 blockmap->next_offset = blockmap->first_offset; 789 blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit); 790 hammer_crc_set_blockmap(HammerVersion, blockmap); 791 792 limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE; 793 assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS); 794 795 for (n = 0; n < limit_index; ++n) 796 ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol); 797 while (n < HAMMER_MAX_UNDO_BIGBLOCKS) 798 ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL; 799 800 /* 801 * Pre-initialize the UNDO blocks (HAMMER version 4+) 802 */ 803 printf("initializing the undo map (%jd MB)\n", 804 (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) / 805 (1024 * 1024)); 806 807 scan = blockmap->first_offset; 808 seqno = 0; 809 810 while (scan < blockmap->alloc_offset) { 811 hammer_fifo_head_t head; 812 hammer_fifo_tail_t tail; 813 int bytes = HAMMER_UNDO_ALIGN; 814 int isnew = ((scan & HAMMER_BUFMASK64) == 0); 815 816 head = get_buffer_data(scan, &buffer, isnew); 817 buffer->cache.modified = 1; 818 tail = (void *)((char *)head + bytes - sizeof(*tail)); 819 820 bzero(head, bytes); 821 head->hdr_signature = HAMMER_HEAD_SIGNATURE; 822 head->hdr_type = HAMMER_HEAD_TYPE_DUMMY; 823 head->hdr_size = bytes; 824 head->hdr_seq = seqno++; 825 826 tail->tail_signature = HAMMER_TAIL_SIGNATURE; 827 tail->tail_type = HAMMER_HEAD_TYPE_DUMMY; 828 tail->tail_size = bytes; 829 830 hammer_crc_set_fifo_head(HammerVersion, head, bytes); 831 832 scan += bytes; 833 } 834 rel_buffer(buffer); 835 } 836 837 const char *zone_labels[] = { 838 "", /* 0 */ 839 "raw_volume", /* 1 */ 840 "raw_buffer", /* 2 */ 841 "undo", /* 3 */ 842 "freemap", /* 4 */ 843 "", /* 5 */ 844 "", /* 6 */ 845 "", /* 7 */ 846 "btree", /* 8 */ 847 "meta", /* 9 */ 848 "large_data", /* 10 */ 849 "small_data", /* 11 */ 850 "", /* 12 */ 851 "", /* 13 */ 852 "", /* 14 */ 853 "unavail", /* 15 */ 854 }; 855 856 void 857 print_blockmap(const volume_info_t volume) 858 { 859 hammer_blockmap_t blockmap; 860 hammer_volume_ondisk_t ondisk = volume->ondisk; 861 int64_t size, used; 862 int i; 863 char *fstype, *fsid; 864 #define INDENT "" 865 866 printf(INDENT"vol_label\t%s\n", ondisk->vol_label); 867 printf(INDENT"vol_count\t%d\n", ondisk->vol_count); 868 869 hammer_uuid_to_string(&ondisk->vol_fstype, &fstype); 870 hammer_uuid_to_string(&ondisk->vol_fsid, &fsid); 871 printf(INDENT"vol_fstype\t%s", fstype); 872 if (strcmp(fstype, "61dc63ac-6e38-11dc-8513-01301bb8a9f5") == 0) 873 printf(" \"%s\"\n", HAMMER_FSTYPE_STRING); 874 else 875 printf("\n"); /* invalid UUID */ 876 printf(INDENT"vol_fsid\t%s\n", fsid); 877 free(fstype); 878 free(fsid); 879 880 printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg)); 881 printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg)); 882 printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg)); 883 printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end)); 884 printf(INDENT"vol0_next_tid\t%016jx\n", 885 (uintmax_t)ondisk->vol0_next_tid); 886 887 blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 888 size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset); 889 if (blockmap->first_offset <= blockmap->next_offset) 890 used = blockmap->next_offset - blockmap->first_offset; 891 else 892 used = blockmap->alloc_offset - blockmap->first_offset + 893 HAMMER_OFF_LONG_ENCODE(blockmap->next_offset); 894 printf(INDENT"undo_size\t%s\n", sizetostr(size)); 895 printf(INDENT"undo_used\t%s\n", sizetostr(used)); 896 897 printf(INDENT"zone # " 898 "phys first next alloc\n"); 899 for (i = 0; i < HAMMER_MAX_ZONES; i++) { 900 blockmap = &ondisk->vol0_blockmap[i]; 901 printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n", 902 i, zone_labels[i], 903 (uintmax_t)blockmap->phys_offset, 904 (uintmax_t)blockmap->first_offset, 905 (uintmax_t)blockmap->next_offset, 906 (uintmax_t)blockmap->alloc_offset); 907 } 908 } 909 910 /* 911 * Flush various tracking structures to disk 912 */ 913 void 914 flush_all_volumes(void) 915 { 916 volume_info_t volume; 917 918 TAILQ_FOREACH(volume, &VolList, entry) 919 flush_volume(volume); 920 } 921 922 void 923 flush_volume(volume_info_t volume) 924 { 925 buffer_info_t buffer; 926 int i; 927 928 for (i = 0; i < HAMMER_BUFLISTS; ++i) { 929 TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry) 930 flush_buffer(buffer); 931 } 932 if (writehammervol(volume) == -1) { 933 err(1, "Write volume %d (%s)", volume->vol_no, volume->name); 934 /* not reached */ 935 } 936 } 937 938 void 939 flush_buffer(buffer_info_t buffer) 940 { 941 volume_info_t volume; 942 943 volume = buffer->volume; 944 if (writehammerbuf(buffer) == -1) { 945 err(1, "Write volume %d (%s)", volume->vol_no, volume->name); 946 /* not reached */ 947 } 948 buffer->cache.modified = 0; 949 } 950 951 /* 952 * Core I/O operations 953 */ 954 static 955 int 956 __read(volume_info_t volume, void *data, int64_t offset, int size) 957 { 958 ssize_t n; 959 960 n = pread(volume->fd, data, size, offset); 961 if (n != size) 962 return(-1); 963 return(0); 964 } 965 966 static __inline 967 int 968 readhammervol(volume_info_t volume) 969 { 970 return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE)); 971 } 972 973 static __inline 974 int 975 readhammerbuf(buffer_info_t buffer) 976 { 977 return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset, 978 HAMMER_BUFSIZE)); 979 } 980 981 static 982 int 983 __write(volume_info_t volume, const void *data, int64_t offset, int size) 984 { 985 ssize_t n; 986 987 if (volume->rdonly) 988 return(0); 989 990 n = pwrite(volume->fd, data, size, offset); 991 if (n != size) 992 return(-1); 993 return(0); 994 } 995 996 static __inline 997 int 998 writehammervol(volume_info_t volume) 999 { 1000 return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE)); 1001 } 1002 1003 static __inline 1004 int 1005 writehammerbuf(buffer_info_t buffer) 1006 { 1007 return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset, 1008 HAMMER_BUFSIZE)); 1009 } 1010 1011 int64_t 1012 init_boot_area_size(int64_t value, off_t avg_vol_size) 1013 { 1014 if (value == 0) { 1015 value = HAMMER_BOOT_NOMBYTES; 1016 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 1017 value >>= 1; 1018 } 1019 1020 if (value < HAMMER_BOOT_MINBYTES) 1021 value = HAMMER_BOOT_MINBYTES; 1022 else if (value > HAMMER_BOOT_MAXBYTES) 1023 value = HAMMER_BOOT_MAXBYTES; 1024 1025 return(value); 1026 } 1027 1028 int64_t 1029 init_memory_log_size(int64_t value, off_t avg_vol_size) 1030 { 1031 if (value == 0) { 1032 value = HAMMER_MEM_NOMBYTES; 1033 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 1034 value >>= 1; 1035 } 1036 1037 if (value < HAMMER_MEM_MINBYTES) 1038 value = HAMMER_MEM_MINBYTES; 1039 else if (value > HAMMER_MEM_MAXBYTES) 1040 value = HAMMER_MEM_MAXBYTES; 1041 1042 return(value); 1043 } 1044