1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */ 34 35 #include <sys/diskslice.h> 36 #include <sys/diskmbr.h> 37 38 #include "hammer_util.h" 39 40 static void check_volume(volume_info_t volume); 41 static void get_buffer_readahead(buffer_info_t base); 42 static __inline int readhammervol(volume_info_t volume); 43 static __inline int readhammerbuf(buffer_info_t buffer); 44 static __inline int writehammervol(volume_info_t volume); 45 static __inline int writehammerbuf(buffer_info_t buffer); 46 47 uuid_t Hammer_FSType; 48 uuid_t Hammer_FSId; 49 int UseReadBehind = -4; 50 int UseReadAhead = 4; 51 int DebugOpt; 52 uint32_t HammerVersion = -1; 53 54 TAILQ_HEAD(volume_list, volume_info); 55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList); 56 static int valid_hammer_volumes; 57 58 static __inline 59 int 60 buffer_hash(hammer_off_t zone2_offset) 61 { 62 int hi; 63 64 hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK; 65 return(hi); 66 } 67 68 static 69 buffer_info_t 70 find_buffer(hammer_off_t zone2_offset) 71 { 72 volume_info_t volume; 73 buffer_info_t buffer; 74 int hi; 75 76 volume = get_volume(HAMMER_VOL_DECODE(zone2_offset)); 77 assert(volume); 78 79 hi = buffer_hash(zone2_offset); 80 TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) { 81 if (buffer->zone2_offset == zone2_offset) 82 return(buffer); 83 } 84 return(NULL); 85 } 86 87 static 88 volume_info_t 89 __alloc_volume(const char *volname, int oflags) 90 { 91 volume_info_t volume; 92 int i; 93 94 volume = calloc(1, sizeof(*volume)); 95 volume->vol_no = -1; 96 volume->rdonly = (oflags == O_RDONLY); 97 volume->name = strdup(volname); 98 volume->fd = open(volume->name, oflags); 99 if (volume->fd < 0) { 100 err(1, "alloc_volume: Failed to open %s", volume->name); 101 /* not reached */ 102 } 103 check_volume(volume); 104 105 volume->ondisk = calloc(1, HAMMER_BUFSIZE); 106 107 for (i = 0; i < HAMMER_BUFLISTS; ++i) 108 TAILQ_INIT(&volume->buffer_lists[i]); 109 110 return(volume); 111 } 112 113 static 114 void 115 
__add_volume(const volume_info_t volume) 116 { 117 volume_info_t scan; 118 struct stat st1, st2; 119 120 if (fstat(volume->fd, &st1) != 0) { 121 errx(1, "add_volume: %s: Failed to stat", volume->name); 122 /* not reached */ 123 } 124 125 TAILQ_FOREACH(scan, &VolList, entry) { 126 if (scan->vol_no == volume->vol_no) { 127 errx(1, "add_volume: %s: Duplicate volume number %d " 128 "against %s", 129 volume->name, volume->vol_no, scan->name); 130 /* not reached */ 131 } 132 if (fstat(scan->fd, &st2) != 0) { 133 errx(1, "add_volume: %s: Failed to stat %s", 134 volume->name, scan->name); 135 /* not reached */ 136 } 137 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) { 138 errx(1, "add_volume: %s: Specified more than once", 139 volume->name); 140 /* not reached */ 141 } 142 } 143 144 TAILQ_INSERT_TAIL(&VolList, volume, entry); 145 } 146 147 static 148 void 149 __verify_volume(const volume_info_t volume) 150 { 151 hammer_volume_ondisk_t ondisk = volume->ondisk; 152 153 if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) { 154 errx(1, "verify_volume: Invalid volume signature %016jx", 155 ondisk->vol_signature); 156 /* not reached */ 157 } 158 if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) { 159 errx(1, "verify_volume: Invalid root volume# %d", 160 ondisk->vol_rootvol); 161 /* not reached */ 162 } 163 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) { 164 errx(1, "verify_volume: %s: Header does not indicate " 165 "that this is a HAMMER volume", volume->name); 166 /* not reached */ 167 } 168 if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) { 169 errx(1, "verify_volume: %s: FSId does not match other volumes!", 170 volume->name); 171 /* not reached */ 172 } 173 if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN || 174 ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) { 175 errx(1, "verify_volume: %s: Invalid volume version %u", 176 volume->name, ondisk->vol_version); 177 /* not reached */ 178 } 179 } 180 181 /* 182 * Initialize a volume 
/*
 * Initialize a volume structure and ondisk vol_no field.
 */
volume_info_t
init_volume(const char *filename, int oflags, int32_t vol_no)
{
	volume_info_t volume;

	volume = __alloc_volume(filename, oflags);
	volume->vol_no = volume->ondisk->vol_no = vol_no;

	__add_volume(volume);

	return(volume);
}

/*
 * Initialize a volume structure and read ondisk volume header.
 */
volume_info_t
load_volume(const char *filename, int oflags, int verify_volume)
{
	volume_info_t volume;
	int n;

	volume = __alloc_volume(filename, oflags);

	n = readhammervol(volume);
	if (n == -1) {
		err(1, "load_volume: %s: Read failed at offset 0",
		    volume->name);
		/* not reached */
	}
	volume->vol_no = volume->ondisk->vol_no;
	/* the root volume's header is authoritative for the fs version */
	if (volume->vol_no == HAMMER_ROOT_VOLNO)
		HammerVersion = volume->ondisk->vol_version;

	/* first successfully-read volume establishes the expected FSId */
	if (valid_hammer_volumes++ == 0)
		Hammer_FSId = volume->ondisk->vol_fsid;
	if (verify_volume)
		__verify_volume(volume);

	__add_volume(volume);

	return(volume);
}

/*
 * Check basic volume characteristics.
 */
static
void
check_volume(volume_info_t volume)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Allow the formatting of block devices or regular files
	 */
	if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
		/* not a device; fall back to treating it as a regular file */
		if (fstat(volume->fd, &st) < 0) {
			err(1, "Unable to stat %s", volume->name);
			/* not reached */
		}
		if (S_ISREG(st.st_mode)) {
			volume->size = st.st_size;
			volume->type = "REGFILE";
		} else {
			errx(1, "Unsupported file type for %s", volume->name);
			/* not reached */
		}
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
			/* not reached */
		}
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
			/* not reached */
		}

		volume->size = pinfo.media_size;
		volume->device_offset = pinfo.media_offset;
		volume->type = "DEVICE";
	}
}

/*
 * Returns non-zero if the volume is backed by a regular file
 * (as opposed to a raw device).
 */
int
is_regfile(const volume_info_t volume)
{
	return(strcmp(volume->type, "REGFILE") ? 0 : 1);
}

/*
 * Assert that the volume's free-space window is a valid zone-2 range
 * and that there is still room left; exits if the filesystem is full.
 */
void
assert_volume_offset(const volume_info_t volume)
{
	assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
	assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
	if (volume->vol_free_off >= volume->vol_free_end) {
		errx(1, "Ran out of room, filesystem too small");
		/* not reached */
	}
}

/*
 * Look up a volume by number.  Returns NULL if not found
 * (TAILQ_FOREACH leaves the iterator NULL on exhaustion).
 */
volume_info_t
get_volume(int32_t vol_no)
{
	volume_info_t volume;

	TAILQ_FOREACH(volume, &VolList, entry) {
		if (volume->vol_no == vol_no)
			break;
	}

	return(volume);
}

volume_info_t
get_root_volume(void)
{
	return(get_volume(HAMMER_ROOT_VOLNO));
}

/*
 * Translate any zone-X buffer offset to its zone-2 (raw buffer) offset.
 * Zone-2 offsets pass through; others go through the blockmap.  Returns
 * HAMMER_OFF_BAD on a blockmap lookup error.
 */
static
hammer_off_t
__blockmap_xlate_to_zone2(hammer_off_t buf_offset)
{
	hammer_off_t zone2_offset;
	int error = 0;

	if (hammer_is_zone_raw_buffer(buf_offset))
		zone2_offset = buf_offset;
	else
		zone2_offset = blockmap_lookup(buf_offset, &error);

	if (error)
		return(HAMMER_OFF_BAD);
	assert(hammer_is_zone_raw_buffer(zone2_offset));

	return(zone2_offset);
}

/*
 * Instantiate a buffer structure for a zone-2 offset.  Reads the buffer
 * contents from disk unless isnew > 0 (caller will format it), then
 * hashes it onto the volume's buffer list and registers it with the
 * buffer cache.
 */
static
buffer_info_t
__alloc_buffer(hammer_off_t zone2_offset, int isnew)
{
	volume_info_t volume;
	buffer_info_t buffer;
	int hi;

	volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
	assert(volume != NULL);

	buffer = calloc(1, sizeof(*buffer));
	buffer->zone2_offset = zone2_offset;
	buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
	buffer->volume = volume;
	buffer->ondisk = calloc(1, HAMMER_BUFSIZE);

	if (isnew <= 0) {
		if (readhammerbuf(buffer) == -1) {
			err(1, "Failed to read %s:%016jx at %016jx",
			    volume->name,
			    (intmax_t)buffer->zone2_offset,
			    (intmax_t)buffer->raw_offset);
			/* not reached */
		}
	}

	hi = buffer_hash(zone2_offset);
	TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
	hammer_cache_add(&buffer->cache);

	return(buffer);
}

/*
 * Acquire the 16KB buffer for specified zone offset.
 *
 * isnew > 0:  buffer is being created, zero it and mark modified.
 * isnew == 0: normal read access; triggers readahead on a cache miss.
 * isnew == -1: internal readahead access, must not recurse into
 *              readahead and must not hit an already-cached buffer.
 */
static
buffer_info_t
get_buffer(hammer_off_t buf_offset, int isnew)
{
	buffer_info_t buffer;
	hammer_off_t zone2_offset;
	int dora = 0;

	zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
	if (zone2_offset == HAMMER_OFF_BAD)
		return(NULL);

	zone2_offset &= ~HAMMER_BUFMASK64;
	buffer = find_buffer(zone2_offset);

	if (buffer == NULL) {
		buffer = __alloc_buffer(zone2_offset, isnew);
		dora = (isnew == 0);
	} else {
		assert(isnew != -1);
		hammer_cache_used(&buffer->cache);
	}
	assert(buffer->ondisk != NULL);

	++buffer->cache.refs;
	hammer_cache_flush();

	if (isnew > 0) {
		assert(buffer->cache.modified == 0);
		bzero(buffer->ondisk, HAMMER_BUFSIZE);
		buffer->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buffer);
	return(buffer);
}

/*
 * Opportunistically pull nearby buffers into the cache: scans from
 * UseReadBehind (negative) to UseReadAhead buffers around 'base',
 * skipping the base itself (ri == 0) and anything outside the
 * volume's buffer area.
 */
static
void
get_buffer_readahead(const buffer_info_t base)
{
	buffer_info_t buffer;
	volume_info_t volume;
	hammer_off_t zone2_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	volume = base->volume;

	while (ri < re) {
		if (raw_offset >= volume->ondisk->vol_buf_end)
			break;
		if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
			raw_offset - volume->ondisk->vol_buf_beg);
		buffer = find_buffer(zone2_offset);
		if (buffer == NULL) {
			/* call with -1 to prevent another readahead */
			buffer = get_buffer(zone2_offset, -1);
			rel_buffer(buffer);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

/*
 * Release a buffer reference.  When the last reference drops and the
 * cache has marked the buffer for deletion, flush it if modified and
 * free all associated storage.  NULL is accepted as a no-op.
 */
void
rel_buffer(buffer_info_t buffer)
{
	volume_info_t volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->zone2_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
		}
	}
}
/*
 * Retrieve a pointer to a buffer data given a zone-X buffer offset.
 * The underlying bufferp is freed if isnew or the corresponding zone-2
 * offset is out of range of the cached data.  If bufferp is freed,
 * a referenced buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
{
	hammer_off_t xor = 0;
	hammer_volume_ondisk_t ondisk;

	if (*bufferp != NULL) {
		/*
		 * xor of the requested offset against the cached buffer's
		 * zone-2 offset; any bit set above the buffer mask means
		 * the request falls in a different 16KB buffer.
		 */
		if (hammer_is_zone_undo(buf_offset)) {
			ondisk = (*bufferp)->volume->ondisk;
			xor = hammer_xlate_to_undo(ondisk, buf_offset) ^
				(*bufferp)->zone2_offset;
		} else if (hammer_is_zone_direct_xlated(buf_offset)) {
			xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
			      HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
		} else {
			assert(0);
		}
		if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
			/* cached buffer unusable, drop our reference */
			rel_buffer(*bufferp);
			*bufferp = NULL;
		} else {
			hammer_cache_used(&(*bufferp)->cache);
		}
	}

	if (*bufferp == NULL) {
		*bufferp = get_buffer(buf_offset, isnew);
		if (*bufferp == NULL)
			return(NULL);
	}

	return((char *)(*bufferp)->ondisk +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements - B-Tree nodes
 */
hammer_node_ondisk_t
alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return(node);
}

/*
 * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
 */
void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   buffer_info_t *data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return(data);
}
/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
{
	hammer_blockmap_t blockmap;
	hammer_off_t zone_base;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	assert(hammer_is_index_record(zone));

	blockmap = &root_vol->ondisk->vol0_blockmap[zone];
	zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	/* alloc_offset is the end of the zone's addressable range */
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
 * code will load each volume's freemap.
 */
void
format_freemap(volume_info_t root_vol)
{
	buffer_info_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	hammer_blockmap_layer1_t layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	/* bootstrap one big-block to hold the layer1 array */
	layer1_offset = bootstrap_bigblock(root_vol);
	for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
		/* new 16KB buffer starts at each HAMMER_BUFSIZE boundary */
		isnew = ((i % HAMMER_BUFSIZE) == 0);
		layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		hammer_crc_set_layer1(HammerVersion, layer1);
	}
	assert(i == HAMMER_BIGBLOCK_SIZE);
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}
/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(volume_info_t volume)
{
	volume_info_t root_vol;
	buffer_info_t buffer1 = NULL;
	buffer_info_t buffer2 = NULL;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_root_volume();

	assert_volume_offset(volume);
	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);

	printf("initialize freemap volume %d\n", volume->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = bootstrap_bigblock(volume);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			hammer_crc_set_layer1(HammerVersion, layer1);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < volume->vol_free_off) {
				/*
				 * Big-blocks already allocated as part
				 * of the freemap bootstrap.
				 */
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			} else if (phys_offset + block_offset < volume->vol_free_end) {
				/* genuinely free big-block */
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				/* past the end of the volume */
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			hammer_crc_set_layer2(HammerVersion, layer2);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		hammer_crc_set_layer1(HammerVersion, layer1);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	return(count);
}
/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(const volume_info_t volume)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);

	assert_volume_offset(volume);
	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);

	/* the root volume reserves one extra big-block (vol0 metadata) */
	if (volume->vol_no == HAMMER_ROOT_VOLNO)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

	/* one bootstrap big-block per layer2 range */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	/* count big-blocks beyond the reserved prefix, up to vol_free_end */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off)
			;
		else if (phys_offset < volume->vol_free_end)
			++count;
	}

	return(count);
}

/*
 * Format the undomap for the root volume.
 */
void
format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
{
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	buffer_info_t buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	uint32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
	 * Size to approximately 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 512MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = *undo_buffer_size;
	if (undo_limit == 0) {
		undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
		if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
			undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
	}
	undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
	/* report the clamped size back to the caller */
	*undo_buffer_size = undo_limit;

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
	hammer_crc_set_blockmap(HammerVersion, blockmap);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);

	/* allocate the in-use slots, mark the rest unavailable */
	for (n = 0; n < limit_index; ++n)
		ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
	while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int bytes = HAMMER_UNDO_ALIGN;
		int isnew = ((scan & HAMMER_BUFMASK64) == 0);

		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		/* tail record sits at the end of this aligned chunk */
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		hammer_crc_set_fifo_head(HammerVersion, head, bytes);

		scan += bytes;
	}
	rel_buffer(buffer);
}

/* Human-readable names for the 16 HAMMER zones (index == zone number). */
const char *zone_labels[] = {
	"",		/* 0 */
	"raw_volume",	/* 1 */
	"raw_buffer",	/* 2 */
	"undo",		/* 3 */
	"freemap",	/* 4 */
	"",		/* 5 */
	"",		/* 6 */
	"",		/* 7 */
	"btree",	/* 8 */
	"meta",		/* 9 */
	"large_data",	/* 10 */
	"small_data",	/* 11 */
	"",		/* 12 */
	"",		/* 13 */
	"",		/* 14 */
	"unavail",	/* 15 */
};

/*
 * Print the volume header and the per-zone blockmap table, including
 * the UNDO FIFO size and in-use amount (accounting for wraparound).
 */
void
print_blockmap(const volume_info_t volume)
{
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	int64_t size, used;
	int i;
#define INDENT ""

	ondisk = volume->ondisk;
	printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
	printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
	printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
	printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
	printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
	printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
	printf(INDENT"vol0_next_tid\t%016jx\n",
	       (uintmax_t)ondisk->vol0_next_tid);

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
	if (blockmap->first_offset <= blockmap->next_offset)
		used = blockmap->next_offset - blockmap->first_offset;
	else
		/* FIFO has wrapped around */
		used = blockmap->alloc_offset - blockmap->first_offset +
			HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
	printf(INDENT"undo_size\t%s\n", sizetostr(size));
	printf(INDENT"undo_used\t%s\n", sizetostr(used));

	printf(INDENT"zone #             "
	       "phys             first            next             alloc\n");
	for (i = 0; i < HAMMER_MAX_ZONES; i++) {
		blockmap = &ondisk->vol0_blockmap[i];
		printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
			i, zone_labels[i],
			(uintmax_t)blockmap->phys_offset,
			(uintmax_t)blockmap->first_offset,
			(uintmax_t)blockmap->next_offset,
			(uintmax_t)blockmap->alloc_offset);
	}
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	volume_info_t volume;

	TAILQ_FOREACH(volume, &VolList, entry)
		flush_volume(volume);
}

/*
 * Flush all modified buffers of the volume, then write its header.
 */
void
flush_volume(volume_info_t volume)
{
	buffer_info_t buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	if (writehammervol(volume) == -1) {
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
		/* not reached */
	}
}

/*
 * Write a single buffer back to its volume and clear its modified flag.
 */
void
flush_buffer(buffer_info_t buffer)
{
	volume_info_t volume;

	volume = buffer->volume;
	if (writehammerbuf(buffer) == -1) {
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
		/* not reached */
	}
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static
int
__read(volume_info_t volume, void *data, int64_t offset, int size)
{
	ssize_t n;

	n = pread(volume->fd, data, size, offset);
	if (n != size)		/* includes short reads and errors */
		return(-1);
	return(0);
}

static __inline
int
readhammervol(volume_info_t volume)
{
	return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
}

static __inline
int
readhammerbuf(buffer_info_t buffer)
{
	return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
		HAMMER_BUFSIZE));
}

static
int
__write(volume_info_t volume, const void *data, int64_t offset, int size)
{
	ssize_t n;

	/* silently succeed on read-only volumes */
	if (volume->rdonly)
		return(0);

	n = pwrite(volume->fd, data, size, offset);
	if (n != size)
		return(-1);
	return(0);
}
static __inline 982 int 983 writehammervol(volume_info_t volume) 984 { 985 return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE)); 986 } 987 988 static __inline 989 int 990 writehammerbuf(buffer_info_t buffer) 991 { 992 return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset, 993 HAMMER_BUFSIZE)); 994 } 995 996 int64_t 997 init_boot_area_size(int64_t value, off_t avg_vol_size) 998 { 999 if (value == 0) { 1000 value = HAMMER_BOOT_NOMBYTES; 1001 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 1002 value >>= 1; 1003 } 1004 1005 if (value < HAMMER_BOOT_MINBYTES) 1006 value = HAMMER_BOOT_MINBYTES; 1007 else if (value > HAMMER_BOOT_MAXBYTES) 1008 value = HAMMER_BOOT_MAXBYTES; 1009 1010 return(value); 1011 } 1012 1013 int64_t 1014 init_memory_log_size(int64_t value, off_t avg_vol_size) 1015 { 1016 if (value == 0) { 1017 value = HAMMER_MEM_NOMBYTES; 1018 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 1019 value >>= 1; 1020 } 1021 1022 if (value < HAMMER_MEM_MINBYTES) 1023 value = HAMMER_MEM_MINBYTES; 1024 else if (value > HAMMER_MEM_MAXBYTES) 1025 value = HAMMER_MEM_MAXBYTES; 1026 1027 return(value); 1028 } 1029