1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/diskslice.h> 36 #include <sys/diskmbr.h> 37 38 #include "hammer_util.h" 39 40 static void check_volume(volume_info_t volume); 41 static void get_buffer_readahead(buffer_info_t base); 42 static __inline int readhammervol(volume_info_t volume); 43 static __inline int readhammerbuf(buffer_info_t buffer); 44 static __inline int writehammervol(volume_info_t volume); 45 static __inline int writehammerbuf(buffer_info_t buffer); 46 47 uuid_t Hammer_FSType; 48 uuid_t Hammer_FSId; 49 int UseReadBehind = -4; 50 int UseReadAhead = 4; 51 int DebugOpt; 52 uint32_t HammerVersion = -1; 53 54 TAILQ_HEAD(volume_list, volume_info); 55 static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList); 56 static int valid_hammer_volumes; 57 58 static __inline 59 int 60 buffer_hash(hammer_off_t zone2_offset) 61 { 62 int hi; 63 64 hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK; 65 return(hi); 66 } 67 68 static 69 buffer_info_t 70 find_buffer(hammer_off_t zone2_offset) 71 { 72 volume_info_t volume; 73 buffer_info_t buffer; 74 int hi; 75 76 volume = get_volume(HAMMER_VOL_DECODE(zone2_offset)); 77 assert(volume); 78 79 hi = buffer_hash(zone2_offset); 80 TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) { 81 if (buffer->zone2_offset == zone2_offset) 82 return(buffer); 83 } 84 return(NULL); 85 } 86 87 static 88 volume_info_t 89 __alloc_volume(const char *volname, int oflags) 90 { 91 volume_info_t volume; 92 int i; 93 94 volume = calloc(1, sizeof(*volume)); 95 volume->vol_no = -1; 96 volume->rdonly = (oflags == O_RDONLY); 97 volume->name = strdup(volname); 98 volume->fd = open(volume->name, oflags); 99 if (volume->fd < 0) { 100 err(1, "alloc_volume: Failed to open %s", volume->name); 101 /* not reached */ 102 } 103 check_volume(volume); 104 105 volume->ondisk = calloc(1, HAMMER_BUFSIZE); 106 107 for (i = 0; i < HAMMER_BUFLISTS; ++i) 108 TAILQ_INIT(&volume->buffer_lists[i]); 109 110 return(volume); 111 } 112 113 static 114 void 115 __add_volume(const volume_info_t volume) 116 { 117 volume_info_t scan; 118 struct stat st1, st2; 119 120 if (fstat(volume->fd, &st1) != 0) { 121 errx(1, "add_volume: %s: Failed to stat", volume->name); 122 /* not reached */ 123 } 124 125 TAILQ_FOREACH(scan, &VolList, entry) { 126 if (scan->vol_no == volume->vol_no) { 127 errx(1, "add_volume: %s: Duplicate volume number %d " 128 "against %s", 129 volume->name, volume->vol_no, scan->name); 130 /* not reached */ 131 } 132 if (fstat(scan->fd, &st2) != 0) { 133 errx(1, "add_volume: %s: Failed to stat %s", 134 volume->name, scan->name); 135 /* not reached */ 136 } 137 if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) { 138 errx(1, "add_volume: %s: Specified more than once", 139 volume->name); 140 /* not reached */ 141 } 142 } 143 144 TAILQ_INSERT_TAIL(&VolList, volume, entry); 145 } 146 147 static 148 void 149 __verify_volume(const volume_info_t volume) 150 { 151 hammer_volume_ondisk_t ondisk = volume->ondisk; 152 153 if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) { 154 errx(1, "verify_volume: Invalid volume signature %016jx", 155 ondisk->vol_signature); 156 /* not reached */ 157 } 158 if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) { 159 errx(1, "verify_volume: Invalid root volume# %d", 160 ondisk->vol_rootvol); 161 /* not reached */ 162 } 163 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) { 164 errx(1, "verify_volume: %s: Header does not indicate " 165 "that this is a HAMMER volume", volume->name); 166 /* not reached */ 167 } 168 if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) { 169 errx(1, "verify_volume: %s: FSId does not match other volumes!", 170 volume->name); 171 /* not reached */ 172 } 173 if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN || 174 ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) { 175 errx(1, "verify_volume: %s: Invalid volume version %u", 176 volume->name, ondisk->vol_version); 177 /* not reached */ 178 } 179 } 180 181 /* 182 * Initialize a volume structure and ondisk vol_no field. 183 */ 184 volume_info_t 185 init_volume(const char *filename, int oflags, int32_t vol_no) 186 { 187 volume_info_t volume; 188 189 volume = __alloc_volume(filename, oflags); 190 volume->vol_no = volume->ondisk->vol_no = vol_no; 191 192 __add_volume(volume); 193 194 return(volume); 195 } 196 197 /* 198 * Initialize a volume structure and read ondisk volume header. 199 */ 200 volume_info_t 201 load_volume(const char *filename, int oflags, int verify_volume) 202 { 203 volume_info_t volume; 204 int n; 205 206 volume = __alloc_volume(filename, oflags); 207 208 n = readhammervol(volume); 209 if (n == -1) { 210 err(1, "load_volume: %s: Read failed at offset 0", 211 volume->name); 212 /* not reached */ 213 } 214 volume->vol_no = volume->ondisk->vol_no; 215 if (volume->vol_no == HAMMER_ROOT_VOLNO) 216 HammerVersion = volume->ondisk->vol_version; 217 218 if (valid_hammer_volumes++ == 0) 219 Hammer_FSId = volume->ondisk->vol_fsid; 220 if (verify_volume) 221 __verify_volume(volume); 222 223 __add_volume(volume); 224 225 return(volume); 226 } 227 228 /* 229 * Check basic volume characteristics. 230 */ 231 static 232 void 233 check_volume(volume_info_t volume) 234 { 235 struct partinfo pinfo; 236 struct stat st; 237 238 /* 239 * Allow the formatting of block devices or regular files 240 */ 241 if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) { 242 if (fstat(volume->fd, &st) < 0) { 243 err(1, "Unable to stat %s", volume->name); 244 /* not reached */ 245 } 246 if (S_ISREG(st.st_mode)) { 247 volume->size = st.st_size; 248 volume->type = "REGFILE"; 249 } else { 250 errx(1, "Unsupported file type for %s", volume->name); 251 /* not reached */ 252 } 253 } else { 254 /* 255 * When formatting a block device as a HAMMER volume the 256 * sector size must be compatible. HAMMER uses 16384 byte 257 * filesystem buffers. 258 */ 259 if (pinfo.reserved_blocks) { 260 errx(1, "HAMMER cannot be placed in a partition " 261 "which overlaps the disklabel or MBR"); 262 /* not reached */ 263 } 264 if (pinfo.media_blksize > HAMMER_BUFSIZE || 265 HAMMER_BUFSIZE % pinfo.media_blksize) { 266 errx(1, "A media sector size of %d is not supported", 267 pinfo.media_blksize); 268 /* not reached */ 269 } 270 271 volume->size = pinfo.media_size; 272 volume->device_offset = pinfo.media_offset; 273 volume->type = "DEVICE"; 274 } 275 } 276 277 int 278 is_regfile(const volume_info_t volume) 279 { 280 return(strcmp(volume->type, "REGFILE") ? 0 : 1); 281 } 282 283 void 284 assert_volume_offset(const volume_info_t volume) 285 { 286 assert(hammer_is_zone_raw_buffer(volume->vol_free_off)); 287 assert(hammer_is_zone_raw_buffer(volume->vol_free_end)); 288 if (volume->vol_free_off >= volume->vol_free_end) { 289 errx(1, "Ran out of room, filesystem too small"); 290 /* not reached */ 291 } 292 } 293 294 volume_info_t 295 get_volume(int32_t vol_no) 296 { 297 volume_info_t volume; 298 299 TAILQ_FOREACH(volume, &VolList, entry) { 300 if (volume->vol_no == vol_no) 301 break; 302 } 303 304 return(volume); 305 } 306 307 volume_info_t 308 get_root_volume(void) 309 { 310 return(get_volume(HAMMER_ROOT_VOLNO)); 311 } 312 313 static 314 hammer_off_t 315 __blockmap_xlate_to_zone2(hammer_off_t buf_offset) 316 { 317 hammer_off_t zone2_offset; 318 int error = 0; 319 320 if (hammer_is_zone_raw_buffer(buf_offset)) 321 zone2_offset = buf_offset; 322 else 323 zone2_offset = blockmap_lookup(buf_offset, &error); 324 325 if (error) 326 return(HAMMER_OFF_BAD); 327 assert(hammer_is_zone_raw_buffer(zone2_offset)); 328 329 return(zone2_offset); 330 } 331 332 static 333 buffer_info_t 334 __alloc_buffer(hammer_off_t zone2_offset, int isnew) 335 { 336 volume_info_t volume; 337 buffer_info_t buffer; 338 int hi; 339 340 volume = get_volume(HAMMER_VOL_DECODE(zone2_offset)); 341 assert(volume != NULL); 342 343 buffer = calloc(1, sizeof(*buffer)); 344 buffer->zone2_offset = zone2_offset; 345 buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset); 346 buffer->volume = volume; 347 buffer->ondisk = calloc(1, HAMMER_BUFSIZE); 348 349 if (isnew <= 0) { 350 if (readhammerbuf(buffer) == -1) { 351 err(1, "Failed to read %s:%016jx at %016jx", 352 volume->name, 353 (intmax_t)buffer->zone2_offset, 354 (intmax_t)buffer->raw_offset); 355 /* not reached */ 356 } 357 } 358 359 hi = buffer_hash(zone2_offset); 360 TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry); 361 hammer_cache_add(&buffer->cache); 362 363 return(buffer); 364 } 365 366 /* 367 * Acquire the 16KB buffer for specified zone offset. 368 */ 369 static 370 buffer_info_t 371 get_buffer(hammer_off_t buf_offset, int isnew) 372 { 373 buffer_info_t buffer; 374 hammer_off_t zone2_offset; 375 int dora = 0; 376 377 zone2_offset = __blockmap_xlate_to_zone2(buf_offset); 378 if (zone2_offset == HAMMER_OFF_BAD) 379 return(NULL); 380 381 zone2_offset &= ~HAMMER_BUFMASK64; 382 buffer = find_buffer(zone2_offset); 383 384 if (buffer == NULL) { 385 buffer = __alloc_buffer(zone2_offset, isnew); 386 dora = (isnew == 0); 387 } else { 388 assert(isnew != -1); 389 hammer_cache_used(&buffer->cache); 390 } 391 assert(buffer->ondisk != NULL); 392 393 ++buffer->cache.refs; 394 hammer_cache_flush(); 395 396 if (isnew > 0) { 397 assert(buffer->cache.modified == 0); 398 bzero(buffer->ondisk, HAMMER_BUFSIZE); 399 buffer->cache.modified = 1; 400 } 401 if (dora) 402 get_buffer_readahead(buffer); 403 return(buffer); 404 } 405 406 static 407 void 408 get_buffer_readahead(const buffer_info_t base) 409 { 410 buffer_info_t buffer; 411 volume_info_t volume; 412 hammer_off_t zone2_offset; 413 int64_t raw_offset; 414 int ri = UseReadBehind; 415 int re = UseReadAhead; 416 417 raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE; 418 volume = base->volume; 419 420 while (ri < re) { 421 if (raw_offset >= volume->ondisk->vol_buf_end) 422 break; 423 if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) { 424 ++ri; 425 raw_offset += HAMMER_BUFSIZE; 426 continue; 427 } 428 zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 429 raw_offset - volume->ondisk->vol_buf_beg); 430 buffer = find_buffer(zone2_offset); 431 if (buffer == NULL) { 432 /* call with -1 to prevent another readahead */ 433 buffer = get_buffer(zone2_offset, -1); 434 rel_buffer(buffer); 435 } 436 ++ri; 437 raw_offset += HAMMER_BUFSIZE; 438 } 439 } 440 441 void 442 rel_buffer(buffer_info_t buffer) 443 { 444 volume_info_t volume; 445 int hi; 446 447 if (buffer == NULL) 448 return; 449 assert(buffer->cache.refs > 0); 450 if (--buffer->cache.refs == 0) { 451 if (buffer->cache.delete) { 452 hi = buffer_hash(buffer->zone2_offset); 453 volume = buffer->volume; 454 if (buffer->cache.modified) 455 flush_buffer(buffer); 456 TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry); 457 hammer_cache_del(&buffer->cache); 458 free(buffer->ondisk); 459 free(buffer); 460 } 461 } 462 } 463 464 /* 465 * Retrieve a pointer to a buffer data given a buffer offset. The underlying 466 * bufferp is freed if isnew or the offset is out of range of the cached data. 467 * If bufferp is freed a referenced buffer is loaded into it. 468 */ 469 void * 470 get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew) 471 { 472 hammer_off_t xor; 473 474 if (*bufferp != NULL) { 475 /* XXX xor is always non zero for indirect zones */ 476 xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^ 477 HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset); 478 if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) { 479 rel_buffer(*bufferp); 480 *bufferp = NULL; 481 } 482 } 483 484 if (*bufferp == NULL) { 485 *bufferp = get_buffer(buf_offset, isnew); 486 if (*bufferp == NULL) 487 return(NULL); 488 } 489 490 return(((char *)(*bufferp)->ondisk) + 491 ((int32_t)buf_offset & HAMMER_BUFMASK)); 492 } 493 494 /* 495 * Allocate HAMMER elements - B-Tree nodes 496 */ 497 hammer_node_ondisk_t 498 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp) 499 { 500 hammer_node_ondisk_t node; 501 502 node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node), 503 offp, data_bufferp); 504 bzero(node, sizeof(*node)); 505 return(node); 506 } 507 508 /* 509 * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc) 510 */ 511 void * 512 alloc_meta_element(hammer_off_t *offp, int32_t data_len, 513 buffer_info_t *data_bufferp) 514 { 515 void *data; 516 517 data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len, 518 offp, data_bufferp); 519 bzero(data, data_len); 520 return(data); 521 } 522 523 /* 524 * Format a new blockmap. This is mostly a degenerate case because 525 * all allocations are now actually done from the freemap. 526 */ 527 void 528 format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset) 529 { 530 hammer_blockmap_t blockmap; 531 hammer_off_t zone_base; 532 533 /* Only root volume needs formatting */ 534 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 535 536 assert(hammer_is_index_record(zone)); 537 538 blockmap = &root_vol->ondisk->vol0_blockmap[zone]; 539 zone_base = HAMMER_ZONE_ENCODE(zone, offset); 540 541 bzero(blockmap, sizeof(*blockmap)); 542 blockmap->phys_offset = 0; 543 blockmap->first_offset = zone_base; 544 blockmap->next_offset = zone_base; 545 blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1); 546 hammer_crc_set_blockmap(HammerVersion, blockmap); 547 } 548 549 /* 550 * Format a new freemap. Set all layer1 entries to UNAVAIL. The initialize 551 * code will load each volume's freemap. 552 */ 553 void 554 format_freemap(volume_info_t root_vol) 555 { 556 buffer_info_t buffer = NULL; 557 hammer_off_t layer1_offset; 558 hammer_blockmap_t blockmap; 559 hammer_blockmap_layer1_t layer1; 560 int i, isnew; 561 562 /* Only root volume needs formatting */ 563 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 564 565 layer1_offset = bootstrap_bigblock(root_vol); 566 for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) { 567 isnew = ((i % HAMMER_BUFSIZE) == 0); 568 layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew); 569 bzero(layer1, sizeof(*layer1)); 570 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL; 571 layer1->blocks_free = 0; 572 hammer_crc_set_layer1(HammerVersion, layer1); 573 } 574 assert(i == HAMMER_BIGBLOCK_SIZE); 575 rel_buffer(buffer); 576 577 blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; 578 bzero(blockmap, sizeof(*blockmap)); 579 blockmap->phys_offset = layer1_offset; 580 blockmap->first_offset = 0; 581 blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0); 582 blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1); 583 hammer_crc_set_blockmap(HammerVersion, blockmap); 584 } 585 586 /* 587 * Load the volume's remaining free space into the freemap. 588 * 589 * Returns the number of big-blocks available. 590 */ 591 int64_t 592 initialize_freemap(volume_info_t volume) 593 { 594 volume_info_t root_vol; 595 buffer_info_t buffer1 = NULL; 596 buffer_info_t buffer2 = NULL; 597 hammer_blockmap_layer1_t layer1; 598 hammer_blockmap_layer2_t layer2; 599 hammer_off_t layer1_offset; 600 hammer_off_t layer2_offset; 601 hammer_off_t phys_offset; 602 hammer_off_t block_offset; 603 hammer_off_t aligned_vol_free_end; 604 hammer_blockmap_t freemap; 605 int64_t count = 0; 606 int64_t layer1_count = 0; 607 608 root_vol = get_root_volume(); 609 610 assert_volume_offset(volume); 611 aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end); 612 613 printf("initialize freemap volume %d\n", volume->vol_no); 614 615 /* 616 * Initialize the freemap. First preallocate the big-blocks required 617 * to implement layer2. This preallocation is a bootstrap allocation 618 * using blocks from the target volume. 619 */ 620 freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; 621 622 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 623 phys_offset < aligned_vol_free_end; 624 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 625 layer1_offset = freemap->phys_offset + 626 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); 627 layer1 = get_buffer_data(layer1_offset, &buffer1, 0); 628 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) { 629 layer1->phys_offset = bootstrap_bigblock(volume); 630 layer1->blocks_free = 0; 631 buffer1->cache.modified = 1; 632 hammer_crc_set_layer1(HammerVersion, layer1); 633 } 634 } 635 636 /* 637 * Now fill everything in. 638 */ 639 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 640 phys_offset < aligned_vol_free_end; 641 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 642 layer1_count = 0; 643 layer1_offset = freemap->phys_offset + 644 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); 645 layer1 = get_buffer_data(layer1_offset, &buffer1, 0); 646 assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL); 647 648 for (block_offset = 0; 649 block_offset < HAMMER_BLOCKMAP_LAYER2; 650 block_offset += HAMMER_BIGBLOCK_SIZE) { 651 layer2_offset = layer1->phys_offset + 652 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset); 653 layer2 = get_buffer_data(layer2_offset, &buffer2, 0); 654 bzero(layer2, sizeof(*layer2)); 655 656 if (phys_offset + block_offset < volume->vol_free_off) { 657 /* 658 * Big-blocks already allocated as part 659 * of the freemap bootstrap. 660 */ 661 layer2->zone = HAMMER_ZONE_FREEMAP_INDEX; 662 layer2->append_off = HAMMER_BIGBLOCK_SIZE; 663 layer2->bytes_free = 0; 664 } else if (phys_offset + block_offset < volume->vol_free_end) { 665 layer2->zone = 0; 666 layer2->append_off = 0; 667 layer2->bytes_free = HAMMER_BIGBLOCK_SIZE; 668 ++count; 669 ++layer1_count; 670 } else { 671 layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX; 672 layer2->append_off = HAMMER_BIGBLOCK_SIZE; 673 layer2->bytes_free = 0; 674 } 675 hammer_crc_set_layer2(HammerVersion, layer2); 676 buffer2->cache.modified = 1; 677 } 678 679 layer1->blocks_free += layer1_count; 680 hammer_crc_set_layer1(HammerVersion, layer1); 681 buffer1->cache.modified = 1; 682 } 683 684 rel_buffer(buffer1); 685 rel_buffer(buffer2); 686 return(count); 687 } 688 689 /* 690 * Returns the number of big-blocks available for filesystem data and undos 691 * without formatting. 692 */ 693 int64_t 694 count_freemap(const volume_info_t volume) 695 { 696 hammer_off_t phys_offset; 697 hammer_off_t vol_free_off; 698 hammer_off_t aligned_vol_free_end; 699 int64_t count = 0; 700 701 vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 702 703 assert_volume_offset(volume); 704 aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end); 705 706 if (volume->vol_no == HAMMER_ROOT_VOLNO) 707 vol_free_off += HAMMER_BIGBLOCK_SIZE; 708 709 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 710 phys_offset < aligned_vol_free_end; 711 phys_offset += HAMMER_BLOCKMAP_LAYER2) { 712 vol_free_off += HAMMER_BIGBLOCK_SIZE; 713 } 714 715 for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 716 phys_offset < aligned_vol_free_end; 717 phys_offset += HAMMER_BIGBLOCK_SIZE) { 718 if (phys_offset < vol_free_off) 719 ; 720 else if (phys_offset < volume->vol_free_end) 721 ++count; 722 } 723 724 return(count); 725 } 726 727 /* 728 * Format the undomap for the root volume. 729 */ 730 void 731 format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size) 732 { 733 hammer_off_t undo_limit; 734 hammer_blockmap_t blockmap; 735 hammer_volume_ondisk_t ondisk; 736 buffer_info_t buffer = NULL; 737 hammer_off_t scan; 738 int n; 739 int limit_index; 740 uint32_t seqno; 741 742 /* Only root volume needs formatting */ 743 assert(root_vol->vol_no == HAMMER_ROOT_VOLNO); 744 ondisk = root_vol->ondisk; 745 746 /* 747 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE, 748 * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks. 749 * Size to approximately 0.1% of the disk. 750 * 751 * The minimum UNDO fifo size is 512MB, or approximately 1% of 752 * the recommended 50G disk. 753 * 754 * Changing this minimum is rather dangerous as complex filesystem 755 * operations can cause the UNDO FIFO to fill up otherwise. 756 */ 757 undo_limit = *undo_buffer_size; 758 if (undo_limit == 0) { 759 undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000; 760 if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS) 761 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS; 762 } 763 undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit); 764 if (undo_limit < HAMMER_BIGBLOCK_SIZE) 765 undo_limit = HAMMER_BIGBLOCK_SIZE; 766 if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS) 767 undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS; 768 *undo_buffer_size = undo_limit; 769 770 blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 771 bzero(blockmap, sizeof(*blockmap)); 772 blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL; 773 blockmap->first_offset = HAMMER_ENCODE_UNDO(0); 774 blockmap->next_offset = blockmap->first_offset; 775 blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit); 776 hammer_crc_set_blockmap(HammerVersion, blockmap); 777 778 limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE; 779 assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS); 780 781 for (n = 0; n < limit_index; ++n) 782 ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol); 783 while (n < HAMMER_MAX_UNDO_BIGBLOCKS) 784 ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL; 785 786 /* 787 * Pre-initialize the UNDO blocks (HAMMER version 4+) 788 */ 789 printf("initializing the undo map (%jd MB)\n", 790 (intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) / 791 (1024 * 1024)); 792 793 scan = blockmap->first_offset; 794 seqno = 0; 795 796 while (scan < blockmap->alloc_offset) { 797 hammer_fifo_head_t head; 798 hammer_fifo_tail_t tail; 799 int bytes = HAMMER_UNDO_ALIGN; 800 int isnew = ((scan & HAMMER_BUFMASK64) == 0); 801 802 head = get_buffer_data(scan, &buffer, isnew); 803 buffer->cache.modified = 1; 804 tail = (void *)((char *)head + bytes - sizeof(*tail)); 805 806 bzero(head, bytes); 807 head->hdr_signature = HAMMER_HEAD_SIGNATURE; 808 head->hdr_type = HAMMER_HEAD_TYPE_DUMMY; 809 head->hdr_size = bytes; 810 head->hdr_seq = seqno++; 811 812 tail->tail_signature = HAMMER_TAIL_SIGNATURE; 813 tail->tail_type = HAMMER_HEAD_TYPE_DUMMY; 814 tail->tail_size = bytes; 815 816 hammer_crc_set_fifo_head(HammerVersion, head, bytes); 817 818 scan += bytes; 819 } 820 rel_buffer(buffer); 821 } 822 823 const char *zone_labels[] = { 824 "", /* 0 */ 825 "raw_volume", /* 1 */ 826 "raw_buffer", /* 2 */ 827 "undo", /* 3 */ 828 "freemap", /* 4 */ 829 "", /* 5 */ 830 "", /* 6 */ 831 "", /* 7 */ 832 "btree", /* 8 */ 833 "meta", /* 9 */ 834 "large_data", /* 10 */ 835 "small_data", /* 11 */ 836 "", /* 12 */ 837 "", /* 13 */ 838 "", /* 14 */ 839 "unavail", /* 15 */ 840 }; 841 842 void 843 print_blockmap(const volume_info_t volume) 844 { 845 hammer_blockmap_t blockmap; 846 hammer_volume_ondisk_t ondisk; 847 int64_t size, used; 848 int i; 849 #define INDENT "" 850 851 ondisk = volume->ondisk; 852 printf(INDENT"vol_label\t%s\n", ondisk->vol_label); 853 printf(INDENT"vol_count\t%d\n", ondisk->vol_count); 854 printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg)); 855 printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg)); 856 printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg)); 857 printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end)); 858 printf(INDENT"vol0_next_tid\t%016jx\n", 859 (uintmax_t)ondisk->vol0_next_tid); 860 861 blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 862 size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset); 863 if (blockmap->first_offset <= blockmap->next_offset) 864 used = blockmap->next_offset - blockmap->first_offset; 865 else 866 used = blockmap->alloc_offset - blockmap->first_offset + 867 HAMMER_OFF_LONG_ENCODE(blockmap->next_offset); 868 printf(INDENT"undo_size\t%s\n", sizetostr(size)); 869 printf(INDENT"undo_used\t%s\n", sizetostr(used)); 870 871 printf(INDENT"zone # " 872 "phys first next alloc\n"); 873 for (i = 0; i < HAMMER_MAX_ZONES; i++) { 874 blockmap = &ondisk->vol0_blockmap[i]; 875 printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n", 876 i, zone_labels[i], 877 (uintmax_t)blockmap->phys_offset, 878 (uintmax_t)blockmap->first_offset, 879 (uintmax_t)blockmap->next_offset, 880 (uintmax_t)blockmap->alloc_offset); 881 } 882 } 883 884 /* 885 * Flush various tracking structures to disk 886 */ 887 void 888 flush_all_volumes(void) 889 { 890 volume_info_t volume; 891 892 TAILQ_FOREACH(volume, &VolList, entry) 893 flush_volume(volume); 894 } 895 896 void 897 flush_volume(volume_info_t volume) 898 { 899 buffer_info_t buffer; 900 int i; 901 902 for (i = 0; i < HAMMER_BUFLISTS; ++i) { 903 TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry) 904 flush_buffer(buffer); 905 } 906 if (writehammervol(volume) == -1) { 907 err(1, "Write volume %d (%s)", volume->vol_no, volume->name); 908 /* not reached */ 909 } 910 } 911 912 void 913 flush_buffer(buffer_info_t buffer) 914 { 915 volume_info_t volume; 916 917 volume = buffer->volume; 918 if (writehammerbuf(buffer) == -1) { 919 err(1, "Write volume %d (%s)", volume->vol_no, volume->name); 920 /* not reached */ 921 } 922 buffer->cache.modified = 0; 923 } 924 925 /* 926 * Core I/O operations 927 */ 928 static 929 int 930 __read(volume_info_t volume, void *data, int64_t offset, int size) 931 { 932 ssize_t n; 933 934 n = pread(volume->fd, data, size, offset); 935 if (n != size) 936 return(-1); 937 return(0); 938 } 939 940 static __inline 941 int 942 readhammervol(volume_info_t volume) 943 { 944 return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE)); 945 } 946 947 static __inline 948 int 949 readhammerbuf(buffer_info_t buffer) 950 { 951 return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset, 952 HAMMER_BUFSIZE)); 953 } 954 955 static 956 int 957 __write(volume_info_t volume, const void *data, int64_t offset, int size) 958 { 959 ssize_t n; 960 961 if (volume->rdonly) 962 return(0); 963 964 n = pwrite(volume->fd, data, size, offset); 965 if (n != size) 966 return(-1); 967 return(0); 968 } 969 970 static __inline 971 int 972 writehammervol(volume_info_t volume) 973 { 974 return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE)); 975 } 976 977 static __inline 978 int 979 writehammerbuf(buffer_info_t buffer) 980 { 981 return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset, 982 HAMMER_BUFSIZE)); 983 } 984 985 int64_t 986 init_boot_area_size(int64_t value, off_t avg_vol_size) 987 { 988 if (value == 0) { 989 value = HAMMER_BOOT_NOMBYTES; 990 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 991 value >>= 1; 992 } 993 994 if (value < HAMMER_BOOT_MINBYTES) 995 value = HAMMER_BOOT_MINBYTES; 996 else if (value > HAMMER_BOOT_MAXBYTES) 997 value = HAMMER_BOOT_MAXBYTES; 998 999 return(value); 1000 } 1001 1002 int64_t 1003 init_memory_log_size(int64_t value, off_t avg_vol_size) 1004 { 1005 if (value == 0) { 1006 value = HAMMER_MEM_NOMBYTES; 1007 while (value > avg_vol_size / HAMMER_MAX_VOLUMES) 1008 value >>= 1; 1009 } 1010 1011 if (value < HAMMER_MEM_MINBYTES) 1012 value = HAMMER_MEM_MINBYTES; 1013 else if (value > HAMMER_MEM_MAXBYTES) 1014 value = HAMMER_MEM_MAXBYTES; 1015 1016 return(value); 1017 } 1018