/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <stddef.h>
#include <err.h>
#include <fcntl.h>
#include "hammer_util.h"

static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
			struct buffer_info **bufferp);
static hammer_off_t alloc_bigblock(struct volume_info *volume, int zone);
static void get_buffer_readahead(struct buffer_info *base);
static __inline void *get_ondisk(hammer_off_t buf_offset,
			struct buffer_info **bufferp, int isnew);
static int readhammerbuf(struct volume_info *vol, void *data,
			int64_t offset);
static void writehammerbuf(struct volume_info *vol, const void *data,
			int64_t offset);

int DebugOpt;

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int64_t BootAreaSize;
int64_t MemAreaSize;
int64_t UndoBufferSize;
int UsingSuperClusters;
int NumVolumes;
int RootVolNo = -1;
int UseReadBehind = -4;
int UseReadAhead = 4;
int AssertOnFailure = 1;
struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);

static __inline
int
buffer_hash(hammer_off_t buf_offset)
{
	int hi;

	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}

static struct buffer_info*
find_buffer(struct volume_info *volume, hammer_off_t buf_offset)
{
	int hi;
	struct buffer_info *buf;

	hi = buffer_hash(buf_offset);
	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry)
		if (buf->buf_offset == buf_offset)
			return(buf);
	return(NULL);
}

/*
 * Lookup the requested information structure and related on-disk buffer.
 * Missing structures are created.
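 *
 * A typical call sequence for an existing volume might look like the
 * following (an illustrative sketch only; the device path and open flags
 * are assumptions, and vol_no is taken from the on-disk header when
 * isnew is 0, so the value passed in is ignored):
 *
 *	struct volume_info *vol;
 *
 *	vol = setup_volume(-1, "/dev/da1s1a", 0, O_RDONLY);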
 */
struct volume_info *
setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
{
	struct volume_info *vol;
	struct volume_info *scan;
	struct hammer_volume_ondisk *ondisk;
	int i, n;
	struct stat st1, st2;

	/*
	 * Allocate the volume structure
	 */
	vol = malloc(sizeof(*vol));
	bzero(vol, sizeof(*vol));
	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&vol->buffer_lists[i]);
	vol->name = strdup(filename);
	vol->fd = open(vol->name, oflags);
	if (vol->fd < 0) {
		err(1, "setup_volume: %s: Open failed", vol->name);
	}

	/*
	 * Read or initialize the volume header
	 */
	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
	} else {
		n = readhammerbuf(vol, ondisk, 0);
		if (n == -1) {
			err(1, "setup_volume: %s: Read failed at offset 0",
			    vol->name);
		}
		vol_no = ondisk->vol_no;
		if (RootVolNo < 0) {
			RootVolNo = ondisk->vol_rootvol;
		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
			errx(1, "setup_volume: %s: root volume disagreement: "
				"%d vs %d",
				vol->name, RootVolNo, ondisk->vol_rootvol);
		}

		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype,
			 sizeof(Hammer_FSType)) != 0) {
			errx(1, "setup_volume: %s: Header does not indicate "
				"that this is a HAMMER volume", vol->name);
		}
		if (TAILQ_EMPTY(&VolList)) {
			Hammer_FSId = vol->ondisk->vol_fsid;
		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid,
				sizeof(Hammer_FSId)) != 0) {
			errx(1, "setup_volume: %s: FSId does not match other "
				"volumes!", vol->name);
		}
	}
	vol->vol_no = vol_no;

	if (isnew > 0) {
		vol->cache.modified = 1;
	}

	if (fstat(vol->fd, &st1) != 0) {
		err(1, "setup_volume: %s: Failed to stat", vol->name);
	}

	/*
	 * Link the volume structure in
	 */
	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == vol_no) {
			errx(1, "setup_volume: %s: Duplicate volume number %d "
				"against %s", vol->name, vol_no, scan->name);
		}
		if (fstat(scan->fd, &st2) != 0) {
			err(1, "setup_volume: %s: Failed to stat %s",
				vol->name, scan->name);
		}
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "setup_volume: %s: Specified more than once",
				vol->name);
		}
	}
	TAILQ_INSERT_TAIL(&VolList, vol, entry);
	return(vol);
}

struct volume_info *
get_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}
	if (vol == NULL) {
		if (AssertOnFailure)
			errx(1, "get_volume: Volume %d does not exist!",
				vol_no);
		return(NULL);
	}
	++vol->cache.refs;
	/* not added to or removed from hammer cache */
	return(vol);
}

void
rel_volume(struct volume_info *volume)
{
	if (volume == NULL)
		return;
	/* not added to or removed from hammer cache */
	--volume->cache.refs;
}

/*
 * Acquire the specified buffer.  isnew is -1 only when called
 * via get_buffer_readahead() to prevent another readahead.
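 *
 * A minimal usage sketch (illustrative only; buf_offset is assumed to be
 * a valid zone-encoded HAMMER offset obtained elsewhere).  get_buffer()
 * returns the buffer with a reference held, which the caller drops with
 * rel_buffer():
 *
 *	struct buffer_info *buf;
 *
 *	buf = get_buffer(buf_offset, 0);
 *	if (buf != NULL) {
 *		... inspect buf->ondisk ...
 *		rel_buffer(buf);
 *	}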
 */
struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	void *ondisk;
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_off_t orig_offset = buf_offset;
	int vol_no;
	int zone;
	int hi, n;
	int dora = 0;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL);
	}
	if (buf_offset == HAMMER_OFF_BAD)
		return(NULL);

	if (AssertOnFailure) {
		assert((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		       HAMMER_ZONE_RAW_BUFFER);
	}
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = get_volume(vol_no);
	if (volume == NULL)
		return(NULL);

	buf_offset &= ~HAMMER_BUFMASK64;
	buf = find_buffer(volume, buf_offset);

	if (buf == NULL) {
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		buf->buf_offset = buf_offset;
		buf->raw_offset = volume->ondisk->vol_buf_beg +
				  (buf_offset & HAMMER_OFF_SHORT_MASK);
		buf->volume = volume;
		hi = buffer_hash(buf_offset);
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		++volume->cache.refs;
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
		dora = (isnew == 0);
	} else {
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p *\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		hammer_cache_used(&buf->cache);
		++buf->use_count;
	}
	++buf->cache.refs;
	hammer_cache_flush();
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		if (isnew <= 0) {
			n = readhammerbuf(volume, ondisk, buf->raw_offset);
			if (n == -1) {
				if (AssertOnFailure)
					err(1, "get_buffer: %s:%016llx "
					    "Read failed at offset %016llx",
					    volume->name,
					    (long long)buf->buf_offset,
					    (long long)buf->raw_offset);
				bzero(ondisk, HAMMER_BUFSIZE);
			}
		}
	}
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}

static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		if (raw_offset < vol->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no,
				raw_offset - vol->ondisk->vol_buf_beg);
		buf = find_buffer(vol, buf_offset);
		if (buf == NULL) {
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

void
rel_buffer(struct buffer_info *buffer)
{
	struct volume_info *volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->buf_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
			rel_volume(volume);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a buffer offset.  The underlying
 * bufferp is freed if isnew or the offset is out of range of the cached data.
 * If bufferp is freed a referenced buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
		int isnew)
{
	if (*bufferp != NULL) {
		if (isnew > 0 ||
		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}
	return(get_ondisk(buf_offset, bufferp, isnew));
}

/*
 * Retrieve a pointer to a B-Tree node given a cluster offset.  The underlying
 * bufferp is freed if non-NULL and a referenced buffer is loaded into it.
 */
hammer_node_ondisk_t
get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
{
	if (*bufferp != NULL) {
		rel_buffer(*bufferp);
		*bufferp = NULL;
	}
	return(get_ondisk(node_offset, bufferp, 0));
}

/*
 * Return a pointer to buffer data given a buffer offset.
 * If *bufferp is NULL acquire the buffer, otherwise use that buffer.
 */
static __inline
void *
get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp,
	   int isnew)
{
	struct buffer_info *buffer;

	buffer = *bufferp;
	if (buffer == NULL) {
		buffer = *bufferp = get_buffer(buf_offset, isnew);
		if (buffer == NULL)
			return(NULL);
	}

	return((char *)buffer->ondisk +
	       ((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements: B-Tree nodes, meta data, and data storage.
 */
void *
alloc_btree_element(hammer_off_t *offp,
		    struct buffer_info **data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return (node);
}

void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return (data);
}

/*
 * The only data_len supported by HAMMER userspace for the large data zone
 * (zone 10) is HAMMER_BUFSIZE, which is 16KB.  >16KB data does not fit
 * in a buffer allocated by get_buffer().  Also alloc_blockmap() does
 * not consider >16KB buffer sizes.
 */
void *
alloc_data_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	if (data_len >= HAMMER_BUFSIZE) {
		assert(data_len == HAMMER_BUFSIZE); /* just one buffer */
		data = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
				      offp, data_bufferp);
		bzero(data, data_len);
	} else if (data_len) {
		data = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
				      offp, data_bufferp);
		bzero(data, data_len);
	} else {
		data = NULL;
	}
	return (data);
}

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
 * code will load each volume's freemap.
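 *
 * For orientation (a sketch based on the layer constants defined in
 * hammer_disk.h): the freemap is two levels deep.  A single big-block of
 * layer1 entries is allocated here, and each layer1 entry later points at
 * a layer2 big-block whose entries track the free space of individual
 * big-blocks of storage.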
 */
void
format_freemap(struct volume_info *root_vol)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	struct hammer_blockmap_layer1 *layer1;
	int i, isnew;

	/* Only the root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);

	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < (int)HAMMER_BLOCKMAP_RADIX1; ++i) {
		isnew = ((i % HAMMER_BLOCKMAP_RADIX1_PERBUFFER) == 0);
		layer1 = get_buffer_data(layer1_offset + i * sizeof(*layer1),
					 &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	root_vol->cache.modified = 1;
}

/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_base;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;

	root_vol = get_volume(RootVolNo);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	layer1_base = freemap->phys_offset;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = layer1_base +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
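	 * Each big-block covered by this volume falls into one of three
	 * cases handled below: already consumed by the freemap bootstrap
	 * above, genuinely free space, or alignment padding past
	 * vol_free_end, which is marked unavailable.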
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		layer1_offset = layer1_base +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);

		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
		layer2_offset = layer1->phys_offset +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(phys_offset);

		layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
		bzero(layer2, sizeof(*layer2));
		if (phys_offset < vol->vol_free_off) {
			/*
			 * Fixups XXX - big-blocks already allocated as part
			 * of the freemap bootstrap.
			 */
			if (layer2->zone == 0) {
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
		} else if (phys_offset < vol->vol_free_end) {
			++layer1->blocks_free;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
			buffer1->cache.modified = 1;
			layer2->zone = 0;
			layer2->append_off = 0;
			layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
			++count;
		} else {
			layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
			layer2->append_off = HAMMER_BIGBLOCK_SIZE;
			layer2->bytes_free = 0;
		}
		layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
		buffer2->cache.modified = 1;
	}
	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(root_vol);
	return(count);
}

/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(struct volume_info *vol)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	if (vol->vol_no == RootVolNo)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off) {
			;
		} else if (phys_offset < vol->vol_free_end) {
			++count;
		}
	}

	return(count);
}

/*
 * Allocate big-blocks using our poor-man's volume->vol_free_off.
 *
 * If the zone is HAMMER_ZONE_FREEMAP_INDEX we are bootstrapping the freemap
 * itself and cannot update it yet.
 */
hammer_off_t
alloc_bigblock(struct volume_info *volume, int zone)
{
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct volume_info *root_vol;
	hammer_off_t result_offset;
	hammer_off_t layer_offset;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;

	if (volume == NULL)
		volume = get_volume(RootVolNo);

	result_offset = volume->vol_free_off;
	if (result_offset >= volume->vol_free_end)
		errx(1, "alloc_bigblock: Ran out of room, filesystem too small");
	volume->vol_free_off += HAMMER_BIGBLOCK_SIZE;

	/*
	 * Update the freemap.
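	 * Dive layer1 to reach the layer2 entry describing result_offset,
	 * then mark that big-block as fully owned by the requesting zone
	 * (append_off maxed out, no bytes free) and adjust the global
	 * free big-block statistics on the root volume.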
	 */
	if (zone != HAMMER_ZONE_FREEMAP_INDEX) {
		root_vol = get_volume(RootVolNo);
		freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
		layer_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset);
		layer1 = get_buffer_data(layer_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
		layer_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(result_offset);
		layer2 = get_buffer_data(layer_offset, &buffer2, 0);
		assert(layer2->zone == 0);
		layer2->zone = zone;
		layer2->append_off = HAMMER_BIGBLOCK_SIZE;
		layer2->bytes_free = 0;
		layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
		buffer2->cache.modified = 1;

		--root_vol->ondisk->vol0_stat_freebigblocks;
		root_vol->cache.modified = 1;

		rel_buffer(buffer1);
		rel_buffer(buffer2);
		rel_volume(root_vol);
	}

	rel_volume(volume);
	return(result_offset);
}

/*
 * Format the undomap for the root volume.
 */
void
format_undomap(struct volume_info *root_vol)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	struct hammer_volume_ondisk *ondisk;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	u_int32_t seqno;

	/* Only the root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big-blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 500MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
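	 *
	 * Worked examples of the formula below (illustrative arithmetic
	 * only): a 1TB volume yields roughly 1GB of UNDO at 0.1%, already
	 * comfortably above the floor, while a 100GB volume yields ~100MB,
	 * which falls below the floor and is bumped up to 500MB.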
	 */
	undo_limit = UndoBufferSize;
	if (undo_limit == 0) {
		undo_limit = (ondisk->vol_buf_end - ondisk->vol_buf_beg) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	UndoBufferSize = undo_limit;

	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_UNDO_LAYER2);

	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(NULL,
						HAMMER_ZONE_UNDO_INDEX);
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	rel_buffer(buffer);
}

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(hammer_blockmap_t blockmap, int zone, hammer_off_t offset)
{
	hammer_off_t zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Allocate a chunk of data out of a blockmap.  This is a simplified
 * version which uses next_offset as a simple allocation iterator.
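 *
 * Illustrative call (a sketch only; within this file allocations normally
 * go through the alloc_*_element() wrappers above rather than calling
 * this directly):
 *
 *	hammer_off_t result_off;
 *	struct buffer_info *bufp = NULL;
 *	void *ptr;
 *
 *	ptr = alloc_blockmap(HAMMER_ZONE_META_INDEX, 128,
 *			     &result_off, &bufp);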
 */
static
void *
alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
	       struct buffer_info **bufferp)
{
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct volume_info *volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t chunk_offset;
	void *ptr;

	volume = get_volume(RootVolNo);

	blockmap = &volume->ondisk->vol0_blockmap[zone];
	freemap = &volume->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	assert(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Alignment and buffer-boundary issues.  If the allocation would
	 * cross a buffer boundary we have to skip to the next buffer.
	 */
	bytes = (bytes + 15) & ~15;
	assert(bytes > 0 && bytes <= HAMMER_BUFSIZE);	/* not HAMMER_XBUFSIZE */
	assert(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);

again:
	assert(blockmap->next_offset != HAMMER_ZONE_ENCODE(zone + 1, 0));

	if ((blockmap->next_offset ^ (blockmap->next_offset + bytes - 1)) &
	    ~HAMMER_BUFMASK64) {
		volume->cache.modified = 1;
		blockmap->next_offset = (blockmap->next_offset + bytes - 1) &
					~HAMMER_BUFMASK64;
	}
	chunk_offset = blockmap->next_offset & HAMMER_BIGBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
		HAMMER_BLOCKMAP_LAYER1_OFFSET(blockmap->next_offset);

	layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
	assert(!(chunk_offset == 0 && layer1->blocks_free == 0));

	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
		fprintf(stderr, "alloc_blockmap: ran out of space!\n");
		exit(1);
	}

	/*
	 * Dive layer 2, each entry represents a big-block.
	 */
	layer2_offset = layer1->phys_offset +
		HAMMER_BLOCKMAP_LAYER2_OFFSET(blockmap->next_offset);

	layer2 = get_buffer_data(layer2_offset, &buffer2, 0);

	if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
		fprintf(stderr, "alloc_blockmap: ran out of space!\n");
		exit(1);
	}

	/*
	 * If we are entering a new big-block assign ownership to our
	 * zone.  If the big-block is owned by another zone skip it.
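	 * A skipped big-block simply advances next_offset to the next
	 * big-block boundary and retries the layer1/layer2 dive via the
	 * again: label.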
	 */
	if (layer2->zone == 0) {
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		layer2->zone = zone;
		--volume->ondisk->vol0_stat_freebigblocks;
		assert(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
		assert(layer2->append_off == 0);
	}
	if (layer2->zone != zone) {
		volume->cache.modified = 1;
		blockmap->next_offset =
			(blockmap->next_offset + HAMMER_BIGBLOCK_SIZE) &
			~HAMMER_BIGBLOCK_MASK64;
		goto again;
	}

	buffer1->cache.modified = 1;
	buffer2->cache.modified = 1;
	volume->cache.modified = 1;
	assert(layer2->append_off == chunk_offset);
	layer2->bytes_free -= bytes;
	*result_offp = blockmap->next_offset;
	blockmap->next_offset += bytes;
	layer2->append_off = (int)blockmap->next_offset &
			     HAMMER_BIGBLOCK_MASK;

	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);

	ptr = get_buffer_data(*result_offp, bufferp, 0);
	(*bufferp)->cache.modified = 1;

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(volume);
	return(ptr);
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry)
		flush_volume(vol);
}

void
flush_volume(struct volume_info *volume)
{
	struct buffer_info *buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	writehammerbuf(volume, volume->ondisk, 0);
	volume->cache.modified = 0;
}

void
flush_buffer(struct buffer_info *buffer)
{
	writehammerbuf(buffer->volume, buffer->ondisk, buffer->raw_offset);
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static int
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t n;

	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		return(-1);
	return(0);
}

static void
writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
{
	ssize_t n;

	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
}