1 /* 2 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org> 3 * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Matthew Dillon <dillon@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/time.h> 38 #include <sys/sysctl.h> 39 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <stddef.h> 43 #include <unistd.h> 44 #include <string.h> 45 #include <fcntl.h> 46 #include <assert.h> 47 #include <err.h> 48 #include <uuid.h> 49 50 #include <vfs/hammer2/hammer2_disk.h> 51 #include <vfs/hammer2/hammer2_xxhash.h> 52 53 #include "mkfs_hammer2.h" 54 #include "hammer2_subs.h" 55 56 static uint64_t nowtime(void); 57 static int blkrefary_cmp(const void *b1, const void *b2); 58 static void alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, 59 size_t bytes); 60 61 static int 62 get_hammer2_version(void) 63 { 64 int version = HAMMER2_VOL_VERSION_DEFAULT; 65 size_t olen = sizeof(version); 66 67 if (sysctlbyname("vfs.hammer2.supported_version", 68 &version, &olen, NULL, 0) == 0) { 69 if (version >= HAMMER2_VOL_VERSION_WIP) { 70 version = HAMMER2_VOL_VERSION_WIP - 1; 71 fprintf(stderr, 72 "newfs_hammer2: WARNING: HAMMER2 VFS " 73 "supports higher version than I " 74 "understand.\n" 75 "Using default version %d\n", 76 version); 77 } 78 } else { 79 fprintf(stderr, 80 "newfs_hammer2: WARNING: HAMMER2 VFS not " 81 "loaded, cannot get version info.\n" 82 "Using default version %d\n", 83 version); 84 } 85 return(version); 86 } 87 88 void 89 hammer2_mkfs_init(hammer2_mkfs_options_t *opt) 90 { 91 uint32_t status; 92 93 memset(opt, 0, sizeof(*opt)); 94 95 opt->Hammer2Version = get_hammer2_version(); 96 opt->Label[opt->NLabels++] = strdup("LOCAL"); 97 opt->CompType = HAMMER2_COMP_NEWFS_DEFAULT; /* LZ4 */ 98 opt->CheckType = HAMMER2_CHECK_XXHASH64; 99 opt->DefaultLabelType = HAMMER2_LABEL_NONE; 100 101 /* 102 * Generate a filesystem id and lookup the filesystem type 103 */ 104 srandomdev(); 105 uuidgen(&opt->Hammer2_VolFSID, 1); 106 uuidgen(&opt->Hammer2_SupCLID, 1); 107 uuidgen(&opt->Hammer2_SupFSID, 1); 108 uuid_from_string(HAMMER2_UUID_STRING, &opt->Hammer2_FSType, &status); 109 /*uuid_name_lookup(&Hammer2_FSType, "DragonFly HAMMER2", &status);*/ 110 if (status != uuid_s_ok) { 111 errx(1, "uuids file does not have the DragonFly " 112 "HAMMER2 filesystem type"); 113 } 114 } 115 116 void 117 hammer2_mkfs_cleanup(hammer2_mkfs_options_t *opt) 118 { 119 int i; 120 121 for (i = 0; i < opt->NLabels; i++) 122 free(opt->Label[i]); 123 } 124 125 static void 126 adjust_options(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt) 127 { 128 /* 129 * Adjust Label[] and NLabels. 130 */ 131 switch (opt->DefaultLabelType) { 132 case HAMMER2_LABEL_BOOT: 133 opt->Label[opt->NLabels++] = strdup("BOOT"); 134 break; 135 case HAMMER2_LABEL_ROOT: 136 opt->Label[opt->NLabels++] = strdup("ROOT"); 137 break; 138 case HAMMER2_LABEL_DATA: 139 opt->Label[opt->NLabels++] = strdup("DATA"); 140 break; 141 case HAMMER2_LABEL_NONE: 142 /* nothing to do */ 143 break; 144 default: 145 assert(0); 146 break; 147 } 148 149 /* 150 * Calculate defaults for the boot area size and round to the 151 * volume alignment boundary. 152 * 153 * NOTE: These areas are currently not used for booting but are 154 * reserved for future filesystem expansion. 155 */ 156 hammer2_off_t BootAreaSize = opt->BootAreaSize; 157 if (BootAreaSize == 0) { 158 BootAreaSize = HAMMER2_BOOT_NOM_BYTES; 159 while (BootAreaSize > fso->total_size / 20) 160 BootAreaSize >>= 1; 161 if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) 162 BootAreaSize = HAMMER2_BOOT_MIN_BYTES; 163 } else if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) { 164 BootAreaSize = HAMMER2_BOOT_MIN_BYTES; 165 } 166 BootAreaSize = (BootAreaSize + HAMMER2_VOLUME_ALIGNMASK64) & 167 ~HAMMER2_VOLUME_ALIGNMASK64; 168 opt->BootAreaSize = BootAreaSize; 169 170 /* 171 * Calculate defaults for the aux area size and round to the 172 * volume alignment boundary. 173 * 174 * NOTE: These areas are currently not used for logging but are 175 * reserved for future filesystem expansion. 176 */ 177 hammer2_off_t AuxAreaSize = opt->AuxAreaSize; 178 if (AuxAreaSize == 0) { 179 AuxAreaSize = HAMMER2_AUX_NOM_BYTES; 180 while (AuxAreaSize > fso->total_size / 20) 181 AuxAreaSize >>= 1; 182 if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES) 183 AuxAreaSize = HAMMER2_AUX_MIN_BYTES; 184 } else if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES) { 185 AuxAreaSize = HAMMER2_AUX_MIN_BYTES; 186 } 187 AuxAreaSize = (AuxAreaSize + HAMMER2_VOLUME_ALIGNMASK64) & 188 ~HAMMER2_VOLUME_ALIGNMASK64; 189 opt->AuxAreaSize = AuxAreaSize; 190 } 191 192 /* 193 * Convert a string to a 64 bit signed integer with various requirements. 194 */ 195 int64_t 196 getsize(const char *str, int64_t minval, int64_t maxval, int powerof2) 197 { 198 int64_t val; 199 char *ptr; 200 201 val = strtoll(str, &ptr, 0); 202 switch(*ptr) { 203 case 't': 204 case 'T': 205 val *= 1024; 206 /* fall through */ 207 case 'g': 208 case 'G': 209 val *= 1024; 210 /* fall through */ 211 case 'm': 212 case 'M': 213 val *= 1024; 214 /* fall through */ 215 case 'k': 216 case 'K': 217 val *= 1024; 218 break; 219 default: 220 errx(1, "Unknown suffix in number '%s'", str); 221 /* not reached */ 222 } 223 if (ptr[1]) { 224 errx(1, "Unknown suffix in number '%s'", str); 225 /* not reached */ 226 } 227 if (val < minval) { 228 errx(1, "Value too small: %s, min is %s", 229 str, sizetostr(minval)); 230 /* not reached */ 231 } 232 if (val > maxval) { 233 errx(1, "Value too large: %s, max is %s", 234 str, sizetostr(maxval)); 235 /* not reached */ 236 } 237 if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) { 238 errx(1, "Value not power of 2: %s", str); 239 /* not reached */ 240 } 241 if ((powerof2 & 2) && (val & HAMMER2_NEWFS_ALIGNMASK)) { 242 errx(1, "Value not an integral multiple of %dK: %s", 243 HAMMER2_NEWFS_ALIGN / 1024, str); 244 /* not reached */ 245 } 246 return(val); 247 } 248 249 static uint64_t 250 nowtime(void) 251 { 252 struct timeval tv; 253 uint64_t xtime; 254 255 gettimeofday(&tv, NULL); 256 xtime = tv.tv_sec * 1000000LL + tv.tv_usec; 257 return(xtime); 258 } 259 260 static hammer2_off_t 261 format_hammer2_misc(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt, 262 hammer2_off_t boot_base, hammer2_off_t aux_base) 263 { 264 char *buf = malloc(HAMMER2_PBUFSIZE); 265 hammer2_off_t alloc_base = aux_base + opt->AuxAreaSize; 266 hammer2_off_t tmp_base; 267 size_t n; 268 int i; 269 270 /* 271 * Clear the entire 4MB reserve for the first 2G zone. 272 */ 273 bzero(buf, HAMMER2_PBUFSIZE); 274 tmp_base = 0; 275 for (i = 0; i < HAMMER2_ZONE_BLOCKS_SEG; ++i) { 276 n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base); 277 if (n != HAMMER2_PBUFSIZE) { 278 perror("write"); 279 exit(1); 280 } 281 tmp_base += HAMMER2_PBUFSIZE; 282 } 283 284 /* 285 * Make sure alloc_base won't cross the reserved area at the 286 * beginning of each 1GB. 287 * 288 * Reserve space for the super-root inode and the root inode. 289 * Make sure they are in the same 64K block to simplify our code. 290 */ 291 assert((alloc_base & HAMMER2_PBUFMASK) == 0); 292 assert(alloc_base < HAMMER2_FREEMAP_LEVEL1_SIZE); 293 294 /* 295 * Clear the boot/aux area. 296 */ 297 for (tmp_base = boot_base; tmp_base < alloc_base; 298 tmp_base += HAMMER2_PBUFSIZE) { 299 n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base); 300 if (n != HAMMER2_PBUFSIZE) { 301 perror("write (boot/aux)"); 302 exit(1); 303 } 304 } 305 306 free(buf); 307 return(alloc_base); 308 } 309 310 static hammer2_off_t 311 format_hammer2_inode(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt, 312 hammer2_blockref_t *sroot_blockrefp, 313 hammer2_off_t alloc_base) 314 { 315 char *buf = malloc(HAMMER2_PBUFSIZE); 316 hammer2_inode_data_t *rawip; 317 hammer2_blockref_t sroot_blockref; 318 hammer2_blockref_t root_blockref[MAXLABELS]; 319 uint64_t now; 320 size_t n; 321 int i; 322 323 bzero(buf, HAMMER2_PBUFSIZE); 324 bzero(&sroot_blockref, sizeof(sroot_blockref)); 325 bzero(root_blockref, sizeof(root_blockref)); 326 now = nowtime(); 327 alloc_base &= ~HAMMER2_PBUFMASK64; 328 alloc_direct(&alloc_base, &sroot_blockref, HAMMER2_INODE_BYTES); 329 330 for (i = 0; i < opt->NLabels; ++i) { 331 uuidgen(&opt->Hammer2_PfsCLID[i], 1); 332 uuidgen(&opt->Hammer2_PfsFSID[i], 1); 333 334 alloc_direct(&alloc_base, &root_blockref[i], 335 HAMMER2_INODE_BYTES); 336 assert(((sroot_blockref.data_off ^ root_blockref[i].data_off) & 337 ~HAMMER2_PBUFMASK64) == 0); 338 339 /* 340 * Format the root directory inode, which is left empty. 341 */ 342 rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & 343 root_blockref[i].data_off)); 344 rawip->meta.version = HAMMER2_INODE_VERSION_ONE; 345 rawip->meta.ctime = now; 346 rawip->meta.mtime = now; 347 /* rawip->atime = now; NOT IMPL MUST BE ZERO */ 348 rawip->meta.btime = now; 349 rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY; 350 rawip->meta.mode = 0755; 351 rawip->meta.inum = 1; /* root inode, inumber 1 */ 352 rawip->meta.nlinks = 1; /* directory link count compat */ 353 354 rawip->meta.name_len = strlen(opt->Label[i]); 355 bcopy(opt->Label[i], rawip->filename, rawip->meta.name_len); 356 rawip->meta.name_key = 357 dirhash((char *)rawip->filename, rawip->meta.name_len); 358 359 /* 360 * Compression mode and supported copyids. 361 * 362 * Do not allow compression when creating any "BOOT" label 363 * (pfs-create also does the same if the pfs is named "BOOT") 364 */ 365 if (strcasecmp(opt->Label[i], "BOOT") == 0) { 366 rawip->meta.comp_algo = HAMMER2_ENC_ALGO( 367 HAMMER2_COMP_AUTOZERO); 368 rawip->meta.check_algo = HAMMER2_ENC_ALGO( 369 HAMMER2_CHECK_XXHASH64); 370 } else { 371 rawip->meta.comp_algo = HAMMER2_ENC_ALGO( 372 opt->CompType); 373 rawip->meta.check_algo = HAMMER2_ENC_ALGO( 374 HAMMER2_CHECK_XXHASH64); 375 } 376 377 /* 378 * NOTE: We leave nmasters set to 0, which means that we 379 * don't know how many masters there are. The quorum 380 * calculation will effectively be 1 ( 0 / 2 + 1 ). 381 */ 382 rawip->meta.pfs_clid = opt->Hammer2_PfsCLID[i]; 383 rawip->meta.pfs_fsid = opt->Hammer2_PfsFSID[i]; 384 rawip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER; 385 rawip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT; 386 387 /* first allocatable inode number */ 388 rawip->meta.pfs_inum = 16; 389 390 /* rawip->u.blockset is left empty */ 391 392 /* 393 * The root blockref will be stored in the super-root inode as 394 * one of the ~4 PFS root directories. The copyid here is the 395 * actual copyid of the storage ref. 396 * 397 * The key field for a PFS root directory's blockref is 398 * essentially the name key for the entry. 399 */ 400 root_blockref[i].key = rawip->meta.name_key; 401 root_blockref[i].copyid = HAMMER2_COPYID_LOCAL; 402 root_blockref[i].keybits = 0; 403 root_blockref[i].check.xxhash64.value = 404 XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED); 405 root_blockref[i].type = HAMMER2_BREF_TYPE_INODE; 406 root_blockref[i].methods = 407 HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) | 408 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 409 root_blockref[i].mirror_tid = 16; 410 root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT; 411 } 412 413 /* 414 * Format the super-root directory inode, giving it ~4 PFS root 415 * directories (root_blockref). 416 * 417 * The superroot contains ~4 directories pointing at the PFS root 418 * inodes (named via the label). Inodes contain one blockset which 419 * is fully associative so we can put the entry anywhere without 420 * having to worry about the hash. Use index 0. 421 */ 422 rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & sroot_blockref.data_off)); 423 rawip->meta.version = HAMMER2_INODE_VERSION_ONE; 424 rawip->meta.ctime = now; 425 rawip->meta.mtime = now; 426 /* rawip->meta.atime = now; NOT IMPL MUST BE ZERO */ 427 rawip->meta.btime = now; 428 rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY; 429 rawip->meta.mode = 0700; /* super-root - root only */ 430 rawip->meta.inum = 0; /* super root inode, inumber 0 */ 431 rawip->meta.nlinks = 2; /* directory link count compat */ 432 433 rawip->meta.name_len = 0; /* super-root is unnamed */ 434 rawip->meta.name_key = 0; 435 436 rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO); 437 rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64); 438 439 /* 440 * The super-root is flagged as a PFS and typically given its own 441 * random FSID, making it possible to mirror an entire HAMMER2 disk 442 * snapshots and all if desired. PFS ids are used to match up 443 * mirror sources and targets and cluster copy sources and targets. 444 * 445 * (XXX whole-disk logical mirroring is not really supported in 446 * the first attempt because each PFS is in its own modify/mirror 447 * transaction id domain, so normal mechanics cannot cross a PFS 448 * boundary). 449 */ 450 rawip->meta.pfs_clid = opt->Hammer2_SupCLID; 451 rawip->meta.pfs_fsid = opt->Hammer2_SupFSID; 452 rawip->meta.pfs_type = HAMMER2_PFSTYPE_SUPROOT; 453 snprintf((char*)rawip->filename, sizeof(rawip->filename), "SUPROOT"); 454 rawip->meta.name_key = 0; 455 rawip->meta.name_len = strlen((char*)rawip->filename); 456 457 /* The super-root has an inode number of 0 */ 458 rawip->meta.pfs_inum = 0; 459 460 /* 461 * Currently newfs_hammer2 just throws the PFS inodes into the 462 * top-level block table at the volume root and doesn't try to 463 * create an indirect block, so we are limited to ~4 at filesystem 464 * creation time. More can be added after mounting. 465 */ 466 qsort(root_blockref, opt->NLabels, sizeof(root_blockref[0]), blkrefary_cmp); 467 for (i = 0; i < opt->NLabels; ++i) 468 rawip->u.blockset.blockref[i] = root_blockref[i]; 469 470 /* 471 * The sroot blockref will be stored in the volume header. 472 */ 473 sroot_blockref.copyid = HAMMER2_COPYID_LOCAL; 474 sroot_blockref.keybits = 0; 475 sroot_blockref.check.xxhash64.value = 476 XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED); 477 sroot_blockref.type = HAMMER2_BREF_TYPE_INODE; 478 sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) | 479 HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO); 480 sroot_blockref.mirror_tid = 16; 481 rawip = NULL; 482 483 /* 484 * Write out the 64K HAMMER2 block containing the root and sroot. 485 */ 486 assert((sroot_blockref.data_off & ~HAMMER2_PBUFMASK64) == 487 ((alloc_base - 1) & ~HAMMER2_PBUFMASK64)); 488 n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, 489 sroot_blockref.data_off & ~HAMMER2_PBUFMASK64); 490 if (n != HAMMER2_PBUFSIZE) { 491 perror("write"); 492 exit(1); 493 } 494 *sroot_blockrefp = sroot_blockref; 495 496 free(buf); 497 return(alloc_base); 498 } 499 500 /* 501 * Create the volume header, the super-root directory inode, and 502 * the writable snapshot subdirectory (named via the label) which 503 * is to be the initial mount point, or at least the first mount point. 504 * newfs_hammer2 doesn't format the freemap bitmaps for these. 505 * 506 * 0 4MB 507 * [----reserved_area----][boot_area][aux_area] 508 * [[vol_hdr][freemap]...] [sroot][root][root]... 509 * \ ^\ ^ ^ 510 * \--------------------------------------/ \---/-----/---... 511 * 512 * NOTE: The total size is 8MB-aligned to avoid edge cases. 513 */ 514 static void 515 format_hammer2(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt, int index) 516 { 517 char *buf = malloc(HAMMER2_PBUFSIZE); 518 hammer2_volume_t *vol = &fso->volumes[index]; 519 hammer2_volume_data_t *voldata; 520 hammer2_blockset_t sroot_blockset; 521 hammer2_off_t boot_base = HAMMER2_ZONE_SEG; 522 hammer2_off_t aux_base = boot_base + opt->BootAreaSize; 523 hammer2_off_t alloc_base; 524 size_t n; 525 int i; 526 527 /* 528 * Make sure we can write to the last usable block. 529 */ 530 bzero(buf, HAMMER2_PBUFSIZE); 531 n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, 532 vol->size - HAMMER2_PBUFSIZE); 533 if (n != HAMMER2_PBUFSIZE) { 534 perror("write (at-end-of-volume)"); 535 exit(1); 536 } 537 538 /* 539 * Format misc area and sroot/root inodes for the root volume. 540 */ 541 bzero(&sroot_blockset, sizeof(sroot_blockset)); 542 if (vol->id == HAMMER2_ROOT_VOLUME) { 543 alloc_base = format_hammer2_misc(vol, opt, boot_base, aux_base); 544 alloc_base = format_hammer2_inode(vol, opt, 545 &sroot_blockset.blockref[0], 546 alloc_base); 547 } else { 548 alloc_base = 0; 549 for (i = 0; i < HAMMER2_SET_COUNT; ++i) 550 sroot_blockset.blockref[i].type = HAMMER2_BREF_TYPE_INVALID; 551 } 552 553 /* 554 * Format the volume header. 555 * 556 * The volume header points to sroot_blockset. Also be absolutely 557 * sure that allocator_beg is set for the root volume. 558 */ 559 assert(HAMMER2_VOLUME_BYTES <= HAMMER2_PBUFSIZE); 560 bzero(buf, HAMMER2_PBUFSIZE); 561 voldata = (void *)buf; 562 563 voldata->magic = HAMMER2_VOLUME_ID_HBO; 564 if (vol->id == HAMMER2_ROOT_VOLUME) { 565 voldata->boot_beg = boot_base; 566 voldata->boot_end = boot_base + opt->BootAreaSize; 567 voldata->aux_beg = aux_base; 568 voldata->aux_end = aux_base + opt->AuxAreaSize; 569 } 570 voldata->volu_size = vol->size; 571 voldata->version = opt->Hammer2Version; 572 voldata->flags = 0; 573 574 if (voldata->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES) { 575 voldata->volu_id = vol->id; 576 voldata->nvolumes = fso->nvolumes; 577 voldata->total_size = fso->total_size; 578 for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) { 579 if (i < fso->nvolumes) 580 voldata->volu_loff[i] = fso->volumes[i].offset; 581 else 582 voldata->volu_loff[i] = (hammer2_off_t)-1; 583 } 584 } 585 586 voldata->fsid = opt->Hammer2_VolFSID; 587 voldata->fstype = opt->Hammer2_FSType; 588 589 voldata->peer_type = DMSG_PEER_HAMMER2; /* LNK_CONN identification */ 590 591 assert(vol->id == HAMMER2_ROOT_VOLUME || alloc_base == 0); 592 voldata->allocator_size = fso->free_size; 593 if (vol->id == HAMMER2_ROOT_VOLUME) { 594 voldata->allocator_free = fso->free_size; 595 voldata->allocator_beg = alloc_base; 596 } 597 598 voldata->sroot_blockset = sroot_blockset; 599 voldata->mirror_tid = 16; /* all blockref mirror TIDs set to 16 */ 600 voldata->freemap_tid = 16; /* all blockref mirror TIDs set to 16 */ 601 voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1] = 602 hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC1_OFF, 603 HAMMER2_VOLUME_ICRC1_SIZE); 604 605 /* 606 * Set ICRC_SECT0 after all remaining elements of sect0 have been 607 * populated in the volume header. Note hat ICRC_SECT* (except for 608 * SECT0) are part of sect0. 609 */ 610 voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0] = 611 hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC0_OFF, 612 HAMMER2_VOLUME_ICRC0_SIZE); 613 voldata->icrc_volheader = 614 hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF, 615 HAMMER2_VOLUME_ICRCVH_SIZE); 616 617 /* 618 * Write the volume header and all alternates. 619 */ 620 for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) { 621 if (i * HAMMER2_ZONE_BYTES64 >= vol->size) 622 break; 623 n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, 624 i * HAMMER2_ZONE_BYTES64); 625 if (n != HAMMER2_PBUFSIZE) { 626 perror("write"); 627 exit(1); 628 } 629 } 630 fsync(vol->fd); 631 632 /* 633 * Cleanup 634 */ 635 free(buf); 636 } 637 638 static void 639 alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, size_t bytes) 640 { 641 int radix; 642 643 radix = 0; 644 assert(bytes); 645 while ((bytes & 1) == 0) { 646 bytes >>= 1; 647 ++radix; 648 } 649 assert(bytes == 1); 650 if (radix < HAMMER2_RADIX_MIN) 651 radix = HAMMER2_RADIX_MIN; 652 653 bzero(bref, sizeof(*bref)); 654 bref->data_off = *basep | radix; 655 bref->vradix = radix; 656 657 *basep += 1U << radix; 658 } 659 660 static int 661 blkrefary_cmp(const void *b1, const void *b2) 662 { 663 const hammer2_blockref_t *bref1 = b1; 664 const hammer2_blockref_t *bref2 = b2; 665 666 if (bref1->key < bref2->key) 667 return(-1); 668 if (bref1->key > bref2->key) 669 return(1); 670 return 0; 671 } 672 673 void 674 hammer2_mkfs(int ac, char **av, hammer2_mkfs_options_t *opt) 675 { 676 hammer2_off_t resid = 0, reserved_size; 677 hammer2_ondisk_t fso; 678 int i; 679 char *vol_fsid = NULL; 680 char *sup_clid_name = NULL; 681 char *sup_fsid_name = NULL; 682 char *pfs_clid_name = NULL; 683 char *pfs_fsid_name = NULL; 684 685 /* 686 * Sanity check basic filesystem structures. No cookies for us 687 * if it gets broken! 688 */ 689 assert(sizeof(hammer2_volume_data_t) == HAMMER2_VOLUME_BYTES); 690 assert(sizeof(hammer2_inode_data_t) == HAMMER2_INODE_BYTES); 691 assert(sizeof(hammer2_blockref_t) == HAMMER2_BLOCKREF_BYTES); 692 693 /* 694 * Construct volumes information. 695 * 1GB alignment (level1 freemap size) for volumes except for the last. 696 * For the last volume, typically 8MB alignment to avoid edge cases for 697 * reserved blocks and so raid stripes (if any) operate efficiently. 698 */ 699 hammer2_init_ondisk(&fso); 700 fso.version = opt->Hammer2Version; 701 fso.nvolumes = ac; 702 703 assert(ac >= 1); 704 if (opt->NFileSystemSizes == 1) { 705 resid = opt->FileSystemSize[0]; 706 assert(resid >= HAMMER2_FREEMAP_LEVEL1_SIZE); 707 } else if (opt->NFileSystemSizes > 1) { 708 if (ac != opt->NFileSystemSizes) 709 errx(1, "Invalid filesystem size count %d vs %d", 710 opt->NFileSystemSizes, ac); 711 } 712 713 for (i = 0; i < fso.nvolumes; ++i) { 714 hammer2_volume_t *vol = &fso.volumes[i]; 715 hammer2_off_t size; 716 int fd = open(av[i], O_RDWR); 717 if (fd < 0) 718 err(1, "Unable to open %s R+W", av[i]); 719 size = check_volume(fd); 720 721 /* 722 * Limit size if a smaller filesystem size is specified. 723 */ 724 if (opt->NFileSystemSizes == 1) { 725 if (resid == 0) 726 errx(1, "No remaining filesystem size for %s", 727 av[i]); 728 if (size > resid) 729 size = resid; 730 resid -= size; 731 } else if (opt->NFileSystemSizes > 1) { 732 resid = opt->FileSystemSize[i]; 733 assert(resid >= HAMMER2_FREEMAP_LEVEL1_SIZE); 734 if (size > resid) 735 size = resid; 736 } 737 738 assert(size > 0); 739 if (i == fso.nvolumes - 1) 740 size &= ~HAMMER2_VOLUME_ALIGNMASK64; 741 else 742 size &= ~HAMMER2_FREEMAP_LEVEL1_MASK; 743 hammer2_install_volume(vol, fd, i, av[i], fso.total_size, size); 744 fso.total_size += size; 745 } 746 747 /* 748 * Verify volumes constructed above. 749 */ 750 for (i = 0; i < fso.nvolumes; ++i) { 751 hammer2_volume_t *vol = &fso.volumes[i]; 752 printf("Volume %-15s size %s\n", vol->path, 753 sizetostr(vol->size)); 754 } 755 hammer2_verify_volumes(&fso, NULL); 756 757 /* 758 * Adjust options. 759 */ 760 adjust_options(&fso, opt); 761 762 /* 763 * We'll need to stuff this in the volume header soon. 764 */ 765 hammer2_uuid_to_str(&opt->Hammer2_VolFSID, &vol_fsid); 766 hammer2_uuid_to_str(&opt->Hammer2_SupCLID, &sup_clid_name); 767 hammer2_uuid_to_str(&opt->Hammer2_SupFSID, &sup_fsid_name); 768 769 /* 770 * Calculate the amount of reserved space. HAMMER2_ZONE_SEG (4MB) 771 * is reserved at the beginning of every 1GB of storage, rounded up. 772 * Thus a 200MB filesystem will still have a 4MB reserve area. 773 * 774 * We also include the boot and aux areas in the reserve. The 775 * reserve is used to help 'df' calculate the amount of available 776 * space. 777 * 778 * XXX I kinda screwed up and made the reserved area on the LEVEL1 779 * boundary rather than the ZONE boundary. LEVEL1 is on 1GB 780 * boundaries rather than 2GB boundaries. Stick with the LEVEL1 781 * boundary. 782 */ 783 reserved_size = ((fso.total_size + HAMMER2_FREEMAP_LEVEL1_MASK) / 784 HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64; 785 786 fso.free_size = fso.total_size - reserved_size - opt->BootAreaSize - opt->AuxAreaSize; 787 if ((int64_t)fso.free_size < 0) { 788 fprintf(stderr, "Not enough free space\n"); 789 exit(1); 790 } 791 792 /* 793 * Format HAMMER2 volumes. 794 */ 795 for (i = 0; i < fso.nvolumes; ++i) 796 format_hammer2(&fso, opt, i); 797 798 printf("---------------------------------------------\n"); 799 printf("version: %d\n", opt->Hammer2Version); 800 printf("total-size: %s (%jd bytes)\n", 801 sizetostr(fso.total_size), 802 (intmax_t)fso.total_size); 803 printf("boot-area-size: %s (%jd bytes)\n", 804 sizetostr(opt->BootAreaSize), 805 (intmax_t)opt->BootAreaSize); 806 printf("aux-area-size: %s (%jd bytes)\n", 807 sizetostr(opt->AuxAreaSize), 808 (intmax_t)opt->AuxAreaSize); 809 printf("topo-reserved: %s (%jd bytes)\n", 810 sizetostr(reserved_size), 811 (intmax_t)reserved_size); 812 printf("free-size: %s (%jd bytes)\n", 813 sizetostr(fso.free_size), 814 (intmax_t)fso.free_size); 815 printf("vol-fsid: %s\n", vol_fsid); 816 printf("sup-clid: %s\n", sup_clid_name); 817 printf("sup-fsid: %s\n", sup_fsid_name); 818 for (i = 0; i < opt->NLabels; ++i) { 819 printf("PFS \"%s\"\n", opt->Label[i]); 820 hammer2_uuid_to_str(&opt->Hammer2_PfsCLID[i], &pfs_clid_name); 821 hammer2_uuid_to_str(&opt->Hammer2_PfsFSID[i], &pfs_fsid_name); 822 printf(" clid %s\n", pfs_clid_name); 823 printf(" fsid %s\n", pfs_fsid_name); 824 } 825 if (opt->DebugOpt) { 826 printf("---------------------------------------------\n"); 827 hammer2_print_volumes(&fso); 828 } 829 830 free(vol_fsid); 831 free(sup_clid_name); 832 free(sup_fsid_name); 833 free(pfs_clid_name); 834 free(pfs_fsid_name); 835 836 for (i = 0; i < fso.nvolumes; ++i) 837 hammer2_uninstall_volume(&fso.volumes[i]); 838 } 839