1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/diskslice.h> 37 #include <sys/diskmbr.h> 38 #include <sys/stat.h> 39 #include <sys/time.h> 40 #include <sys/sysctl.h> 41 #include <vfs/hammer2/hammer2_xxhash.h> 42 #include <vfs/hammer2/hammer2_disk.h> 43 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <stdarg.h> 47 #include <stddef.h> 48 #include <unistd.h> 49 #include <string.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <assert.h> 53 #include <err.h> 54 #include <uuid.h> 55 56 #define MAXLABELS HAMMER2_SET_COUNT 57 58 #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size)) 59 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc)) 60 uint32_t iscsi_crc32(const void *buf, size_t size); 61 uint32_t iscsi_crc32_ext(const void *buf, size_t size, uint32_t ocrc); 62 63 static hammer2_off_t check_volume(const char *path, int *fdp); 64 static int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw); 65 static const char *sizetostr(hammer2_off_t size); 66 static uint64_t nowtime(void); 67 static int blkrefary_cmp(const void *b1, const void *b2); 68 static void usage(void); 69 70 static void format_hammer2(int fd, hammer2_off_t total_space, 71 hammer2_off_t free_space); 72 static void alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, 73 size_t bytes); 74 static hammer2_key_t dirhash(const unsigned char *name, size_t len); 75 76 static int Hammer2Version = -1; 77 static int ForceOpt = 0; 78 static uuid_t Hammer2_FSType; /* static filesystem type id for HAMMER2 */ 79 static uuid_t Hammer2_VolFSID; /* unique filesystem id in volu header */ 80 static uuid_t Hammer2_SupCLID; /* PFS cluster id in super-root inode */ 81 static uuid_t Hammer2_SupFSID; /* PFS unique id in super-root inode */ 82 static uuid_t Hammer2_PfsCLID[MAXLABELS]; 83 static uuid_t Hammer2_PfsFSID[MAXLABELS]; 84 static const char *Label[MAXLABELS]; 85 static hammer2_off_t BootAreaSize; 86 static hammer2_off_t AuxAreaSize; 87 static int NLabels; 88 89 #define GIG ((hammer2_off_t)1024*1024*1024) 90 91 int 92 main(int ac, char **av) 93 { 94 uint32_t status; 95 hammer2_off_t total_space; 96 hammer2_off_t free_space; 97 hammer2_off_t reserved_space; 98 int ch; 99 int fd = -1; 100 int i; 101 int defaultlabels = 1; 102 char *vol_fsid; 103 char *sup_clid_name; 104 char *sup_fsid_name; 105 char *pfs_clid_name; 106 char *pfs_fsid_name; 107 108 Label[NLabels++] = "LOCAL"; 109 110 /* 111 * Sanity check basic filesystem structures. No cookies for us 112 * if it gets broken! 113 */ 114 assert(sizeof(hammer2_volume_data_t) == HAMMER2_VOLUME_BYTES); 115 assert(sizeof(hammer2_inode_data_t) == HAMMER2_INODE_BYTES); 116 assert(sizeof(hammer2_blockref_t) == HAMMER2_BLOCKREF_BYTES); 117 118 /* 119 * Generate a filesystem id and lookup the filesystem type 120 */ 121 srandomdev(); 122 uuidgen(&Hammer2_VolFSID, 1); 123 uuidgen(&Hammer2_SupCLID, 1); 124 uuidgen(&Hammer2_SupFSID, 1); 125 uuid_from_string(HAMMER2_UUID_STRING, &Hammer2_FSType, &status); 126 /*uuid_name_lookup(&Hammer2_FSType, "DragonFly HAMMER2", &status);*/ 127 if (status != uuid_s_ok) { 128 errx(1, "uuids file does not have the DragonFly " 129 "HAMMER2 filesystem type"); 130 } 131 132 /* 133 * Parse arguments 134 */ 135 while ((ch = getopt(ac, av, "fL:b:m:r:V:")) != -1) { 136 switch(ch) { 137 case 'f': 138 ForceOpt = 1; 139 break; 140 case 'L': 141 defaultlabels = 0; 142 if (strcasecmp(optarg, "none") == 0) { 143 break; 144 } 145 if (NLabels >= MAXLABELS) { 146 errx(1, 147 "Limit of %d local labels", 148 MAXLABELS - 1); 149 } 150 Label[NLabels++] = optarg; 151 if (strlen(Label[NLabels-1]) > HAMMER2_INODE_MAXNAME) { 152 errx(1, "Volume label '%s' is too long " 153 "(64 chars max)\n", optarg); 154 } 155 break; 156 case 'b': 157 BootAreaSize = getsize(optarg, 158 HAMMER2_NEWFS_ALIGN, 159 HAMMER2_BOOT_MAX_BYTES, 2); 160 break; 161 case 'r': 162 AuxAreaSize = getsize(optarg, 163 HAMMER2_NEWFS_ALIGN, 164 HAMMER2_REDO_MAX_BYTES, 2); 165 break; 166 case 'V': 167 Hammer2Version = strtol(optarg, NULL, 0); 168 if (Hammer2Version < HAMMER2_VOL_VERSION_MIN || 169 Hammer2Version >= HAMMER2_VOL_VERSION_WIP) { 170 errx(1, 171 "I don't understand how to format " 172 "HAMMER2 version %d\n", 173 Hammer2Version); 174 } 175 break; 176 default: 177 usage(); 178 break; 179 } 180 } 181 182 /* 183 * Check Hammer2 version 184 */ 185 if (Hammer2Version < 0) { 186 size_t olen = sizeof(Hammer2Version); 187 Hammer2Version = HAMMER2_VOL_VERSION_DEFAULT; 188 if (sysctlbyname("vfs.hammer2.supported_version", 189 &Hammer2Version, &olen, NULL, 0) == 0) { 190 if (Hammer2Version >= HAMMER2_VOL_VERSION_WIP) { 191 Hammer2Version = HAMMER2_VOL_VERSION_WIP - 1; 192 fprintf(stderr, 193 "newfs_hammer2: WARNING: HAMMER2 VFS " 194 "supports higher version than I " 195 "understand,\n" 196 "using version %d\n", 197 Hammer2Version); 198 } 199 } else { 200 fprintf(stderr, 201 "newfs_hammer2: WARNING: HAMMER2 VFS not " 202 "loaded, cannot get version info.\n" 203 "Using version %d\n", 204 HAMMER2_VOL_VERSION_DEFAULT); 205 } 206 } 207 208 ac -= optind; 209 av += optind; 210 211 if (ac != 1 || av[0][0] == 0) { 212 fprintf(stderr, "Exactly one disk device must be specified\n"); 213 exit(1); 214 } 215 216 /* 217 * Adjust Label[] and NLabels. 218 */ 219 if (defaultlabels) { 220 char c = av[0][strlen(av[0]) - 1]; 221 if (c == 'a') 222 Label[NLabels++] = "BOOT"; 223 else if (c == 'd') 224 Label[NLabels++] = "ROOT"; 225 else 226 Label[NLabels++] = "DATA"; 227 } 228 229 /* 230 * Collect volume information. 231 */ 232 total_space = check_volume(av[0], &fd); 233 234 /* 235 * ~typically 8MB alignment to avoid edge cases for reserved blocks 236 * and so raid stripes (if any) operate efficiently. 237 */ 238 total_space &= ~HAMMER2_VOLUME_ALIGNMASK64; 239 240 /* 241 * Calculate defaults for the boot area size and round to the 242 * volume alignment boundary. 243 * 244 * NOTE: These areas are currently not used for booting but are 245 * reserved for future filesystem expansion. 246 */ 247 if (BootAreaSize == 0) { 248 BootAreaSize = HAMMER2_BOOT_NOM_BYTES; 249 while (BootAreaSize > total_space / 20) 250 BootAreaSize >>= 1; 251 if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) 252 BootAreaSize = HAMMER2_BOOT_MIN_BYTES; 253 } else if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) { 254 BootAreaSize = HAMMER2_BOOT_MIN_BYTES; 255 } 256 BootAreaSize = (BootAreaSize + HAMMER2_VOLUME_ALIGNMASK64) & 257 ~HAMMER2_VOLUME_ALIGNMASK64; 258 259 /* 260 * Calculate defaults for the redo area size and round to the 261 * volume alignment boundary. 262 * 263 * NOTE: These areas are currently not used for logging but are 264 * reserved for future filesystem expansion. 265 */ 266 if (AuxAreaSize == 0) { 267 AuxAreaSize = HAMMER2_REDO_NOM_BYTES; 268 while (AuxAreaSize > total_space / 20) 269 AuxAreaSize >>= 1; 270 if (AuxAreaSize < HAMMER2_REDO_MIN_BYTES) 271 AuxAreaSize = HAMMER2_REDO_MIN_BYTES; 272 } else if (AuxAreaSize < HAMMER2_REDO_MIN_BYTES) { 273 AuxAreaSize = HAMMER2_REDO_MIN_BYTES; 274 } 275 AuxAreaSize = (AuxAreaSize + HAMMER2_VOLUME_ALIGNMASK64) & 276 ~HAMMER2_VOLUME_ALIGNMASK64; 277 278 /* 279 * We'll need to stuff this in the volume header soon. 280 */ 281 uuid_to_string(&Hammer2_VolFSID, &vol_fsid, &status); 282 uuid_to_string(&Hammer2_SupCLID, &sup_clid_name, &status); 283 uuid_to_string(&Hammer2_SupFSID, &sup_fsid_name, &status); 284 285 /* 286 * Calculate the amount of reserved space. HAMMER2_ZONE_SEG (4MB) 287 * is reserved at the beginning of every 2GB of storage, rounded up. 288 * Thus a 200MB filesystem will still have a 4MB reserve area. 289 * 290 * We also include the boot and redo areas in the reserve. The 291 * reserve is used to help 'df' calculate the amount of available 292 * space. 293 * 294 * XXX I kinda screwed up and made the reserved area on the LEVEL1 295 * boundary rather than the ZONE boundary. LEVEL1 is on 1GB 296 * boundaries rather than 2GB boundaries. Stick with the LEVEL1 297 * boundary. 298 */ 299 reserved_space = ((total_space + HAMMER2_FREEMAP_LEVEL1_MASK) / 300 HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64; 301 302 free_space = total_space - reserved_space - 303 BootAreaSize - AuxAreaSize; 304 305 format_hammer2(fd, total_space, free_space); 306 fsync(fd); 307 close(fd); 308 309 printf("---------------------------------------------\n"); 310 printf("version: %d\n", Hammer2Version); 311 printf("total-size: %s (%jd bytes)\n", 312 sizetostr(total_space), 313 (intmax_t)total_space); 314 printf("boot-area-size: %s\n", sizetostr(BootAreaSize)); 315 printf("aux-area-size: %s\n", sizetostr(AuxAreaSize)); 316 printf("topo-reserved: %s\n", sizetostr(reserved_space)); 317 printf("free-space: %s\n", sizetostr(free_space)); 318 printf("vol-fsid: %s\n", vol_fsid); 319 printf("sup-clid: %s\n", sup_clid_name); 320 printf("sup-fsid: %s\n", sup_fsid_name); 321 for (i = 0; i < NLabels; ++i) { 322 printf("PFS \"%s\"\n", Label[i]); 323 uuid_to_string(&Hammer2_PfsCLID[i], &pfs_clid_name, &status); 324 uuid_to_string(&Hammer2_PfsFSID[i], &pfs_fsid_name, &status); 325 printf(" clid %s\n", pfs_clid_name); 326 printf(" fsid %s\n", pfs_fsid_name); 327 } 328 printf("\n"); 329 330 free(vol_fsid); 331 free(sup_clid_name); 332 free(sup_fsid_name); 333 free(pfs_clid_name); 334 free(pfs_fsid_name); 335 336 return(0); 337 } 338 339 static 340 void 341 usage(void) 342 { 343 fprintf(stderr, 344 "usage: newfs_hammer2 -L label [-f] [-b bootsize] " 345 "[-r redosize] [-V version] special ...\n" 346 ); 347 exit(1); 348 } 349 350 /* 351 * Convert the size in bytes to a human readable string. 352 */ 353 static 354 const char * 355 sizetostr(hammer2_off_t size) 356 { 357 static char buf[32]; 358 359 if (size < 1024 / 2) { 360 snprintf(buf, sizeof(buf), "%6.2f", (double)size); 361 } else if (size < 1024 * 1024 / 2) { 362 snprintf(buf, sizeof(buf), "%6.2fKB", 363 (double)size / 1024); 364 } else if (size < 1024 * 1024 * 1024LL / 2) { 365 snprintf(buf, sizeof(buf), "%6.2fMB", 366 (double)size / (1024 * 1024)); 367 } else if (size < 1024 * 1024 * 1024LL * 1024LL / 2) { 368 snprintf(buf, sizeof(buf), "%6.2fGB", 369 (double)size / (1024 * 1024 * 1024LL)); 370 } else { 371 snprintf(buf, sizeof(buf), "%6.2fTB", 372 (double)size / (1024 * 1024 * 1024LL * 1024LL)); 373 } 374 return(buf); 375 } 376 377 /* 378 * Convert a string to a 64 bit signed integer with various requirements. 379 */ 380 static int64_t 381 getsize(const char *str, int64_t minval, int64_t maxval, int powerof2) 382 { 383 int64_t val; 384 char *ptr; 385 386 val = strtoll(str, &ptr, 0); 387 switch(*ptr) { 388 case 't': 389 case 'T': 390 val *= 1024; 391 /* fall through */ 392 case 'g': 393 case 'G': 394 val *= 1024; 395 /* fall through */ 396 case 'm': 397 case 'M': 398 val *= 1024; 399 /* fall through */ 400 case 'k': 401 case 'K': 402 val *= 1024; 403 break; 404 default: 405 errx(1, "Unknown suffix in number '%s'\n", str); 406 /* not reached */ 407 } 408 if (ptr[1]) { 409 errx(1, "Unknown suffix in number '%s'\n", str); 410 /* not reached */ 411 } 412 if (val < minval) { 413 errx(1, "Value too small: %s, min is %s\n", 414 str, sizetostr(minval)); 415 /* not reached */ 416 } 417 if (val > maxval) { 418 errx(1, "Value too large: %s, max is %s\n", 419 str, sizetostr(maxval)); 420 /* not reached */ 421 } 422 if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) { 423 errx(1, "Value not power of 2: %s\n", str); 424 /* not reached */ 425 } 426 if ((powerof2 & 2) && (val & HAMMER2_NEWFS_ALIGNMASK)) { 427 errx(1, "Value not an integral multiple of %dK: %s", 428 HAMMER2_NEWFS_ALIGN / 1024, str); 429 /* not reached */ 430 } 431 return(val); 432 } 433 434 static uint64_t 435 nowtime(void) 436 { 437 struct timeval tv; 438 uint64_t xtime; 439 440 gettimeofday(&tv, NULL); 441 xtime = tv.tv_sec * 1000000LL + tv.tv_usec; 442 return(xtime); 443 } 444 445 /* 446 * Figure out how big the volume is. 447 */ 448 static 449 hammer2_off_t 450 check_volume(const char *path, int *fdp) 451 { 452 struct partinfo pinfo; 453 struct stat st; 454 hammer2_off_t size; 455 456 /* 457 * Get basic information about the volume 458 */ 459 *fdp = open(path, O_RDWR); 460 if (*fdp < 0) 461 err(1, "Unable to open %s R+W", path); 462 if (ioctl(*fdp, DIOCGPART, &pinfo) < 0) { 463 /* 464 * Allow the formatting of regular files as HAMMER2 volumes 465 */ 466 if (fstat(*fdp, &st) < 0) 467 err(1, "Unable to stat %s", path); 468 if (!S_ISREG(st.st_mode)) 469 errx(1, "Unsupported file type for %s", path); 470 size = st.st_size; 471 } else { 472 /* 473 * When formatting a block device as a HAMMER2 volume the 474 * sector size must be compatible. HAMMER2 uses 64K 475 * filesystem buffers but logical buffers for direct I/O 476 * can be as small as HAMMER2_LOGSIZE (16KB). 477 */ 478 if (pinfo.reserved_blocks) { 479 errx(1, "HAMMER2 cannot be placed in a partition " 480 "which overlaps the disklabel or MBR"); 481 } 482 if (pinfo.media_blksize > HAMMER2_PBUFSIZE || 483 HAMMER2_PBUFSIZE % pinfo.media_blksize) { 484 errx(1, "A media sector size of %d is not supported", 485 pinfo.media_blksize); 486 } 487 size = pinfo.media_size; 488 } 489 printf("Volume %-15s size %s\n", path, sizetostr(size)); 490 return (size); 491 } 492 493 /* 494 * Create the volume header, the super-root directory inode, and 495 * the writable snapshot subdirectory (named via the label) which 496 * is to be the initial mount point, or at least the first mount point. 497 * 498 * [----reserved_area----][boot_area][aux_area] 499 * [[vol_hdr]... ] [sroot][root]... 500 * 501 * The sroot and root inodes eat 512 bytes each. newfs labels can only be 502 * 64 bytes so the root (snapshot) inode does not need to extend past 512 503 * bytes. We use the correct hash slot correct but note that because 504 * directory hashes are chained 16x, any slot in the inode will work. 505 * 506 * Also format the allocation map. 507 * 508 * NOTE: The passed total_space is 8MB-aligned to avoid edge cases. 509 */ 510 static 511 void 512 format_hammer2(int fd, hammer2_off_t total_space, hammer2_off_t free_space) 513 { 514 char *buf = malloc(HAMMER2_PBUFSIZE); 515 hammer2_volume_data_t *vol; 516 hammer2_inode_data_t *rawip; 517 hammer2_blockref_t sroot_blockref; 518 hammer2_blockref_t root_blockref[MAXLABELS]; 519 uint64_t now; 520 hammer2_off_t volu_base = 0; 521 hammer2_off_t boot_base = HAMMER2_ZONE_SEG; 522 hammer2_off_t aux_base = boot_base + BootAreaSize; 523 hammer2_off_t alloc_base = aux_base + AuxAreaSize; 524 hammer2_off_t tmp_base; 525 size_t n; 526 int i; 527 528 /* 529 * Clear the entire reserve for the first 2G segment and 530 * make sure we can write to the last block. 531 */ 532 bzero(buf, HAMMER2_PBUFSIZE); 533 tmp_base = volu_base; 534 for (i = 0; i < HAMMER2_ZONE_BLOCKS_SEG; ++i) { 535 n = pwrite(fd, buf, HAMMER2_PBUFSIZE, tmp_base); 536 if (n != HAMMER2_PBUFSIZE) { 537 perror("write"); 538 exit(1); 539 } 540 tmp_base += HAMMER2_PBUFSIZE; 541 } 542 543 n = pwrite(fd, buf, HAMMER2_PBUFSIZE, 544 volu_base + total_space - HAMMER2_PBUFSIZE); 545 if (n != HAMMER2_PBUFSIZE) { 546 perror("write (at-end-of-volume)"); 547 exit(1); 548 } 549 550 /* 551 * Make sure alloc_base won't cross the reserved area at the 552 * beginning of each 2GB zone. 553 * 554 * Reserve space for the super-root inode and the root inode. 555 * Make sure they are in the same 64K block to simplify our code. 556 */ 557 assert((alloc_base & HAMMER2_PBUFMASK) == 0); 558 assert(alloc_base < HAMMER2_ZONE_BYTES64 - HAMMER2_ZONE_SEG); 559 now = nowtime(); 560 bzero(buf, HAMMER2_PBUFSIZE); 561 562 alloc_base &= ~HAMMER2_PBUFMASK64; 563 alloc_direct(&alloc_base, &sroot_blockref, HAMMER2_INODE_BYTES); 564 565 for (i = 0; i < NLabels; ++i) { 566 uuidgen(&Hammer2_PfsCLID[i], 1); 567 uuidgen(&Hammer2_PfsFSID[i], 1); 568 569 alloc_direct(&alloc_base, &root_blockref[i], 570 HAMMER2_INODE_BYTES); 571 assert(((sroot_blockref.data_off ^ root_blockref[i].data_off) & 572 HAMMER2_OFF_MASK_HI) == 0); 573 574 /* 575 * Format the root directory inode, which is left empty. 576 */ 577 rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & 578 root_blockref[i].data_off)); 579 rawip->meta.version = HAMMER2_INODE_VERSION_ONE; 580 rawip->meta.ctime = now; 581 rawip->meta.mtime = now; 582 /* rawip->atime = now; NOT IMPL MUST BE ZERO */ 583 rawip->meta.btime = now; 584 rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY; 585 rawip->meta.mode = 0755; 586 rawip->meta.inum = 1; /* root inode, inumber 1 */ 587 rawip->meta.nlinks = 1; /* directory link count compat */ 588 589 rawip->meta.name_len = strlen(Label[i]); 590 bcopy(Label[i], rawip->filename, rawip->meta.name_len); 591 rawip->meta.name_key = 592 dirhash(rawip->filename, rawip->meta.name_len); 593 594 /* 595 * Compression mode and supported copyids. 596 * 597 * Do not allow compression when creating any "BOOT" label 598 * (pfs-create also does the same if the pfs is named "BOOT") 599 */ 600 if (strcasecmp(Label[i], "BOOT") == 0) { 601 rawip->meta.comp_algo = HAMMER2_ENC_ALGO( 602 HAMMER2_COMP_AUTOZERO); 603 rawip->meta.check_algo = HAMMER2_ENC_ALGO( 604 HAMMER2_CHECK_XXHASH64); 605 } else { 606 rawip->meta.comp_algo = HAMMER2_ENC_ALGO( 607 HAMMER2_COMP_NEWFS_DEFAULT); 608 rawip->meta.check_algo = HAMMER2_ENC_ALGO( 609 HAMMER2_CHECK_XXHASH64); 610 } 611 612 /* 613 * NOTE: We leave nmasters set to 0, which means that we 614 * don't know how many masters there are. The quorum 615 * calculation will effectively be 1 ( 0 / 2 + 1 ). 616 */ 617 rawip->meta.pfs_clid = Hammer2_PfsCLID[i]; 618 rawip->meta.pfs_fsid = Hammer2_PfsFSID[i]; 619 rawip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER; 620 rawip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT; 621 622 /* first allocatable inode number */ 623 rawip->meta.pfs_inum = 16; 624 625 /* rawip->u.blockset is left empty */ 626 627 /* 628 * The root blockref will be stored in the super-root inode as 629 * the only directory entry. The copyid here is the actual 630 * copyid of the storage ref. 631 * 632 * The key field for a directory entry's blockref is 633 * essentially the name key for the entry. 634 */ 635 root_blockref[i].key = rawip->meta.name_key; 636 root_blockref[i].copyid = HAMMER2_COPYID_LOCAL; 637 root_blockref[i].keybits = 0; 638 root_blockref[i].check.xxhash64.value = 639 XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED); 640 root_blockref[i].type = HAMMER2_BREF_TYPE_INODE; 641 root_blockref[i].methods = 642 HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) | 643 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 644 root_blockref[i].mirror_tid = 16; 645 root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT; 646 } 647 648 /* 649 * Format the super-root directory inode, giving it one directory 650 * entry (root_blockref) and fixup the icrc method. 651 * 652 * The superroot contains one directory entry pointing at the root 653 * inode (named via the label). Inodes contain one blockset which 654 * is fully associative so we can put the entry anywhere without 655 * having to worry about the hash. Use index 0. 656 */ 657 rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & sroot_blockref.data_off)); 658 rawip->meta.version = HAMMER2_INODE_VERSION_ONE; 659 rawip->meta.ctime = now; 660 rawip->meta.mtime = now; 661 /* rawip->meta.atime = now; NOT IMPL MUST BE ZERO */ 662 rawip->meta.btime = now; 663 rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY; 664 rawip->meta.mode = 0700; /* super-root - root only */ 665 rawip->meta.inum = 0; /* super root inode, inumber 0 */ 666 rawip->meta.nlinks = 2; /* directory link count compat */ 667 668 rawip->meta.name_len = 0; /* super-root is unnamed */ 669 rawip->meta.name_key = 0; 670 671 rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO); 672 rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64); 673 674 /* 675 * The super-root is flagged as a PFS and typically given its own 676 * random FSID, making it possible to mirror an entire HAMMER2 disk 677 * snapshots and all if desired. PFS ids are used to match up 678 * mirror sources and targets and cluster copy sources and targets. 679 * 680 * (XXX whole-disk logical mirroring is not really supported in 681 * the first attempt because each PFS is in its own modify/mirror 682 * transaction id domain, so normal mechanics cannot cross a PFS 683 * boundary). 684 */ 685 rawip->meta.pfs_clid = Hammer2_SupCLID; 686 rawip->meta.pfs_fsid = Hammer2_SupFSID; 687 rawip->meta.pfs_type = HAMMER2_PFSTYPE_SUPROOT; 688 snprintf((char*)rawip->filename, sizeof(rawip->filename), "SUPROOT"); 689 rawip->meta.name_key = 0; 690 rawip->meta.name_len = strlen((char*)rawip->filename); 691 692 /* The super-root has an inode number of 0 */ 693 rawip->meta.pfs_inum = 0; 694 695 /* 696 * Currently newfs_hammer2 just throws the PFS inodes into the 697 * top-level block table at the volume root and doesn't try to 698 * create an indirect block, so we are limited to ~4 at filesystem 699 * creation time. More can be added after mounting. 700 */ 701 qsort(root_blockref, NLabels, sizeof(root_blockref[0]), blkrefary_cmp); 702 for (i = 0; i < NLabels; ++i) 703 rawip->u.blockset.blockref[i] = root_blockref[i]; 704 705 /* 706 * The sroot blockref will be stored in the volume header. 707 */ 708 sroot_blockref.copyid = HAMMER2_COPYID_LOCAL; 709 sroot_blockref.keybits = 0; 710 sroot_blockref.check.xxhash64.value = 711 XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED); 712 sroot_blockref.type = HAMMER2_BREF_TYPE_INODE; 713 sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) | 714 HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO); 715 sroot_blockref.mirror_tid = 16; 716 rawip = NULL; 717 718 /* 719 * Write out the 64K HAMMER2 block containing the root and sroot. 720 */ 721 n = pwrite(fd, buf, HAMMER2_PBUFSIZE, 722 sroot_blockref.data_off & HAMMER2_OFF_MASK_HI); 723 if (n != HAMMER2_PBUFSIZE) { 724 perror("write"); 725 exit(1); 726 } 727 728 /* 729 * Format the volume header. 730 * 731 * The volume header points to sroot_blockref. Also be absolutely 732 * sure that allocator_beg is set. 733 */ 734 bzero(buf, HAMMER2_PBUFSIZE); 735 vol = (void *)buf; 736 737 vol->magic = HAMMER2_VOLUME_ID_HBO; 738 vol->boot_beg = boot_base; 739 vol->boot_end = boot_base + BootAreaSize; 740 vol->aux_beg = aux_base; 741 vol->aux_end = aux_base + AuxAreaSize; 742 vol->volu_size = total_space; 743 vol->version = Hammer2Version; 744 vol->flags = 0; 745 746 vol->fsid = Hammer2_VolFSID; 747 vol->fstype = Hammer2_FSType; 748 749 vol->peer_type = DMSG_PEER_HAMMER2; /* LNK_CONN identification */ 750 751 vol->allocator_size = free_space; 752 vol->allocator_free = free_space; 753 vol->allocator_beg = alloc_base; 754 755 vol->sroot_blockset.blockref[0] = sroot_blockref; 756 vol->mirror_tid = 16; /* all blockref mirror TIDs set to 16 */ 757 vol->freemap_tid = 16; /* all blockref mirror TIDs set to 16 */ 758 vol->icrc_sects[HAMMER2_VOL_ICRC_SECT1] = 759 hammer2_icrc32((char *)vol + HAMMER2_VOLUME_ICRC1_OFF, 760 HAMMER2_VOLUME_ICRC1_SIZE); 761 762 /* 763 * Set ICRC_SECT0 after all remaining elements of sect0 have been 764 * populated in the volume header. Note hat ICRC_SECT* (except for 765 * SECT0) are part of sect0. 766 */ 767 vol->icrc_sects[HAMMER2_VOL_ICRC_SECT0] = 768 hammer2_icrc32((char *)vol + HAMMER2_VOLUME_ICRC0_OFF, 769 HAMMER2_VOLUME_ICRC0_SIZE); 770 vol->icrc_volheader = 771 hammer2_icrc32((char *)vol + HAMMER2_VOLUME_ICRCVH_OFF, 772 HAMMER2_VOLUME_ICRCVH_SIZE); 773 774 /* 775 * Write the volume header and all alternates. 776 */ 777 for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) { 778 if (i * HAMMER2_ZONE_BYTES64 >= total_space) 779 break; 780 n = pwrite(fd, buf, HAMMER2_PBUFSIZE, 781 volu_base + i * HAMMER2_ZONE_BYTES64); 782 if (n != HAMMER2_PBUFSIZE) { 783 perror("write"); 784 exit(1); 785 } 786 } 787 788 /* 789 * Cleanup 790 */ 791 free(buf); 792 } 793 794 static void 795 alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, size_t bytes) 796 { 797 int radix; 798 799 radix = 0; 800 assert(bytes); 801 while ((bytes & 1) == 0) { 802 bytes >>= 1; 803 ++radix; 804 } 805 assert(bytes == 1); 806 if (radix < HAMMER2_RADIX_MIN) 807 radix = HAMMER2_RADIX_MIN; 808 809 bzero(bref, sizeof(*bref)); 810 bref->data_off = *basep | radix; 811 bref->vradix = radix; 812 813 *basep += 1U << radix; 814 } 815 816 /* 817 * Borrow HAMMER1's directory hash algorithm #1 with a few modifications. 818 * The filename is split into fields which are hashed separately and then 819 * added together. 820 * 821 * Differences include: bit 63 must be set to 1 for HAMMER2 (HAMMER1 sets 822 * it to 0), this is because bit63=0 is used for hidden hardlinked inodes. 823 * (This means we do not need to do a 0-check/or-with-0x100000000 either). 824 * 825 * Also, the iscsi crc code is used instead of the old crc32 code. 826 */ 827 static hammer2_key_t 828 dirhash(const unsigned char *name, size_t len) 829 { 830 const unsigned char *aname = name; 831 uint32_t crcx; 832 uint64_t key; 833 size_t i; 834 size_t j; 835 836 /* 837 * Filesystem version 6 or better will create directories 838 * using the ALG1 dirhash. This hash breaks the filename 839 * up into domains separated by special characters and 840 * hashes each domain independently. 841 * 842 * We also do a simple sub-sort using the first character 843 * of the filename in the top 5-bits. 844 */ 845 key = 0; 846 847 /* 848 * m32 849 */ 850 crcx = 0; 851 for (i = j = 0; i < len; ++i) { 852 if (aname[i] == '.' || 853 aname[i] == '-' || 854 aname[i] == '_' || 855 aname[i] == '~') { 856 if (i != j) 857 crcx += hammer2_icrc32(aname + j, i - j); 858 j = i + 1; 859 } 860 } 861 if (i != j) 862 crcx += hammer2_icrc32(aname + j, i - j); 863 864 /* 865 * The directory hash utilizes the top 32 bits of the 64-bit key. 866 * Bit 63 must be set to 1. 867 */ 868 crcx |= 0x80000000U; 869 key |= (uint64_t)crcx << 32; 870 871 /* 872 * l16 - crc of entire filename 873 * 874 * This crc reduces degenerate hash collision conditions 875 */ 876 crcx = hammer2_icrc32(aname, len); 877 crcx = crcx ^ (crcx << 16); 878 key |= crcx & 0xFFFF0000U; 879 880 /* 881 * Set bit 15. This allows readdir to strip bit 63 so a positive 882 * 64-bit cookie/offset can always be returned, and still guarantee 883 * that the values 0x0000-0x7FFF are available for artificial entries. 884 * ('.' and '..'). 885 */ 886 key |= 0x8000U; 887 888 return (key); 889 } 890 891 static int 892 blkrefary_cmp(const void *b1, const void *b2) 893 { 894 const hammer2_blockref_t *bref1 = b1; 895 const hammer2_blockref_t *bref2 = b2; 896 if (bref1->key < bref2->key) 897 return(-1); 898 if (bref1->key > bref2->key) 899 return(1); 900 return 0; 901 } 902