1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/diskslice.h>
38 #include <sys/diskmbr.h>
39 #include <sys/time.h>
40 #include <sys/sysctl.h>
41 #include <sys/ioctl.h>
42 #include <vfs/hammer2/hammer2_xxhash.h>
43 #include <vfs/hammer2/hammer2_disk.h>
44 
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stdarg.h>
48 #include <stddef.h>
49 #include <unistd.h>
50 #include <string.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <assert.h>
54 #include <err.h>
55 #include <uuid.h>
56 
57 #include "hammer2_subs.h"
58 
59 #define MAXLABELS	HAMMER2_SET_COUNT
60 
61 static int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw);
62 static uint64_t nowtime(void);
63 static int blkrefary_cmp(const void *b1, const void *b2);
64 static void usage(void);
65 
66 static void format_hammer2(hammer2_ondisk_t *fso, int index);
67 static void alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref,
68 				size_t bytes);
69 
70 static int Hammer2Version = -1;
71 static uuid_t Hammer2_FSType;	/* static filesystem type id for HAMMER2 */
72 static uuid_t Hammer2_VolFSID;	/* unique filesystem id in volu header */
73 static uuid_t Hammer2_SupCLID;	/* PFS cluster id in super-root inode */
74 static uuid_t Hammer2_SupFSID;	/* PFS unique id in super-root inode */
75 static uuid_t Hammer2_PfsCLID[MAXLABELS];
76 static uuid_t Hammer2_PfsFSID[MAXLABELS];
77 static const char *Label[MAXLABELS];
78 static hammer2_off_t BootAreaSize;
79 static hammer2_off_t AuxAreaSize;
80 static int NLabels;
81 static int DebugOpt;
82 
83 int
84 main(int ac, char **av)
85 {
86 	uint32_t status;
87 	hammer2_off_t reserved_size;
88 	hammer2_ondisk_t fso;
89 	int ch;
90 	int i;
91 	int defaultlabels = 1;
92 	char *vol_fsid = NULL;
93 	char *sup_clid_name = NULL;
94 	char *sup_fsid_name = NULL;
95 	char *pfs_clid_name = NULL;
96 	char *pfs_fsid_name = NULL;
97 
98 	Label[NLabels++] = "LOCAL";
99 
100 	/*
101 	 * Sanity check basic filesystem structures.  No cookies for us
102 	 * if it gets broken!
103 	 */
104 	assert(sizeof(hammer2_volume_data_t) == HAMMER2_VOLUME_BYTES);
105 	assert(sizeof(hammer2_inode_data_t) == HAMMER2_INODE_BYTES);
106 	assert(sizeof(hammer2_blockref_t) == HAMMER2_BLOCKREF_BYTES);
107 
108 	/*
109 	 * Generate a filesystem id and lookup the filesystem type
110 	 */
111 	srandomdev();
112 	uuidgen(&Hammer2_VolFSID, 1);
113 	uuidgen(&Hammer2_SupCLID, 1);
114 	uuidgen(&Hammer2_SupFSID, 1);
115 	uuid_from_string(HAMMER2_UUID_STRING, &Hammer2_FSType, &status);
116 	/*uuid_name_lookup(&Hammer2_FSType, "DragonFly HAMMER2", &status);*/
117 	if (status != uuid_s_ok) {
118 		errx(1, "uuids file does not have the DragonFly "
119 			"HAMMER2 filesystem type");
120 	}
121 
122 	/*
123 	 * Parse arguments
124 	 */
125 	while ((ch = getopt(ac, av, "L:b:r:V:d")) != -1) {
126 		switch(ch) {
127 		case 'L':
128 			defaultlabels = 0;
129 			if (strcasecmp(optarg, "none") == 0) {
130 				break;
131 			}
132 			if (NLabels >= MAXLABELS) {
133 				errx(1, "Limit of %d local labels",
134 				     MAXLABELS - 1);
135 			}
136 			if (strlen(optarg) == 0) {
137 				errx(1, "Volume label '%s' cannot be 0-length",
138 					optarg);
139 			}
140 			if (strlen(optarg) >= HAMMER2_INODE_MAXNAME) {
141 				errx(1, "Volume label '%s' is too long "
142 					"(%d chars max)",
143 					optarg,
144 					HAMMER2_INODE_MAXNAME - 1);
145 			}
146 			Label[NLabels++] = optarg;
147 			break;
148 		case 'b':
149 			BootAreaSize = getsize(optarg,
150 					 HAMMER2_NEWFS_ALIGN,
151 					 HAMMER2_BOOT_MAX_BYTES, 2);
152 			break;
153 		case 'r':
154 			AuxAreaSize = getsize(optarg,
155 					 HAMMER2_NEWFS_ALIGN,
156 					 HAMMER2_AUX_MAX_BYTES, 2);
157 			break;
158 		case 'V':
159 			Hammer2Version = strtol(optarg, NULL, 0);
160 			if (Hammer2Version < HAMMER2_VOL_VERSION_MIN ||
161 			    Hammer2Version >= HAMMER2_VOL_VERSION_WIP) {
162 				errx(1, "I don't understand how to format "
163 				     "HAMMER2 version %d",
164 				     Hammer2Version);
165 			}
166 			break;
167 		case 'd':
168 			DebugOpt = 1;
169 			break;
170 		default:
171 			usage();
172 			break;
173 		}
174 	}
175 
176 	/*
177 	 * Check Hammer2 version
178 	 */
179 	if (Hammer2Version < 0) {
180 		size_t olen = sizeof(Hammer2Version);
181 		Hammer2Version = HAMMER2_VOL_VERSION_DEFAULT;
182 		if (sysctlbyname("vfs.hammer2.supported_version",
183 				 &Hammer2Version, &olen, NULL, 0) == 0) {
184 			if (Hammer2Version >= HAMMER2_VOL_VERSION_WIP) {
185 				Hammer2Version = HAMMER2_VOL_VERSION_WIP - 1;
186 				fprintf(stderr,
187 					"newfs_hammer2: WARNING: HAMMER2 VFS "
188 					"supports higher version than I "
189 					"understand,\n"
190 					"using version %d\n",
191 					Hammer2Version);
192 			}
193 		} else {
194 			fprintf(stderr,
195 				"newfs_hammer2: WARNING: HAMMER2 VFS not "
196 				"loaded, cannot get version info.\n"
197 				"Using version %d\n",
198 				HAMMER2_VOL_VERSION_DEFAULT);
199 		}
200 	}
201 
202 	ac -= optind;
203 	av += optind;
204 
205 	if (ac == 0)
206 		errx(1, "You must specify at least one disk device");
207 	if (ac > HAMMER2_MAX_VOLUMES)
208 		errx(1, "The maximum number of volumes is %d",
209 		     HAMMER2_MAX_VOLUMES);
210 
211 	/*
212 	 * Adjust Label[] and NLabels.
213 	 */
214 	if (defaultlabels) {
215 		char c = av[0][strlen(av[0]) - 1];
216 		if (c == 'a')
217 			Label[NLabels++] = "BOOT";
218 		else if (c == 'd')
219 			Label[NLabels++] = "ROOT";
220 		else
221 			Label[NLabels++] = "DATA";
222 	}
223 
224 	/*
225 	 * Construct volumes information.
226 	 * 1GB alignment (level1 freemap size) for volumes except for the last.
227 	 * For the last volume, typically 8MB alignment to avoid edge cases for
228 	 * reserved blocks and so raid stripes (if any) operate efficiently.
229 	 */
230 	hammer2_init_ondisk(&fso);
231 	fso.version = Hammer2Version;
232 	fso.nvolumes = ac;
233 	for (i = 0; i < fso.nvolumes; ++i) {
234 		hammer2_volume_t *vol = &fso.volumes[i];
235 		hammer2_off_t size;
236 		int fd = open(av[i], O_RDWR);
237 		if (fd < 0)
238 			err(1, "Unable to open %s R+W", av[i]);
239 		size = check_volume(fd);
240 		if (i == fso.nvolumes - 1)
241 			size &= ~HAMMER2_VOLUME_ALIGNMASK64;
242 		else
243 			size &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
244 		hammer2_install_volume(vol, fd, i, av[i], fso.total_size, size);
245 		fso.total_size += size;
246 	}
247 
248 	/*
249 	 * Verify volumes constructed above.
250 	 */
251 	for (i = 0; i < fso.nvolumes; ++i) {
252 		hammer2_volume_t *vol = &fso.volumes[i];
253 		printf("Volume %-15s size %s\n", vol->path,
254 		       sizetostr(vol->size));
255 	}
256 	hammer2_verify_volumes(&fso, NULL);
257 
258 	/*
259 	 * Calculate defaults for the boot area size and round to the
260 	 * volume alignment boundary.
261 	 *
262 	 * NOTE: These areas are currently not used for booting but are
263 	 *	 reserved for future filesystem expansion.
264 	 */
265 	if (BootAreaSize == 0) {
266 		BootAreaSize = HAMMER2_BOOT_NOM_BYTES;
267 		while (BootAreaSize > fso.total_size / 20)
268 			BootAreaSize >>= 1;
269 		if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES)
270 			BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
271 	} else if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) {
272 		BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
273 	}
274 	BootAreaSize = (BootAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
275 		        ~HAMMER2_VOLUME_ALIGNMASK64;
276 
277 	/*
278 	 * Calculate defaults for the aux area size and round to the
279 	 * volume alignment boundary.
280 	 *
281 	 * NOTE: These areas are currently not used for logging but are
282 	 *	 reserved for future filesystem expansion.
283 	 */
284 	if (AuxAreaSize == 0) {
285 		AuxAreaSize = HAMMER2_AUX_NOM_BYTES;
286 		while (AuxAreaSize > fso.total_size / 20)
287 			AuxAreaSize >>= 1;
288 		if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES)
289 			AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
290 	} else if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES) {
291 		AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
292 	}
293 	AuxAreaSize = (AuxAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
294 		       ~HAMMER2_VOLUME_ALIGNMASK64;
295 
296 	/*
297 	 * We'll need to stuff this in the volume header soon.
298 	 */
299 	hammer2_uuid_to_str(&Hammer2_VolFSID, &vol_fsid);
300 	hammer2_uuid_to_str(&Hammer2_SupCLID, &sup_clid_name);
301 	hammer2_uuid_to_str(&Hammer2_SupFSID, &sup_fsid_name);
302 
303 	/*
304 	 * Calculate the amount of reserved space.  HAMMER2_ZONE_SEG (4MB)
305 	 * is reserved at the beginning of every 1GB of storage, rounded up.
306 	 * Thus a 200MB filesystem will still have a 4MB reserve area.
307 	 *
308 	 * We also include the boot and aux areas in the reserve.  The
309 	 * reserve is used to help 'df' calculate the amount of available
310 	 * space.
311 	 *
312 	 * XXX I kinda screwed up and made the reserved area on the LEVEL1
313 	 *     boundary rather than the ZONE boundary.  LEVEL1 is on 1GB
314 	 *     boundaries rather than 2GB boundaries.  Stick with the LEVEL1
315 	 *     boundary.
316 	 */
317 	reserved_size = ((fso.total_size + HAMMER2_FREEMAP_LEVEL1_MASK) /
318 			  HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64;
319 
320 	fso.free_size = fso.total_size - reserved_size - BootAreaSize - AuxAreaSize;
321 	if ((int64_t)fso.free_size < 0) {
322 		fprintf(stderr, "Not enough free space\n");
323 		exit(1);
324 	}
325 
326 	/*
327 	 * Format HAMMER2 volumes.
328 	 */
329 	for (i = 0; i < fso.nvolumes; ++i)
330 		format_hammer2(&fso, i);
331 
332 	printf("---------------------------------------------\n");
333 	printf("version:          %d\n", Hammer2Version);
334 	printf("total-size:       %s (%jd bytes)\n",
335 	       sizetostr(fso.total_size),
336 	       (intmax_t)fso.total_size);
337 	printf("boot-area-size:   %s (%jd bytes)\n",
338 	       sizetostr(BootAreaSize),
339 	       (intmax_t)BootAreaSize);
340 	printf("aux-area-size:    %s (%jd bytes)\n",
341 	       sizetostr(AuxAreaSize),
342 	       (intmax_t)AuxAreaSize);
343 	printf("topo-reserved:    %s (%jd bytes)\n",
344 	       sizetostr(reserved_size),
345 	       (intmax_t)reserved_size);
346 	printf("free-size:        %s (%jd bytes)\n",
347 	       sizetostr(fso.free_size),
348 	       (intmax_t)fso.free_size);
349 	printf("vol-fsid:         %s\n", vol_fsid);
350 	printf("sup-clid:         %s\n", sup_clid_name);
351 	printf("sup-fsid:         %s\n", sup_fsid_name);
352 	for (i = 0; i < NLabels; ++i) {
353 		printf("PFS \"%s\"\n", Label[i]);
354 		hammer2_uuid_to_str(&Hammer2_PfsCLID[i], &pfs_clid_name);
355 		hammer2_uuid_to_str(&Hammer2_PfsFSID[i], &pfs_fsid_name);
356 		printf("    clid %s\n", pfs_clid_name);
357 		printf("    fsid %s\n", pfs_fsid_name);
358 	}
359 	if (DebugOpt) {
360 		printf("---------------------------------------------\n");
361 		hammer2_print_volumes(&fso);
362 	}
363 
364 	free(vol_fsid);
365 	free(sup_clid_name);
366 	free(sup_fsid_name);
367 	free(pfs_clid_name);
368 	free(pfs_fsid_name);
369 
370 	for (i = 0; i < fso.nvolumes; ++i)
371 		hammer2_uninstall_volume(&fso.volumes[i]);
372 
373 	return(0);
374 }
375 
376 static
377 void
378 usage(void)
379 {
380 	fprintf(stderr,
381 		"usage: newfs_hammer2 [-b bootsize] [-r auxsize] "
382 		"[-V version] [-L label ...] special ...\n"
383 	);
384 	exit(1);
385 }
386 
387 /*
388  * Convert a string to a 64 bit signed integer with various requirements.
389  */
390 static int64_t
391 getsize(const char *str, int64_t minval, int64_t maxval, int powerof2)
392 {
393 	int64_t val;
394 	char *ptr;
395 
396 	val = strtoll(str, &ptr, 0);
397 	switch(*ptr) {
398 	case 't':
399 	case 'T':
400 		val *= 1024;
401 		/* fall through */
402 	case 'g':
403 	case 'G':
404 		val *= 1024;
405 		/* fall through */
406 	case 'm':
407 	case 'M':
408 		val *= 1024;
409 		/* fall through */
410 	case 'k':
411 	case 'K':
412 		val *= 1024;
413 		break;
414 	default:
415 		errx(1, "Unknown suffix in number '%s'", str);
416 		/* not reached */
417 	}
418 	if (ptr[1]) {
419 		errx(1, "Unknown suffix in number '%s'", str);
420 		/* not reached */
421 	}
422 	if (val < minval) {
423 		errx(1, "Value too small: %s, min is %s",
424 		     str, sizetostr(minval));
425 		/* not reached */
426 	}
427 	if (val > maxval) {
428 		errx(1, "Value too large: %s, max is %s",
429 		     str, sizetostr(maxval));
430 		/* not reached */
431 	}
432 	if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) {
433 		errx(1, "Value not power of 2: %s", str);
434 		/* not reached */
435 	}
436 	if ((powerof2 & 2) && (val & HAMMER2_NEWFS_ALIGNMASK)) {
437 		errx(1, "Value not an integral multiple of %dK: %s",
438 		     HAMMER2_NEWFS_ALIGN / 1024, str);
439 		/* not reached */
440 	}
441 	return(val);
442 }
443 
444 static uint64_t
445 nowtime(void)
446 {
447 	struct timeval tv;
448 	uint64_t xtime;
449 
450 	gettimeofday(&tv, NULL);
451 	xtime = tv.tv_sec * 1000000LL + tv.tv_usec;
452 	return(xtime);
453 }
454 
455 static
456 hammer2_off_t
457 format_hammer2_misc(hammer2_volume_t *vol, hammer2_off_t boot_base,
458 		    hammer2_off_t aux_base)
459 {
460 	char *buf = malloc(HAMMER2_PBUFSIZE);
461 	hammer2_off_t alloc_base = aux_base + AuxAreaSize;
462 	hammer2_off_t tmp_base;
463 	size_t n;
464 	int i;
465 
466 	/*
467 	 * Clear the entire 4MB reserve for the first 2G zone.
468 	 */
469 	bzero(buf, HAMMER2_PBUFSIZE);
470 	tmp_base = 0;
471 	for (i = 0; i < HAMMER2_ZONE_BLOCKS_SEG; ++i) {
472 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
473 		if (n != HAMMER2_PBUFSIZE) {
474 			perror("write");
475 			exit(1);
476 		}
477 		tmp_base += HAMMER2_PBUFSIZE;
478 	}
479 
480 	/*
481 	 * Make sure alloc_base won't cross the reserved area at the
482 	 * beginning of each 1GB.
483 	 *
484 	 * Reserve space for the super-root inode and the root inode.
485 	 * Make sure they are in the same 64K block to simplify our code.
486 	 */
487 	assert((alloc_base & HAMMER2_PBUFMASK) == 0);
488 	assert(alloc_base < HAMMER2_FREEMAP_LEVEL1_SIZE);
489 
490 	/*
491 	 * Clear the boot/aux area.
492 	 */
493 	for (tmp_base = boot_base; tmp_base < alloc_base;
494 	     tmp_base += HAMMER2_PBUFSIZE) {
495 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
496 		if (n != HAMMER2_PBUFSIZE) {
497 			perror("write (boot/aux)");
498 			exit(1);
499 		}
500 	}
501 
502 	free(buf);
503 	return(alloc_base);
504 }
505 
506 static
507 hammer2_off_t
508 format_hammer2_inode(hammer2_volume_t *vol, hammer2_blockref_t *sroot_blockrefp,
509 		     hammer2_off_t alloc_base)
510 {
511 	char *buf = malloc(HAMMER2_PBUFSIZE);
512 	hammer2_inode_data_t *rawip;
513 	hammer2_blockref_t sroot_blockref;
514 	hammer2_blockref_t root_blockref[MAXLABELS];
515 	uint64_t now;
516 	size_t n;
517 	int i;
518 
519 	bzero(&sroot_blockref, sizeof(sroot_blockref));
520 	bzero(root_blockref, sizeof(root_blockref));
521 	now = nowtime();
522 	alloc_base &= ~HAMMER2_PBUFMASK64;
523 	alloc_direct(&alloc_base, &sroot_blockref, HAMMER2_INODE_BYTES);
524 
525 	for (i = 0; i < NLabels; ++i) {
526 		uuidgen(&Hammer2_PfsCLID[i], 1);
527 		uuidgen(&Hammer2_PfsFSID[i], 1);
528 
529 		alloc_direct(&alloc_base, &root_blockref[i],
530 			     HAMMER2_INODE_BYTES);
531 		assert(((sroot_blockref.data_off ^ root_blockref[i].data_off) &
532 			~HAMMER2_PBUFMASK64) == 0);
533 
534 		/*
535 		 * Format the root directory inode, which is left empty.
536 		 */
537 		rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO &
538 					root_blockref[i].data_off));
539 		rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
540 		rawip->meta.ctime = now;
541 		rawip->meta.mtime = now;
542 		/* rawip->atime = now; NOT IMPL MUST BE ZERO */
543 		rawip->meta.btime = now;
544 		rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
545 		rawip->meta.mode = 0755;
546 		rawip->meta.inum = 1;	/* root inode, inumber 1 */
547 		rawip->meta.nlinks = 1;	/* directory link count compat */
548 
549 		rawip->meta.name_len = strlen(Label[i]);
550 		bcopy(Label[i], rawip->filename, rawip->meta.name_len);
551 		rawip->meta.name_key =
552 				dirhash(rawip->filename, rawip->meta.name_len);
553 
554 		/*
555 		 * Compression mode and supported copyids.
556 		 *
557 		 * Do not allow compression when creating any "BOOT" label
558 		 * (pfs-create also does the same if the pfs is named "BOOT")
559 		 */
560 		if (strcasecmp(Label[i], "BOOT") == 0) {
561 			rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
562 						    HAMMER2_COMP_AUTOZERO);
563 			rawip->meta.check_algo = HAMMER2_ENC_ALGO(
564 						    HAMMER2_CHECK_XXHASH64);
565 		} else {
566 			rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
567 						    HAMMER2_COMP_NEWFS_DEFAULT);
568 			rawip->meta.check_algo = HAMMER2_ENC_ALGO(
569 						    HAMMER2_CHECK_XXHASH64);
570 		}
571 
572 		/*
573 		 * NOTE: We leave nmasters set to 0, which means that we
574 		 *	 don't know how many masters there are.  The quorum
575 		 *	 calculation will effectively be 1 ( 0 / 2 + 1 ).
576 		 */
577 		rawip->meta.pfs_clid = Hammer2_PfsCLID[i];
578 		rawip->meta.pfs_fsid = Hammer2_PfsFSID[i];
579 		rawip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
580 		rawip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
581 
582 		/* first allocatable inode number */
583 		rawip->meta.pfs_inum = 16;
584 
585 		/* rawip->u.blockset is left empty */
586 
587 		/*
588 		 * The root blockref will be stored in the super-root inode as
589 		 * one of the ~4 PFS root directories.  The copyid here is the
590 		 * actual copyid of the storage ref.
591 		 *
592 		 * The key field for a PFS root directory's blockref is
593 		 * essentially the name key for the entry.
594 		 */
595 		root_blockref[i].key = rawip->meta.name_key;
596 		root_blockref[i].copyid = HAMMER2_COPYID_LOCAL;
597 		root_blockref[i].keybits = 0;
598 		root_blockref[i].check.xxhash64.value =
599 				XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
600 		root_blockref[i].type = HAMMER2_BREF_TYPE_INODE;
601 		root_blockref[i].methods =
602 				HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
603 				HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
604 		root_blockref[i].mirror_tid = 16;
605 		root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT;
606 	}
607 
608 	/*
609 	 * Format the super-root directory inode, giving it ~4 PFS root
610 	 * directories (root_blockref).
611 	 *
612 	 * The superroot contains ~4 directories pointing at the PFS root
613 	 * inodes (named via the label).  Inodes contain one blockset which
614 	 * is fully associative so we can put the entry anywhere without
615 	 * having to worry about the hash.  Use index 0.
616 	 */
617 	rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & sroot_blockref.data_off));
618 	rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
619 	rawip->meta.ctime = now;
620 	rawip->meta.mtime = now;
621 	/* rawip->meta.atime = now; NOT IMPL MUST BE ZERO */
622 	rawip->meta.btime = now;
623 	rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
624 	rawip->meta.mode = 0700;	/* super-root - root only */
625 	rawip->meta.inum = 0;		/* super root inode, inumber 0 */
626 	rawip->meta.nlinks = 2;		/* directory link count compat */
627 
628 	rawip->meta.name_len = 0;	/* super-root is unnamed */
629 	rawip->meta.name_key = 0;
630 
631 	rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
632 	rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64);
633 
634 	/*
635 	 * The super-root is flagged as a PFS and typically given its own
636 	 * random FSID, making it possible to mirror an entire HAMMER2 disk
637 	 * snapshots and all if desired.  PFS ids are used to match up
638 	 * mirror sources and targets and cluster copy sources and targets.
639 	 *
640 	 * (XXX whole-disk logical mirroring is not really supported in
641 	 *  the first attempt because each PFS is in its own modify/mirror
642 	 *  transaction id domain, so normal mechanics cannot cross a PFS
643 	 *  boundary).
644 	 */
645 	rawip->meta.pfs_clid = Hammer2_SupCLID;
646 	rawip->meta.pfs_fsid = Hammer2_SupFSID;
647 	rawip->meta.pfs_type = HAMMER2_PFSTYPE_SUPROOT;
648 	snprintf((char*)rawip->filename, sizeof(rawip->filename), "SUPROOT");
649 	rawip->meta.name_key = 0;
650 	rawip->meta.name_len = strlen((char*)rawip->filename);
651 
652 	/* The super-root has an inode number of 0 */
653 	rawip->meta.pfs_inum = 0;
654 
655 	/*
656 	 * Currently newfs_hammer2 just throws the PFS inodes into the
657 	 * top-level block table at the volume root and doesn't try to
658 	 * create an indirect block, so we are limited to ~4 at filesystem
659 	 * creation time.  More can be added after mounting.
660 	 */
661 	qsort(root_blockref, NLabels, sizeof(root_blockref[0]), blkrefary_cmp);
662 	for (i = 0; i < NLabels; ++i)
663 		rawip->u.blockset.blockref[i] = root_blockref[i];
664 
665 	/*
666 	 * The sroot blockref will be stored in the volume header.
667 	 */
668 	sroot_blockref.copyid = HAMMER2_COPYID_LOCAL;
669 	sroot_blockref.keybits = 0;
670 	sroot_blockref.check.xxhash64.value =
671 				XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
672 	sroot_blockref.type = HAMMER2_BREF_TYPE_INODE;
673 	sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
674 			         HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO);
675 	sroot_blockref.mirror_tid = 16;
676 	rawip = NULL;
677 
678 	/*
679 	 * Write out the 64K HAMMER2 block containing the root and sroot.
680 	 */
681 	assert((sroot_blockref.data_off & ~HAMMER2_PBUFMASK64) ==
682 		((alloc_base - 1) & ~HAMMER2_PBUFMASK64));
683 	n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
684 		   sroot_blockref.data_off & ~HAMMER2_PBUFMASK64);
685 	if (n != HAMMER2_PBUFSIZE) {
686 		perror("write");
687 		exit(1);
688 	}
689 	*sroot_blockrefp = sroot_blockref;
690 
691 	free(buf);
692 	return(alloc_base);
693 }
694 
695 /*
696  * Create the volume header, the super-root directory inode, and
697  * the writable snapshot subdirectory (named via the label) which
698  * is to be the initial mount point, or at least the first mount point.
699  * newfs_hammer2 doesn't format the freemap bitmaps for these.
700  *
701  * 0                      4MB
702  * [----reserved_area----][boot_area][aux_area]
703  * [[vol_hdr][freemap]...]                     [sroot][root][root]...
704  *     \                                        ^\     ^     ^
705  *      \--------------------------------------/  \---/-----/---...
706  *
707  * NOTE: The total size is 8MB-aligned to avoid edge cases.
708  */
709 static
710 void
711 format_hammer2(hammer2_ondisk_t *fso, int index)
712 {
713 	char *buf = malloc(HAMMER2_PBUFSIZE);
714 	hammer2_volume_t *vol = &fso->volumes[index];
715 	hammer2_volume_data_t *voldata;
716 	hammer2_blockset_t sroot_blockset;
717 	hammer2_off_t boot_base = HAMMER2_ZONE_SEG;
718 	hammer2_off_t aux_base = boot_base + BootAreaSize;
719 	hammer2_off_t alloc_base;
720 	size_t n;
721 	int i;
722 
723 	/*
724 	 * Make sure we can write to the last usable block.
725 	 */
726 	bzero(buf, HAMMER2_PBUFSIZE);
727 	n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
728 		   vol->size - HAMMER2_PBUFSIZE);
729 	if (n != HAMMER2_PBUFSIZE) {
730 		perror("write (at-end-of-volume)");
731 		exit(1);
732 	}
733 
734 	/*
735 	 * Format misc area and sroot/root inodes for the root volume.
736 	 */
737 	bzero(&sroot_blockset, sizeof(sroot_blockset));
738 	if (vol->id == HAMMER2_ROOT_VOLUME) {
739 		alloc_base = format_hammer2_misc(vol, boot_base, aux_base);
740 		alloc_base = format_hammer2_inode(vol,
741 						  &sroot_blockset.blockref[0],
742 						  alloc_base);
743 	} else {
744 		alloc_base = 0;
745 		for (i = 0; i < HAMMER2_SET_COUNT; ++i)
746 			sroot_blockset.blockref[i].type = HAMMER2_BREF_TYPE_INVALID;
747 	}
748 
749 	/*
750 	 * Format the volume header.
751 	 *
752 	 * The volume header points to sroot_blockset.  Also be absolutely
753 	 * sure that allocator_beg is set for the root volume.
754 	 */
755 	assert(HAMMER2_VOLUME_BYTES <= HAMMER2_PBUFSIZE);
756 	bzero(buf, HAMMER2_PBUFSIZE);
757 	voldata = (void *)buf;
758 
759 	voldata->magic = HAMMER2_VOLUME_ID_HBO;
760 	if (vol->id == HAMMER2_ROOT_VOLUME) {
761 		voldata->boot_beg = boot_base;
762 		voldata->boot_end = boot_base + BootAreaSize;
763 		voldata->aux_beg = aux_base;
764 		voldata->aux_end = aux_base + AuxAreaSize;
765 	}
766 	voldata->volu_size = vol->size;
767 	voldata->version = Hammer2Version;
768 	voldata->flags = 0;
769 
770 	if (voldata->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES) {
771 		voldata->volu_id = vol->id;
772 		voldata->nvolumes = fso->nvolumes;
773 		voldata->total_size = fso->total_size;
774 		for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
775 			if (i < fso->nvolumes)
776 				voldata->volu_loff[i] = fso->volumes[i].offset;
777 			else
778 				voldata->volu_loff[i] = (hammer2_off_t)-1;
779 		}
780 	}
781 
782 	voldata->fsid = Hammer2_VolFSID;
783 	voldata->fstype = Hammer2_FSType;
784 
785 	voldata->peer_type = DMSG_PEER_HAMMER2;	/* LNK_CONN identification */
786 
787 	assert(vol->id == HAMMER2_ROOT_VOLUME || alloc_base == 0);
788 	voldata->allocator_size = fso->free_size;
789 	if (vol->id == HAMMER2_ROOT_VOLUME) {
790 		voldata->allocator_free = fso->free_size;
791 		voldata->allocator_beg = alloc_base;
792 	}
793 
794 	voldata->sroot_blockset = sroot_blockset;
795 	voldata->mirror_tid = 16;	/* all blockref mirror TIDs set to 16 */
796 	voldata->freemap_tid = 16;	/* all blockref mirror TIDs set to 16 */
797 	voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1] =
798 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC1_OFF,
799 				       HAMMER2_VOLUME_ICRC1_SIZE);
800 
801 	/*
802 	 * Set ICRC_SECT0 after all remaining elements of sect0 have been
803 	 * populated in the volume header.  Note hat ICRC_SECT* (except for
804 	 * SECT0) are part of sect0.
805 	 */
806 	voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0] =
807 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC0_OFF,
808 				       HAMMER2_VOLUME_ICRC0_SIZE);
809 	voldata->icrc_volheader =
810 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF,
811 				       HAMMER2_VOLUME_ICRCVH_SIZE);
812 
813 	/*
814 	 * Write the volume header and all alternates.
815 	 */
816 	for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
817 		if (i * HAMMER2_ZONE_BYTES64 >= vol->size)
818 			break;
819 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
820 			   i * HAMMER2_ZONE_BYTES64);
821 		if (n != HAMMER2_PBUFSIZE) {
822 			perror("write");
823 			exit(1);
824 		}
825 	}
826 	fsync(vol->fd);
827 
828 	/*
829 	 * Cleanup
830 	 */
831 	free(buf);
832 }
833 
834 static void
835 alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, size_t bytes)
836 {
837 	int radix;
838 
839 	radix = 0;
840 	assert(bytes);
841 	while ((bytes & 1) == 0) {
842 		bytes >>= 1;
843 		++radix;
844 	}
845 	assert(bytes == 1);
846 	if (radix < HAMMER2_RADIX_MIN)
847 		radix = HAMMER2_RADIX_MIN;
848 
849 	bzero(bref, sizeof(*bref));
850 	bref->data_off = *basep | radix;
851 	bref->vradix = radix;
852 
853 	*basep += 1U << radix;
854 }
855 
856 static int
857 blkrefary_cmp(const void *b1, const void *b2)
858 {
859 	const hammer2_blockref_t *bref1 = b1;
860 	const hammer2_blockref_t *bref2 = b2;
861 
862 	if (bref1->key < bref2->key)
863 		return(-1);
864 	if (bref1->key > bref2->key)
865 		return(1);
866 	return 0;
867 }
868