1 /*
2  * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
3  * Copyright (c) 2011-2022 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Matthew Dillon <dillon@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/time.h>
38 #include <sys/sysctl.h>
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <stddef.h>
43 #include <stdint.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <fcntl.h>
47 #include <assert.h>
48 #include <err.h>
49 #include <uuid.h>
50 
51 #include <vfs/hammer2/hammer2_disk.h>
52 #include <vfs/hammer2/hammer2_xxhash.h>
53 
54 #include "mkfs_hammer2.h"
55 #include "hammer2_subs.h"
56 
57 static uint64_t nowtime(void);
58 static int blkrefary_cmp(const void *b1, const void *b2);
59 static void alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref,
60 				size_t bytes);
61 
62 static int
get_hammer2_version(void)63 get_hammer2_version(void)
64 {
65 	int version = HAMMER2_VOL_VERSION_DEFAULT;
66 	size_t olen = sizeof(version);
67 
68 	if (sysctlbyname("vfs.hammer2.supported_version",
69 			 &version, &olen, NULL, 0) == 0) {
70 		if (version >= HAMMER2_VOL_VERSION_WIP) {
71 			version = HAMMER2_VOL_VERSION_WIP - 1;
72 			fprintf(stderr,
73 				"newfs_hammer2: WARNING: HAMMER2 VFS "
74 				"supports higher version than I "
75 				"understand.\n"
76 				"Using default version %d\n",
77 				version);
78 		}
79 	} else {
80 		fprintf(stderr,
81 			"newfs_hammer2: WARNING: HAMMER2 VFS not "
82 			"loaded, cannot get version info.\n"
83 			"Using default version %d\n",
84 			version);
85 	}
86 	return(version);
87 }
88 
89 void
hammer2_mkfs_init(hammer2_mkfs_options_t * opt)90 hammer2_mkfs_init(hammer2_mkfs_options_t *opt)
91 {
92 	uint32_t status;
93 
94 	memset(opt, 0, sizeof(*opt));
95 
96 	opt->Hammer2Version = get_hammer2_version();
97 	opt->Label[opt->NLabels++] = strdup("LOCAL");
98 	opt->CompType = HAMMER2_COMP_DEFAULT; /* LZ4 */
99 	opt->CheckType = HAMMER2_CHECK_DEFAULT; /* xxhash64 */
100 	opt->DefaultLabelType = HAMMER2_LABEL_NONE;
101 
102 	/*
103 	 * Generate a filesystem id and lookup the filesystem type
104 	 */
105 	srandomdev();
106 	uuid_create(&opt->Hammer2_VolFSID, NULL);
107 	uuid_create(&opt->Hammer2_SupCLID, NULL);
108 	uuid_create(&opt->Hammer2_SupFSID, NULL);
109 	uuid_from_string(HAMMER2_UUID_STRING, &opt->Hammer2_FSType, &status);
110 	/*uuid_name_lookup(&Hammer2_FSType, "DragonFly HAMMER2", &status);*/
111 	if (status != uuid_s_ok) {
112 		errx(1, "uuids file does not have the DragonFly "
113 			"HAMMER2 filesystem type");
114 	}
115 }
116 
117 void
hammer2_mkfs_cleanup(hammer2_mkfs_options_t * opt)118 hammer2_mkfs_cleanup(hammer2_mkfs_options_t *opt)
119 {
120 	int i;
121 
122 	for (i = 0; i < opt->NLabels; i++)
123 		free(opt->Label[i]);
124 }
125 
126 static void
adjust_options(hammer2_ondisk_t * fso,hammer2_mkfs_options_t * opt)127 adjust_options(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt)
128 {
129 	/*
130 	 * Adjust Label[] and NLabels.
131 	 */
132 	switch (opt->DefaultLabelType) {
133 	case HAMMER2_LABEL_BOOT:
134 		opt->Label[opt->NLabels++] = strdup("BOOT");
135 		break;
136 	case HAMMER2_LABEL_ROOT:
137 		opt->Label[opt->NLabels++] = strdup("ROOT");
138 		break;
139 	case HAMMER2_LABEL_DATA:
140 		opt->Label[opt->NLabels++] = strdup("DATA");
141 		break;
142 	case HAMMER2_LABEL_NONE:
143 		/* nothing to do */
144 		break;
145 	default:
146 		assert(0);
147 		break;
148 	}
149 
150 	/*
151 	 * Calculate defaults for the boot area size and round to the
152 	 * volume alignment boundary.
153 	 *
154 	 * NOTE: These areas are currently not used for booting but are
155 	 *	 reserved for future filesystem expansion.
156 	 */
157 	hammer2_off_t BootAreaSize = opt->BootAreaSize;
158 	if (BootAreaSize == 0) {
159 		BootAreaSize = HAMMER2_BOOT_NOM_BYTES;
160 		while (BootAreaSize > fso->total_size / 20)
161 			BootAreaSize >>= 1;
162 		if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES)
163 			BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
164 	} else if (BootAreaSize < HAMMER2_BOOT_MIN_BYTES) {
165 		BootAreaSize = HAMMER2_BOOT_MIN_BYTES;
166 	}
167 	BootAreaSize = (BootAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
168 		        ~HAMMER2_VOLUME_ALIGNMASK64;
169 	opt->BootAreaSize = BootAreaSize;
170 
171 	/*
172 	 * Calculate defaults for the aux area size and round to the
173 	 * volume alignment boundary.
174 	 *
175 	 * NOTE: These areas are currently not used for logging but are
176 	 *	 reserved for future filesystem expansion.
177 	 */
178 	hammer2_off_t AuxAreaSize = opt->AuxAreaSize;
179 	if (AuxAreaSize == 0) {
180 		AuxAreaSize = HAMMER2_AUX_NOM_BYTES;
181 		while (AuxAreaSize > fso->total_size / 20)
182 			AuxAreaSize >>= 1;
183 		if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES)
184 			AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
185 	} else if (AuxAreaSize < HAMMER2_AUX_MIN_BYTES) {
186 		AuxAreaSize = HAMMER2_AUX_MIN_BYTES;
187 	}
188 	AuxAreaSize = (AuxAreaSize + HAMMER2_VOLUME_ALIGNMASK64) &
189 		       ~HAMMER2_VOLUME_ALIGNMASK64;
190 	opt->AuxAreaSize = AuxAreaSize;
191 }
192 
193 /*
194  * Convert a string to a 64 bit signed integer with various requirements.
195  */
196 int64_t
getsize(const char * str,int64_t minval,int64_t maxval,int powerof2)197 getsize(const char *str, int64_t minval, int64_t maxval, int powerof2)
198 {
199 	int64_t val;
200 	char *ptr;
201 
202 	val = strtoll(str, &ptr, 0);
203 	switch(*ptr) {
204 	case 't':
205 	case 'T':
206 		val *= 1024;
207 		/* fall through */
208 	case 'g':
209 	case 'G':
210 		val *= 1024;
211 		/* fall through */
212 	case 'm':
213 	case 'M':
214 		val *= 1024;
215 		/* fall through */
216 	case 'k':
217 	case 'K':
218 		val *= 1024;
219 		break;
220 	default:
221 		errx(1, "Unknown suffix in number '%s'", str);
222 		/* not reached */
223 	}
224 	if (ptr[1]) {
225 		errx(1, "Unknown suffix in number '%s'", str);
226 		/* not reached */
227 	}
228 	if (val < minval) {
229 		errx(1, "Value too small: %s, min is %s",
230 		     str, sizetostr(minval));
231 		/* not reached */
232 	}
233 	if (val > maxval) {
234 		errx(1, "Value too large: %s, max is %s",
235 		     str, sizetostr(maxval));
236 		/* not reached */
237 	}
238 	if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) {
239 		errx(1, "Value not power of 2: %s", str);
240 		/* not reached */
241 	}
242 	if ((powerof2 & 2) && (val & HAMMER2_NEWFS_ALIGNMASK)) {
243 		errx(1, "Value not an integral multiple of %dK: %s",
244 		     HAMMER2_NEWFS_ALIGN / 1024, str);
245 		/* not reached */
246 	}
247 	return(val);
248 }
249 
/*
 * Return the current wall-clock time from gettimeofday() expressed
 * in microseconds.
 */
static uint64_t
nowtime(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	return(now.tv_sec * 1000000LL + now.tv_usec);
}
260 
261 static hammer2_off_t
format_hammer2_misc(hammer2_volume_t * vol,hammer2_mkfs_options_t * opt,hammer2_off_t boot_base,hammer2_off_t aux_base)262 format_hammer2_misc(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt,
263 		    hammer2_off_t boot_base, hammer2_off_t aux_base)
264 {
265 	char *buf = malloc(HAMMER2_PBUFSIZE);
266 	hammer2_off_t alloc_base = aux_base + opt->AuxAreaSize;
267 	hammer2_off_t tmp_base;
268 	size_t n;
269 	int i;
270 
271 	/*
272 	 * Clear the entire 4MB reserve for the first 2G zone.
273 	 */
274 	bzero(buf, HAMMER2_PBUFSIZE);
275 	tmp_base = 0;
276 	for (i = 0; i < HAMMER2_ZONE_BLOCKS_SEG; ++i) {
277 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
278 		if (n != HAMMER2_PBUFSIZE) {
279 			perror("write");
280 			exit(1);
281 		}
282 		tmp_base += HAMMER2_PBUFSIZE;
283 	}
284 
285 	/*
286 	 * Make sure alloc_base won't cross the reserved area at the
287 	 * beginning of each 1GB.
288 	 *
289 	 * Reserve space for the super-root inode and the root inode.
290 	 * Make sure they are in the same 64K block to simplify our code.
291 	 */
292 	assert((alloc_base & HAMMER2_PBUFMASK) == 0);
293 	assert(alloc_base < HAMMER2_FREEMAP_LEVEL1_SIZE);
294 
295 	/*
296 	 * Clear the boot/aux area.
297 	 */
298 	for (tmp_base = boot_base; tmp_base < alloc_base;
299 	     tmp_base += HAMMER2_PBUFSIZE) {
300 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE, tmp_base);
301 		if (n != HAMMER2_PBUFSIZE) {
302 			perror("write (boot/aux)");
303 			exit(1);
304 		}
305 	}
306 
307 	free(buf);
308 	return(alloc_base);
309 }
310 
311 static hammer2_off_t
format_hammer2_inode(hammer2_volume_t * vol,hammer2_mkfs_options_t * opt,hammer2_blockref_t * sroot_blockrefp,hammer2_off_t alloc_base)312 format_hammer2_inode(hammer2_volume_t *vol, hammer2_mkfs_options_t *opt,
313 		     hammer2_blockref_t *sroot_blockrefp,
314 		     hammer2_off_t alloc_base)
315 {
316 	char *buf = malloc(HAMMER2_PBUFSIZE);
317 	hammer2_inode_data_t *rawip;
318 	hammer2_blockref_t sroot_blockref;
319 	hammer2_blockref_t root_blockref[MAXLABELS];
320 	uint64_t now;
321 	size_t n;
322 	int i;
323 
324 	bzero(buf, HAMMER2_PBUFSIZE);
325 	bzero(&sroot_blockref, sizeof(sroot_blockref));
326 	bzero(root_blockref, sizeof(root_blockref));
327 	now = nowtime();
328 	alloc_base &= ~HAMMER2_PBUFMASK64;
329 	alloc_direct(&alloc_base, &sroot_blockref, HAMMER2_INODE_BYTES);
330 
331 	for (i = 0; i < opt->NLabels; ++i) {
332 		uuid_create(&opt->Hammer2_PfsCLID[i], NULL);
333 		uuid_create(&opt->Hammer2_PfsFSID[i], NULL);
334 
335 		alloc_direct(&alloc_base, &root_blockref[i],
336 			     HAMMER2_INODE_BYTES);
337 		assert(((sroot_blockref.data_off ^ root_blockref[i].data_off) &
338 			~HAMMER2_PBUFMASK64) == 0);
339 
340 		/*
341 		 * Format the root directory inode, which is left empty.
342 		 */
343 		rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO &
344 					root_blockref[i].data_off));
345 		rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
346 		rawip->meta.ctime = now;
347 		rawip->meta.mtime = now;
348 		/* rawip->atime = now; NOT IMPL MUST BE ZERO */
349 		rawip->meta.btime = now;
350 		rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
351 		rawip->meta.mode = 0755;
352 		rawip->meta.inum = 1;	/* root inode, inumber 1 */
353 		rawip->meta.nlinks = 1;	/* directory link count compat */
354 
355 		rawip->meta.name_len = strlen(opt->Label[i]);
356 		bcopy(opt->Label[i], rawip->filename, rawip->meta.name_len);
357 		rawip->meta.name_key =
358 				dirhash((char *)rawip->filename, rawip->meta.name_len);
359 
360 		/*
361 		 * Compression mode and supported copyids.
362 		 *
363 		 * Do not allow compression when creating any "BOOT" label
364 		 * (pfs-create also does the same if the pfs is named "BOOT")
365 		 */
366 		if (strcasecmp(opt->Label[i], "BOOT") == 0) {
367 			rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
368 						    HAMMER2_COMP_AUTOZERO);
369 			rawip->meta.check_algo = HAMMER2_ENC_ALGO(
370 						    HAMMER2_CHECK_XXHASH64);
371 		} else {
372 			rawip->meta.comp_algo = HAMMER2_ENC_ALGO(
373 						    opt->CompType);
374 			rawip->meta.check_algo = HAMMER2_ENC_ALGO(
375 						    opt->CheckType);
376 		}
377 
378 		/*
379 		 * NOTE: We leave nmasters set to 0, which means that we
380 		 *	 don't know how many masters there are.  The quorum
381 		 *	 calculation will effectively be 1 ( 0 / 2 + 1 ).
382 		 */
383 		rawip->meta.pfs_clid = opt->Hammer2_PfsCLID[i];
384 		rawip->meta.pfs_fsid = opt->Hammer2_PfsFSID[i];
385 		rawip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
386 		rawip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
387 
388 		/* first allocatable inode number */
389 		rawip->meta.pfs_inum = 16;
390 
391 		/* rawip->u.blockset is left empty */
392 
393 		/*
394 		 * The root blockref will be stored in the super-root inode as
395 		 * one of the ~4 PFS root directories.  The copyid here is the
396 		 * actual copyid of the storage ref.
397 		 *
398 		 * The key field for a PFS root directory's blockref is
399 		 * essentially the name key for the entry.
400 		 */
401 		root_blockref[i].key = rawip->meta.name_key;
402 		root_blockref[i].copyid = HAMMER2_COPYID_LOCAL;
403 		root_blockref[i].keybits = 0;
404 		root_blockref[i].check.xxhash64.value =
405 				XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
406 		root_blockref[i].type = HAMMER2_BREF_TYPE_INODE;
407 		root_blockref[i].methods =
408 				HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
409 				HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
410 		root_blockref[i].mirror_tid = 16;
411 		root_blockref[i].flags = HAMMER2_BREF_FLAG_PFSROOT;
412 	}
413 
414 	/*
415 	 * Format the super-root directory inode, giving it ~4 PFS root
416 	 * directories (root_blockref).
417 	 *
418 	 * The superroot contains ~4 directories pointing at the PFS root
419 	 * inodes (named via the label).  Inodes contain one blockset which
420 	 * is fully associative so we can put the entry anywhere without
421 	 * having to worry about the hash.  Use index 0.
422 	 */
423 	rawip = (void *)(buf + (HAMMER2_OFF_MASK_LO & sroot_blockref.data_off));
424 	rawip->meta.version = HAMMER2_INODE_VERSION_ONE;
425 	rawip->meta.ctime = now;
426 	rawip->meta.mtime = now;
427 	/* rawip->meta.atime = now; NOT IMPL MUST BE ZERO */
428 	rawip->meta.btime = now;
429 	rawip->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
430 	rawip->meta.mode = 0700;	/* super-root - root only */
431 	rawip->meta.inum = 0;		/* super root inode, inumber 0 */
432 	rawip->meta.nlinks = 2;		/* directory link count compat */
433 
434 	rawip->meta.name_len = 0;	/* super-root is unnamed */
435 	rawip->meta.name_key = 0;
436 
437 	rawip->meta.comp_algo = HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
438 	rawip->meta.check_algo = HAMMER2_ENC_ALGO(HAMMER2_CHECK_XXHASH64);
439 
440 	/*
441 	 * The super-root is flagged as a PFS and typically given its own
442 	 * random FSID, making it possible to mirror an entire HAMMER2 disk
443 	 * snapshots and all if desired.  PFS ids are used to match up
444 	 * mirror sources and targets and cluster copy sources and targets.
445 	 *
446 	 * (XXX whole-disk logical mirroring is not really supported in
447 	 *  the first attempt because each PFS is in its own modify/mirror
448 	 *  transaction id domain, so normal mechanics cannot cross a PFS
449 	 *  boundary).
450 	 */
451 	rawip->meta.pfs_clid = opt->Hammer2_SupCLID;
452 	rawip->meta.pfs_fsid = opt->Hammer2_SupFSID;
453 	rawip->meta.pfs_type = HAMMER2_PFSTYPE_SUPROOT;
454 	snprintf((char*)rawip->filename, sizeof(rawip->filename), "SUPROOT");
455 	rawip->meta.name_key = 0;
456 	rawip->meta.name_len = strlen((char*)rawip->filename);
457 
458 	/* The super-root has an inode number of 0 */
459 	rawip->meta.pfs_inum = 0;
460 
461 	/*
462 	 * Currently newfs_hammer2 just throws the PFS inodes into the
463 	 * top-level block table at the volume root and doesn't try to
464 	 * create an indirect block, so we are limited to ~4 at filesystem
465 	 * creation time.  More can be added after mounting.
466 	 */
467 	qsort(root_blockref, opt->NLabels, sizeof(root_blockref[0]), blkrefary_cmp);
468 	for (i = 0; i < opt->NLabels; ++i)
469 		rawip->u.blockset.blockref[i] = root_blockref[i];
470 
471 	/*
472 	 * The sroot blockref will be stored in the volume header.
473 	 */
474 	sroot_blockref.copyid = HAMMER2_COPYID_LOCAL;
475 	sroot_blockref.keybits = 0;
476 	sroot_blockref.check.xxhash64.value =
477 				XXH64(rawip, sizeof(*rawip), XXH_HAMMER2_SEED);
478 	sroot_blockref.type = HAMMER2_BREF_TYPE_INODE;
479 	sroot_blockref.methods = HAMMER2_ENC_CHECK(HAMMER2_CHECK_XXHASH64) |
480 			         HAMMER2_ENC_COMP(HAMMER2_COMP_AUTOZERO);
481 	sroot_blockref.mirror_tid = 16;
482 	rawip = NULL;
483 
484 	/*
485 	 * Write out the 64K HAMMER2 block containing the root and sroot.
486 	 */
487 	assert((sroot_blockref.data_off & ~HAMMER2_PBUFMASK64) ==
488 		((alloc_base - 1) & ~HAMMER2_PBUFMASK64));
489 	n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
490 		   sroot_blockref.data_off & ~HAMMER2_PBUFMASK64);
491 	if (n != HAMMER2_PBUFSIZE) {
492 		perror("write");
493 		exit(1);
494 	}
495 	*sroot_blockrefp = sroot_blockref;
496 
497 	free(buf);
498 	return(alloc_base);
499 }
500 
501 /*
502  * Create the volume header, the super-root directory inode, and
503  * the writable snapshot subdirectory (named via the label) which
504  * is to be the initial mount point, or at least the first mount point.
505  * newfs_hammer2 doesn't format the freemap bitmaps for these.
506  *
507  * 0                      4MB
508  * [----reserved_area----][boot_area][aux_area]
509  * [[vol_hdr][freemap]...]                     [sroot][root][root]...
510  *     \                                        ^\     ^     ^
511  *      \--------------------------------------/  \---/-----/---...
512  *
513  * NOTE: The total size is 8MB-aligned to avoid edge cases.
514  */
515 static void
format_hammer2(hammer2_ondisk_t * fso,hammer2_mkfs_options_t * opt,int index)516 format_hammer2(hammer2_ondisk_t *fso, hammer2_mkfs_options_t *opt, int index)
517 {
518 	char *buf = malloc(HAMMER2_PBUFSIZE);
519 	hammer2_volume_t *vol = &fso->volumes[index];
520 	hammer2_volume_data_t *voldata;
521 	hammer2_blockset_t sroot_blockset;
522 	hammer2_off_t boot_base = HAMMER2_ZONE_SEG;
523 	hammer2_off_t aux_base = boot_base + opt->BootAreaSize;
524 	hammer2_off_t alloc_base;
525 	size_t n;
526 	int i;
527 
528 	/*
529 	 * Make sure we can write to the last usable block.
530 	 */
531 	bzero(buf, HAMMER2_PBUFSIZE);
532 	n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
533 		   vol->size - HAMMER2_PBUFSIZE);
534 	if (n != HAMMER2_PBUFSIZE) {
535 		perror("write (at-end-of-volume)");
536 		exit(1);
537 	}
538 
539 	/*
540 	 * Format misc area and sroot/root inodes for the root volume.
541 	 */
542 	bzero(&sroot_blockset, sizeof(sroot_blockset));
543 	if (vol->id == HAMMER2_ROOT_VOLUME) {
544 		alloc_base = format_hammer2_misc(vol, opt, boot_base, aux_base);
545 		alloc_base = format_hammer2_inode(vol, opt,
546 						  &sroot_blockset.blockref[0],
547 						  alloc_base);
548 	} else {
549 		alloc_base = 0;
550 		for (i = 0; i < HAMMER2_SET_COUNT; ++i)
551 			sroot_blockset.blockref[i].type = HAMMER2_BREF_TYPE_INVALID;
552 	}
553 
554 	/*
555 	 * Format the volume header.
556 	 *
557 	 * The volume header points to sroot_blockset.  Also be absolutely
558 	 * sure that allocator_beg is set for the root volume.
559 	 */
560 	assert(HAMMER2_VOLUME_BYTES <= HAMMER2_PBUFSIZE);
561 	bzero(buf, HAMMER2_PBUFSIZE);
562 	voldata = (void *)buf;
563 
564 	voldata->magic = HAMMER2_VOLUME_ID_HBO;
565 	if (vol->id == HAMMER2_ROOT_VOLUME) {
566 		voldata->boot_beg = boot_base;
567 		voldata->boot_end = boot_base + opt->BootAreaSize;
568 		voldata->aux_beg = aux_base;
569 		voldata->aux_end = aux_base + opt->AuxAreaSize;
570 	}
571 	voldata->volu_size = vol->size;
572 	voldata->version = opt->Hammer2Version;
573 	voldata->flags = 0;
574 
575 	if (voldata->version >= HAMMER2_VOL_VERSION_MULTI_VOLUMES) {
576 		voldata->volu_id = vol->id;
577 		voldata->nvolumes = fso->nvolumes;
578 		voldata->total_size = fso->total_size;
579 		for (i = 0; i < HAMMER2_MAX_VOLUMES; ++i) {
580 			if (i < fso->nvolumes)
581 				voldata->volu_loff[i] = fso->volumes[i].offset;
582 			else
583 				voldata->volu_loff[i] = (hammer2_off_t)-1;
584 		}
585 	}
586 
587 	voldata->fsid = opt->Hammer2_VolFSID;
588 	voldata->fstype = opt->Hammer2_FSType;
589 
590 	voldata->peer_type = DMSG_PEER_HAMMER2;	/* LNK_CONN identification */
591 
592 	assert(vol->id == HAMMER2_ROOT_VOLUME || alloc_base == 0);
593 	voldata->allocator_size = fso->free_size;
594 	if (vol->id == HAMMER2_ROOT_VOLUME) {
595 		voldata->allocator_free = fso->free_size;
596 		voldata->allocator_beg = alloc_base;
597 	}
598 
599 	voldata->sroot_blockset = sroot_blockset;
600 	voldata->mirror_tid = 16;	/* all blockref mirror TIDs set to 16 */
601 	voldata->freemap_tid = 16;	/* all blockref mirror TIDs set to 16 */
602 	voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT1] =
603 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC1_OFF,
604 				       HAMMER2_VOLUME_ICRC1_SIZE);
605 
606 	/*
607 	 * Set ICRC_SECT0 after all remaining elements of sect0 have been
608 	 * populated in the volume header.  Note hat ICRC_SECT* (except for
609 	 * SECT0) are part of sect0.
610 	 */
611 	voldata->icrc_sects[HAMMER2_VOL_ICRC_SECT0] =
612 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRC0_OFF,
613 				       HAMMER2_VOLUME_ICRC0_SIZE);
614 	voldata->icrc_volheader =
615 			hammer2_icrc32((char *)voldata + HAMMER2_VOLUME_ICRCVH_OFF,
616 				       HAMMER2_VOLUME_ICRCVH_SIZE);
617 
618 	/*
619 	 * Write the volume header and all alternates.
620 	 */
621 	for (i = 0; i < HAMMER2_NUM_VOLHDRS; ++i) {
622 		if (i * HAMMER2_ZONE_BYTES64 >= vol->size)
623 			break;
624 		n = pwrite(vol->fd, buf, HAMMER2_PBUFSIZE,
625 			   i * HAMMER2_ZONE_BYTES64);
626 		if (n != HAMMER2_PBUFSIZE) {
627 			perror("write");
628 			exit(1);
629 		}
630 	}
631 	fsync(vol->fd);
632 
633 	/*
634 	 * Cleanup
635 	 */
636 	free(buf);
637 }
638 
639 static void
alloc_direct(hammer2_off_t * basep,hammer2_blockref_t * bref,size_t bytes)640 alloc_direct(hammer2_off_t *basep, hammer2_blockref_t *bref, size_t bytes)
641 {
642 	int radix;
643 
644 	radix = 0;
645 	assert(bytes);
646 	while ((bytes & 1) == 0) {
647 		bytes >>= 1;
648 		++radix;
649 	}
650 	assert(bytes == 1);
651 	if (radix < HAMMER2_RADIX_MIN)
652 		radix = HAMMER2_RADIX_MIN;
653 
654 	bzero(bref, sizeof(*bref));
655 	bref->data_off = *basep | radix;
656 	bref->vradix = radix;
657 
658 	*basep += 1U << radix;
659 }
660 
661 static int
blkrefary_cmp(const void * b1,const void * b2)662 blkrefary_cmp(const void *b1, const void *b2)
663 {
664 	const hammer2_blockref_t *bref1 = b1;
665 	const hammer2_blockref_t *bref2 = b2;
666 
667 	if (bref1->key < bref2->key)
668 		return(-1);
669 	if (bref1->key > bref2->key)
670 		return(1);
671 	return 0;
672 }
673 
674 void
hammer2_mkfs(int ac,char ** av,hammer2_mkfs_options_t * opt)675 hammer2_mkfs(int ac, char **av, hammer2_mkfs_options_t *opt)
676 {
677 	hammer2_off_t resid = 0, reserved_size;
678 	hammer2_ondisk_t fso;
679 	int i;
680 	char *vol_fsid = NULL;
681 	char *sup_clid_name = NULL;
682 	char *sup_fsid_name = NULL;
683 	char *pfs_clid_name = NULL;
684 	char *pfs_fsid_name = NULL;
685 
686 	/*
687 	 * Sanity check basic filesystem structures.  No cookies for us
688 	 * if it gets broken!
689 	 */
690 	assert(sizeof(hammer2_volume_data_t) == HAMMER2_VOLUME_BYTES);
691 	assert(sizeof(hammer2_inode_data_t) == HAMMER2_INODE_BYTES);
692 	assert(sizeof(hammer2_blockref_t) == HAMMER2_BLOCKREF_BYTES);
693 
694 	/*
695 	 * Construct volumes information.
696 	 * 1GB alignment (level1 freemap size) for volumes except for the last.
697 	 * For the last volume, typically 8MB alignment to avoid edge cases for
698 	 * reserved blocks and so raid stripes (if any) operate efficiently.
699 	 */
700 	hammer2_init_ondisk(&fso);
701 	fso.version = opt->Hammer2Version;
702 	fso.nvolumes = ac;
703 
704 	assert(ac >= 1);
705 	if (opt->NFileSystemSizes == 1) {
706 		resid = opt->FileSystemSize[0];
707 		assert(resid >= HAMMER2_FREEMAP_LEVEL1_SIZE);
708 	} else if (opt->NFileSystemSizes > 1) {
709 		if (ac != opt->NFileSystemSizes)
710 			errx(1, "Invalid filesystem size count %d vs %d",
711 			    opt->NFileSystemSizes, ac);
712 	}
713 
714 	for (i = 0; i < fso.nvolumes; ++i) {
715 		hammer2_volume_t *vol = &fso.volumes[i];
716 		hammer2_off_t size;
717 		int fd = open(av[i], O_RDWR);
718 		if (fd < 0)
719 			err(1, "Unable to open %s R+W", av[i]);
720 		size = check_volume(fd);
721 
722 		/*
723 		 * Limit size if a smaller filesystem size is specified.
724 		 */
725 		if (opt->NFileSystemSizes == 1) {
726 			if (resid == 0)
727 				errx(1, "No remaining filesystem size for %s",
728 				    av[i]);
729 			if (size > resid)
730 				size = resid;
731 			resid -= size;
732 		} else if (opt->NFileSystemSizes > 1) {
733 			resid = opt->FileSystemSize[i];
734 			assert(resid >= HAMMER2_FREEMAP_LEVEL1_SIZE);
735 			if (size > resid)
736 				size = resid;
737 		}
738 
739 		assert(size > 0);
740 		if (i == fso.nvolumes - 1)
741 			size &= ~HAMMER2_VOLUME_ALIGNMASK64;
742 		else
743 			size &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
744 		hammer2_install_volume(vol, fd, i, av[i], fso.total_size, size);
745 		fso.total_size += size;
746 	}
747 
748 	/*
749 	 * Verify volumes constructed above.
750 	 */
751 	for (i = 0; i < fso.nvolumes; ++i) {
752 		hammer2_volume_t *vol = &fso.volumes[i];
753 		printf("Volume %-15s size %s\n", vol->path,
754 		       sizetostr(vol->size));
755 	}
756 	hammer2_verify_volumes(&fso, NULL);
757 
758 	/*
759 	 * Adjust options.
760 	 */
761 	adjust_options(&fso, opt);
762 
763 	/*
764 	 * We'll need to stuff this in the volume header soon.
765 	 */
766 	hammer2_uuid_to_str(&opt->Hammer2_VolFSID, &vol_fsid);
767 	hammer2_uuid_to_str(&opt->Hammer2_SupCLID, &sup_clid_name);
768 	hammer2_uuid_to_str(&opt->Hammer2_SupFSID, &sup_fsid_name);
769 
770 	/*
771 	 * Calculate the amount of reserved space.  HAMMER2_ZONE_SEG (4MB)
772 	 * is reserved at the beginning of every 1GB of storage, rounded up.
773 	 * Thus a 200MB filesystem will still have a 4MB reserve area.
774 	 *
775 	 * We also include the boot and aux areas in the reserve.  The
776 	 * reserve is used to help 'df' calculate the amount of available
777 	 * space.
778 	 *
779 	 * XXX I kinda screwed up and made the reserved area on the LEVEL1
780 	 *     boundary rather than the ZONE boundary.  LEVEL1 is on 1GB
781 	 *     boundaries rather than 2GB boundaries.  Stick with the LEVEL1
782 	 *     boundary.
783 	 */
784 	reserved_size = ((fso.total_size + HAMMER2_FREEMAP_LEVEL1_MASK) /
785 			  HAMMER2_FREEMAP_LEVEL1_SIZE) * HAMMER2_ZONE_SEG64;
786 
787 	fso.free_size = fso.total_size - reserved_size - opt->BootAreaSize - opt->AuxAreaSize;
788 	if ((int64_t)fso.free_size < 0) {
789 		fprintf(stderr, "Not enough free space\n");
790 		exit(1);
791 	}
792 
793 	/*
794 	 * Format HAMMER2 volumes.
795 	 */
796 	for (i = 0; i < fso.nvolumes; ++i)
797 		format_hammer2(&fso, opt, i);
798 
799 	printf("---------------------------------------------\n");
800 	printf("version:          %d\n", opt->Hammer2Version);
801 	printf("total-size:       %s (%jd bytes)\n",
802 	       sizetostr(fso.total_size),
803 	       (intmax_t)fso.total_size);
804 	printf("boot-area-size:   %s (%jd bytes)\n",
805 	       sizetostr(opt->BootAreaSize),
806 	       (intmax_t)opt->BootAreaSize);
807 	printf("aux-area-size:    %s (%jd bytes)\n",
808 	       sizetostr(opt->AuxAreaSize),
809 	       (intmax_t)opt->AuxAreaSize);
810 	printf("topo-reserved:    %s (%jd bytes)\n",
811 	       sizetostr(reserved_size),
812 	       (intmax_t)reserved_size);
813 	printf("free-size:        %s (%jd bytes)\n",
814 	       sizetostr(fso.free_size),
815 	       (intmax_t)fso.free_size);
816 	printf("vol-fsid:         %s\n", vol_fsid);
817 	printf("sup-clid:         %s\n", sup_clid_name);
818 	printf("sup-fsid:         %s\n", sup_fsid_name);
819 	for (i = 0; i < opt->NLabels; ++i) {
820 		printf("PFS \"%s\"\n", opt->Label[i]);
821 		hammer2_uuid_to_str(&opt->Hammer2_PfsCLID[i], &pfs_clid_name);
822 		hammer2_uuid_to_str(&opt->Hammer2_PfsFSID[i], &pfs_fsid_name);
823 		printf("    clid %s\n", pfs_clid_name);
824 		printf("    fsid %s\n", pfs_fsid_name);
825 	}
826 	if (opt->DebugOpt) {
827 		printf("---------------------------------------------\n");
828 		hammer2_print_volumes(&fso);
829 	}
830 
831 	free(vol_fsid);
832 	free(sup_clid_name);
833 	free(sup_fsid_name);
834 	free(pfs_clid_name);
835 	free(pfs_fsid_name);
836 
837 	for (i = 0; i < fso.nvolumes; ++i)
838 		hammer2_uninstall_volume(&fso.volumes[i]);
839 }
840