xref: /freebsd/usr.sbin/makefs/zfs.c (revision 9821e244)
1240afd8cSMark Johnston /*-
2240afd8cSMark Johnston  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3240afd8cSMark Johnston  *
4240afd8cSMark Johnston  * Copyright (c) 2022 The FreeBSD Foundation
5240afd8cSMark Johnston  *
6240afd8cSMark Johnston  * This software was developed by Mark Johnston under sponsorship from
7240afd8cSMark Johnston  * the FreeBSD Foundation.
8240afd8cSMark Johnston  *
9240afd8cSMark Johnston  * Redistribution and use in source and binary forms, with or without
10240afd8cSMark Johnston  * modification, are permitted provided that the following conditions are
11240afd8cSMark Johnston  * met:
12240afd8cSMark Johnston  * 1. Redistributions of source code must retain the above copyright
13240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer.
14240afd8cSMark Johnston  * 2. Redistributions in binary form must reproduce the above copyright
15240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer in
16240afd8cSMark Johnston  *    the documentation and/or other materials provided with the distribution.
17240afd8cSMark Johnston  *
18240afd8cSMark Johnston  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19240afd8cSMark Johnston  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20240afd8cSMark Johnston  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21240afd8cSMark Johnston  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22240afd8cSMark Johnston  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23240afd8cSMark Johnston  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24240afd8cSMark Johnston  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25240afd8cSMark Johnston  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26240afd8cSMark Johnston  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27240afd8cSMark Johnston  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28240afd8cSMark Johnston  * SUCH DAMAGE.
29240afd8cSMark Johnston  */
30240afd8cSMark Johnston 
31240afd8cSMark Johnston #include <sys/param.h>
32240afd8cSMark Johnston #include <sys/errno.h>
33240afd8cSMark Johnston #include <sys/queue.h>
34240afd8cSMark Johnston 
35240afd8cSMark Johnston #include <assert.h>
36240afd8cSMark Johnston #include <fcntl.h>
37187084ddSMark Johnston #include <stdalign.h>
38240afd8cSMark Johnston #include <stdbool.h>
39240afd8cSMark Johnston #include <stddef.h>
40240afd8cSMark Johnston #include <stdlib.h>
41240afd8cSMark Johnston #include <string.h>
42240afd8cSMark Johnston #include <unistd.h>
43240afd8cSMark Johnston 
44240afd8cSMark Johnston #include <util.h>
45240afd8cSMark Johnston 
46240afd8cSMark Johnston #include "makefs.h"
47240afd8cSMark Johnston #include "zfs.h"
48240afd8cSMark Johnston 
49240afd8cSMark Johnston #define	VDEV_LABEL_SPACE	\
50240afd8cSMark Johnston 	((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE))
51240afd8cSMark Johnston _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, "");
52240afd8cSMark Johnston 
53240afd8cSMark Johnston #define	MINMSSIZE		((off_t)1 << 24) /* 16MB */
54240afd8cSMark Johnston #define	DFLTMSSIZE		((off_t)1 << 29) /* 512MB */
55240afd8cSMark Johnston #define	MAXMSSIZE		((off_t)1 << 34) /* 16GB */
56240afd8cSMark Johnston 
57240afd8cSMark Johnston #define	INDIR_LEVELS		6
58240afd8cSMark Johnston /* Indirect blocks are always 128KB. */
59240afd8cSMark Johnston #define	BLKPTR_PER_INDIR	(MAXBLOCKSIZE / sizeof(blkptr_t))
60240afd8cSMark Johnston 
61240afd8cSMark Johnston struct dnode_cursor {
62240afd8cSMark Johnston 	char		inddir[INDIR_LEVELS][MAXBLOCKSIZE];
63240afd8cSMark Johnston 	off_t		indloc;
64240afd8cSMark Johnston 	off_t		indspace;
65240afd8cSMark Johnston 	dnode_phys_t	*dnode;
66240afd8cSMark Johnston 	off_t		dataoff;
67240afd8cSMark Johnston 	off_t		datablksz;
68240afd8cSMark Johnston };
69240afd8cSMark Johnston 
70240afd8cSMark Johnston void
71240afd8cSMark Johnston zfs_prep_opts(fsinfo_t *fsopts)
72240afd8cSMark Johnston {
73c4d26f02SMark Johnston 	zfs_opt_t *zfs;
74187084ddSMark Johnston 	size_t align;
75187084ddSMark Johnston 
76187084ddSMark Johnston 	align = alignof(uint64_t);
77c4d26f02SMark Johnston 	zfs = aligned_alloc(align, roundup2(sizeof(*zfs), align));
78187084ddSMark Johnston 	if (zfs == NULL)
79187084ddSMark Johnston 		err(1, "aligned_alloc");
80187084ddSMark Johnston 	memset(zfs, 0, sizeof(*zfs));
81240afd8cSMark Johnston 
82240afd8cSMark Johnston 	const option_t zfs_options[] = {
83240afd8cSMark Johnston 		{ '\0', "bootfs", &zfs->bootfs, OPT_STRPTR,
84240afd8cSMark Johnston 		  0, 0, "Bootable dataset" },
85240afd8cSMark Johnston 		{ '\0', "mssize", &zfs->mssize, OPT_INT64,
86240afd8cSMark Johnston 		  MINMSSIZE, MAXMSSIZE, "Metaslab size" },
87240afd8cSMark Johnston 		{ '\0', "poolname", &zfs->poolname, OPT_STRPTR,
88240afd8cSMark Johnston 		  0, 0, "ZFS pool name" },
89240afd8cSMark Johnston 		{ '\0', "rootpath", &zfs->rootpath, OPT_STRPTR,
90240afd8cSMark Johnston 		  0, 0, "Prefix for all dataset mount points" },
91240afd8cSMark Johnston 		{ '\0', "ashift", &zfs->ashift, OPT_INT32,
92240afd8cSMark Johnston 		  MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" },
93240afd8cSMark Johnston 		{ '\0', "nowarn", &zfs->nowarn, OPT_BOOL,
94240afd8cSMark Johnston 		  0, 0, "Suppress warning about experimental ZFS support" },
95240afd8cSMark Johnston 		{ .name = NULL }
96240afd8cSMark Johnston 	};
97240afd8cSMark Johnston 
98240afd8cSMark Johnston 	STAILQ_INIT(&zfs->datasetdescs);
99240afd8cSMark Johnston 
100240afd8cSMark Johnston 	fsopts->fs_specific = zfs;
101240afd8cSMark Johnston 	fsopts->fs_options = copy_opts(zfs_options);
102240afd8cSMark Johnston }
103240afd8cSMark Johnston 
104240afd8cSMark Johnston int
105240afd8cSMark Johnston zfs_parse_opts(const char *option, fsinfo_t *fsopts)
106240afd8cSMark Johnston {
107240afd8cSMark Johnston 	zfs_opt_t *zfs;
108240afd8cSMark Johnston 	struct dataset_desc *dsdesc;
109240afd8cSMark Johnston 	char buf[BUFSIZ], *opt, *val;
110240afd8cSMark Johnston 	int rv;
111240afd8cSMark Johnston 
112240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
113240afd8cSMark Johnston 
114240afd8cSMark Johnston 	opt = val = estrdup(option);
115240afd8cSMark Johnston 	opt = strsep(&val, "=");
116240afd8cSMark Johnston 	if (strcmp(opt, "fs") == 0) {
117240afd8cSMark Johnston 		if (val == NULL)
118240afd8cSMark Johnston 			errx(1, "invalid filesystem parameters `%s'", option);
119240afd8cSMark Johnston 
120240afd8cSMark Johnston 		/*
121240afd8cSMark Johnston 		 * Dataset descriptions will be parsed later, in dsl_init().
122240afd8cSMark Johnston 		 * Just stash them away for now.
123240afd8cSMark Johnston 		 */
124240afd8cSMark Johnston 		dsdesc = ecalloc(1, sizeof(*dsdesc));
125240afd8cSMark Johnston 		dsdesc->params = estrdup(val);
126240afd8cSMark Johnston 		free(opt);
127240afd8cSMark Johnston 		STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next);
128240afd8cSMark Johnston 		return (1);
129240afd8cSMark Johnston 	}
130240afd8cSMark Johnston 	free(opt);
131240afd8cSMark Johnston 
132240afd8cSMark Johnston 	rv = set_option(fsopts->fs_options, option, buf, sizeof(buf));
133240afd8cSMark Johnston 	return (rv == -1 ? 0 : 1);
134240afd8cSMark Johnston }
135240afd8cSMark Johnston 
136240afd8cSMark Johnston static void
137240afd8cSMark Johnston zfs_size_vdev(fsinfo_t *fsopts)
138240afd8cSMark Johnston {
139240afd8cSMark Johnston 	zfs_opt_t *zfs;
140240afd8cSMark Johnston 	off_t asize, mssize, vdevsize, vdevsize1;
141240afd8cSMark Johnston 
142240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
143240afd8cSMark Johnston 
144240afd8cSMark Johnston 	assert(fsopts->maxsize != 0);
145240afd8cSMark Johnston 	assert(zfs->ashift != 0);
146240afd8cSMark Johnston 
147240afd8cSMark Johnston 	/*
148240afd8cSMark Johnston 	 * Figure out how big the vdev should be.
149240afd8cSMark Johnston 	 */
150240afd8cSMark Johnston 	vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift);
151240afd8cSMark Johnston 	if (vdevsize < MINDEVSIZE)
152240afd8cSMark Johnston 		errx(1, "maximum image size is too small");
153240afd8cSMark Johnston 	if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) {
154240afd8cSMark Johnston 		errx(1, "image size bounds must be multiples of %d",
155240afd8cSMark Johnston 		    1 << zfs->ashift);
156240afd8cSMark Johnston 	}
157240afd8cSMark Johnston 	asize = vdevsize - VDEV_LABEL_SPACE;
158240afd8cSMark Johnston 
159240afd8cSMark Johnston 	/*
160240afd8cSMark Johnston 	 * Size metaslabs according to the following heuristic:
161240afd8cSMark Johnston 	 * - provide at least 8 metaslabs,
162240afd8cSMark Johnston 	 * - without using a metaslab size larger than 512MB.
163240afd8cSMark Johnston 	 * This approximates what OpenZFS does without being complicated.  In
164240afd8cSMark Johnston 	 * practice we expect pools to be expanded upon first use, and OpenZFS
165240afd8cSMark Johnston 	 * does not resize metaslabs in that case, so there is no right answer
166240afd8cSMark Johnston 	 * here.  In general we want to provide large metaslabs even if the
167240afd8cSMark Johnston 	 * image size is small, and 512MB is a reasonable size for pools up to
168240afd8cSMark Johnston 	 * several hundred gigabytes.
169240afd8cSMark Johnston 	 *
170240afd8cSMark Johnston 	 * The user may override this heuristic using the "-o mssize" option.
171240afd8cSMark Johnston 	 */
172240afd8cSMark Johnston 	mssize = zfs->mssize;
173240afd8cSMark Johnston 	if (mssize == 0) {
174240afd8cSMark Johnston 		mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE);
175240afd8cSMark Johnston 		if (!powerof2(mssize))
176240afd8cSMark Johnston 			mssize = 1l << (flsll(mssize) - 1);
177240afd8cSMark Johnston 	}
178240afd8cSMark Johnston 	if (!powerof2(mssize))
179240afd8cSMark Johnston 		errx(1, "metaslab size must be a power of 2");
180240afd8cSMark Johnston 
181240afd8cSMark Johnston 	/*
182240afd8cSMark Johnston 	 * If we have some slop left over, try to cover it by resizing the vdev,
183240afd8cSMark Johnston 	 * subject to the maxsize and minsize parameters.
184240afd8cSMark Johnston 	 */
185240afd8cSMark Johnston 	if (asize % mssize != 0) {
186240afd8cSMark Johnston 		vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE;
187240afd8cSMark Johnston 		if (vdevsize1 < fsopts->minsize)
188240afd8cSMark Johnston 			vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE;
189240afd8cSMark Johnston 		if (vdevsize1 <= fsopts->maxsize)
190240afd8cSMark Johnston 			vdevsize = vdevsize1;
191240afd8cSMark Johnston 	}
192240afd8cSMark Johnston 	asize = vdevsize - VDEV_LABEL_SPACE;
193240afd8cSMark Johnston 
194240afd8cSMark Johnston 	zfs->asize = asize;
195240afd8cSMark Johnston 	zfs->vdevsize = vdevsize;
196240afd8cSMark Johnston 	zfs->mssize = mssize;
197240afd8cSMark Johnston 	zfs->msshift = flsll(mssize) - 1;
198240afd8cSMark Johnston 	zfs->mscount = asize / mssize;
199240afd8cSMark Johnston }
200240afd8cSMark Johnston 
201240afd8cSMark Johnston /*
202240afd8cSMark Johnston  * Validate options and set some default values.
203240afd8cSMark Johnston  */
204240afd8cSMark Johnston static void
205240afd8cSMark Johnston zfs_check_opts(fsinfo_t *fsopts)
206240afd8cSMark Johnston {
207240afd8cSMark Johnston 	zfs_opt_t *zfs;
208240afd8cSMark Johnston 
209240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
210240afd8cSMark Johnston 
211240afd8cSMark Johnston 	if (fsopts->offset != 0)
212240afd8cSMark Johnston 		errx(1, "unhandled offset option");
213240afd8cSMark Johnston 	if (fsopts->maxsize == 0)
214240afd8cSMark Johnston 		errx(1, "an image size must be specified");
215240afd8cSMark Johnston 
216240afd8cSMark Johnston 	if (zfs->poolname == NULL)
217240afd8cSMark Johnston 		errx(1, "a pool name must be specified");
218240afd8cSMark Johnston 
219240afd8cSMark Johnston 	if (zfs->rootpath == NULL)
220240afd8cSMark Johnston 		easprintf(&zfs->rootpath, "/%s", zfs->poolname);
221240afd8cSMark Johnston 	if (zfs->rootpath[0] != '/')
222240afd8cSMark Johnston 		errx(1, "mountpoint `%s' must be absolute", zfs->rootpath);
223240afd8cSMark Johnston 
224240afd8cSMark Johnston 	if (zfs->ashift == 0)
225240afd8cSMark Johnston 		zfs->ashift = 12;
226240afd8cSMark Johnston 
227240afd8cSMark Johnston 	zfs_size_vdev(fsopts);
228240afd8cSMark Johnston }
229240afd8cSMark Johnston 
230240afd8cSMark Johnston void
231240afd8cSMark Johnston zfs_cleanup_opts(fsinfo_t *fsopts)
232240afd8cSMark Johnston {
233240afd8cSMark Johnston 	struct dataset_desc *d, *tmp;
234240afd8cSMark Johnston 	zfs_opt_t *zfs;
235240afd8cSMark Johnston 
236240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
237240afd8cSMark Johnston 	free(zfs->rootpath);
238240afd8cSMark Johnston 	free(zfs->bootfs);
239240afd8cSMark Johnston 	free(__DECONST(void *, zfs->poolname));
240240afd8cSMark Johnston 	STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) {
241240afd8cSMark Johnston 		free(d->params);
242240afd8cSMark Johnston 		free(d);
243240afd8cSMark Johnston 	}
244240afd8cSMark Johnston 	free(zfs);
245240afd8cSMark Johnston 	free(fsopts->fs_options);
246240afd8cSMark Johnston }
247240afd8cSMark Johnston 
248240afd8cSMark Johnston static size_t
249240afd8cSMark Johnston nvlist_size(const nvlist_t *nvl)
250240afd8cSMark Johnston {
251240afd8cSMark Johnston 	return (sizeof(nvl->nv_header) + nvl->nv_size);
252240afd8cSMark Johnston }
253240afd8cSMark Johnston 
254240afd8cSMark Johnston static void
255240afd8cSMark Johnston nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz)
256240afd8cSMark Johnston {
257240afd8cSMark Johnston 	assert(sz >= nvlist_size(nvl));
258240afd8cSMark Johnston 
259240afd8cSMark Johnston 	memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header));
260240afd8cSMark Johnston 	memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size);
261240afd8cSMark Johnston }
262240afd8cSMark Johnston 
263240afd8cSMark Johnston static nvlist_t *
264240afd8cSMark Johnston pool_config_nvcreate(zfs_opt_t *zfs)
265240afd8cSMark Johnston {
266240afd8cSMark Johnston 	nvlist_t *featuresnv, *poolnv;
267240afd8cSMark Johnston 
268240afd8cSMark Johnston 	poolnv = nvlist_create(NV_UNIQUE_NAME);
269240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG);
270240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION);
271240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED);
272240afd8cSMark Johnston 	nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname);
273240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid);
274240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid);
275240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
276240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1);
277240afd8cSMark Johnston 
278240afd8cSMark Johnston 	featuresnv = nvlist_create(NV_UNIQUE_NAME);
279240afd8cSMark Johnston 	nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv);
280240afd8cSMark Johnston 	nvlist_destroy(featuresnv);
281240afd8cSMark Johnston 
282240afd8cSMark Johnston 	return (poolnv);
283240afd8cSMark Johnston }
284240afd8cSMark Johnston 
285240afd8cSMark Johnston static nvlist_t *
286240afd8cSMark Johnston pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs)
287240afd8cSMark Johnston {
288240afd8cSMark Johnston 	nvlist_t *diskvdevnv;
289240afd8cSMark Johnston 
290240afd8cSMark Johnston 	assert(zfs->objarrid != 0);
291240afd8cSMark Johnston 
292240afd8cSMark Johnston 	diskvdevnv = nvlist_create(NV_UNIQUE_NAME);
293240afd8cSMark Johnston 	nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
294240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift);
295240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize);
296240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
297240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0);
298240afd8cSMark Johnston 	nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null");
299240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1);
300240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
301240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY,
302240afd8cSMark Johnston 	    zfs->objarrid);
303240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT,
304240afd8cSMark Johnston 	    zfs->msshift);
305240afd8cSMark Johnston 
306240afd8cSMark Johnston 	return (diskvdevnv);
307240afd8cSMark Johnston }
308240afd8cSMark Johnston 
309240afd8cSMark Johnston static nvlist_t *
310240afd8cSMark Johnston pool_root_vdev_config_nvcreate(zfs_opt_t *zfs)
311240afd8cSMark Johnston {
312240afd8cSMark Johnston 	nvlist_t *diskvdevnv, *rootvdevnv;
313240afd8cSMark Johnston 
314240afd8cSMark Johnston 	diskvdevnv = pool_disk_vdev_config_nvcreate(zfs);
315240afd8cSMark Johnston 	rootvdevnv = nvlist_create(NV_UNIQUE_NAME);
316240afd8cSMark Johnston 
317240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0);
318240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid);
319240afd8cSMark Johnston 	nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
320240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
321240afd8cSMark Johnston 	nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv,
322240afd8cSMark Johnston 	    1);
323240afd8cSMark Johnston 	nvlist_destroy(diskvdevnv);
324240afd8cSMark Johnston 
325240afd8cSMark Johnston 	return (rootvdevnv);
326240afd8cSMark Johnston }
327240afd8cSMark Johnston 
328240afd8cSMark Johnston /*
329240afd8cSMark Johnston  * Create the pool's "config" object, which contains an nvlist describing pool
330240afd8cSMark Johnston  * parameters and the vdev topology.  It is similar but not identical to the
331240afd8cSMark Johnston  * nvlist stored in vdev labels.  The main difference is that vdev labels do not
332240afd8cSMark Johnston  * describe the full vdev tree and in particular do not contain the "root"
333240afd8cSMark Johnston  * meta-vdev.
334240afd8cSMark Johnston  */
335240afd8cSMark Johnston static void
336240afd8cSMark Johnston pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir)
337240afd8cSMark Johnston {
338240afd8cSMark Johnston 	dnode_phys_t *dnode;
339240afd8cSMark Johnston 	nvlist_t *poolconfig, *vdevconfig;
340240afd8cSMark Johnston 	void *configbuf;
341240afd8cSMark Johnston 	uint64_t dnid;
342240afd8cSMark Johnston 	off_t configloc, configblksz;
343240afd8cSMark Johnston 	int error;
344240afd8cSMark Johnston 
345240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST,
346240afd8cSMark Johnston 	    DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid);
347240afd8cSMark Johnston 
348240afd8cSMark Johnston 	poolconfig = pool_config_nvcreate(zfs);
349240afd8cSMark Johnston 
350240afd8cSMark Johnston 	vdevconfig = pool_root_vdev_config_nvcreate(zfs);
351240afd8cSMark Johnston 	nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
352240afd8cSMark Johnston 	nvlist_destroy(vdevconfig);
353240afd8cSMark Johnston 
354240afd8cSMark Johnston 	error = nvlist_export(poolconfig);
355240afd8cSMark Johnston 	if (error != 0)
356240afd8cSMark Johnston 		errc(1, error, "nvlist_export");
357240afd8cSMark Johnston 
358240afd8cSMark Johnston 	configblksz = nvlist_size(poolconfig);
359240afd8cSMark Johnston 	configloc = objset_space_alloc(zfs, zfs->mos, &configblksz);
360240afd8cSMark Johnston 	configbuf = ecalloc(1, configblksz);
361240afd8cSMark Johnston 	nvlist_copy(poolconfig, configbuf, configblksz);
362240afd8cSMark Johnston 
363240afd8cSMark Johnston 	vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc);
364240afd8cSMark Johnston 
365240afd8cSMark Johnston 	dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT;
366240afd8cSMark Johnston 	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
367240afd8cSMark Johnston 	*(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig);
368240afd8cSMark Johnston 
369240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid);
370240afd8cSMark Johnston 
371240afd8cSMark Johnston 	nvlist_destroy(poolconfig);
372240afd8cSMark Johnston 	free(configbuf);
373240afd8cSMark Johnston }
374240afd8cSMark Johnston 
375240afd8cSMark Johnston /*
376240afd8cSMark Johnston  * Add objects block pointer list objects, used for deferred frees.  We don't do
377240afd8cSMark Johnston  * anything with them, but they need to be present or OpenZFS will refuse to
378240afd8cSMark Johnston  * import the pool.
379240afd8cSMark Johnston  */
380240afd8cSMark Johnston static void
381240afd8cSMark Johnston pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir)
382240afd8cSMark Johnston {
383240afd8cSMark Johnston 	uint64_t dnid;
384240afd8cSMark Johnston 
385240afd8cSMark Johnston 	(void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
386240afd8cSMark Johnston 	    BPOBJ_SIZE_V2, &dnid);
387240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid);
388240afd8cSMark Johnston 
389240afd8cSMark Johnston 	(void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
390240afd8cSMark Johnston 	    BPOBJ_SIZE_V2, &dnid);
391240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid);
392240afd8cSMark Johnston }
393240afd8cSMark Johnston 
394240afd8cSMark Johnston /*
395240afd8cSMark Johnston  * Add required feature metadata objects.  We don't know anything about ZFS
396240afd8cSMark Johnston  * features, so the objects are just empty ZAPs.
397240afd8cSMark Johnston  */
398240afd8cSMark Johnston static void
399240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir)
400240afd8cSMark Johnston {
401240afd8cSMark Johnston 	dnode_phys_t *dnode;
402240afd8cSMark Johnston 	uint64_t dnid;
403240afd8cSMark Johnston 
404240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
405240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid);
406240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
407240afd8cSMark Johnston 
408240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
409240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid);
410240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
411240afd8cSMark Johnston 
412240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
413240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid);
414240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
415240afd8cSMark Johnston }
416240afd8cSMark Johnston 
417240afd8cSMark Johnston static void
418240afd8cSMark Johnston pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir)
419240afd8cSMark Johnston {
420240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET,
421240afd8cSMark Johnston 	    dsl_dir_id(zfs->rootdsldir));
422240afd8cSMark Johnston }
423240afd8cSMark Johnston 
424240afd8cSMark Johnston static void
425240afd8cSMark Johnston pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir)
426240afd8cSMark Johnston {
427240afd8cSMark Johnston 	dnode_phys_t *dnode;
428240afd8cSMark Johnston 	uint64_t id;
429240afd8cSMark Johnston 
430240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id);
431240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_PROPS, id);
432240afd8cSMark Johnston 
433240afd8cSMark Johnston 	zfs->poolprops = zap_alloc(zfs->mos, dnode);
434240afd8cSMark Johnston }
435240afd8cSMark Johnston 
436240afd8cSMark Johnston /*
437240afd8cSMark Johnston  * Initialize the MOS object directory, the root of virtually all of the pool's
438240afd8cSMark Johnston  * data and metadata.
439240afd8cSMark Johnston  */
440240afd8cSMark Johnston static void
441240afd8cSMark Johnston pool_init_objdir(zfs_opt_t *zfs)
442240afd8cSMark Johnston {
443240afd8cSMark Johnston 	zfs_zap_t *zap;
444240afd8cSMark Johnston 	dnode_phys_t *objdir;
445240afd8cSMark Johnston 
446240afd8cSMark Johnston 	objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT);
447240afd8cSMark Johnston 
448240afd8cSMark Johnston 	zap = zap_alloc(zfs->mos, objdir);
449240afd8cSMark Johnston 	pool_init_objdir_config(zfs, zap);
450240afd8cSMark Johnston 	pool_init_objdir_bplists(zfs, zap);
451240afd8cSMark Johnston 	pool_init_objdir_feature_maps(zfs, zap);
452240afd8cSMark Johnston 	pool_init_objdir_dsl(zfs, zap);
453240afd8cSMark Johnston 	pool_init_objdir_poolprops(zfs, zap);
454240afd8cSMark Johnston 	zap_write(zfs, zap);
455240afd8cSMark Johnston }
456240afd8cSMark Johnston 
457240afd8cSMark Johnston /*
458240afd8cSMark Johnston  * Initialize the meta-object set (MOS) and immediately write out several
459240afd8cSMark Johnston  * special objects whose contents are already finalized, including the object
460240afd8cSMark Johnston  * directory.
461240afd8cSMark Johnston  *
462240afd8cSMark Johnston  * Once the MOS is finalized, it'll look roughly like this:
463240afd8cSMark Johnston  *
464240afd8cSMark Johnston  *	object directory (ZAP)
465240afd8cSMark Johnston  *	|-> vdev config object (nvlist)
466240afd8cSMark Johnston  *	|-> features for read
467240afd8cSMark Johnston  *	|-> features for write
468240afd8cSMark Johnston  *	|-> feature descriptions
469240afd8cSMark Johnston  *	|-> sync bplist
470240afd8cSMark Johnston  *	|-> free bplist
471240afd8cSMark Johnston  *	|-> pool properties
472240afd8cSMark Johnston  *	L-> root DSL directory
473240afd8cSMark Johnston  *	    |-> DSL child directory (ZAP)
474240afd8cSMark Johnston  *	    |   |-> $MOS (DSL dir)
475240afd8cSMark Johnston  *	    |   |   |-> child map
476240afd8cSMark Johnston  *	    |   |   L-> props (ZAP)
477240afd8cSMark Johnston  *	    |   |-> $FREE (DSL dir)
478240afd8cSMark Johnston  *	    |   |   |-> child map
479240afd8cSMark Johnston  *	    |   |   L-> props (ZAP)
480240afd8cSMark Johnston  *	    |   |-> $ORIGIN (DSL dir)
481240afd8cSMark Johnston  *	    |   |   |-> child map
482240afd8cSMark Johnston  *	    |   |   |-> dataset
483240afd8cSMark Johnston  *	    |   |   |   L-> deadlist
484240afd8cSMark Johnston  *	    |   |   |-> snapshot
485240afd8cSMark Johnston  *	    |   |   |   |-> deadlist
486240afd8cSMark Johnston  *	    |   |   |   L-> snapshot names
487240afd8cSMark Johnston  *	    |   |   |-> props (ZAP)
488240afd8cSMark Johnston  *	    |   |   L-> clones (ZAP)
489240afd8cSMark Johnston  *	    |   |-> dataset 1 (DSL dir)
490240afd8cSMark Johnston  *	    |   |   |-> DSL dataset
491240afd8cSMark Johnston  *	    |   |   |   |-> snapshot names
492240afd8cSMark Johnston  *	    |   |   |   L-> deadlist
493240afd8cSMark Johnston  *	    |   |   |-> child map
494240afd8cSMark Johnston  *	    |   |   |   L-> ...
495240afd8cSMark Johnston  *	    |   |   L-> props
496240afd8cSMark Johnston  *	    |   |-> dataset 2
497240afd8cSMark Johnston  *	    |   |   L-> ...
498240afd8cSMark Johnston  *	    |   |-> ...
499240afd8cSMark Johnston  *	    |   L-> dataset n
500240afd8cSMark Johnston  *	    |-> DSL root dataset
501240afd8cSMark Johnston  *	    |   |-> snapshot names
502240afd8cSMark Johnston  *	    |   L-> deadlist
503240afd8cSMark Johnston  *	    L-> props (ZAP)
504240afd8cSMark Johnston  *	space map object array
505240afd8cSMark Johnston  *	|-> space map 1
506240afd8cSMark Johnston  *	|-> space map 2
507240afd8cSMark Johnston  *	|-> ...
508240afd8cSMark Johnston  *	L-> space map n (zfs->mscount)
509240afd8cSMark Johnston  *
510240afd8cSMark Johnston  * The space map object array is pointed to by the "msarray" property in the
511240afd8cSMark Johnston  * pool configuration.
512240afd8cSMark Johnston  */
513240afd8cSMark Johnston static void
514240afd8cSMark Johnston pool_init(zfs_opt_t *zfs)
515240afd8cSMark Johnston {
516240afd8cSMark Johnston 	uint64_t dnid;
517240afd8cSMark Johnston 
518240afd8cSMark Johnston 	zfs->poolguid = ((uint64_t)random() << 32) | random();
519240afd8cSMark Johnston 	zfs->vdevguid = ((uint64_t)random() << 32) | random();
520240afd8cSMark Johnston 
521240afd8cSMark Johnston 	zfs->mos = objset_alloc(zfs, DMU_OST_META);
522240afd8cSMark Johnston 
523240afd8cSMark Johnston 	(void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid);
524240afd8cSMark Johnston 	assert(dnid == DMU_POOL_DIRECTORY_OBJECT);
525240afd8cSMark Johnston 
526240afd8cSMark Johnston 	(void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid);
527240afd8cSMark Johnston 
528240afd8cSMark Johnston 	dsl_init(zfs);
529240afd8cSMark Johnston 
530240afd8cSMark Johnston 	pool_init_objdir(zfs);
531240afd8cSMark Johnston }
532240afd8cSMark Johnston 
533240afd8cSMark Johnston static void
534240afd8cSMark Johnston pool_labels_write(zfs_opt_t *zfs)
535240afd8cSMark Johnston {
536240afd8cSMark Johnston 	uberblock_t *ub;
537240afd8cSMark Johnston 	vdev_label_t *label;
538240afd8cSMark Johnston 	nvlist_t *poolconfig, *vdevconfig;
539240afd8cSMark Johnston 	int error;
540240afd8cSMark Johnston 
541240afd8cSMark Johnston 	label = ecalloc(1, sizeof(*label));
542240afd8cSMark Johnston 
543240afd8cSMark Johnston 	/*
544240afd8cSMark Johnston 	 * Assemble the vdev configuration and store it in the label.
545240afd8cSMark Johnston 	 */
546240afd8cSMark Johnston 	poolconfig = pool_config_nvcreate(zfs);
547240afd8cSMark Johnston 	vdevconfig = pool_disk_vdev_config_nvcreate(zfs);
548240afd8cSMark Johnston 	nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
549240afd8cSMark Johnston 	nvlist_destroy(vdevconfig);
550240afd8cSMark Johnston 
551240afd8cSMark Johnston 	error = nvlist_export(poolconfig);
552240afd8cSMark Johnston 	if (error != 0)
553240afd8cSMark Johnston 		errc(1, error, "nvlist_export");
554240afd8cSMark Johnston 	nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist,
555240afd8cSMark Johnston 	    sizeof(label->vl_vdev_phys.vp_nvlist));
556240afd8cSMark Johnston 	nvlist_destroy(poolconfig);
557240afd8cSMark Johnston 
558240afd8cSMark Johnston 	/*
559240afd8cSMark Johnston 	 * Fill out the uberblock.  Just make each one the same.  The embedded
560240afd8cSMark Johnston 	 * checksum is calculated in vdev_label_write().
561240afd8cSMark Johnston 	 */
562240afd8cSMark Johnston 	for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock);
563240afd8cSMark Johnston 	    uoff += (1 << zfs->ashift)) {
564240afd8cSMark Johnston 		ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff);
565240afd8cSMark Johnston 		ub->ub_magic = UBERBLOCK_MAGIC;
566240afd8cSMark Johnston 		ub->ub_version = SPA_VERSION;
567240afd8cSMark Johnston 		ub->ub_txg = TXG;
568240afd8cSMark Johnston 		ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid;
569240afd8cSMark Johnston 		ub->ub_timestamp = 0;
570240afd8cSMark Johnston 
571240afd8cSMark Johnston 		ub->ub_software_version = SPA_VERSION;
572240afd8cSMark Johnston 		ub->ub_mmp_magic = MMP_MAGIC;
573240afd8cSMark Johnston 		ub->ub_mmp_delay = 0;
574240afd8cSMark Johnston 		ub->ub_mmp_config = 0;
575240afd8cSMark Johnston 		ub->ub_checkpoint_txg = 0;
576240afd8cSMark Johnston 		objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp);
577240afd8cSMark Johnston 	}
578240afd8cSMark Johnston 
579240afd8cSMark Johnston 	/*
580240afd8cSMark Johnston 	 * Write out four copies of the label: two at the beginning of the vdev
581240afd8cSMark Johnston 	 * and two at the end.
582240afd8cSMark Johnston 	 */
583240afd8cSMark Johnston 	for (int i = 0; i < VDEV_LABELS; i++)
584240afd8cSMark Johnston 		vdev_label_write(zfs, i, label);
585240afd8cSMark Johnston 
586240afd8cSMark Johnston 	free(label);
587240afd8cSMark Johnston }
588240afd8cSMark Johnston 
589240afd8cSMark Johnston static void
590240afd8cSMark Johnston pool_fini(zfs_opt_t *zfs)
591240afd8cSMark Johnston {
592240afd8cSMark Johnston 	zap_write(zfs, zfs->poolprops);
593240afd8cSMark Johnston 	dsl_write(zfs);
594240afd8cSMark Johnston 	objset_write(zfs, zfs->mos);
595240afd8cSMark Johnston 	pool_labels_write(zfs);
596240afd8cSMark Johnston }
597240afd8cSMark Johnston 
598240afd8cSMark Johnston struct dnode_cursor *
599240afd8cSMark Johnston dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode,
600240afd8cSMark Johnston     off_t size, off_t blksz)
601240afd8cSMark Johnston {
602240afd8cSMark Johnston 	struct dnode_cursor *c;
603240afd8cSMark Johnston 	uint64_t nbppindir, indlevel, ndatablks, nindblks;
604240afd8cSMark Johnston 
605240afd8cSMark Johnston 	assert(dnode->dn_nblkptr == 1);
606240afd8cSMark Johnston 	assert(blksz <= MAXBLOCKSIZE);
607240afd8cSMark Johnston 
608240afd8cSMark Johnston 	if (blksz == 0) {
609240afd8cSMark Johnston 		/* Must be between 1<<ashift and 128KB. */
610240afd8cSMark Johnston 		blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift,
6119821e244SJohn Baldwin 		    powerof2(size) ? size : (1l << flsll(size))));
612240afd8cSMark Johnston 	}
613240afd8cSMark Johnston 	assert(powerof2(blksz));
614240afd8cSMark Johnston 
615240afd8cSMark Johnston 	/*
616240afd8cSMark Johnston 	 * Do we need indirect blocks?  Figure out how many levels are needed
617240afd8cSMark Johnston 	 * (indlevel == 1 means no indirect blocks) and how much space is needed
618240afd8cSMark Johnston 	 * (it has to be allocated up-front to break the dependency cycle
619240afd8cSMark Johnston 	 * described in objset_write()).
620240afd8cSMark Johnston 	 */
621240afd8cSMark Johnston 	ndatablks = size == 0 ? 0 : howmany(size, blksz);
622240afd8cSMark Johnston 	nindblks = 0;
623240afd8cSMark Johnston 	for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) {
624240afd8cSMark Johnston 		nbppindir *= BLKPTR_PER_INDIR;
625240afd8cSMark Johnston 		nindblks += howmany(ndatablks, indlevel * nbppindir);
626240afd8cSMark Johnston 	}
627240afd8cSMark Johnston 	assert(indlevel < INDIR_LEVELS);
628240afd8cSMark Johnston 
629240afd8cSMark Johnston 	dnode->dn_nlevels = (uint8_t)indlevel;
630240afd8cSMark Johnston 	dnode->dn_maxblkid = ndatablks > 0 ? ndatablks - 1 : 0;
631240afd8cSMark Johnston 	dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT;
632240afd8cSMark Johnston 
633240afd8cSMark Johnston 	c = ecalloc(1, sizeof(*c));
634240afd8cSMark Johnston 	if (nindblks > 0) {
635240afd8cSMark Johnston 		c->indspace = nindblks * MAXBLOCKSIZE;
636240afd8cSMark Johnston 		c->indloc = objset_space_alloc(zfs, os, &c->indspace);
637240afd8cSMark Johnston 	}
638240afd8cSMark Johnston 	c->dnode = dnode;
639240afd8cSMark Johnston 	c->dataoff = 0;
640240afd8cSMark Johnston 	c->datablksz = blksz;
641240afd8cSMark Johnston 
642240afd8cSMark Johnston 	return (c);
643240afd8cSMark Johnston }
644240afd8cSMark Johnston 
645240afd8cSMark Johnston static void
646240afd8cSMark Johnston _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, int levels)
647240afd8cSMark Johnston {
648240afd8cSMark Johnston 	blkptr_t *bp, *pbp;
649240afd8cSMark Johnston 	void *buf;
650240afd8cSMark Johnston 	uint64_t fill;
651240afd8cSMark Johnston 	off_t blkid, blksz, loc;
652240afd8cSMark Johnston 
653240afd8cSMark Johnston 	assert(levels > 0);
654240afd8cSMark Johnston 	assert(levels <= c->dnode->dn_nlevels - 1);
655240afd8cSMark Johnston 
656240afd8cSMark Johnston 	blksz = MAXBLOCKSIZE;
657240afd8cSMark Johnston 	blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR;
658240afd8cSMark Johnston 	for (int level = 1; level <= levels; level++) {
659240afd8cSMark Johnston 		buf = c->inddir[level - 1];
660240afd8cSMark Johnston 
661240afd8cSMark Johnston 		if (level == c->dnode->dn_nlevels - 1) {
662240afd8cSMark Johnston 			pbp = &c->dnode->dn_blkptr[0];
663240afd8cSMark Johnston 		} else {
664240afd8cSMark Johnston 			uint64_t iblkid;
665240afd8cSMark Johnston 
666240afd8cSMark Johnston 			iblkid = blkid & (BLKPTR_PER_INDIR - 1);
667240afd8cSMark Johnston 			pbp = (blkptr_t *)
668240afd8cSMark Johnston 			    &c->inddir[level][iblkid * sizeof(blkptr_t)];
669240afd8cSMark Johnston 		}
670240afd8cSMark Johnston 
671240afd8cSMark Johnston 		/*
672240afd8cSMark Johnston 		 * Space for indirect blocks is allocated up-front; see the
673240afd8cSMark Johnston 		 * comment in objset_write().
674240afd8cSMark Johnston 		 */
675240afd8cSMark Johnston 		loc = c->indloc;
676240afd8cSMark Johnston 		c->indloc += blksz;
677240afd8cSMark Johnston 		assert(c->indspace >= blksz);
678240afd8cSMark Johnston 		c->indspace -= blksz;
679240afd8cSMark Johnston 
680240afd8cSMark Johnston 		bp = buf;
681240afd8cSMark Johnston 		fill = 0;
682240afd8cSMark Johnston 		for (size_t i = 0; i < BLKPTR_PER_INDIR; i++)
683240afd8cSMark Johnston 			fill += BP_GET_FILL(&bp[i]);
684240afd8cSMark Johnston 
685240afd8cSMark Johnston 		vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz,
686240afd8cSMark Johnston 		    loc, pbp);
687240afd8cSMark Johnston 		memset(buf, 0, MAXBLOCKSIZE);
688240afd8cSMark Johnston 
689240afd8cSMark Johnston 		blkid /= BLKPTR_PER_INDIR;
690240afd8cSMark Johnston 	}
691240afd8cSMark Johnston }
692240afd8cSMark Johnston 
693240afd8cSMark Johnston blkptr_t *
694240afd8cSMark Johnston dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off)
695240afd8cSMark Johnston {
696240afd8cSMark Johnston 	off_t blkid, l1id;
697240afd8cSMark Johnston 	int levels;
698240afd8cSMark Johnston 
699240afd8cSMark Johnston 	if (c->dnode->dn_nlevels == 1) {
700240afd8cSMark Johnston 		assert(off < MAXBLOCKSIZE);
701240afd8cSMark Johnston 		return (&c->dnode->dn_blkptr[0]);
702240afd8cSMark Johnston 	}
703240afd8cSMark Johnston 
704240afd8cSMark Johnston 	assert(off % c->datablksz == 0);
705240afd8cSMark Johnston 
706240afd8cSMark Johnston 	/* Do we need to flush any full indirect blocks? */
707240afd8cSMark Johnston 	if (off > 0) {
708240afd8cSMark Johnston 		blkid = off / c->datablksz;
709240afd8cSMark Johnston 		for (levels = 0; levels < c->dnode->dn_nlevels - 1; levels++) {
710240afd8cSMark Johnston 			if (blkid % BLKPTR_PER_INDIR != 0)
711240afd8cSMark Johnston 				break;
712240afd8cSMark Johnston 			blkid /= BLKPTR_PER_INDIR;
713240afd8cSMark Johnston 		}
714240afd8cSMark Johnston 		if (levels > 0)
715240afd8cSMark Johnston 			_dnode_cursor_flush(zfs, c, levels);
716240afd8cSMark Johnston 	}
717240afd8cSMark Johnston 
718240afd8cSMark Johnston 	c->dataoff = off;
719240afd8cSMark Johnston 	l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1);
720240afd8cSMark Johnston 	return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]);
721240afd8cSMark Johnston }
722240afd8cSMark Johnston 
723240afd8cSMark Johnston void
724240afd8cSMark Johnston dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c)
725240afd8cSMark Johnston {
726240afd8cSMark Johnston 	int levels;
727240afd8cSMark Johnston 
728240afd8cSMark Johnston 	levels = c->dnode->dn_nlevels - 1;
729240afd8cSMark Johnston 	if (levels > 0)
730240afd8cSMark Johnston 		_dnode_cursor_flush(zfs, c, levels);
731240afd8cSMark Johnston 	assert(c->indspace == 0);
732240afd8cSMark Johnston 	free(c);
733240afd8cSMark Johnston }
734240afd8cSMark Johnston 
735240afd8cSMark Johnston void
736240afd8cSMark Johnston zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts)
737240afd8cSMark Johnston {
738240afd8cSMark Johnston 	zfs_opt_t *zfs;
739240afd8cSMark Johnston 	int dirfd;
740240afd8cSMark Johnston 
741240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
742240afd8cSMark Johnston 
743240afd8cSMark Johnston 	/*
744240afd8cSMark Johnston 	 * Use a fixed seed to provide reproducible pseudo-random numbers for
745240afd8cSMark Johnston 	 * on-disk structures when needed (e.g., GUIDs, ZAP hash salts).
746240afd8cSMark Johnston 	 */
747240afd8cSMark Johnston 	srandom(1729);
748240afd8cSMark Johnston 
749240afd8cSMark Johnston 	zfs_check_opts(fsopts);
750240afd8cSMark Johnston 
751240afd8cSMark Johnston 	if (!zfs->nowarn) {
752240afd8cSMark Johnston 		fprintf(stderr,
753240afd8cSMark Johnston 		    "ZFS support is currently considered experimental. "
754240afd8cSMark Johnston 		    "Do not use it for anything critical.\n");
755240afd8cSMark Johnston 	}
756240afd8cSMark Johnston 
757240afd8cSMark Johnston 	dirfd = open(dir, O_DIRECTORY | O_RDONLY);
758240afd8cSMark Johnston 	if (dirfd < 0)
759240afd8cSMark Johnston 		err(1, "open(%s)", dir);
760240afd8cSMark Johnston 
761240afd8cSMark Johnston 	vdev_init(zfs, image);
762240afd8cSMark Johnston 	pool_init(zfs);
763240afd8cSMark Johnston 	fs_build(zfs, dirfd, root);
764240afd8cSMark Johnston 	pool_fini(zfs);
765240afd8cSMark Johnston 	vdev_fini(zfs);
766240afd8cSMark Johnston }
767