xref: /freebsd/stand/libsa/zfs/zfs.c (revision 315ee00f)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 /*
29  *	Stand-alone file reading package.
30  */
31 
32 #include <stand.h>
33 #include <sys/disk.h>
34 #include <sys/param.h>
35 #include <sys/time.h>
36 #include <sys/queue.h>
37 #include <part.h>
38 #include <stddef.h>
39 #include <stdarg.h>
40 #include <string.h>
41 #include <bootstrap.h>
42 
43 #include "libzfs.h"
44 
45 #include "zfsimpl.c"
46 
47 /* Define the range of indexes to be populated with ZFS Boot Environments */
48 #define		ZFS_BE_FIRST	4
49 #define		ZFS_BE_LAST	8
50 
51 static int	zfs_open(const char *path, struct open_file *f);
52 static int	zfs_close(struct open_file *f);
53 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
54 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
55 static int	zfs_stat(struct open_file *f, struct stat *sb);
56 static int	zfs_readdir(struct open_file *f, struct dirent *d);
57 static int	zfs_mount(const char *dev, const char *path, void **data);
58 static int	zfs_unmount(const char *dev, void *data);
59 
60 static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
61 		    const char *name, const char *dsname, int checkpoint);
62 static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
63 		    const char *dsname);
64 
65 static int	zfs_parsedev(struct devdesc **idev, const char *devspec,
66 		    const char **path);
67 
68 struct devsw zfs_dev;
69 
70 struct fs_ops zfs_fsops = {
71 	.fs_name = "zfs",
72 	.fo_open = zfs_open,
73 	.fo_close = zfs_close,
74 	.fo_read = zfs_read,
75 	.fo_write = null_write,
76 	.fo_seek = zfs_seek,
77 	.fo_stat = zfs_stat,
78 	.fo_readdir = zfs_readdir,
79 	.fo_mount = zfs_mount,
80 	.fo_unmount = zfs_unmount
81 };
82 
83 /*
84  * In-core open file.
85  */
86 struct file {
87 	off_t		f_seekp;	/* seek pointer */
88 	dnode_phys_t	f_dnode;
89 	uint64_t	f_zap_type;	/* zap type for readdir */
90 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
91 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
92 };
93 
/*
 * Boot environment bookkeeping.
 * NOTE(review): the code using these objects lies outside this section;
 * the names suggest a paging index/count over the list of boot
 * environments -- confirm against the users.
 */
static int	zfs_env_index;
static int	zfs_env_count;

/* Singly-linked list of boot environment names. */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head =
    SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;

struct zfs_be_entry {
	char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
};
struct zfs_be_entry *zfs_be, *zfs_be_tmp;
103 
104 /*
105  * Open a file.
106  */
107 static int
108 zfs_open(const char *upath, struct open_file *f)
109 {
110 	struct devdesc *dev = f->f_devdata;
111 	struct zfsmount *mount = dev->d_opendata;
112 	struct file *fp;
113 	int rc;
114 
115 	if (f->f_dev != &zfs_dev)
116 		return (EINVAL);
117 
118 	/* allocate file system specific data structure */
119 	fp = calloc(1, sizeof(struct file));
120 	if (fp == NULL)
121 		return (ENOMEM);
122 	f->f_fsdata = fp;
123 
124 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
125 	fp->f_seekp = 0;
126 	if (rc) {
127 		f->f_fsdata = NULL;
128 		free(fp);
129 	}
130 	return (rc);
131 }
132 
133 static int
134 zfs_close(struct open_file *f)
135 {
136 	struct file *fp = (struct file *)f->f_fsdata;
137 
138 	dnode_cache_obj = NULL;
139 	f->f_fsdata = NULL;
140 
141 	free(fp);
142 	return (0);
143 }
144 
145 /*
146  * Copy a portion of a file into kernel memory.
147  * Cross block boundaries when necessary.
148  */
149 static int
150 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
151 {
152 	struct devdesc *dev = f->f_devdata;
153 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
154 	struct file *fp = (struct file *)f->f_fsdata;
155 	struct stat sb;
156 	size_t n;
157 	int rc;
158 
159 	rc = zfs_stat(f, &sb);
160 	if (rc)
161 		return (rc);
162 	n = size;
163 	if (fp->f_seekp + n > sb.st_size)
164 		n = sb.st_size - fp->f_seekp;
165 
166 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
167 	if (rc)
168 		return (rc);
169 
170 	if (0) {
171 	    int i;
172 	    for (i = 0; i < n; i++)
173 		putchar(((char*) start)[i]);
174 	}
175 	fp->f_seekp += n;
176 	if (resid)
177 		*resid = size - n;
178 
179 	return (0);
180 }
181 
182 static off_t
183 zfs_seek(struct open_file *f, off_t offset, int where)
184 {
185 	struct file *fp = (struct file *)f->f_fsdata;
186 
187 	switch (where) {
188 	case SEEK_SET:
189 		fp->f_seekp = offset;
190 		break;
191 	case SEEK_CUR:
192 		fp->f_seekp += offset;
193 		break;
194 	case SEEK_END:
195 	    {
196 		struct stat sb;
197 		int error;
198 
199 		error = zfs_stat(f, &sb);
200 		if (error != 0) {
201 			errno = error;
202 			return (-1);
203 		}
204 		fp->f_seekp = sb.st_size - offset;
205 		break;
206 	    }
207 	default:
208 		errno = EINVAL;
209 		return (-1);
210 	}
211 	return (fp->f_seekp);
212 }
213 
214 static int
215 zfs_stat(struct open_file *f, struct stat *sb)
216 {
217 	struct devdesc *dev = f->f_devdata;
218 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
219 	struct file *fp = (struct file *)f->f_fsdata;
220 
221 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
222 }
223 
224 static int
225 zfs_readdir(struct open_file *f, struct dirent *d)
226 {
227 	struct devdesc *dev = f->f_devdata;
228 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
229 	struct file *fp = (struct file *)f->f_fsdata;
230 	mzap_ent_phys_t mze;
231 	struct stat sb;
232 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
233 	int rc;
234 
235 	rc = zfs_stat(f, &sb);
236 	if (rc)
237 		return (rc);
238 	if (!S_ISDIR(sb.st_mode))
239 		return (ENOTDIR);
240 
241 	/*
242 	 * If this is the first read, get the zap type.
243 	 */
244 	if (fp->f_seekp == 0) {
245 		rc = dnode_read(spa, &fp->f_dnode,
246 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
247 		if (rc)
248 			return (rc);
249 
250 		if (fp->f_zap_type == ZBT_MICRO) {
251 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
252 		} else {
253 			rc = dnode_read(spa, &fp->f_dnode,
254 					offsetof(zap_phys_t, zap_num_leafs),
255 					&fp->f_num_leafs,
256 					sizeof(fp->f_num_leafs));
257 			if (rc)
258 				return (rc);
259 
260 			fp->f_seekp = bsize;
261 			fp->f_zap_leaf = malloc(bsize);
262 			if (fp->f_zap_leaf == NULL)
263 				return (ENOMEM);
264 			rc = dnode_read(spa, &fp->f_dnode,
265 					fp->f_seekp,
266 					fp->f_zap_leaf,
267 					bsize);
268 			if (rc)
269 				return (rc);
270 		}
271 	}
272 
273 	if (fp->f_zap_type == ZBT_MICRO) {
274 	mzap_next:
275 		if (fp->f_seekp >= bsize)
276 			return (ENOENT);
277 
278 		rc = dnode_read(spa, &fp->f_dnode,
279 				fp->f_seekp, &mze, sizeof(mze));
280 		if (rc)
281 			return (rc);
282 		fp->f_seekp += sizeof(mze);
283 
284 		if (!mze.mze_name[0])
285 			goto mzap_next;
286 
287 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
288 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
289 		strcpy(d->d_name, mze.mze_name);
290 		d->d_namlen = strlen(d->d_name);
291 		return (0);
292 	} else {
293 		zap_leaf_t zl;
294 		zap_leaf_chunk_t *zc, *nc;
295 		int chunk;
296 		size_t namelen;
297 		char *p;
298 		uint64_t value;
299 
300 		/*
301 		 * Initialise this so we can use the ZAP size
302 		 * calculating macros.
303 		 */
304 		zl.l_bs = ilog2(bsize);
305 		zl.l_phys = fp->f_zap_leaf;
306 
307 		/*
308 		 * Figure out which chunk we are currently looking at
309 		 * and consider seeking to the next leaf. We use the
310 		 * low bits of f_seekp as a simple chunk index.
311 		 */
312 	fzap_next:
313 		chunk = fp->f_seekp & (bsize - 1);
314 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
315 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
316 			chunk = 0;
317 
318 			/*
319 			 * Check for EOF and read the new leaf.
320 			 */
321 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
322 				return (ENOENT);
323 
324 			rc = dnode_read(spa, &fp->f_dnode,
325 					fp->f_seekp,
326 					fp->f_zap_leaf,
327 					bsize);
328 			if (rc)
329 				return (rc);
330 		}
331 
332 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
333 		fp->f_seekp++;
334 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
335 			goto fzap_next;
336 
337 		namelen = zc->l_entry.le_name_numints;
338 		if (namelen > sizeof(d->d_name))
339 			namelen = sizeof(d->d_name);
340 
341 		/*
342 		 * Paste the name back together.
343 		 */
344 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
345 		p = d->d_name;
346 		while (namelen > 0) {
347 			int len;
348 			len = namelen;
349 			if (len > ZAP_LEAF_ARRAY_BYTES)
350 				len = ZAP_LEAF_ARRAY_BYTES;
351 			memcpy(p, nc->l_array.la_array, len);
352 			p += len;
353 			namelen -= len;
354 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
355 		}
356 		d->d_name[sizeof(d->d_name) - 1] = 0;
357 
358 		/*
359 		 * Assume the first eight bytes of the value are
360 		 * a uint64_t.
361 		 */
362 		value = fzap_leaf_value(&zl, zc);
363 
364 		d->d_fileno = ZFS_DIRENT_OBJ(value);
365 		d->d_type = ZFS_DIRENT_TYPE(value);
366 		d->d_namlen = strlen(d->d_name);
367 
368 		return (0);
369 	}
370 }
371 
372 static spa_t *
373 spa_find_by_dev(struct zfs_devdesc *dev)
374 {
375 
376 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
377 		return (NULL);
378 
379 	if (dev->pool_guid == 0)
380 		return (STAILQ_FIRST(&zfs_pools));
381 
382 	return (spa_find_by_guid(dev->pool_guid));
383 }
384 
385 /*
386  * if path is NULL, create mount structure, but do not add it to list.
387  */
388 static int
389 zfs_mount(const char *dev, const char *path, void **data)
390 {
391 	struct zfs_devdesc *zfsdev = NULL;
392 	spa_t *spa;
393 	struct zfsmount *mnt = NULL;
394 	int rv;
395 
396 	errno = 0;
397 	rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
398 	if (rv != 0) {
399 		return (rv);
400 	}
401 
402 	spa = spa_find_by_dev(zfsdev);
403 	if (spa == NULL) {
404 		rv = ENXIO;
405 		goto err;
406 	}
407 
408 	mnt = calloc(1, sizeof(*mnt));
409 	if (mnt == NULL) {
410 		rv = ENOMEM;
411 		goto err;
412 	}
413 
414 	if (mnt->path != NULL) {
415 		mnt->path = strdup(path);
416 		if (mnt->path == NULL) {
417 			rv = ENOMEM;
418 			goto err;
419 		}
420 	}
421 
422 	rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
423 
424 	if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) {
425 		printf("Unexpected object set type %ju\n",
426 		    (uintmax_t)mnt->objset.os_type);
427 		rv = EIO;
428 	}
429 err:
430 	if (rv != 0) {
431 		if (mnt != NULL)
432 			free(mnt->path);
433 		free(mnt);
434 		free(zfsdev);
435 		return (rv);
436 	}
437 
438 	*data = mnt;
439 	if (path != NULL)
440 		STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
441 
442 	free(zfsdev);
443 
444 	return (rv);
445 }
446 
447 static int
448 zfs_unmount(const char *dev, void *data)
449 {
450 	struct zfsmount *mnt = data;
451 
452 	STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
453 	free(mnt->path);
454 	free(mnt);
455 	return (0);
456 }
457 
458 static int
459 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
460 {
461 	int fd, ret;
462 	size_t res, head, tail, total_size, full_sec_size;
463 	unsigned secsz, do_tail_read;
464 	off_t start_sec;
465 	char *outbuf, *bouncebuf;
466 
467 	fd = (uintptr_t) priv;
468 	outbuf = (char *) buf;
469 	bouncebuf = NULL;
470 
471 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
472 	if (ret != 0)
473 		return (ret);
474 
475 	/*
476 	 * Handling reads of arbitrary offset and size - multi-sector case
477 	 * and single-sector case.
478 	 *
479 	 *                        Multi-sector Case
480 	 *                (do_tail_read = true if tail > 0)
481 	 *
482 	 *   |<----------------------total_size--------------------->|
483 	 *   |                                                       |
484 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
485 	 *   |          |                                 |          |
486 	 *   |          |       |<~full_sec_size~>|       |          |
487 	 *   +------------------+                 +------------------+
488 	 *   |          |0101010|     .  .  .     |0101011|          |
489 	 *   +------------------+                 +------------------+
490 	 *         start_sec                         start_sec + n
491 	 *
492 	 *
493 	 *                      Single-sector Case
494 	 *                    (do_tail_read = false)
495 	 *
496 	 *              |<------total_size = secsz----->|
497 	 *              |                               |
498 	 *              |<-head->|<---bytes--->|<-tail->|
499 	 *              +-------------------------------+
500 	 *              |        |0101010101010|        |
501 	 *              +-------------------------------+
502 	 *                          start_sec
503 	 */
504 	start_sec = offset / secsz;
505 	head = offset % secsz;
506 	total_size = roundup2(head + bytes, secsz);
507 	tail = total_size - (head + bytes);
508 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
509 	full_sec_size = total_size;
510 	if (head > 0)
511 		full_sec_size -= secsz;
512 	if (do_tail_read)
513 		full_sec_size -= secsz;
514 
515 	/* Return of partial sector data requires a bounce buffer. */
516 	if ((head > 0) || do_tail_read || bytes < secsz) {
517 		bouncebuf = malloc(secsz);
518 		if (bouncebuf == NULL) {
519 			printf("vdev_read: out of memory\n");
520 			return (ENOMEM);
521 		}
522 	}
523 
524 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
525 		ret = errno;
526 		goto error;
527 	}
528 
529 	/* Partial data return from first sector */
530 	if (head > 0) {
531 		res = read(fd, bouncebuf, secsz);
532 		if (res != secsz) {
533 			ret = EIO;
534 			goto error;
535 		}
536 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
537 		outbuf += min(secsz - head, bytes);
538 	}
539 
540 	/*
541 	 * Full data return from read sectors.
542 	 * Note, there is still corner case where we read
543 	 * from sector boundary, but less than sector size, e.g. reading 512B
544 	 * from 4k sector.
545 	 */
546 	if (full_sec_size > 0) {
547 		if (bytes < full_sec_size) {
548 			res = read(fd, bouncebuf, secsz);
549 			if (res != secsz) {
550 				ret = EIO;
551 				goto error;
552 			}
553 			memcpy(outbuf, bouncebuf, bytes);
554 		} else {
555 			res = read(fd, outbuf, full_sec_size);
556 			if (res != full_sec_size) {
557 				ret = EIO;
558 				goto error;
559 			}
560 			outbuf += full_sec_size;
561 		}
562 	}
563 
564 	/* Partial data return from last sector */
565 	if (do_tail_read) {
566 		res = read(fd, bouncebuf, secsz);
567 		if (res != secsz) {
568 			ret = EIO;
569 			goto error;
570 		}
571 		memcpy(outbuf, bouncebuf, secsz - tail);
572 	}
573 
574 	ret = 0;
575 error:
576 	free(bouncebuf);
577 	return (ret);
578 }
579 
580 static int
581 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
582 {
583 	int fd, ret;
584 	size_t head, tail, total_size, full_sec_size;
585 	unsigned secsz, do_tail_write;
586 	off_t start_sec;
587 	ssize_t res;
588 	char *outbuf, *bouncebuf;
589 
590 	fd = (uintptr_t)vdev->v_priv;
591 	outbuf = (char *)buf;
592 	bouncebuf = NULL;
593 
594 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
595 	if (ret != 0)
596 		return (ret);
597 
598 	start_sec = offset / secsz;
599 	head = offset % secsz;
600 	total_size = roundup2(head + bytes, secsz);
601 	tail = total_size - (head + bytes);
602 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
603 	full_sec_size = total_size;
604 	if (head > 0)
605 		full_sec_size -= secsz;
606 	if (do_tail_write)
607 		full_sec_size -= secsz;
608 
609 	/* Partial sector write requires a bounce buffer. */
610 	if ((head > 0) || do_tail_write || bytes < secsz) {
611 		bouncebuf = malloc(secsz);
612 		if (bouncebuf == NULL) {
613 			printf("vdev_write: out of memory\n");
614 			return (ENOMEM);
615 		}
616 	}
617 
618 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
619 		ret = errno;
620 		goto error;
621 	}
622 
623 	/* Partial data for first sector */
624 	if (head > 0) {
625 		res = read(fd, bouncebuf, secsz);
626 		if ((unsigned)res != secsz) {
627 			ret = EIO;
628 			goto error;
629 		}
630 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
631 		(void) lseek(fd, -secsz, SEEK_CUR);
632 		res = write(fd, bouncebuf, secsz);
633 		if ((unsigned)res != secsz) {
634 			ret = EIO;
635 			goto error;
636 		}
637 		outbuf += min(secsz - head, bytes);
638 	}
639 
640 	/*
641 	 * Full data write to sectors.
642 	 * Note, there is still corner case where we write
643 	 * to sector boundary, but less than sector size, e.g. write 512B
644 	 * to 4k sector.
645 	 */
646 	if (full_sec_size > 0) {
647 		if (bytes < full_sec_size) {
648 			res = read(fd, bouncebuf, secsz);
649 			if ((unsigned)res != secsz) {
650 				ret = EIO;
651 				goto error;
652 			}
653 			memcpy(bouncebuf, outbuf, bytes);
654 			(void) lseek(fd, -secsz, SEEK_CUR);
655 			res = write(fd, bouncebuf, secsz);
656 			if ((unsigned)res != secsz) {
657 				ret = EIO;
658 				goto error;
659 			}
660 		} else {
661 			res = write(fd, outbuf, full_sec_size);
662 			if ((unsigned)res != full_sec_size) {
663 				ret = EIO;
664 				goto error;
665 			}
666 			outbuf += full_sec_size;
667 		}
668 	}
669 
670 	/* Partial data write to last sector */
671 	if (do_tail_write) {
672 		res = read(fd, bouncebuf, secsz);
673 		if ((unsigned)res != secsz) {
674 			ret = EIO;
675 			goto error;
676 		}
677 		memcpy(bouncebuf, outbuf, secsz - tail);
678 		(void) lseek(fd, -secsz, SEEK_CUR);
679 		res = write(fd, bouncebuf, secsz);
680 		if ((unsigned)res != secsz) {
681 			ret = EIO;
682 			goto error;
683 		}
684 	}
685 
686 	ret = 0;
687 error:
688 	free(bouncebuf);
689 	return (ret);
690 }
691 
692 static int
693 zfs_dev_init(void)
694 {
695 	spa_t *spa;
696 	spa_t *next;
697 	spa_t *prev;
698 
699 	zfs_init();
700 	if (archsw.arch_zfs_probe == NULL)
701 		return (ENXIO);
702 	archsw.arch_zfs_probe();
703 
704 	prev = NULL;
705 	spa = STAILQ_FIRST(&zfs_pools);
706 	while (spa != NULL) {
707 		next = STAILQ_NEXT(spa, spa_link);
708 		if (zfs_spa_init(spa)) {
709 			if (prev == NULL)
710 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
711 			else
712 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
713 		} else
714 			prev = spa;
715 		spa = next;
716 	}
717 	return (0);
718 }
719 
720 struct zfs_probe_args {
721 	int		fd;
722 	const char	*devname;
723 	uint64_t	*pool_guid;
724 	u_int		secsz;
725 };
726 
727 static int
728 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
729 {
730 	struct zfs_probe_args *ppa;
731 
732 	ppa = (struct zfs_probe_args *)arg;
733 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
734 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
735 }
736 
737 static int
738 zfs_probe(int fd, uint64_t *pool_guid)
739 {
740 	spa_t *spa;
741 	int ret;
742 
743 	spa = NULL;
744 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
745 	if (ret == 0 && pool_guid != NULL)
746 		if (*pool_guid == 0)
747 			*pool_guid = spa->spa_guid;
748 	return (ret);
749 }
750 
751 static int
752 zfs_probe_partition(void *arg, const char *partname,
753     const struct ptable_entry *part)
754 {
755 	struct zfs_probe_args *ppa, pa;
756 	struct ptable *table;
757 	char devname[32];
758 	int ret;
759 
760 	/* Probe only freebsd-zfs and freebsd partitions */
761 	if (part->type != PART_FREEBSD &&
762 	    part->type != PART_FREEBSD_ZFS)
763 		return (0);
764 
765 	ppa = (struct zfs_probe_args *)arg;
766 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
767 	devname[strlen(ppa->devname) - 1] = '\0';
768 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
769 	pa.fd = open(devname, O_RDWR);
770 	if (pa.fd == -1)
771 		return (0);
772 	ret = zfs_probe(pa.fd, ppa->pool_guid);
773 	if (ret == 0)
774 		return (0);
775 	/* Do we have BSD label here? */
776 	if (part->type == PART_FREEBSD) {
777 		pa.devname = devname;
778 		pa.pool_guid = ppa->pool_guid;
779 		pa.secsz = ppa->secsz;
780 		table = ptable_open(&pa, part->end - part->start + 1,
781 		    ppa->secsz, zfs_diskread);
782 		if (table != NULL) {
783 			ptable_iterate(table, &pa, zfs_probe_partition);
784 			ptable_close(table);
785 		}
786 	}
787 	close(pa.fd);
788 	return (0);
789 }
790 
791 /*
792  * Return bootenv nvlist from pool label.
793  */
794 int
795 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
796 {
797 	spa_t *spa;
798 
799 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
800 		return (ENXIO);
801 
802 	return (zfs_get_bootenv_spa(spa, benvp));
803 }
804 
805 /*
806  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
807  */
808 int
809 zfs_set_bootenv(void *vdev, nvlist_t *benv)
810 {
811 	spa_t *spa;
812 
813 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
814 		return (ENXIO);
815 
816 	return (zfs_set_bootenv_spa(spa, benv));
817 }
818 
819 /*
820  * Get bootonce value by key. The bootonce <key, value> pair is removed
821  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
822  */
823 int
824 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
825 {
826 	spa_t *spa;
827 
828 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
829 		return (ENXIO);
830 
831 	return (zfs_get_bootonce_spa(spa, key, buf, size));
832 }
833 
/*
 * nvstore backend.
 *
 * Forward declarations for helpers that are used before their
 * definitions.
 */

static int	zfs_nvstore_setter(void *vdev, int type, const char *name,
		    const void *data, size_t size);
static int	zfs_nvstore_setter_str(void *vdev, const char *type,
		    const char *name, const char *data);
static int	zfs_nvstore_unset_impl(void *vdev, const char *name,
		    bool unset_env);
static int	zfs_nvstore_setenv(void *vdev, void *ptr);
844 
845 /*
846  * nvstore is only present for current rootfs pool.
847  */
848 static int
849 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
850 {
851 	struct zfs_devdesc *dev;
852 	int rv;
853 
854 	archsw.arch_getdev((void **)&dev, NULL, NULL);
855 	if (dev == NULL)
856 		return (ENXIO);
857 
858 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
859 
860 	free(dev);
861 	return (rv);
862 }
863 
864 /*
865  * nvstore is only present for current rootfs pool.
866  */
867 static int
868 zfs_nvstore_unsethook(struct env_var *ev)
869 {
870 	struct zfs_devdesc *dev;
871 	int rv;
872 
873 	archsw.arch_getdev((void **)&dev, NULL, NULL);
874 	if (dev == NULL)
875 		return (ENXIO);
876 
877 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
878 
879 	free(dev);
880 	return (rv);
881 }
882 
883 static int
884 zfs_nvstore_getter(void *vdev, const char *name, void **data)
885 {
886 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
887 	spa_t *spa;
888 	nvlist_t *nv;
889 	char *str, **ptr;
890 	int size;
891 	int rv;
892 
893 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
894 		return (ENOTSUP);
895 
896 	if ((spa = spa_find_by_dev(dev)) == NULL)
897 		return (ENXIO);
898 
899 	if (spa->spa_bootenv == NULL)
900 		return (ENXIO);
901 
902 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
903 	    NULL, &nv, NULL) != 0)
904 		return (ENOENT);
905 
906 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
907 	if (rv == 0) {
908 		ptr = (char **)data;
909 		asprintf(ptr, "%.*s", size, str);
910 		if (*data == NULL)
911 			rv = ENOMEM;
912 	}
913 	nvlist_destroy(nv);
914 	return (rv);
915 }
916 
917 static int
918 zfs_nvstore_setter(void *vdev, int type, const char *name,
919     const void *data, size_t size)
920 {
921 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
922 	spa_t *spa;
923 	nvlist_t *nv;
924 	int rv;
925 	bool env_set = true;
926 
927 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
928 		return (ENOTSUP);
929 
930 	if ((spa = spa_find_by_dev(dev)) == NULL)
931 		return (ENXIO);
932 
933 	if (spa->spa_bootenv == NULL)
934 		return (ENXIO);
935 
936 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
937 	    NULL, &nv, NULL) != 0) {
938 		nv = nvlist_create(NV_UNIQUE_NAME);
939 		if (nv == NULL)
940 			return (ENOMEM);
941 	}
942 
943 	rv = 0;
944 	switch (type) {
945         case DATA_TYPE_INT8:
946 		if (size != sizeof (int8_t)) {
947 			rv = EINVAL;
948 			break;
949 		}
950 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
951 		break;
952 
953         case DATA_TYPE_INT16:
954 		if (size != sizeof (int16_t)) {
955 			rv = EINVAL;
956 			break;
957 		}
958 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
959 		break;
960 
961         case DATA_TYPE_INT32:
962 		if (size != sizeof (int32_t)) {
963 			rv = EINVAL;
964 			break;
965 		}
966 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
967 		break;
968 
969         case DATA_TYPE_INT64:
970 		if (size != sizeof (int64_t)) {
971 			rv = EINVAL;
972 			break;
973 		}
974 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
975 		break;
976 
977         case DATA_TYPE_BYTE:
978 		if (size != sizeof (uint8_t)) {
979 			rv = EINVAL;
980 			break;
981 		}
982 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
983 		break;
984 
985         case DATA_TYPE_UINT8:
986 		if (size != sizeof (uint8_t)) {
987 			rv = EINVAL;
988 			break;
989 		}
990 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
991 		break;
992 
993         case DATA_TYPE_UINT16:
994 		if (size != sizeof (uint16_t)) {
995 			rv = EINVAL;
996 			break;
997 		}
998 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
999 		break;
1000 
1001         case DATA_TYPE_UINT32:
1002 		if (size != sizeof (uint32_t)) {
1003 			rv = EINVAL;
1004 			break;
1005 		}
1006 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1007 		break;
1008 
1009         case DATA_TYPE_UINT64:
1010 		if (size != sizeof (uint64_t)) {
1011 			rv = EINVAL;
1012 			break;
1013 		}
1014 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1015 		break;
1016 
1017         case DATA_TYPE_STRING:
1018 		rv = nvlist_add_string(nv, name, data);
1019 		break;
1020 
1021 	case DATA_TYPE_BOOLEAN_VALUE:
1022 		if (size != sizeof (boolean_t)) {
1023 			rv = EINVAL;
1024 			break;
1025 		}
1026 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1027 		break;
1028 
1029 	default:
1030 		rv = EINVAL;
1031 		break;
1032 	}
1033 
1034 	if (rv == 0) {
1035 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1036 		if (rv == 0) {
1037 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1038 		}
1039 		if (rv == 0) {
1040 			if (env_set) {
1041 				rv = zfs_nvstore_setenv(vdev,
1042 				    nvpair_find(nv, name));
1043 			} else {
1044 				env_discard(env_getenv(name));
1045 				rv = 0;
1046 			}
1047 		}
1048 	}
1049 
1050 	nvlist_destroy(nv);
1051 	return (rv);
1052 }
1053 
/*
 * Parse data as a signed 64-bit integer (base auto-detected by
 * strtoll()).  The whole string must be consumed and must not be empty.
 * Returns 0 and stores the value in *ip, or EINVAL leaving *ip untouched.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);	/* conversion reported an error */
	if (*data == '\0')
		return (EINVAL);	/* empty input */
	if (*stop != '\0')
		return (EINVAL);	/* trailing characters */

	*ip = parsed;
	return (0);
}
1068 
/*
 * Parse data as an unsigned 64-bit integer (base auto-detected by
 * strtoull()).  The whole string must be consumed and must not be empty.
 * Returns 0 and stores the value in *ip, or EINVAL leaving *ip untouched.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *stop;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);	/* conversion reported an error */
	if (*data == '\0')
		return (EINVAL);	/* empty input */
	if (*stop != '\0')
		return (EINVAL);	/* trailing characters */

	*ip = parsed;
	return (0);
}
1083 
1084 /*
1085  * Translate textual data to data type. If type is not set, and we are
1086  * creating new pair, use DATA_TYPE_STRING.
1087  */
1088 static int
1089 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1090     const char *data)
1091 {
1092 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1093 	spa_t *spa;
1094 	nvlist_t *nv;
1095 	int rv;
1096 	data_type_t dt;
1097 	int64_t val;
1098 	uint64_t uval;
1099 
1100 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1101 		return (ENOTSUP);
1102 
1103 	if ((spa = spa_find_by_dev(dev)) == NULL)
1104 		return (ENXIO);
1105 
1106 	if (spa->spa_bootenv == NULL)
1107 		return (ENXIO);
1108 
1109 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1110 	    NULL, &nv, NULL) != 0) {
1111 		nv = NULL;
1112 	}
1113 
1114 	if (type == NULL) {
1115 		nvp_header_t *nvh;
1116 
1117 		/*
1118 		 * if there is no existing pair, default to string.
1119 		 * Otherwise, use type from existing pair.
1120 		 */
1121 		nvh = nvpair_find(nv, name);
1122 		if (nvh == NULL) {
1123 			dt = DATA_TYPE_STRING;
1124 		} else {
1125 			nv_string_t *nvp_name;
1126 			nv_pair_data_t *nvp_data;
1127 
1128 			nvp_name = (nv_string_t *)(nvh + 1);
1129 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1130 			    NV_ALIGN4(nvp_name->nv_size));
1131 			dt = nvp_data->nv_type;
1132 		}
1133 	} else {
1134 		dt = nvpair_type_from_name(type);
1135 	}
1136 	nvlist_destroy(nv);
1137 
1138 	rv = 0;
1139 	switch (dt) {
1140         case DATA_TYPE_INT8:
1141 		rv = get_int64(data, &val);
1142 		if (rv == 0) {
1143 			int8_t v = val;
1144 
1145 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1146 		}
1147 		break;
1148         case DATA_TYPE_INT16:
1149 		rv = get_int64(data, &val);
1150 		if (rv == 0) {
1151 			int16_t v = val;
1152 
1153 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1154 		}
1155 		break;
1156         case DATA_TYPE_INT32:
1157 		rv = get_int64(data, &val);
1158 		if (rv == 0) {
1159 			int32_t v = val;
1160 
1161 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1162 		}
1163 		break;
1164         case DATA_TYPE_INT64:
1165 		rv = get_int64(data, &val);
1166 		if (rv == 0) {
1167 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1168 			    sizeof (val));
1169 		}
1170 		break;
1171 
1172         case DATA_TYPE_BYTE:
1173 		rv = get_uint64(data, &uval);
1174 		if (rv == 0) {
1175 			uint8_t v = uval;
1176 
1177 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1178 		}
1179 		break;
1180 
1181         case DATA_TYPE_UINT8:
1182 		rv = get_uint64(data, &uval);
1183 		if (rv == 0) {
1184 			uint8_t v = uval;
1185 
1186 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1187 		}
1188 		break;
1189 
1190         case DATA_TYPE_UINT16:
1191 		rv = get_uint64(data, &uval);
1192 		if (rv == 0) {
1193 			uint16_t v = uval;
1194 
1195 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1196 		}
1197 		break;
1198 
1199         case DATA_TYPE_UINT32:
1200 		rv = get_uint64(data, &uval);
1201 		if (rv == 0) {
1202 			uint32_t v = uval;
1203 
1204 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1205 		}
1206 		break;
1207 
1208         case DATA_TYPE_UINT64:
1209 		rv = get_uint64(data, &uval);
1210 		if (rv == 0) {
1211 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1212 			    sizeof (uval));
1213 		}
1214 		break;
1215 
1216         case DATA_TYPE_STRING:
1217 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1218 		break;
1219 
1220 	case DATA_TYPE_BOOLEAN_VALUE:
1221 		rv = get_int64(data, &val);
1222 		if (rv == 0) {
1223 			boolean_t v = val;
1224 
1225 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1226 		}
1227 
1228 	default:
1229 		rv = EINVAL;
1230 	}
1231 	return (rv);
1232 }
1233 
1234 static int
1235 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1236 {
1237 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1238 	spa_t *spa;
1239 	nvlist_t *nv;
1240 	int rv;
1241 
1242 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1243 		return (ENOTSUP);
1244 
1245 	if ((spa = spa_find_by_dev(dev)) == NULL)
1246 		return (ENXIO);
1247 
1248 	if (spa->spa_bootenv == NULL)
1249 		return (ENXIO);
1250 
1251 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1252 	    NULL, &nv, NULL) != 0)
1253 		return (ENOENT);
1254 
1255 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1256 	if (rv == 0) {
1257 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1258 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1259 			    DATA_TYPE_NVLIST);
1260 		} else {
1261 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1262 			    OS_NVSTORE, nv);
1263 		}
1264 		if (rv == 0)
1265 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1266 	}
1267 
1268 	if (unset_env)
1269 		env_discard(env_getenv(name));
1270 	return (rv);
1271 }
1272 
/*
 * nvstore "unset" callback: remove "name" from the store and also discard
 * the matching environment variable.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	int rv;

	rv = zfs_nvstore_unset_impl(vdev, name, true);
	return (rv);
}
1278 
1279 static int
1280 zfs_nvstore_print(void *vdev __unused, void *ptr)
1281 {
1282 
1283 	nvpair_print(ptr, 0);
1284 	return (0);
1285 }
1286 
1287 /*
1288  * Create environment variable from nvpair.
1289  * set hook will update nvstore with new value, unset hook will remove
1290  * variable from nvstore.
1291  */
1292 static int
1293 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1294 {
1295 	nvp_header_t *nvh = ptr;
1296 	nv_string_t *nvp_name, *nvp_value;
1297 	nv_pair_data_t *nvp_data;
1298 	char *name, *value;
1299 	int rv = 0;
1300 
1301 	if (nvh == NULL)
1302 		return (ENOENT);
1303 
1304 	nvp_name = (nv_string_t *)(nvh + 1);
1305 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1306 	    NV_ALIGN4(nvp_name->nv_size));
1307 
1308 	if ((name = nvstring_get(nvp_name)) == NULL)
1309 		return (ENOMEM);
1310 
1311 	value = NULL;
1312 	switch (nvp_data->nv_type) {
1313 	case DATA_TYPE_BYTE:
1314 	case DATA_TYPE_UINT8:
1315 		(void) asprintf(&value, "%uc",
1316 		    *(unsigned *)&nvp_data->nv_data[0]);
1317 		if (value == NULL)
1318 			rv = ENOMEM;
1319 		break;
1320 
1321 	case DATA_TYPE_INT8:
1322 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1323 		if (value == NULL)
1324 			rv = ENOMEM;
1325 		break;
1326 
1327 	case DATA_TYPE_INT16:
1328 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1329 		if (value == NULL)
1330 			rv = ENOMEM;
1331 		break;
1332 
1333 	case DATA_TYPE_UINT16:
1334 		(void) asprintf(&value, "%hu",
1335 		    *(unsigned short *)&nvp_data->nv_data[0]);
1336 		if (value == NULL)
1337 			rv = ENOMEM;
1338 		break;
1339 
1340 	case DATA_TYPE_BOOLEAN_VALUE:
1341 	case DATA_TYPE_INT32:
1342 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1343 		if (value == NULL)
1344 			rv = ENOMEM;
1345 		break;
1346 
1347 	case DATA_TYPE_UINT32:
1348 		(void) asprintf(&value, "%u",
1349 		    *(unsigned *)&nvp_data->nv_data[0]);
1350 		if (value == NULL)
1351 			rv = ENOMEM;
1352 		break;
1353 
1354 	case DATA_TYPE_INT64:
1355 		(void) asprintf(&value, "%jd",
1356 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1357 		if (value == NULL)
1358 			rv = ENOMEM;
1359 		break;
1360 
1361 	case DATA_TYPE_UINT64:
1362 		(void) asprintf(&value, "%ju",
1363 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1364 		if (value == NULL)
1365 			rv = ENOMEM;
1366 		break;
1367 
1368 	case DATA_TYPE_STRING:
1369 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1370 		if ((value = nvstring_get(nvp_value)) == NULL) {
1371 			rv = ENOMEM;
1372 			break;
1373 		}
1374 		break;
1375 
1376 	default:
1377 		rv = EINVAL;
1378 		break;
1379 	}
1380 
1381 	if (value != NULL) {
1382 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1383 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1384 		free(value);
1385 	}
1386 	free(name);
1387 	return (rv);
1388 }
1389 
1390 static int
1391 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1392 {
1393 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1394 	spa_t *spa;
1395 	nvlist_t *nv;
1396 	nvp_header_t *nvh;
1397 	int rv;
1398 
1399 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1400 		return (ENOTSUP);
1401 
1402 	if ((spa = spa_find_by_dev(dev)) == NULL)
1403 		return (ENXIO);
1404 
1405 	if (spa->spa_bootenv == NULL)
1406 		return (ENXIO);
1407 
1408 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1409 	    NULL, &nv, NULL) != 0)
1410 		return (ENOENT);
1411 
1412 	rv = 0;
1413 	nvh = NULL;
1414 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1415 		rv = cb(vdev, nvh);
1416 		if (rv != 0)
1417 			break;
1418 	}
1419 	return (rv);
1420 }
1421 
1422 nvs_callbacks_t nvstore_zfs_cb = {
1423 	.nvs_getter = zfs_nvstore_getter,
1424 	.nvs_setter = zfs_nvstore_setter,
1425 	.nvs_setter_str = zfs_nvstore_setter_str,
1426 	.nvs_unset = zfs_nvstore_unset,
1427 	.nvs_print = zfs_nvstore_print,
1428 	.nvs_iterate = zfs_nvstore_iterate
1429 };
1430 
1431 int
1432 zfs_attach_nvstore(void *vdev)
1433 {
1434 	struct zfs_devdesc *dev = vdev;
1435 	spa_t *spa;
1436 	uint64_t version;
1437 	int rv;
1438 
1439 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1440 		return (ENOTSUP);
1441 
1442 	if ((spa = spa_find_by_dev(dev)) == NULL)
1443 		return (ENXIO);
1444 
1445 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1446 	    NULL, &version, NULL);
1447 
1448 	if (rv != 0 || version != VB_NVLIST) {
1449 		return (ENXIO);
1450 	}
1451 
1452 	dev = malloc(sizeof (*dev));
1453 	if (dev == NULL)
1454 		return (ENOMEM);
1455 	memcpy(dev, vdev, sizeof (*dev));
1456 
1457 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1458 	if (rv != 0)
1459 		free(dev);
1460 	else
1461 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1462 	return (rv);
1463 }
1464 
1465 int
1466 zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too)
1467 {
1468 	struct ptable *table;
1469 	struct zfs_probe_args pa;
1470 	uint64_t mediasz;
1471 	int ret;
1472 
1473 	if (pool_guid)
1474 		*pool_guid = 0;
1475 	pa.fd = open(devname, O_RDWR);
1476 	if (pa.fd == -1)
1477 		return (ENXIO);
1478 	/* Probe the whole disk */
1479 	ret = zfs_probe(pa.fd, pool_guid);
1480 	if (ret == 0)
1481 		return (0);
1482 	if (!parts_too)
1483 		return (ENXIO);
1484 
1485 	/* Probe each partition */
1486 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1487 	if (ret == 0)
1488 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1489 	if (ret == 0) {
1490 		pa.devname = devname;
1491 		pa.pool_guid = pool_guid;
1492 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1493 		    zfs_diskread);
1494 		if (table != NULL) {
1495 			ptable_iterate(table, &pa, zfs_probe_partition);
1496 			ptable_close(table);
1497 		}
1498 	}
1499 	close(pa.fd);
1500 	if (pool_guid && *pool_guid == 0)
1501 		ret = ENXIO;
1502 	return (ret);
1503 }
1504 
1505 /*
1506  * Print information about ZFS pools
1507  */
1508 static int
1509 zfs_dev_print(int verbose)
1510 {
1511 	spa_t *spa;
1512 	char line[80];
1513 	int ret = 0;
1514 
1515 	if (STAILQ_EMPTY(&zfs_pools))
1516 		return (0);
1517 
1518 	printf("%s devices:", zfs_dev.dv_name);
1519 	if ((ret = pager_output("\n")) != 0)
1520 		return (ret);
1521 
1522 	if (verbose) {
1523 		return (spa_all_status());
1524 	}
1525 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1526 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1527 		ret = pager_output(line);
1528 		if (ret != 0)
1529 			break;
1530 	}
1531 	return (ret);
1532 }
1533 
1534 /*
1535  * Attempt to open the pool described by (dev) for use by (f).
1536  */
1537 static int
1538 zfs_dev_open(struct open_file *f, ...)
1539 {
1540 	va_list		args;
1541 	struct zfs_devdesc	*dev;
1542 	struct zfsmount	*mount;
1543 	spa_t		*spa;
1544 	int		rv;
1545 
1546 	va_start(args, f);
1547 	dev = va_arg(args, struct zfs_devdesc *);
1548 	va_end(args);
1549 
1550 	if ((spa = spa_find_by_dev(dev)) == NULL)
1551 		return (ENXIO);
1552 
1553 	STAILQ_FOREACH(mount, &zfsmount, next) {
1554 		if (spa->spa_guid == mount->spa->spa_guid)
1555 			break;
1556 	}
1557 
1558 	rv = 0;
1559 	/* This device is not set as currdev, mount us private copy. */
1560 	if (mount == NULL)
1561 		rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1562 
1563 	if (rv == 0) {
1564 		dev->dd.d_opendata = mount;
1565 	}
1566 	return (rv);
1567 }
1568 
1569 static int
1570 zfs_dev_close(struct open_file *f)
1571 {
1572 	struct devdesc *dev;
1573 	struct zfsmount	*mnt, *mount;
1574 
1575 	dev = f->f_devdata;
1576 	mnt = dev->d_opendata;
1577 
1578 	STAILQ_FOREACH(mount, &zfsmount, next) {
1579 		if (mnt->spa->spa_guid == mount->spa->spa_guid)
1580 			break;
1581 	}
1582 
1583 	/* XXX */
1584 	return (0);
1585 }
1586 
1587 static int
1588 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1589 {
1590 
1591 	return (ENOSYS);
1592 }
1593 
1594 struct devsw zfs_dev = {
1595 	.dv_name = "zfs",
1596 	.dv_type = DEVT_ZFS,
1597 	.dv_init = zfs_dev_init,
1598 	.dv_strategy = zfs_dev_strategy,
1599 	.dv_open = zfs_dev_open,
1600 	.dv_close = zfs_dev_close,
1601 	.dv_ioctl = noioctl,
1602 	.dv_print = zfs_dev_print,
1603 	.dv_cleanup = nullsys,
1604 	.dv_fmtdev = zfs_fmtdev,
1605 	.dv_parsedev = zfs_parsedev,
1606 };
1607 
1608 static int
1609 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1610 {
1611 	static char	rootname[ZFS_MAXNAMELEN];
1612 	static char	poolname[ZFS_MAXNAMELEN];
1613 	spa_t		*spa;
1614 	const char	*end;
1615 	const char	*np;
1616 	const char	*sep;
1617 	int		rv;
1618 	struct zfs_devdesc *dev;
1619 
1620 	np = devspec + 3;			/* Skip the leading 'zfs' */
1621 	if (*np != ':')
1622 		return (EINVAL);
1623 	np++;
1624 	end = strrchr(np, ':');
1625 	if (end == NULL)
1626 		return (EINVAL);
1627 	sep = strchr(np, '/');
1628 	if (sep == NULL || sep >= end)
1629 		sep = end;
1630 	memcpy(poolname, np, sep - np);
1631 	poolname[sep - np] = '\0';
1632 	if (sep < end) {
1633 		sep++;
1634 		memcpy(rootname, sep, end - sep);
1635 		rootname[end - sep] = '\0';
1636 	}
1637 	else
1638 		rootname[0] = '\0';
1639 
1640 	spa = spa_find_by_name(poolname);
1641 	if (!spa)
1642 		return (ENXIO);
1643 	dev = malloc(sizeof(*dev));
1644 	if (dev == NULL)
1645 		return (ENOMEM);
1646 	dev->pool_guid = spa->spa_guid;
1647 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1648 	if (rv != 0) {
1649 		free(dev);
1650 		return (rv);
1651 	}
1652 	if (path != NULL)
1653 		*path = (*end == '\0') ? end : end + 1;
1654 	dev->dd.d_dev = &zfs_dev;
1655 	*idev = &dev->dd;
1656 	return (0);
1657 }
1658 
1659 char *
1660 zfs_fmtdev(struct devdesc *vdev)
1661 {
1662 	static char		rootname[ZFS_MAXNAMELEN];
1663 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1664 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1665 	spa_t			*spa;
1666 
1667 	buf[0] = '\0';
1668 	if (vdev->d_dev->dv_type != DEVT_ZFS)
1669 		return (buf);
1670 
1671 	/* Do we have any pools? */
1672 	spa = STAILQ_FIRST(&zfs_pools);
1673 	if (spa == NULL)
1674 		return (buf);
1675 
1676 	if (dev->pool_guid == 0)
1677 		dev->pool_guid = spa->spa_guid;
1678 	else
1679 		spa = spa_find_by_guid(dev->pool_guid);
1680 
1681 	if (spa == NULL) {
1682 		printf("ZFS: can't find pool by guid\n");
1683 		return (buf);
1684 	}
1685 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1686 		printf("ZFS: can't find root filesystem\n");
1687 		return (buf);
1688 	}
1689 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1690 		printf("ZFS: can't find filesystem by guid\n");
1691 		return (buf);
1692 	}
1693 
1694 	if (rootname[0] == '\0')
1695 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1696 		    spa->spa_name);
1697 	else
1698 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1699 		    spa->spa_name, rootname);
1700 	return (buf);
1701 }
1702 
/*
 * Split "pool[/dataset]" into its pool and dataset parts.
 *
 * The pool part (everything before the first '/') is copied into
 * poolname, which holds size bytes.  If dsnamep is not NULL it receives a
 * pointer to the text after the first '/', or to an empty string when
 * there is no '/'.
 *
 * Returns 0 on success, EINVAL if the pool part does not fit.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *rest;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		rest = "";
	} else {
		poollen = slash - name;
		rest = slash + 1;
	}

	/* Need room for the pool name and its terminating NUL. */
	if (poollen >= size)
		return (EINVAL);

	strlcpy(poolname, name, poollen + 1);

	if (dsnamep != NULL)
		*dsnamep = rest;

	return (0);
}
1731 
1732 int
1733 zfs_list(const char *name)
1734 {
1735 	static char	poolname[ZFS_MAXNAMELEN];
1736 	uint64_t	objid;
1737 	spa_t		*spa;
1738 	const char	*dsname;
1739 	int		rv;
1740 
1741 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1742 		return (EINVAL);
1743 
1744 	spa = spa_find_by_name(poolname);
1745 	if (!spa)
1746 		return (ENXIO);
1747 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1748 	if (rv != 0)
1749 		return (rv);
1750 
1751 	return (zfs_list_dataset(spa, objid));
1752 }
1753 
1754 void
1755 init_zfs_boot_options(const char *currdev_in)
1756 {
1757 	char poolname[ZFS_MAXNAMELEN];
1758 	char *beroot, *currdev;
1759 	spa_t *spa;
1760 	int currdev_len;
1761 	const char *dsname;
1762 
1763 	currdev = NULL;
1764 	currdev_len = strlen(currdev_in);
1765 	if (currdev_len == 0)
1766 		return;
1767 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1768 		return;
1769 	currdev = strdup(currdev_in);
1770 	if (currdev == NULL)
1771 		return;
1772 	/* Remove the trailing : */
1773 	currdev[currdev_len - 1] = '\0';
1774 
1775 	setenv("zfs_be_active", currdev, 1);
1776 	setenv("zfs_be_currpage", "1", 1);
1777 	/* Remove the last element (current bootenv) */
1778 	beroot = strrchr(currdev, '/');
1779 	if (beroot != NULL)
1780 		beroot[0] = '\0';
1781 	beroot = strchr(currdev, ':') + 1;
1782 	setenv("zfs_be_root", beroot, 1);
1783 
1784 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1785 		return;
1786 
1787 	spa = spa_find_by_name(poolname);
1788 	if (spa == NULL)
1789 		return;
1790 
1791 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1792 	zfs_checkpoints_initial(spa, beroot, dsname);
1793 
1794 	free(currdev);
1795 }
1796 
1797 static void
1798 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1799 {
1800 	char envname[32];
1801 
1802 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1803 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1804 		setenv(envname, name, 1);
1805 
1806 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1807 		spa->spa_mos = &spa->spa_mos_checkpoint;
1808 
1809 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1810 
1811 		spa->spa_uberblock = &spa->spa_uberblock_master;
1812 		spa->spa_mos = &spa->spa_mos_master;
1813 	}
1814 }
1815 
1816 static void
1817 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1818    const char *dsname, int checkpoint)
1819 {
1820 	char		envname[32], envval[256];
1821 	uint64_t	objid;
1822 	int		bootenvs_idx, rv;
1823 
1824 	SLIST_INIT(&zfs_be_head);
1825 	zfs_env_count = 0;
1826 
1827 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1828 	if (rv != 0)
1829 		return;
1830 
1831 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1832 	bootenvs_idx = 0;
1833 	/* Populate the initial environment variables */
1834 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1835 		/* Enumerate all bootenvs for general usage */
1836 		snprintf(envname, sizeof(envname), "%s[%d]",
1837 		    envprefix, bootenvs_idx);
1838 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1839 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1840 		rv = setenv(envname, envval, 1);
1841 		if (rv != 0)
1842 			break;
1843 		bootenvs_idx++;
1844 	}
1845 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1846 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1847 	setenv(envname, envval, 1);
1848 
1849 	/* Clean up the SLIST of ZFS BEs */
1850 	while (!SLIST_EMPTY(&zfs_be_head)) {
1851 		zfs_be = SLIST_FIRST(&zfs_be_head);
1852 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1853 		free(zfs_be->name);
1854 		free(zfs_be);
1855 	}
1856 }
1857 
1858 int
1859 zfs_bootenv(const char *name)
1860 {
1861 	char		poolname[ZFS_MAXNAMELEN], *root;
1862 	const char	*dsname;
1863 	char		becount[4];
1864 	uint64_t	objid;
1865 	spa_t		*spa;
1866 	int		rv, pages, perpage, currpage;
1867 
1868 	if (name == NULL)
1869 		return (EINVAL);
1870 	if ((root = getenv("zfs_be_root")) == NULL)
1871 		return (EINVAL);
1872 
1873 	if (strcmp(name, root) != 0) {
1874 		if (setenv("zfs_be_root", name, 1) != 0)
1875 			return (ENOMEM);
1876 	}
1877 
1878 	SLIST_INIT(&zfs_be_head);
1879 	zfs_env_count = 0;
1880 
1881 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1882 		return (EINVAL);
1883 
1884 	spa = spa_find_by_name(poolname);
1885 	if (!spa)
1886 		return (ENXIO);
1887 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1888 	if (rv != 0)
1889 		return (rv);
1890 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1891 
1892 	/* Calculate and store the number of pages of BEs */
1893 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1894 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1895 	snprintf(becount, 4, "%d", pages);
1896 	if (setenv("zfs_be_pages", becount, 1) != 0)
1897 		return (ENOMEM);
1898 
1899 	/* Roll over the page counter if it has exceeded the maximum */
1900 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1901 	if (currpage > pages) {
1902 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1903 			return (ENOMEM);
1904 	}
1905 
1906 	/* Populate the menu environment variables */
1907 	zfs_set_env();
1908 
1909 	/* Clean up the SLIST of ZFS BEs */
1910 	while (!SLIST_EMPTY(&zfs_be_head)) {
1911 		zfs_be = SLIST_FIRST(&zfs_be_head);
1912 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1913 		free(zfs_be->name);
1914 		free(zfs_be);
1915 	}
1916 
1917 	return (rv);
1918 }
1919 
1920 int
1921 zfs_belist_add(const char *name, uint64_t value __unused)
1922 {
1923 
1924 	/* Skip special datasets that start with a $ character */
1925 	if (strncmp(name, "$", 1) == 0) {
1926 		return (0);
1927 	}
1928 	/* Add the boot environment to the head of the SLIST */
1929 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1930 	if (zfs_be == NULL) {
1931 		return (ENOMEM);
1932 	}
1933 	zfs_be->name = strdup(name);
1934 	if (zfs_be->name == NULL) {
1935 		free(zfs_be);
1936 		return (ENOMEM);
1937 	}
1938 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1939 	zfs_env_count++;
1940 
1941 	return (0);
1942 }
1943 
1944 int
1945 zfs_set_env(void)
1946 {
1947 	char envname[32], envval[256];
1948 	char *beroot, *pagenum;
1949 	int rv, page, ctr;
1950 
1951 	beroot = getenv("zfs_be_root");
1952 	if (beroot == NULL) {
1953 		return (1);
1954 	}
1955 
1956 	pagenum = getenv("zfs_be_currpage");
1957 	if (pagenum != NULL) {
1958 		page = strtol(pagenum, NULL, 10);
1959 	} else {
1960 		page = 1;
1961 	}
1962 
1963 	ctr = 1;
1964 	rv = 0;
1965 	zfs_env_index = ZFS_BE_FIRST;
1966 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1967 		/* Skip to the requested page number */
1968 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1969 			ctr++;
1970 			continue;
1971 		}
1972 
1973 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1974 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1975 		rv = setenv(envname, envval, 1);
1976 		if (rv != 0) {
1977 			break;
1978 		}
1979 
1980 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1981 		rv = setenv(envname, envval, 1);
1982 		if (rv != 0){
1983 			break;
1984 		}
1985 
1986 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1987 		rv = setenv(envname, "set_bootenv", 1);
1988 		if (rv != 0){
1989 			break;
1990 		}
1991 
1992 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1993 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1994 		rv = setenv(envname, envval, 1);
1995 		if (rv != 0){
1996 			break;
1997 		}
1998 
1999 		zfs_env_index++;
2000 		if (zfs_env_index > ZFS_BE_LAST) {
2001 			break;
2002 		}
2003 
2004 	}
2005 
2006 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2007 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2008 		(void)unsetenv(envname);
2009 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2010 		(void)unsetenv(envname);
2011 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2012 		(void)unsetenv(envname);
2013 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2014 		(void)unsetenv(envname);
2015 	}
2016 
2017 	return (rv);
2018 }
2019