xref: /freebsd/stand/libsa/zfs/zfs.c (revision 148a8da8)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  *	$FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  *	Stand-alone file reading package.
34  */
35 
36 #include <stand.h>
37 #include <sys/disk.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <disk.h>
42 #include <part.h>
43 #include <stddef.h>
44 #include <stdarg.h>
45 #include <string.h>
46 #include <bootstrap.h>
47 
48 #include "libzfs.h"
49 
50 #include "zfsimpl.c"
51 
52 /* Define the range of indexes to be populated with ZFS Boot Environments */
53 #define		ZFS_BE_FIRST	4
54 #define		ZFS_BE_LAST	8
55 
56 static int	zfs_open(const char *path, struct open_file *f);
57 static int	zfs_close(struct open_file *f);
58 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
59 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
60 static int	zfs_stat(struct open_file *f, struct stat *sb);
61 static int	zfs_readdir(struct open_file *f, struct dirent *d);
62 
63 static void	zfs_bootenv_initial(const char *);
64 
65 struct devsw zfs_dev;
66 
/*
 * File system operations vector for ZFS.
 *
 * The initializer is positional.  Every entry except null_write is a
 * handler defined in this file; null_write is supplied in place of a
 * ZFS-specific write routine (presumably the write slot of struct fs_ops;
 * confirm against its declaration).
 */
struct fs_ops zfs_fsops = {
	"zfs",
	zfs_open,
	zfs_close,
	zfs_read,
	null_write,
	zfs_seek,
	zfs_stat,
	zfs_readdir
};
77 
/*
 * In-core open file.
 *
 * One instance is allocated by zfs_open() and stored in open_file.f_fsdata;
 * zfs_close() releases it.  For directories, zfs_readdir() reuses f_seekp as
 * its iteration cursor: a byte offset for micro zaps, and a leaf byte offset
 * whose low bits hold a chunk index for fat zaps.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
88 
/*
 * Boot environment bookkeeping shared by zfs_bootenv(),
 * zfs_bootenv_initial(), zfs_belist_add() and zfs_set_env().
 */
static int	zfs_env_index;	/* menu slot index used by zfs_set_env() */
static int	zfs_env_count;	/* entries added by zfs_belist_add() */

/* List of boot environments; zfs_belist_add() pushes entries at the head. */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;	/* not referenced in the visible code */
struct zfs_be_entry {
	/*
	 * Pointer exactly as handed to zfs_belist_add(); the string is not
	 * copied.  NOTE(review): verify that the callback's name buffer
	 * outlives the list.
	 */
	const char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;		/* shared iteration cursors */
98 
99 /*
100  * Open a file.
101  */
102 static int
103 zfs_open(const char *upath, struct open_file *f)
104 {
105 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
106 	struct file *fp;
107 	int rc;
108 
109 	if (f->f_dev != &zfs_dev)
110 		return (EINVAL);
111 
112 	/* allocate file system specific data structure */
113 	fp = malloc(sizeof(struct file));
114 	bzero(fp, sizeof(struct file));
115 	f->f_fsdata = (void *)fp;
116 
117 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
118 	fp->f_seekp = 0;
119 	if (rc) {
120 		f->f_fsdata = NULL;
121 		free(fp);
122 	}
123 	return (rc);
124 }
125 
126 static int
127 zfs_close(struct open_file *f)
128 {
129 	struct file *fp = (struct file *)f->f_fsdata;
130 
131 	dnode_cache_obj = NULL;
132 	f->f_fsdata = (void *)0;
133 	if (fp == (struct file *)0)
134 		return (0);
135 
136 	free(fp);
137 	return (0);
138 }
139 
140 /*
141  * Copy a portion of a file into kernel memory.
142  * Cross block boundaries when necessary.
143  */
144 static int
145 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
146 {
147 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
148 	struct file *fp = (struct file *)f->f_fsdata;
149 	struct stat sb;
150 	size_t n;
151 	int rc;
152 
153 	rc = zfs_stat(f, &sb);
154 	if (rc)
155 		return (rc);
156 	n = size;
157 	if (fp->f_seekp + n > sb.st_size)
158 		n = sb.st_size - fp->f_seekp;
159 
160 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
161 	if (rc)
162 		return (rc);
163 
164 	if (0) {
165 	    int i;
166 	    for (i = 0; i < n; i++)
167 		putchar(((char*) start)[i]);
168 	}
169 	fp->f_seekp += n;
170 	if (resid)
171 		*resid = size - n;
172 
173 	return (0);
174 }
175 
176 static off_t
177 zfs_seek(struct open_file *f, off_t offset, int where)
178 {
179 	struct file *fp = (struct file *)f->f_fsdata;
180 
181 	switch (where) {
182 	case SEEK_SET:
183 		fp->f_seekp = offset;
184 		break;
185 	case SEEK_CUR:
186 		fp->f_seekp += offset;
187 		break;
188 	case SEEK_END:
189 	    {
190 		struct stat sb;
191 		int error;
192 
193 		error = zfs_stat(f, &sb);
194 		if (error != 0) {
195 			errno = error;
196 			return (-1);
197 		}
198 		fp->f_seekp = sb.st_size - offset;
199 		break;
200 	    }
201 	default:
202 		errno = EINVAL;
203 		return (-1);
204 	}
205 	return (fp->f_seekp);
206 }
207 
208 static int
209 zfs_stat(struct open_file *f, struct stat *sb)
210 {
211 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
212 	struct file *fp = (struct file *)f->f_fsdata;
213 
214 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
215 }
216 
217 static int
218 zfs_readdir(struct open_file *f, struct dirent *d)
219 {
220 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
221 	struct file *fp = (struct file *)f->f_fsdata;
222 	mzap_ent_phys_t mze;
223 	struct stat sb;
224 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
225 	int rc;
226 
227 	rc = zfs_stat(f, &sb);
228 	if (rc)
229 		return (rc);
230 	if (!S_ISDIR(sb.st_mode))
231 		return (ENOTDIR);
232 
233 	/*
234 	 * If this is the first read, get the zap type.
235 	 */
236 	if (fp->f_seekp == 0) {
237 		rc = dnode_read(spa, &fp->f_dnode,
238 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
239 		if (rc)
240 			return (rc);
241 
242 		if (fp->f_zap_type == ZBT_MICRO) {
243 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
244 		} else {
245 			rc = dnode_read(spa, &fp->f_dnode,
246 					offsetof(zap_phys_t, zap_num_leafs),
247 					&fp->f_num_leafs,
248 					sizeof(fp->f_num_leafs));
249 			if (rc)
250 				return (rc);
251 
252 			fp->f_seekp = bsize;
253 			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
254 			rc = dnode_read(spa, &fp->f_dnode,
255 					fp->f_seekp,
256 					fp->f_zap_leaf,
257 					bsize);
258 			if (rc)
259 				return (rc);
260 		}
261 	}
262 
263 	if (fp->f_zap_type == ZBT_MICRO) {
264 	mzap_next:
265 		if (fp->f_seekp >= bsize)
266 			return (ENOENT);
267 
268 		rc = dnode_read(spa, &fp->f_dnode,
269 				fp->f_seekp, &mze, sizeof(mze));
270 		if (rc)
271 			return (rc);
272 		fp->f_seekp += sizeof(mze);
273 
274 		if (!mze.mze_name[0])
275 			goto mzap_next;
276 
277 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
278 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
279 		strcpy(d->d_name, mze.mze_name);
280 		d->d_namlen = strlen(d->d_name);
281 		return (0);
282 	} else {
283 		zap_leaf_t zl;
284 		zap_leaf_chunk_t *zc, *nc;
285 		int chunk;
286 		size_t namelen;
287 		char *p;
288 		uint64_t value;
289 
290 		/*
291 		 * Initialise this so we can use the ZAP size
292 		 * calculating macros.
293 		 */
294 		zl.l_bs = ilog2(bsize);
295 		zl.l_phys = fp->f_zap_leaf;
296 
297 		/*
298 		 * Figure out which chunk we are currently looking at
299 		 * and consider seeking to the next leaf. We use the
300 		 * low bits of f_seekp as a simple chunk index.
301 		 */
302 	fzap_next:
303 		chunk = fp->f_seekp & (bsize - 1);
304 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
305 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
306 			chunk = 0;
307 
308 			/*
309 			 * Check for EOF and read the new leaf.
310 			 */
311 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
312 				return (ENOENT);
313 
314 			rc = dnode_read(spa, &fp->f_dnode,
315 					fp->f_seekp,
316 					fp->f_zap_leaf,
317 					bsize);
318 			if (rc)
319 				return (rc);
320 		}
321 
322 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
323 		fp->f_seekp++;
324 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
325 			goto fzap_next;
326 
327 		namelen = zc->l_entry.le_name_numints;
328 		if (namelen > sizeof(d->d_name))
329 			namelen = sizeof(d->d_name);
330 
331 		/*
332 		 * Paste the name back together.
333 		 */
334 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
335 		p = d->d_name;
336 		while (namelen > 0) {
337 			int len;
338 			len = namelen;
339 			if (len > ZAP_LEAF_ARRAY_BYTES)
340 				len = ZAP_LEAF_ARRAY_BYTES;
341 			memcpy(p, nc->l_array.la_array, len);
342 			p += len;
343 			namelen -= len;
344 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
345 		}
346 		d->d_name[sizeof(d->d_name) - 1] = 0;
347 
348 		/*
349 		 * Assume the first eight bytes of the value are
350 		 * a uint64_t.
351 		 */
352 		value = fzap_leaf_value(&zl, zc);
353 
354 		d->d_fileno = ZFS_DIRENT_OBJ(value);
355 		d->d_type = ZFS_DIRENT_TYPE(value);
356 		d->d_namlen = strlen(d->d_name);
357 
358 		return (0);
359 	}
360 }
361 
362 static int
363 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
364 {
365 	int fd, ret;
366 	size_t res, head, tail, total_size, full_sec_size;
367 	unsigned secsz, do_tail_read;
368 	off_t start_sec;
369 	char *outbuf, *bouncebuf;
370 
371 	fd = (uintptr_t) priv;
372 	outbuf = (char *) buf;
373 	bouncebuf = NULL;
374 
375 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
376 	if (ret != 0)
377 		return (ret);
378 
379 	/*
380 	 * Handling reads of arbitrary offset and size - multi-sector case
381 	 * and single-sector case.
382 	 *
383 	 *                        Multi-sector Case
384 	 *                (do_tail_read = true if tail > 0)
385 	 *
386 	 *   |<----------------------total_size--------------------->|
387 	 *   |                                                       |
388 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
389 	 *   |          |                                 |          |
390 	 *   |          |       |<~full_sec_size~>|       |          |
391 	 *   +------------------+                 +------------------+
392 	 *   |          |0101010|     .  .  .     |0101011|          |
393 	 *   +------------------+                 +------------------+
394 	 *         start_sec                         start_sec + n
395 	 *
396 	 *
397 	 *                      Single-sector Case
398 	 *                    (do_tail_read = false)
399 	 *
400 	 *              |<------total_size = secsz----->|
401 	 *              |                               |
402 	 *              |<-head->|<---bytes--->|<-tail->|
403 	 *              +-------------------------------+
404 	 *              |        |0101010101010|        |
405 	 *              +-------------------------------+
406 	 *                          start_sec
407 	 */
408 	start_sec = offset / secsz;
409 	head = offset % secsz;
410 	total_size = roundup2(head + bytes, secsz);
411 	tail = total_size - (head + bytes);
412 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
413 	full_sec_size = total_size;
414 	if (head > 0)
415 		full_sec_size -= secsz;
416 	if (do_tail_read)
417 		full_sec_size -= secsz;
418 
419 	/* Return of partial sector data requires a bounce buffer. */
420 	if ((head > 0) || do_tail_read) {
421 		bouncebuf = zfs_alloc(secsz);
422 		if (bouncebuf == NULL) {
423 			printf("vdev_read: out of memory\n");
424 			return (ENOMEM);
425 		}
426 	}
427 
428 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1)
429 		return (errno);
430 
431 	/* Partial data return from first sector */
432 	if (head > 0) {
433 		res = read(fd, bouncebuf, secsz);
434 		if (res != secsz) {
435 			ret = EIO;
436 			goto error;
437 		}
438 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
439 		outbuf += min(secsz - head, bytes);
440 	}
441 
442 	/* Full data return from read sectors */
443 	if (full_sec_size > 0) {
444 		res = read(fd, outbuf, full_sec_size);
445 		if (res != full_sec_size) {
446 			ret = EIO;
447 			goto error;
448 		}
449 		outbuf += full_sec_size;
450 	}
451 
452 	/* Partial data return from last sector */
453 	if (do_tail_read) {
454 		res = read(fd, bouncebuf, secsz);
455 		if (res != secsz) {
456 			ret = EIO;
457 			goto error;
458 		}
459 		memcpy(outbuf, bouncebuf, secsz - tail);
460 	}
461 
462 	ret = 0;
463 error:
464 	if (bouncebuf != NULL)
465 		zfs_free(bouncebuf, secsz);
466 	return (ret);
467 }
468 
469 static int
470 zfs_dev_init(void)
471 {
472 	spa_t *spa;
473 	spa_t *next;
474 	spa_t *prev;
475 
476 	zfs_init();
477 	if (archsw.arch_zfs_probe == NULL)
478 		return (ENXIO);
479 	archsw.arch_zfs_probe();
480 
481 	prev = NULL;
482 	spa = STAILQ_FIRST(&zfs_pools);
483 	while (spa != NULL) {
484 		next = STAILQ_NEXT(spa, spa_link);
485 		if (zfs_spa_init(spa)) {
486 			if (prev == NULL)
487 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
488 			else
489 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
490 		} else
491 			prev = spa;
492 		spa = next;
493 	}
494 	return (0);
495 }
496 
/*
 * Context handed to the partition-table callbacks (zfs_diskread() and
 * zfs_probe_partition()) while probing a device for ZFS pools.
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor of the device */
	const char	*devname;	/* name the device was opened with */
	uint64_t	*pool_guid;	/* out: pool GUID; may be NULL */
	u_int		secsz;		/* sector size in bytes */
};
503 
504 static int
505 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
506 {
507 	struct zfs_probe_args *ppa;
508 
509 	ppa = (struct zfs_probe_args *)arg;
510 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
511 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
512 }
513 
514 static int
515 zfs_probe(int fd, uint64_t *pool_guid)
516 {
517 	spa_t *spa;
518 	int ret;
519 
520 	spa = NULL;
521 	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
522 	if (ret == 0 && pool_guid != NULL)
523 		*pool_guid = spa->spa_guid;
524 	return (ret);
525 }
526 
527 static int
528 zfs_probe_partition(void *arg, const char *partname,
529     const struct ptable_entry *part)
530 {
531 	struct zfs_probe_args *ppa, pa;
532 	struct ptable *table;
533 	char devname[32];
534 	int ret;
535 
536 	/* Probe only freebsd-zfs and freebsd partitions */
537 	if (part->type != PART_FREEBSD &&
538 	    part->type != PART_FREEBSD_ZFS)
539 		return (0);
540 
541 	ppa = (struct zfs_probe_args *)arg;
542 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
543 	devname[strlen(ppa->devname) - 1] = '\0';
544 	sprintf(devname, "%s%s:", devname, partname);
545 	pa.fd = open(devname, O_RDONLY);
546 	if (pa.fd == -1)
547 		return (0);
548 	ret = zfs_probe(pa.fd, ppa->pool_guid);
549 	if (ret == 0)
550 		return (0);
551 	/* Do we have BSD label here? */
552 	if (part->type == PART_FREEBSD) {
553 		pa.devname = devname;
554 		pa.pool_guid = ppa->pool_guid;
555 		pa.secsz = ppa->secsz;
556 		table = ptable_open(&pa, part->end - part->start + 1,
557 		    ppa->secsz, zfs_diskread);
558 		if (table != NULL) {
559 			ptable_iterate(table, &pa, zfs_probe_partition);
560 			ptable_close(table);
561 		}
562 	}
563 	close(pa.fd);
564 	return (0);
565 }
566 
567 int
568 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
569 {
570 	struct disk_devdesc *dev;
571 	struct ptable *table;
572 	struct zfs_probe_args pa;
573 	uint64_t mediasz;
574 	int ret;
575 
576 	if (pool_guid)
577 		*pool_guid = 0;
578 	pa.fd = open(devname, O_RDONLY);
579 	if (pa.fd == -1)
580 		return (ENXIO);
581 	/*
582 	 * We will not probe the whole disk, we can not boot from such
583 	 * disks and some systems will misreport the disk sizes and will
584 	 * hang while accessing the disk.
585 	 */
586 	if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) {
587 		int partition = dev->d_partition;
588 		int slice = dev->d_slice;
589 
590 		free(dev);
591 		if (partition != D_PARTNONE && slice != D_SLICENONE) {
592 			ret = zfs_probe(pa.fd, pool_guid);
593 			if (ret == 0)
594 				return (0);
595 		}
596 	}
597 
598 	/* Probe each partition */
599 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
600 	if (ret == 0)
601 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
602 	if (ret == 0) {
603 		pa.devname = devname;
604 		pa.pool_guid = pool_guid;
605 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
606 		    zfs_diskread);
607 		if (table != NULL) {
608 			ptable_iterate(table, &pa, zfs_probe_partition);
609 			ptable_close(table);
610 		}
611 	}
612 	close(pa.fd);
613 	if (pool_guid && *pool_guid == 0)
614 		ret = ENXIO;
615 	return (ret);
616 }
617 
618 /*
619  * Print information about ZFS pools
620  */
621 static int
622 zfs_dev_print(int verbose)
623 {
624 	spa_t *spa;
625 	char line[80];
626 	int ret = 0;
627 
628 	if (STAILQ_EMPTY(&zfs_pools))
629 		return (0);
630 
631 	printf("%s devices:", zfs_dev.dv_name);
632 	if ((ret = pager_output("\n")) != 0)
633 		return (ret);
634 
635 	if (verbose) {
636 		return (spa_all_status());
637 	}
638 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
639 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
640 		ret = pager_output(line);
641 		if (ret != 0)
642 			break;
643 	}
644 	return (ret);
645 }
646 
647 /*
648  * Attempt to open the pool described by (dev) for use by (f).
649  */
650 static int
651 zfs_dev_open(struct open_file *f, ...)
652 {
653 	va_list		args;
654 	struct zfs_devdesc	*dev;
655 	struct zfsmount	*mount;
656 	spa_t		*spa;
657 	int		rv;
658 
659 	va_start(args, f);
660 	dev = va_arg(args, struct zfs_devdesc *);
661 	va_end(args);
662 
663 	if (dev->pool_guid == 0)
664 		spa = STAILQ_FIRST(&zfs_pools);
665 	else
666 		spa = spa_find_by_guid(dev->pool_guid);
667 	if (!spa)
668 		return (ENXIO);
669 	mount = malloc(sizeof(*mount));
670 	rv = zfs_mount(spa, dev->root_guid, mount);
671 	if (rv != 0) {
672 		free(mount);
673 		return (rv);
674 	}
675 	if (mount->objset.os_type != DMU_OST_ZFS) {
676 		printf("Unexpected object set type %ju\n",
677 		    (uintmax_t)mount->objset.os_type);
678 		free(mount);
679 		return (EIO);
680 	}
681 	f->f_devdata = mount;
682 	free(dev);
683 	return (0);
684 }
685 
686 static int
687 zfs_dev_close(struct open_file *f)
688 {
689 
690 	free(f->f_devdata);
691 	f->f_devdata = NULL;
692 	return (0);
693 }
694 
/*
 * Strategy entry installed in zfs_dev.  Block-level I/O is not provided
 * through this hook: every argument is ignored and ENOSYS is returned.
 */
static int
zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
{

	return (ENOSYS);
}
701 
/*
 * Device switch entry for ZFS pools.  No ioctl support (noioctl) and no
 * cleanup hook are provided.
 */
struct devsw zfs_dev = {
	.dv_name = "zfs",
	.dv_type = DEVT_ZFS,
	.dv_init = zfs_dev_init,
	.dv_strategy = zfs_dev_strategy,
	.dv_open = zfs_dev_open,
	.dv_close = zfs_dev_close,
	.dv_ioctl = noioctl,
	.dv_print = zfs_dev_print,
	.dv_cleanup = NULL
};
713 
714 int
715 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
716 {
717 	static char	rootname[ZFS_MAXNAMELEN];
718 	static char	poolname[ZFS_MAXNAMELEN];
719 	spa_t		*spa;
720 	const char	*end;
721 	const char	*np;
722 	const char	*sep;
723 	int		rv;
724 
725 	np = devspec;
726 	if (*np != ':')
727 		return (EINVAL);
728 	np++;
729 	end = strrchr(np, ':');
730 	if (end == NULL)
731 		return (EINVAL);
732 	sep = strchr(np, '/');
733 	if (sep == NULL || sep >= end)
734 		sep = end;
735 	memcpy(poolname, np, sep - np);
736 	poolname[sep - np] = '\0';
737 	if (sep < end) {
738 		sep++;
739 		memcpy(rootname, sep, end - sep);
740 		rootname[end - sep] = '\0';
741 	}
742 	else
743 		rootname[0] = '\0';
744 
745 	spa = spa_find_by_name(poolname);
746 	if (!spa)
747 		return (ENXIO);
748 	dev->pool_guid = spa->spa_guid;
749 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
750 	if (rv != 0)
751 		return (rv);
752 	if (path != NULL)
753 		*path = (*end == '\0') ? end : end + 1;
754 	dev->dd.d_dev = &zfs_dev;
755 	return (0);
756 }
757 
758 char *
759 zfs_fmtdev(void *vdev)
760 {
761 	static char		rootname[ZFS_MAXNAMELEN];
762 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
763 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
764 	spa_t			*spa;
765 
766 	buf[0] = '\0';
767 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
768 		return (buf);
769 
770 	if (dev->pool_guid == 0) {
771 		spa = STAILQ_FIRST(&zfs_pools);
772 		dev->pool_guid = spa->spa_guid;
773 	} else
774 		spa = spa_find_by_guid(dev->pool_guid);
775 	if (spa == NULL) {
776 		printf("ZFS: can't find pool by guid\n");
777 		return (buf);
778 	}
779 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
780 		printf("ZFS: can't find root filesystem\n");
781 		return (buf);
782 	}
783 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
784 		printf("ZFS: can't find filesystem by guid\n");
785 		return (buf);
786 	}
787 
788 	if (rootname[0] == '\0')
789 		sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name);
790 	else
791 		sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name,
792 		    rootname);
793 	return (buf);
794 }
795 
796 int
797 zfs_list(const char *name)
798 {
799 	static char	poolname[ZFS_MAXNAMELEN];
800 	uint64_t	objid;
801 	spa_t		*spa;
802 	const char	*dsname;
803 	int		len;
804 	int		rv;
805 
806 	len = strlen(name);
807 	dsname = strchr(name, '/');
808 	if (dsname != NULL) {
809 		len = dsname - name;
810 		dsname++;
811 	} else
812 		dsname = "";
813 	memcpy(poolname, name, len);
814 	poolname[len] = '\0';
815 
816 	spa = spa_find_by_name(poolname);
817 	if (!spa)
818 		return (ENXIO);
819 	rv = zfs_lookup_dataset(spa, dsname, &objid);
820 	if (rv != 0)
821 		return (rv);
822 
823 	return (zfs_list_dataset(spa, objid));
824 }
825 
/*
 * Seed the boot-environment variables from the current device string.
 *
 * currdev_in is expected to look like "zfs:pool/path/to/be:".  Its last
 * character is dropped and the result stored in zfs_be_active;
 * zfs_be_currpage is reset to "1"; the last path element is removed and
 * what follows "zfs:" becomes zfs_be_root, which is then used to enumerate
 * the available boot environments.
 *
 * Does nothing if currdev_in is empty, does not start with "zfs:", or
 * cannot be duplicated.
 */
void
init_zfs_bootenv(const char *currdev_in)
{
	char *beroot, *currdev;
	int currdev_len;

	currdev = NULL;
	currdev_len = strlen(currdev_in);
	if (currdev_len == 0)
		return;
	if (strncmp(currdev_in, "zfs:", 4) != 0)
		return;
	currdev = strdup(currdev_in);
	if (currdev == NULL)
		return;
	/* Remove the trailing : */
	currdev[currdev_len - 1] = '\0';
	setenv("zfs_be_active", currdev, 1);
	setenv("zfs_be_currpage", "1", 1);
	/* Remove the last element (current bootenv) */
	beroot = strrchr(currdev, '/');
	if (beroot != NULL)
		beroot[0] = '\0';
	/*
	 * For an input of exactly "zfs:" the separator was just stripped
	 * above, so there is no ':' left to skip past.
	 */
	beroot = strchr(currdev, ':');
	if (beroot == NULL) {
		free(currdev);
		return;
	}
	beroot++;
	setenv("zfs_be_root", beroot, 1);
	zfs_bootenv_initial(beroot);
	free(currdev);
}
854 
855 static void
856 zfs_bootenv_initial(const char *name)
857 {
858 	char		poolname[ZFS_MAXNAMELEN], *dsname;
859 	char envname[32], envval[256];
860 	uint64_t	objid;
861 	spa_t		*spa;
862 	int		bootenvs_idx, len, rv;
863 
864 	SLIST_INIT(&zfs_be_head);
865 	zfs_env_count = 0;
866 	len = strlen(name);
867 	dsname = strchr(name, '/');
868 	if (dsname != NULL) {
869 		len = dsname - name;
870 		dsname++;
871 	} else
872 		dsname = "";
873 	strlcpy(poolname, name, len + 1);
874 	spa = spa_find_by_name(poolname);
875 	if (spa == NULL)
876 		return;
877 	rv = zfs_lookup_dataset(spa, dsname, &objid);
878 	if (rv != 0)
879 		return;
880 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
881 	bootenvs_idx = 0;
882 	/* Populate the initial environment variables */
883 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
884 		/* Enumerate all bootenvs for general usage */
885 		snprintf(envname, sizeof(envname), "bootenvs[%d]", bootenvs_idx);
886 		snprintf(envval, sizeof(envval), "zfs:%s/%s", name, zfs_be->name);
887 		rv = setenv(envname, envval, 1);
888 		if (rv != 0)
889 			break;
890 		bootenvs_idx++;
891 	}
892 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
893 	setenv("bootenvs_count", envval, 1);
894 
895 	/* Clean up the SLIST of ZFS BEs */
896 	while (!SLIST_EMPTY(&zfs_be_head)) {
897 		zfs_be = SLIST_FIRST(&zfs_be_head);
898 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
899 		free(zfs_be);
900 	}
901 
902 	return;
903 
904 }
905 
906 int
907 zfs_bootenv(const char *name)
908 {
909 	static char	poolname[ZFS_MAXNAMELEN], *dsname, *root;
910 	char		becount[4];
911 	uint64_t	objid;
912 	spa_t		*spa;
913 	int		len, rv, pages, perpage, currpage;
914 
915 	if (name == NULL)
916 		return (EINVAL);
917 	if ((root = getenv("zfs_be_root")) == NULL)
918 		return (EINVAL);
919 
920 	if (strcmp(name, root) != 0) {
921 		if (setenv("zfs_be_root", name, 1) != 0)
922 			return (ENOMEM);
923 	}
924 
925 	SLIST_INIT(&zfs_be_head);
926 	zfs_env_count = 0;
927 	len = strlen(name);
928 	dsname = strchr(name, '/');
929 	if (dsname != NULL) {
930 		len = dsname - name;
931 		dsname++;
932 	} else
933 		dsname = "";
934 	memcpy(poolname, name, len);
935 	poolname[len] = '\0';
936 
937 	spa = spa_find_by_name(poolname);
938 	if (!spa)
939 		return (ENXIO);
940 	rv = zfs_lookup_dataset(spa, dsname, &objid);
941 	if (rv != 0)
942 		return (rv);
943 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
944 
945 	/* Calculate and store the number of pages of BEs */
946 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
947 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
948 	snprintf(becount, 4, "%d", pages);
949 	if (setenv("zfs_be_pages", becount, 1) != 0)
950 		return (ENOMEM);
951 
952 	/* Roll over the page counter if it has exceeded the maximum */
953 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
954 	if (currpage > pages) {
955 		if (setenv("zfs_be_currpage", "1", 1) != 0)
956 			return (ENOMEM);
957 	}
958 
959 	/* Populate the menu environment variables */
960 	zfs_set_env();
961 
962 	/* Clean up the SLIST of ZFS BEs */
963 	while (!SLIST_EMPTY(&zfs_be_head)) {
964 		zfs_be = SLIST_FIRST(&zfs_be_head);
965 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
966 		free(zfs_be);
967 	}
968 
969 	return (rv);
970 }
971 
972 int
973 zfs_belist_add(const char *name, uint64_t value __unused)
974 {
975 
976 	/* Skip special datasets that start with a $ character */
977 	if (strncmp(name, "$", 1) == 0) {
978 		return (0);
979 	}
980 	/* Add the boot environment to the head of the SLIST */
981 	zfs_be = malloc(sizeof(struct zfs_be_entry));
982 	if (zfs_be == NULL) {
983 		return (ENOMEM);
984 	}
985 	zfs_be->name = name;
986 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
987 	zfs_env_count++;
988 
989 	return (0);
990 }
991 
992 int
993 zfs_set_env(void)
994 {
995 	char envname[32], envval[256];
996 	char *beroot, *pagenum;
997 	int rv, page, ctr;
998 
999 	beroot = getenv("zfs_be_root");
1000 	if (beroot == NULL) {
1001 		return (1);
1002 	}
1003 
1004 	pagenum = getenv("zfs_be_currpage");
1005 	if (pagenum != NULL) {
1006 		page = strtol(pagenum, NULL, 10);
1007 	} else {
1008 		page = 1;
1009 	}
1010 
1011 	ctr = 1;
1012 	rv = 0;
1013 	zfs_env_index = ZFS_BE_FIRST;
1014 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1015 		/* Skip to the requested page number */
1016 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1017 			ctr++;
1018 			continue;
1019 		}
1020 
1021 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1022 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1023 		rv = setenv(envname, envval, 1);
1024 		if (rv != 0) {
1025 			break;
1026 		}
1027 
1028 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1029 		rv = setenv(envname, envval, 1);
1030 		if (rv != 0){
1031 			break;
1032 		}
1033 
1034 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1035 		rv = setenv(envname, "set_bootenv", 1);
1036 		if (rv != 0){
1037 			break;
1038 		}
1039 
1040 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1041 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1042 		rv = setenv(envname, envval, 1);
1043 		if (rv != 0){
1044 			break;
1045 		}
1046 
1047 		zfs_env_index++;
1048 		if (zfs_env_index > ZFS_BE_LAST) {
1049 			break;
1050 		}
1051 
1052 	}
1053 
1054 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
1055 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1056 		(void)unsetenv(envname);
1057 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1058 		(void)unsetenv(envname);
1059 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1060 		(void)unsetenv(envname);
1061 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1062 		(void)unsetenv(envname);
1063 	}
1064 
1065 	return (rv);
1066 }
1067