1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
25  * Copyright 2017 RackTop Systems.
26  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
27  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
28  * Copyright (c) 2019 Datto Inc.
29  */
30 
31 /*
32  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
33  * It has the following characteristics:
34  *
35  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
36  *  threads.  This is accomplished primarily by avoiding global data
37  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
38  *  process to have multiple libzfs "instances".  Therefore, we store
39  *  our few pieces of data (e.g. the file descriptor) in global
40  *  variables.  The fd is reference-counted so that the libzfs_core
41  *  library can be "initialized" multiple times (e.g. by different
42  *  consumers within the same process).
43  *
44  *  - Committed Interface.  The libzfs_core interface will be committed,
45  *  therefore consumers can compile against it and be confident that
46  *  their code will continue to work on future releases of this code.
47  *  Currently, the interface is Evolving (not Committed), but we intend
48  *  to commit to it once it is more complete and we determine that it
49  *  meets the needs of all consumers.
50  *
51  *  - Programmatic Error Handling.  libzfs_core communicates errors with
52  *  defined error numbers, and doesn't print anything to stdout/stderr.
53  *
54  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
55  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
56  *  between libzfs_core functions and ioctls to ZFS_DEV.
57  *
58  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
59  *  with kernel ioctls, and kernel ioctls are generally atomic, each
60  *  libzfs_core function is atomic.  For example, creating multiple
61  *  snapshots with a single call to lzc_snapshot() is atomic -- it
62  *  can't fail with only some of the requested snapshots created, even
63  *  in the event of power loss or system crash.
64  *
65  *  - Continued libzfs Support.  Some higher-level operations (e.g.
66  *  support for "zfs send -R") are too complicated to fit the scope of
67  *  libzfs_core.  This functionality will continue to live in libzfs.
68  *  Where appropriate, libzfs will use the underlying atomic operations
69  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
70  *  zfs receive" by using individual "send one snapshot", rename,
71  *  destroy, and "receive one snapshot" operations in libzfs_core.
72  *  /sbin/zfs and /sbin/zpool will link with both libzfs and
73  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
74  *  since that will be the supported, stable interface going forwards.
75  */
76 
77 #include <libzfs_core.h>
78 #include <ctype.h>
79 #include <unistd.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #ifdef ZFS_DEBUG
83 #include <stdio.h>
84 #endif
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <pthread.h>
88 #include <libzutil.h>
89 #include <sys/nvpair.h>
90 #include <sys/param.h>
91 #include <sys/types.h>
92 #include <sys/stat.h>
93 #include <sys/zfs_ioctl.h>
94 #if __FreeBSD__
95 #define	BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */
96 #endif
97 
/*
 * Process-wide library state.  g_lock is held by libzfs_core_init() and
 * libzfs_core_fini() while they update g_refcount and g_fd.
 */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;	/* libzfs_core_init() calls not yet undone */
static int g_fd = -1;	/* descriptor opened on ZFS_DEV; -1 when closed */
101 
#ifdef ZFS_DEBUG
/*
 * Fault-injection state consulted by lzc_ioctl(): the ioctl to fail and the
 * error to fail it with.  ZFS_IOC_LAST means no override is configured.
 */
static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
static zfs_errno_t fail_ioc_err;

/*
 * Configure the ioctl fault-injection override from the environment.
 *
 * Does nothing once an override has been recorded.  A missing or malformed
 * ZFS_IOC_TEST value, or a command number outside [0, ZFS_IOC_LAST), leaves
 * the override unset.
 */
static void
libzfs_core_debug_ioc(void)
{
	/*
	 * To test running newer user space binaries with kernels
	 * that don't yet support an ioctl or a new ioctl arg we
	 * provide an override to intentionally fail an ioctl.
	 *
	 * USAGE:
	 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
	 *
	 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
	 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
	 *
	 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
	 * cannot checkpoint 'tank': the loaded zfs module does not support
	 * this operation. A reboot may be required to enable this operation.
	 */
	if (fail_ioc_cmd != ZFS_IOC_LAST)
		return;

	const char *ioc_test = getenv("ZFS_IOC_TEST");
	/* "%i" stores through an int pointer, so parse into plain ints. */
	int ioc_num = 0, ioc_err = 0;

	if (ioc_test != NULL &&
	    sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
	    ioc_num >= 0 && (unsigned int)ioc_num < ZFS_IOC_LAST) {
		fail_ioc_cmd = (zfs_ioc_t)ioc_num;
		fail_ioc_err = (zfs_errno_t)ioc_err;
	}
}
#endif
137 
138 int
139 libzfs_core_init(void)
140 {
141 	(void) pthread_mutex_lock(&g_lock);
142 	if (g_refcount == 0) {
143 		g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
144 		if (g_fd < 0) {
145 			(void) pthread_mutex_unlock(&g_lock);
146 			return (errno);
147 		}
148 	}
149 	g_refcount++;
150 
151 #ifdef ZFS_DEBUG
152 	libzfs_core_debug_ioc();
153 #endif
154 	(void) pthread_mutex_unlock(&g_lock);
155 	return (0);
156 }
157 
158 void
159 libzfs_core_fini(void)
160 {
161 	(void) pthread_mutex_lock(&g_lock);
162 	ASSERT3S(g_refcount, >, 0);
163 
164 	g_refcount--;
165 
166 	if (g_refcount == 0 && g_fd != -1) {
167 		(void) close(g_fd);
168 		g_fd = -1;
169 	}
170 	(void) pthread_mutex_unlock(&g_lock);
171 }
172 
173 static int
174 lzc_ioctl(zfs_ioc_t ioc, const char *name,
175     nvlist_t *source, nvlist_t **resultp)
176 {
177 	zfs_cmd_t zc = {"\0"};
178 	int error = 0;
179 	char *packed = NULL;
180 	size_t size = 0;
181 
182 	ASSERT3S(g_refcount, >, 0);
183 	VERIFY3S(g_fd, !=, -1);
184 
185 #ifdef ZFS_DEBUG
186 	if (ioc == fail_ioc_cmd)
187 		return (fail_ioc_err);
188 #endif
189 
190 	if (name != NULL)
191 		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
192 
193 	if (source != NULL) {
194 		packed = fnvlist_pack(source, &size);
195 		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
196 		zc.zc_nvlist_src_size = size;
197 	}
198 
199 	if (resultp != NULL) {
200 		*resultp = NULL;
201 		if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
202 			zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
203 			    ZCP_ARG_MEMLIMIT);
204 		} else {
205 			zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
206 		}
207 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
208 		    malloc(zc.zc_nvlist_dst_size);
209 		if (zc.zc_nvlist_dst == (uint64_t)0) {
210 			error = ENOMEM;
211 			goto out;
212 		}
213 	}
214 
215 	while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) {
216 		/*
217 		 * If ioctl exited with ENOMEM, we retry the ioctl after
218 		 * increasing the size of the destination nvlist.
219 		 *
220 		 * Channel programs that exit with ENOMEM ran over the
221 		 * lua memory sandbox; they should not be retried.
222 		 */
223 		if (errno == ENOMEM && resultp != NULL &&
224 		    ioc != ZFS_IOC_CHANNEL_PROGRAM) {
225 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
226 			zc.zc_nvlist_dst_size *= 2;
227 			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
228 			    malloc(zc.zc_nvlist_dst_size);
229 			if (zc.zc_nvlist_dst == (uint64_t)0) {
230 				error = ENOMEM;
231 				goto out;
232 			}
233 		} else {
234 			error = errno;
235 			break;
236 		}
237 	}
238 	if (zc.zc_nvlist_dst_filled) {
239 		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
240 		    zc.zc_nvlist_dst_size);
241 	}
242 
243 out:
244 	if (packed != NULL)
245 		fnvlist_pack_free(packed, size);
246 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
247 	return (error);
248 }
249 
250 int
251 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
252     uint8_t *wkeydata, uint_t wkeylen)
253 {
254 	int error;
255 	nvlist_t *hidden_args = NULL;
256 	nvlist_t *args = fnvlist_alloc();
257 
258 	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
259 	if (props != NULL)
260 		fnvlist_add_nvlist(args, "props", props);
261 
262 	if (wkeydata != NULL) {
263 		hidden_args = fnvlist_alloc();
264 		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
265 		    wkeylen);
266 		fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
267 	}
268 
269 	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
270 	nvlist_free(hidden_args);
271 	nvlist_free(args);
272 	return (error);
273 }
274 
275 int
276 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
277 {
278 	int error;
279 	nvlist_t *hidden_args = NULL;
280 	nvlist_t *args = fnvlist_alloc();
281 
282 	fnvlist_add_string(args, "origin", origin);
283 	if (props != NULL)
284 		fnvlist_add_nvlist(args, "props", props);
285 	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
286 	nvlist_free(hidden_args);
287 	nvlist_free(args);
288 	return (error);
289 }
290 
291 int
292 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
293 {
294 	/*
295 	 * The promote ioctl is still legacy, so we need to construct our
296 	 * own zfs_cmd_t rather than using lzc_ioctl().
297 	 */
298 	zfs_cmd_t zc = {"\0"};
299 
300 	ASSERT3S(g_refcount, >, 0);
301 	VERIFY3S(g_fd, !=, -1);
302 
303 	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
304 	if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
305 		int error = errno;
306 		if (error == EEXIST && snapnamebuf != NULL)
307 			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
308 		return (error);
309 	}
310 	return (0);
311 }
312 
313 int
314 lzc_rename(const char *source, const char *target)
315 {
316 	zfs_cmd_t zc = {"\0"};
317 	int error;
318 
319 	ASSERT3S(g_refcount, >, 0);
320 	VERIFY3S(g_fd, !=, -1);
321 	(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
322 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
323 	error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
324 	if (error != 0)
325 		error = errno;
326 	return (error);
327 }
328 
329 int
330 lzc_destroy(const char *fsname)
331 {
332 	int error;
333 	nvlist_t *args = fnvlist_alloc();
334 	error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
335 	nvlist_free(args);
336 	return (error);
337 }
338 
339 /*
340  * Creates snapshots.
341  *
342  * The keys in the snaps nvlist are the snapshots to be created.
343  * They must all be in the same pool.
344  *
345  * The props nvlist is properties to set.  Currently only user properties
346  * are supported.  { user:prop_name -> string value }
347  *
348  * The returned results nvlist will have an entry for each snapshot that failed.
349  * The value will be the (int32) error code.
350  *
351  * The return value will be 0 if all snapshots were created, otherwise it will
352  * be the errno of a (unspecified) snapshot that failed.
353  */
354 int
355 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
356 {
357 	nvpair_t *elem;
358 	nvlist_t *args;
359 	int error;
360 	char pool[ZFS_MAX_DATASET_NAME_LEN];
361 
362 	*errlist = NULL;
363 
364 	/* determine the pool name */
365 	elem = nvlist_next_nvpair(snaps, NULL);
366 	if (elem == NULL)
367 		return (0);
368 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
369 	pool[strcspn(pool, "/@")] = '\0';
370 
371 	args = fnvlist_alloc();
372 	fnvlist_add_nvlist(args, "snaps", snaps);
373 	if (props != NULL)
374 		fnvlist_add_nvlist(args, "props", props);
375 
376 	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
377 	nvlist_free(args);
378 
379 	return (error);
380 }
381 
382 /*
383  * Destroys snapshots.
384  *
385  * The keys in the snaps nvlist are the snapshots to be destroyed.
386  * They must all be in the same pool.
387  *
388  * Snapshots that do not exist will be silently ignored.
389  *
390  * If 'defer' is not set, and a snapshot has user holds or clones, the
391  * destroy operation will fail and none of the snapshots will be
392  * destroyed.
393  *
394  * If 'defer' is set, and a snapshot has user holds or clones, it will be
395  * marked for deferred destruction, and will be destroyed when the last hold
396  * or clone is removed/destroyed.
397  *
398  * The return value will be 0 if all snapshots were destroyed (or marked for
399  * later destruction if 'defer' is set) or didn't exist to begin with.
400  *
401  * Otherwise the return value will be the errno of a (unspecified) snapshot
402  * that failed, no snapshots will be destroyed, and the errlist will have an
403  * entry for each snapshot that failed.  The value in the errlist will be
404  * the (int32) error code.
405  */
406 int
407 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
408 {
409 	nvpair_t *elem;
410 	nvlist_t *args;
411 	int error;
412 	char pool[ZFS_MAX_DATASET_NAME_LEN];
413 
414 	/* determine the pool name */
415 	elem = nvlist_next_nvpair(snaps, NULL);
416 	if (elem == NULL)
417 		return (0);
418 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
419 	pool[strcspn(pool, "/@")] = '\0';
420 
421 	args = fnvlist_alloc();
422 	fnvlist_add_nvlist(args, "snaps", snaps);
423 	if (defer)
424 		fnvlist_add_boolean(args, "defer");
425 
426 	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
427 	nvlist_free(args);
428 
429 	return (error);
430 }
431 
432 int
433 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
434     uint64_t *usedp)
435 {
436 	nvlist_t *args;
437 	nvlist_t *result;
438 	int err;
439 	char fs[ZFS_MAX_DATASET_NAME_LEN];
440 	char *atp;
441 
442 	/* determine the fs name */
443 	(void) strlcpy(fs, firstsnap, sizeof (fs));
444 	atp = strchr(fs, '@');
445 	if (atp == NULL)
446 		return (EINVAL);
447 	*atp = '\0';
448 
449 	args = fnvlist_alloc();
450 	fnvlist_add_string(args, "firstsnap", firstsnap);
451 
452 	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
453 	nvlist_free(args);
454 	if (err == 0)
455 		*usedp = fnvlist_lookup_uint64(result, "used");
456 	fnvlist_free(result);
457 
458 	return (err);
459 }
460 
461 boolean_t
462 lzc_exists(const char *dataset)
463 {
464 	/*
465 	 * The objset_stats ioctl is still legacy, so we need to construct our
466 	 * own zfs_cmd_t rather than using lzc_ioctl().
467 	 */
468 	zfs_cmd_t zc = {"\0"};
469 
470 	ASSERT3S(g_refcount, >, 0);
471 	VERIFY3S(g_fd, !=, -1);
472 
473 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
474 	return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
475 }
476 
477 /*
478  * outnvl is unused.
479  * It was added to preserve the function signature in case it is
480  * needed in the future.
481  */
482 int
483 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
484 {
485 	(void) outnvl;
486 	return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
487 }
488 
489 /*
490  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
491  * the snapshot can not be destroyed.  (However, it can be marked for deletion
492  * by lzc_destroy_snaps(defer=B_TRUE).)
493  *
494  * The keys in the nvlist are snapshot names.
495  * The snapshots must all be in the same pool.
496  * The value is the name of the hold (string type).
497  *
498  * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
499  * In this case, when the cleanup_fd is closed (including on process
500  * termination), the holds will be released.  If the system is shut down
501  * uncleanly, the holds will be released when the pool is next opened
502  * or imported.
503  *
504  * Holds for snapshots which don't exist will be skipped and have an entry
505  * added to errlist, but will not cause an overall failure.
506  *
507  * The return value will be 0 if all holds, for snapshots that existed,
508  * were successfully created.
509  *
510  * Otherwise the return value will be the errno of a (unspecified) hold that
511  * failed and no holds will be created.
512  *
513  * In all cases the errlist will have an entry for each hold that failed
514  * (name = snapshot), with its value being the error code (int32).
515  */
516 int
517 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
518 {
519 	char pool[ZFS_MAX_DATASET_NAME_LEN];
520 	nvlist_t *args;
521 	nvpair_t *elem;
522 	int error;
523 
524 	/* determine the pool name */
525 	elem = nvlist_next_nvpair(holds, NULL);
526 	if (elem == NULL)
527 		return (0);
528 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
529 	pool[strcspn(pool, "/@")] = '\0';
530 
531 	args = fnvlist_alloc();
532 	fnvlist_add_nvlist(args, "holds", holds);
533 	if (cleanup_fd != -1)
534 		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
535 
536 	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
537 	nvlist_free(args);
538 	return (error);
539 }
540 
541 /*
542  * Release "user holds" on snapshots.  If the snapshot has been marked for
543  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
544  * any clones, and all the user holds are removed, then the snapshot will be
545  * destroyed.
546  *
547  * The keys in the nvlist are snapshot names.
548  * The snapshots must all be in the same pool.
549  * The value is an nvlist whose keys are the holds to remove.
550  *
551  * Holds which failed to release because they didn't exist will have an entry
552  * added to errlist, but will not cause an overall failure.
553  *
554  * The return value will be 0 if the nvl holds was empty or all holds that
555  * existed, were successfully removed.
556  *
557  * Otherwise the return value will be the errno of a (unspecified) hold that
558  * failed to release and no holds will be released.
559  *
560  * In all cases the errlist will have an entry for each hold that failed to
561  * to release.
562  */
563 int
564 lzc_release(nvlist_t *holds, nvlist_t **errlist)
565 {
566 	char pool[ZFS_MAX_DATASET_NAME_LEN];
567 	nvpair_t *elem;
568 
569 	/* determine the pool name */
570 	elem = nvlist_next_nvpair(holds, NULL);
571 	if (elem == NULL)
572 		return (0);
573 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
574 	pool[strcspn(pool, "/@")] = '\0';
575 
576 	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
577 }
578 
579 /*
580  * Retrieve list of user holds on the specified snapshot.
581  *
582  * On success, *holdsp will be set to an nvlist which the caller must free.
583  * The keys are the names of the holds, and the value is the creation time
584  * of the hold (uint64) in seconds since the epoch.
585  */
586 int
587 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
588 {
589 	return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
590 }
591 
592 static unsigned int
593 max_pipe_buffer(int infd)
594 {
595 #if __linux__
596 	static unsigned int max;
597 	if (max == 0) {
598 		max = 1048576; /* fs/pipe.c default */
599 
600 		FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
601 		if (procf != NULL) {
602 			if (fscanf(procf, "%u", &max) <= 0) {
603 				/* ignore error: max untouched if parse fails */
604 			}
605 			fclose(procf);
606 		}
607 	}
608 
609 	unsigned int cur = fcntl(infd, F_GETPIPE_SZ);
610 	/*
611 	 * Sadly, Linux has an unfixed deadlock if you do SETPIPE_SZ on a pipe
612 	 * with data in it.
613 	 * cf. #13232, https://bugzilla.kernel.org/show_bug.cgi?id=212295
614 	 *
615 	 * And since the problem is in waking up the writer, there's nothing
616 	 * we can do about it from here.
617 	 *
618 	 * So if people want to, they can set this, but they
619 	 * may regret it...
620 	 */
621 	if (getenv("ZFS_SET_PIPE_MAX") == NULL)
622 		return (cur);
623 	if (cur < max && fcntl(infd, F_SETPIPE_SZ, max) != -1)
624 		cur = max;
625 	return (cur);
626 #else
627 	/* FreeBSD automatically resizes */
628 	(void) infd;
629 	return (BIG_PIPE_SIZE);
630 #endif
631 }
632 
633 #if __linux__
634 struct send_worker_ctx {
635 	int from;	/* read end of pipe, with send data; closed on exit */
636 	int to;		/* original arbitrary output fd; mustn't be a pipe */
637 };
638 
639 static void *
640 send_worker(void *arg)
641 {
642 	struct send_worker_ctx *ctx = arg;
643 	unsigned int bufsiz = max_pipe_buffer(ctx->from);
644 	ssize_t rd;
645 
646 	while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
647 	    SPLICE_F_MOVE | SPLICE_F_MORE)) > 0)
648 		;
649 
650 	int err = (rd == -1) ? errno : 0;
651 	close(ctx->from);
652 	return ((void *)(uintptr_t)err);
653 }
654 #endif
655 
656 /*
657  * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
658  * ("fs: don't allow kernel reads and writes without iter ops"),
659  * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c.
660  *
661  * This wrapper transparently executes func() with a pipe
662  * by spawning a thread to copy from that pipe to the original output
663  * in the background.
664  *
665  * Returns the error from func(), if nonzero,
666  * otherwise the error from the thread.
667  *
668  * No-op if orig_fd is -1, already a pipe (but the buffer size is bumped),
669  * and on not-Linux; as such, it is safe to wrap/call wrapped functions
670  * in a wrapped context.
671  */
672 int
673 lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data)
674 {
675 #if __linux__
676 	struct stat sb;
677 	if (orig_fd != -1 && fstat(orig_fd, &sb) == -1)
678 		return (errno);
679 	if (orig_fd == -1 || S_ISFIFO(sb.st_mode)) {
680 		if (orig_fd != -1)
681 			(void) max_pipe_buffer(orig_fd);
682 		return (func(orig_fd, data));
683 	}
684 	if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY)
685 		return (errno = EBADF);
686 
687 	int rw[2];
688 	if (pipe2(rw, O_CLOEXEC) == -1)
689 		return (errno);
690 
691 	int err;
692 	pthread_t send_thread;
693 	struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd};
694 	if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx))
695 	    != 0) {
696 		close(rw[0]);
697 		close(rw[1]);
698 		return (errno = err);
699 	}
700 
701 	err = func(rw[1], data);
702 
703 	void *send_err;
704 	close(rw[1]);
705 	pthread_join(send_thread, &send_err);
706 	if (err == 0 && send_err != 0)
707 		errno = err = (uintptr_t)send_err;
708 
709 	return (err);
710 #else
711 	return (func(orig_fd, data));
712 #endif
713 }
714 
715 /*
716  * Generate a zfs send stream for the specified snapshot and write it to
717  * the specified file descriptor.
718  *
719  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
720  *
721  * If "from" is NULL, a full (non-incremental) stream will be sent.
722  * If "from" is non-NULL, it must be the full name of a snapshot or
723  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
724  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
725  * bookmark must represent an earlier point in the history of "snapname").
726  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
727  * or it can be the origin of "snapname"'s filesystem, or an earlier
728  * snapshot in the origin, etc.
729  *
730  * "fd" is the file descriptor to write the send stream to.
731  *
732  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
733  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
734  * records with drr_blksz > 128K.
735  *
736  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
737  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
738  * which the receiving system must support (as indicated by support
739  * for the "embedded_data" feature).
740  *
741  * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
742  * compressed WRITE records for blocks which are compressed on disk and in
743  * memory.  If the lz4_compress feature is active on the sending system, then
744  * the receiving system must have that feature enabled as well.
745  *
746  * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
747  * datasets, by sending data exactly as it exists on disk.  This allows backups
748  * to be taken even if encryption keys are not currently loaded.
749  */
750 int
751 lzc_send(const char *snapname, const char *from, int fd,
752     enum lzc_send_flags flags)
753 {
754 	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
755 	    NULL));
756 }
757 
758 int
759 lzc_send_redacted(const char *snapname, const char *from, int fd,
760     enum lzc_send_flags flags, const char *redactbook)
761 {
762 	return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
763 	    redactbook));
764 }
765 
766 int
767 lzc_send_resume(const char *snapname, const char *from, int fd,
768     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
769 {
770 	return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
771 	    resumeoff, NULL));
772 }
773 
774 /*
775  * snapname: The name of the "tosnap", or the snapshot whose contents we are
776  * sending.
777  * from: The name of the "fromsnap", or the incremental source.
778  * fd: File descriptor to write the stream to.
779  * flags: flags that determine features to be used by the stream.
780  * resumeobj: Object to resume from, for resuming send
781  * resumeoff: Offset to resume from, for resuming send.
782  * redactnv: nvlist of string -> boolean(ignored) containing the names of all
783  * the snapshots that we should redact with respect to.
784  * redactbook: Name of the redaction bookmark to create.
785  *
786  * Pre-wrapped.
787  */
788 static int
789 lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd,
790     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
791     const char *redactbook)
792 {
793 	nvlist_t *args;
794 	int err;
795 
796 	args = fnvlist_alloc();
797 	fnvlist_add_int32(args, "fd", fd);
798 	if (from != NULL)
799 		fnvlist_add_string(args, "fromsnap", from);
800 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
801 		fnvlist_add_boolean(args, "largeblockok");
802 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
803 		fnvlist_add_boolean(args, "embedok");
804 	if (flags & LZC_SEND_FLAG_COMPRESS)
805 		fnvlist_add_boolean(args, "compressok");
806 	if (flags & LZC_SEND_FLAG_RAW)
807 		fnvlist_add_boolean(args, "rawok");
808 	if (flags & LZC_SEND_FLAG_SAVED)
809 		fnvlist_add_boolean(args, "savedok");
810 	if (resumeobj != 0 || resumeoff != 0) {
811 		fnvlist_add_uint64(args, "resume_object", resumeobj);
812 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
813 	}
814 	if (redactbook != NULL)
815 		fnvlist_add_string(args, "redactbook", redactbook);
816 
817 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
818 	nvlist_free(args);
819 	return (err);
820 }
821 
822 struct lzc_send_resume_redacted {
823 	const char *snapname;
824 	const char *from;
825 	enum lzc_send_flags flags;
826 	uint64_t resumeobj;
827 	uint64_t resumeoff;
828 	const char *redactbook;
829 };
830 
831 static int
832 lzc_send_resume_redacted_cb(int fd, void *arg)
833 {
834 	struct lzc_send_resume_redacted *zsrr = arg;
835 	return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from,
836 	    fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff,
837 	    zsrr->redactbook));
838 }
839 
840 int
841 lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
842     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
843     const char *redactbook)
844 {
845 	struct lzc_send_resume_redacted zsrr = {
846 		.snapname = snapname,
847 		.from = from,
848 		.flags = flags,
849 		.resumeobj = resumeobj,
850 		.resumeoff = resumeoff,
851 		.redactbook = redactbook,
852 	};
853 	return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr));
854 }
855 
856 /*
857  * "from" can be NULL, a snapshot, or a bookmark.
858  *
859  * If from is NULL, a full (non-incremental) stream will be estimated.  This
860  * is calculated very efficiently.
861  *
862  * If from is a snapshot, lzc_send_space uses the deadlists attached to
863  * each snapshot to efficiently estimate the stream size.
864  *
865  * If from is a bookmark, the indirect blocks in the destination snapshot
866  * are traversed, looking for blocks with a birth time since the creation TXG of
867  * the snapshot this bookmark was created from.  This will result in
868  * significantly more I/O and be less efficient than a send space estimation on
869  * an equivalent snapshot. This process is also used if redact_snaps is
870  * non-null.
871  *
872  * Pre-wrapped.
873  */
874 static int
875 lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from,
876     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
877     uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
878 {
879 	nvlist_t *args;
880 	nvlist_t *result;
881 	int err;
882 
883 	args = fnvlist_alloc();
884 	if (from != NULL)
885 		fnvlist_add_string(args, "from", from);
886 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
887 		fnvlist_add_boolean(args, "largeblockok");
888 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
889 		fnvlist_add_boolean(args, "embedok");
890 	if (flags & LZC_SEND_FLAG_COMPRESS)
891 		fnvlist_add_boolean(args, "compressok");
892 	if (flags & LZC_SEND_FLAG_RAW)
893 		fnvlist_add_boolean(args, "rawok");
894 	if (resumeobj != 0 || resumeoff != 0) {
895 		fnvlist_add_uint64(args, "resume_object", resumeobj);
896 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
897 		fnvlist_add_uint64(args, "bytes", resume_bytes);
898 	}
899 	if (redactbook != NULL)
900 		fnvlist_add_string(args, "redactbook", redactbook);
901 	if (fd != -1)
902 		fnvlist_add_int32(args, "fd", fd);
903 
904 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
905 	nvlist_free(args);
906 	if (err == 0)
907 		*spacep = fnvlist_lookup_uint64(result, "space");
908 	nvlist_free(result);
909 	return (err);
910 }
911 
912 struct lzc_send_space_resume_redacted {
913 	const char *snapname;
914 	const char *from;
915 	enum lzc_send_flags flags;
916 	uint64_t resumeobj;
917 	uint64_t resumeoff;
918 	uint64_t resume_bytes;
919 	const char *redactbook;
920 	uint64_t *spacep;
921 };
922 
923 static int
924 lzc_send_space_resume_redacted_cb(int fd, void *arg)
925 {
926 	struct lzc_send_space_resume_redacted *zssrr = arg;
927 	return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname,
928 	    zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff,
929 	    zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep));
930 }
931 
932 int
933 lzc_send_space_resume_redacted(const char *snapname, const char *from,
934     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
935     uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
936 {
937 	struct lzc_send_space_resume_redacted zssrr = {
938 		.snapname = snapname,
939 		.from = from,
940 		.flags = flags,
941 		.resumeobj = resumeobj,
942 		.resumeoff = resumeoff,
943 		.resume_bytes = resume_bytes,
944 		.redactbook = redactbook,
945 		.spacep = spacep,
946 	};
947 	return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb,
948 	    fd, &zssrr));
949 }
950 
951 int
952 lzc_send_space(const char *snapname, const char *from,
953     enum lzc_send_flags flags, uint64_t *spacep)
954 {
955 	return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
956 	    NULL, -1, spacep));
957 }
958 
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', issuing as many read()
 * calls as needed to cope with short reads.
 *
 * Returns 0 once the whole buffer has been filled (or when 'ilen' is 0), and
 * EIO if read() fails, if end-of-file is reached first, or if 'ilen' is
 * negative.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	/*
	 * Stop as soon as the request is satisfied.  Looping until read()
	 * returns 0 would issue a trailing zero-length read, on which POSIX
	 * allows an error to be reported even though all data has arrived.
	 */
	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0)
			return (EIO);
		if (rv == 0)
			break;	/* premature end-of-file */

		cp += rv;
		len -= rv;
	}

	if (len != 0)
		return (EIO);

	return (0);
}
977 
978 /*
979  * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
980  * legacy ZFS_IOC_RECV user/kernel interface.  The new interface supports all
981  * stream options but is currently only used for resumable streams.  This way
982  * updated user space utilities will interoperate with older kernel modules.
983  *
984  * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
985  */
986 static int
987 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
988     uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
989     boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd,
990     const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
991     uint64_t *errflags, nvlist_t **errors)
992 {
993 	dmu_replay_record_t drr;
994 	char fsname[MAXPATHLEN];
995 	char *atp;
996 	int error;
997 	boolean_t payload = B_FALSE;
998 
999 	ASSERT3S(g_refcount, >, 0);
1000 	VERIFY3S(g_fd, !=, -1);
1001 
1002 	/* Set 'fsname' to the name of containing filesystem */
1003 	(void) strlcpy(fsname, snapname, sizeof (fsname));
1004 	atp = strchr(fsname, '@');
1005 	if (atp == NULL)
1006 		return (EINVAL);
1007 	*atp = '\0';
1008 
1009 	/* If the fs does not exist, try its parent. */
1010 	if (!lzc_exists(fsname)) {
1011 		char *slashp = strrchr(fsname, '/');
1012 		if (slashp == NULL)
1013 			return (ENOENT);
1014 		*slashp = '\0';
1015 	}
1016 
1017 	/*
1018 	 * It is not uncommon for gigabytes to be processed by zfs receive.
1019 	 * Speculatively increase the buffer size if supported by the platform.
1020 	 */
1021 	struct stat sb;
1022 	if (fstat(input_fd, &sb) == -1)
1023 		return (errno);
1024 	if (S_ISFIFO(sb.st_mode))
1025 		(void) max_pipe_buffer(input_fd);
1026 
1027 	/*
1028 	 * The begin_record is normally a non-byteswapped BEGIN record.
1029 	 * For resumable streams it may be set to any non-byteswapped
1030 	 * dmu_replay_record_t.
1031 	 */
1032 	if (begin_record == NULL) {
1033 		error = recv_read(input_fd, &drr, sizeof (drr));
1034 		if (error != 0)
1035 			return (error);
1036 	} else {
1037 		drr = *begin_record;
1038 		payload = (begin_record->drr_payloadlen != 0);
1039 	}
1040 
1041 	/*
1042 	 * All receives with a payload should use the new interface.
1043 	 */
1044 	if (resumable || heal || raw || wkeydata != NULL || payload) {
1045 		nvlist_t *outnvl = NULL;
1046 		nvlist_t *innvl = fnvlist_alloc();
1047 
1048 		fnvlist_add_string(innvl, "snapname", snapname);
1049 
1050 		if (recvdprops != NULL)
1051 			fnvlist_add_nvlist(innvl, "props", recvdprops);
1052 
1053 		if (localprops != NULL)
1054 			fnvlist_add_nvlist(innvl, "localprops", localprops);
1055 
1056 		if (wkeydata != NULL) {
1057 			/*
1058 			 * wkeydata must be placed in the special
1059 			 * ZPOOL_HIDDEN_ARGS nvlist so that it
1060 			 * will not be printed to the zpool history.
1061 			 */
1062 			nvlist_t *hidden_args = fnvlist_alloc();
1063 			fnvlist_add_uint8_array(hidden_args, "wkeydata",
1064 			    wkeydata, wkeylen);
1065 			fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
1066 			    hidden_args);
1067 			nvlist_free(hidden_args);
1068 		}
1069 
1070 		if (origin != NULL && strlen(origin))
1071 			fnvlist_add_string(innvl, "origin", origin);
1072 
1073 		fnvlist_add_byte_array(innvl, "begin_record",
1074 		    (uchar_t *)&drr, sizeof (drr));
1075 
1076 		fnvlist_add_int32(innvl, "input_fd", input_fd);
1077 
1078 		if (force)
1079 			fnvlist_add_boolean(innvl, "force");
1080 
1081 		if (resumable)
1082 			fnvlist_add_boolean(innvl, "resumable");
1083 
1084 		if (heal)
1085 			fnvlist_add_boolean(innvl, "heal");
1086 
1087 		error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
1088 
1089 		if (error == 0 && read_bytes != NULL)
1090 			error = nvlist_lookup_uint64(outnvl, "read_bytes",
1091 			    read_bytes);
1092 
1093 		if (error == 0 && errflags != NULL)
1094 			error = nvlist_lookup_uint64(outnvl, "error_flags",
1095 			    errflags);
1096 
1097 		if (error == 0 && errors != NULL) {
1098 			nvlist_t *nvl;
1099 			error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
1100 			if (error == 0)
1101 				*errors = fnvlist_dup(nvl);
1102 		}
1103 
1104 		fnvlist_free(innvl);
1105 		fnvlist_free(outnvl);
1106 	} else {
1107 		zfs_cmd_t zc = {"\0"};
1108 		char *packed = NULL;
1109 		size_t size;
1110 
1111 		ASSERT3S(g_refcount, >, 0);
1112 
1113 		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
1114 		(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
1115 
1116 		if (recvdprops != NULL) {
1117 			packed = fnvlist_pack(recvdprops, &size);
1118 			zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
1119 			zc.zc_nvlist_src_size = size;
1120 		}
1121 
1122 		if (localprops != NULL) {
1123 			packed = fnvlist_pack(localprops, &size);
1124 			zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
1125 			zc.zc_nvlist_conf_size = size;
1126 		}
1127 
1128 		if (origin != NULL)
1129 			(void) strlcpy(zc.zc_string, origin,
1130 			    sizeof (zc.zc_string));
1131 
1132 		ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
1133 		zc.zc_begin_record = drr.drr_u.drr_begin;
1134 		zc.zc_guid = force;
1135 		zc.zc_cookie = input_fd;
1136 		zc.zc_cleanup_fd = -1;
1137 		zc.zc_action_handle = 0;
1138 
1139 		zc.zc_nvlist_dst_size = 128 * 1024;
1140 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
1141 		    malloc(zc.zc_nvlist_dst_size);
1142 
1143 		error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
1144 		if (error != 0) {
1145 			error = errno;
1146 		} else {
1147 			if (read_bytes != NULL)
1148 				*read_bytes = zc.zc_cookie;
1149 
1150 			if (errflags != NULL)
1151 				*errflags = zc.zc_obj;
1152 
1153 			if (errors != NULL)
1154 				VERIFY0(nvlist_unpack(
1155 				    (void *)(uintptr_t)zc.zc_nvlist_dst,
1156 				    zc.zc_nvlist_dst_size, errors, KM_SLEEP));
1157 		}
1158 
1159 		if (packed != NULL)
1160 			fnvlist_pack_free(packed, size);
1161 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
1162 	}
1163 
1164 	return (error);
1165 }
1166 
1167 /*
1168  * The simplest receive case: receive from the specified fd, creating the
1169  * specified snapshot.  Apply the specified properties as "received" properties
1170  * (which can be overridden by locally-set properties).  If the stream is a
1171  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
1172  * flag will cause the target filesystem to be rolled back or destroyed if
1173  * necessary to receive.
1174  *
1175  * Return 0 on success or an errno on failure.
1176  *
1177  * Note: this interface does not work on dedup'd streams
1178  * (those with DMU_BACKUP_FEATURE_DEDUP).
1179  */
1180 int
1181 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
1182     boolean_t force, boolean_t raw, int fd)
1183 {
1184 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1185 	    B_FALSE, B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
1186 }
1187 
1188 /*
1189  * Like lzc_receive, but if the receive fails due to premature stream
1190  * termination, the intermediate state will be preserved on disk.  In this
1191  * case, ECKSUM will be returned.  The receive may subsequently be resumed
1192  * with a resuming send stream generated by lzc_send_resume().
1193  */
1194 int
1195 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
1196     boolean_t force, boolean_t raw, int fd)
1197 {
1198 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1199 	    B_FALSE, B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
1200 }
1201 
1202 /*
1203  * Like lzc_receive, but allows the caller to read the begin record and then to
1204  * pass it in.  That could be useful if the caller wants to derive, for example,
1205  * the snapname or the origin parameters based on the information contained in
1206  * the begin record.
1207  * The begin record must be in its original form as read from the stream,
1208  * in other words, it should not be byteswapped.
1209  *
1210  * The 'resumable' parameter allows to obtain the same behavior as with
1211  * lzc_receive_resumable.
1212  */
1213 int
1214 lzc_receive_with_header(const char *snapname, nvlist_t *props,
1215     const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1216     int fd, const dmu_replay_record_t *begin_record)
1217 {
1218 	if (begin_record == NULL)
1219 		return (EINVAL);
1220 
1221 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1222 	    B_FALSE, resumable, raw, fd, begin_record, NULL, NULL, NULL));
1223 }
1224 
1225 /*
1226  * Like lzc_receive, but allows the caller to pass all supported arguments
1227  * and retrieve all values returned.  The only additional input parameter
1228  * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
1229  *
1230  * The following parameters all provide return values.  Several may be set
1231  * in the failure case and will contain additional information.
1232  *
1233  * The 'read_bytes' value will be set to the total number of bytes read.
1234  *
1235  * The 'errflags' value will contain zprop_errflags_t flags which are
1236  * used to describe any failures.
1237  *
1238  * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
1239  *
1240  * The 'errors' nvlist contains an entry for each unapplied received
1241  * property.  Callers are responsible for freeing this nvlist.
1242  */
1243 int
1244 lzc_receive_one(const char *snapname, nvlist_t *props,
1245     const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1246     int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1247     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1248     nvlist_t **errors)
1249 {
1250 	(void) action_handle, (void) cleanup_fd;
1251 	return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1252 	    B_FALSE, resumable, raw, input_fd, begin_record,
1253 	    read_bytes, errflags, errors));
1254 }
1255 
1256 /*
1257  * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1258  * argument.
1259  *
1260  * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1261  * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1262  * this nvlist
1263  */
1264 int
1265 lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
1266     nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1267     boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
1268     const dmu_replay_record_t *begin_record, int cleanup_fd,
1269     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1270     nvlist_t **errors)
1271 {
1272 	(void) action_handle, (void) cleanup_fd;
1273 	return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1274 	    force, B_FALSE, resumable, raw, input_fd, begin_record,
1275 	    read_bytes, errflags, errors));
1276 }
1277 
1278 /*
1279  * Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
1280  * 'heal' argument.
1281  *
1282  * The heal arguments tells us to heal the provided snapshot using the provided
1283  * send stream
1284  */
1285 int lzc_receive_with_heal(const char *snapname, nvlist_t *props,
1286     nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1287     boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw,
1288     int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1289     uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1290     nvlist_t **errors)
1291 {
1292 	(void) action_handle, (void) cleanup_fd;
1293 	return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1294 	    force, heal, resumable, raw, input_fd, begin_record,
1295 	    read_bytes, errflags, errors));
1296 }
1297 
1298 /*
1299  * Roll back this filesystem or volume to its most recent snapshot.
1300  * If snapnamebuf is not NULL, it will be filled in with the name
1301  * of the most recent snapshot.
1302  * Note that the latest snapshot may change if a new one is concurrently
1303  * created or the current one is destroyed.  lzc_rollback_to can be used
1304  * to roll back to a specific latest snapshot.
1305  *
1306  * Return 0 on success or an errno on failure.
1307  */
1308 int
1309 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1310 {
1311 	nvlist_t *args;
1312 	nvlist_t *result;
1313 	int err;
1314 
1315 	args = fnvlist_alloc();
1316 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1317 	nvlist_free(args);
1318 	if (err == 0 && snapnamebuf != NULL) {
1319 		const char *snapname = fnvlist_lookup_string(result, "target");
1320 		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
1321 	}
1322 	nvlist_free(result);
1323 
1324 	return (err);
1325 }
1326 
1327 /*
1328  * Roll back this filesystem or volume to the specified snapshot,
1329  * if possible.
1330  *
1331  * Return 0 on success or an errno on failure.
1332  */
1333 int
1334 lzc_rollback_to(const char *fsname, const char *snapname)
1335 {
1336 	nvlist_t *args;
1337 	nvlist_t *result;
1338 	int err;
1339 
1340 	args = fnvlist_alloc();
1341 	fnvlist_add_string(args, "target", snapname);
1342 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1343 	nvlist_free(args);
1344 	nvlist_free(result);
1345 	return (err);
1346 }
1347 
1348 /*
1349  * Creates new bookmarks from existing snapshot or bookmark.
1350  *
1351  * The bookmarks nvlist maps from the full name of the new bookmark to
1352  * the full name of the source snapshot or bookmark.
1353  * All the bookmarks and snapshots must be in the same pool.
1354  * The new bookmarks names must be unique.
1355  * => see function dsl_bookmark_create_nvl_validate
1356  *
1357  * The returned results nvlist will have an entry for each bookmark that failed.
1358  * The value will be the (int32) error code.
1359  *
1360  * The return value will be 0 if all bookmarks were created, otherwise it will
1361  * be the errno of a (undetermined) bookmarks that failed.
1362  */
1363 int
1364 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1365 {
1366 	nvpair_t *elem;
1367 	int error;
1368 	char pool[ZFS_MAX_DATASET_NAME_LEN];
1369 
1370 	/* determine pool name from first bookmark */
1371 	elem = nvlist_next_nvpair(bookmarks, NULL);
1372 	if (elem == NULL)
1373 		return (0);
1374 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1375 	pool[strcspn(pool, "/#")] = '\0';
1376 
1377 	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1378 
1379 	return (error);
1380 }
1381 
1382 /*
1383  * Retrieve bookmarks.
1384  *
1385  * Retrieve the list of bookmarks for the given file system. The props
1386  * parameter is an nvlist of property names (with no values) that will be
1387  * returned for each bookmark.
1388  *
1389  * The following are valid properties on bookmarks, most of which are numbers
1390  * (represented as uint64 in the nvlist), except redact_snaps, which is a
1391  * uint64 array, and redact_complete, which is a boolean
1392  *
1393  * "guid" - globally unique identifier of the snapshot it refers to
1394  * "createtxg" - txg when the snapshot it refers to was created
1395  * "creation" - timestamp when the snapshot it refers to was created
1396  * "ivsetguid" - IVset guid for identifying encrypted snapshots
1397  * "redact_snaps" - list of guids of the redaction snapshots for the specified
1398  *     bookmark.  If the bookmark is not a redaction bookmark, the nvlist will
1399  *     not contain an entry for this value.  If it is redacted with respect to
1400  *     no snapshots, it will contain value -> NULL uint64 array
1401  * "redact_complete" - boolean value; true if the redaction bookmark is
1402  *     complete, false otherwise.
1403  *
1404  * The format of the returned nvlist as follows:
1405  * <short name of bookmark> -> {
1406  *     <name of property> -> {
1407  *         "value" -> uint64
1408  *     }
1409  *     ...
1410  *     "redact_snaps" -> {
1411  *         "value" -> uint64 array
1412  *     }
1413  *     "redact_complete" -> {
1414  *         "value" -> boolean value
1415  *     }
1416  *  }
1417  */
1418 int
1419 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1420 {
1421 	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1422 }
1423 
1424 /*
1425  * Get bookmark properties.
1426  *
1427  * Given a bookmark's full name, retrieve all properties for the bookmark.
1428  *
1429  * The format of the returned property list is as follows:
1430  * {
1431  *     <name of property> -> {
1432  *         "value" -> uint64
1433  *     }
1434  *     ...
1435  *     "redact_snaps" -> {
1436  *         "value" -> uint64 array
1437  * }
1438  */
1439 int
1440 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
1441 {
1442 	int error;
1443 
1444 	nvlist_t *innvl = fnvlist_alloc();
1445 	error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
1446 	fnvlist_free(innvl);
1447 
1448 	return (error);
1449 }
1450 
1451 /*
1452  * Destroys bookmarks.
1453  *
1454  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1455  * They must all be in the same pool.  Bookmarks are specified as
1456  * <fs>#<bmark>.
1457  *
1458  * Bookmarks that do not exist will be silently ignored.
1459  *
1460  * The return value will be 0 if all bookmarks that existed were destroyed.
1461  *
1462  * Otherwise the return value will be the errno of a (undetermined) bookmark
1463  * that failed, no bookmarks will be destroyed, and the errlist will have an
1464  * entry for each bookmarks that failed.  The value in the errlist will be
1465  * the (int32) error code.
1466  */
1467 int
1468 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1469 {
1470 	nvpair_t *elem;
1471 	int error;
1472 	char pool[ZFS_MAX_DATASET_NAME_LEN];
1473 
1474 	/* determine the pool name */
1475 	elem = nvlist_next_nvpair(bmarks, NULL);
1476 	if (elem == NULL)
1477 		return (0);
1478 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1479 	pool[strcspn(pool, "/#")] = '\0';
1480 
1481 	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1482 
1483 	return (error);
1484 }
1485 
1486 static int
1487 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1488     uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1489 {
1490 	int error;
1491 	nvlist_t *args;
1492 
1493 	args = fnvlist_alloc();
1494 	fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1495 	fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1496 	fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1497 	fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1498 	fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1499 	error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1500 	fnvlist_free(args);
1501 
1502 	return (error);
1503 }
1504 
1505 /*
1506  * Executes a channel program.
1507  *
1508  * If this function returns 0 the channel program was successfully loaded and
1509  * ran without failing. Note that individual commands the channel program ran
1510  * may have failed and the channel program is responsible for reporting such
1511  * errors through outnvl if they are important.
1512  *
1513  * This method may also return:
1514  *
1515  * EINVAL   The program contains syntax errors, or an invalid memory or time
1516  *          limit was given. No part of the channel program was executed.
1517  *          If caused by syntax errors, 'outnvl' contains information about the
1518  *          errors.
1519  *
1520  * ECHRNG   The program was executed, but encountered a runtime error, such as
1521  *          calling a function with incorrect arguments, invoking the error()
1522  *          function directly, failing an assert() command, etc. Some portion
1523  *          of the channel program may have executed and committed changes.
1524  *          Information about the failure can be found in 'outnvl'.
1525  *
1526  * ENOMEM   The program fully executed, but the output buffer was not large
1527  *          enough to store the returned value. No output is returned through
1528  *          'outnvl'.
1529  *
1530  * ENOSPC   The program was terminated because it exceeded its memory usage
1531  *          limit. Some portion of the channel program may have executed and
1532  *          committed changes to disk. No output is returned through 'outnvl'.
1533  *
1534  * ETIME    The program was terminated because it exceeded its Lua instruction
1535  *          limit. Some portion of the channel program may have executed and
1536  *          committed changes to disk. No output is returned through 'outnvl'.
1537  */
1538 int
1539 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1540     uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1541 {
1542 	return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1543 	    memlimit, argnvl, outnvl));
1544 }
1545 
1546 /*
1547  * Creates a checkpoint for the specified pool.
1548  *
1549  * If this function returns 0 the pool was successfully checkpointed.
1550  *
1551  * This method may also return:
1552  *
1553  * ZFS_ERR_CHECKPOINT_EXISTS
 *	The pool already has a checkpoint. A pool can have at most one
 *	checkpoint at any given time.
1556  *
1557  * ZFS_ERR_DISCARDING_CHECKPOINT
1558  * 	ZFS is in the middle of discarding a checkpoint for this pool.
1559  * 	The pool can be checkpointed again once the discard is done.
1560  *
 * ZFS_ERR_DEVRM_IN_PROGRESS
1562  * 	A vdev is currently being removed. The pool cannot be
1563  * 	checkpointed until the device removal is done.
1564  *
 * ZFS_ERR_VDEV_TOO_BIG
1566  * 	One or more top-level vdevs exceed the maximum vdev size
1567  * 	supported for this feature.
1568  */
1569 int
1570 lzc_pool_checkpoint(const char *pool)
1571 {
1572 	int error;
1573 
1574 	nvlist_t *result = NULL;
1575 	nvlist_t *args = fnvlist_alloc();
1576 
1577 	error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1578 
1579 	fnvlist_free(args);
1580 	fnvlist_free(result);
1581 
1582 	return (error);
1583 }
1584 
1585 /*
1586  * Discard the checkpoint from the specified pool.
1587  *
1588  * If this function returns 0 the checkpoint was successfully discarded.
1589  *
1590  * This method may also return:
1591  *
1592  * ZFS_ERR_NO_CHECKPOINT
1593  * 	The pool does not have a checkpoint.
1594  *
1595  * ZFS_ERR_DISCARDING_CHECKPOINT
1596  * 	ZFS is already in the middle of discarding the checkpoint.
1597  */
1598 int
1599 lzc_pool_checkpoint_discard(const char *pool)
1600 {
1601 	int error;
1602 
1603 	nvlist_t *result = NULL;
1604 	nvlist_t *args = fnvlist_alloc();
1605 
1606 	error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1607 
1608 	fnvlist_free(args);
1609 	fnvlist_free(result);
1610 
1611 	return (error);
1612 }
1613 
1614 /*
1615  * Executes a read-only channel program.
1616  *
1617  * A read-only channel program works programmatically the same way as a
1618  * normal channel program executed with lzc_channel_program(). The only
1619  * difference is it runs exclusively in open-context and therefore can
1620  * return faster. The downside to that, is that the program cannot change
1621  * on-disk state by calling functions from the zfs.sync submodule.
1622  *
1623  * The return values of this function (and their meaning) are exactly the
1624  * same as the ones described in lzc_channel_program().
1625  */
1626 int
1627 lzc_channel_program_nosync(const char *pool, const char *program,
1628     uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1629 {
1630 	return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1631 	    memlimit, argnvl, outnvl));
1632 }
1633 
1634 int
1635 lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
1636 {
1637 	return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl));
1638 }
1639 
1640 int
1641 lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
1642 {
1643 	return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl));
1644 }
1645 
/*
 * Performs key management functions
 *
 * lzc_load_key() loads the wrapping key given in wkeydata.  For
 * lzc_change_key(), crypt_cmd should be a value from dcp_cmd_t.  Whenever a
 * command loads or changes a wrapping key, the key is passed in the
 * hidden_args nvlist so that it is not logged.
 */
1653 int
1654 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1655     uint_t wkeylen)
1656 {
1657 	int error;
1658 	nvlist_t *ioc_args;
1659 	nvlist_t *hidden_args;
1660 
1661 	if (wkeydata == NULL)
1662 		return (EINVAL);
1663 
1664 	ioc_args = fnvlist_alloc();
1665 	hidden_args = fnvlist_alloc();
1666 	fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1667 	fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1668 	if (noop)
1669 		fnvlist_add_boolean(ioc_args, "noop");
1670 	error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1671 	nvlist_free(hidden_args);
1672 	nvlist_free(ioc_args);
1673 
1674 	return (error);
1675 }
1676 
1677 int
1678 lzc_unload_key(const char *fsname)
1679 {
1680 	return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1681 }
1682 
1683 int
1684 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1685     uint8_t *wkeydata, uint_t wkeylen)
1686 {
1687 	int error;
1688 	nvlist_t *ioc_args = fnvlist_alloc();
1689 	nvlist_t *hidden_args = NULL;
1690 
1691 	fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1692 
1693 	if (wkeydata != NULL) {
1694 		hidden_args = fnvlist_alloc();
1695 		fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1696 		    wkeylen);
1697 		fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1698 	}
1699 
1700 	if (props != NULL)
1701 		fnvlist_add_nvlist(ioc_args, "props", props);
1702 
1703 	error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1704 	nvlist_free(hidden_args);
1705 	nvlist_free(ioc_args);
1706 
1707 	return (error);
1708 }
1709 
1710 int
1711 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1712 {
1713 	nvlist_t *args = fnvlist_alloc();
1714 	int error;
1715 
1716 	fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1717 
1718 	error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1719 	nvlist_free(args);
1720 	return (error);
1721 }
1722 
1723 /*
1724  * Changes initializing state.
1725  *
1726  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1727  * The key is ignored.
1728  *
1729  * If there are errors related to vdev arguments, per-vdev errors are returned
1730  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1731  * guid is stringified with PRIu64, and errno is one of the following as
1732  * an int64_t:
1733  *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1735  *	- EROFS if the device is not writeable
1736  *	- EBUSY start requested but the device is already being either
1737  *	        initialized or trimmed
1738  *	- ESRCH cancel/suspend requested but device is not being initialized
1739  *
1740  * If the errlist is empty, then return value will be:
1741  *	- EINVAL if one or more arguments was invalid
1742  *	- Other spa_open failures
1743  *	- 0 if the operation succeeded
1744  */
1745 int
1746 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1747     nvlist_t *vdevs, nvlist_t **errlist)
1748 {
1749 	int error;
1750 
1751 	nvlist_t *args = fnvlist_alloc();
1752 	fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1753 	fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1754 
1755 	error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1756 
1757 	fnvlist_free(args);
1758 
1759 	return (error);
1760 }
1761 
1762 /*
1763  * Changes TRIM state.
1764  *
1765  * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1766  * The key is ignored.
1767  *
1768  * If there are errors related to vdev arguments, per-vdev errors are returned
1769  * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1770  * guid is stringified with PRIu64, and errno is one of the following as
1771  * an int64_t:
1772  *	- ENODEV if the device was not found
 *	- EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1774  *	- EROFS if the device is not writeable
1775  *	- EBUSY start requested but the device is already being either trimmed
1776  *	        or initialized
 *	- ESRCH cancel/suspend requested but device is not being trimmed
1778  *	- EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1779  *
1780  * If the errlist is empty, then return value will be:
1781  *	- EINVAL if one or more arguments was invalid
1782  *	- Other spa_open failures
1783  *	- 0 if the operation succeeded
1784  */
1785 int
1786 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1787     boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1788 {
1789 	int error;
1790 
1791 	nvlist_t *args = fnvlist_alloc();
1792 	fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1793 	fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1794 	fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1795 	fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1796 
1797 	error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1798 
1799 	fnvlist_free(args);
1800 
1801 	return (error);
1802 }
1803 
1804 /*
1805  * Create a redaction bookmark named bookname by redacting snapshot with respect
1806  * to all the snapshots in snapnv.
1807  */
1808 int
1809 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
1810 {
1811 	nvlist_t *args = fnvlist_alloc();
1812 	fnvlist_add_string(args, "bookname", bookname);
1813 	fnvlist_add_nvlist(args, "snapnv", snapnv);
1814 	int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
1815 	fnvlist_free(args);
1816 	return (error);
1817 }
1818 
1819 static int
1820 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
1821     uint64_t tag, boolean_t *waited)
1822 {
1823 	nvlist_t *args = fnvlist_alloc();
1824 	nvlist_t *result = NULL;
1825 
1826 	fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
1827 	if (use_tag)
1828 		fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
1829 
1830 	int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
1831 
1832 	if (error == 0 && waited != NULL)
1833 		*waited = fnvlist_lookup_boolean_value(result,
1834 		    ZPOOL_WAIT_WAITED);
1835 
1836 	fnvlist_free(args);
1837 	fnvlist_free(result);
1838 
1839 	return (error);
1840 }
1841 
1842 int
1843 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
1844 {
1845 	return (wait_common(pool, activity, B_FALSE, 0, waited));
1846 }
1847 
1848 int
1849 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
1850     boolean_t *waited)
1851 {
1852 	return (wait_common(pool, activity, B_TRUE, tag, waited));
1853 }
1854 
1855 int
1856 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
1857 {
1858 	nvlist_t *args = fnvlist_alloc();
1859 	nvlist_t *result = NULL;
1860 
1861 	fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
1862 
1863 	int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
1864 
1865 	if (error == 0 && waited != NULL)
1866 		*waited = fnvlist_lookup_boolean_value(result,
1867 		    ZFS_WAIT_WAITED);
1868 
1869 	fnvlist_free(args);
1870 	fnvlist_free(result);
1871 
1872 	return (error);
1873 }
1874 
1875 /*
1876  * Set the bootenv contents for the given pool.
1877  */
1878 int
1879 lzc_set_bootenv(const char *pool, const nvlist_t *env)
1880 {
1881 	return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
1882 }
1883 
1884 /*
1885  * Get the contents of the bootenv of the given pool.
1886  */
1887 int
1888 lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
1889 {
1890 	return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
1891 }
1892