1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <libdevinfo.h>
31 #include <libintl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <unistd.h>
36 #include <stddef.h>
37 #include <fcntl.h>
38 #include <sys/mount.h>
39 #include <sys/mntent.h>
40 #include <sys/mnttab.h>
41 #include <sys/avl.h>
42 #include <stddef.h>
43 
44 #include <libzfs.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 #include <fletcher.c> /* XXX */
51 
52 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
53     int, avl_tree_t *, char **);
54 
55 /*
56  * Routines for dealing with the AVL tree of fs-nvlists
57  */
/*
 * One entry of the snapshot AVL tree: associates a snapshot guid with the
 * nvlist of the filesystem containing it and with the snapshot's name.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage; its offset is given to avl_create */
	nvlist_t *fn_nvfs;	/* nvlist describing the containing filesystem */
	char *fn_snapname;	/* name of the guid's nvpair; not freed with the node */
	uint64_t fn_guid;	/* snapshot guid; the key the tree is ordered by */
} fsavl_node_t;
64 
65 static int
66 fsavl_compare(const void *arg1, const void *arg2)
67 {
68 	const fsavl_node_t *fn1 = arg1;
69 	const fsavl_node_t *fn2 = arg2;
70 
71 	if (fn1->fn_guid > fn2->fn_guid)
72 		return (+1);
73 	else if (fn1->fn_guid < fn2->fn_guid)
74 		return (-1);
75 	else
76 		return (0);
77 }
78 
79 /*
80  * Given the GUID of a snapshot, find its containing filesystem and
81  * (optionally) name.
82  */
83 static nvlist_t *
84 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
85 {
86 	fsavl_node_t fn_find;
87 	fsavl_node_t *fn;
88 
89 	fn_find.fn_guid = snapguid;
90 
91 	fn = avl_find(avl, &fn_find, NULL);
92 	if (fn) {
93 		if (snapname)
94 			*snapname = fn->fn_snapname;
95 		return (fn->fn_nvfs);
96 	}
97 	return (NULL);
98 }
99 
100 static void
101 fsavl_destroy(avl_tree_t *avl)
102 {
103 	fsavl_node_t *fn;
104 	void *cookie;
105 
106 	if (avl == NULL)
107 		return;
108 
109 	cookie = NULL;
110 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
111 		free(fn);
112 	avl_destroy(avl);
113 	free(avl);
114 }
115 
116 /*
117  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
118  */
119 static avl_tree_t *
120 fsavl_create(nvlist_t *fss)
121 {
122 	avl_tree_t *fsavl;
123 	nvpair_t *fselem = NULL;
124 
125 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
126 		return (NULL);
127 
128 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
129 	    offsetof(fsavl_node_t, fn_node));
130 
131 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
132 		nvlist_t *nvfs, *snaps;
133 		nvpair_t *snapelem = NULL;
134 
135 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
136 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
137 
138 		while ((snapelem =
139 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
140 			fsavl_node_t *fn;
141 			uint64_t guid;
142 
143 			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
144 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
145 				fsavl_destroy(fsavl);
146 				return (NULL);
147 			}
148 			fn->fn_nvfs = nvfs;
149 			fn->fn_snapname = nvpair_name(snapelem);
150 			fn->fn_guid = guid;
151 
152 			/*
153 			 * Note: if there are multiple snaps with the
154 			 * same GUID, we ignore all but one.
155 			 */
156 			if (avl_find(fsavl, fn, NULL) == NULL)
157 				avl_add(fsavl, fn);
158 			else
159 				free(fn);
160 		}
161 	}
162 
163 	return (fsavl);
164 }
165 
166 /*
167  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
168  */
/*
 * State shared by send_iterate_fs() and send_iterate_snap() while the
 * per-filesystem nvlists are gathered.
 */
typedef struct send_data {
	/* guid stored as "parentfromsnap" in each fs nvlist; 0 when unset */
	uint64_t parent_fromsnap_guid;
	/* "snaps" nvlist (short name -> guid) of the fs being iterated */
	nvlist_t *parent_snaps;
	/* accumulated result: one nvlist per filesystem, keyed by guid string */
	nvlist_t *fss;
	/* "snapprops" nvlist (short name -> props) of the fs being iterated */
	nvlist_t *snapprops;
	/* short snapshot names (the part after '@'); either may be NULL */
	const char *fromsnap;
	const char *tosnap;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
200 
201 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
202 
203 static int
204 send_iterate_snap(zfs_handle_t *zhp, void *arg)
205 {
206 	send_data_t *sd = arg;
207 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
208 	char *snapname;
209 	nvlist_t *nv;
210 
211 	snapname = strrchr(zhp->zfs_name, '@')+1;
212 
213 	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
214 	/*
215 	 * NB: if there is no fromsnap here (it's a newly created fs in
216 	 * an incremental replication), we will substitute the tosnap.
217 	 */
218 	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
219 	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
220 	    strcmp(snapname, sd->tosnap) == 0)) {
221 		sd->parent_fromsnap_guid = guid;
222 	}
223 
224 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
225 	send_iterate_prop(zhp, nv);
226 	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
227 	nvlist_free(nv);
228 
229 	zfs_close(zhp);
230 	return (0);
231 }
232 
233 static void
234 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
235 {
236 	nvpair_t *elem = NULL;
237 
238 	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
239 		char *propname = nvpair_name(elem);
240 		zfs_prop_t prop = zfs_name_to_prop(propname);
241 		nvlist_t *propnv;
242 
243 		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
244 
245 		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
246 			continue;
247 
248 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
249 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
250 		    prop == ZFS_PROP_REFQUOTA ||
251 		    prop == ZFS_PROP_REFRESERVATION) {
252 			/* these guys are modifyable, but have no source */
253 			uint64_t value;
254 			verify(nvlist_lookup_uint64(propnv,
255 			    ZPROP_VALUE, &value) == 0);
256 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
257 				continue;
258 		} else {
259 			char *source;
260 			if (nvlist_lookup_string(propnv,
261 			    ZPROP_SOURCE, &source) != 0)
262 				continue;
263 			if (strcmp(source, zhp->zfs_name) != 0)
264 				continue;
265 		}
266 
267 		if (zfs_prop_user(propname) ||
268 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
269 			char *value;
270 			verify(nvlist_lookup_string(propnv,
271 			    ZPROP_VALUE, &value) == 0);
272 			VERIFY(0 == nvlist_add_string(nv, propname, value));
273 		} else {
274 			uint64_t value;
275 			verify(nvlist_lookup_uint64(propnv,
276 			    ZPROP_VALUE, &value) == 0);
277 			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
278 		}
279 	}
280 }
281 
282 /*
283  * recursively generate nvlists describing datasets.  See comment
284  * for the data structure send_data_t above for description of contents
285  * of the nvlist.
286  */
287 static int
288 send_iterate_fs(zfs_handle_t *zhp, void *arg)
289 {
290 	send_data_t *sd = arg;
291 	nvlist_t *nvfs, *nv;
292 	int rv;
293 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
294 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
295 	char guidstring[64];
296 
297 	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
298 	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
299 	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
300 	    sd->parent_fromsnap_guid));
301 
302 	if (zhp->zfs_dmustats.dds_origin[0]) {
303 		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
304 		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
305 		if (origin == NULL)
306 			return (-1);
307 		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
308 		    origin->zfs_dmustats.dds_guid));
309 	}
310 
311 	/* iterate over props */
312 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
313 	send_iterate_prop(zhp, nv);
314 	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
315 	nvlist_free(nv);
316 
317 	/* iterate over snaps, and set sd->parent_fromsnap_guid */
318 	sd->parent_fromsnap_guid = 0;
319 	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
320 	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
321 	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
322 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
323 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
324 	nvlist_free(sd->parent_snaps);
325 	nvlist_free(sd->snapprops);
326 
327 	/* add this fs to nvlist */
328 	(void) snprintf(guidstring, sizeof (guidstring),
329 	    "0x%llx", (longlong_t)guid);
330 	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
331 	nvlist_free(nvfs);
332 
333 	/* iterate over children */
334 	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
335 
336 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
337 
338 	zfs_close(zhp);
339 	return (rv);
340 }
341 
342 static int
343 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
344     const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
345 {
346 	zfs_handle_t *zhp;
347 	send_data_t sd = { 0 };
348 	int error;
349 
350 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
351 	if (zhp == NULL)
352 		return (EZFS_BADTYPE);
353 
354 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
355 	sd.fromsnap = fromsnap;
356 	sd.tosnap = tosnap;
357 
358 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
359 		nvlist_free(sd.fss);
360 		if (avlp != NULL)
361 			*avlp = NULL;
362 		*nvlp = NULL;
363 		return (error);
364 	}
365 
366 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
367 		nvlist_free(sd.fss);
368 		*nvlp = NULL;
369 		return (EZFS_NOMEM);
370 	}
371 
372 	*nvlp = sd.fss;
373 	return (0);
374 }
375 
376 /*
377  * Routines for dealing with the sorted snapshot functionality
378  */
/*
 * Node of the temporary AVL tree used to visit snapshots in creation
 * order.
 */
typedef struct zfs_node {
	zfs_handle_t	*zn_handle;	/* snapshot handle; compared by CREATETXG */
	avl_node_t	zn_avlnode;	/* AVL linkage */
} zfs_node_t;
383 
384 static int
385 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
386 {
387 	avl_tree_t *avl = data;
388 	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
389 
390 	node->zn_handle = zhp;
391 	avl_add(avl, node);
392 	return (0);
393 }
394 
395 /* ARGSUSED */
396 static int
397 zfs_snapshot_compare(const void *larg, const void *rarg)
398 {
399 	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
400 	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
401 	uint64_t lcreate, rcreate;
402 
403 	/*
404 	 * Sort them according to creation time.  We use the hidden
405 	 * CREATETXG property to get an absolute ordering of snapshots.
406 	 */
407 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
408 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
409 
410 	if (lcreate < rcreate)
411 		return (-1);
412 	else if (lcreate > rcreate)
413 		return (+1);
414 	else
415 		return (0);
416 }
417 
418 static int
419 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
420 {
421 	int ret = 0;
422 	zfs_node_t *node;
423 	avl_tree_t avl;
424 	void *cookie = NULL;
425 
426 	avl_create(&avl, zfs_snapshot_compare,
427 	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
428 
429 	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
430 
431 	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
432 		ret |= callback(node->zn_handle, data);
433 
434 	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
435 		free(node);
436 
437 	avl_destroy(&avl);
438 
439 	return (ret);
440 }
441 
442 /*
443  * Routines specific to "zfs send"
444  */
/*
 * State carried through dump_filesystems(), dump_filesystem() and
 * dump_snapshot() while the individual streams are written.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* snapshot handled last; used as the next incremental source */
	char lastsnap[ZFS_MAXNAMELEN];
	/*
	 * seenfrom/seento record whether fromsnap/tosnap have been reached
	 * in the current filesystem; the others mirror zfs_send() arguments.
	 */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose;	/* print each stream to stderr as it is sent */
	int outfd;		/* descriptor the streams are written to */
	boolean_t err;		/* set when a filesystem could not be sent */
	nvlist_t *fss;		/* per-filesystem nvlists (replicate only) */
	avl_tree_t *fsavl;	/* snapshot guid index over fss */
} send_dump_data_t;
457 
458 /*
459  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
460  * NULL) to the file descriptor specified by outfd.
461  */
462 static int
463 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
464     int outfd)
465 {
466 	zfs_cmd_t zc = { 0 };
467 	libzfs_handle_t *hdl = zhp->zfs_hdl;
468 
469 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
470 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
471 
472 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
473 	if (fromsnap)
474 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
475 	zc.zc_cookie = outfd;
476 	zc.zc_obj = fromorigin;
477 
478 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
479 		char errbuf[1024];
480 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
481 		    "warning: cannot send '%s'"), zhp->zfs_name);
482 
483 		switch (errno) {
484 
485 		case EXDEV:
486 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
487 			    "not an earlier snapshot from the same fs"));
488 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
489 
490 		case ENOENT:
491 			if (zfs_dataset_exists(hdl, zc.zc_name,
492 			    ZFS_TYPE_SNAPSHOT)) {
493 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
494 				    "incremental source (@%s) does not exist"),
495 				    zc.zc_value);
496 			}
497 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
498 
499 		case EDQUOT:
500 		case EFBIG:
501 		case EIO:
502 		case ENOLINK:
503 		case ENOSPC:
504 		case ENOSTR:
505 		case ENXIO:
506 		case EPIPE:
507 		case ERANGE:
508 		case EFAULT:
509 		case EROFS:
510 			zfs_error_aux(hdl, strerror(errno));
511 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
512 
513 		default:
514 			return (zfs_standard_error(hdl, errno, errbuf));
515 		}
516 	}
517 
518 	return (0);
519 }
520 
521 static int
522 dump_snapshot(zfs_handle_t *zhp, void *arg)
523 {
524 	send_dump_data_t *sdd = arg;
525 	const char *thissnap;
526 	int err;
527 
528 	thissnap = strchr(zhp->zfs_name, '@') + 1;
529 
530 	if (sdd->fromsnap && !sdd->seenfrom &&
531 	    strcmp(sdd->fromsnap, thissnap) == 0) {
532 		sdd->seenfrom = B_TRUE;
533 		(void) strcpy(sdd->lastsnap, thissnap);
534 		zfs_close(zhp);
535 		return (0);
536 	}
537 
538 	if (sdd->seento || !sdd->seenfrom) {
539 		zfs_close(zhp);
540 		return (0);
541 	}
542 
543 	/* send it */
544 	if (sdd->verbose) {
545 		(void) fprintf(stderr, "sending from @%s to %s\n",
546 		    sdd->lastsnap, zhp->zfs_name);
547 	}
548 
549 	err = dump_ioctl(zhp, sdd->lastsnap,
550 	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
551 	    sdd->outfd);
552 
553 	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
554 		sdd->seento = B_TRUE;
555 
556 	(void) strcpy(sdd->lastsnap, thissnap);
557 	zfs_close(zhp);
558 	return (err);
559 }
560 
/*
 * Send the stream(s) for a single filesystem.
 *
 * If <fs>@tosnap does not exist, a warning is printed, sdd->err is set and
 * 0 is returned.  With sdd->doall every snapshot between fromsnap and
 * tosnap is sent through dump_snapshot(); otherwise only <fs>@tosnap is
 * sent.  zhp is not closed here.  Returns the result of the snapshot
 * iteration or of dump_ioctl(), or -1 if the snapshot cannot be opened.
 */
static int
dump_filesystem(zfs_handle_t *zhp, void *arg)
{
	int rv = 0;
	send_dump_data_t *sdd = arg;
	boolean_t missingfrom = B_FALSE;
	zfs_cmd_t zc = { 0 };

	/* probe for the target snapshot before doing anything else */
	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
	    zhp->zfs_name, sdd->tosnap);
	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
		(void) fprintf(stderr, "WARNING: "
		    "could not send %s@%s: does not exist\n",
		    zhp->zfs_name, sdd->tosnap);
		sdd->err = B_TRUE;
		return (0);
	}

	if (sdd->replicate && sdd->fromsnap) {
		/*
		 * If this fs does not have fromsnap, and we're doing
		 * recursive, we need to send a full stream from the
		 * beginning (or an incremental from the origin if this
		 * is a clone).  If we're doing non-recursive, then let
		 * them get the error.
		 */
		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
		    zhp->zfs_name, sdd->fromsnap);
		if (ioctl(zhp->zfs_hdl->libzfs_fd,
		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
			missingfrom = B_TRUE;
		}
	}

	if (sdd->doall) {
		/* reset the per-filesystem progress state */
		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
		/* without a usable fromsnap, sending starts at the first snap */
		if (sdd->fromsnap == NULL || missingfrom)
			sdd->seenfrom = B_TRUE;

		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
		if (!sdd->seenfrom) {
			/* fromsnap never showed up during the walk */
			(void) fprintf(stderr,
			    "WARNING: could not send %s@%s:\n"
			    "incremental source (%s@%s) does not exist\n",
			    zhp->zfs_name, sdd->tosnap,
			    zhp->zfs_name, sdd->fromsnap);
			sdd->err = B_TRUE;
		} else if (!sdd->seento) {
			/* tosnap was not reached after fromsnap */
			if (sdd->fromsnap) {
				(void) fprintf(stderr,
				    "WARNING: could not send %s@%s:\n"
				    "incremental source (%s@%s) "
				    "is not earlier than it\n",
				    zhp->zfs_name, sdd->tosnap,
				    zhp->zfs_name, sdd->fromsnap);
			} else {
				(void) fprintf(stderr, "WARNING: "
				    "could not send %s@%s: does not exist\n",
				    zhp->zfs_name, sdd->tosnap);
			}
			sdd->err = B_TRUE;
		}
	} else {
		zfs_handle_t *snapzhp;
		char snapname[ZFS_MAXNAMELEN];

		/* single stream: open <fs>@tosnap and send just that */
		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
		    zfs_get_name(zhp), sdd->tosnap);
		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
		if (snapzhp == NULL) {
			rv = -1;
		} else {
			/* a missing fromsnap turns into "from origin" */
			rv = dump_ioctl(snapzhp,
			    missingfrom ? NULL : sdd->fromsnap,
			    sdd->fromorigin || missingfrom,
			    sdd->outfd);
			sdd->seento = B_TRUE;
			zfs_close(snapzhp);
		}
	}

	return (rv);
}
644 
645 static int
646 dump_filesystems(zfs_handle_t *rzhp, void *arg)
647 {
648 	send_dump_data_t *sdd = arg;
649 	nvpair_t *fspair;
650 	boolean_t needagain, progress;
651 
652 	if (!sdd->replicate)
653 		return (dump_filesystem(rzhp, sdd));
654 
655 again:
656 	needagain = progress = B_FALSE;
657 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
658 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
659 		nvlist_t *fslist;
660 		char *fsname;
661 		zfs_handle_t *zhp;
662 		int err;
663 		uint64_t origin_guid = 0;
664 		nvlist_t *origin_nv;
665 
666 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
667 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
668 			continue;
669 
670 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
671 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
672 
673 		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
674 		if (origin_nv &&
675 		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
676 			/*
677 			 * origin has not been sent yet;
678 			 * skip this clone.
679 			 */
680 			needagain = B_TRUE;
681 			continue;
682 		}
683 
684 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
685 		if (zhp == NULL)
686 			return (-1);
687 		err = dump_filesystem(zhp, sdd);
688 		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
689 		progress = B_TRUE;
690 		zfs_close(zhp);
691 		if (err)
692 			return (err);
693 	}
694 	if (needagain) {
695 		assert(progress);
696 		goto again;
697 	}
698 	return (0);
699 }
700 
701 /*
702  * Generate a send stream for the dataset identified by the argument zhp.
703  *
704  * The content of the send stream is the snapshot identified by
705  * 'tosnap'.  Incremental streams are requested in two ways:
706  *     - from the snapshot identified by "fromsnap" (if non-null) or
707  *     - from the origin of the dataset identified by zhp, which must
708  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
709  *	 is TRUE.
710  *
711  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
712  * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
713  * if "replicate" is set.  If "doall" is set, dump all the intermediate
714  * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
715  * case too.
716  */
717 int
718 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
719     boolean_t replicate, boolean_t doall, boolean_t fromorigin,
720     boolean_t verbose, int outfd)
721 {
722 	char errbuf[1024];
723 	send_dump_data_t sdd = { 0 };
724 	int err;
725 	nvlist_t *fss = NULL;
726 	avl_tree_t *fsavl = NULL;
727 
728 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
729 	    "cannot send '%s'"), zhp->zfs_name);
730 
731 	if (fromsnap && fromsnap[0] == '\0') {
732 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
733 		    "zero-length incremental source"));
734 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
735 	}
736 
737 	if (replicate || doall) {
738 		dmu_replay_record_t drr = { 0 };
739 		char *packbuf = NULL;
740 		size_t buflen = 0;
741 		zio_cksum_t zc = { 0 };
742 
743 		assert(fromsnap || doall);
744 
745 		if (replicate) {
746 			nvlist_t *hdrnv;
747 
748 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
749 			if (fromsnap) {
750 				VERIFY(0 == nvlist_add_string(hdrnv,
751 				    "fromsnap", fromsnap));
752 			}
753 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
754 
755 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
756 			    fromsnap, tosnap, &fss, &fsavl);
757 			if (err)
758 				return (err);
759 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
760 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
761 			    NV_ENCODE_XDR, 0);
762 			nvlist_free(hdrnv);
763 			if (err) {
764 				fsavl_destroy(fsavl);
765 				nvlist_free(fss);
766 				return (zfs_standard_error(zhp->zfs_hdl,
767 				    err, errbuf));
768 			}
769 		}
770 
771 		/* write first begin record */
772 		drr.drr_type = DRR_BEGIN;
773 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
774 		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
775 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
776 		    sizeof (drr.drr_u.drr_begin.drr_toname),
777 		    "%s@%s", zhp->zfs_name, tosnap);
778 		drr.drr_payloadlen = buflen;
779 		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
780 		err = write(outfd, &drr, sizeof (drr));
781 
782 		/* write header nvlist */
783 		if (err != -1) {
784 			fletcher_4_incremental_native(packbuf, buflen, &zc);
785 			err = write(outfd, packbuf, buflen);
786 		}
787 		free(packbuf);
788 		if (err == -1) {
789 			fsavl_destroy(fsavl);
790 			nvlist_free(fss);
791 			return (zfs_standard_error(zhp->zfs_hdl,
792 			    errno, errbuf));
793 		}
794 
795 		/* write end record */
796 		if (err != -1) {
797 			bzero(&drr, sizeof (drr));
798 			drr.drr_type = DRR_END;
799 			drr.drr_u.drr_end.drr_checksum = zc;
800 			err = write(outfd, &drr, sizeof (drr));
801 			if (err == -1) {
802 				fsavl_destroy(fsavl);
803 				nvlist_free(fss);
804 				return (zfs_standard_error(zhp->zfs_hdl,
805 				    errno, errbuf));
806 			}
807 		}
808 	}
809 
810 	/* dump each stream */
811 	sdd.fromsnap = fromsnap;
812 	sdd.tosnap = tosnap;
813 	sdd.outfd = outfd;
814 	sdd.replicate = replicate;
815 	sdd.doall = doall;
816 	sdd.fromorigin = fromorigin;
817 	sdd.fss = fss;
818 	sdd.fsavl = fsavl;
819 	sdd.verbose = verbose;
820 	err = dump_filesystems(zhp, &sdd);
821 	fsavl_destroy(fsavl);
822 	nvlist_free(fss);
823 
824 	if (replicate || doall) {
825 		/*
826 		 * write final end record.  NB: want to do this even if
827 		 * there was some error, because it might not be totally
828 		 * failed.
829 		 */
830 		dmu_replay_record_t drr = { 0 };
831 		drr.drr_type = DRR_END;
832 		if (write(outfd, &drr, sizeof (drr)) == -1) {
833 			return (zfs_standard_error(zhp->zfs_hdl,
834 			    errno, errbuf));
835 		}
836 	}
837 
838 	return (err || sdd.err);
839 }
840 
841 /*
842  * Routines specific to "zfs recv"
843  */
844 
845 static int
846 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
847     boolean_t byteswap, zio_cksum_t *zc)
848 {
849 	char *cp = buf;
850 	int rv;
851 	int len = ilen;
852 
853 	do {
854 		rv = read(fd, cp, len);
855 		cp += rv;
856 		len -= rv;
857 	} while (rv > 0);
858 
859 	if (rv < 0 || len != 0) {
860 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
861 		    "failed to read from stream"));
862 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
863 		    "cannot receive")));
864 	}
865 
866 	if (zc) {
867 		if (byteswap)
868 			fletcher_4_incremental_byteswap(buf, ilen, zc);
869 		else
870 			fletcher_4_incremental_native(buf, ilen, zc);
871 	}
872 	return (0);
873 }
874 
875 static int
876 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
877     boolean_t byteswap, zio_cksum_t *zc)
878 {
879 	char *buf;
880 	int err;
881 
882 	buf = zfs_alloc(hdl, len);
883 	if (buf == NULL)
884 		return (ENOMEM);
885 
886 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
887 	if (err != 0) {
888 		free(buf);
889 		return (err);
890 	}
891 
892 	err = nvlist_unpack(buf, len, nvp, 0);
893 	free(buf);
894 	if (err != 0) {
895 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
896 		    "stream (malformed nvlist)"));
897 		return (EINVAL);
898 	}
899 	return (0);
900 }
901 
902 static int
903 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
904     int baselen, char *newname, recvflags_t flags)
905 {
906 	static int seq;
907 	zfs_cmd_t zc = { 0 };
908 	int err;
909 	prop_changelist_t *clp;
910 	zfs_handle_t *zhp;
911 
912 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
913 	if (zhp == NULL)
914 		return (-1);
915 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
916 	    flags.force ? MS_FORCE : 0);
917 	zfs_close(zhp);
918 	if (clp == NULL)
919 		return (-1);
920 	err = changelist_prefix(clp);
921 	if (err)
922 		return (err);
923 
924 	if (tryname) {
925 		(void) strcpy(newname, tryname);
926 
927 		zc.zc_objset_type = DMU_OST_ZFS;
928 		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
929 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
930 
931 		if (flags.verbose) {
932 			(void) printf("attempting rename %s to %s\n",
933 			    zc.zc_name, zc.zc_value);
934 		}
935 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
936 		if (err == 0)
937 			changelist_rename(clp, name, tryname);
938 	} else {
939 		err = ENOENT;
940 	}
941 
942 	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
943 		seq++;
944 
945 		(void) strncpy(newname, name, baselen);
946 		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
947 		    "recv-%u-%u", getpid(), seq);
948 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
949 
950 		if (flags.verbose) {
951 			(void) printf("failed - trying rename %s to %s\n",
952 			    zc.zc_name, zc.zc_value);
953 		}
954 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
955 		if (err == 0)
956 			changelist_rename(clp, name, newname);
957 		if (err && flags.verbose) {
958 			(void) printf("failed (%u) - "
959 			    "will try again on next pass\n", errno);
960 		}
961 		err = EAGAIN;
962 	} else if (flags.verbose) {
963 		if (err == 0)
964 			(void) printf("success\n");
965 		else
966 			(void) printf("failed (%u)\n", errno);
967 	}
968 
969 	(void) changelist_postfix(clp);
970 	changelist_free(clp);
971 
972 	return (err);
973 }
974 
975 static int
976 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
977     char *newname, recvflags_t flags)
978 {
979 	zfs_cmd_t zc = { 0 };
980 	int err = 0;
981 	prop_changelist_t *clp;
982 	zfs_handle_t *zhp;
983 
984 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
985 	if (zhp == NULL)
986 		return (-1);
987 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
988 	    flags.force ? MS_FORCE : 0);
989 	zfs_close(zhp);
990 	if (clp == NULL)
991 		return (-1);
992 	err = changelist_prefix(clp);
993 	if (err)
994 		return (err);
995 
996 	zc.zc_objset_type = DMU_OST_ZFS;
997 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
998 
999 	if (flags.verbose)
1000 		(void) printf("attempting destroy %s\n", zc.zc_name);
1001 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1002 
1003 	if (err == 0) {
1004 		if (flags.verbose)
1005 			(void) printf("success\n");
1006 		changelist_remove(clp, zc.zc_name);
1007 	}
1008 
1009 	(void) changelist_postfix(clp);
1010 	changelist_free(clp);
1011 
1012 	if (err != 0)
1013 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1014 
1015 	return (err);
1016 }
1017 
/*
 * Argument block for guid_to_name_cb().
 */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* out: receives the name of the matching dataset */
} guid_to_name_data_t;
1022 
1023 static int
1024 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1025 {
1026 	guid_to_name_data_t *gtnd = arg;
1027 	int err;
1028 
1029 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1030 		(void) strcpy(gtnd->name, zhp->zfs_name);
1031 		return (EEXIST);
1032 	}
1033 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1034 	zfs_close(zhp);
1035 	return (err);
1036 }
1037 
1038 static int
1039 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1040     char *name)
1041 {
1042 	/* exhaustive search all local snapshots */
1043 	guid_to_name_data_t gtnd;
1044 	int err = 0;
1045 	zfs_handle_t *zhp;
1046 	char *cp;
1047 
1048 	gtnd.guid = guid;
1049 	gtnd.name = name;
1050 
1051 	if (strchr(parent, '@') == NULL) {
1052 		zhp = make_dataset_handle(hdl, parent);
1053 		if (zhp != NULL) {
1054 			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1055 			zfs_close(zhp);
1056 			if (err == EEXIST)
1057 				return (0);
1058 		}
1059 	}
1060 
1061 	cp = strchr(parent, '/');
1062 	if (cp)
1063 		*cp = '\0';
1064 	zhp = make_dataset_handle(hdl, parent);
1065 	if (cp)
1066 		*cp = '/';
1067 
1068 	if (zhp) {
1069 		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1070 		zfs_close(zhp);
1071 	}
1072 
1073 	return (err == EEXIST ? 0 : ENOENT);
1074 
1075 }
1076 
1077 /*
1078  * Return true if dataset guid1 is created before guid2.
1079  */
1080 static int
1081 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1082     uint64_t guid1, uint64_t guid2)
1083 {
1084 	nvlist_t *nvfs;
1085 	char *fsname, *snapname;
1086 	char buf[ZFS_MAXNAMELEN];
1087 	int rv;
1088 	zfs_node_t zn1, zn2;
1089 
1090 	if (guid2 == 0)
1091 		return (0);
1092 	if (guid1 == 0)
1093 		return (1);
1094 
1095 	nvfs = fsavl_find(avl, guid1, &snapname);
1096 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1097 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1098 	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1099 	if (zn1.zn_handle == NULL)
1100 		return (-1);
1101 
1102 	nvfs = fsavl_find(avl, guid2, &snapname);
1103 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1104 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1105 	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1106 	if (zn2.zn_handle == NULL) {
1107 		zfs_close(zn2.zn_handle);
1108 		return (-1);
1109 	}
1110 
1111 	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1112 
1113 	zfs_close(zn1.zn_handle);
1114 	zfs_close(zn2.zn_handle);
1115 
1116 	return (rv);
1117 }
1118 
1119 static int
1120 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1121     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
1122 {
1123 	nvlist_t *local_nv;
1124 	avl_tree_t *local_avl;
1125 	nvpair_t *fselem, *nextfselem;
1126 	char *tosnap, *fromsnap;
1127 	char newname[ZFS_MAXNAMELEN];
1128 	int error;
1129 	boolean_t needagain, progress;
1130 
1131 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1132 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
1133 
1134 	if (flags.dryrun)
1135 		return (0);
1136 
1137 again:
1138 	needagain = progress = B_FALSE;
1139 
1140 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1141 	    &local_nv, &local_avl)) != 0)
1142 		return (error);
1143 
1144 	/*
1145 	 * Process deletes and renames
1146 	 */
1147 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
1148 	    fselem; fselem = nextfselem) {
1149 		nvlist_t *nvfs, *snaps;
1150 		nvlist_t *stream_nvfs = NULL;
1151 		nvpair_t *snapelem, *nextsnapelem;
1152 		uint64_t fromguid = 0;
1153 		uint64_t originguid = 0;
1154 		uint64_t stream_originguid = 0;
1155 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1156 		char *fsname, *stream_fsname;
1157 
1158 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
1159 
1160 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1161 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1162 		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1163 		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1164 		    &parent_fromsnap_guid));
1165 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1166 
1167 		/*
1168 		 * First find the stream's fs, so we can check for
1169 		 * a different origin (due to "zfs promote")
1170 		 */
1171 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1172 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1173 			uint64_t thisguid;
1174 
1175 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1176 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1177 
1178 			if (stream_nvfs != NULL)
1179 				break;
1180 		}
1181 
1182 		/* check for promote */
1183 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
1184 		    &stream_originguid);
1185 		if (stream_nvfs && originguid != stream_originguid) {
1186 			switch (created_before(hdl, local_avl,
1187 			    stream_originguid, originguid)) {
1188 			case 1: {
1189 				/* promote it! */
1190 				zfs_cmd_t zc = { 0 };
1191 				nvlist_t *origin_nvfs;
1192 				char *origin_fsname;
1193 
1194 				if (flags.verbose)
1195 					(void) printf("promoting %s\n", fsname);
1196 
1197 				origin_nvfs = fsavl_find(local_avl, originguid,
1198 				    NULL);
1199 				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1200 				    "name", &origin_fsname));
1201 				(void) strlcpy(zc.zc_value, origin_fsname,
1202 				    sizeof (zc.zc_value));
1203 				(void) strlcpy(zc.zc_name, fsname,
1204 				    sizeof (zc.zc_name));
1205 				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1206 				if (error == 0)
1207 					progress = B_TRUE;
1208 				break;
1209 			}
1210 			default:
1211 				break;
1212 			case -1:
1213 				fsavl_destroy(local_avl);
1214 				nvlist_free(local_nv);
1215 				return (-1);
1216 			}
1217 			/*
1218 			 * We had/have the wrong origin, therefore our
1219 			 * list of snapshots is wrong.  Need to handle
1220 			 * them on the next pass.
1221 			 */
1222 			needagain = B_TRUE;
1223 			continue;
1224 		}
1225 
1226 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1227 		    snapelem; snapelem = nextsnapelem) {
1228 			uint64_t thisguid;
1229 			char *stream_snapname;
1230 			nvlist_t *found, *props;
1231 
1232 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1233 
1234 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1235 			found = fsavl_find(stream_avl, thisguid,
1236 			    &stream_snapname);
1237 
1238 			/* check for delete */
1239 			if (found == NULL) {
1240 				char name[ZFS_MAXNAMELEN];
1241 
1242 				if (!flags.force)
1243 					continue;
1244 
1245 				(void) snprintf(name, sizeof (name), "%s@%s",
1246 				    fsname, nvpair_name(snapelem));
1247 
1248 				error = recv_destroy(hdl, name,
1249 				    strlen(fsname)+1, newname, flags);
1250 				if (error)
1251 					needagain = B_TRUE;
1252 				else
1253 					progress = B_TRUE;
1254 				continue;
1255 			}
1256 
1257 			stream_nvfs = found;
1258 
1259 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1260 			    &props) && 0 == nvlist_lookup_nvlist(props,
1261 			    stream_snapname, &props)) {
1262 				zfs_cmd_t zc = { 0 };
1263 
1264 				zc.zc_cookie = B_TRUE; /* clear current props */
1265 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1266 				    "%s@%s", fsname, nvpair_name(snapelem));
1267 				if (zcmd_write_src_nvlist(hdl, &zc,
1268 				    props) == 0) {
1269 					(void) zfs_ioctl(hdl,
1270 					    ZFS_IOC_SET_PROP, &zc);
1271 					zcmd_free_nvlists(&zc);
1272 				}
1273 			}
1274 
1275 			/* check for different snapname */
1276 			if (strcmp(nvpair_name(snapelem),
1277 			    stream_snapname) != 0) {
1278 				char name[ZFS_MAXNAMELEN];
1279 				char tryname[ZFS_MAXNAMELEN];
1280 
1281 				(void) snprintf(name, sizeof (name), "%s@%s",
1282 				    fsname, nvpair_name(snapelem));
1283 				(void) snprintf(tryname, sizeof (name), "%s@%s",
1284 				    fsname, stream_snapname);
1285 
1286 				error = recv_rename(hdl, name, tryname,
1287 				    strlen(fsname)+1, newname, flags);
1288 				if (error)
1289 					needagain = B_TRUE;
1290 				else
1291 					progress = B_TRUE;
1292 			}
1293 
1294 			if (strcmp(stream_snapname, fromsnap) == 0)
1295 				fromguid = thisguid;
1296 		}
1297 
1298 		/* check for delete */
1299 		if (stream_nvfs == NULL) {
1300 			if (!flags.force)
1301 				continue;
1302 
1303 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1304 			    newname, flags);
1305 			if (error)
1306 				needagain = B_TRUE;
1307 			else
1308 				progress = B_TRUE;
1309 			continue;
1310 		}
1311 
1312 		if (fromguid == 0 && flags.verbose) {
1313 			(void) printf("local fs %s does not have fromsnap "
1314 			    "(%s in stream); must have been deleted locally; "
1315 			    "ignoring\n", fsname, fromsnap);
1316 			continue;
1317 		}
1318 
1319 		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1320 		    "name", &stream_fsname));
1321 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1322 		    "parentfromsnap", &stream_parent_fromsnap_guid));
1323 
1324 		/* check for rename */
1325 		if ((stream_parent_fromsnap_guid != 0 &&
1326 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1327 		    strcmp(strrchr(fsname, '/'),
1328 		    strrchr(stream_fsname, '/')) != 0) {
1329 			nvlist_t *parent;
1330 			char tryname[ZFS_MAXNAMELEN];
1331 
1332 			parent = fsavl_find(local_avl,
1333 			    stream_parent_fromsnap_guid, NULL);
1334 			/*
1335 			 * NB: parent might not be found if we used the
1336 			 * tosnap for stream_parent_fromsnap_guid,
1337 			 * because the parent is a newly-created fs;
1338 			 * we'll be able to rename it after we recv the
1339 			 * new fs.
1340 			 */
1341 			if (parent != NULL) {
1342 				char *pname;
1343 
1344 				VERIFY(0 == nvlist_lookup_string(parent, "name",
1345 				    &pname));
1346 				(void) snprintf(tryname, sizeof (tryname),
1347 				    "%s%s", pname, strrchr(stream_fsname, '/'));
1348 			} else {
1349 				tryname[0] = '\0';
1350 				if (flags.verbose) {
1351 					(void) printf("local fs %s new parent "
1352 					    "not found\n", fsname);
1353 				}
1354 			}
1355 
1356 			error = recv_rename(hdl, fsname, tryname,
1357 			    strlen(tofs)+1, newname, flags);
1358 			if (error)
1359 				needagain = B_TRUE;
1360 			else
1361 				progress = B_TRUE;
1362 		}
1363 	}
1364 
1365 	fsavl_destroy(local_avl);
1366 	nvlist_free(local_nv);
1367 
1368 	if (needagain && progress) {
1369 		/* do another pass to fix up temporary names */
1370 		if (flags.verbose)
1371 			(void) printf("another pass:\n");
1372 		goto again;
1373 	}
1374 
1375 	return (needagain);
1376 }
1377 
1378 static int
1379 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
1380     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
1381     char **top_zfs)
1382 {
1383 	nvlist_t *stream_nv = NULL;
1384 	avl_tree_t *stream_avl = NULL;
1385 	char *fromsnap = NULL;
1386 	char tofs[ZFS_MAXNAMELEN];
1387 	char errbuf[1024];
1388 	dmu_replay_record_t drre;
1389 	int error;
1390 	boolean_t anyerr = B_FALSE;
1391 	boolean_t softerr = B_FALSE;
1392 
1393 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1394 	    "cannot receive"));
1395 
1396 	if (strchr(destname, '@')) {
1397 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1398 		    "can not specify snapshot name for multi-snapshot stream"));
1399 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1400 	}
1401 
1402 	assert(drr->drr_type == DRR_BEGIN);
1403 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
1404 	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
1405 
1406 	/*
1407 	 * Read in the nvlist from the stream.
1408 	 */
1409 	if (drr->drr_payloadlen != 0) {
1410 		if (!flags.isprefix) {
1411 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1412 			    "must use -d to receive replication "
1413 			    "(send -R) stream"));
1414 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1415 		}
1416 
1417 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
1418 		    &stream_nv, flags.byteswap, zc);
1419 		if (error) {
1420 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1421 			goto out;
1422 		}
1423 	}
1424 
1425 	/*
1426 	 * Read in the end record and verify checksum.
1427 	 */
1428 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
1429 	    flags.byteswap, NULL)))
1430 		goto out;
1431 	if (flags.byteswap) {
1432 		drre.drr_type = BSWAP_32(drre.drr_type);
1433 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
1434 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
1435 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
1436 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
1437 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
1438 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
1439 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
1440 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
1441 	}
1442 	if (drre.drr_type != DRR_END) {
1443 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1444 		goto out;
1445 	}
1446 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
1447 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1448 		    "incorrect header checksum"));
1449 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1450 		goto out;
1451 	}
1452 
1453 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
1454 
1455 	if (drr->drr_payloadlen != 0) {
1456 		nvlist_t *stream_fss;
1457 
1458 		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
1459 		    &stream_fss));
1460 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
1461 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1462 			    "couldn't allocate avl tree"));
1463 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
1464 			goto out;
1465 		}
1466 
1467 		if (fromsnap != NULL) {
1468 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
1469 			if (flags.isprefix) {
1470 				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
1471 				    "/@");
1472 				/* zfs_receive_one() will create_parents() */
1473 				(void) strlcat(tofs,
1474 				    &drr->drr_u.drr_begin.drr_toname[i],
1475 				    ZFS_MAXNAMELEN);
1476 				*strchr(tofs, '@') = '\0';
1477 			}
1478 			softerr = recv_incremental_replication(hdl, tofs,
1479 			    flags, stream_nv, stream_avl);
1480 		}
1481 	}
1482 
1483 
1484 	/* Finally, receive each contained stream */
1485 	do {
1486 		/*
1487 		 * we should figure out if it has a recoverable
1488 		 * error, in which case do a recv_skip() and drive on.
1489 		 * Note, if we fail due to already having this guid,
1490 		 * zfs_receive_one() will take care of it (ie,
1491 		 * recv_skip() and return 0).
1492 		 */
1493 		error = zfs_receive_impl(hdl, destname, flags, fd,
1494 		    stream_avl, top_zfs);
1495 		if (error == ENODATA) {
1496 			error = 0;
1497 			break;
1498 		}
1499 		anyerr |= error;
1500 	} while (error == 0);
1501 
1502 	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
1503 		/*
1504 		 * Now that we have the fs's they sent us, try the
1505 		 * renames again.
1506 		 */
1507 		softerr = recv_incremental_replication(hdl, tofs, flags,
1508 		    stream_nv, stream_avl);
1509 	}
1510 
1511 out:
1512 	fsavl_destroy(stream_avl);
1513 	if (stream_nv)
1514 		nvlist_free(stream_nv);
1515 	if (softerr)
1516 		error = -2;
1517 	if (anyerr)
1518 		error = -1;
1519 	return (error);
1520 }
1521 
1522 static int
1523 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
1524 {
1525 	dmu_replay_record_t *drr;
1526 	void *buf = malloc(1<<20);
1527 
1528 	/* XXX would be great to use lseek if possible... */
1529 	drr = buf;
1530 
1531 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
1532 	    byteswap, NULL) == 0) {
1533 		if (byteswap)
1534 			drr->drr_type = BSWAP_32(drr->drr_type);
1535 
1536 		switch (drr->drr_type) {
1537 		case DRR_BEGIN:
1538 			/* NB: not to be used on v2 stream packages */
1539 			assert(drr->drr_payloadlen == 0);
1540 			break;
1541 
1542 		case DRR_END:
1543 			free(buf);
1544 			return (0);
1545 
1546 		case DRR_OBJECT:
1547 			if (byteswap) {
1548 				drr->drr_u.drr_object.drr_bonuslen =
1549 				    BSWAP_32(drr->drr_u.drr_object.
1550 				    drr_bonuslen);
1551 			}
1552 			(void) recv_read(hdl, fd, buf,
1553 			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
1554 			    B_FALSE, NULL);
1555 			break;
1556 
1557 		case DRR_WRITE:
1558 			if (byteswap) {
1559 				drr->drr_u.drr_write.drr_length =
1560 				    BSWAP_64(drr->drr_u.drr_write.drr_length);
1561 			}
1562 			(void) recv_read(hdl, fd, buf,
1563 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
1564 			break;
1565 
1566 		case DRR_FREEOBJECTS:
1567 		case DRR_FREE:
1568 			break;
1569 
1570 		default:
1571 			assert(!"invalid record type");
1572 		}
1573 	}
1574 
1575 	free(buf);
1576 	return (-1);
1577 }
1578 
1579 /*
1580  * Restores a backup of tosnap from the file descriptor specified by infd.
1581  */
1582 static int
1583 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
1584     recvflags_t flags, dmu_replay_record_t *drr,
1585     dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
1586     char **top_zfs)
1587 {
1588 	zfs_cmd_t zc = { 0 };
1589 	time_t begin_time;
1590 	int ioctl_err, ioctl_errno, err, choplen;
1591 	char *cp;
1592 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
1593 	char errbuf[1024];
1594 	char chopprefix[ZFS_MAXNAMELEN];
1595 	boolean_t newfs = B_FALSE;
1596 	boolean_t stream_wantsnewfs;
1597 	uint64_t parent_snapguid = 0;
1598 	prop_changelist_t *clp = NULL;
1599 	nvlist_t *snapprops_nvlist = NULL;
1600 
1601 	begin_time = time(NULL);
1602 
1603 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1604 	    "cannot receive"));
1605 
1606 	if (stream_avl != NULL) {
1607 		char *snapname;
1608 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
1609 		    &snapname);
1610 		nvlist_t *props;
1611 		int ret;
1612 
1613 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
1614 		    &parent_snapguid);
1615 		err = nvlist_lookup_nvlist(fs, "props", &props);
1616 		if (err)
1617 			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
1618 
1619 		if (flags.canmountoff) {
1620 			VERIFY(0 == nvlist_add_uint64(props,
1621 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
1622 		}
1623 		ret = zcmd_write_src_nvlist(hdl, &zc, props);
1624 		if (err)
1625 			nvlist_free(props);
1626 
1627 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
1628 			VERIFY(0 == nvlist_lookup_nvlist(props,
1629 			    snapname, &snapprops_nvlist));
1630 		}
1631 
1632 		if (ret != 0)
1633 			return (-1);
1634 	}
1635 
1636 	/*
1637 	 * Determine how much of the snapshot name stored in the stream
1638 	 * we are going to tack on to the name they specified on the
1639 	 * command line, and how much we are going to chop off.
1640 	 *
1641 	 * If they specified a snapshot, chop the entire name stored in
1642 	 * the stream.
1643 	 */
1644 	(void) strcpy(chopprefix, drrb->drr_toname);
1645 	if (flags.isprefix) {
1646 		/*
1647 		 * They specified a fs with -d, we want to tack on
1648 		 * everything but the pool name stored in the stream
1649 		 */
1650 		if (strchr(tosnap, '@')) {
1651 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1652 			    "argument - snapshot not allowed with -d"));
1653 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1654 		}
1655 		cp = strchr(chopprefix, '/');
1656 		if (cp == NULL)
1657 			cp = strchr(chopprefix, '@');
1658 		*cp = '\0';
1659 	} else if (strchr(tosnap, '@') == NULL) {
1660 		/*
1661 		 * If they specified a filesystem without -d, we want to
1662 		 * tack on everything after the fs specified in the
1663 		 * first name from the stream.
1664 		 */
1665 		cp = strchr(chopprefix, '@');
1666 		*cp = '\0';
1667 	}
1668 	choplen = strlen(chopprefix);
1669 
1670 	/*
1671 	 * Determine name of destination snapshot, store in zc_value.
1672 	 */
1673 	(void) strcpy(zc.zc_value, tosnap);
1674 	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
1675 	    sizeof (zc.zc_value));
1676 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
1677 		zcmd_free_nvlists(&zc);
1678 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1679 	}
1680 
1681 	/*
1682 	 * Determine the name of the origin snapshot, store in zc_string.
1683 	 */
1684 	if (drrb->drr_flags & DRR_FLAG_CLONE) {
1685 		if (guid_to_name(hdl, tosnap,
1686 		    drrb->drr_fromguid, zc.zc_string) != 0) {
1687 			zcmd_free_nvlists(&zc);
1688 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1689 			    "local origin for clone %s does not exist"),
1690 			    zc.zc_value);
1691 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1692 		}
1693 		if (flags.verbose)
1694 			(void) printf("found clone origin %s\n", zc.zc_string);
1695 	}
1696 
1697 	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
1698 	    (drrb->drr_flags & DRR_FLAG_CLONE));
1699 
1700 	if (stream_wantsnewfs) {
1701 		/*
1702 		 * if the parent fs does not exist, look for it based on
1703 		 * the parent snap GUID
1704 		 */
1705 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1706 		    "cannot receive new filesystem stream"));
1707 
1708 		(void) strcpy(zc.zc_name, zc.zc_value);
1709 		cp = strrchr(zc.zc_name, '/');
1710 		if (cp)
1711 			*cp = '\0';
1712 		if (cp &&
1713 		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1714 			char suffix[ZFS_MAXNAMELEN];
1715 			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
1716 			if (guid_to_name(hdl, tosnap, parent_snapguid,
1717 			    zc.zc_value) == 0) {
1718 				*strchr(zc.zc_value, '@') = '\0';
1719 				(void) strcat(zc.zc_value, suffix);
1720 			}
1721 		}
1722 	} else {
1723 		/*
1724 		 * if the fs does not exist, look for it based on the
1725 		 * fromsnap GUID
1726 		 */
1727 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1728 		    "cannot receive incremental stream"));
1729 
1730 		(void) strcpy(zc.zc_name, zc.zc_value);
1731 		*strchr(zc.zc_name, '@') = '\0';
1732 
1733 		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1734 			char snap[ZFS_MAXNAMELEN];
1735 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
1736 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
1737 			    zc.zc_value) == 0) {
1738 				*strchr(zc.zc_value, '@') = '\0';
1739 				(void) strcat(zc.zc_value, snap);
1740 			}
1741 		}
1742 	}
1743 
1744 	(void) strcpy(zc.zc_name, zc.zc_value);
1745 	*strchr(zc.zc_name, '@') = '\0';
1746 
1747 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1748 		zfs_handle_t *zhp;
1749 		/*
1750 		 * Destination fs exists.  Therefore this should either
1751 		 * be an incremental, or the stream specifies a new fs
1752 		 * (full stream or clone) and they want us to blow it
1753 		 * away (and have therefore specified -F and removed any
1754 		 * snapshots).
1755 		 */
1756 
1757 		if (stream_wantsnewfs) {
1758 			if (!flags.force) {
1759 				zcmd_free_nvlists(&zc);
1760 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1761 				    "destination '%s' exists\n"
1762 				    "must specify -F to overwrite it"),
1763 				    zc.zc_name);
1764 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1765 			}
1766 			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
1767 			    &zc) == 0) {
1768 				zcmd_free_nvlists(&zc);
1769 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1770 				    "destination has snapshots (eg. %s)\n"
1771 				    "must destroy them to overwrite it"),
1772 				    zc.zc_name);
1773 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1774 			}
1775 		}
1776 
1777 		if ((zhp = zfs_open(hdl, zc.zc_name,
1778 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1779 			zcmd_free_nvlists(&zc);
1780 			return (-1);
1781 		}
1782 
1783 		if (stream_wantsnewfs &&
1784 		    zhp->zfs_dmustats.dds_origin[0]) {
1785 			zcmd_free_nvlists(&zc);
1786 			zfs_close(zhp);
1787 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1788 			    "destination '%s' is a clone\n"
1789 			    "must destroy it to overwrite it"),
1790 			    zc.zc_name);
1791 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1792 		}
1793 
1794 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
1795 		    stream_wantsnewfs) {
1796 			/* We can't do online recv in this case */
1797 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
1798 			if (clp == NULL) {
1799 				zfs_close(zhp);
1800 				zcmd_free_nvlists(&zc);
1801 				return (-1);
1802 			}
1803 			if (changelist_prefix(clp) != 0) {
1804 				changelist_free(clp);
1805 				zfs_close(zhp);
1806 				zcmd_free_nvlists(&zc);
1807 				return (-1);
1808 			}
1809 		}
1810 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
1811 		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
1812 			zfs_close(zhp);
1813 			zcmd_free_nvlists(&zc);
1814 			return (-1);
1815 		}
1816 		zfs_close(zhp);
1817 	} else {
1818 		/*
1819 		 * Destination filesystem does not exist.  Therefore we better
1820 		 * be creating a new filesystem (either from a full backup, or
1821 		 * a clone).  It would therefore be invalid if the user
1822 		 * specified only the pool name (i.e. if the destination name
1823 		 * contained no slash character).
1824 		 */
1825 		if (!stream_wantsnewfs ||
1826 		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
1827 			zcmd_free_nvlists(&zc);
1828 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1829 			    "destination '%s' does not exist"), zc.zc_name);
1830 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1831 		}
1832 
1833 		/*
1834 		 * Trim off the final dataset component so we perform the
1835 		 * recvbackup ioctl to the filesystems's parent.
1836 		 */
1837 		*cp = '\0';
1838 
1839 		if (flags.isprefix && !flags.dryrun &&
1840 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
1841 			zcmd_free_nvlists(&zc);
1842 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
1843 		}
1844 
1845 		newfs = B_TRUE;
1846 	}
1847 
1848 	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
1849 	zc.zc_cookie = infd;
1850 	zc.zc_guid = flags.force;
1851 	if (flags.verbose) {
1852 		(void) printf("%s %s stream of %s into %s\n",
1853 		    flags.dryrun ? "would receive" : "receiving",
1854 		    drrb->drr_fromguid ? "incremental" : "full",
1855 		    drrb->drr_toname, zc.zc_value);
1856 		(void) fflush(stdout);
1857 	}
1858 
1859 	if (flags.dryrun) {
1860 		zcmd_free_nvlists(&zc);
1861 		return (recv_skip(hdl, infd, flags.byteswap));
1862 	}
1863 
1864 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
1865 	ioctl_errno = errno;
1866 	zcmd_free_nvlists(&zc);
1867 
1868 	if (err == 0 && snapprops_nvlist) {
1869 		zfs_cmd_t zc2 = { 0 };
1870 
1871 		(void) strcpy(zc2.zc_name, zc.zc_value);
1872 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
1873 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
1874 			zcmd_free_nvlists(&zc2);
1875 		}
1876 	}
1877 
1878 	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
1879 		/*
1880 		 * It may be that this snapshot already exists,
1881 		 * in which case we want to consume & ignore it
1882 		 * rather than failing.
1883 		 */
1884 		avl_tree_t *local_avl;
1885 		nvlist_t *local_nv, *fs;
1886 		char *cp = strchr(zc.zc_value, '@');
1887 
1888 		/*
1889 		 * XXX Do this faster by just iterating over snaps in
1890 		 * this fs.  Also if zc_value does not exist, we will
1891 		 * get a strange "does not exist" error message.
1892 		 */
1893 		*cp = '\0';
1894 		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
1895 		    &local_nv, &local_avl) == 0) {
1896 			*cp = '@';
1897 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
1898 			fsavl_destroy(local_avl);
1899 			nvlist_free(local_nv);
1900 
1901 			if (fs != NULL) {
1902 				if (flags.verbose) {
1903 					(void) printf("snap %s already exists; "
1904 					    "ignoring\n", zc.zc_value);
1905 				}
1906 				ioctl_err = recv_skip(hdl, infd,
1907 				    flags.byteswap);
1908 			}
1909 		}
1910 		*cp = '@';
1911 	}
1912 
1913 
1914 	if (ioctl_err != 0) {
1915 		switch (ioctl_errno) {
1916 		case ENODEV:
1917 			cp = strchr(zc.zc_value, '@');
1918 			*cp = '\0';
1919 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1920 			    "most recent snapshot of %s does not\n"
1921 			    "match incremental source"), zc.zc_value);
1922 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1923 			*cp = '@';
1924 			break;
1925 		case ETXTBSY:
1926 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1927 			    "destination %s has been modified\n"
1928 			    "since most recent snapshot"), zc.zc_name);
1929 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1930 			break;
1931 		case EEXIST:
1932 			cp = strchr(zc.zc_value, '@');
1933 			if (newfs) {
1934 				/* it's the containing fs that exists */
1935 				*cp = '\0';
1936 			}
1937 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1938 			    "destination already exists"));
1939 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
1940 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
1941 			    zc.zc_value);
1942 			*cp = '@';
1943 			break;
1944 		case EINVAL:
1945 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1946 			break;
1947 		case ECKSUM:
1948 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1949 			    "invalid stream (checksum mismatch)"));
1950 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1951 			break;
1952 		default:
1953 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
1954 		}
1955 	}
1956 
1957 	/*
1958 	 * Mount or recreate the /dev links for the target filesystem
1959 	 * (if created, or if we tore them down to do an incremental
1960 	 * restore), and the /dev links for the new snapshot (if
1961 	 * created). Also mount any children of the target filesystem
1962 	 * if we did a replication receive (indicated by stream_avl
1963 	 * being non-NULL).
1964 	 */
1965 	cp = strchr(zc.zc_value, '@');
1966 	if (cp && (ioctl_err == 0 || !newfs)) {
1967 		zfs_handle_t *h;
1968 
1969 		*cp = '\0';
1970 		h = zfs_open(hdl, zc.zc_value,
1971 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
1972 		if (h != NULL) {
1973 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
1974 				*cp = '@';
1975 				err = zvol_create_link(hdl, h->zfs_name);
1976 				if (err == 0 && ioctl_err == 0)
1977 					err = zvol_create_link(hdl,
1978 					    zc.zc_value);
1979 			} else if (newfs || stream_avl) {
1980 				/*
1981 				 * Track the first/top of hierarchy fs,
1982 				 * for mounting and sharing later.
1983 				 */
1984 				if (top_zfs && *top_zfs == NULL)
1985 					*top_zfs = zfs_strdup(hdl, zc.zc_value);
1986 			}
1987 			zfs_close(h);
1988 		}
1989 		*cp = '@';
1990 	}
1991 
1992 	if (clp) {
1993 		err |= changelist_postfix(clp);
1994 		changelist_free(clp);
1995 	}
1996 
1997 	if (err || ioctl_err)
1998 		return (-1);
1999 
2000 	if (flags.verbose) {
2001 		char buf1[64];
2002 		char buf2[64];
2003 		uint64_t bytes = zc.zc_cookie;
2004 		time_t delta = time(NULL) - begin_time;
2005 		if (delta == 0)
2006 			delta = 1;
2007 		zfs_nicenum(bytes, buf1, sizeof (buf1));
2008 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2009 
2010 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2011 		    buf1, delta, buf2);
2012 	}
2013 
2014 	return (0);
2015 }
2016 
2017 static int
2018 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2019     int infd, avl_tree_t *stream_avl, char **top_zfs)
2020 {
2021 	int err;
2022 	dmu_replay_record_t drr, drr_noswap;
2023 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
2024 	char errbuf[1024];
2025 	zio_cksum_t zcksum = { 0 };
2026 
2027 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2028 	    "cannot receive"));
2029 
2030 	if (flags.isprefix &&
2031 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2032 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2033 		    "(%s) does not exist"), tosnap);
2034 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2035 	}
2036 
2037 	/* read in the BEGIN record */
2038 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2039 	    &zcksum)))
2040 		return (err);
2041 
2042 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2043 		/* It's the double end record at the end of a package */
2044 		return (ENODATA);
2045 	}
2046 
2047 	/* the kernel needs the non-byteswapped begin record */
2048 	drr_noswap = drr;
2049 
2050 	flags.byteswap = B_FALSE;
2051 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2052 		/*
2053 		 * We computed the checksum in the wrong byteorder in
2054 		 * recv_read() above; do it again correctly.
2055 		 */
2056 		bzero(&zcksum, sizeof (zio_cksum_t));
2057 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2058 		flags.byteswap = B_TRUE;
2059 
2060 		drr.drr_type = BSWAP_32(drr.drr_type);
2061 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2062 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2063 		drrb->drr_version = BSWAP_64(drrb->drr_version);
2064 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2065 		drrb->drr_type = BSWAP_32(drrb->drr_type);
2066 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2067 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2068 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2069 	}
2070 
2071 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2072 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2073 		    "stream (bad magic number)"));
2074 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2075 	}
2076 
2077 	if (strchr(drrb->drr_toname, '@') == NULL) {
2078 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2079 		    "stream (bad snapshot name)"));
2080 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2081 	}
2082 
2083 	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
2084 		return (zfs_receive_one(hdl, infd, tosnap, flags,
2085 		    &drr, &drr_noswap, stream_avl, top_zfs));
2086 	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
2087 		return (zfs_receive_package(hdl, infd, tosnap, flags,
2088 		    &drr, &zcksum, top_zfs));
2089 	} else {
2090 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2091 		    "stream is unsupported version %llu"),
2092 		    drrb->drr_version);
2093 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2094 	}
2095 }
2096 
2097 /*
2098  * Restores a backup of tosnap from the file descriptor specified by infd.
2099  * Return 0 on total success, -2 if some things couldn't be
2100  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2101  * (-1 will override -2).
2102  */
2103 int
2104 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2105     int infd, avl_tree_t *stream_avl)
2106 {
2107 	char *top_zfs = NULL;
2108 	int err;
2109 
2110 	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
2111 
2112 	if (err == 0 && !flags.nomount && top_zfs) {
2113 		zfs_handle_t *zhp;
2114 		prop_changelist_t *clp;
2115 
2116 		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
2117 		if (zhp != NULL) {
2118 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
2119 			    CL_GATHER_MOUNT_ALWAYS, 0);
2120 			zfs_close(zhp);
2121 			if (clp != NULL) {
2122 				/* mount and share received datasets */
2123 				err = changelist_postfix(clp);
2124 				changelist_free(clp);
2125 			}
2126 		}
2127 		if (zhp == NULL || clp == NULL || err)
2128 			err = -1;
2129 	}
2130 	if (top_zfs)
2131 		free(top_zfs);
2132 
2133 	return (err);
2134 }
2135