1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 * Copyright (c) 2017 Datto Inc. 26 * Copyright 2017 RackTop Systems. 27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. 28 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. 29 */ 30 31 /* 32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 33 * It has the following characteristics: 34 * 35 * - Thread Safe. libzfs_core is accessible concurrently from multiple 36 * threads. This is accomplished primarily by avoiding global data 37 * (e.g. caching). Since it's thread-safe, there is no reason for a 38 * process to have multiple libzfs "instances". Therefore, we store 39 * our few pieces of data (e.g. the file descriptor) in global 40 * variables. The fd is reference-counted so that the libzfs_core 41 * library can be "initialized" multiple times (e.g. by different 42 * consumers within the same process). 43 * 44 * - Committed Interface. 
*   The libzfs_core interface will be committed,
 *   therefore consumers can compile against it and be confident that
 *   their code will continue to work on future releases of this code.
 *   Currently, the interface is Evolving (not Committed), but we intend
 *   to commit to it once it is more complete and we determine that it
 *   meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *   defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *   to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *   between libzfs_core functions and ioctls to ZFS_DEV.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *   with kernel ioctls, and kernel ioctls are generally atomic, each
 *   libzfs_core function is atomic.  For example, creating multiple
 *   snapshots with a single call to lzc_snapshot() is atomic -- it
 *   can't fail with only some of the requested snapshots created, even
 *   in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *   support for "zfs send -R") are too complicated to fit the scope of
 *   libzfs_core.  This functionality will continue to live in libzfs.
 *   Where appropriate, libzfs will use the underlying atomic operations
 *   of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *   zfs receive" by using individual "send one snapshot", rename,
 *   destroy, and "receive one snapshot" operations in libzfs_core.
 *   /sbin/zfs and /sbin/zpool will link with both libzfs and
 *   libzfs_core.  Other consumers should aim to use only libzfs_core,
 *   since that will be the supported, stable interface going forwards.
75 */ 76 77 #include <libzfs_core.h> 78 #include <ctype.h> 79 #include <unistd.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #ifdef ZFS_DEBUG 83 #include <stdio.h> 84 #endif 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <pthread.h> 88 #include <libzutil.h> 89 #include <sys/nvpair.h> 90 #include <sys/param.h> 91 #include <sys/types.h> 92 #include <sys/stat.h> 93 #include <sys/zfs_ioctl.h> 94 #if __FreeBSD__ 95 #define BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */ 96 #endif 97 98 static int g_fd = -1; 99 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 100 static int g_refcount; 101 102 #ifdef ZFS_DEBUG 103 static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST; 104 static zfs_errno_t fail_ioc_err; 105 106 static void 107 libzfs_core_debug_ioc(void) 108 { 109 /* 110 * To test running newer user space binaries with kernel's 111 * that don't yet support an ioctl or a new ioctl arg we 112 * provide an override to intentionally fail an ioctl. 113 * 114 * USAGE: 115 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err" 116 * 117 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a 118 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029" 119 * 120 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank" 121 * cannot checkpoint 'tank': the loaded zfs module does not support 122 * this operation. A reboot may be required to enable this operation. 
123 */ 124 if (fail_ioc_cmd == ZFS_IOC_LAST) { 125 char *ioc_test = getenv("ZFS_IOC_TEST"); 126 unsigned int ioc_num = 0, ioc_err = 0; 127 128 if (ioc_test != NULL && 129 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 && 130 ioc_num < ZFS_IOC_LAST) { 131 fail_ioc_cmd = ioc_num; 132 fail_ioc_err = ioc_err; 133 } 134 } 135 } 136 #endif 137 138 int 139 libzfs_core_init(void) 140 { 141 (void) pthread_mutex_lock(&g_lock); 142 if (g_refcount == 0) { 143 g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC); 144 if (g_fd < 0) { 145 (void) pthread_mutex_unlock(&g_lock); 146 return (errno); 147 } 148 } 149 g_refcount++; 150 151 #ifdef ZFS_DEBUG 152 libzfs_core_debug_ioc(); 153 #endif 154 (void) pthread_mutex_unlock(&g_lock); 155 return (0); 156 } 157 158 void 159 libzfs_core_fini(void) 160 { 161 (void) pthread_mutex_lock(&g_lock); 162 ASSERT3S(g_refcount, >, 0); 163 164 g_refcount--; 165 166 if (g_refcount == 0 && g_fd != -1) { 167 (void) close(g_fd); 168 g_fd = -1; 169 } 170 (void) pthread_mutex_unlock(&g_lock); 171 } 172 173 static int 174 lzc_ioctl(zfs_ioc_t ioc, const char *name, 175 nvlist_t *source, nvlist_t **resultp) 176 { 177 zfs_cmd_t zc = {"\0"}; 178 int error = 0; 179 char *packed = NULL; 180 size_t size = 0; 181 182 ASSERT3S(g_refcount, >, 0); 183 VERIFY3S(g_fd, !=, -1); 184 185 #ifdef ZFS_DEBUG 186 if (ioc == fail_ioc_cmd) 187 return (fail_ioc_err); 188 #endif 189 190 if (name != NULL) 191 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 192 193 if (source != NULL) { 194 packed = fnvlist_pack(source, &size); 195 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 196 zc.zc_nvlist_src_size = size; 197 } 198 199 if (resultp != NULL) { 200 *resultp = NULL; 201 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { 202 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, 203 ZCP_ARG_MEMLIMIT); 204 } else { 205 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 206 } 207 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 208 malloc(zc.zc_nvlist_dst_size); 209 if (zc.zc_nvlist_dst == (uint64_t)0) { 
210 error = ENOMEM; 211 goto out; 212 } 213 } 214 215 while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) { 216 /* 217 * If ioctl exited with ENOMEM, we retry the ioctl after 218 * increasing the size of the destination nvlist. 219 * 220 * Channel programs that exit with ENOMEM ran over the 221 * lua memory sandbox; they should not be retried. 222 */ 223 if (errno == ENOMEM && resultp != NULL && 224 ioc != ZFS_IOC_CHANNEL_PROGRAM) { 225 free((void *)(uintptr_t)zc.zc_nvlist_dst); 226 zc.zc_nvlist_dst_size *= 2; 227 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 228 malloc(zc.zc_nvlist_dst_size); 229 if (zc.zc_nvlist_dst == (uint64_t)0) { 230 error = ENOMEM; 231 goto out; 232 } 233 } else { 234 error = errno; 235 break; 236 } 237 } 238 if (zc.zc_nvlist_dst_filled) { 239 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 240 zc.zc_nvlist_dst_size); 241 } 242 243 out: 244 if (packed != NULL) 245 fnvlist_pack_free(packed, size); 246 free((void *)(uintptr_t)zc.zc_nvlist_dst); 247 return (error); 248 } 249 250 int 251 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, 252 uint8_t *wkeydata, uint_t wkeylen) 253 { 254 int error; 255 nvlist_t *hidden_args = NULL; 256 nvlist_t *args = fnvlist_alloc(); 257 258 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); 259 if (props != NULL) 260 fnvlist_add_nvlist(args, "props", props); 261 262 if (wkeydata != NULL) { 263 hidden_args = fnvlist_alloc(); 264 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 265 wkeylen); 266 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); 267 } 268 269 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 270 nvlist_free(hidden_args); 271 nvlist_free(args); 272 return (error); 273 } 274 275 int 276 lzc_clone(const char *fsname, const char *origin, nvlist_t *props) 277 { 278 int error; 279 nvlist_t *hidden_args = NULL; 280 nvlist_t *args = fnvlist_alloc(); 281 282 fnvlist_add_string(args, "origin", origin); 283 if (props != NULL) 284 fnvlist_add_nvlist(args, 
"props", props); 285 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 286 nvlist_free(hidden_args); 287 nvlist_free(args); 288 return (error); 289 } 290 291 int 292 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) 293 { 294 /* 295 * The promote ioctl is still legacy, so we need to construct our 296 * own zfs_cmd_t rather than using lzc_ioctl(). 297 */ 298 zfs_cmd_t zc = {"\0"}; 299 300 ASSERT3S(g_refcount, >, 0); 301 VERIFY3S(g_fd, !=, -1); 302 303 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 304 if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { 305 int error = errno; 306 if (error == EEXIST && snapnamebuf != NULL) 307 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); 308 return (error); 309 } 310 return (0); 311 } 312 313 int 314 lzc_rename(const char *source, const char *target) 315 { 316 zfs_cmd_t zc = {"\0"}; 317 int error; 318 319 ASSERT3S(g_refcount, >, 0); 320 VERIFY3S(g_fd, !=, -1); 321 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); 322 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); 323 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc); 324 if (error != 0) 325 error = errno; 326 return (error); 327 } 328 329 int 330 lzc_destroy(const char *fsname) 331 { 332 int error; 333 nvlist_t *args = fnvlist_alloc(); 334 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); 335 nvlist_free(args); 336 return (error); 337 } 338 339 /* 340 * Creates snapshots. 341 * 342 * The keys in the snaps nvlist are the snapshots to be created. 343 * They must all be in the same pool. 344 * 345 * The props nvlist is properties to set. Currently only user properties 346 * are supported. { user:prop_name -> string value } 347 * 348 * The returned results nvlist will have an entry for each snapshot that failed. 349 * The value will be the (int32) error code. 350 * 351 * The return value will be 0 if all snapshots were created, otherwise it will 352 * be the errno of a (unspecified) snapshot that failed. 
353 */ 354 int 355 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 356 { 357 nvpair_t *elem; 358 nvlist_t *args; 359 int error; 360 char pool[ZFS_MAX_DATASET_NAME_LEN]; 361 362 *errlist = NULL; 363 364 /* determine the pool name */ 365 elem = nvlist_next_nvpair(snaps, NULL); 366 if (elem == NULL) 367 return (0); 368 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 369 pool[strcspn(pool, "/@")] = '\0'; 370 371 args = fnvlist_alloc(); 372 fnvlist_add_nvlist(args, "snaps", snaps); 373 if (props != NULL) 374 fnvlist_add_nvlist(args, "props", props); 375 376 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 377 nvlist_free(args); 378 379 return (error); 380 } 381 382 /* 383 * Destroys snapshots. 384 * 385 * The keys in the snaps nvlist are the snapshots to be destroyed. 386 * They must all be in the same pool. 387 * 388 * Snapshots that do not exist will be silently ignored. 389 * 390 * If 'defer' is not set, and a snapshot has user holds or clones, the 391 * destroy operation will fail and none of the snapshots will be 392 * destroyed. 393 * 394 * If 'defer' is set, and a snapshot has user holds or clones, it will be 395 * marked for deferred destruction, and will be destroyed when the last hold 396 * or clone is removed/destroyed. 397 * 398 * The return value will be 0 if all snapshots were destroyed (or marked for 399 * later destruction if 'defer' is set) or didn't exist to begin with. 400 * 401 * Otherwise the return value will be the errno of a (unspecified) snapshot 402 * that failed, no snapshots will be destroyed, and the errlist will have an 403 * entry for each snapshot that failed. The value in the errlist will be 404 * the (int32) error code. 
405 */ 406 int 407 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 408 { 409 nvpair_t *elem; 410 nvlist_t *args; 411 int error; 412 char pool[ZFS_MAX_DATASET_NAME_LEN]; 413 414 /* determine the pool name */ 415 elem = nvlist_next_nvpair(snaps, NULL); 416 if (elem == NULL) 417 return (0); 418 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 419 pool[strcspn(pool, "/@")] = '\0'; 420 421 args = fnvlist_alloc(); 422 fnvlist_add_nvlist(args, "snaps", snaps); 423 if (defer) 424 fnvlist_add_boolean(args, "defer"); 425 426 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 427 nvlist_free(args); 428 429 return (error); 430 } 431 432 int 433 lzc_snaprange_space(const char *firstsnap, const char *lastsnap, 434 uint64_t *usedp) 435 { 436 nvlist_t *args; 437 nvlist_t *result; 438 int err; 439 char fs[ZFS_MAX_DATASET_NAME_LEN]; 440 char *atp; 441 442 /* determine the fs name */ 443 (void) strlcpy(fs, firstsnap, sizeof (fs)); 444 atp = strchr(fs, '@'); 445 if (atp == NULL) 446 return (EINVAL); 447 *atp = '\0'; 448 449 args = fnvlist_alloc(); 450 fnvlist_add_string(args, "firstsnap", firstsnap); 451 452 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 453 nvlist_free(args); 454 if (err == 0) 455 *usedp = fnvlist_lookup_uint64(result, "used"); 456 fnvlist_free(result); 457 458 return (err); 459 } 460 461 boolean_t 462 lzc_exists(const char *dataset) 463 { 464 /* 465 * The objset_stats ioctl is still legacy, so we need to construct our 466 * own zfs_cmd_t rather than using lzc_ioctl(). 467 */ 468 zfs_cmd_t zc = {"\0"}; 469 470 ASSERT3S(g_refcount, >, 0); 471 VERIFY3S(g_fd, !=, -1); 472 473 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 474 return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 475 } 476 477 /* 478 * outnvl is unused. 479 * It was added to preserve the function signature in case it is 480 * needed in the future. 
481 */ 482 int 483 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) 484 { 485 (void) outnvl; 486 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); 487 } 488 489 /* 490 * Create "user holds" on snapshots. If there is a hold on a snapshot, 491 * the snapshot can not be destroyed. (However, it can be marked for deletion 492 * by lzc_destroy_snaps(defer=B_TRUE).) 493 * 494 * The keys in the nvlist are snapshot names. 495 * The snapshots must all be in the same pool. 496 * The value is the name of the hold (string type). 497 * 498 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). 499 * In this case, when the cleanup_fd is closed (including on process 500 * termination), the holds will be released. If the system is shut down 501 * uncleanly, the holds will be released when the pool is next opened 502 * or imported. 503 * 504 * Holds for snapshots which don't exist will be skipped and have an entry 505 * added to errlist, but will not cause an overall failure. 506 * 507 * The return value will be 0 if all holds, for snapshots that existed, 508 * were successfully created. 509 * 510 * Otherwise the return value will be the errno of a (unspecified) hold that 511 * failed and no holds will be created. 512 * 513 * In all cases the errlist will have an entry for each hold that failed 514 * (name = snapshot), with its value being the error code (int32). 
515 */ 516 int 517 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) 518 { 519 char pool[ZFS_MAX_DATASET_NAME_LEN]; 520 nvlist_t *args; 521 nvpair_t *elem; 522 int error; 523 524 /* determine the pool name */ 525 elem = nvlist_next_nvpair(holds, NULL); 526 if (elem == NULL) 527 return (0); 528 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 529 pool[strcspn(pool, "/@")] = '\0'; 530 531 args = fnvlist_alloc(); 532 fnvlist_add_nvlist(args, "holds", holds); 533 if (cleanup_fd != -1) 534 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); 535 536 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); 537 nvlist_free(args); 538 return (error); 539 } 540 541 /* 542 * Release "user holds" on snapshots. If the snapshot has been marked for 543 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have 544 * any clones, and all the user holds are removed, then the snapshot will be 545 * destroyed. 546 * 547 * The keys in the nvlist are snapshot names. 548 * The snapshots must all be in the same pool. 549 * The value is an nvlist whose keys are the holds to remove. 550 * 551 * Holds which failed to release because they didn't exist will have an entry 552 * added to errlist, but will not cause an overall failure. 553 * 554 * The return value will be 0 if the nvl holds was empty or all holds that 555 * existed, were successfully removed. 556 * 557 * Otherwise the return value will be the errno of a (unspecified) hold that 558 * failed to release and no holds will be released. 559 * 560 * In all cases the errlist will have an entry for each hold that failed to 561 * to release. 
562 */ 563 int 564 lzc_release(nvlist_t *holds, nvlist_t **errlist) 565 { 566 char pool[ZFS_MAX_DATASET_NAME_LEN]; 567 nvpair_t *elem; 568 569 /* determine the pool name */ 570 elem = nvlist_next_nvpair(holds, NULL); 571 if (elem == NULL) 572 return (0); 573 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 574 pool[strcspn(pool, "/@")] = '\0'; 575 576 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); 577 } 578 579 /* 580 * Retrieve list of user holds on the specified snapshot. 581 * 582 * On success, *holdsp will be set to an nvlist which the caller must free. 583 * The keys are the names of the holds, and the value is the creation time 584 * of the hold (uint64) in seconds since the epoch. 585 */ 586 int 587 lzc_get_holds(const char *snapname, nvlist_t **holdsp) 588 { 589 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); 590 } 591 592 static unsigned int 593 max_pipe_buffer(int infd) 594 { 595 #if __linux__ 596 static unsigned int max; 597 if (max == 0) { 598 max = 1048576; /* fs/pipe.c default */ 599 600 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re"); 601 if (procf != NULL) { 602 if (fscanf(procf, "%u", &max) <= 0) { 603 /* ignore error: max untouched if parse fails */ 604 } 605 fclose(procf); 606 } 607 } 608 609 unsigned int cur = fcntl(infd, F_GETPIPE_SZ); 610 if (cur < max && fcntl(infd, F_SETPIPE_SZ, max) != -1) 611 cur = max; 612 return (cur); 613 #else 614 /* FreeBSD automatically resizes */ 615 (void) infd; 616 return (BIG_PIPE_SIZE); 617 #endif 618 } 619 620 #if __linux__ 621 struct send_worker_ctx { 622 int from; /* read end of pipe, with send data; closed on exit */ 623 int to; /* original arbitrary output fd; mustn't be a pipe */ 624 }; 625 626 static void * 627 send_worker(void *arg) 628 { 629 struct send_worker_ctx *ctx = arg; 630 unsigned int bufsiz = max_pipe_buffer(ctx->from); 631 ssize_t rd; 632 633 while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz, 634 SPLICE_F_MOVE | SPLICE_F_MORE)) > 0) 635 ; 636 
637 int err = (rd == -1) ? errno : 0; 638 close(ctx->from); 639 return ((void *)(uintptr_t)err); 640 } 641 #endif 642 643 /* 644 * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf 645 * ("fs: don't allow kernel reads and writes without iter ops"), 646 * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c. 647 * 648 * This wrapper transparently executes func() with a pipe 649 * by spawning a thread to copy from that pipe to the original output 650 * in the background. 651 * 652 * Returns the error from func(), if nonzero, 653 * otherwise the error from the thread. 654 * 655 * No-op if orig_fd is -1, already a pipe (but the buffer size is bumped), 656 * and on not-Linux; as such, it is safe to wrap/call wrapped functions 657 * in a wrapped context. 658 */ 659 int 660 lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data) 661 { 662 #if __linux__ 663 struct stat sb; 664 if (orig_fd != -1 && fstat(orig_fd, &sb) == -1) 665 return (errno); 666 if (orig_fd == -1 || S_ISFIFO(sb.st_mode)) { 667 if (orig_fd != -1) 668 (void) max_pipe_buffer(orig_fd); 669 return (func(orig_fd, data)); 670 } 671 if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY) 672 return (errno = EBADF); 673 674 int rw[2]; 675 if (pipe2(rw, O_CLOEXEC) == -1) 676 return (errno); 677 678 int err; 679 pthread_t send_thread; 680 struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd}; 681 if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx)) 682 != 0) { 683 close(rw[0]); 684 close(rw[1]); 685 return (errno = err); 686 } 687 688 err = func(rw[1], data); 689 690 void *send_err; 691 close(rw[1]); 692 pthread_join(send_thread, &send_err); 693 if (err == 0 && send_err != 0) 694 errno = err = (uintptr_t)send_err; 695 696 return (err); 697 #else 698 return (func(orig_fd, data)); 699 #endif 700 } 701 702 /* 703 * Generate a zfs send stream for the specified snapshot and write it to 704 * the specified file descriptor. 
*
 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 *
 * If "from" is NULL, a full (non-incremental) stream will be sent.
 * If "from" is non-NULL, it must be the full name of a snapshot or
 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 * bookmark must represent an earlier point in the history of "snapname".
 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 * or it can be the origin of "snapname"'s filesystem, or an earlier
 * snapshot in the origin, etc.
 *
 * "fd" is the file descriptor to write the send stream to.
 *
 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 * records with drr_blksz > 128K.
 *
 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 * which the receiving system must support (as indicated by support
 * for the "embedded_data" feature).
 *
 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
 * compressed WRITE records for blocks which are compressed on disk and in
 * memory.  If the lz4_compress feature is active on the sending system, then
 * the receiving system must have that feature enabled as well.
 *
 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
 * datasets, by sending data exactly as it exists on disk.  This allows backups
 * to be taken even if encryption keys are not currently loaded.
736 */ 737 int 738 lzc_send(const char *snapname, const char *from, int fd, 739 enum lzc_send_flags flags) 740 { 741 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 742 NULL)); 743 } 744 745 int 746 lzc_send_redacted(const char *snapname, const char *from, int fd, 747 enum lzc_send_flags flags, const char *redactbook) 748 { 749 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 750 redactbook)); 751 } 752 753 int 754 lzc_send_resume(const char *snapname, const char *from, int fd, 755 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) 756 { 757 return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj, 758 resumeoff, NULL)); 759 } 760 761 /* 762 * snapname: The name of the "tosnap", or the snapshot whose contents we are 763 * sending. 764 * from: The name of the "fromsnap", or the incremental source. 765 * fd: File descriptor to write the stream to. 766 * flags: flags that determine features to be used by the stream. 767 * resumeobj: Object to resume from, for resuming send 768 * resumeoff: Offset to resume from, for resuming send. 769 * redactnv: nvlist of string -> boolean(ignored) containing the names of all 770 * the snapshots that we should redact with respect to. 771 * redactbook: Name of the redaction bookmark to create. 772 * 773 * Pre-wrapped. 
774 */ 775 static int 776 lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd, 777 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 778 const char *redactbook) 779 { 780 nvlist_t *args; 781 int err; 782 783 args = fnvlist_alloc(); 784 fnvlist_add_int32(args, "fd", fd); 785 if (from != NULL) 786 fnvlist_add_string(args, "fromsnap", from); 787 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 788 fnvlist_add_boolean(args, "largeblockok"); 789 if (flags & LZC_SEND_FLAG_EMBED_DATA) 790 fnvlist_add_boolean(args, "embedok"); 791 if (flags & LZC_SEND_FLAG_COMPRESS) 792 fnvlist_add_boolean(args, "compressok"); 793 if (flags & LZC_SEND_FLAG_RAW) 794 fnvlist_add_boolean(args, "rawok"); 795 if (flags & LZC_SEND_FLAG_SAVED) 796 fnvlist_add_boolean(args, "savedok"); 797 if (resumeobj != 0 || resumeoff != 0) { 798 fnvlist_add_uint64(args, "resume_object", resumeobj); 799 fnvlist_add_uint64(args, "resume_offset", resumeoff); 800 } 801 if (redactbook != NULL) 802 fnvlist_add_string(args, "redactbook", redactbook); 803 804 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 805 nvlist_free(args); 806 return (err); 807 } 808 809 struct lzc_send_resume_redacted { 810 const char *snapname; 811 const char *from; 812 enum lzc_send_flags flags; 813 uint64_t resumeobj; 814 uint64_t resumeoff; 815 const char *redactbook; 816 }; 817 818 static int 819 lzc_send_resume_redacted_cb(int fd, void *arg) 820 { 821 struct lzc_send_resume_redacted *zsrr = arg; 822 return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from, 823 fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff, 824 zsrr->redactbook)); 825 } 826 827 int 828 lzc_send_resume_redacted(const char *snapname, const char *from, int fd, 829 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 830 const char *redactbook) 831 { 832 struct lzc_send_resume_redacted zsrr = { 833 .snapname = snapname, 834 .from = from, 835 .flags = flags, 836 .resumeobj = resumeobj, 837 .resumeoff = 
resumeoff, 838 .redactbook = redactbook, 839 }; 840 return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr)); 841 } 842 843 /* 844 * "from" can be NULL, a snapshot, or a bookmark. 845 * 846 * If from is NULL, a full (non-incremental) stream will be estimated. This 847 * is calculated very efficiently. 848 * 849 * If from is a snapshot, lzc_send_space uses the deadlists attached to 850 * each snapshot to efficiently estimate the stream size. 851 * 852 * If from is a bookmark, the indirect blocks in the destination snapshot 853 * are traversed, looking for blocks with a birth time since the creation TXG of 854 * the snapshot this bookmark was created from. This will result in 855 * significantly more I/O and be less efficient than a send space estimation on 856 * an equivalent snapshot. This process is also used if redact_snaps is 857 * non-null. 858 * 859 * Pre-wrapped. 860 */ 861 static int 862 lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from, 863 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 864 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) 865 { 866 nvlist_t *args; 867 nvlist_t *result; 868 int err; 869 870 args = fnvlist_alloc(); 871 if (from != NULL) 872 fnvlist_add_string(args, "from", from); 873 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 874 fnvlist_add_boolean(args, "largeblockok"); 875 if (flags & LZC_SEND_FLAG_EMBED_DATA) 876 fnvlist_add_boolean(args, "embedok"); 877 if (flags & LZC_SEND_FLAG_COMPRESS) 878 fnvlist_add_boolean(args, "compressok"); 879 if (flags & LZC_SEND_FLAG_RAW) 880 fnvlist_add_boolean(args, "rawok"); 881 if (resumeobj != 0 || resumeoff != 0) { 882 fnvlist_add_uint64(args, "resume_object", resumeobj); 883 fnvlist_add_uint64(args, "resume_offset", resumeoff); 884 fnvlist_add_uint64(args, "bytes", resume_bytes); 885 } 886 if (redactbook != NULL) 887 fnvlist_add_string(args, "redactbook", redactbook); 888 if (fd != -1) 889 fnvlist_add_int32(args, "fd", fd); 
890 891 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 892 nvlist_free(args); 893 if (err == 0) 894 *spacep = fnvlist_lookup_uint64(result, "space"); 895 nvlist_free(result); 896 return (err); 897 } 898 899 struct lzc_send_space_resume_redacted { 900 const char *snapname; 901 const char *from; 902 enum lzc_send_flags flags; 903 uint64_t resumeobj; 904 uint64_t resumeoff; 905 uint64_t resume_bytes; 906 const char *redactbook; 907 uint64_t *spacep; 908 }; 909 910 static int 911 lzc_send_space_resume_redacted_cb(int fd, void *arg) 912 { 913 struct lzc_send_space_resume_redacted *zssrr = arg; 914 return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname, 915 zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff, 916 zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep)); 917 } 918 919 int 920 lzc_send_space_resume_redacted(const char *snapname, const char *from, 921 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 922 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) 923 { 924 struct lzc_send_space_resume_redacted zssrr = { 925 .snapname = snapname, 926 .from = from, 927 .flags = flags, 928 .resumeobj = resumeobj, 929 .resumeoff = resumeoff, 930 .resume_bytes = resume_bytes, 931 .redactbook = redactbook, 932 .spacep = spacep, 933 }; 934 return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb, 935 fd, &zssrr)); 936 } 937 938 int 939 lzc_send_space(const char *snapname, const char *from, 940 enum lzc_send_flags flags, uint64_t *spacep) 941 { 942 return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0, 943 NULL, -1, spacep)); 944 } 945 946 static int 947 recv_read(int fd, void *buf, int ilen) 948 { 949 char *cp = buf; 950 int rv; 951 int len = ilen; 952 953 do { 954 rv = read(fd, cp, len); 955 cp += rv; 956 len -= rv; 957 } while (rv > 0); 958 959 if (rv < 0 || len != 0) 960 return (EIO); 961 962 return (0); 963 } 964 965 /* 966 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw 
streams and preserves the 967 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all 968 * stream options but is currently only used for resumable streams. This way 969 * updated user space utilities will interoperate with older kernel modules. 970 * 971 * Non-Linux OpenZFS platforms have opted to modify the legacy interface. 972 */ 973 static int 974 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, 975 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force, 976 boolean_t resumable, boolean_t raw, int input_fd, 977 const dmu_replay_record_t *begin_record, uint64_t *read_bytes, 978 uint64_t *errflags, nvlist_t **errors) 979 { 980 dmu_replay_record_t drr; 981 char fsname[MAXPATHLEN]; 982 char *atp; 983 int error; 984 boolean_t payload = B_FALSE; 985 986 ASSERT3S(g_refcount, >, 0); 987 VERIFY3S(g_fd, !=, -1); 988 989 /* Set 'fsname' to the name of containing filesystem */ 990 (void) strlcpy(fsname, snapname, sizeof (fsname)); 991 atp = strchr(fsname, '@'); 992 if (atp == NULL) 993 return (EINVAL); 994 *atp = '\0'; 995 996 /* If the fs does not exist, try its parent. */ 997 if (!lzc_exists(fsname)) { 998 char *slashp = strrchr(fsname, '/'); 999 if (slashp == NULL) 1000 return (ENOENT); 1001 *slashp = '\0'; 1002 } 1003 1004 /* 1005 * It is not uncommon for gigabytes to be processed by zfs receive. 1006 * Speculatively increase the buffer size if supported by the platform. 1007 */ 1008 struct stat sb; 1009 if (fstat(input_fd, &sb) == -1) 1010 return (errno); 1011 if (S_ISFIFO(sb.st_mode)) 1012 (void) max_pipe_buffer(input_fd); 1013 1014 /* 1015 * The begin_record is normally a non-byteswapped BEGIN record. 1016 * For resumable streams it may be set to any non-byteswapped 1017 * dmu_replay_record_t. 
1018 */ 1019 if (begin_record == NULL) { 1020 error = recv_read(input_fd, &drr, sizeof (drr)); 1021 if (error != 0) 1022 return (error); 1023 } else { 1024 drr = *begin_record; 1025 payload = (begin_record->drr_payloadlen != 0); 1026 } 1027 1028 /* 1029 * All receives with a payload should use the new interface. 1030 */ 1031 if (resumable || raw || wkeydata != NULL || payload) { 1032 nvlist_t *outnvl = NULL; 1033 nvlist_t *innvl = fnvlist_alloc(); 1034 1035 fnvlist_add_string(innvl, "snapname", snapname); 1036 1037 if (recvdprops != NULL) 1038 fnvlist_add_nvlist(innvl, "props", recvdprops); 1039 1040 if (localprops != NULL) 1041 fnvlist_add_nvlist(innvl, "localprops", localprops); 1042 1043 if (wkeydata != NULL) { 1044 /* 1045 * wkeydata must be placed in the special 1046 * ZPOOL_HIDDEN_ARGS nvlist so that it 1047 * will not be printed to the zpool history. 1048 */ 1049 nvlist_t *hidden_args = fnvlist_alloc(); 1050 fnvlist_add_uint8_array(hidden_args, "wkeydata", 1051 wkeydata, wkeylen); 1052 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, 1053 hidden_args); 1054 nvlist_free(hidden_args); 1055 } 1056 1057 if (origin != NULL && strlen(origin)) 1058 fnvlist_add_string(innvl, "origin", origin); 1059 1060 fnvlist_add_byte_array(innvl, "begin_record", 1061 (uchar_t *)&drr, sizeof (drr)); 1062 1063 fnvlist_add_int32(innvl, "input_fd", input_fd); 1064 1065 if (force) 1066 fnvlist_add_boolean(innvl, "force"); 1067 1068 if (resumable) 1069 fnvlist_add_boolean(innvl, "resumable"); 1070 1071 1072 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); 1073 1074 if (error == 0 && read_bytes != NULL) 1075 error = nvlist_lookup_uint64(outnvl, "read_bytes", 1076 read_bytes); 1077 1078 if (error == 0 && errflags != NULL) 1079 error = nvlist_lookup_uint64(outnvl, "error_flags", 1080 errflags); 1081 1082 if (error == 0 && errors != NULL) { 1083 nvlist_t *nvl; 1084 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); 1085 if (error == 0) 1086 *errors = fnvlist_dup(nvl); 1087 } 
1088 1089 fnvlist_free(innvl); 1090 fnvlist_free(outnvl); 1091 } else { 1092 zfs_cmd_t zc = {"\0"}; 1093 char *packed = NULL; 1094 size_t size; 1095 1096 ASSERT3S(g_refcount, >, 0); 1097 1098 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 1099 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 1100 1101 if (recvdprops != NULL) { 1102 packed = fnvlist_pack(recvdprops, &size); 1103 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 1104 zc.zc_nvlist_src_size = size; 1105 } 1106 1107 if (localprops != NULL) { 1108 packed = fnvlist_pack(localprops, &size); 1109 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed; 1110 zc.zc_nvlist_conf_size = size; 1111 } 1112 1113 if (origin != NULL) 1114 (void) strlcpy(zc.zc_string, origin, 1115 sizeof (zc.zc_string)); 1116 1117 ASSERT3S(drr.drr_type, ==, DRR_BEGIN); 1118 zc.zc_begin_record = drr.drr_u.drr_begin; 1119 zc.zc_guid = force; 1120 zc.zc_cookie = input_fd; 1121 zc.zc_cleanup_fd = -1; 1122 zc.zc_action_handle = 0; 1123 1124 zc.zc_nvlist_dst_size = 128 * 1024; 1125 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 1126 malloc(zc.zc_nvlist_dst_size); 1127 1128 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc); 1129 if (error != 0) { 1130 error = errno; 1131 } else { 1132 if (read_bytes != NULL) 1133 *read_bytes = zc.zc_cookie; 1134 1135 if (errflags != NULL) 1136 *errflags = zc.zc_obj; 1137 1138 if (errors != NULL) 1139 VERIFY0(nvlist_unpack( 1140 (void *)(uintptr_t)zc.zc_nvlist_dst, 1141 zc.zc_nvlist_dst_size, errors, KM_SLEEP)); 1142 } 1143 1144 if (packed != NULL) 1145 fnvlist_pack_free(packed, size); 1146 free((void *)(uintptr_t)zc.zc_nvlist_dst); 1147 } 1148 1149 return (error); 1150 } 1151 1152 /* 1153 * The simplest receive case: receive from the specified fd, creating the 1154 * specified snapshot. Apply the specified properties as "received" properties 1155 * (which can be overridden by locally-set properties). If the stream is a 1156 * clone, its origin snapshot must be specified by 'origin'. 
The 'force' 1157 * flag will cause the target filesystem to be rolled back or destroyed if 1158 * necessary to receive. 1159 * 1160 * Return 0 on success or an errno on failure. 1161 * 1162 * Note: this interface does not work on dedup'd streams 1163 * (those with DMU_BACKUP_FEATURE_DEDUP). 1164 */ 1165 int 1166 lzc_receive(const char *snapname, nvlist_t *props, const char *origin, 1167 boolean_t force, boolean_t raw, int fd) 1168 { 1169 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1170 B_FALSE, raw, fd, NULL, NULL, NULL, NULL)); 1171 } 1172 1173 /* 1174 * Like lzc_receive, but if the receive fails due to premature stream 1175 * termination, the intermediate state will be preserved on disk. In this 1176 * case, ECKSUM will be returned. The receive may subsequently be resumed 1177 * with a resuming send stream generated by lzc_send_resume(). 1178 */ 1179 int 1180 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, 1181 boolean_t force, boolean_t raw, int fd) 1182 { 1183 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1184 B_TRUE, raw, fd, NULL, NULL, NULL, NULL)); 1185 } 1186 1187 /* 1188 * Like lzc_receive, but allows the caller to read the begin record and then to 1189 * pass it in. That could be useful if the caller wants to derive, for example, 1190 * the snapname or the origin parameters based on the information contained in 1191 * the begin record. 1192 * The begin record must be in its original form as read from the stream, 1193 * in other words, it should not be byteswapped. 1194 * 1195 * The 'resumable' parameter allows to obtain the same behavior as with 1196 * lzc_receive_resumable. 
1197 */ 1198 int 1199 lzc_receive_with_header(const char *snapname, nvlist_t *props, 1200 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1201 int fd, const dmu_replay_record_t *begin_record) 1202 { 1203 if (begin_record == NULL) 1204 return (EINVAL); 1205 1206 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1207 resumable, raw, fd, begin_record, NULL, NULL, NULL)); 1208 } 1209 1210 /* 1211 * Like lzc_receive, but allows the caller to pass all supported arguments 1212 * and retrieve all values returned. The only additional input parameter 1213 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. 1214 * 1215 * The following parameters all provide return values. Several may be set 1216 * in the failure case and will contain additional information. 1217 * 1218 * The 'read_bytes' value will be set to the total number of bytes read. 1219 * 1220 * The 'errflags' value will contain zprop_errflags_t flags which are 1221 * used to describe any failures. 1222 * 1223 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored. 1224 * 1225 * The 'errors' nvlist contains an entry for each unapplied received 1226 * property. Callers are responsible for freeing this nvlist. 1227 */ 1228 int 1229 lzc_receive_one(const char *snapname, nvlist_t *props, 1230 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1231 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, 1232 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1233 nvlist_t **errors) 1234 { 1235 (void) action_handle, (void) cleanup_fd; 1236 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1237 resumable, raw, input_fd, begin_record, 1238 read_bytes, errflags, errors)); 1239 } 1240 1241 /* 1242 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops' 1243 * argument. 
1244 * 1245 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and 1246 * exclude ('zfs receive -x') properties. Callers are responsible for freeing 1247 * this nvlist 1248 */ 1249 int 1250 lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, 1251 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, 1252 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd, 1253 const dmu_replay_record_t *begin_record, int cleanup_fd, 1254 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1255 nvlist_t **errors) 1256 { 1257 (void) action_handle, (void) cleanup_fd; 1258 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, 1259 force, resumable, raw, input_fd, begin_record, 1260 read_bytes, errflags, errors)); 1261 } 1262 1263 /* 1264 * Roll back this filesystem or volume to its most recent snapshot. 1265 * If snapnamebuf is not NULL, it will be filled in with the name 1266 * of the most recent snapshot. 1267 * Note that the latest snapshot may change if a new one is concurrently 1268 * created or the current one is destroyed. lzc_rollback_to can be used 1269 * to roll back to a specific latest snapshot. 1270 * 1271 * Return 0 on success or an errno on failure. 1272 */ 1273 int 1274 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) 1275 { 1276 nvlist_t *args; 1277 nvlist_t *result; 1278 int err; 1279 1280 args = fnvlist_alloc(); 1281 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1282 nvlist_free(args); 1283 if (err == 0 && snapnamebuf != NULL) { 1284 const char *snapname = fnvlist_lookup_string(result, "target"); 1285 (void) strlcpy(snapnamebuf, snapname, snapnamelen); 1286 } 1287 nvlist_free(result); 1288 1289 return (err); 1290 } 1291 1292 /* 1293 * Roll back this filesystem or volume to the specified snapshot, 1294 * if possible. 1295 * 1296 * Return 0 on success or an errno on failure. 
1297 */ 1298 int 1299 lzc_rollback_to(const char *fsname, const char *snapname) 1300 { 1301 nvlist_t *args; 1302 nvlist_t *result; 1303 int err; 1304 1305 args = fnvlist_alloc(); 1306 fnvlist_add_string(args, "target", snapname); 1307 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1308 nvlist_free(args); 1309 nvlist_free(result); 1310 return (err); 1311 } 1312 1313 /* 1314 * Creates new bookmarks from existing snapshot or bookmark. 1315 * 1316 * The bookmarks nvlist maps from the full name of the new bookmark to 1317 * the full name of the source snapshot or bookmark. 1318 * All the bookmarks and snapshots must be in the same pool. 1319 * The new bookmarks names must be unique. 1320 * => see function dsl_bookmark_create_nvl_validate 1321 * 1322 * The returned results nvlist will have an entry for each bookmark that failed. 1323 * The value will be the (int32) error code. 1324 * 1325 * The return value will be 0 if all bookmarks were created, otherwise it will 1326 * be the errno of a (undetermined) bookmarks that failed. 1327 */ 1328 int 1329 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) 1330 { 1331 nvpair_t *elem; 1332 int error; 1333 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1334 1335 /* determine pool name from first bookmark */ 1336 elem = nvlist_next_nvpair(bookmarks, NULL); 1337 if (elem == NULL) 1338 return (0); 1339 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1340 pool[strcspn(pool, "/#")] = '\0'; 1341 1342 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); 1343 1344 return (error); 1345 } 1346 1347 /* 1348 * Retrieve bookmarks. 1349 * 1350 * Retrieve the list of bookmarks for the given file system. The props 1351 * parameter is an nvlist of property names (with no values) that will be 1352 * returned for each bookmark. 
1353 * 1354 * The following are valid properties on bookmarks, most of which are numbers 1355 * (represented as uint64 in the nvlist), except redact_snaps, which is a 1356 * uint64 array, and redact_complete, which is a boolean 1357 * 1358 * "guid" - globally unique identifier of the snapshot it refers to 1359 * "createtxg" - txg when the snapshot it refers to was created 1360 * "creation" - timestamp when the snapshot it refers to was created 1361 * "ivsetguid" - IVset guid for identifying encrypted snapshots 1362 * "redact_snaps" - list of guids of the redaction snapshots for the specified 1363 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will 1364 * not contain an entry for this value. If it is redacted with respect to 1365 * no snapshots, it will contain value -> NULL uint64 array 1366 * "redact_complete" - boolean value; true if the redaction bookmark is 1367 * complete, false otherwise. 1368 * 1369 * The format of the returned nvlist as follows: 1370 * <short name of bookmark> -> { 1371 * <name of property> -> { 1372 * "value" -> uint64 1373 * } 1374 * ... 1375 * "redact_snaps" -> { 1376 * "value" -> uint64 array 1377 * } 1378 * "redact_complete" -> { 1379 * "value" -> boolean value 1380 * } 1381 * } 1382 */ 1383 int 1384 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) 1385 { 1386 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); 1387 } 1388 1389 /* 1390 * Get bookmark properties. 1391 * 1392 * Given a bookmark's full name, retrieve all properties for the bookmark. 1393 * 1394 * The format of the returned property list is as follows: 1395 * { 1396 * <name of property> -> { 1397 * "value" -> uint64 1398 * } 1399 * ... 
1400 * "redact_snaps" -> { 1401 * "value" -> uint64 array 1402 * } 1403 */ 1404 int 1405 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props) 1406 { 1407 int error; 1408 1409 nvlist_t *innvl = fnvlist_alloc(); 1410 error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props); 1411 fnvlist_free(innvl); 1412 1413 return (error); 1414 } 1415 1416 /* 1417 * Destroys bookmarks. 1418 * 1419 * The keys in the bmarks nvlist are the bookmarks to be destroyed. 1420 * They must all be in the same pool. Bookmarks are specified as 1421 * <fs>#<bmark>. 1422 * 1423 * Bookmarks that do not exist will be silently ignored. 1424 * 1425 * The return value will be 0 if all bookmarks that existed were destroyed. 1426 * 1427 * Otherwise the return value will be the errno of a (undetermined) bookmark 1428 * that failed, no bookmarks will be destroyed, and the errlist will have an 1429 * entry for each bookmarks that failed. The value in the errlist will be 1430 * the (int32) error code. 1431 */ 1432 int 1433 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) 1434 { 1435 nvpair_t *elem; 1436 int error; 1437 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1438 1439 /* determine the pool name */ 1440 elem = nvlist_next_nvpair(bmarks, NULL); 1441 if (elem == NULL) 1442 return (0); 1443 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1444 pool[strcspn(pool, "/#")] = '\0'; 1445 1446 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); 1447 1448 return (error); 1449 } 1450 1451 static int 1452 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, 1453 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1454 { 1455 int error; 1456 nvlist_t *args; 1457 1458 args = fnvlist_alloc(); 1459 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); 1460 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); 1461 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); 1462 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, 
instrlimit); 1463 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); 1464 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); 1465 fnvlist_free(args); 1466 1467 return (error); 1468 } 1469 1470 /* 1471 * Executes a channel program. 1472 * 1473 * If this function returns 0 the channel program was successfully loaded and 1474 * ran without failing. Note that individual commands the channel program ran 1475 * may have failed and the channel program is responsible for reporting such 1476 * errors through outnvl if they are important. 1477 * 1478 * This method may also return: 1479 * 1480 * EINVAL The program contains syntax errors, or an invalid memory or time 1481 * limit was given. No part of the channel program was executed. 1482 * If caused by syntax errors, 'outnvl' contains information about the 1483 * errors. 1484 * 1485 * ECHRNG The program was executed, but encountered a runtime error, such as 1486 * calling a function with incorrect arguments, invoking the error() 1487 * function directly, failing an assert() command, etc. Some portion 1488 * of the channel program may have executed and committed changes. 1489 * Information about the failure can be found in 'outnvl'. 1490 * 1491 * ENOMEM The program fully executed, but the output buffer was not large 1492 * enough to store the returned value. No output is returned through 1493 * 'outnvl'. 1494 * 1495 * ENOSPC The program was terminated because it exceeded its memory usage 1496 * limit. Some portion of the channel program may have executed and 1497 * committed changes to disk. No output is returned through 'outnvl'. 1498 * 1499 * ETIME The program was terminated because it exceeded its Lua instruction 1500 * limit. Some portion of the channel program may have executed and 1501 * committed changes to disk. No output is returned through 'outnvl'. 
1502 */ 1503 int 1504 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, 1505 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1506 { 1507 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, 1508 memlimit, argnvl, outnvl)); 1509 } 1510 1511 /* 1512 * Creates a checkpoint for the specified pool. 1513 * 1514 * If this function returns 0 the pool was successfully checkpointed. 1515 * 1516 * This method may also return: 1517 * 1518 * ZFS_ERR_CHECKPOINT_EXISTS 1519 * The pool already has a checkpoint. A pools can only have one 1520 * checkpoint at most, at any given time. 1521 * 1522 * ZFS_ERR_DISCARDING_CHECKPOINT 1523 * ZFS is in the middle of discarding a checkpoint for this pool. 1524 * The pool can be checkpointed again once the discard is done. 1525 * 1526 * ZFS_DEVRM_IN_PROGRESS 1527 * A vdev is currently being removed. The pool cannot be 1528 * checkpointed until the device removal is done. 1529 * 1530 * ZFS_VDEV_TOO_BIG 1531 * One or more top-level vdevs exceed the maximum vdev size 1532 * supported for this feature. 1533 */ 1534 int 1535 lzc_pool_checkpoint(const char *pool) 1536 { 1537 int error; 1538 1539 nvlist_t *result = NULL; 1540 nvlist_t *args = fnvlist_alloc(); 1541 1542 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); 1543 1544 fnvlist_free(args); 1545 fnvlist_free(result); 1546 1547 return (error); 1548 } 1549 1550 /* 1551 * Discard the checkpoint from the specified pool. 1552 * 1553 * If this function returns 0 the checkpoint was successfully discarded. 1554 * 1555 * This method may also return: 1556 * 1557 * ZFS_ERR_NO_CHECKPOINT 1558 * The pool does not have a checkpoint. 1559 * 1560 * ZFS_ERR_DISCARDING_CHECKPOINT 1561 * ZFS is already in the middle of discarding the checkpoint. 
1562 */ 1563 int 1564 lzc_pool_checkpoint_discard(const char *pool) 1565 { 1566 int error; 1567 1568 nvlist_t *result = NULL; 1569 nvlist_t *args = fnvlist_alloc(); 1570 1571 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); 1572 1573 fnvlist_free(args); 1574 fnvlist_free(result); 1575 1576 return (error); 1577 } 1578 1579 /* 1580 * Executes a read-only channel program. 1581 * 1582 * A read-only channel program works programmatically the same way as a 1583 * normal channel program executed with lzc_channel_program(). The only 1584 * difference is it runs exclusively in open-context and therefore can 1585 * return faster. The downside to that, is that the program cannot change 1586 * on-disk state by calling functions from the zfs.sync submodule. 1587 * 1588 * The return values of this function (and their meaning) are exactly the 1589 * same as the ones described in lzc_channel_program(). 1590 */ 1591 int 1592 lzc_channel_program_nosync(const char *pool, const char *program, 1593 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1594 { 1595 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, 1596 memlimit, argnvl, outnvl)); 1597 } 1598 1599 int 1600 lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) 1601 { 1602 return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl)); 1603 } 1604 1605 int 1606 lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) 1607 { 1608 return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl)); 1609 } 1610 1611 /* 1612 * Performs key management functions 1613 * 1614 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to 1615 * load or change a wrapping key, the key should be specified in the 1616 * hidden_args nvlist so that it is not logged. 
1617 */ 1618 int 1619 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, 1620 uint_t wkeylen) 1621 { 1622 int error; 1623 nvlist_t *ioc_args; 1624 nvlist_t *hidden_args; 1625 1626 if (wkeydata == NULL) 1627 return (EINVAL); 1628 1629 ioc_args = fnvlist_alloc(); 1630 hidden_args = fnvlist_alloc(); 1631 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); 1632 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1633 if (noop) 1634 fnvlist_add_boolean(ioc_args, "noop"); 1635 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); 1636 nvlist_free(hidden_args); 1637 nvlist_free(ioc_args); 1638 1639 return (error); 1640 } 1641 1642 int 1643 lzc_unload_key(const char *fsname) 1644 { 1645 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); 1646 } 1647 1648 int 1649 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, 1650 uint8_t *wkeydata, uint_t wkeylen) 1651 { 1652 int error; 1653 nvlist_t *ioc_args = fnvlist_alloc(); 1654 nvlist_t *hidden_args = NULL; 1655 1656 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); 1657 1658 if (wkeydata != NULL) { 1659 hidden_args = fnvlist_alloc(); 1660 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 1661 wkeylen); 1662 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1663 } 1664 1665 if (props != NULL) 1666 fnvlist_add_nvlist(ioc_args, "props", props); 1667 1668 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); 1669 nvlist_free(hidden_args); 1670 nvlist_free(ioc_args); 1671 1672 return (error); 1673 } 1674 1675 int 1676 lzc_reopen(const char *pool_name, boolean_t scrub_restart) 1677 { 1678 nvlist_t *args = fnvlist_alloc(); 1679 int error; 1680 1681 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart); 1682 1683 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL); 1684 nvlist_free(args); 1685 return (error); 1686 } 1687 1688 /* 1689 * Changes initializing state. 
1690 * 1691 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1692 * The key is ignored. 1693 * 1694 * If there are errors related to vdev arguments, per-vdev errors are returned 1695 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1696 * guid is stringified with PRIu64, and errno is one of the following as 1697 * an int64_t: 1698 * - ENODEV if the device was not found 1699 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) 1700 * - EROFS if the device is not writeable 1701 * - EBUSY start requested but the device is already being either 1702 * initialized or trimmed 1703 * - ESRCH cancel/suspend requested but device is not being initialized 1704 * 1705 * If the errlist is empty, then return value will be: 1706 * - EINVAL if one or more arguments was invalid 1707 * - Other spa_open failures 1708 * - 0 if the operation succeeded 1709 */ 1710 int 1711 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, 1712 nvlist_t *vdevs, nvlist_t **errlist) 1713 { 1714 int error; 1715 1716 nvlist_t *args = fnvlist_alloc(); 1717 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); 1718 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); 1719 1720 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); 1721 1722 fnvlist_free(args); 1723 1724 return (error); 1725 } 1726 1727 /* 1728 * Changes TRIM state. 1729 * 1730 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1731 * The key is ignored. 1732 * 1733 * If there are errors related to vdev arguments, per-vdev errors are returned 1734 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1735 * guid is stringified with PRIu64, and errno is one of the following as 1736 * an int64_t: 1737 * - ENODEV if the device was not found 1738 * - EINVAL if the devices is not a leaf or is not concrete (e.g. 
missing) 1739 * - EROFS if the device is not writeable 1740 * - EBUSY start requested but the device is already being either trimmed 1741 * or initialized 1742 * - ESRCH cancel/suspend requested but device is not being initialized 1743 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM) 1744 * 1745 * If the errlist is empty, then return value will be: 1746 * - EINVAL if one or more arguments was invalid 1747 * - Other spa_open failures 1748 * - 0 if the operation succeeded 1749 */ 1750 int 1751 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate, 1752 boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist) 1753 { 1754 int error; 1755 1756 nvlist_t *args = fnvlist_alloc(); 1757 fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type); 1758 fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs); 1759 fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate); 1760 fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure); 1761 1762 error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist); 1763 1764 fnvlist_free(args); 1765 1766 return (error); 1767 } 1768 1769 /* 1770 * Create a redaction bookmark named bookname by redacting snapshot with respect 1771 * to all the snapshots in snapnv. 
1772 */ 1773 int 1774 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv) 1775 { 1776 nvlist_t *args = fnvlist_alloc(); 1777 fnvlist_add_string(args, "bookname", bookname); 1778 fnvlist_add_nvlist(args, "snapnv", snapnv); 1779 int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL); 1780 fnvlist_free(args); 1781 return (error); 1782 } 1783 1784 static int 1785 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag, 1786 uint64_t tag, boolean_t *waited) 1787 { 1788 nvlist_t *args = fnvlist_alloc(); 1789 nvlist_t *result = NULL; 1790 1791 fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity); 1792 if (use_tag) 1793 fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag); 1794 1795 int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result); 1796 1797 if (error == 0 && waited != NULL) 1798 *waited = fnvlist_lookup_boolean_value(result, 1799 ZPOOL_WAIT_WAITED); 1800 1801 fnvlist_free(args); 1802 fnvlist_free(result); 1803 1804 return (error); 1805 } 1806 1807 int 1808 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited) 1809 { 1810 return (wait_common(pool, activity, B_FALSE, 0, waited)); 1811 } 1812 1813 int 1814 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag, 1815 boolean_t *waited) 1816 { 1817 return (wait_common(pool, activity, B_TRUE, tag, waited)); 1818 } 1819 1820 int 1821 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited) 1822 { 1823 nvlist_t *args = fnvlist_alloc(); 1824 nvlist_t *result = NULL; 1825 1826 fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity); 1827 1828 int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result); 1829 1830 if (error == 0 && waited != NULL) 1831 *waited = fnvlist_lookup_boolean_value(result, 1832 ZFS_WAIT_WAITED); 1833 1834 fnvlist_free(args); 1835 fnvlist_free(result); 1836 1837 return (error); 1838 } 1839 1840 /* 1841 * Set the bootenv contents for the given pool. 
1842 */ 1843 int 1844 lzc_set_bootenv(const char *pool, const nvlist_t *env) 1845 { 1846 return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); 1847 } 1848 1849 /* 1850 * Get the contents of the bootenv of the given pool. 1851 */ 1852 int 1853 lzc_get_bootenv(const char *pool, nvlist_t **outnvl) 1854 { 1855 return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); 1856 } 1857