1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 * Copyright (c) 2017 Datto Inc. 26 * Copyright 2017 RackTop Systems. 27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. 28 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. 29 */ 30 31 /* 32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 33 * It has the following characteristics: 34 * 35 * - Thread Safe. libzfs_core is accessible concurrently from multiple 36 * threads. This is accomplished primarily by avoiding global data 37 * (e.g. caching). Since it's thread-safe, there is no reason for a 38 * process to have multiple libzfs "instances". Therefore, we store 39 * our few pieces of data (e.g. the file descriptor) in global 40 * variables. The fd is reference-counted so that the libzfs_core 41 * library can be "initialized" multiple times (e.g. by different 42 * consumers within the same process). 43 * 44 * - Committed Interface. 
The libzfs_core interface will be committed, 45 * therefore consumers can compile against it and be confident that 46 * their code will continue to work on future releases of this code. 47 * Currently, the interface is Evolving (not Committed), but we intend 48 * to commit to it once it is more complete and we determine that it 49 * meets the needs of all consumers. 50 * 51 * - Programmatic Error Handling. libzfs_core communicates errors with 52 * defined error numbers, and doesn't print anything to stdout/stderr. 53 * 54 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments 55 * to/from the kernel ioctls. There is generally a 1:1 correspondence 56 * between libzfs_core functions and ioctls to ZFS_DEV. 57 * 58 * - Clear Atomicity. Because libzfs_core functions are generally 1:1 59 * with kernel ioctls, and kernel ioctls are generally atomic, each 60 * libzfs_core function is atomic. For example, creating multiple 61 * snapshots with a single call to lzc_snapshot() is atomic -- it 62 * can't fail with only some of the requested snapshots created, even 63 * in the event of power loss or system crash. 64 * 65 * - Continued libzfs Support. Some higher-level operations (e.g. 66 * support for "zfs send -R") are too complicated to fit the scope of 67 * libzfs_core. This functionality will continue to live in libzfs. 68 * Where appropriate, libzfs will use the underlying atomic operations 69 * of libzfs_core. For example, libzfs may implement "zfs send -R | 70 * zfs receive" by using individual "send one snapshot", rename, 71 * destroy, and "receive one snapshot" operations in libzfs_core. 72 * /sbin/zfs and /sbin/zpool will link with both libzfs and 73 * libzfs_core. Other consumers should aim to use only libzfs_core, 74 * since that will be the supported, stable interface going forwards. 
75 */ 76 77 #include <libzfs_core.h> 78 #include <ctype.h> 79 #include <unistd.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #ifdef ZFS_DEBUG 83 #include <stdio.h> 84 #endif 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <pthread.h> 88 #include <libzutil.h> 89 #include <sys/nvpair.h> 90 #include <sys/param.h> 91 #include <sys/types.h> 92 #include <sys/stat.h> 93 #include <sys/zfs_ioctl.h> 94 95 static int g_fd = -1; 96 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 97 static int g_refcount; 98 99 #ifdef ZFS_DEBUG 100 static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST; 101 static zfs_errno_t fail_ioc_err; 102 103 static void 104 libzfs_core_debug_ioc(void) 105 { 106 /* 107 * To test running newer user space binaries with kernel's 108 * that don't yet support an ioctl or a new ioctl arg we 109 * provide an override to intentionally fail an ioctl. 110 * 111 * USAGE: 112 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err" 113 * 114 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a 115 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029" 116 * 117 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank" 118 * cannot checkpoint 'tank': the loaded zfs module does not support 119 * this operation. A reboot may be required to enable this operation. 
120 */ 121 if (fail_ioc_cmd == ZFS_IOC_LAST) { 122 char *ioc_test = getenv("ZFS_IOC_TEST"); 123 unsigned int ioc_num = 0, ioc_err = 0; 124 125 if (ioc_test != NULL && 126 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 && 127 ioc_num < ZFS_IOC_LAST) { 128 fail_ioc_cmd = ioc_num; 129 fail_ioc_err = ioc_err; 130 } 131 } 132 } 133 #endif 134 135 int 136 libzfs_core_init(void) 137 { 138 (void) pthread_mutex_lock(&g_lock); 139 if (g_refcount == 0) { 140 g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC); 141 if (g_fd < 0) { 142 (void) pthread_mutex_unlock(&g_lock); 143 return (errno); 144 } 145 } 146 g_refcount++; 147 148 #ifdef ZFS_DEBUG 149 libzfs_core_debug_ioc(); 150 #endif 151 (void) pthread_mutex_unlock(&g_lock); 152 return (0); 153 } 154 155 void 156 libzfs_core_fini(void) 157 { 158 (void) pthread_mutex_lock(&g_lock); 159 ASSERT3S(g_refcount, >, 0); 160 161 g_refcount--; 162 163 if (g_refcount == 0 && g_fd != -1) { 164 (void) close(g_fd); 165 g_fd = -1; 166 } 167 (void) pthread_mutex_unlock(&g_lock); 168 } 169 170 static int 171 lzc_ioctl(zfs_ioc_t ioc, const char *name, 172 nvlist_t *source, nvlist_t **resultp) 173 { 174 zfs_cmd_t zc = {"\0"}; 175 int error = 0; 176 char *packed = NULL; 177 size_t size = 0; 178 179 ASSERT3S(g_refcount, >, 0); 180 VERIFY3S(g_fd, !=, -1); 181 182 #ifdef ZFS_DEBUG 183 if (ioc == fail_ioc_cmd) 184 return (fail_ioc_err); 185 #endif 186 187 if (name != NULL) 188 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 189 190 if (source != NULL) { 191 packed = fnvlist_pack(source, &size); 192 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 193 zc.zc_nvlist_src_size = size; 194 } 195 196 if (resultp != NULL) { 197 *resultp = NULL; 198 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { 199 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, 200 ZCP_ARG_MEMLIMIT); 201 } else { 202 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 203 } 204 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 205 malloc(zc.zc_nvlist_dst_size); 206 if (zc.zc_nvlist_dst == (uint64_t)0) { 
207 error = ENOMEM; 208 goto out; 209 } 210 } 211 212 while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) { 213 /* 214 * If ioctl exited with ENOMEM, we retry the ioctl after 215 * increasing the size of the destination nvlist. 216 * 217 * Channel programs that exit with ENOMEM ran over the 218 * lua memory sandbox; they should not be retried. 219 */ 220 if (errno == ENOMEM && resultp != NULL && 221 ioc != ZFS_IOC_CHANNEL_PROGRAM) { 222 free((void *)(uintptr_t)zc.zc_nvlist_dst); 223 zc.zc_nvlist_dst_size *= 2; 224 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 225 malloc(zc.zc_nvlist_dst_size); 226 if (zc.zc_nvlist_dst == (uint64_t)0) { 227 error = ENOMEM; 228 goto out; 229 } 230 } else { 231 error = errno; 232 break; 233 } 234 } 235 if (zc.zc_nvlist_dst_filled) { 236 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 237 zc.zc_nvlist_dst_size); 238 } 239 240 out: 241 if (packed != NULL) 242 fnvlist_pack_free(packed, size); 243 free((void *)(uintptr_t)zc.zc_nvlist_dst); 244 return (error); 245 } 246 247 int 248 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, 249 uint8_t *wkeydata, uint_t wkeylen) 250 { 251 int error; 252 nvlist_t *hidden_args = NULL; 253 nvlist_t *args = fnvlist_alloc(); 254 255 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); 256 if (props != NULL) 257 fnvlist_add_nvlist(args, "props", props); 258 259 if (wkeydata != NULL) { 260 hidden_args = fnvlist_alloc(); 261 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 262 wkeylen); 263 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); 264 } 265 266 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 267 nvlist_free(hidden_args); 268 nvlist_free(args); 269 return (error); 270 } 271 272 int 273 lzc_clone(const char *fsname, const char *origin, nvlist_t *props) 274 { 275 int error; 276 nvlist_t *hidden_args = NULL; 277 nvlist_t *args = fnvlist_alloc(); 278 279 fnvlist_add_string(args, "origin", origin); 280 if (props != NULL) 281 fnvlist_add_nvlist(args, 
"props", props); 282 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 283 nvlist_free(hidden_args); 284 nvlist_free(args); 285 return (error); 286 } 287 288 int 289 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) 290 { 291 /* 292 * The promote ioctl is still legacy, so we need to construct our 293 * own zfs_cmd_t rather than using lzc_ioctl(). 294 */ 295 zfs_cmd_t zc = {"\0"}; 296 297 ASSERT3S(g_refcount, >, 0); 298 VERIFY3S(g_fd, !=, -1); 299 300 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 301 if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { 302 int error = errno; 303 if (error == EEXIST && snapnamebuf != NULL) 304 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); 305 return (error); 306 } 307 return (0); 308 } 309 310 int 311 lzc_rename(const char *source, const char *target) 312 { 313 zfs_cmd_t zc = {"\0"}; 314 int error; 315 316 ASSERT3S(g_refcount, >, 0); 317 VERIFY3S(g_fd, !=, -1); 318 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); 319 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); 320 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc); 321 if (error != 0) 322 error = errno; 323 return (error); 324 } 325 326 int 327 lzc_destroy(const char *fsname) 328 { 329 int error; 330 nvlist_t *args = fnvlist_alloc(); 331 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); 332 nvlist_free(args); 333 return (error); 334 } 335 336 /* 337 * Creates snapshots. 338 * 339 * The keys in the snaps nvlist are the snapshots to be created. 340 * They must all be in the same pool. 341 * 342 * The props nvlist is properties to set. Currently only user properties 343 * are supported. { user:prop_name -> string value } 344 * 345 * The returned results nvlist will have an entry for each snapshot that failed. 346 * The value will be the (int32) error code. 347 * 348 * The return value will be 0 if all snapshots were created, otherwise it will 349 * be the errno of a (unspecified) snapshot that failed. 
350 */ 351 int 352 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 353 { 354 nvpair_t *elem; 355 nvlist_t *args; 356 int error; 357 char pool[ZFS_MAX_DATASET_NAME_LEN]; 358 359 *errlist = NULL; 360 361 /* determine the pool name */ 362 elem = nvlist_next_nvpair(snaps, NULL); 363 if (elem == NULL) 364 return (0); 365 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 366 pool[strcspn(pool, "/@")] = '\0'; 367 368 args = fnvlist_alloc(); 369 fnvlist_add_nvlist(args, "snaps", snaps); 370 if (props != NULL) 371 fnvlist_add_nvlist(args, "props", props); 372 373 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 374 nvlist_free(args); 375 376 return (error); 377 } 378 379 /* 380 * Destroys snapshots. 381 * 382 * The keys in the snaps nvlist are the snapshots to be destroyed. 383 * They must all be in the same pool. 384 * 385 * Snapshots that do not exist will be silently ignored. 386 * 387 * If 'defer' is not set, and a snapshot has user holds or clones, the 388 * destroy operation will fail and none of the snapshots will be 389 * destroyed. 390 * 391 * If 'defer' is set, and a snapshot has user holds or clones, it will be 392 * marked for deferred destruction, and will be destroyed when the last hold 393 * or clone is removed/destroyed. 394 * 395 * The return value will be 0 if all snapshots were destroyed (or marked for 396 * later destruction if 'defer' is set) or didn't exist to begin with. 397 * 398 * Otherwise the return value will be the errno of a (unspecified) snapshot 399 * that failed, no snapshots will be destroyed, and the errlist will have an 400 * entry for each snapshot that failed. The value in the errlist will be 401 * the (int32) error code. 
402 */ 403 int 404 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 405 { 406 nvpair_t *elem; 407 nvlist_t *args; 408 int error; 409 char pool[ZFS_MAX_DATASET_NAME_LEN]; 410 411 /* determine the pool name */ 412 elem = nvlist_next_nvpair(snaps, NULL); 413 if (elem == NULL) 414 return (0); 415 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 416 pool[strcspn(pool, "/@")] = '\0'; 417 418 args = fnvlist_alloc(); 419 fnvlist_add_nvlist(args, "snaps", snaps); 420 if (defer) 421 fnvlist_add_boolean(args, "defer"); 422 423 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 424 nvlist_free(args); 425 426 return (error); 427 } 428 429 int 430 lzc_snaprange_space(const char *firstsnap, const char *lastsnap, 431 uint64_t *usedp) 432 { 433 nvlist_t *args; 434 nvlist_t *result; 435 int err; 436 char fs[ZFS_MAX_DATASET_NAME_LEN]; 437 char *atp; 438 439 /* determine the fs name */ 440 (void) strlcpy(fs, firstsnap, sizeof (fs)); 441 atp = strchr(fs, '@'); 442 if (atp == NULL) 443 return (EINVAL); 444 *atp = '\0'; 445 446 args = fnvlist_alloc(); 447 fnvlist_add_string(args, "firstsnap", firstsnap); 448 449 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 450 nvlist_free(args); 451 if (err == 0) 452 *usedp = fnvlist_lookup_uint64(result, "used"); 453 fnvlist_free(result); 454 455 return (err); 456 } 457 458 boolean_t 459 lzc_exists(const char *dataset) 460 { 461 /* 462 * The objset_stats ioctl is still legacy, so we need to construct our 463 * own zfs_cmd_t rather than using lzc_ioctl(). 464 */ 465 zfs_cmd_t zc = {"\0"}; 466 467 ASSERT3S(g_refcount, >, 0); 468 VERIFY3S(g_fd, !=, -1); 469 470 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 471 return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 472 } 473 474 /* 475 * outnvl is unused. 476 * It was added to preserve the function signature in case it is 477 * needed in the future. 
478 */ 479 /*ARGSUSED*/ 480 int 481 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) 482 { 483 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); 484 } 485 486 /* 487 * Create "user holds" on snapshots. If there is a hold on a snapshot, 488 * the snapshot can not be destroyed. (However, it can be marked for deletion 489 * by lzc_destroy_snaps(defer=B_TRUE).) 490 * 491 * The keys in the nvlist are snapshot names. 492 * The snapshots must all be in the same pool. 493 * The value is the name of the hold (string type). 494 * 495 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). 496 * In this case, when the cleanup_fd is closed (including on process 497 * termination), the holds will be released. If the system is shut down 498 * uncleanly, the holds will be released when the pool is next opened 499 * or imported. 500 * 501 * Holds for snapshots which don't exist will be skipped and have an entry 502 * added to errlist, but will not cause an overall failure. 503 * 504 * The return value will be 0 if all holds, for snapshots that existed, 505 * were successfully created. 506 * 507 * Otherwise the return value will be the errno of a (unspecified) hold that 508 * failed and no holds will be created. 509 * 510 * In all cases the errlist will have an entry for each hold that failed 511 * (name = snapshot), with its value being the error code (int32). 
512 */ 513 int 514 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) 515 { 516 char pool[ZFS_MAX_DATASET_NAME_LEN]; 517 nvlist_t *args; 518 nvpair_t *elem; 519 int error; 520 521 /* determine the pool name */ 522 elem = nvlist_next_nvpair(holds, NULL); 523 if (elem == NULL) 524 return (0); 525 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 526 pool[strcspn(pool, "/@")] = '\0'; 527 528 args = fnvlist_alloc(); 529 fnvlist_add_nvlist(args, "holds", holds); 530 if (cleanup_fd != -1) 531 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); 532 533 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); 534 nvlist_free(args); 535 return (error); 536 } 537 538 /* 539 * Release "user holds" on snapshots. If the snapshot has been marked for 540 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have 541 * any clones, and all the user holds are removed, then the snapshot will be 542 * destroyed. 543 * 544 * The keys in the nvlist are snapshot names. 545 * The snapshots must all be in the same pool. 546 * The value is an nvlist whose keys are the holds to remove. 547 * 548 * Holds which failed to release because they didn't exist will have an entry 549 * added to errlist, but will not cause an overall failure. 550 * 551 * The return value will be 0 if the nvl holds was empty or all holds that 552 * existed, were successfully removed. 553 * 554 * Otherwise the return value will be the errno of a (unspecified) hold that 555 * failed to release and no holds will be released. 556 * 557 * In all cases the errlist will have an entry for each hold that failed to 558 * to release. 
559 */ 560 int 561 lzc_release(nvlist_t *holds, nvlist_t **errlist) 562 { 563 char pool[ZFS_MAX_DATASET_NAME_LEN]; 564 nvpair_t *elem; 565 566 /* determine the pool name */ 567 elem = nvlist_next_nvpair(holds, NULL); 568 if (elem == NULL) 569 return (0); 570 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 571 pool[strcspn(pool, "/@")] = '\0'; 572 573 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); 574 } 575 576 /* 577 * Retrieve list of user holds on the specified snapshot. 578 * 579 * On success, *holdsp will be set to an nvlist which the caller must free. 580 * The keys are the names of the holds, and the value is the creation time 581 * of the hold (uint64) in seconds since the epoch. 582 */ 583 int 584 lzc_get_holds(const char *snapname, nvlist_t **holdsp) 585 { 586 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); 587 } 588 589 /* 590 * Generate a zfs send stream for the specified snapshot and write it to 591 * the specified file descriptor. 592 * 593 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") 594 * 595 * If "from" is NULL, a full (non-incremental) stream will be sent. 596 * If "from" is non-NULL, it must be the full name of a snapshot or 597 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or 598 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or 599 * bookmark must represent an earlier point in the history of "snapname"). 600 * It can be an earlier snapshot in the same filesystem or zvol as "snapname", 601 * or it can be the origin of "snapname"'s filesystem, or an earlier 602 * snapshot in the origin, etc. 603 * 604 * "fd" is the file descriptor to write the send stream to. 605 * 606 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted 607 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT 608 * records with drr_blksz > 128K. 
609 * 610 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted 611 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, 612 * which the receiving system must support (as indicated by support 613 * for the "embedded_data" feature). 614 * 615 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using 616 * compressed WRITE records for blocks which are compressed on disk and in 617 * memory. If the lz4_compress feature is active on the sending system, then 618 * the receiving system must have that feature enabled as well. 619 * 620 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted 621 * datasets, by sending data exactly as it exists on disk. This allows backups 622 * to be taken even if encryption keys are not currently loaded. 623 */ 624 int 625 lzc_send(const char *snapname, const char *from, int fd, 626 enum lzc_send_flags flags) 627 { 628 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 629 NULL)); 630 } 631 632 int 633 lzc_send_redacted(const char *snapname, const char *from, int fd, 634 enum lzc_send_flags flags, const char *redactbook) 635 { 636 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, 637 redactbook)); 638 } 639 640 int 641 lzc_send_resume(const char *snapname, const char *from, int fd, 642 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) 643 { 644 return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj, 645 resumeoff, NULL)); 646 } 647 648 /* 649 * snapname: The name of the "tosnap", or the snapshot whose contents we are 650 * sending. 651 * from: The name of the "fromsnap", or the incremental source. 652 * fd: File descriptor to write the stream to. 653 * flags: flags that determine features to be used by the stream. 654 * resumeobj: Object to resume from, for resuming send 655 * resumeoff: Offset to resume from, for resuming send. 
656 * redactnv: nvlist of string -> boolean(ignored) containing the names of all 657 * the snapshots that we should redact with respect to. 658 * redactbook: Name of the redaction bookmark to create. 659 */ 660 int 661 lzc_send_resume_redacted(const char *snapname, const char *from, int fd, 662 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 663 const char *redactbook) 664 { 665 nvlist_t *args; 666 int err; 667 668 args = fnvlist_alloc(); 669 fnvlist_add_int32(args, "fd", fd); 670 if (from != NULL) 671 fnvlist_add_string(args, "fromsnap", from); 672 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 673 fnvlist_add_boolean(args, "largeblockok"); 674 if (flags & LZC_SEND_FLAG_EMBED_DATA) 675 fnvlist_add_boolean(args, "embedok"); 676 if (flags & LZC_SEND_FLAG_COMPRESS) 677 fnvlist_add_boolean(args, "compressok"); 678 if (flags & LZC_SEND_FLAG_RAW) 679 fnvlist_add_boolean(args, "rawok"); 680 if (flags & LZC_SEND_FLAG_SAVED) 681 fnvlist_add_boolean(args, "savedok"); 682 if (resumeobj != 0 || resumeoff != 0) { 683 fnvlist_add_uint64(args, "resume_object", resumeobj); 684 fnvlist_add_uint64(args, "resume_offset", resumeoff); 685 } 686 if (redactbook != NULL) 687 fnvlist_add_string(args, "redactbook", redactbook); 688 689 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 690 nvlist_free(args); 691 return (err); 692 } 693 694 /* 695 * "from" can be NULL, a snapshot, or a bookmark. 696 * 697 * If from is NULL, a full (non-incremental) stream will be estimated. This 698 * is calculated very efficiently. 699 * 700 * If from is a snapshot, lzc_send_space uses the deadlists attached to 701 * each snapshot to efficiently estimate the stream size. 702 * 703 * If from is a bookmark, the indirect blocks in the destination snapshot 704 * are traversed, looking for blocks with a birth time since the creation TXG of 705 * the snapshot this bookmark was created from. 
This will result in 706 * significantly more I/O and be less efficient than a send space estimation on 707 * an equivalent snapshot. This process is also used if redact_snaps is 708 * non-null. 709 */ 710 int 711 lzc_send_space_resume_redacted(const char *snapname, const char *from, 712 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, 713 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) 714 { 715 nvlist_t *args; 716 nvlist_t *result; 717 int err; 718 719 args = fnvlist_alloc(); 720 if (from != NULL) 721 fnvlist_add_string(args, "from", from); 722 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 723 fnvlist_add_boolean(args, "largeblockok"); 724 if (flags & LZC_SEND_FLAG_EMBED_DATA) 725 fnvlist_add_boolean(args, "embedok"); 726 if (flags & LZC_SEND_FLAG_COMPRESS) 727 fnvlist_add_boolean(args, "compressok"); 728 if (flags & LZC_SEND_FLAG_RAW) 729 fnvlist_add_boolean(args, "rawok"); 730 if (resumeobj != 0 || resumeoff != 0) { 731 fnvlist_add_uint64(args, "resume_object", resumeobj); 732 fnvlist_add_uint64(args, "resume_offset", resumeoff); 733 fnvlist_add_uint64(args, "bytes", resume_bytes); 734 } 735 if (redactbook != NULL) 736 fnvlist_add_string(args, "redactbook", redactbook); 737 if (fd != -1) 738 fnvlist_add_int32(args, "fd", fd); 739 740 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 741 nvlist_free(args); 742 if (err == 0) 743 *spacep = fnvlist_lookup_uint64(result, "space"); 744 nvlist_free(result); 745 return (err); 746 } 747 748 int 749 lzc_send_space(const char *snapname, const char *from, 750 enum lzc_send_flags flags, uint64_t *spacep) 751 { 752 return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0, 753 NULL, -1, spacep)); 754 } 755 756 static int 757 recv_read(int fd, void *buf, int ilen) 758 { 759 char *cp = buf; 760 int rv; 761 int len = ilen; 762 763 do { 764 rv = read(fd, cp, len); 765 cp += rv; 766 len -= rv; 767 } while (rv > 0); 768 769 if (rv < 0 || len != 0) 770 return (EIO); 771 772 
return (0); 773 } 774 775 /* 776 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the 777 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all 778 * stream options but is currently only used for resumable streams. This way 779 * updated user space utilities will interoperate with older kernel modules. 780 * 781 * Non-Linux OpenZFS platforms have opted to modify the legacy interface. 782 */ 783 static int 784 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, 785 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force, 786 boolean_t resumable, boolean_t raw, int input_fd, 787 const dmu_replay_record_t *begin_record, uint64_t *read_bytes, 788 uint64_t *errflags, nvlist_t **errors) 789 { 790 dmu_replay_record_t drr; 791 char fsname[MAXPATHLEN]; 792 char *atp; 793 int error; 794 boolean_t payload = B_FALSE; 795 796 ASSERT3S(g_refcount, >, 0); 797 VERIFY3S(g_fd, !=, -1); 798 799 /* Set 'fsname' to the name of containing filesystem */ 800 (void) strlcpy(fsname, snapname, sizeof (fsname)); 801 atp = strchr(fsname, '@'); 802 if (atp == NULL) 803 return (EINVAL); 804 *atp = '\0'; 805 806 /* If the fs does not exist, try its parent. */ 807 if (!lzc_exists(fsname)) { 808 char *slashp = strrchr(fsname, '/'); 809 if (slashp == NULL) 810 return (ENOENT); 811 *slashp = '\0'; 812 } 813 814 /* 815 * The begin_record is normally a non-byteswapped BEGIN record. 816 * For resumable streams it may be set to any non-byteswapped 817 * dmu_replay_record_t. 818 */ 819 if (begin_record == NULL) { 820 error = recv_read(input_fd, &drr, sizeof (drr)); 821 if (error != 0) 822 return (error); 823 } else { 824 drr = *begin_record; 825 payload = (begin_record->drr_payloadlen != 0); 826 } 827 828 /* 829 * All receives with a payload should use the new interface. 
830 */ 831 if (resumable || raw || wkeydata != NULL || payload) { 832 nvlist_t *outnvl = NULL; 833 nvlist_t *innvl = fnvlist_alloc(); 834 835 fnvlist_add_string(innvl, "snapname", snapname); 836 837 if (recvdprops != NULL) 838 fnvlist_add_nvlist(innvl, "props", recvdprops); 839 840 if (localprops != NULL) 841 fnvlist_add_nvlist(innvl, "localprops", localprops); 842 843 if (wkeydata != NULL) { 844 /* 845 * wkeydata must be placed in the special 846 * ZPOOL_HIDDEN_ARGS nvlist so that it 847 * will not be printed to the zpool history. 848 */ 849 nvlist_t *hidden_args = fnvlist_alloc(); 850 fnvlist_add_uint8_array(hidden_args, "wkeydata", 851 wkeydata, wkeylen); 852 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, 853 hidden_args); 854 nvlist_free(hidden_args); 855 } 856 857 if (origin != NULL && strlen(origin)) 858 fnvlist_add_string(innvl, "origin", origin); 859 860 fnvlist_add_byte_array(innvl, "begin_record", 861 (uchar_t *)&drr, sizeof (drr)); 862 863 fnvlist_add_int32(innvl, "input_fd", input_fd); 864 865 if (force) 866 fnvlist_add_boolean(innvl, "force"); 867 868 if (resumable) 869 fnvlist_add_boolean(innvl, "resumable"); 870 871 872 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); 873 874 if (error == 0 && read_bytes != NULL) 875 error = nvlist_lookup_uint64(outnvl, "read_bytes", 876 read_bytes); 877 878 if (error == 0 && errflags != NULL) 879 error = nvlist_lookup_uint64(outnvl, "error_flags", 880 errflags); 881 882 if (error == 0 && errors != NULL) { 883 nvlist_t *nvl; 884 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); 885 if (error == 0) 886 *errors = fnvlist_dup(nvl); 887 } 888 889 fnvlist_free(innvl); 890 fnvlist_free(outnvl); 891 } else { 892 zfs_cmd_t zc = {"\0"}; 893 char *packed = NULL; 894 size_t size; 895 896 ASSERT3S(g_refcount, >, 0); 897 898 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 899 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 900 901 if (recvdprops != NULL) { 902 packed = fnvlist_pack(recvdprops, 
&size); 903 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 904 zc.zc_nvlist_src_size = size; 905 } 906 907 if (localprops != NULL) { 908 packed = fnvlist_pack(localprops, &size); 909 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed; 910 zc.zc_nvlist_conf_size = size; 911 } 912 913 if (origin != NULL) 914 (void) strlcpy(zc.zc_string, origin, 915 sizeof (zc.zc_string)); 916 917 ASSERT3S(drr.drr_type, ==, DRR_BEGIN); 918 zc.zc_begin_record = drr.drr_u.drr_begin; 919 zc.zc_guid = force; 920 zc.zc_cookie = input_fd; 921 zc.zc_cleanup_fd = -1; 922 zc.zc_action_handle = 0; 923 924 zc.zc_nvlist_dst_size = 128 * 1024; 925 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 926 malloc(zc.zc_nvlist_dst_size); 927 928 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc); 929 if (error != 0) { 930 error = errno; 931 } else { 932 if (read_bytes != NULL) 933 *read_bytes = zc.zc_cookie; 934 935 if (errflags != NULL) 936 *errflags = zc.zc_obj; 937 938 if (errors != NULL) 939 VERIFY0(nvlist_unpack( 940 (void *)(uintptr_t)zc.zc_nvlist_dst, 941 zc.zc_nvlist_dst_size, errors, KM_SLEEP)); 942 } 943 944 if (packed != NULL) 945 fnvlist_pack_free(packed, size); 946 free((void *)(uintptr_t)zc.zc_nvlist_dst); 947 } 948 949 return (error); 950 } 951 952 /* 953 * The simplest receive case: receive from the specified fd, creating the 954 * specified snapshot. Apply the specified properties as "received" properties 955 * (which can be overridden by locally-set properties). If the stream is a 956 * clone, its origin snapshot must be specified by 'origin'. The 'force' 957 * flag will cause the target filesystem to be rolled back or destroyed if 958 * necessary to receive. 959 * 960 * Return 0 on success or an errno on failure. 961 * 962 * Note: this interface does not work on dedup'd streams 963 * (those with DMU_BACKUP_FEATURE_DEDUP). 
964 */ 965 int 966 lzc_receive(const char *snapname, nvlist_t *props, const char *origin, 967 boolean_t force, boolean_t raw, int fd) 968 { 969 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 970 B_FALSE, raw, fd, NULL, NULL, NULL, NULL)); 971 } 972 973 /* 974 * Like lzc_receive, but if the receive fails due to premature stream 975 * termination, the intermediate state will be preserved on disk. In this 976 * case, ECKSUM will be returned. The receive may subsequently be resumed 977 * with a resuming send stream generated by lzc_send_resume(). 978 */ 979 int 980 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, 981 boolean_t force, boolean_t raw, int fd) 982 { 983 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 984 B_TRUE, raw, fd, NULL, NULL, NULL, NULL)); 985 } 986 987 /* 988 * Like lzc_receive, but allows the caller to read the begin record and then to 989 * pass it in. That could be useful if the caller wants to derive, for example, 990 * the snapname or the origin parameters based on the information contained in 991 * the begin record. 992 * The begin record must be in its original form as read from the stream, 993 * in other words, it should not be byteswapped. 994 * 995 * The 'resumable' parameter allows to obtain the same behavior as with 996 * lzc_receive_resumable. 997 */ 998 int 999 lzc_receive_with_header(const char *snapname, nvlist_t *props, 1000 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1001 int fd, const dmu_replay_record_t *begin_record) 1002 { 1003 if (begin_record == NULL) 1004 return (EINVAL); 1005 1006 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1007 resumable, raw, fd, begin_record, NULL, NULL, NULL)); 1008 } 1009 1010 /* 1011 * Like lzc_receive, but allows the caller to pass all supported arguments 1012 * and retrieve all values returned. 
The only additional input parameter 1013 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. 1014 * 1015 * The following parameters all provide return values. Several may be set 1016 * in the failure case and will contain additional information. 1017 * 1018 * The 'read_bytes' value will be set to the total number of bytes read. 1019 * 1020 * The 'errflags' value will contain zprop_errflags_t flags which are 1021 * used to describe any failures. 1022 * 1023 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored. 1024 * 1025 * The 'errors' nvlist contains an entry for each unapplied received 1026 * property. Callers are responsible for freeing this nvlist. 1027 */ 1028 int 1029 lzc_receive_one(const char *snapname, nvlist_t *props, 1030 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, 1031 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, 1032 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1033 nvlist_t **errors) 1034 { 1035 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, 1036 resumable, raw, input_fd, begin_record, 1037 read_bytes, errflags, errors)); 1038 } 1039 1040 /* 1041 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops' 1042 * argument. 1043 * 1044 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and 1045 * exclude ('zfs receive -x') properties. 
Callers are responsible for freeing 1046 * this nvlist 1047 */ 1048 int 1049 lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, 1050 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, 1051 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd, 1052 const dmu_replay_record_t *begin_record, int cleanup_fd, 1053 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, 1054 nvlist_t **errors) 1055 { 1056 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, 1057 force, resumable, raw, input_fd, begin_record, 1058 read_bytes, errflags, errors)); 1059 } 1060 1061 /* 1062 * Roll back this filesystem or volume to its most recent snapshot. 1063 * If snapnamebuf is not NULL, it will be filled in with the name 1064 * of the most recent snapshot. 1065 * Note that the latest snapshot may change if a new one is concurrently 1066 * created or the current one is destroyed. lzc_rollback_to can be used 1067 * to roll back to a specific latest snapshot. 1068 * 1069 * Return 0 on success or an errno on failure. 1070 */ 1071 int 1072 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) 1073 { 1074 nvlist_t *args; 1075 nvlist_t *result; 1076 int err; 1077 1078 args = fnvlist_alloc(); 1079 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1080 nvlist_free(args); 1081 if (err == 0 && snapnamebuf != NULL) { 1082 const char *snapname = fnvlist_lookup_string(result, "target"); 1083 (void) strlcpy(snapnamebuf, snapname, snapnamelen); 1084 } 1085 nvlist_free(result); 1086 1087 return (err); 1088 } 1089 1090 /* 1091 * Roll back this filesystem or volume to the specified snapshot, 1092 * if possible. 1093 * 1094 * Return 0 on success or an errno on failure. 
1095 */ 1096 int 1097 lzc_rollback_to(const char *fsname, const char *snapname) 1098 { 1099 nvlist_t *args; 1100 nvlist_t *result; 1101 int err; 1102 1103 args = fnvlist_alloc(); 1104 fnvlist_add_string(args, "target", snapname); 1105 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 1106 nvlist_free(args); 1107 nvlist_free(result); 1108 return (err); 1109 } 1110 1111 /* 1112 * Creates new bookmarks from existing snapshot or bookmark. 1113 * 1114 * The bookmarks nvlist maps from the full name of the new bookmark to 1115 * the full name of the source snapshot or bookmark. 1116 * All the bookmarks and snapshots must be in the same pool. 1117 * The new bookmarks names must be unique. 1118 * => see function dsl_bookmark_create_nvl_validate 1119 * 1120 * The returned results nvlist will have an entry for each bookmark that failed. 1121 * The value will be the (int32) error code. 1122 * 1123 * The return value will be 0 if all bookmarks were created, otherwise it will 1124 * be the errno of a (undetermined) bookmarks that failed. 1125 */ 1126 int 1127 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) 1128 { 1129 nvpair_t *elem; 1130 int error; 1131 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1132 1133 /* determine pool name from first bookmark */ 1134 elem = nvlist_next_nvpair(bookmarks, NULL); 1135 if (elem == NULL) 1136 return (0); 1137 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1138 pool[strcspn(pool, "/#")] = '\0'; 1139 1140 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); 1141 1142 return (error); 1143 } 1144 1145 /* 1146 * Retrieve bookmarks. 1147 * 1148 * Retrieve the list of bookmarks for the given file system. The props 1149 * parameter is an nvlist of property names (with no values) that will be 1150 * returned for each bookmark. 
1151 * 1152 * The following are valid properties on bookmarks, most of which are numbers 1153 * (represented as uint64 in the nvlist), except redact_snaps, which is a 1154 * uint64 array, and redact_complete, which is a boolean 1155 * 1156 * "guid" - globally unique identifier of the snapshot it refers to 1157 * "createtxg" - txg when the snapshot it refers to was created 1158 * "creation" - timestamp when the snapshot it refers to was created 1159 * "ivsetguid" - IVset guid for identifying encrypted snapshots 1160 * "redact_snaps" - list of guids of the redaction snapshots for the specified 1161 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will 1162 * not contain an entry for this value. If it is redacted with respect to 1163 * no snapshots, it will contain value -> NULL uint64 array 1164 * "redact_complete" - boolean value; true if the redaction bookmark is 1165 * complete, false otherwise. 1166 * 1167 * The format of the returned nvlist as follows: 1168 * <short name of bookmark> -> { 1169 * <name of property> -> { 1170 * "value" -> uint64 1171 * } 1172 * ... 1173 * "redact_snaps" -> { 1174 * "value" -> uint64 array 1175 * } 1176 * "redact_complete" -> { 1177 * "value" -> boolean value 1178 * } 1179 * } 1180 */ 1181 int 1182 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) 1183 { 1184 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); 1185 } 1186 1187 /* 1188 * Get bookmark properties. 1189 * 1190 * Given a bookmark's full name, retrieve all properties for the bookmark. 1191 * 1192 * The format of the returned property list is as follows: 1193 * { 1194 * <name of property> -> { 1195 * "value" -> uint64 1196 * } 1197 * ... 
1198 * "redact_snaps" -> { 1199 * "value" -> uint64 array 1200 * } 1201 */ 1202 int 1203 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props) 1204 { 1205 int error; 1206 1207 nvlist_t *innvl = fnvlist_alloc(); 1208 error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props); 1209 fnvlist_free(innvl); 1210 1211 return (error); 1212 } 1213 1214 /* 1215 * Destroys bookmarks. 1216 * 1217 * The keys in the bmarks nvlist are the bookmarks to be destroyed. 1218 * They must all be in the same pool. Bookmarks are specified as 1219 * <fs>#<bmark>. 1220 * 1221 * Bookmarks that do not exist will be silently ignored. 1222 * 1223 * The return value will be 0 if all bookmarks that existed were destroyed. 1224 * 1225 * Otherwise the return value will be the errno of a (undetermined) bookmark 1226 * that failed, no bookmarks will be destroyed, and the errlist will have an 1227 * entry for each bookmarks that failed. The value in the errlist will be 1228 * the (int32) error code. 1229 */ 1230 int 1231 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) 1232 { 1233 nvpair_t *elem; 1234 int error; 1235 char pool[ZFS_MAX_DATASET_NAME_LEN]; 1236 1237 /* determine the pool name */ 1238 elem = nvlist_next_nvpair(bmarks, NULL); 1239 if (elem == NULL) 1240 return (0); 1241 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 1242 pool[strcspn(pool, "/#")] = '\0'; 1243 1244 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); 1245 1246 return (error); 1247 } 1248 1249 static int 1250 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, 1251 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1252 { 1253 int error; 1254 nvlist_t *args; 1255 1256 args = fnvlist_alloc(); 1257 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); 1258 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); 1259 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); 1260 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, 
instrlimit); 1261 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); 1262 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); 1263 fnvlist_free(args); 1264 1265 return (error); 1266 } 1267 1268 /* 1269 * Executes a channel program. 1270 * 1271 * If this function returns 0 the channel program was successfully loaded and 1272 * ran without failing. Note that individual commands the channel program ran 1273 * may have failed and the channel program is responsible for reporting such 1274 * errors through outnvl if they are important. 1275 * 1276 * This method may also return: 1277 * 1278 * EINVAL The program contains syntax errors, or an invalid memory or time 1279 * limit was given. No part of the channel program was executed. 1280 * If caused by syntax errors, 'outnvl' contains information about the 1281 * errors. 1282 * 1283 * ECHRNG The program was executed, but encountered a runtime error, such as 1284 * calling a function with incorrect arguments, invoking the error() 1285 * function directly, failing an assert() command, etc. Some portion 1286 * of the channel program may have executed and committed changes. 1287 * Information about the failure can be found in 'outnvl'. 1288 * 1289 * ENOMEM The program fully executed, but the output buffer was not large 1290 * enough to store the returned value. No output is returned through 1291 * 'outnvl'. 1292 * 1293 * ENOSPC The program was terminated because it exceeded its memory usage 1294 * limit. Some portion of the channel program may have executed and 1295 * committed changes to disk. No output is returned through 'outnvl'. 1296 * 1297 * ETIME The program was terminated because it exceeded its Lua instruction 1298 * limit. Some portion of the channel program may have executed and 1299 * committed changes to disk. No output is returned through 'outnvl'. 
1300 */ 1301 int 1302 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, 1303 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1304 { 1305 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, 1306 memlimit, argnvl, outnvl)); 1307 } 1308 1309 /* 1310 * Creates a checkpoint for the specified pool. 1311 * 1312 * If this function returns 0 the pool was successfully checkpointed. 1313 * 1314 * This method may also return: 1315 * 1316 * ZFS_ERR_CHECKPOINT_EXISTS 1317 * The pool already has a checkpoint. A pools can only have one 1318 * checkpoint at most, at any given time. 1319 * 1320 * ZFS_ERR_DISCARDING_CHECKPOINT 1321 * ZFS is in the middle of discarding a checkpoint for this pool. 1322 * The pool can be checkpointed again once the discard is done. 1323 * 1324 * ZFS_DEVRM_IN_PROGRESS 1325 * A vdev is currently being removed. The pool cannot be 1326 * checkpointed until the device removal is done. 1327 * 1328 * ZFS_VDEV_TOO_BIG 1329 * One or more top-level vdevs exceed the maximum vdev size 1330 * supported for this feature. 1331 */ 1332 int 1333 lzc_pool_checkpoint(const char *pool) 1334 { 1335 int error; 1336 1337 nvlist_t *result = NULL; 1338 nvlist_t *args = fnvlist_alloc(); 1339 1340 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); 1341 1342 fnvlist_free(args); 1343 fnvlist_free(result); 1344 1345 return (error); 1346 } 1347 1348 /* 1349 * Discard the checkpoint from the specified pool. 1350 * 1351 * If this function returns 0 the checkpoint was successfully discarded. 1352 * 1353 * This method may also return: 1354 * 1355 * ZFS_ERR_NO_CHECKPOINT 1356 * The pool does not have a checkpoint. 1357 * 1358 * ZFS_ERR_DISCARDING_CHECKPOINT 1359 * ZFS is already in the middle of discarding the checkpoint. 
1360 */ 1361 int 1362 lzc_pool_checkpoint_discard(const char *pool) 1363 { 1364 int error; 1365 1366 nvlist_t *result = NULL; 1367 nvlist_t *args = fnvlist_alloc(); 1368 1369 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); 1370 1371 fnvlist_free(args); 1372 fnvlist_free(result); 1373 1374 return (error); 1375 } 1376 1377 /* 1378 * Executes a read-only channel program. 1379 * 1380 * A read-only channel program works programmatically the same way as a 1381 * normal channel program executed with lzc_channel_program(). The only 1382 * difference is it runs exclusively in open-context and therefore can 1383 * return faster. The downside to that, is that the program cannot change 1384 * on-disk state by calling functions from the zfs.sync submodule. 1385 * 1386 * The return values of this function (and their meaning) are exactly the 1387 * same as the ones described in lzc_channel_program(). 1388 */ 1389 int 1390 lzc_channel_program_nosync(const char *pool, const char *program, 1391 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1392 { 1393 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, 1394 memlimit, argnvl, outnvl)); 1395 } 1396 1397 int 1398 lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) 1399 { 1400 return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl)); 1401 } 1402 1403 int 1404 lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) 1405 { 1406 return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl)); 1407 } 1408 1409 /* 1410 * Performs key management functions 1411 * 1412 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to 1413 * load or change a wrapping key, the key should be specified in the 1414 * hidden_args nvlist so that it is not logged. 
1415 */ 1416 int 1417 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, 1418 uint_t wkeylen) 1419 { 1420 int error; 1421 nvlist_t *ioc_args; 1422 nvlist_t *hidden_args; 1423 1424 if (wkeydata == NULL) 1425 return (EINVAL); 1426 1427 ioc_args = fnvlist_alloc(); 1428 hidden_args = fnvlist_alloc(); 1429 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); 1430 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1431 if (noop) 1432 fnvlist_add_boolean(ioc_args, "noop"); 1433 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); 1434 nvlist_free(hidden_args); 1435 nvlist_free(ioc_args); 1436 1437 return (error); 1438 } 1439 1440 int 1441 lzc_unload_key(const char *fsname) 1442 { 1443 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); 1444 } 1445 1446 int 1447 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, 1448 uint8_t *wkeydata, uint_t wkeylen) 1449 { 1450 int error; 1451 nvlist_t *ioc_args = fnvlist_alloc(); 1452 nvlist_t *hidden_args = NULL; 1453 1454 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); 1455 1456 if (wkeydata != NULL) { 1457 hidden_args = fnvlist_alloc(); 1458 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, 1459 wkeylen); 1460 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); 1461 } 1462 1463 if (props != NULL) 1464 fnvlist_add_nvlist(ioc_args, "props", props); 1465 1466 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); 1467 nvlist_free(hidden_args); 1468 nvlist_free(ioc_args); 1469 1470 return (error); 1471 } 1472 1473 int 1474 lzc_reopen(const char *pool_name, boolean_t scrub_restart) 1475 { 1476 nvlist_t *args = fnvlist_alloc(); 1477 int error; 1478 1479 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart); 1480 1481 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL); 1482 nvlist_free(args); 1483 return (error); 1484 } 1485 1486 /* 1487 * Changes initializing state. 
1488 * 1489 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1490 * The key is ignored. 1491 * 1492 * If there are errors related to vdev arguments, per-vdev errors are returned 1493 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1494 * guid is stringified with PRIu64, and errno is one of the following as 1495 * an int64_t: 1496 * - ENODEV if the device was not found 1497 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) 1498 * - EROFS if the device is not writeable 1499 * - EBUSY start requested but the device is already being either 1500 * initialized or trimmed 1501 * - ESRCH cancel/suspend requested but device is not being initialized 1502 * 1503 * If the errlist is empty, then return value will be: 1504 * - EINVAL if one or more arguments was invalid 1505 * - Other spa_open failures 1506 * - 0 if the operation succeeded 1507 */ 1508 int 1509 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, 1510 nvlist_t *vdevs, nvlist_t **errlist) 1511 { 1512 int error; 1513 1514 nvlist_t *args = fnvlist_alloc(); 1515 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); 1516 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); 1517 1518 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); 1519 1520 fnvlist_free(args); 1521 1522 return (error); 1523 } 1524 1525 /* 1526 * Changes TRIM state. 1527 * 1528 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1529 * The key is ignored. 1530 * 1531 * If there are errors related to vdev arguments, per-vdev errors are returned 1532 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1533 * guid is stringified with PRIu64, and errno is one of the following as 1534 * an int64_t: 1535 * - ENODEV if the device was not found 1536 * - EINVAL if the devices is not a leaf or is not concrete (e.g. 
missing)
 *	- EROFS if the device is not writeable
 *	- EBUSY start requested but the device is already being either trimmed
 *	        or initialized
 *	- ESRCH cancel/suspend requested but device is not being trimmed
 *	- EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
 *
 * If the errlist is empty, then return value will be:
 *	- EINVAL if one or more arguments was invalid
 *	- Other spa_open failures
 *	- 0 if the operation succeeded
 */
int
lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
    boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
{
	nvlist_t *innvl = fnvlist_alloc();
	int err;

	/* Command, vdev list, rate and secure flag all travel in one nvlist. */
	fnvlist_add_uint64(innvl, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
	fnvlist_add_nvlist(innvl, ZPOOL_TRIM_VDEVS, vdevs);
	fnvlist_add_uint64(innvl, ZPOOL_TRIM_RATE, rate);
	fnvlist_add_boolean_value(innvl, ZPOOL_TRIM_SECURE, secure);

	err = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, innvl, errlist);

	fnvlist_free(innvl);

	return (err);
}

/*
 * Create a redaction bookmark named bookname by redacting snapshot with respect
 * to all the snapshots in snapnv.
1570 */ 1571 int 1572 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv) 1573 { 1574 nvlist_t *args = fnvlist_alloc(); 1575 fnvlist_add_string(args, "bookname", bookname); 1576 fnvlist_add_nvlist(args, "snapnv", snapnv); 1577 int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL); 1578 fnvlist_free(args); 1579 return (error); 1580 } 1581 1582 static int 1583 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag, 1584 uint64_t tag, boolean_t *waited) 1585 { 1586 nvlist_t *args = fnvlist_alloc(); 1587 nvlist_t *result = NULL; 1588 1589 fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity); 1590 if (use_tag) 1591 fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag); 1592 1593 int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result); 1594 1595 if (error == 0 && waited != NULL) 1596 *waited = fnvlist_lookup_boolean_value(result, 1597 ZPOOL_WAIT_WAITED); 1598 1599 fnvlist_free(args); 1600 fnvlist_free(result); 1601 1602 return (error); 1603 } 1604 1605 int 1606 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited) 1607 { 1608 return (wait_common(pool, activity, B_FALSE, 0, waited)); 1609 } 1610 1611 int 1612 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag, 1613 boolean_t *waited) 1614 { 1615 return (wait_common(pool, activity, B_TRUE, tag, waited)); 1616 } 1617 1618 int 1619 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited) 1620 { 1621 nvlist_t *args = fnvlist_alloc(); 1622 nvlist_t *result = NULL; 1623 1624 fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity); 1625 1626 int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result); 1627 1628 if (error == 0 && waited != NULL) 1629 *waited = fnvlist_lookup_boolean_value(result, 1630 ZFS_WAIT_WAITED); 1631 1632 fnvlist_free(args); 1633 fnvlist_free(result); 1634 1635 return (error); 1636 } 1637 1638 /* 1639 * Set the bootenv contents for the given pool. 
1640 */ 1641 int 1642 lzc_set_bootenv(const char *pool, const nvlist_t *env) 1643 { 1644 return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); 1645 } 1646 1647 /* 1648 * Get the contents of the bootenv of the given pool. 1649 */ 1650 int 1651 lzc_get_bootenv(const char *pool, nvlist_t **outnvl) 1652 { 1653 return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); 1654 } 1655