1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 */ 25 26 /* 27 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 28 * It has the following characteristics: 29 * 30 * - Thread Safe. libzfs_core is accessible concurrently from multiple 31 * threads. This is accomplished primarily by avoiding global data 32 * (e.g. caching). Since it's thread-safe, there is no reason for a 33 * process to have multiple libzfs "instances". Therefore, we store 34 * our few pieces of data (e.g. the file descriptor) in global 35 * variables. The fd is reference-counted so that the libzfs_core 36 * library can be "initialized" multiple times (e.g. by different 37 * consumers within the same process). 38 * 39 * - Committed Interface. The libzfs_core interface will be committed, 40 * therefore consumers can compile against it and be confident that 41 * their code will continue to work on future releases of this code. 42 * Currently, the interface is Evolving (not Committed), but we intend 43 * to commit to it once it is more complete and we determine that it 44 * meets the needs of all consumers. 
45 * 46 * - Programmatic Error Handling. libzfs_core communicates errors with 47 * defined error numbers, and doesn't print anything to stdout/stderr. 48 * 49 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments 50 * to/from the kernel ioctls. There is generally a 1:1 correspondence 51 * between libzfs_core functions and ioctls to /dev/zfs. 52 * 53 * - Clear Atomicity. Because libzfs_core functions are generally 1:1 54 * with kernel ioctls, and kernel ioctls are generally atomic, each 55 * libzfs_core function is atomic. For example, creating multiple 56 * snapshots with a single call to lzc_snapshot() is atomic -- it 57 * can't fail with only some of the requested snapshots created, even 58 * in the event of power loss or system crash. 59 * 60 * - Continued libzfs Support. Some higher-level operations (e.g. 61 * support for "zfs send -R") are too complicated to fit the scope of 62 * libzfs_core. This functionality will continue to live in libzfs. 63 * Where appropriate, libzfs will use the underlying atomic operations 64 * of libzfs_core. For example, libzfs may implement "zfs send -R | 65 * zfs receive" by using individual "send one snapshot", rename, 66 * destroy, and "receive one snapshot" operations in libzfs_core. 67 * /sbin/zfs and /sbin/zpool will link with both libzfs and 68 * libzfs_core. Other consumers should aim to use only libzfs_core, 69 * since that will be the supported, stable interface going forwards. 
70 */ 71 72 #include <libzfs_core.h> 73 #include <ctype.h> 74 #include <unistd.h> 75 #include <stdlib.h> 76 #include <string.h> 77 #include <errno.h> 78 #include <fcntl.h> 79 #include <pthread.h> 80 #include <sys/nvpair.h> 81 #include <sys/param.h> 82 #include <sys/types.h> 83 #include <sys/stat.h> 84 #include <sys/zfs_ioctl.h> 85 86 static int g_fd; 87 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 88 static int g_refcount; 89 90 int 91 libzfs_core_init(void) 92 { 93 (void) pthread_mutex_lock(&g_lock); 94 if (g_refcount == 0) { 95 g_fd = open("/dev/zfs", O_RDWR); 96 if (g_fd < 0) { 97 (void) pthread_mutex_unlock(&g_lock); 98 return (errno); 99 } 100 } 101 g_refcount++; 102 (void) pthread_mutex_unlock(&g_lock); 103 return (0); 104 } 105 106 void 107 libzfs_core_fini(void) 108 { 109 (void) pthread_mutex_lock(&g_lock); 110 ASSERT3S(g_refcount, >, 0); 111 g_refcount--; 112 if (g_refcount == 0) 113 (void) close(g_fd); 114 (void) pthread_mutex_unlock(&g_lock); 115 } 116 117 static int 118 lzc_ioctl(zfs_ioc_t ioc, const char *name, 119 nvlist_t *source, nvlist_t **resultp) 120 { 121 zfs_cmd_t zc = { 0 }; 122 int error = 0; 123 char *packed; 124 size_t size; 125 126 ASSERT3S(g_refcount, >, 0); 127 128 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 129 130 packed = fnvlist_pack(source, &size); 131 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 132 zc.zc_nvlist_src_size = size; 133 134 if (resultp != NULL) { 135 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 136 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 137 malloc(zc.zc_nvlist_dst_size); 138 if (zc.zc_nvlist_dst == NULL) { 139 error = ENOMEM; 140 goto out; 141 } 142 } 143 144 while (ioctl(g_fd, ioc, &zc) != 0) { 145 if (errno == ENOMEM && resultp != NULL) { 146 free((void *)(uintptr_t)zc.zc_nvlist_dst); 147 zc.zc_nvlist_dst_size *= 2; 148 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 149 malloc(zc.zc_nvlist_dst_size); 150 if (zc.zc_nvlist_dst == NULL) { 151 error = ENOMEM; 152 goto out; 153 } 154 } else { 
155 error = errno; 156 break; 157 } 158 } 159 if (zc.zc_nvlist_dst_filled) { 160 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 161 zc.zc_nvlist_dst_size); 162 } else if (resultp != NULL) { 163 *resultp = NULL; 164 } 165 166 out: 167 fnvlist_pack_free(packed, size); 168 free((void *)(uintptr_t)zc.zc_nvlist_dst); 169 return (error); 170 } 171 172 int 173 lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) 174 { 175 int error; 176 nvlist_t *args = fnvlist_alloc(); 177 fnvlist_add_int32(args, "type", type); 178 if (props != NULL) 179 fnvlist_add_nvlist(args, "props", props); 180 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 181 nvlist_free(args); 182 return (error); 183 } 184 185 int 186 lzc_clone(const char *fsname, const char *origin, 187 nvlist_t *props) 188 { 189 int error; 190 nvlist_t *args = fnvlist_alloc(); 191 fnvlist_add_string(args, "origin", origin); 192 if (props != NULL) 193 fnvlist_add_nvlist(args, "props", props); 194 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 195 nvlist_free(args); 196 return (error); 197 } 198 199 /* 200 * Creates snapshots. 201 * 202 * The keys in the snaps nvlist are the snapshots to be created. 203 * They must all be in the same pool. 204 * 205 * The props nvlist is properties to set. Currently only user properties 206 * are supported. { user:prop_name -> string value } 207 * 208 * The returned results nvlist will have an entry for each snapshot that failed. 209 * The value will be the (int32) error code. 210 * 211 * The return value will be 0 if all snapshots were created, otherwise it will 212 * be the errno of a (undetermined) snapshot that failed. 
213 */ 214 int 215 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 216 { 217 nvpair_t *elem; 218 nvlist_t *args; 219 int error; 220 char pool[MAXNAMELEN]; 221 222 *errlist = NULL; 223 224 /* determine the pool name */ 225 elem = nvlist_next_nvpair(snaps, NULL); 226 if (elem == NULL) 227 return (0); 228 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 229 pool[strcspn(pool, "/@")] = '\0'; 230 231 args = fnvlist_alloc(); 232 fnvlist_add_nvlist(args, "snaps", snaps); 233 if (props != NULL) 234 fnvlist_add_nvlist(args, "props", props); 235 236 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 237 nvlist_free(args); 238 239 return (error); 240 } 241 242 /* 243 * Destroys snapshots. 244 * 245 * The keys in the snaps nvlist are the snapshots to be destroyed. 246 * They must all be in the same pool. 247 * 248 * Snapshots that do not exist will be silently ignored. 249 * 250 * If 'defer' is not set, and a snapshot has user holds or clones, the 251 * destroy operation will fail and none of the snapshots will be 252 * destroyed. 253 * 254 * If 'defer' is set, and a snapshot has user holds or clones, it will be 255 * marked for deferred destruction, and will be destroyed when the last hold 256 * or clone is removed/destroyed. 257 * 258 * The return value will be 0 if all snapshots were destroyed (or marked for 259 * later destruction if 'defer' is set) or didn't exist to begin with. 260 * 261 * Otherwise the return value will be the errno of a (undetermined) snapshot 262 * that failed, no snapshots will be destroyed, and the errlist will have an 263 * entry for each snapshot that failed. The value in the errlist will be 264 * the (int32) error code. 
265 */ 266 int 267 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 268 { 269 nvpair_t *elem; 270 nvlist_t *args; 271 int error; 272 char pool[MAXNAMELEN]; 273 274 /* determine the pool name */ 275 elem = nvlist_next_nvpair(snaps, NULL); 276 if (elem == NULL) 277 return (0); 278 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 279 pool[strcspn(pool, "/@")] = '\0'; 280 281 args = fnvlist_alloc(); 282 fnvlist_add_nvlist(args, "snaps", snaps); 283 if (defer) 284 fnvlist_add_boolean(args, "defer"); 285 286 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 287 nvlist_free(args); 288 289 return (error); 290 291 } 292 293 int 294 lzc_snaprange_space(const char *firstsnap, const char *lastsnap, 295 uint64_t *usedp) 296 { 297 nvlist_t *args; 298 nvlist_t *result; 299 int err; 300 char fs[MAXNAMELEN]; 301 char *atp; 302 303 /* determine the fs name */ 304 (void) strlcpy(fs, firstsnap, sizeof (fs)); 305 atp = strchr(fs, '@'); 306 if (atp == NULL) 307 return (EINVAL); 308 *atp = '\0'; 309 310 args = fnvlist_alloc(); 311 fnvlist_add_string(args, "firstsnap", firstsnap); 312 313 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 314 nvlist_free(args); 315 if (err == 0) 316 *usedp = fnvlist_lookup_uint64(result, "used"); 317 fnvlist_free(result); 318 319 return (err); 320 } 321 322 boolean_t 323 lzc_exists(const char *dataset) 324 { 325 /* 326 * The objset_stats ioctl is still legacy, so we need to construct our 327 * own zfs_cmd_t rather than using zfsc_ioctl(). 328 */ 329 zfs_cmd_t zc = { 0 }; 330 331 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 332 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 333 } 334 335 /* 336 * If fromsnap is NULL, a full (non-incremental) stream will be sent. 
337 */ 338 int 339 lzc_send(const char *snapname, const char *fromsnap, int fd) 340 { 341 nvlist_t *args; 342 int err; 343 344 args = fnvlist_alloc(); 345 fnvlist_add_int32(args, "fd", fd); 346 if (fromsnap != NULL) 347 fnvlist_add_string(args, "fromsnap", fromsnap); 348 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 349 nvlist_free(args); 350 return (err); 351 } 352 353 /* 354 * If fromsnap is NULL, a full (non-incremental) stream will be estimated. 355 */ 356 int 357 lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) 358 { 359 nvlist_t *args; 360 nvlist_t *result; 361 int err; 362 363 args = fnvlist_alloc(); 364 if (fromsnap != NULL) 365 fnvlist_add_string(args, "fromsnap", fromsnap); 366 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 367 nvlist_free(args); 368 if (err == 0) 369 *spacep = fnvlist_lookup_uint64(result, "space"); 370 nvlist_free(result); 371 return (err); 372 } 373 374 static int 375 recv_read(int fd, void *buf, int ilen) 376 { 377 char *cp = buf; 378 int rv; 379 int len = ilen; 380 381 do { 382 rv = read(fd, cp, len); 383 cp += rv; 384 len -= rv; 385 } while (rv > 0); 386 387 if (rv < 0 || len != 0) 388 return (EIO); 389 390 return (0); 391 } 392 393 /* 394 * The simplest receive case: receive from the specified fd, creating the 395 * specified snapshot. Apply the specified properties a "received" properties 396 * (which can be overridden by locally-set properties). If the stream is a 397 * clone, its origin snapshot must be specified by 'origin'. The 'force' 398 * flag will cause the target filesystem to be rolled back or destroyed if 399 * necessary to receive. 400 * 401 * Return 0 on success or an errno on failure. 402 * 403 * Note: this interface does not work on dedup'd streams 404 * (those with DMU_BACKUP_FEATURE_DEDUP). 
405 */ 406 int 407 lzc_receive(const char *snapname, nvlist_t *props, const char *origin, 408 boolean_t force, int fd) 409 { 410 /* 411 * The receive ioctl is still legacy, so we need to construct our own 412 * zfs_cmd_t rather than using zfsc_ioctl(). 413 */ 414 zfs_cmd_t zc = { 0 }; 415 char *atp; 416 char *packed = NULL; 417 size_t size; 418 dmu_replay_record_t drr; 419 int error; 420 421 ASSERT3S(g_refcount, >, 0); 422 423 /* zc_name is name of containing filesystem */ 424 (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); 425 atp = strchr(zc.zc_name, '@'); 426 if (atp == NULL) 427 return (EINVAL); 428 *atp = '\0'; 429 430 /* if the fs does not exist, try its parent. */ 431 if (!lzc_exists(zc.zc_name)) { 432 char *slashp = strrchr(zc.zc_name, '/'); 433 if (slashp == NULL) 434 return (ENOENT); 435 *slashp = '\0'; 436 437 } 438 439 /* zc_value is full name of the snapshot to create */ 440 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 441 442 if (props != NULL) { 443 /* zc_nvlist_src is props to set */ 444 packed = fnvlist_pack(props, &size); 445 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 446 zc.zc_nvlist_src_size = size; 447 } 448 449 /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ 450 if (origin != NULL) 451 (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); 452 453 /* zc_begin_record is non-byteswapped BEGIN record */ 454 error = recv_read(fd, &drr, sizeof (drr)); 455 if (error != 0) 456 goto out; 457 zc.zc_begin_record = drr.drr_u.drr_begin; 458 459 /* zc_cookie is fd to read from */ 460 zc.zc_cookie = fd; 461 462 /* zc guid is force flag */ 463 zc.zc_guid = force; 464 465 /* zc_cleanup_fd is unused */ 466 zc.zc_cleanup_fd = -1; 467 468 error = ioctl(g_fd, ZFS_IOC_RECV, &zc); 469 if (error != 0) 470 error = errno; 471 472 out: 473 if (packed != NULL) 474 fnvlist_pack_free(packed, size); 475 free((void*)(uintptr_t)zc.zc_nvlist_dst); 476 return (error); 477 } 478