1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. 25 */ 26 27 /* 28 * ZFS control directory (a.k.a. ".zfs") 29 * 30 * This directory provides a common location for all ZFS meta-objects. 31 * Currently, this is only the 'snapshot' directory, but this may expand in the 32 * future. The elements are built using the GFS primitives, as the hierarchy 33 * does not actually exist on disk. 34 * 35 * For 'snapshot', we don't want to have all snapshots always mounted, because 36 * this would take up a huge amount of space in /etc/mnttab. We have three 37 * types of objects: 38 * 39 * ctldir ------> snapshotdir -------> snapshot 40 * | 41 * | 42 * V 43 * mounted fs 44 * 45 * The 'snapshot' node contains just enough information to lookup '..' and act 46 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we 47 * perform an automount of the underlying filesystem and return the 48 * corresponding vnode. 49 * 50 * All mounts are handled automatically by the kernel, but unmounts are 51 * (currently) handled from user land. The main reason is that there is no 52 * reliable way to auto-unmount the filesystem when it's "no longer in use". 53 * When the user unmounts a filesystem, we call zfsctl_unmount(), which 54 * unmounts any snapshots within the snapshot directory. 55 * 56 * The '.zfs', '.zfs/snapshot', and all directories created under 57 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and 58 * share the same vfs_t as the head filesystem (what '.zfs' lives under). 59 * 60 * File systems mounted ontop of the GFS nodes '.zfs/snapshot/<snapname>' 61 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t. 62 * However, vnodes within these mounted on file systems have their v_vfsp 63 * fields set to the head filesystem to make NFS happy (see 64 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t 65 * so that it cannot be freed until all snapshots have been unmounted. 66 */ 67 68 #include <sys/types.h> 69 #include <sys/param.h> 70 #include <sys/libkern.h> 71 #include <sys/dirent.h> 72 #include <sys/zfs_context.h> 73 #include <sys/zfs_ctldir.h> 74 #include <sys/zfs_ioctl.h> 75 #include <sys/zfs_vfsops.h> 76 #include <sys/namei.h> 77 #include <sys/stat.h> 78 #include <sys/dmu.h> 79 #include <sys/dsl_dataset.h> 80 #include <sys/dsl_destroy.h> 81 #include <sys/dsl_deleg.h> 82 #include <sys/mount.h> 83 #include <sys/zap.h> 84 #include <sys/sysproto.h> 85 86 #include "zfs_namecheck.h" 87 88 #include <sys/kernel.h> 89 #include <sys/ccompat.h> 90 91 /* Common access mode for all virtual directories under the ctldir */ 92 const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | 93 S_IROTH | S_IXOTH; 94 95 /* 96 * "Synthetic" filesystem implementation. 97 */ 98 99 /* 100 * Assert that A implies B. 101 */ 102 #define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) || (B), (msg)); 103 104 static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes"); 105 106 typedef struct sfs_node { 107 char sn_name[ZFS_MAX_DATASET_NAME_LEN]; 108 uint64_t sn_parent_id; 109 uint64_t sn_id; 110 } sfs_node_t; 111 112 /* 113 * Check the parent's ID as well as the node's to account for a chance 114 * that IDs originating from different domains (snapshot IDs, artificial 115 * IDs, znode IDs) may clash. 116 */ 117 static int 118 sfs_compare_ids(struct vnode *vp, void *arg) 119 { 120 sfs_node_t *n1 = vp->v_data; 121 sfs_node_t *n2 = arg; 122 bool equal; 123 124 equal = n1->sn_id == n2->sn_id && 125 n1->sn_parent_id == n2->sn_parent_id; 126 127 /* Zero means equality. */ 128 return (!equal); 129 } 130 131 static int 132 sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id, 133 uint64_t id, struct vnode **vpp) 134 { 135 sfs_node_t search; 136 int err; 137 138 search.sn_id = id; 139 search.sn_parent_id = parent_id; 140 err = vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp, 141 sfs_compare_ids, &search); 142 return (err); 143 } 144 145 static int 146 sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id, 147 uint64_t id, struct vnode **vpp) 148 { 149 int err; 150 151 KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data")); 152 err = vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp, 153 sfs_compare_ids, vp->v_data); 154 return (err); 155 } 156 157 static void 158 sfs_vnode_remove(struct vnode *vp) 159 { 160 vfs_hash_remove(vp); 161 } 162 163 typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg); 164 165 static int 166 sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id, 167 const char *tag, struct vop_vector *vops, 168 sfs_vnode_setup_fn setup, void *arg, 169 struct vnode **vpp) 170 { 171 struct vnode *vp; 172 int error; 173 174 error = sfs_vnode_get(mp, flags, parent_id, id, vpp); 175 if (error != 0 || *vpp != NULL) { 176 KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 177 "sfs vnode with no data"); 178 return (error); 179 } 180 181 /* Allocate a new vnode/inode. */ 182 error = getnewvnode(tag, mp, vops, &vp); 183 if (error != 0) { 184 *vpp = NULL; 185 return (error); 186 } 187 188 /* 189 * Exclusively lock the vnode vnode while it's being constructed. 190 */ 191 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 192 error = insmntque(vp, mp); 193 if (error != 0) { 194 *vpp = NULL; 195 return (error); 196 } 197 198 setup(vp, arg); 199 200 error = sfs_vnode_insert(vp, flags, parent_id, id, vpp); 201 if (error != 0 || *vpp != NULL) { 202 KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 203 "sfs vnode with no data"); 204 return (error); 205 } 206 207 #if __FreeBSD_version >= 1400077 208 vn_set_state(vp, VSTATE_CONSTRUCTED); 209 #endif 210 211 *vpp = vp; 212 return (0); 213 } 214 215 static void 216 sfs_print_node(sfs_node_t *node) 217 { 218 printf("\tname = %s\n", node->sn_name); 219 printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id); 220 printf("\tid = %ju\n", (uintmax_t)node->sn_id); 221 } 222 223 static sfs_node_t * 224 sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id) 225 { 226 struct sfs_node *node; 227 228 KASSERT(strlen(name) < sizeof (node->sn_name), 229 ("sfs node name is too long")); 230 KASSERT(size >= sizeof (*node), ("sfs node size is too small")); 231 node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO); 232 strlcpy(node->sn_name, name, sizeof (node->sn_name)); 233 node->sn_parent_id = parent_id; 234 node->sn_id = id; 235 236 return (node); 237 } 238 239 static void 240 sfs_destroy_node(sfs_node_t *node) 241 { 242 free(node, M_SFSNODES); 243 } 244 245 static void * 246 sfs_reclaim_vnode(vnode_t *vp) 247 { 248 void *data; 249 250 sfs_vnode_remove(vp); 251 data = vp->v_data; 252 vp->v_data = NULL; 253 return (data); 254 } 255 256 static int 257 sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap, 258 zfs_uio_t *uio, off_t *offp) 259 { 260 struct dirent entry; 261 int error; 262 263 /* Reset ncookies for subsequent use of vfs_read_dirent. */ 264 if (ap->a_ncookies != NULL) 265 *ap->a_ncookies = 0; 266 267 if (zfs_uio_resid(uio) < sizeof (entry)) 268 return (SET_ERROR(EINVAL)); 269 270 if (zfs_uio_offset(uio) < 0) 271 return (SET_ERROR(EINVAL)); 272 if (zfs_uio_offset(uio) == 0) { 273 entry.d_fileno = id; 274 entry.d_type = DT_DIR; 275 entry.d_name[0] = '.'; 276 entry.d_name[1] = '\0'; 277 entry.d_namlen = 1; 278 entry.d_reclen = sizeof (entry); 279 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 280 if (error != 0) 281 return (SET_ERROR(error)); 282 } 283 284 if (zfs_uio_offset(uio) < sizeof (entry)) 285 return (SET_ERROR(EINVAL)); 286 if (zfs_uio_offset(uio) == sizeof (entry)) { 287 entry.d_fileno = parent_id; 288 entry.d_type = DT_DIR; 289 entry.d_name[0] = '.'; 290 entry.d_name[1] = '.'; 291 entry.d_name[2] = '\0'; 292 entry.d_namlen = 2; 293 entry.d_reclen = sizeof (entry); 294 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 295 if (error != 0) 296 return (SET_ERROR(error)); 297 } 298 299 if (offp != NULL) 300 *offp = 2 * sizeof (entry); 301 return (0); 302 } 303 304 305 /* 306 * .zfs inode namespace 307 * 308 * We need to generate unique inode numbers for all files and directories 309 * within the .zfs pseudo-filesystem. We use the following scheme: 310 * 311 * ENTRY ZFSCTL_INODE 312 * .zfs 1 313 * .zfs/snapshot 2 314 * .zfs/snapshot/<snap> objectid(snap) 315 */ 316 #define ZFSCTL_INO_SNAP(id) (id) 317 318 static struct vop_vector zfsctl_ops_root; 319 static struct vop_vector zfsctl_ops_snapdir; 320 static struct vop_vector zfsctl_ops_snapshot; 321 322 void 323 zfsctl_init(void) 324 { 325 } 326 327 void 328 zfsctl_fini(void) 329 { 330 } 331 332 boolean_t 333 zfsctl_is_node(vnode_t *vp) 334 { 335 return (vn_matchops(vp, zfsctl_ops_root) || 336 vn_matchops(vp, zfsctl_ops_snapdir) || 337 vn_matchops(vp, zfsctl_ops_snapshot)); 338 339 } 340 341 typedef struct zfsctl_root { 342 sfs_node_t node; 343 sfs_node_t *snapdir; 344 timestruc_t cmtime; 345 } zfsctl_root_t; 346 347 348 /* 349 * Create the '.zfs' directory. 350 */ 351 void 352 zfsctl_create(zfsvfs_t *zfsvfs) 353 { 354 zfsctl_root_t *dot_zfs; 355 sfs_node_t *snapdir; 356 vnode_t *rvp; 357 uint64_t crtime[2]; 358 359 ASSERT3P(zfsvfs->z_ctldir, ==, NULL); 360 361 snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT, 362 ZFSCTL_INO_SNAPDIR); 363 dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0, 364 ZFSCTL_INO_ROOT); 365 dot_zfs->snapdir = snapdir; 366 367 VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp)); 368 VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 369 &crtime, sizeof (crtime))); 370 ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime); 371 vput(rvp); 372 373 zfsvfs->z_ctldir = dot_zfs; 374 } 375 376 /* 377 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 378 * The nodes must not have any associated vnodes by now as they should be 379 * vflush-ed. 380 */ 381 void 382 zfsctl_destroy(zfsvfs_t *zfsvfs) 383 { 384 sfs_destroy_node(zfsvfs->z_ctldir->snapdir); 385 sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir); 386 zfsvfs->z_ctldir = NULL; 387 } 388 389 static int 390 zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags, 391 struct vnode **vpp) 392 { 393 return (VFS_ROOT(mp, flags, vpp)); 394 } 395 396 static void 397 zfsctl_common_vnode_setup(vnode_t *vp, void *arg) 398 { 399 ASSERT_VOP_ELOCKED(vp, __func__); 400 401 /* We support shared locking. */ 402 VN_LOCK_ASHARE(vp); 403 vp->v_type = VDIR; 404 vp->v_data = arg; 405 } 406 407 static int 408 zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags, 409 struct vnode **vpp) 410 { 411 void *node; 412 int err; 413 414 node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir; 415 err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root, 416 zfsctl_common_vnode_setup, node, vpp); 417 return (err); 418 } 419 420 static int 421 zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags, 422 struct vnode **vpp) 423 { 424 void *node; 425 int err; 426 427 node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir->snapdir; 428 err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs", 429 &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp); 430 return (err); 431 } 432 433 /* 434 * Given a root znode, retrieve the associated .zfs directory. 435 * Add a hold to the vnode and return it. 436 */ 437 int 438 zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp) 439 { 440 int error; 441 442 error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp); 443 return (error); 444 } 445 446 /* 447 * Common open routine. Disallow any write access. 448 */ 449 static int 450 zfsctl_common_open(struct vop_open_args *ap) 451 { 452 int flags = ap->a_mode; 453 454 if (flags & FWRITE) 455 return (SET_ERROR(EACCES)); 456 457 return (0); 458 } 459 460 /* 461 * Common close routine. Nothing to do here. 462 */ 463 static int 464 zfsctl_common_close(struct vop_close_args *ap) 465 { 466 (void) ap; 467 return (0); 468 } 469 470 /* 471 * Common access routine. Disallow writes. 472 */ 473 static int 474 zfsctl_common_access(struct vop_access_args *ap) 475 { 476 accmode_t accmode = ap->a_accmode; 477 478 if (accmode & VWRITE) 479 return (SET_ERROR(EACCES)); 480 return (0); 481 } 482 483 /* 484 * Common getattr function. Fill in basic information. 485 */ 486 static void 487 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) 488 { 489 timestruc_t now; 490 sfs_node_t *node; 491 492 node = vp->v_data; 493 494 vap->va_uid = 0; 495 vap->va_gid = 0; 496 vap->va_rdev = 0; 497 /* 498 * We are a purely virtual object, so we have no 499 * blocksize or allocated blocks. 500 */ 501 vap->va_blksize = 0; 502 vap->va_nblocks = 0; 503 vap->va_gen = 0; 504 vn_fsid(vp, vap); 505 vap->va_mode = zfsctl_ctldir_mode; 506 vap->va_type = VDIR; 507 /* 508 * We live in the now (for atime). 509 */ 510 gethrestime(&now); 511 vap->va_atime = now; 512 /* FreeBSD: Reset chflags(2) flags. */ 513 vap->va_flags = 0; 514 515 vap->va_nodeid = node->sn_id; 516 517 /* At least '.' and '..'. */ 518 vap->va_nlink = 2; 519 } 520 521 #ifndef _OPENSOLARIS_SYS_VNODE_H_ 522 struct vop_fid_args { 523 struct vnode *a_vp; 524 struct fid *a_fid; 525 }; 526 #endif 527 528 static int 529 zfsctl_common_fid(struct vop_fid_args *ap) 530 { 531 vnode_t *vp = ap->a_vp; 532 fid_t *fidp = (void *)ap->a_fid; 533 sfs_node_t *node = vp->v_data; 534 uint64_t object = node->sn_id; 535 zfid_short_t *zfid; 536 int i; 537 538 zfid = (zfid_short_t *)fidp; 539 zfid->zf_len = SHORT_FID_LEN; 540 541 for (i = 0; i < sizeof (zfid->zf_object); i++) 542 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 543 544 /* .zfs nodes always have a generation number of 0 */ 545 for (i = 0; i < sizeof (zfid->zf_gen); i++) 546 zfid->zf_gen[i] = 0; 547 548 return (0); 549 } 550 551 #ifndef _SYS_SYSPROTO_H_ 552 struct vop_reclaim_args { 553 struct vnode *a_vp; 554 struct thread *a_td; 555 }; 556 #endif 557 558 static int 559 zfsctl_common_reclaim(struct vop_reclaim_args *ap) 560 { 561 vnode_t *vp = ap->a_vp; 562 563 (void) sfs_reclaim_vnode(vp); 564 return (0); 565 } 566 567 #ifndef _SYS_SYSPROTO_H_ 568 struct vop_print_args { 569 struct vnode *a_vp; 570 }; 571 #endif 572 573 static int 574 zfsctl_common_print(struct vop_print_args *ap) 575 { 576 sfs_print_node(ap->a_vp->v_data); 577 return (0); 578 } 579 580 #ifndef _SYS_SYSPROTO_H_ 581 struct vop_getattr_args { 582 struct vnode *a_vp; 583 struct vattr *a_vap; 584 struct ucred *a_cred; 585 }; 586 #endif 587 588 /* 589 * Get root directory attributes. 590 */ 591 static int 592 zfsctl_root_getattr(struct vop_getattr_args *ap) 593 { 594 struct vnode *vp = ap->a_vp; 595 struct vattr *vap = ap->a_vap; 596 zfsctl_root_t *node = vp->v_data; 597 598 zfsctl_common_getattr(vp, vap); 599 vap->va_ctime = node->cmtime; 600 vap->va_mtime = vap->va_ctime; 601 vap->va_birthtime = vap->va_ctime; 602 vap->va_nlink += 1; /* snapdir */ 603 vap->va_size = vap->va_nlink; 604 return (0); 605 } 606 607 /* 608 * When we lookup "." we still can be asked to lock it 609 * differently, can't we? 610 */ 611 static int 612 zfsctl_relock_dot(vnode_t *dvp, int ltype) 613 { 614 vref(dvp); 615 if (ltype != VOP_ISLOCKED(dvp)) { 616 if (ltype == LK_EXCLUSIVE) 617 vn_lock(dvp, LK_UPGRADE | LK_RETRY); 618 else /* if (ltype == LK_SHARED) */ 619 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 620 621 /* Relock for the "." case may left us with reclaimed vnode. */ 622 if (VN_IS_DOOMED(dvp)) { 623 vrele(dvp); 624 return (SET_ERROR(ENOENT)); 625 } 626 } 627 return (0); 628 } 629 630 /* 631 * Special case the handling of "..". 632 */ 633 static int 634 zfsctl_root_lookup(struct vop_lookup_args *ap) 635 { 636 struct componentname *cnp = ap->a_cnp; 637 vnode_t *dvp = ap->a_dvp; 638 vnode_t **vpp = ap->a_vpp; 639 int flags = ap->a_cnp->cn_flags; 640 int lkflags = ap->a_cnp->cn_lkflags; 641 int nameiop = ap->a_cnp->cn_nameiop; 642 int err; 643 644 ASSERT3S(dvp->v_type, ==, VDIR); 645 646 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 647 return (SET_ERROR(ENOTSUP)); 648 649 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 650 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 651 if (err == 0) 652 *vpp = dvp; 653 } else if ((flags & ISDOTDOT) != 0) { 654 err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL, 655 lkflags, vpp); 656 } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) { 657 err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp); 658 } else { 659 err = SET_ERROR(ENOENT); 660 } 661 if (err != 0) 662 *vpp = NULL; 663 return (err); 664 } 665 666 static int 667 zfsctl_root_readdir(struct vop_readdir_args *ap) 668 { 669 struct dirent entry; 670 vnode_t *vp = ap->a_vp; 671 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 672 zfsctl_root_t *node = vp->v_data; 673 zfs_uio_t uio; 674 int *eofp = ap->a_eofflag; 675 off_t dots_offset; 676 int error; 677 678 zfs_uio_init(&uio, ap->a_uio); 679 680 ASSERT3S(vp->v_type, ==, VDIR); 681 682 /* 683 * FIXME: this routine only ever emits 3 entries and does not tolerate 684 * being called with a buffer too small to handle all of them. 685 * 686 * The check below facilitates the idiom of repeating calls until the 687 * count to return is 0. 688 */ 689 if (zfs_uio_offset(&uio) == 3 * sizeof(entry)) { 690 return (0); 691 } 692 693 error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, 694 &dots_offset); 695 if (error != 0) { 696 if (error == ENAMETOOLONG) /* ran out of destination space */ 697 error = 0; 698 return (error); 699 } 700 if (zfs_uio_offset(&uio) != dots_offset) 701 return (SET_ERROR(EINVAL)); 702 703 _Static_assert(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name), 704 "node->snapdir->sn_name too big for entry.d_name"); 705 entry.d_fileno = node->snapdir->sn_id; 706 entry.d_type = DT_DIR; 707 strcpy(entry.d_name, node->snapdir->sn_name); 708 entry.d_namlen = strlen(entry.d_name); 709 entry.d_reclen = sizeof (entry); 710 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 711 if (error != 0) { 712 if (error == ENAMETOOLONG) 713 error = 0; 714 return (SET_ERROR(error)); 715 } 716 if (eofp != NULL) 717 *eofp = 1; 718 return (0); 719 } 720 721 static int 722 zfsctl_root_vptocnp(struct vop_vptocnp_args *ap) 723 { 724 static const char dotzfs_name[4] = ".zfs"; 725 vnode_t *dvp; 726 int error; 727 728 if (*ap->a_buflen < sizeof (dotzfs_name)) 729 return (SET_ERROR(ENOMEM)); 730 731 error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL, 732 LK_SHARED, &dvp); 733 if (error != 0) 734 return (SET_ERROR(error)); 735 736 VOP_UNLOCK1(dvp); 737 *ap->a_vpp = dvp; 738 *ap->a_buflen -= sizeof (dotzfs_name); 739 memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name)); 740 return (0); 741 } 742 743 static int 744 zfsctl_common_pathconf(struct vop_pathconf_args *ap) 745 { 746 /* 747 * We care about ACL variables so that user land utilities like ls 748 * can display them correctly. Since the ctldir's st_dev is set to be 749 * the same as the parent dataset, we must support all variables that 750 * it supports. 751 */ 752 switch (ap->a_name) { 753 case _PC_LINK_MAX: 754 *ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX); 755 return (0); 756 757 case _PC_FILESIZEBITS: 758 *ap->a_retval = 64; 759 return (0); 760 761 case _PC_MIN_HOLE_SIZE: 762 *ap->a_retval = (int)SPA_MINBLOCKSIZE; 763 return (0); 764 765 case _PC_ACL_EXTENDED: 766 *ap->a_retval = 0; 767 return (0); 768 769 case _PC_ACL_NFS4: 770 *ap->a_retval = 1; 771 return (0); 772 773 case _PC_ACL_PATH_MAX: 774 *ap->a_retval = ACL_MAX_ENTRIES; 775 return (0); 776 777 case _PC_NAME_MAX: 778 *ap->a_retval = NAME_MAX; 779 return (0); 780 781 default: 782 return (vop_stdpathconf(ap)); 783 } 784 } 785 786 /* 787 * Returns a trivial ACL 788 */ 789 static int 790 zfsctl_common_getacl(struct vop_getacl_args *ap) 791 { 792 int i; 793 794 if (ap->a_type != ACL_TYPE_NFS4) 795 return (EINVAL); 796 797 acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0); 798 /* 799 * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify 800 * attributes. That is not the case for the ctldir, so we must clear 801 * those bits. We also must clear ACL_READ_NAMED_ATTRS, because xattrs 802 * aren't supported by the ctldir. 803 */ 804 for (i = 0; i < ap->a_aclp->acl_cnt; i++) { 805 struct acl_entry *entry; 806 entry = &(ap->a_aclp->acl_entry[i]); 807 entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER | 808 ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS | 809 ACL_READ_NAMED_ATTRS); 810 } 811 812 return (0); 813 } 814 815 static struct vop_vector zfsctl_ops_root = { 816 .vop_default = &default_vnodeops, 817 #if __FreeBSD_version >= 1300121 818 .vop_fplookup_vexec = VOP_EAGAIN, 819 #endif 820 #if __FreeBSD_version >= 1300139 821 .vop_fplookup_symlink = VOP_EAGAIN, 822 #endif 823 .vop_open = zfsctl_common_open, 824 .vop_close = zfsctl_common_close, 825 .vop_ioctl = VOP_EINVAL, 826 .vop_getattr = zfsctl_root_getattr, 827 .vop_access = zfsctl_common_access, 828 .vop_readdir = zfsctl_root_readdir, 829 .vop_lookup = zfsctl_root_lookup, 830 .vop_inactive = VOP_NULL, 831 .vop_reclaim = zfsctl_common_reclaim, 832 .vop_fid = zfsctl_common_fid, 833 .vop_print = zfsctl_common_print, 834 .vop_vptocnp = zfsctl_root_vptocnp, 835 .vop_pathconf = zfsctl_common_pathconf, 836 .vop_getacl = zfsctl_common_getacl, 837 #if __FreeBSD_version >= 1400043 838 .vop_add_writecount = vop_stdadd_writecount_nomsync, 839 #endif 840 }; 841 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root); 842 843 static int 844 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) 845 { 846 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 847 848 dmu_objset_name(os, zname); 849 if (strlen(zname) + 1 + strlen(name) >= len) 850 return (SET_ERROR(ENAMETOOLONG)); 851 (void) strcat(zname, "@"); 852 (void) strcat(zname, name); 853 return (0); 854 } 855 856 static int 857 zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id) 858 { 859 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 860 int err; 861 862 err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id); 863 return (err); 864 } 865 866 /* 867 * Given a vnode get a root vnode of a filesystem mounted on top of 868 * the vnode, if any. The root vnode is referenced and locked. 869 * If no filesystem is mounted then the orinal vnode remains referenced 870 * and locked. If any error happens the orinal vnode is unlocked and 871 * released. 872 */ 873 static int 874 zfsctl_mounted_here(vnode_t **vpp, int flags) 875 { 876 struct mount *mp; 877 int err; 878 879 ASSERT_VOP_LOCKED(*vpp, __func__); 880 ASSERT3S((*vpp)->v_type, ==, VDIR); 881 882 if ((mp = (*vpp)->v_mountedhere) != NULL) { 883 err = vfs_busy(mp, 0); 884 KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err)); 885 KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint")); 886 vput(*vpp); 887 err = VFS_ROOT(mp, flags, vpp); 888 vfs_unbusy(mp); 889 return (err); 890 } 891 return (EJUSTRETURN); 892 } 893 894 typedef struct { 895 const char *snap_name; 896 uint64_t snap_id; 897 } snapshot_setup_arg_t; 898 899 static void 900 zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg) 901 { 902 snapshot_setup_arg_t *ssa = arg; 903 sfs_node_t *node; 904 905 ASSERT_VOP_ELOCKED(vp, __func__); 906 907 node = sfs_alloc_node(sizeof (sfs_node_t), 908 ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id); 909 zfsctl_common_vnode_setup(vp, node); 910 911 /* We have to support recursive locking. */ 912 VN_LOCK_AREC(vp); 913 } 914 915 /* 916 * Lookup entry point for the 'snapshot' directory. Try to open the 917 * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 918 * Perform a mount of the associated dataset on top of the vnode. 919 * There are four possibilities: 920 * - the snapshot node and vnode do not exist 921 * - the snapshot vnode is covered by the mounted snapshot 922 * - the snapshot vnode is not covered yet, the mount operation is in progress 923 * - the snapshot vnode is not covered, because the snapshot has been unmounted 924 * The last two states are transient and should be relatively short-lived. 925 */ 926 static int 927 zfsctl_snapdir_lookup(struct vop_lookup_args *ap) 928 { 929 vnode_t *dvp = ap->a_dvp; 930 vnode_t **vpp = ap->a_vpp; 931 struct componentname *cnp = ap->a_cnp; 932 char name[NAME_MAX + 1]; 933 char fullname[ZFS_MAX_DATASET_NAME_LEN]; 934 char *mountpoint; 935 size_t mountpoint_len; 936 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 937 uint64_t snap_id; 938 int nameiop = cnp->cn_nameiop; 939 int lkflags = cnp->cn_lkflags; 940 int flags = cnp->cn_flags; 941 int err; 942 943 ASSERT3S(dvp->v_type, ==, VDIR); 944 945 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 946 return (SET_ERROR(ENOTSUP)); 947 948 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 949 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 950 if (err == 0) 951 *vpp = dvp; 952 return (err); 953 } 954 if (flags & ISDOTDOT) { 955 err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags, 956 vpp); 957 return (err); 958 } 959 960 if (cnp->cn_namelen >= sizeof (name)) 961 return (SET_ERROR(ENAMETOOLONG)); 962 963 strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); 964 err = zfsctl_snapshot_lookup(dvp, name, &snap_id); 965 if (err != 0) 966 return (SET_ERROR(ENOENT)); 967 968 for (;;) { 969 snapshot_setup_arg_t ssa; 970 971 ssa.snap_name = name; 972 ssa.snap_id = snap_id; 973 err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR, 974 snap_id, "zfs", &zfsctl_ops_snapshot, 975 zfsctl_snapshot_vnode_setup, &ssa, vpp); 976 if (err != 0) 977 return (err); 978 979 /* Check if a new vnode has just been created. */ 980 if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE) 981 break; 982 983 /* 984 * Check if a snapshot is already mounted on top of the vnode. 985 */ 986 err = zfsctl_mounted_here(vpp, lkflags); 987 if (err != EJUSTRETURN) 988 return (err); 989 990 /* 991 * If the vnode is not covered, then either the mount operation 992 * is in progress or the snapshot has already been unmounted 993 * but the vnode hasn't been inactivated and reclaimed yet. 994 * We can try to re-use the vnode in the latter case. 995 */ 996 VI_LOCK(*vpp); 997 if (((*vpp)->v_iflag & VI_MOUNT) == 0) { 998 VI_UNLOCK(*vpp); 999 /* 1000 * Upgrade to exclusive lock in order to: 1001 * - avoid race conditions 1002 * - satisfy the contract of mount_snapshot() 1003 */ 1004 err = VOP_LOCK(*vpp, LK_TRYUPGRADE); 1005 if (err == 0) 1006 break; 1007 } else { 1008 VI_UNLOCK(*vpp); 1009 } 1010 1011 /* 1012 * In this state we can loop on uncontested locks and starve 1013 * the thread doing the lengthy, non-trivial mount operation. 1014 * So, yield to prevent that from happening. 1015 */ 1016 vput(*vpp); 1017 kern_yield(PRI_USER); 1018 } 1019 1020 VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname)); 1021 1022 mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) + 1023 strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1; 1024 mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); 1025 (void) snprintf(mountpoint, mountpoint_len, 1026 "%s/" ZFS_CTLDIR_NAME "/snapshot/%s", 1027 dvp->v_vfsp->mnt_stat.f_mntonname, name); 1028 1029 err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0, 1030 dvp->v_vfsp); 1031 kmem_free(mountpoint, mountpoint_len); 1032 if (err == 0) { 1033 /* 1034 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. 1035 * 1036 * This is where we lie about our v_vfsp in order to 1037 * make .zfs/snapshot/<snapname> accessible over NFS 1038 * without requiring manual mounts of <snapname>. 1039 */ 1040 ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs); 1041 VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; 1042 1043 /* Clear the root flag (set via VFS_ROOT) as well. */ 1044 (*vpp)->v_vflag &= ~VV_ROOT; 1045 } 1046 1047 if (err != 0) 1048 *vpp = NULL; 1049 return (err); 1050 } 1051 1052 static int 1053 zfsctl_snapdir_readdir(struct vop_readdir_args *ap) 1054 { 1055 char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1056 struct dirent entry; 1057 vnode_t *vp = ap->a_vp; 1058 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1059 zfs_uio_t uio; 1060 int *eofp = ap->a_eofflag; 1061 off_t dots_offset; 1062 int error; 1063 1064 zfs_uio_init(&uio, ap->a_uio); 1065 1066 ASSERT3S(vp->v_type, ==, VDIR); 1067 1068 error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, 1069 &uio, &dots_offset); 1070 if (error != 0) { 1071 if (error == ENAMETOOLONG) /* ran out of destination space */ 1072 error = 0; 1073 return (error); 1074 } 1075 1076 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1077 return (error); 1078 for (;;) { 1079 uint64_t cookie; 1080 uint64_t id; 1081 1082 cookie = zfs_uio_offset(&uio) - dots_offset; 1083 1084 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1085 error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1086 snapname, &id, &cookie, NULL); 1087 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1088 if (error != 0) { 1089 if (error == ENOENT) { 1090 if (eofp != NULL) 1091 *eofp = 1; 1092 error = 0; 1093 } 1094 zfs_exit(zfsvfs, FTAG); 1095 return (error); 1096 } 1097 1098 entry.d_fileno = id; 1099 entry.d_type = DT_DIR; 1100 strcpy(entry.d_name, snapname); 1101 entry.d_namlen = strlen(entry.d_name); 1102 entry.d_reclen = sizeof (entry); 1103 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 1104 if (error != 0) { 1105 if (error == ENAMETOOLONG) 1106 error = 0; 1107 zfs_exit(zfsvfs, FTAG); 1108 return (SET_ERROR(error)); 1109 } 1110 zfs_uio_setoffset(&uio, cookie + dots_offset); 1111 } 1112 __builtin_unreachable(); 1113 } 1114 1115 static int 1116 zfsctl_snapdir_getattr(struct vop_getattr_args *ap) 1117 { 1118 vnode_t *vp = ap->a_vp; 1119 vattr_t *vap = ap->a_vap; 1120 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1121 dsl_dataset_t *ds; 1122 uint64_t snap_count; 1123 int err; 1124 1125 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1126 return (err); 1127 ds = dmu_objset_ds(zfsvfs->z_os); 1128 zfsctl_common_getattr(vp, vap); 1129 vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os); 1130 vap->va_mtime = vap->va_ctime; 1131 vap->va_birthtime = vap->va_ctime; 1132 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) { 1133 err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset, 1134 dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); 1135 if (err != 0) { 1136 zfs_exit(zfsvfs, FTAG); 1137 return (err); 1138 } 1139 vap->va_nlink += snap_count; 1140 } 1141 vap->va_size = vap->va_nlink; 1142 1143 zfs_exit(zfsvfs, FTAG); 1144 return (0); 1145 } 1146 1147 static struct vop_vector zfsctl_ops_snapdir = { 1148 .vop_default = &default_vnodeops, 1149 #if __FreeBSD_version >= 1300121 1150 .vop_fplookup_vexec = VOP_EAGAIN, 1151 #endif 1152 #if __FreeBSD_version >= 1300139 1153 .vop_fplookup_symlink = VOP_EAGAIN, 1154 #endif 1155 .vop_open = zfsctl_common_open, 1156 .vop_close = zfsctl_common_close, 1157 .vop_getattr = zfsctl_snapdir_getattr, 1158 .vop_access = zfsctl_common_access, 1159 .vop_readdir = zfsctl_snapdir_readdir, 1160 .vop_lookup = zfsctl_snapdir_lookup, 1161 .vop_reclaim = zfsctl_common_reclaim, 1162 .vop_fid = zfsctl_common_fid, 1163 .vop_print = zfsctl_common_print, 1164 .vop_pathconf = zfsctl_common_pathconf, 1165 .vop_getacl = zfsctl_common_getacl, 1166 #if __FreeBSD_version >= 1400043 1167 .vop_add_writecount = vop_stdadd_writecount_nomsync, 1168 #endif 1169 }; 1170 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir); 1171 1172 1173 static int 1174 zfsctl_snapshot_inactive(struct vop_inactive_args *ap) 1175 { 1176 vnode_t *vp = ap->a_vp; 1177 1178 vrecycle(vp); 1179 return (0); 1180 } 1181 1182 static int 1183 zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap) 1184 { 1185 vnode_t *vp = ap->a_vp; 1186 void *data = vp->v_data; 1187 1188 sfs_reclaim_vnode(vp); 1189 sfs_destroy_node(data); 1190 return (0); 1191 } 1192 1193 static int 1194 zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) 1195 { 1196 struct mount *mp; 1197 vnode_t *dvp; 1198 vnode_t *vp; 1199 sfs_node_t *node; 1200 size_t len; 1201 int locked; 1202 int error; 1203 1204 vp = ap->a_vp; 1205 node = vp->v_data; 1206 len = strlen(node->sn_name); 1207 if (*ap->a_buflen < len) 1208 return (SET_ERROR(ENOMEM)); 1209 1210 /* 1211 * Prevent unmounting of the snapshot while the vnode lock 1212 * is not held. That is not strictly required, but allows 1213 * us to assert that an uncovered snapshot vnode is never 1214 * "leaked". 1215 */ 1216 mp = vp->v_mountedhere; 1217 if (mp == NULL) 1218 return (SET_ERROR(ENOENT)); 1219 error = vfs_busy(mp, 0); 1220 KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error)); 1221 1222 /* 1223 * We can vput the vnode as we can now depend on the reference owned 1224 * by the busied mp. But we also need to hold the vnode, because 1225 * the reference may go after vfs_unbusy() which has to be called 1226 * before we can lock the vnode again. 1227 */ 1228 locked = VOP_ISLOCKED(vp); 1229 #if __FreeBSD_version >= 1300045 1230 enum vgetstate vs = vget_prep(vp); 1231 #else 1232 vhold(vp); 1233 #endif 1234 vput(vp); 1235 1236 /* Look up .zfs/snapshot, our parent. */ 1237 error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp); 1238 if (error == 0) { 1239 VOP_UNLOCK1(dvp); 1240 *ap->a_vpp = dvp; 1241 *ap->a_buflen -= len; 1242 memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len); 1243 } 1244 vfs_unbusy(mp); 1245 #if __FreeBSD_version >= 1300045 1246 vget_finish(vp, locked | LK_RETRY, vs); 1247 #else 1248 vget(vp, locked | LK_VNHELD | LK_RETRY, curthread); 1249 #endif 1250 return (error); 1251 } 1252 1253 /* 1254 * These VP's should never see the light of day. They should always 1255 * be covered. 1256 */ 1257 static struct vop_vector zfsctl_ops_snapshot = { 1258 .vop_default = NULL, /* ensure very restricted access */ 1259 #if __FreeBSD_version >= 1300121 1260 .vop_fplookup_vexec = VOP_EAGAIN, 1261 #endif 1262 #if __FreeBSD_version >= 1300139 1263 .vop_fplookup_symlink = VOP_EAGAIN, 1264 #endif 1265 .vop_open = zfsctl_common_open, 1266 .vop_close = zfsctl_common_close, 1267 .vop_inactive = zfsctl_snapshot_inactive, 1268 #if __FreeBSD_version >= 1300045 1269 .vop_need_inactive = vop_stdneed_inactive, 1270 #endif 1271 .vop_reclaim = zfsctl_snapshot_reclaim, 1272 .vop_vptocnp = zfsctl_snapshot_vptocnp, 1273 .vop_lock1 = vop_stdlock, 1274 .vop_unlock = vop_stdunlock, 1275 .vop_islocked = vop_stdislocked, 1276 .vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */ 1277 .vop_print = zfsctl_common_print, 1278 #if __FreeBSD_version >= 1400043 1279 .vop_add_writecount = vop_stdadd_writecount_nomsync, 1280 #endif 1281 }; 1282 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot); 1283 1284 int 1285 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) 1286 { 1287 zfsvfs_t *zfsvfs __unused = vfsp->vfs_data; 1288 vnode_t *vp; 1289 int error; 1290 1291 ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1292 *zfsvfsp = NULL; 1293 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1294 ZFSCTL_INO_SNAPDIR, objsetid, &vp); 1295 if (error == 0 && vp != NULL) { 1296 /* 1297 * XXX Probably need to at least reference, if not busy, the mp. 1298 */ 1299 if (vp->v_mountedhere != NULL) 1300 *zfsvfsp = vp->v_mountedhere->mnt_data; 1301 vput(vp); 1302 } 1303 if (*zfsvfsp == NULL) 1304 return (SET_ERROR(EINVAL)); 1305 return (0); 1306 } 1307 1308 /* 1309 * Unmount any snapshots for the given filesystem. This is called from 1310 * zfs_umount() - if we have a ctldir, then go through and unmount all the 1311 * snapshots. 1312 */ 1313 int 1314 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) 1315 { 1316 char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1317 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1318 struct mount *mp; 1319 vnode_t *vp; 1320 uint64_t cookie; 1321 int error; 1322 1323 ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1324 1325 cookie = 0; 1326 for (;;) { 1327 uint64_t id; 1328 1329 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1330 error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1331 snapname, &id, &cookie, NULL); 1332 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1333 if (error != 0) { 1334 if (error == ENOENT) 1335 error = 0; 1336 break; 1337 } 1338 1339 for (;;) { 1340 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1341 ZFSCTL_INO_SNAPDIR, id, &vp); 1342 if (error != 0 || vp == NULL) 1343 break; 1344 1345 mp = vp->v_mountedhere; 1346 1347 /* 1348 * v_mountedhere being NULL means that the 1349 * (uncovered) vnode is in a transient state 1350 * (mounting or unmounting), so loop until it 1351 * settles down. 1352 */ 1353 if (mp != NULL) 1354 break; 1355 vput(vp); 1356 } 1357 if (error != 0) 1358 break; 1359 if (vp == NULL) 1360 continue; /* no mountpoint, nothing to do */ 1361 1362 /* 1363 * The mount-point vnode is kept locked to avoid spurious EBUSY 1364 * from a concurrent umount. 1365 * The vnode lock must have recursive locking enabled. 1366 */ 1367 vfs_ref(mp); 1368 error = dounmount(mp, fflags, curthread); 1369 KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1, 1370 ("extra references after unmount")); 1371 vput(vp); 1372 if (error != 0) 1373 break; 1374 } 1375 KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0, 1376 ("force unmounting failed")); 1377 return (error); 1378 } 1379 1380 int 1381 zfsctl_snapshot_unmount(const char *snapname, int flags __unused) 1382 { 1383 vfs_t *vfsp = NULL; 1384 zfsvfs_t *zfsvfs = NULL; 1385 1386 if (strchr(snapname, '@') == NULL) 1387 return (0); 1388 1389 int err = getzfsvfs(snapname, &zfsvfs); 1390 if (err != 0) { 1391 ASSERT3P(zfsvfs, ==, NULL); 1392 return (0); 1393 } 1394 vfsp = zfsvfs->z_vfs; 1395 1396 ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); 1397 1398 vfs_ref(vfsp); 1399 vfs_unbusy(vfsp); 1400 return (dounmount(vfsp, MS_FORCE, curthread)); 1401 } 1402