1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. 25 */ 26 27 /* 28 * ZFS control directory (a.k.a. ".zfs") 29 * 30 * This directory provides a common location for all ZFS meta-objects. 31 * Currently, this is only the 'snapshot' directory, but this may expand in the 32 * future. The elements are built using the GFS primitives, as the hierarchy 33 * does not actually exist on disk. 34 * 35 * For 'snapshot', we don't want to have all snapshots always mounted, because 36 * this would take up a huge amount of space in /etc/mnttab. We have three 37 * types of objects: 38 * 39 * ctldir ------> snapshotdir -------> snapshot 40 * | 41 * | 42 * V 43 * mounted fs 44 * 45 * The 'snapshot' node contains just enough information to lookup '..' and act 46 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we 47 * perform an automount of the underlying filesystem and return the 48 * corresponding vnode. 49 * 50 * All mounts are handled automatically by the kernel, but unmounts are 51 * (currently) handled from user land. The main reason is that there is no 52 * reliable way to auto-unmount the filesystem when it's "no longer in use". 53 * When the user unmounts a filesystem, we call zfsctl_unmount(), which 54 * unmounts any snapshots within the snapshot directory. 55 * 56 * The '.zfs', '.zfs/snapshot', and all directories created under 57 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and 58 * share the same vfs_t as the head filesystem (what '.zfs' lives under). 59 * 60 * File systems mounted ontop of the GFS nodes '.zfs/snapshot/<snapname>' 61 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t. 62 * However, vnodes within these mounted on file systems have their v_vfsp 63 * fields set to the head filesystem to make NFS happy (see 64 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t 65 * so that it cannot be freed until all snapshots have been unmounted. 66 */ 67 68 #include <sys/types.h> 69 #include <sys/param.h> 70 #include <sys/libkern.h> 71 #include <sys/dirent.h> 72 #include <sys/zfs_context.h> 73 #include <sys/zfs_ctldir.h> 74 #include <sys/zfs_ioctl.h> 75 #include <sys/zfs_vfsops.h> 76 #include <sys/namei.h> 77 #include <sys/stat.h> 78 #include <sys/dmu.h> 79 #include <sys/dsl_dataset.h> 80 #include <sys/dsl_destroy.h> 81 #include <sys/dsl_deleg.h> 82 #include <sys/mount.h> 83 #include <sys/zap.h> 84 #include <sys/sysproto.h> 85 86 #include "zfs_namecheck.h" 87 88 #include <sys/kernel.h> 89 #include <sys/ccompat.h> 90 91 /* Common access mode for all virtual directories under the ctldir */ 92 const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | 93 S_IROTH | S_IXOTH; 94 95 /* 96 * "Synthetic" filesystem implementation. 97 */ 98 99 /* 100 * Assert that A implies B. 101 */ 102 #define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) || (B), (msg)); 103 104 static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes"); 105 106 typedef struct sfs_node { 107 char sn_name[ZFS_MAX_DATASET_NAME_LEN]; 108 uint64_t sn_parent_id; 109 uint64_t sn_id; 110 } sfs_node_t; 111 112 /* 113 * Check the parent's ID as well as the node's to account for a chance 114 * that IDs originating from different domains (snapshot IDs, artificial 115 * IDs, znode IDs) may clash. 116 */ 117 static int 118 sfs_compare_ids(struct vnode *vp, void *arg) 119 { 120 sfs_node_t *n1 = vp->v_data; 121 sfs_node_t *n2 = arg; 122 bool equal; 123 124 equal = n1->sn_id == n2->sn_id && 125 n1->sn_parent_id == n2->sn_parent_id; 126 127 /* Zero means equality. */ 128 return (!equal); 129 } 130 131 static int 132 sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id, 133 uint64_t id, struct vnode **vpp) 134 { 135 sfs_node_t search; 136 int err; 137 138 search.sn_id = id; 139 search.sn_parent_id = parent_id; 140 err = vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp, 141 sfs_compare_ids, &search); 142 return (err); 143 } 144 145 static int 146 sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id, 147 uint64_t id, struct vnode **vpp) 148 { 149 int err; 150 151 KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data")); 152 err = vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp, 153 sfs_compare_ids, vp->v_data); 154 return (err); 155 } 156 157 static void 158 sfs_vnode_remove(struct vnode *vp) 159 { 160 vfs_hash_remove(vp); 161 } 162 163 typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg); 164 165 static int 166 sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id, 167 const char *tag, struct vop_vector *vops, 168 sfs_vnode_setup_fn setup, void *arg, 169 struct vnode **vpp) 170 { 171 struct vnode *vp; 172 int error; 173 174 error = sfs_vnode_get(mp, flags, parent_id, id, vpp); 175 if (error != 0 || *vpp != NULL) { 176 KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 177 "sfs vnode with no data"); 178 return (error); 179 } 180 181 /* Allocate a new vnode/inode. */ 182 error = getnewvnode(tag, mp, vops, &vp); 183 if (error != 0) { 184 *vpp = NULL; 185 return (error); 186 } 187 188 /* 189 * Exclusively lock the vnode vnode while it's being constructed. 190 */ 191 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 192 error = insmntque(vp, mp); 193 if (error != 0) { 194 *vpp = NULL; 195 return (error); 196 } 197 198 setup(vp, arg); 199 200 error = sfs_vnode_insert(vp, flags, parent_id, id, vpp); 201 if (error != 0 || *vpp != NULL) { 202 KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 203 "sfs vnode with no data"); 204 return (error); 205 } 206 207 #if __FreeBSD_version >= 1400077 208 vn_set_state(vp, VSTATE_CONSTRUCTED); 209 #endif 210 211 *vpp = vp; 212 return (0); 213 } 214 215 static void 216 sfs_print_node(sfs_node_t *node) 217 { 218 printf("\tname = %s\n", node->sn_name); 219 printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id); 220 printf("\tid = %ju\n", (uintmax_t)node->sn_id); 221 } 222 223 static sfs_node_t * 224 sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id) 225 { 226 struct sfs_node *node; 227 228 KASSERT(strlen(name) < sizeof (node->sn_name), 229 ("sfs node name is too long")); 230 KASSERT(size >= sizeof (*node), ("sfs node size is too small")); 231 node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO); 232 strlcpy(node->sn_name, name, sizeof (node->sn_name)); 233 node->sn_parent_id = parent_id; 234 node->sn_id = id; 235 236 return (node); 237 } 238 239 static void 240 sfs_destroy_node(sfs_node_t *node) 241 { 242 free(node, M_SFSNODES); 243 } 244 245 static void * 246 sfs_reclaim_vnode(vnode_t *vp) 247 { 248 void *data; 249 250 sfs_vnode_remove(vp); 251 data = vp->v_data; 252 vp->v_data = NULL; 253 return (data); 254 } 255 256 static int 257 sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap, 258 zfs_uio_t *uio, off_t *offp) 259 { 260 struct dirent entry; 261 int error; 262 263 /* Reset ncookies for subsequent use of vfs_read_dirent. */ 264 if (ap->a_ncookies != NULL) 265 *ap->a_ncookies = 0; 266 267 if (zfs_uio_resid(uio) < sizeof (entry)) 268 return (SET_ERROR(EINVAL)); 269 270 if (zfs_uio_offset(uio) < 0) 271 return (SET_ERROR(EINVAL)); 272 if (zfs_uio_offset(uio) == 0) { 273 entry.d_fileno = id; 274 entry.d_type = DT_DIR; 275 entry.d_name[0] = '.'; 276 entry.d_name[1] = '\0'; 277 entry.d_namlen = 1; 278 entry.d_reclen = sizeof (entry); 279 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 280 if (error != 0) 281 return (SET_ERROR(error)); 282 } 283 284 if (zfs_uio_offset(uio) < sizeof (entry)) 285 return (SET_ERROR(EINVAL)); 286 if (zfs_uio_offset(uio) == sizeof (entry)) { 287 entry.d_fileno = parent_id; 288 entry.d_type = DT_DIR; 289 entry.d_name[0] = '.'; 290 entry.d_name[1] = '.'; 291 entry.d_name[2] = '\0'; 292 entry.d_namlen = 2; 293 entry.d_reclen = sizeof (entry); 294 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 295 if (error != 0) 296 return (SET_ERROR(error)); 297 } 298 299 if (offp != NULL) 300 *offp = 2 * sizeof (entry); 301 return (0); 302 } 303 304 305 /* 306 * .zfs inode namespace 307 * 308 * We need to generate unique inode numbers for all files and directories 309 * within the .zfs pseudo-filesystem. We use the following scheme: 310 * 311 * ENTRY ZFSCTL_INODE 312 * .zfs 1 313 * .zfs/snapshot 2 314 * .zfs/snapshot/<snap> objectid(snap) 315 */ 316 #define ZFSCTL_INO_SNAP(id) (id) 317 318 static struct vop_vector zfsctl_ops_root; 319 static struct vop_vector zfsctl_ops_snapdir; 320 static struct vop_vector zfsctl_ops_snapshot; 321 322 void 323 zfsctl_init(void) 324 { 325 } 326 327 void 328 zfsctl_fini(void) 329 { 330 } 331 332 boolean_t 333 zfsctl_is_node(vnode_t *vp) 334 { 335 return (vn_matchops(vp, zfsctl_ops_root) || 336 vn_matchops(vp, zfsctl_ops_snapdir) || 337 vn_matchops(vp, zfsctl_ops_snapshot)); 338 339 } 340 341 typedef struct zfsctl_root { 342 sfs_node_t node; 343 sfs_node_t *snapdir; 344 timestruc_t cmtime; 345 } zfsctl_root_t; 346 347 348 /* 349 * Create the '.zfs' directory. 350 */ 351 void 352 zfsctl_create(zfsvfs_t *zfsvfs) 353 { 354 zfsctl_root_t *dot_zfs; 355 sfs_node_t *snapdir; 356 vnode_t *rvp; 357 uint64_t crtime[2]; 358 359 ASSERT3P(zfsvfs->z_ctldir, ==, NULL); 360 361 snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT, 362 ZFSCTL_INO_SNAPDIR); 363 dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0, 364 ZFSCTL_INO_ROOT); 365 dot_zfs->snapdir = snapdir; 366 367 VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp)); 368 VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 369 &crtime, sizeof (crtime))); 370 ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime); 371 vput(rvp); 372 373 zfsvfs->z_ctldir = dot_zfs; 374 } 375 376 /* 377 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 378 * The nodes must not have any associated vnodes by now as they should be 379 * vflush-ed. 380 */ 381 void 382 zfsctl_destroy(zfsvfs_t *zfsvfs) 383 { 384 sfs_destroy_node(zfsvfs->z_ctldir->snapdir); 385 sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir); 386 zfsvfs->z_ctldir = NULL; 387 } 388 389 static int 390 zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags, 391 struct vnode **vpp) 392 { 393 return (VFS_ROOT(mp, flags, vpp)); 394 } 395 396 static void 397 zfsctl_common_vnode_setup(vnode_t *vp, void *arg) 398 { 399 ASSERT_VOP_ELOCKED(vp, __func__); 400 401 /* We support shared locking. */ 402 VN_LOCK_ASHARE(vp); 403 vp->v_type = VDIR; 404 vp->v_data = arg; 405 } 406 407 static int 408 zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags, 409 struct vnode **vpp) 410 { 411 void *node; 412 int err; 413 414 node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir; 415 err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root, 416 zfsctl_common_vnode_setup, node, vpp); 417 return (err); 418 } 419 420 static int 421 zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags, 422 struct vnode **vpp) 423 { 424 void *node; 425 int err; 426 427 node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir->snapdir; 428 err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs", 429 &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp); 430 return (err); 431 } 432 433 /* 434 * Given a root znode, retrieve the associated .zfs directory. 435 * Add a hold to the vnode and return it. 436 */ 437 int 438 zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp) 439 { 440 int error; 441 442 error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp); 443 return (error); 444 } 445 446 /* 447 * Common open routine. Disallow any write access. 448 */ 449 static int 450 zfsctl_common_open(struct vop_open_args *ap) 451 { 452 int flags = ap->a_mode; 453 454 if (flags & FWRITE) 455 return (SET_ERROR(EACCES)); 456 457 return (0); 458 } 459 460 /* 461 * Common close routine. Nothing to do here. 462 */ 463 static int 464 zfsctl_common_close(struct vop_close_args *ap) 465 { 466 (void) ap; 467 return (0); 468 } 469 470 /* 471 * Common access routine. Disallow writes. 472 */ 473 static int 474 zfsctl_common_access(struct vop_access_args *ap) 475 { 476 accmode_t accmode = ap->a_accmode; 477 478 if (accmode & VWRITE) 479 return (SET_ERROR(EACCES)); 480 return (0); 481 } 482 483 /* 484 * Common getattr function. Fill in basic information. 485 */ 486 static void 487 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) 488 { 489 timestruc_t now; 490 sfs_node_t *node; 491 492 node = vp->v_data; 493 494 vap->va_uid = 0; 495 vap->va_gid = 0; 496 vap->va_rdev = 0; 497 /* 498 * We are a purely virtual object, so we have no 499 * blocksize or allocated blocks. 500 */ 501 vap->va_blksize = 0; 502 vap->va_nblocks = 0; 503 vap->va_gen = 0; 504 vn_fsid(vp, vap); 505 vap->va_mode = zfsctl_ctldir_mode; 506 vap->va_type = VDIR; 507 /* 508 * We live in the now (for atime). 509 */ 510 gethrestime(&now); 511 vap->va_atime = now; 512 /* FreeBSD: Reset chflags(2) flags. */ 513 vap->va_flags = 0; 514 515 vap->va_nodeid = node->sn_id; 516 517 /* At least '.' and '..'. */ 518 vap->va_nlink = 2; 519 } 520 521 #ifndef _OPENSOLARIS_SYS_VNODE_H_ 522 struct vop_fid_args { 523 struct vnode *a_vp; 524 struct fid *a_fid; 525 }; 526 #endif 527 528 static int 529 zfsctl_common_fid(struct vop_fid_args *ap) 530 { 531 vnode_t *vp = ap->a_vp; 532 fid_t *fidp = (void *)ap->a_fid; 533 sfs_node_t *node = vp->v_data; 534 uint64_t object = node->sn_id; 535 zfid_short_t *zfid; 536 int i; 537 538 zfid = (zfid_short_t *)fidp; 539 zfid->zf_len = SHORT_FID_LEN; 540 541 for (i = 0; i < sizeof (zfid->zf_object); i++) 542 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 543 544 /* .zfs nodes always have a generation number of 0 */ 545 for (i = 0; i < sizeof (zfid->zf_gen); i++) 546 zfid->zf_gen[i] = 0; 547 548 return (0); 549 } 550 551 #ifndef _SYS_SYSPROTO_H_ 552 struct vop_reclaim_args { 553 struct vnode *a_vp; 554 struct thread *a_td; 555 }; 556 #endif 557 558 static int 559 zfsctl_common_reclaim(struct vop_reclaim_args *ap) 560 { 561 vnode_t *vp = ap->a_vp; 562 563 (void) sfs_reclaim_vnode(vp); 564 return (0); 565 } 566 567 #ifndef _SYS_SYSPROTO_H_ 568 struct vop_print_args { 569 struct vnode *a_vp; 570 }; 571 #endif 572 573 static int 574 zfsctl_common_print(struct vop_print_args *ap) 575 { 576 sfs_print_node(ap->a_vp->v_data); 577 return (0); 578 } 579 580 #ifndef _SYS_SYSPROTO_H_ 581 struct vop_getattr_args { 582 struct vnode *a_vp; 583 struct vattr *a_vap; 584 struct ucred *a_cred; 585 }; 586 #endif 587 588 /* 589 * Get root directory attributes. 590 */ 591 static int 592 zfsctl_root_getattr(struct vop_getattr_args *ap) 593 { 594 struct vnode *vp = ap->a_vp; 595 struct vattr *vap = ap->a_vap; 596 zfsctl_root_t *node = vp->v_data; 597 598 zfsctl_common_getattr(vp, vap); 599 vap->va_ctime = node->cmtime; 600 vap->va_mtime = vap->va_ctime; 601 vap->va_birthtime = vap->va_ctime; 602 vap->va_nlink += 1; /* snapdir */ 603 vap->va_size = vap->va_nlink; 604 return (0); 605 } 606 607 /* 608 * When we lookup "." we still can be asked to lock it 609 * differently, can't we? 610 */ 611 static int 612 zfsctl_relock_dot(vnode_t *dvp, int ltype) 613 { 614 vref(dvp); 615 if (ltype != VOP_ISLOCKED(dvp)) { 616 if (ltype == LK_EXCLUSIVE) 617 vn_lock(dvp, LK_UPGRADE | LK_RETRY); 618 else /* if (ltype == LK_SHARED) */ 619 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 620 621 /* Relock for the "." case may left us with reclaimed vnode. */ 622 if (VN_IS_DOOMED(dvp)) { 623 vrele(dvp); 624 return (SET_ERROR(ENOENT)); 625 } 626 } 627 return (0); 628 } 629 630 /* 631 * Special case the handling of "..". 632 */ 633 static int 634 zfsctl_root_lookup(struct vop_lookup_args *ap) 635 { 636 struct componentname *cnp = ap->a_cnp; 637 vnode_t *dvp = ap->a_dvp; 638 vnode_t **vpp = ap->a_vpp; 639 int flags = ap->a_cnp->cn_flags; 640 int lkflags = ap->a_cnp->cn_lkflags; 641 int nameiop = ap->a_cnp->cn_nameiop; 642 int err; 643 644 ASSERT3S(dvp->v_type, ==, VDIR); 645 646 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 647 return (SET_ERROR(ENOTSUP)); 648 649 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 650 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 651 if (err == 0) 652 *vpp = dvp; 653 } else if ((flags & ISDOTDOT) != 0) { 654 err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL, 655 lkflags, vpp); 656 } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) { 657 err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp); 658 } else { 659 err = SET_ERROR(ENOENT); 660 } 661 if (err != 0) 662 *vpp = NULL; 663 return (err); 664 } 665 666 static int 667 zfsctl_root_readdir(struct vop_readdir_args *ap) 668 { 669 struct dirent entry; 670 vnode_t *vp = ap->a_vp; 671 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 672 zfsctl_root_t *node = vp->v_data; 673 zfs_uio_t uio; 674 int *eofp = ap->a_eofflag; 675 off_t dots_offset; 676 int error; 677 678 zfs_uio_init(&uio, ap->a_uio); 679 680 ASSERT3S(vp->v_type, ==, VDIR); 681 682 /* 683 * FIXME: this routine only ever emits 3 entries and does not tolerate 684 * being called with a buffer too small to handle all of them. 685 * 686 * The check below facilitates the idiom of repeating calls until the 687 * count to return is 0. 688 */ 689 if (zfs_uio_offset(&uio) == 3 * sizeof(entry)) { 690 return (0); 691 } 692 693 error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, 694 &dots_offset); 695 if (error != 0) { 696 if (error == ENAMETOOLONG) /* ran out of destination space */ 697 error = 0; 698 return (error); 699 } 700 if (zfs_uio_offset(&uio) != dots_offset) 701 return (SET_ERROR(EINVAL)); 702 703 _Static_assert(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name), 704 "node->snapdir->sn_name too big for entry.d_name"); 705 entry.d_fileno = node->snapdir->sn_id; 706 entry.d_type = DT_DIR; 707 strcpy(entry.d_name, node->snapdir->sn_name); 708 entry.d_namlen = strlen(entry.d_name); 709 entry.d_reclen = sizeof (entry); 710 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 711 if (error != 0) { 712 if (error == ENAMETOOLONG) 713 error = 0; 714 return (SET_ERROR(error)); 715 } 716 if (eofp != NULL) 717 *eofp = 1; 718 return (0); 719 } 720 721 static int 722 zfsctl_root_vptocnp(struct vop_vptocnp_args *ap) 723 { 724 static const char dotzfs_name[4] = ".zfs"; 725 vnode_t *dvp; 726 int error; 727 728 if (*ap->a_buflen < sizeof (dotzfs_name)) 729 return (SET_ERROR(ENOMEM)); 730 731 error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL, 732 LK_SHARED, &dvp); 733 if (error != 0) 734 return (SET_ERROR(error)); 735 736 VOP_UNLOCK1(dvp); 737 *ap->a_vpp = dvp; 738 *ap->a_buflen -= sizeof (dotzfs_name); 739 memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name)); 740 return (0); 741 } 742 743 static int 744 zfsctl_common_pathconf(struct vop_pathconf_args *ap) 745 { 746 /* 747 * We care about ACL variables so that user land utilities like ls 748 * can display them correctly. Since the ctldir's st_dev is set to be 749 * the same as the parent dataset, we must support all variables that 750 * it supports. 751 */ 752 switch (ap->a_name) { 753 case _PC_LINK_MAX: 754 *ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX); 755 return (0); 756 757 case _PC_FILESIZEBITS: 758 *ap->a_retval = 64; 759 return (0); 760 761 case _PC_MIN_HOLE_SIZE: 762 *ap->a_retval = (int)SPA_MINBLOCKSIZE; 763 return (0); 764 765 case _PC_ACL_EXTENDED: 766 *ap->a_retval = 0; 767 return (0); 768 769 case _PC_ACL_NFS4: 770 *ap->a_retval = 1; 771 return (0); 772 773 case _PC_ACL_PATH_MAX: 774 *ap->a_retval = ACL_MAX_ENTRIES; 775 return (0); 776 777 case _PC_NAME_MAX: 778 *ap->a_retval = NAME_MAX; 779 return (0); 780 781 default: 782 return (vop_stdpathconf(ap)); 783 } 784 } 785 786 /* 787 * Returns a trivial ACL 788 */ 789 static int 790 zfsctl_common_getacl(struct vop_getacl_args *ap) 791 { 792 int i; 793 794 if (ap->a_type != ACL_TYPE_NFS4) 795 return (EINVAL); 796 797 acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0); 798 /* 799 * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify 800 * attributes. That is not the case for the ctldir, so we must clear 801 * those bits. We also must clear ACL_READ_NAMED_ATTRS, because xattrs 802 * aren't supported by the ctldir. 803 */ 804 for (i = 0; i < ap->a_aclp->acl_cnt; i++) { 805 struct acl_entry *entry; 806 entry = &(ap->a_aclp->acl_entry[i]); 807 entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER | 808 ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS | 809 ACL_READ_NAMED_ATTRS); 810 } 811 812 return (0); 813 } 814 815 static struct vop_vector zfsctl_ops_root = { 816 .vop_default = &default_vnodeops, 817 #if __FreeBSD_version >= 1300121 818 .vop_fplookup_vexec = VOP_EAGAIN, 819 #endif 820 #if __FreeBSD_version >= 1300139 821 .vop_fplookup_symlink = VOP_EAGAIN, 822 #endif 823 .vop_open = zfsctl_common_open, 824 .vop_close = zfsctl_common_close, 825 .vop_ioctl = VOP_EINVAL, 826 .vop_getattr = zfsctl_root_getattr, 827 .vop_access = zfsctl_common_access, 828 .vop_readdir = zfsctl_root_readdir, 829 .vop_lookup = zfsctl_root_lookup, 830 .vop_inactive = VOP_NULL, 831 .vop_reclaim = zfsctl_common_reclaim, 832 .vop_fid = zfsctl_common_fid, 833 .vop_print = zfsctl_common_print, 834 .vop_vptocnp = zfsctl_root_vptocnp, 835 .vop_pathconf = zfsctl_common_pathconf, 836 .vop_getacl = zfsctl_common_getacl, 837 #if __FreeBSD_version >= 1400043 838 .vop_add_writecount = vop_stdadd_writecount_nomsync, 839 #endif 840 }; 841 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root); 842 843 static int 844 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) 845 { 846 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 847 848 dmu_objset_name(os, zname); 849 if (strlen(zname) + 1 + strlen(name) >= len) 850 return (SET_ERROR(ENAMETOOLONG)); 851 (void) strcat(zname, "@"); 852 (void) strcat(zname, name); 853 return (0); 854 } 855 856 static int 857 zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id) 858 { 859 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 860 int err; 861 862 err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id); 863 return (err); 864 } 865 866 /* 867 * Given a vnode get a root vnode of a filesystem mounted on top of 868 * the vnode, if any. The root vnode is referenced and locked. 869 * If no filesystem is mounted then the orinal vnode remains referenced 870 * and locked. If any error happens the orinal vnode is unlocked and 871 * released. 872 */ 873 static int 874 zfsctl_mounted_here(vnode_t **vpp, int flags) 875 { 876 struct mount *mp; 877 int err; 878 879 ASSERT_VOP_LOCKED(*vpp, __func__); 880 ASSERT3S((*vpp)->v_type, ==, VDIR); 881 882 if ((mp = (*vpp)->v_mountedhere) != NULL) { 883 err = vfs_busy(mp, 0); 884 KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err)); 885 KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint")); 886 vput(*vpp); 887 err = VFS_ROOT(mp, flags, vpp); 888 vfs_unbusy(mp); 889 return (err); 890 } 891 return (EJUSTRETURN); 892 } 893 894 typedef struct { 895 const char *snap_name; 896 uint64_t snap_id; 897 } snapshot_setup_arg_t; 898 899 static void 900 zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg) 901 { 902 snapshot_setup_arg_t *ssa = arg; 903 sfs_node_t *node; 904 905 ASSERT_VOP_ELOCKED(vp, __func__); 906 907 node = sfs_alloc_node(sizeof (sfs_node_t), 908 ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id); 909 zfsctl_common_vnode_setup(vp, node); 910 911 /* We have to support recursive locking. */ 912 VN_LOCK_AREC(vp); 913 } 914 915 /* 916 * Lookup entry point for the 'snapshot' directory. Try to open the 917 * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 918 * Perform a mount of the associated dataset on top of the vnode. 919 * There are four possibilities: 920 * - the snapshot node and vnode do not exist 921 * - the snapshot vnode is covered by the mounted snapshot 922 * - the snapshot vnode is not covered yet, the mount operation is in progress 923 * - the snapshot vnode is not covered, because the snapshot has been unmounted 924 * The last two states are transient and should be relatively short-lived. 925 */ 926 static int 927 zfsctl_snapdir_lookup(struct vop_lookup_args *ap) 928 { 929 vnode_t *dvp = ap->a_dvp; 930 vnode_t **vpp = ap->a_vpp; 931 struct componentname *cnp = ap->a_cnp; 932 char name[NAME_MAX + 1]; 933 char fullname[ZFS_MAX_DATASET_NAME_LEN]; 934 char *mountpoint; 935 size_t mountpoint_len; 936 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 937 uint64_t snap_id; 938 int nameiop = cnp->cn_nameiop; 939 int lkflags = cnp->cn_lkflags; 940 int flags = cnp->cn_flags; 941 int err; 942 943 ASSERT3S(dvp->v_type, ==, VDIR); 944 945 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 946 return (SET_ERROR(ENOTSUP)); 947 948 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 949 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 950 if (err == 0) 951 *vpp = dvp; 952 return (err); 953 } 954 if (flags & ISDOTDOT) { 955 err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags, 956 vpp); 957 return (err); 958 } 959 960 if (cnp->cn_namelen >= sizeof (name)) 961 return (SET_ERROR(ENAMETOOLONG)); 962 963 strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); 964 err = zfsctl_snapshot_lookup(dvp, name, &snap_id); 965 if (err != 0) 966 return (SET_ERROR(ENOENT)); 967 968 for (;;) { 969 snapshot_setup_arg_t ssa; 970 971 ssa.snap_name = name; 972 ssa.snap_id = snap_id; 973 err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR, 974 snap_id, "zfs", &zfsctl_ops_snapshot, 975 zfsctl_snapshot_vnode_setup, &ssa, vpp); 976 if (err != 0) 977 return (err); 978 979 /* Check if a new vnode has just been created. */ 980 if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE) 981 break; 982 983 /* 984 * Check if a snapshot is already mounted on top of the vnode. 985 */ 986 err = zfsctl_mounted_here(vpp, lkflags); 987 if (err != EJUSTRETURN) 988 return (err); 989 990 /* 991 * If the vnode is not covered, then either the mount operation 992 * is in progress or the snapshot has already been unmounted 993 * but the vnode hasn't been inactivated and reclaimed yet. 994 * We can try to re-use the vnode in the latter case. 995 */ 996 VI_LOCK(*vpp); 997 if (((*vpp)->v_iflag & VI_MOUNT) == 0) { 998 VI_UNLOCK(*vpp); 999 /* 1000 * Upgrade to exclusive lock in order to: 1001 * - avoid race conditions 1002 * - satisfy the contract of mount_snapshot() 1003 */ 1004 err = VOP_LOCK(*vpp, LK_TRYUPGRADE); 1005 if (err == 0) 1006 break; 1007 } else { 1008 VI_UNLOCK(*vpp); 1009 } 1010 1011 /* 1012 * In this state we can loop on uncontested locks and starve 1013 * the thread doing the lengthy, non-trivial mount operation. 1014 * So, yield to prevent that from happening. 1015 */ 1016 vput(*vpp); 1017 kern_yield(PRI_USER); 1018 } 1019 1020 VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname)); 1021 1022 mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) + 1023 strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1; 1024 mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); 1025 (void) snprintf(mountpoint, mountpoint_len, 1026 "%s/" ZFS_CTLDIR_NAME "/snapshot/%s", 1027 dvp->v_vfsp->mnt_stat.f_mntonname, name); 1028 1029 err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0); 1030 kmem_free(mountpoint, mountpoint_len); 1031 if (err == 0) { 1032 /* 1033 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. 1034 * 1035 * This is where we lie about our v_vfsp in order to 1036 * make .zfs/snapshot/<snapname> accessible over NFS 1037 * without requiring manual mounts of <snapname>. 1038 */ 1039 ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs); 1040 VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; 1041 1042 /* Clear the root flag (set via VFS_ROOT) as well. */ 1043 (*vpp)->v_vflag &= ~VV_ROOT; 1044 } 1045 1046 if (err != 0) 1047 *vpp = NULL; 1048 return (err); 1049 } 1050 1051 static int 1052 zfsctl_snapdir_readdir(struct vop_readdir_args *ap) 1053 { 1054 char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1055 struct dirent entry; 1056 vnode_t *vp = ap->a_vp; 1057 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1058 zfs_uio_t uio; 1059 int *eofp = ap->a_eofflag; 1060 off_t dots_offset; 1061 int error; 1062 1063 zfs_uio_init(&uio, ap->a_uio); 1064 1065 ASSERT3S(vp->v_type, ==, VDIR); 1066 1067 error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, 1068 &uio, &dots_offset); 1069 if (error != 0) { 1070 if (error == ENAMETOOLONG) /* ran out of destination space */ 1071 error = 0; 1072 return (error); 1073 } 1074 1075 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1076 return (error); 1077 for (;;) { 1078 uint64_t cookie; 1079 uint64_t id; 1080 1081 cookie = zfs_uio_offset(&uio) - dots_offset; 1082 1083 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1084 error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1085 snapname, &id, &cookie, NULL); 1086 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1087 if (error != 0) { 1088 if (error == ENOENT) { 1089 if (eofp != NULL) 1090 *eofp = 1; 1091 error = 0; 1092 } 1093 zfs_exit(zfsvfs, FTAG); 1094 return (error); 1095 } 1096 1097 entry.d_fileno = id; 1098 entry.d_type = DT_DIR; 1099 strcpy(entry.d_name, snapname); 1100 entry.d_namlen = strlen(entry.d_name); 1101 entry.d_reclen = sizeof (entry); 1102 error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 1103 if (error != 0) { 1104 if (error == ENAMETOOLONG) 1105 error = 0; 1106 zfs_exit(zfsvfs, FTAG); 1107 return (SET_ERROR(error)); 1108 } 1109 zfs_uio_setoffset(&uio, cookie + dots_offset); 1110 } 1111 __builtin_unreachable(); 1112 } 1113 1114 static int 1115 zfsctl_snapdir_getattr(struct vop_getattr_args *ap) 1116 { 1117 vnode_t *vp = ap->a_vp; 1118 vattr_t *vap = ap->a_vap; 1119 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1120 dsl_dataset_t *ds; 1121 uint64_t snap_count; 1122 int err; 1123 1124 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1125 return (err); 1126 ds = dmu_objset_ds(zfsvfs->z_os); 1127 zfsctl_common_getattr(vp, vap); 1128 vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os); 1129 vap->va_mtime = vap->va_ctime; 1130 vap->va_birthtime = vap->va_ctime; 1131 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) { 1132 err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset, 1133 dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); 1134 if (err != 0) { 1135 zfs_exit(zfsvfs, FTAG); 1136 return (err); 1137 } 1138 vap->va_nlink += snap_count; 1139 } 1140 vap->va_size = vap->va_nlink; 1141 1142 zfs_exit(zfsvfs, FTAG); 1143 return (0); 1144 } 1145 1146 static struct vop_vector zfsctl_ops_snapdir = { 1147 .vop_default = &default_vnodeops, 1148 #if __FreeBSD_version >= 1300121 1149 .vop_fplookup_vexec = VOP_EAGAIN, 1150 #endif 1151 #if __FreeBSD_version >= 1300139 1152 .vop_fplookup_symlink = VOP_EAGAIN, 1153 #endif 1154 .vop_open = zfsctl_common_open, 1155 .vop_close = zfsctl_common_close, 1156 .vop_getattr = zfsctl_snapdir_getattr, 1157 .vop_access = zfsctl_common_access, 1158 .vop_readdir = zfsctl_snapdir_readdir, 1159 .vop_lookup = zfsctl_snapdir_lookup, 1160 .vop_reclaim = zfsctl_common_reclaim, 1161 .vop_fid = zfsctl_common_fid, 1162 .vop_print = zfsctl_common_print, 1163 .vop_pathconf = zfsctl_common_pathconf, 1164 .vop_getacl = zfsctl_common_getacl, 1165 #if __FreeBSD_version >= 1400043 1166 .vop_add_writecount = vop_stdadd_writecount_nomsync, 1167 #endif 1168 }; 1169 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir); 1170 1171 1172 static int 1173 zfsctl_snapshot_inactive(struct vop_inactive_args *ap) 1174 { 1175 vnode_t *vp = ap->a_vp; 1176 1177 vrecycle(vp); 1178 return (0); 1179 } 1180 1181 static int 1182 zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap) 1183 { 1184 vnode_t *vp = ap->a_vp; 1185 void *data = vp->v_data; 1186 1187 sfs_reclaim_vnode(vp); 1188 sfs_destroy_node(data); 1189 return (0); 1190 } 1191 1192 static int 1193 zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) 1194 { 1195 struct mount *mp; 1196 vnode_t *dvp; 1197 vnode_t *vp; 1198 sfs_node_t *node; 1199 size_t len; 1200 int locked; 1201 int error; 1202 1203 vp = ap->a_vp; 1204 node = vp->v_data; 1205 len = strlen(node->sn_name); 1206 if (*ap->a_buflen < len) 1207 return (SET_ERROR(ENOMEM)); 1208 1209 /* 1210 * Prevent unmounting of the snapshot while the vnode lock 1211 * is not held. That is not strictly required, but allows 1212 * us to assert that an uncovered snapshot vnode is never 1213 * "leaked". 1214 */ 1215 mp = vp->v_mountedhere; 1216 if (mp == NULL) 1217 return (SET_ERROR(ENOENT)); 1218 error = vfs_busy(mp, 0); 1219 KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error)); 1220 1221 /* 1222 * We can vput the vnode as we can now depend on the reference owned 1223 * by the busied mp. But we also need to hold the vnode, because 1224 * the reference may go after vfs_unbusy() which has to be called 1225 * before we can lock the vnode again. 1226 */ 1227 locked = VOP_ISLOCKED(vp); 1228 #if __FreeBSD_version >= 1300045 1229 enum vgetstate vs = vget_prep(vp); 1230 #else 1231 vhold(vp); 1232 #endif 1233 vput(vp); 1234 1235 /* Look up .zfs/snapshot, our parent. */ 1236 error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp); 1237 if (error == 0) { 1238 VOP_UNLOCK1(dvp); 1239 *ap->a_vpp = dvp; 1240 *ap->a_buflen -= len; 1241 memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len); 1242 } 1243 vfs_unbusy(mp); 1244 #if __FreeBSD_version >= 1300045 1245 vget_finish(vp, locked | LK_RETRY, vs); 1246 #else 1247 vget(vp, locked | LK_VNHELD | LK_RETRY, curthread); 1248 #endif 1249 return (error); 1250 } 1251 1252 /* 1253 * These VP's should never see the light of day. They should always 1254 * be covered. 1255 */ 1256 static struct vop_vector zfsctl_ops_snapshot = { 1257 .vop_default = NULL, /* ensure very restricted access */ 1258 #if __FreeBSD_version >= 1300121 1259 .vop_fplookup_vexec = VOP_EAGAIN, 1260 #endif 1261 #if __FreeBSD_version >= 1300139 1262 .vop_fplookup_symlink = VOP_EAGAIN, 1263 #endif 1264 .vop_open = zfsctl_common_open, 1265 .vop_close = zfsctl_common_close, 1266 .vop_inactive = zfsctl_snapshot_inactive, 1267 #if __FreeBSD_version >= 1300045 1268 .vop_need_inactive = vop_stdneed_inactive, 1269 #endif 1270 .vop_reclaim = zfsctl_snapshot_reclaim, 1271 .vop_vptocnp = zfsctl_snapshot_vptocnp, 1272 .vop_lock1 = vop_stdlock, 1273 .vop_unlock = vop_stdunlock, 1274 .vop_islocked = vop_stdislocked, 1275 .vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */ 1276 .vop_print = zfsctl_common_print, 1277 #if __FreeBSD_version >= 1400043 1278 .vop_add_writecount = vop_stdadd_writecount_nomsync, 1279 #endif 1280 }; 1281 VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot); 1282 1283 int 1284 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) 1285 { 1286 zfsvfs_t *zfsvfs __unused = vfsp->vfs_data; 1287 vnode_t *vp; 1288 int error; 1289 1290 ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1291 *zfsvfsp = NULL; 1292 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1293 ZFSCTL_INO_SNAPDIR, objsetid, &vp); 1294 if (error == 0 && vp != NULL) { 1295 /* 1296 * XXX Probably need to at least reference, if not busy, the mp. 1297 */ 1298 if (vp->v_mountedhere != NULL) 1299 *zfsvfsp = vp->v_mountedhere->mnt_data; 1300 vput(vp); 1301 } 1302 if (*zfsvfsp == NULL) 1303 return (SET_ERROR(EINVAL)); 1304 return (0); 1305 } 1306 1307 /* 1308 * Unmount any snapshots for the given filesystem. This is called from 1309 * zfs_umount() - if we have a ctldir, then go through and unmount all the 1310 * snapshots. 1311 */ 1312 int 1313 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) 1314 { 1315 char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1316 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1317 struct mount *mp; 1318 vnode_t *vp; 1319 uint64_t cookie; 1320 int error; 1321 1322 ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1323 1324 cookie = 0; 1325 for (;;) { 1326 uint64_t id; 1327 1328 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1329 error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1330 snapname, &id, &cookie, NULL); 1331 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1332 if (error != 0) { 1333 if (error == ENOENT) 1334 error = 0; 1335 break; 1336 } 1337 1338 for (;;) { 1339 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1340 ZFSCTL_INO_SNAPDIR, id, &vp); 1341 if (error != 0 || vp == NULL) 1342 break; 1343 1344 mp = vp->v_mountedhere; 1345 1346 /* 1347 * v_mountedhere being NULL means that the 1348 * (uncovered) vnode is in a transient state 1349 * (mounting or unmounting), so loop until it 1350 * settles down. 1351 */ 1352 if (mp != NULL) 1353 break; 1354 vput(vp); 1355 } 1356 if (error != 0) 1357 break; 1358 if (vp == NULL) 1359 continue; /* no mountpoint, nothing to do */ 1360 1361 /* 1362 * The mount-point vnode is kept locked to avoid spurious EBUSY 1363 * from a concurrent umount. 1364 * The vnode lock must have recursive locking enabled. 1365 */ 1366 vfs_ref(mp); 1367 error = dounmount(mp, fflags, curthread); 1368 KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1, 1369 ("extra references after unmount")); 1370 vput(vp); 1371 if (error != 0) 1372 break; 1373 } 1374 KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0, 1375 ("force unmounting failed")); 1376 return (error); 1377 } 1378 1379 int 1380 zfsctl_snapshot_unmount(const char *snapname, int flags __unused) 1381 { 1382 vfs_t *vfsp = NULL; 1383 zfsvfs_t *zfsvfs = NULL; 1384 1385 if (strchr(snapname, '@') == NULL) 1386 return (0); 1387 1388 int err = getzfsvfs(snapname, &zfsvfs); 1389 if (err != 0) { 1390 ASSERT3P(zfsvfs, ==, NULL); 1391 return (0); 1392 } 1393 vfsp = zfsvfs->z_vfs; 1394 1395 ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); 1396 1397 vfs_ref(vfsp); 1398 vfs_unbusy(vfsp); 1399 return (dounmount(vfsp, MS_FORCE, curthread)); 1400 } 1401