1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
25  */
26 
27 /*
28  * ZFS control directory (a.k.a. ".zfs")
29  *
30  * This directory provides a common location for all ZFS meta-objects.
31  * Currently, this is only the 'snapshot' directory, but this may expand in the
32  * future.  The elements are built using the GFS primitives, as the hierarchy
33  * does not actually exist on disk.
34  *
35  * For 'snapshot', we don't want to have all snapshots always mounted, because
36  * this would take up a huge amount of space in /etc/mnttab.  We have three
37  * types of objects:
38  *
39  * 	ctldir ------> snapshotdir -------> snapshot
40  *                                             |
41  *                                             |
42  *                                             V
43  *                                         mounted fs
44  *
45  * The 'snapshot' node contains just enough information to lookup '..' and act
46  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
47  * perform an automount of the underlying filesystem and return the
48  * corresponding vnode.
49  *
50  * All mounts are handled automatically by the kernel, but unmounts are
51  * (currently) handled from user land.  The main reason is that there is no
52  * reliable way to auto-unmount the filesystem when it's "no longer in use".
53  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
54  * unmounts any snapshots within the snapshot directory.
55  *
56  * The '.zfs', '.zfs/snapshot', and all directories created under
57  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
58  * share the same vfs_t as the head filesystem (what '.zfs' lives under).
59  *
60  * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
61  * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
62  * However, vnodes within these mounted on file systems have their v_vfsp
63  * fields set to the head filesystem to make NFS happy (see
64  * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
65  * so that it cannot be freed until all snapshots have been unmounted.
66  */
67 
68 #ifdef __FreeBSD__
69 
70 #include <sys/zfs_context.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_ioctl.h>
73 #include <sys/zfs_vfsops.h>
74 #include <sys/namei.h>
75 #include <sys/stat.h>
76 #include <sys/dmu.h>
77 #include <sys/dsl_dataset.h>
78 #include <sys/dsl_destroy.h>
79 #include <sys/dsl_deleg.h>
80 #include <sys/mount.h>
81 #include <sys/zap.h>
82 
83 #include "zfs_namecheck.h"
84 
85 /*
86  * "Synthetic" filesystem implementation.
87  */
88 
/*
 * Assert that A implies B.
 *
 * The expansion carries no trailing semicolon, so the macro behaves like an
 * ordinary statement: the caller supplies the ';' and the macro can be used
 * as the body of an unbraced if/else.  'msg' is a plain string literal; it
 * is wrapped in parentheses here because KASSERT takes its message as a
 * parenthesized argument list.
 */
#define	KASSERT_IMPLY(A, B, msg)	KASSERT(!(A) || (B), (msg))
93 
/* Malloc type passed to malloc()/free() when sfs nodes are allocated/freed. */
static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
95 
/*
 * Identity of a synthetic-filesystem node.  A pointer to one of these is
 * stored in the v_data field of the corresponding vnode.
 */
typedef struct sfs_node {
	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];	/* entry name */
	uint64_t	sn_parent_id;	/* id of the parent node */
	uint64_t	sn_id;		/* id of this node; used as va_nodeid */
} sfs_node_t;
101 
102 /*
103  * Check the parent's ID as well as the node's to account for a chance
104  * that IDs originating from different domains (snapshot IDs, artifical
105  * IDs, znode IDs) may clash.
106  */
107 static int
sfs_compare_ids(struct vnode * vp,void * arg)108 sfs_compare_ids(struct vnode *vp, void *arg)
109 {
110 	sfs_node_t *n1 = vp->v_data;
111 	sfs_node_t *n2 = arg;
112 	bool equal;
113 
114 	equal = n1->sn_id == n2->sn_id &&
115 	    n1->sn_parent_id == n2->sn_parent_id;
116 
117 	/* Zero means equality. */
118 	return (!equal);
119 }
120 
121 static int
sfs_vnode_get(const struct mount * mp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)122 sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
123    uint64_t id, struct vnode **vpp)
124 {
125 	sfs_node_t search;
126 	int err;
127 
128 	search.sn_id = id;
129 	search.sn_parent_id = parent_id;
130 	err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp,
131 	    sfs_compare_ids, &search);
132 	return (err);
133 }
134 
135 static int
sfs_vnode_insert(struct vnode * vp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)136 sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
137    uint64_t id, struct vnode **vpp)
138 {
139 	int err;
140 
141 	KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
142 	err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp,
143 	    sfs_compare_ids, vp->v_data);
144 	return (err);
145 }
146 
/*
 * Remove 'vp' from the vfs hash.  Thin wrapper kept for symmetry with
 * sfs_vnode_get() and sfs_vnode_insert().
 */
static void
sfs_vnode_remove(struct vnode *vp)
{
	vfs_hash_remove(vp);
}
152 
/*
 * Callback used by sfs_vgetx() to initialize a freshly created vnode;
 * 'arg' is the opaque pointer the caller handed to sfs_vgetx().
 */
typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
154 
/*
 * Get the vnode identified by (parent_id, id) on mount 'mp', creating it if
 * it is not already present in the vfs hash.
 *
 *	flags	lock flags handed to the vfs hash lookup and insert
 *	tag	vnode tag passed to getnewvnode()
 *	vops	vnode operations vector for a newly created vnode
 *	setup	callback that initializes a newly created vnode; it runs
 *		after the vnode has been locked exclusively and must set
 *		v_data, because the hash insert asserts that it is non-NULL
 *	arg	opaque argument passed through to 'setup'
 *	vpp	receives the resulting vnode; set to NULL when vnode
 *		allocation or insmntque() fails
 *
 * Returns 0 on success, or the error from the hash lookup, getnewvnode(),
 * insmntque() or the hash insert.  A vnode created here is handed back
 * still holding the exclusive lock taken during construction.
 */
static int
sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
    const char *tag, struct vop_vector *vops,
    sfs_vnode_setup_fn setup, void *arg,
    struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	/* Fast path: the vnode already exists, or the lookup itself failed. */
	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	/* Allocate a new vnode/inode. */
	error = getnewvnode(tag, mp, vops, &vp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Exclusively lock the vnode while it's being constructed.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		/*
		 * NOTE(review): vp is not released on this path; presumably
		 * insmntque() disposes of the vnode on failure -- confirm.
		 */
		*vpp = NULL;
		return (error);
	}

	setup(vp, arg);

	/*
	 * Publish the vnode in the hash.  A non-NULL *vpp with no error is
	 * returned to the caller instead of vp.
	 * NOTE(review): presumably that is an equal vnode inserted by a
	 * concurrent thread, and vfs_hash_insert() disposes of vp in that
	 * case and on error -- confirm.
	 */
	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	*vpp = vp;
	return (0);
}
200 
201 static void
sfs_print_node(sfs_node_t * node)202 sfs_print_node(sfs_node_t *node)
203 {
204 	printf("\tname = %s\n", node->sn_name);
205 	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
206 	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
207 }
208 
209 static sfs_node_t *
sfs_alloc_node(size_t size,const char * name,uint64_t parent_id,uint64_t id)210 sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
211 {
212 	struct sfs_node *node;
213 
214 	KASSERT(strlen(name) < sizeof(node->sn_name),
215 	    ("sfs node name is too long"));
216 	KASSERT(size >= sizeof(*node), ("sfs node size is too small"));
217 	node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
218 	strlcpy(node->sn_name, name, sizeof(node->sn_name));
219 	node->sn_parent_id = parent_id;
220 	node->sn_id = id;
221 
222 	return (node);
223 }
224 
/*
 * Free a node obtained from sfs_alloc_node().
 */
static void
sfs_destroy_node(sfs_node_t *node)
{
	free(node, M_SFSNODES);
}
230 
231 static void *
sfs_reclaim_vnode(vnode_t * vp)232 sfs_reclaim_vnode(vnode_t *vp)
233 {
234 	sfs_node_t *node;
235 	void *data;
236 
237 	sfs_vnode_remove(vp);
238 	data = vp->v_data;
239 	vp->v_data = NULL;
240 	return (data);
241 }
242 
243 static int
sfs_readdir_common(uint64_t parent_id,uint64_t id,struct vop_readdir_args * ap,uio_t * uio,off_t * offp)244 sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
245     uio_t *uio, off_t *offp)
246 {
247 	struct dirent entry;
248 	int error;
249 
250 	/* Reset ncookies for subsequent use of vfs_read_dirent. */
251 	if (ap->a_ncookies != NULL)
252 		*ap->a_ncookies = 0;
253 
254 	if (uio->uio_resid < sizeof(entry))
255 		return (SET_ERROR(EINVAL));
256 
257 	if (uio->uio_offset < 0)
258 		return (SET_ERROR(EINVAL));
259 	if (uio->uio_offset == 0) {
260 		entry.d_fileno = id;
261 		entry.d_type = DT_DIR;
262 		entry.d_name[0] = '.';
263 		entry.d_name[1] = '\0';
264 		entry.d_namlen = 1;
265 		entry.d_reclen = sizeof(entry);
266 		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
267 		if (error != 0)
268 			return (SET_ERROR(error));
269 	}
270 
271 	if (uio->uio_offset < sizeof(entry))
272 		return (SET_ERROR(EINVAL));
273 	if (uio->uio_offset == sizeof(entry)) {
274 		entry.d_fileno = parent_id;
275 		entry.d_type = DT_DIR;
276 		entry.d_name[0] = '.';
277 		entry.d_name[1] = '.';
278 		entry.d_name[2] = '\0';
279 		entry.d_namlen = 2;
280 		entry.d_reclen = sizeof(entry);
281 		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
282 		if (error != 0)
283 			return (SET_ERROR(error));
284 	}
285 
286 	if (offp != NULL)
287 		*offp = 2 * sizeof(entry);
288 	return (0);
289 }
290 
291 
292 /*
293  * .zfs inode namespace
294  *
295  * We need to generate unique inode numbers for all files and directories
296  * within the .zfs pseudo-filesystem.  We use the following scheme:
297  *
298  * 	ENTRY			ZFSCTL_INODE
299  * 	.zfs			1
300  * 	.zfs/snapshot		2
301  * 	.zfs/snapshot/<snap>	objectid(snap)
302  */
/* Inode number of a snapshot entry: its id, passed through unchanged. */
#define	ZFSCTL_INO_SNAP(id)	(id)

/*
 * Tentative definitions of the vnode operation vectors, so that code ahead
 * of their initializers can take their addresses.
 */
static struct vop_vector zfsctl_ops_root;
static struct vop_vector zfsctl_ops_snapdir;
static struct vop_vector zfsctl_ops_snapshot;
static struct vop_vector zfsctl_ops_shares_dir;
309 
/*
 * Initialization hook for the control directory code.  There is nothing to
 * set up in this implementation, so the body is intentionally empty.
 */
void
zfsctl_init(void)
{
}
314 
/*
 * Teardown hook matching zfsctl_init().  Intentionally empty.
 */
void
zfsctl_fini(void)
{
}
319 
320 boolean_t
zfsctl_is_node(vnode_t * vp)321 zfsctl_is_node(vnode_t *vp)
322 {
323 	return (vn_matchops(vp, zfsctl_ops_root) ||
324 	    vn_matchops(vp, zfsctl_ops_snapdir) ||
325 	    vn_matchops(vp, zfsctl_ops_snapshot) ||
326 	    vn_matchops(vp, zfsctl_ops_shares_dir));
327 
328 }
329 
/*
 * Node backing the '.zfs' directory itself.  The embedded sfs_node_t comes
 * first because the structure is allocated through sfs_alloc_node() and
 * cast to and from sfs_node_t.
 */
typedef struct zfsctl_root {
	sfs_node_t	node;		/* generic node; must stay first */
	sfs_node_t	*snapdir;	/* node of '.zfs/snapshot' */
	timestruc_t	cmtime;		/* reported as ctime/mtime/birthtime */
} zfsctl_root_t;
335 
336 
337 /*
338  * Create the '.zfs' directory.
339  */
340 void
zfsctl_create(zfsvfs_t * zfsvfs)341 zfsctl_create(zfsvfs_t *zfsvfs)
342 {
343 	zfsctl_root_t *dot_zfs;
344 	sfs_node_t *snapdir;
345 	vnode_t *rvp;
346 	uint64_t crtime[2];
347 
348 	ASSERT(zfsvfs->z_ctldir == NULL);
349 
350 	snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT,
351 	    ZFSCTL_INO_SNAPDIR);
352 	dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof(*dot_zfs), ".zfs", 0,
353 	    ZFSCTL_INO_ROOT);
354 	dot_zfs->snapdir = snapdir;
355 
356 	VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
357 	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
358 	    &crtime, sizeof(crtime)));
359 	ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
360 	vput(rvp);
361 
362 	zfsvfs->z_ctldir = dot_zfs;
363 }
364 
365 /*
366  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
367  * The nodes must not have any associated vnodes by now as they should be
368  * vflush-ed.
369  */
370 void
zfsctl_destroy(zfsvfs_t * zfsvfs)371 zfsctl_destroy(zfsvfs_t *zfsvfs)
372 {
373 	sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
374 	sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
375 	zfsvfs->z_ctldir = NULL;
376 }
377 
/*
 * Fetch the root vnode of the filesystem 'mp' via VFS_ROOT().  'arg' is
 * unused; the signature matches what vn_vget_ino_gen() is given as its
 * allocation callback elsewhere in this file.
 */
static int
zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	return (VFS_ROOT(mp, flags, vpp));
}
384 
385 static void
zfsctl_common_vnode_setup(vnode_t * vp,void * arg)386 zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
387 {
388 	ASSERT_VOP_ELOCKED(vp, __func__);
389 
390 	/* We support shared locking. */
391 	VN_LOCK_ASHARE(vp);
392 	vp->v_type = VDIR;
393 	vp->v_data = arg;
394 }
395 
396 static int
zfsctl_root_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)397 zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
398     struct vnode **vpp)
399 {
400 	void *node;
401 	int err;
402 
403 	node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir;
404 	err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
405 	    zfsctl_common_vnode_setup, node, vpp);
406 	return (err);
407 }
408 
409 static int
zfsctl_snapdir_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)410 zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
411     struct vnode **vpp)
412 {
413 	void *node;
414 	int err;
415 
416 	node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir;
417 	err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
418 	   &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
419 	return (err);
420 }
421 
422 /*
423  * Given a root znode, retrieve the associated .zfs directory.
424  * Add a hold to the vnode and return it.
425  */
426 int
zfsctl_root(zfsvfs_t * zfsvfs,int flags,vnode_t ** vpp)427 zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
428 {
429 	vnode_t *vp;
430 	int error;
431 
432 	error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
433 	return (error);
434 }
435 
436 /*
437  * Common open routine.  Disallow any write access.
438  */
439 /* ARGSUSED */
440 static int
zfsctl_common_open(struct vop_open_args * ap)441 zfsctl_common_open(struct vop_open_args *ap)
442 {
443 	int flags = ap->a_mode;
444 
445 	if (flags & FWRITE)
446 		return (SET_ERROR(EACCES));
447 
448 	return (0);
449 }
450 
/*
 * Common close routine.  Nothing to do here: zfsctl_common_open() acquires
 * nothing that would have to be released, so this always succeeds.
 */
/* ARGSUSED */
static int
zfsctl_common_close(struct vop_close_args *ap)
{
	return (0);
}
460 
461 /*
462  * Common access routine.  Disallow writes.
463  */
464 /* ARGSUSED */
465 static int
zfsctl_common_access(ap)466 zfsctl_common_access(ap)
467 	struct vop_access_args /* {
468 		struct vnode *a_vp;
469 		accmode_t a_accmode;
470 		struct ucred *a_cred;
471 		struct thread *a_td;
472 	} */ *ap;
473 {
474 	accmode_t accmode = ap->a_accmode;
475 
476 	if (accmode & VWRITE)
477 		return (SET_ERROR(EACCES));
478 	return (0);
479 }
480 
481 /*
482  * Common getattr function.  Fill in basic information.
483  */
484 static void
zfsctl_common_getattr(vnode_t * vp,vattr_t * vap)485 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
486 {
487 	timestruc_t	now;
488 	sfs_node_t *node;
489 
490 	node = vp->v_data;
491 
492 	vap->va_uid = 0;
493 	vap->va_gid = 0;
494 	vap->va_rdev = 0;
495 	/*
496 	 * We are a purely virtual object, so we have no
497 	 * blocksize or allocated blocks.
498 	 */
499 	vap->va_blksize = 0;
500 	vap->va_nblocks = 0;
501 	vap->va_seq = 0;
502 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
503 	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
504 	    S_IROTH | S_IXOTH;
505 	vap->va_type = VDIR;
506 	/*
507 	 * We live in the now (for atime).
508 	 */
509 	gethrestime(&now);
510 	vap->va_atime = now;
511 	/* FreeBSD: Reset chflags(2) flags. */
512 	vap->va_flags = 0;
513 
514 	vap->va_nodeid = node->sn_id;
515 
516 	/* At least '.' and '..'. */
517 	vap->va_nlink = 2;
518 }
519 
520 /*ARGSUSED*/
521 static int
zfsctl_common_fid(ap)522 zfsctl_common_fid(ap)
523 	struct vop_fid_args /* {
524 		struct vnode *a_vp;
525 		struct fid *a_fid;
526 	} */ *ap;
527 {
528 	vnode_t		*vp = ap->a_vp;
529 	fid_t		*fidp = (void *)ap->a_fid;
530 	sfs_node_t	*node = vp->v_data;
531 	uint64_t	object = node->sn_id;
532 	zfid_short_t	*zfid;
533 	int		i;
534 
535 	zfid = (zfid_short_t *)fidp;
536 	zfid->zf_len = SHORT_FID_LEN;
537 
538 	for (i = 0; i < sizeof(zfid->zf_object); i++)
539 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
540 
541 	/* .zfs nodes always have a generation number of 0 */
542 	for (i = 0; i < sizeof(zfid->zf_gen); i++)
543 		zfid->zf_gen[i] = 0;
544 
545 	return (0);
546 }
547 
548 static int
zfsctl_common_reclaim(ap)549 zfsctl_common_reclaim(ap)
550 	struct vop_reclaim_args /* {
551 		struct vnode *a_vp;
552 		struct thread *a_td;
553 	} */ *ap;
554 {
555 	vnode_t *vp = ap->a_vp;
556 
557 	(void) sfs_reclaim_vnode(vp);
558 	return (0);
559 }
560 
561 static int
zfsctl_common_print(ap)562 zfsctl_common_print(ap)
563 	struct vop_print_args /* {
564 		struct vnode *a_vp;
565 	} */ *ap;
566 {
567 	sfs_print_node(ap->a_vp->v_data);
568 	return (0);
569 }
570 
571 /*
572  * Get root directory attributes.
573  */
574 /* ARGSUSED */
575 static int
zfsctl_root_getattr(ap)576 zfsctl_root_getattr(ap)
577 	struct vop_getattr_args /* {
578 		struct vnode *a_vp;
579 		struct vattr *a_vap;
580 		struct ucred *a_cred;
581 	} */ *ap;
582 {
583 	struct vnode *vp = ap->a_vp;
584 	struct vattr *vap = ap->a_vap;
585 	zfsctl_root_t *node = vp->v_data;
586 
587 	zfsctl_common_getattr(vp, vap);
588 	vap->va_ctime = node->cmtime;
589 	vap->va_mtime = vap->va_ctime;
590 	vap->va_birthtime = vap->va_ctime;
591 	vap->va_nlink += 1; /* snapdir */
592 	vap->va_size = vap->va_nlink;
593 	return (0);
594 }
595 
/*
 * When we lookup "." we still can be asked to lock it
 * differently, can't we?
 *
 * Takes a new reference on 'dvp' and, if its current lock state differs
 * from the requested 'ltype', upgrades or downgrades the lock to match.
 *
 * Returns 0 with the new reference held, or ENOENT (after dropping that
 * reference again) if the vnode is found to be doomed after relocking.
 */
int
zfsctl_relock_dot(vnode_t *dvp, int ltype)
{
	vref(dvp);
	if (ltype != VOP_ISLOCKED(dvp)) {
		if (ltype == LK_EXCLUSIVE)
			vn_lock(dvp, LK_UPGRADE | LK_RETRY);
		else /* if (ltype == LK_SHARED) */
			vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

		/*
		 * Relocking for the "." case may leave us with a reclaimed
		 * vnode.
		 */
		if ((dvp->v_iflag & VI_DOOMED) != 0) {
			vrele(dvp);
			return (SET_ERROR(ENOENT));
		}
	}
	return (0);
}
618 
619 /*
620  * Special case the handling of "..".
621  */
622 int
zfsctl_root_lookup(ap)623 zfsctl_root_lookup(ap)
624 	struct vop_lookup_args /* {
625 		struct vnode *a_dvp;
626 		struct vnode **a_vpp;
627 		struct componentname *a_cnp;
628 	} */ *ap;
629 {
630 	struct componentname *cnp = ap->a_cnp;
631 	vnode_t *dvp = ap->a_dvp;
632 	vnode_t **vpp = ap->a_vpp;
633 	cred_t *cr = ap->a_cnp->cn_cred;
634 	int flags = ap->a_cnp->cn_flags;
635 	int lkflags = ap->a_cnp->cn_lkflags;
636 	int nameiop = ap->a_cnp->cn_nameiop;
637 	int err;
638 	int ltype;
639 
640 	ASSERT(dvp->v_type == VDIR);
641 
642 	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
643 		return (SET_ERROR(ENOTSUP));
644 
645 	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
646 		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
647 		if (err == 0)
648 			*vpp = dvp;
649 	} else if ((flags & ISDOTDOT) != 0) {
650 		err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
651 		    lkflags, vpp);
652 	} else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
653 		err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
654 	} else {
655 		err = SET_ERROR(ENOENT);
656 	}
657 	if (err != 0)
658 		*vpp = NULL;
659 	return (err);
660 }
661 
662 static int
zfsctl_root_readdir(ap)663 zfsctl_root_readdir(ap)
664 	struct vop_readdir_args /* {
665 		struct vnode *a_vp;
666 		struct uio *a_uio;
667 		struct ucred *a_cred;
668 		int *a_eofflag;
669 		int *ncookies;
670 		u_long **a_cookies;
671 	} */ *ap;
672 {
673 	struct dirent entry;
674 	vnode_t *vp = ap->a_vp;
675 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
676 	zfsctl_root_t *node = vp->v_data;
677 	uio_t *uio = ap->a_uio;
678 	int *eofp = ap->a_eofflag;
679 	off_t dots_offset;
680 	int error;
681 
682 	ASSERT(vp->v_type == VDIR);
683 
684 	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
685 	    &dots_offset);
686 	if (error != 0) {
687 		if (error == ENAMETOOLONG) /* ran out of destination space */
688 			error = 0;
689 		return (error);
690 	}
691 	if (uio->uio_offset != dots_offset)
692 		return (SET_ERROR(EINVAL));
693 
694 	CTASSERT(sizeof(node->snapdir->sn_name) <= sizeof(entry.d_name));
695 	entry.d_fileno = node->snapdir->sn_id;
696 	entry.d_type = DT_DIR;
697 	strcpy(entry.d_name, node->snapdir->sn_name);
698 	entry.d_namlen = strlen(entry.d_name);
699 	entry.d_reclen = sizeof(entry);
700 	error = vfs_read_dirent(ap, &entry, uio->uio_offset);
701 	if (error != 0) {
702 		if (error == ENAMETOOLONG)
703 			error = 0;
704 		return (SET_ERROR(error));
705 	}
706 	if (eofp != NULL)
707 		*eofp = 1;
708 	return (0);
709 }
710 
711 static int
zfsctl_root_vptocnp(struct vop_vptocnp_args * ap)712 zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
713 {
714 	static const char dotzfs_name[4] = ".zfs";
715 	vnode_t *dvp;
716 	int error;
717 
718 	if (*ap->a_buflen < sizeof (dotzfs_name))
719 		return (SET_ERROR(ENOMEM));
720 
721 	error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
722 	    LK_SHARED, &dvp);
723 	if (error != 0)
724 		return (SET_ERROR(error));
725 
726 	VOP_UNLOCK(dvp, 0);
727 	*ap->a_vpp = dvp;
728 	*ap->a_buflen -= sizeof (dotzfs_name);
729 	bcopy(dotzfs_name, ap->a_buf + *ap->a_buflen, sizeof (dotzfs_name));
730 	return (0);
731 }
732 
/*
 * Vnode operations of the '.zfs' directory.  Operations not listed here
 * fall through to default_vnodeops.
 */
static struct vop_vector zfsctl_ops_root = {
	.vop_default =	&default_vnodeops,
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_ioctl =	VOP_EINVAL,
	.vop_getattr =	zfsctl_root_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_root_readdir,
	.vop_lookup =	zfsctl_root_lookup,
	.vop_inactive =	VOP_NULL,
	/* The node is owned by the zfsvfs, so reclaim must not free it. */
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
	.vop_vptocnp =	zfsctl_root_vptocnp,
};
748 
749 static int
zfsctl_snapshot_zname(vnode_t * vp,const char * name,int len,char * zname)750 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
751 {
752 	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
753 
754 	dmu_objset_name(os, zname);
755 	if (strlen(zname) + 1 + strlen(name) >= len)
756 		return (SET_ERROR(ENAMETOOLONG));
757 	(void) strcat(zname, "@");
758 	(void) strcat(zname, name);
759 	return (0);
760 }
761 
762 static int
zfsctl_snapshot_lookup(vnode_t * vp,const char * name,uint64_t * id)763 zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
764 {
765 	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
766 	int err;
767 
768 	err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
769 	return (err);
770 }
771 
/*
 * Given a vnode get a root vnode of a filesystem mounted on top of
 * the vnode, if any.  The root vnode is referenced and locked.
 * If no filesystem is mounted then the original vnode remains referenced
 * and locked.  If any error happens the original vnode is unlocked and
 * released.
 *
 * Returns EJUSTRETURN when nothing is mounted on the vnode; otherwise the
 * result of VFS_ROOT() on the mounted filesystem, with *vpp replaced by
 * whatever VFS_ROOT() stored there.
 */
static int
zfsctl_mounted_here(vnode_t **vpp, int flags)
{
	struct mount *mp;
	int err;

	ASSERT_VOP_LOCKED(*vpp, __func__);
	ASSERT3S((*vpp)->v_type, ==, VDIR);

	if ((mp = (*vpp)->v_mountedhere) != NULL) {
		/* Busy the mount before letting go of the covered vnode. */
		err = vfs_busy(mp, 0);
		KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
		KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
		vput(*vpp);
		err = VFS_ROOT(mp, flags, vpp);
		vfs_unbusy(mp);
		return (err);
	}
	return (EJUSTRETURN);
}
799 
/*
 * Argument bundle handed to zfsctl_snapshot_vnode_setup() through
 * sfs_vgetx().
 */
typedef struct {
	const char *snap_name;	/* name given to the new node */
	uint64_t    snap_id;	/* id given to the new node */
} snapshot_setup_arg_t;
804 
/*
 * Setup callback for '.zfs/snapshot/<snapname>' vnodes.  'arg' points to a
 * snapshot_setup_arg_t.  Unlike the '.zfs' and '.zfs/snapshot' vnodes, a
 * snapshot vnode gets a node of its own, allocated here with the snapshot
 * directory as its parent.
 */
static void
zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
{
	snapshot_setup_arg_t *ssa = arg;
	sfs_node_t *node;

	ASSERT_VOP_ELOCKED(vp, __func__);

	node = sfs_alloc_node(sizeof(sfs_node_t),
	    ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
	zfsctl_common_vnode_setup(vp, node);

	/* We have to support recursive locking. */
	VN_LOCK_AREC(vp);
}
820 
821 /*
822  * Lookup entry point for the 'snapshot' directory.  Try to open the
823  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
824  * Perform a mount of the associated dataset on top of the vnode.
825  */
826 /* ARGSUSED */
827 int
zfsctl_snapdir_lookup(ap)828 zfsctl_snapdir_lookup(ap)
829 	struct vop_lookup_args /* {
830 		struct vnode *a_dvp;
831 		struct vnode **a_vpp;
832 		struct componentname *a_cnp;
833 	} */ *ap;
834 {
835 	vnode_t *dvp = ap->a_dvp;
836 	vnode_t **vpp = ap->a_vpp;
837 	struct componentname *cnp = ap->a_cnp;
838 	char name[NAME_MAX + 1];
839 	char fullname[ZFS_MAX_DATASET_NAME_LEN];
840 	char *mountpoint;
841 	size_t mountpoint_len;
842 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
843 	uint64_t snap_id;
844 	int nameiop = cnp->cn_nameiop;
845 	int lkflags = cnp->cn_lkflags;
846 	int flags = cnp->cn_flags;
847 	int err;
848 
849 	ASSERT(dvp->v_type == VDIR);
850 
851 	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
852 		return (SET_ERROR(ENOTSUP));
853 
854 	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
855 		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
856 		if (err == 0)
857 			*vpp = dvp;
858 		return (err);
859 	}
860 	if (flags & ISDOTDOT) {
861 		err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
862 		    vpp);
863 		return (err);
864 	}
865 
866 	if (cnp->cn_namelen >= sizeof(name))
867 		return (SET_ERROR(ENAMETOOLONG));
868 
869 	strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
870 	err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
871 	if (err != 0)
872 		return (SET_ERROR(ENOENT));
873 
874 	for (;;) {
875 		snapshot_setup_arg_t ssa;
876 
877 		ssa.snap_name = name;
878 		ssa.snap_id = snap_id;
879 		err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
880 		   snap_id, "zfs", &zfsctl_ops_snapshot,
881 		   zfsctl_snapshot_vnode_setup, &ssa, vpp);
882 		if (err != 0)
883 			return (err);
884 
885 		/* Check if a new vnode has just been created. */
886 		if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
887 			break;
888 
889 		/*
890 		 * The vnode must be referenced at least by this thread and
891 		 * the mounted snapshot or the thread doing the mounting.
892 		 * There can be more references from concurrent lookups.
893 		 */
894 		KASSERT(vrefcnt(*vpp) > 1, ("found unreferenced mountpoint"));
895 
896 		/*
897 		 * Check if a snapshot is already mounted on top of the vnode.
898 		 */
899 		err = zfsctl_mounted_here(vpp, lkflags);
900 		if (err != EJUSTRETURN)
901 			return (err);
902 
903 #ifdef INVARIANTS
904 		/*
905 		 * If the vnode not covered yet, then the mount operation
906 		 * must be in progress.
907 		 */
908 		VI_LOCK(*vpp);
909 		KASSERT(((*vpp)->v_iflag & VI_MOUNT) != 0,
910 		    ("snapshot vnode not covered"));
911 		VI_UNLOCK(*vpp);
912 #endif
913 		vput(*vpp);
914 
915 		/*
916 		 * In this situation we can loop on uncontested locks and starve
917 		 * the thread doing the lengthy, non-trivial mount operation.
918 		 */
919 		kern_yield(PRI_USER);
920 	}
921 
922 	VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof(fullname), fullname));
923 
924 	mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
925 	    strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
926 	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
927 	(void) snprintf(mountpoint, mountpoint_len,
928 	    "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
929 	    dvp->v_vfsp->mnt_stat.f_mntonname, name);
930 
931 	err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
932 	kmem_free(mountpoint, mountpoint_len);
933 	if (err == 0) {
934 		/*
935 		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
936 		 *
937 		 * This is where we lie about our v_vfsp in order to
938 		 * make .zfs/snapshot/<snapname> accessible over NFS
939 		 * without requiring manual mounts of <snapname>.
940 		 */
941 		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
942 		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
943 
944 		/* Clear the root flag (set via VFS_ROOT) as well. */
945 		(*vpp)->v_vflag &= ~VV_ROOT;
946 	}
947 
948 	if (err != 0)
949 		*vpp = NULL;
950 	return (err);
951 }
952 
953 static int
zfsctl_snapdir_readdir(ap)954 zfsctl_snapdir_readdir(ap)
955 	struct vop_readdir_args /* {
956 		struct vnode *a_vp;
957 		struct uio *a_uio;
958 		struct ucred *a_cred;
959 		int *a_eofflag;
960 		int *ncookies;
961 		u_long **a_cookies;
962 	} */ *ap;
963 {
964 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
965 	struct dirent entry;
966 	vnode_t *vp = ap->a_vp;
967 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
968 	uio_t *uio = ap->a_uio;
969 	int *eofp = ap->a_eofflag;
970 	off_t dots_offset;
971 	int error;
972 
973 	ASSERT(vp->v_type == VDIR);
974 
975 	error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, uio,
976 	    &dots_offset);
977 	if (error != 0) {
978 		if (error == ENAMETOOLONG) /* ran out of destination space */
979 			error = 0;
980 		return (error);
981 	}
982 
983 	for (;;) {
984 		uint64_t cookie;
985 		uint64_t id;
986 
987 		cookie = uio->uio_offset - dots_offset;
988 
989 		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
990 		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
991 		    snapname, &id, &cookie, NULL);
992 		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
993 		if (error != 0) {
994 			if (error == ENOENT) {
995 				if (eofp != NULL)
996 					*eofp = 1;
997 				error = 0;
998 			}
999 			return (error);
1000 		}
1001 
1002 		entry.d_fileno = id;
1003 		entry.d_type = DT_DIR;
1004 		strcpy(entry.d_name, snapname);
1005 		entry.d_namlen = strlen(entry.d_name);
1006 		entry.d_reclen = sizeof(entry);
1007 		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
1008 		if (error != 0) {
1009 			if (error == ENAMETOOLONG)
1010 				error = 0;
1011 			return (SET_ERROR(error));
1012 		}
1013 		uio->uio_offset = cookie + dots_offset;
1014 	}
1015 	/* NOTREACHED */
1016 }
1017 
1018 /* ARGSUSED */
1019 static int
zfsctl_snapdir_getattr(ap)1020 zfsctl_snapdir_getattr(ap)
1021 	struct vop_getattr_args /* {
1022 		struct vnode *a_vp;
1023 		struct vattr *a_vap;
1024 		struct ucred *a_cred;
1025 	} */ *ap;
1026 {
1027 	vnode_t *vp = ap->a_vp;
1028 	vattr_t *vap = ap->a_vap;
1029 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1030 	dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
1031 	sfs_node_t *node = vp->v_data;
1032 	uint64_t snap_count;
1033 	int err;
1034 
1035 	zfsctl_common_getattr(vp, vap);
1036 	vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1037 	vap->va_mtime = vap->va_ctime;
1038 	vap->va_birthtime = vap->va_ctime;
1039 	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
1040 		err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
1041 		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
1042 		if (err != 0)
1043 			return (err);
1044 		vap->va_nlink += snap_count;
1045 	}
1046 	vap->va_size = vap->va_nlink;
1047 
1048 	return (0);
1049 }
1050 
/*
 * Vnode operations of the '.zfs/snapshot' directory.  Operations not listed
 * here fall through to default_vnodeops.
 */
static struct vop_vector zfsctl_ops_snapdir = {
	.vop_default =	&default_vnodeops,
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_getattr =	zfsctl_snapdir_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_snapdir_readdir,
	.vop_lookup =	zfsctl_snapdir_lookup,
	/* The node is owned by the zfsvfs, so reclaim must not free it. */
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
};
1063 
1064 static int
zfsctl_snapshot_inactive(ap)1065 zfsctl_snapshot_inactive(ap)
1066 	struct vop_inactive_args /* {
1067 		struct vnode *a_vp;
1068 		struct thread *a_td;
1069 	} */ *ap;
1070 {
1071 	vnode_t *vp = ap->a_vp;
1072 
1073 	VERIFY(vrecycle(vp) == 1);
1074 	return (0);
1075 }
1076 
1077 static int
zfsctl_snapshot_reclaim(ap)1078 zfsctl_snapshot_reclaim(ap)
1079 	struct vop_reclaim_args /* {
1080 		struct vnode *a_vp;
1081 		struct thread *a_td;
1082 	} */ *ap;
1083 {
1084 	vnode_t *vp = ap->a_vp;
1085 	void *data = vp->v_data;
1086 
1087 	sfs_reclaim_vnode(vp);
1088 	sfs_destroy_node(data);
1089 	return (0);
1090 }
1091 
1092 static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args * ap)1093 zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
1094 {
1095 	struct mount *mp;
1096 	vnode_t *dvp;
1097 	vnode_t *vp;
1098 	sfs_node_t *node;
1099 	size_t len;
1100 	int locked;
1101 	int error;
1102 
1103 	vp = ap->a_vp;
1104 	node = vp->v_data;
1105 	len = strlen(node->sn_name);
1106 	if (*ap->a_buflen < len)
1107 		return (SET_ERROR(ENOMEM));
1108 
1109 	/*
1110 	 * Prevent unmounting of the snapshot while the vnode lock
1111 	 * is not held.  That is not strictly required, but allows
1112 	 * us to assert that an uncovered snapshot vnode is never
1113 	 * "leaked".
1114 	 */
1115 	mp = vp->v_mountedhere;
1116 	if (mp == NULL)
1117 		return (SET_ERROR(ENOENT));
1118 	error = vfs_busy(mp, 0);
1119 	KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));
1120 
1121 	/*
1122 	 * We can vput the vnode as we can now depend on the reference owned
1123 	 * by the busied mp.  But we also need to hold the vnode, because
1124 	 * the reference may go after vfs_unbusy() which has to be called
1125 	 * before we can lock the vnode again.
1126 	 */
1127 	locked = VOP_ISLOCKED(vp);
1128 	vhold(vp);
1129 	vput(vp);
1130 
1131 	/* Look up .zfs/snapshot, our parent. */
1132 	error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
1133 	if (error == 0) {
1134 		VOP_UNLOCK(dvp, 0);
1135 		*ap->a_vpp = dvp;
1136 		*ap->a_buflen -= len;
1137 		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
1138 	}
1139 	vfs_unbusy(mp);
1140 	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
1141 	return (error);
1142 }
1143 
/*
 * Vnode operations vector for snapshot mountpoint vnodes.
 *
 * These VP's should never see the light of day.  They should always
 * be covered.  Only the operations listed here are provided; vop_default
 * is NULL so nothing else is inherited.
 */
static struct vop_vector zfsctl_ops_snapshot = {
	.vop_default =		NULL, /* ensure very restricted access */
	.vop_inactive =		zfsctl_snapshot_inactive,
	.vop_reclaim =		zfsctl_snapshot_reclaim,
	.vop_vptocnp =		zfsctl_snapshot_vptocnp,
	.vop_lock1 =		vop_stdlock,
	.vop_unlock =		vop_stdunlock,
	.vop_islocked =		vop_stdislocked,
	.vop_advlockpurge =	vop_stdadvlockpurge, /* called by vgone */
	.vop_print =		zfsctl_common_print,
};
1159 
1160 int
zfsctl_lookup_objset(vfs_t * vfsp,uint64_t objsetid,zfsvfs_t ** zfsvfsp)1161 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1162 {
1163 	struct mount *mp;
1164 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1165 	vnode_t *vp;
1166 	int error;
1167 
1168 	ASSERT(zfsvfs->z_ctldir != NULL);
1169 	*zfsvfsp = NULL;
1170 	error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1171 	    ZFSCTL_INO_SNAPDIR, objsetid, &vp);
1172 	if (error == 0 && vp != NULL) {
1173 		/*
1174 		 * XXX Probably need to at least reference, if not busy, the mp.
1175 		 */
1176 		if (vp->v_mountedhere != NULL)
1177 			*zfsvfsp = vp->v_mountedhere->mnt_data;
1178 		vput(vp);
1179 	}
1180 	if (*zfsvfsp == NULL)
1181 		return (SET_ERROR(EINVAL));
1182 	return (0);
1183 }
1184 
1185 /*
1186  * Unmount any snapshots for the given filesystem.  This is called from
1187  * zfs_umount() - if we have a ctldir, then go through and unmount all the
1188  * snapshots.
1189  */
1190 int
zfsctl_umount_snapshots(vfs_t * vfsp,int fflags,cred_t * cr)1191 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1192 {
1193 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
1194 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1195 	struct mount *mp;
1196 	vnode_t *dvp;
1197 	vnode_t *vp;
1198 	sfs_node_t *node;
1199 	sfs_node_t *snap;
1200 	uint64_t cookie;
1201 	int error;
1202 
1203 	ASSERT(zfsvfs->z_ctldir != NULL);
1204 
1205 	cookie = 0;
1206 	for (;;) {
1207 		uint64_t id;
1208 
1209 		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
1210 		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
1211 		    snapname, &id, &cookie, NULL);
1212 		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
1213 		if (error != 0) {
1214 			if (error == ENOENT)
1215 				error = 0;
1216 			break;
1217 		}
1218 
1219 		for (;;) {
1220 			error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1221 			    ZFSCTL_INO_SNAPDIR, id, &vp);
1222 			if (error != 0 || vp == NULL)
1223 				break;
1224 
1225 			mp = vp->v_mountedhere;
1226 
1227 			/*
1228 			 * v_mountedhere being NULL means that the
1229 			 * (uncovered) vnode is in a transient state
1230 			 * (mounting or unmounting), so loop until it
1231 			 * settles down.
1232 			 */
1233 			if (mp != NULL)
1234 				break;
1235 			vput(vp);
1236 		}
1237 		if (error != 0)
1238 			break;
1239 		if (vp == NULL)
1240 			continue;	/* no mountpoint, nothing to do */
1241 
1242 		/*
1243 		 * The mount-point vnode is kept locked to avoid spurious EBUSY
1244 		 * from a concurrent umount.
1245 		 * The vnode lock must have recursive locking enabled.
1246 		 */
1247 		vfs_ref(mp);
1248 		error = dounmount(mp, fflags, curthread);
1249 		KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
1250 		    ("extra references after unmount"));
1251 		vput(vp);
1252 		if (error != 0)
1253 			break;
1254 	}
1255 	KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
1256 	    ("force unmounting failed"));
1257 	return (error);
1258 }
1259 
1260 #endif /* __FreeBSD__ */
1261 
1262 #ifdef __NetBSD__
1263 
1264 #include <sys/malloc.h>
1265 #include <sys/pathname.h>
1266 #include <miscfs/genfs/genfs.h>
1267 #include <sys/zfs_context.h>
1268 #include <sys/zfs_ctldir.h>
1269 #include <sys/dsl_dataset.h>
1270 #include <sys/zap.h>
1271 
/*
 * Per-filesystem control directory state, stored in zfsvfs->z_ctldir by
 * zfsctl_create() and released by zfsctl_destroy().
 */
struct zfsctl_root {
	timestruc_t zc_cmtime;	/* reported as ctime/mtime/birthtime */
};
1275 
/*
 * Key passed to vcache_get() to identify a control directory node.
 */
struct sfs_node_key {
	uint64_t parent_id;	/* id of the parent node; 0 for ".zfs" */
	uint64_t id;		/* id of this node */
};
/*
 * Private data attached to a control directory vnode (vp->v_data).
 */
struct sfs_node {
	struct sfs_node_key sn_key;	/* vcache key of this node */
#define sn_parent_id sn_key.parent_id
#define sn_id sn_key.id
	lwp_t *sn_mounting;	/* lwp mounting a snapshot here, or NULL */
};
1286 
/* Name of the snapshot directory inside the control directory. */
#define ZFS_SNAPDIR_NAME "snapshot"

/* Private sfs_node attached to a control directory vnode. */
#define VTOSFS(vp) ((struct sfs_node *)((vp)->v_data))

/*
 * Assert that vp is a control directory vnode: it uses the sfs operations
 * vector and is a directory.  Note that np is fetched but not referenced
 * by the assertions.
 */
#define SFS_NODE_ASSERT(vp) \
	do { \
		struct sfs_node *np = VTOSFS(vp); \
		ASSERT((vp)->v_op == zfs_sfsop_p); \
		ASSERT((vp)->v_type == VDIR); \
	} while (/*CONSTCOND*/ 0)

/* Operations vector pointer, referenced by zfs_sfsop_opv_desc. */
static int (**zfs_sfsop_p)(void *);
1299 
1300 /*
1301  * Mount a snapshot.  Cannot use do_sys_umount() as it
1302  * doesn't allow its "path" argument from SYSSPACE.
1303  */
1304 static int
sfs_snapshot_mount(vnode_t * vp,const char * snapname)1305 sfs_snapshot_mount(vnode_t *vp, const char *snapname)
1306 {
1307 	struct sfs_node *node = VTOSFS(vp);
1308 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1309 	vfs_t *vfsp;
1310 	char *path, *osname;
1311 	int error;
1312 	extern int zfs_domount(vfs_t *, char *);
1313 
1314 	path = PNBUF_GET();
1315 	osname = PNBUF_GET();
1316 
1317 	dmu_objset_name(zfsvfs->z_os, path);
1318 	snprintf(osname, MAXPATHLEN, "%s@%s", path, snapname);
1319 	snprintf(path, MAXPATHLEN,
1320 	    "%s/" ZFS_CTLDIR_NAME "/" ZFS_SNAPDIR_NAME "/%s",
1321 	    vp->v_vfsp->mnt_stat.f_mntonname, snapname);
1322 
1323 	vfsp = vfs_mountalloc(vp->v_vfsp->mnt_op, vp);
1324 	if (vfsp == NULL) {
1325 		error = ENOMEM;
1326 		goto out;
1327 	}
1328 	vfsp->mnt_op->vfs_refcount++;
1329 	vfsp->mnt_stat.f_owner = 0;
1330 	vfsp->mnt_flag = MNT_RDONLY | MNT_NOSUID | MNT_IGNORE;
1331 
1332 	mutex_enter(vfsp->mnt_updating);
1333 
1334 	error = zfs_domount(vfsp, osname);
1335 	if (error)
1336 		goto out;
1337 
1338 	/* Set f_fsidx from parent to cheat NFSD. */
1339 	vfsp->mnt_stat.f_fsidx = vp->v_vfsp->mnt_stat.f_fsidx;
1340 
1341 	strlcpy(vfsp->mnt_stat.f_mntfromname, osname,
1342 	    sizeof(vfsp->mnt_stat.f_mntfromname));
1343 	set_statvfs_info(path, UIO_SYSSPACE, vfsp->mnt_stat.f_mntfromname,
1344 	    UIO_SYSSPACE, vfsp->mnt_op->vfs_name, vfsp, curlwp);
1345 
1346 	error = vfs_set_lowermount(vfsp, vp->v_vfsp);
1347 	if (error)
1348 		goto out;
1349 
1350 	mountlist_append(vfsp);
1351 	vref(vp);
1352 	vp->v_mountedhere = vfsp;
1353 
1354 	mutex_exit(vfsp->mnt_updating);
1355 	(void) VFS_STATVFS(vfsp, &vfsp->mnt_stat);
1356 
1357 out:;
1358 	if (error && vfsp) {
1359 		mutex_exit(vfsp->mnt_updating);
1360 		vfs_rele(vfsp);
1361 	}
1362 	PNBUF_PUT(osname);
1363 	PNBUF_PUT(path);
1364 
1365 	return error;
1366 }
1367 
1368 static int
sfs_lookup_snapshot(vnode_t * dvp,struct componentname * cnp,vnode_t ** vpp)1369 sfs_lookup_snapshot(vnode_t *dvp, struct componentname *cnp, vnode_t **vpp)
1370 {
1371 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1372 	vnode_t *vp;
1373 	struct sfs_node *node;
1374 	struct sfs_node_key key;
1375 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
1376 	int error;
1377 
1378 	/* Retrieve the snapshot object id and the to be mounted on vnode. */
1379 	if (cnp->cn_namelen >= sizeof(snapname))
1380 		return ENOENT;
1381 
1382 	strlcpy(snapname, cnp->cn_nameptr, cnp->cn_namelen + 1);
1383 	error = dsl_dataset_snap_lookup( dmu_objset_ds(zfsvfs->z_os),
1384 	    snapname, &key.id);
1385 	if (error)
1386 		return error;
1387 	key.parent_id = ZFSCTL_INO_SNAPDIR;
1388 	error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1389 	if (error)
1390 		return error;
1391 
1392 	/* Handle case where the vnode is currently mounting. */
1393 	vp = *vpp;
1394 	mutex_enter(vp->v_interlock);
1395 	node = VTOSFS(vp);
1396 	if (node->sn_mounting) {
1397 		if (node->sn_mounting == curlwp)
1398 			error = 0;
1399 		else
1400 			error = ERESTART;
1401 		mutex_exit(vp->v_interlock);
1402 		if (error)
1403 			yield();
1404 		return error;
1405 	}
1406 
1407 	/* If not yet mounted mount the snapshot. */
1408 	if (vp->v_mountedhere == NULL) {
1409 		ASSERT(node->sn_mounting == NULL);
1410 		node->sn_mounting = curlwp;
1411 		mutex_exit(vp->v_interlock);
1412 
1413 		VOP_UNLOCK(dvp, 0);
1414 		error = sfs_snapshot_mount(vp, snapname);
1415 		if (vn_lock(dvp, LK_EXCLUSIVE) != 0) {
1416 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1417 			error = ENOENT;
1418 		}
1419 
1420 		mutex_enter(vp->v_interlock);
1421 		if ((node = VTOSFS(vp)))
1422 			node->sn_mounting = NULL;
1423 		mutex_exit(vp->v_interlock);
1424 
1425 		if (error) {
1426 			vrele(vp);
1427 			*vpp = NULL;
1428 			return error;
1429 		}
1430 	} else
1431 		mutex_exit(vp->v_interlock);
1432 
1433 	/* Return the mounted root rather than the covered mount point.  */
1434 	ASSERT(vp->v_mountedhere);
1435 	error = VFS_ROOT(vp->v_mountedhere, LK_EXCLUSIVE, vpp);
1436 	vrele(vp);
1437 	if (error)
1438 		return error;
1439 
1440 	/*
1441 	 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>
1442 	 *
1443 	 * Here we make .zfs/snapshot/<snapname> accessible over NFS
1444 	 * without requiring manual mounts of <snapname>.
1445 	 */
1446 	if (((*vpp)->v_vflag & VV_ROOT)) {
1447 		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
1448 		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
1449 		(*vpp)->v_vflag &= ~VV_ROOT;
1450 	}
1451 	VOP_UNLOCK(*vpp, 0);
1452 
1453 	return 0;
1454 }
1455 
1456 static int
sfs_lookup(void * v)1457 sfs_lookup(void *v)
1458 {
1459 	struct vop_lookup_v2_args /* {
1460 		struct vnode *a_dvp;
1461 		struct vnode **a_vpp;
1462 		struct componentname *a_cnp;
1463 	} */ *ap = v;
1464 	vnode_t *dvp = ap->a_dvp;
1465 	vnode_t **vpp = ap->a_vpp;
1466 	struct componentname *cnp = ap->a_cnp;
1467 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1468 	struct sfs_node *dnode = VTOSFS(dvp);
1469 	int error;
1470 
1471 	SFS_NODE_ASSERT(dvp);
1472 	ZFS_ENTER(zfsvfs);
1473 
1474 	/*
1475 	 * No CREATE, DELETE or RENAME.
1476 	 */
1477 	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop != LOOKUP) {
1478 		ZFS_EXIT(zfsvfs);
1479 
1480 		return ENOTSUP;
1481 	}
1482 
1483 	/*
1484 	 * Handle DOT and DOTDOT.
1485 	 */
1486 	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
1487 		vref(dvp);
1488 		*vpp = dvp;
1489 		ZFS_EXIT(zfsvfs);
1490 
1491 		return 0;
1492 	}
1493 	if ((cnp->cn_flags & ISDOTDOT)) {
1494 		if (dnode->sn_parent_id == 0) {
1495 			error = vcache_get(zfsvfs->z_vfs,
1496 			    &zfsvfs->z_root, sizeof(zfsvfs->z_root), vpp);
1497 		} else if (dnode->sn_parent_id == ZFSCTL_INO_ROOT) {
1498 			error = zfsctl_root(zfsvfs, vpp);
1499 		} else if (dnode->sn_parent_id == ZFSCTL_INO_SNAPDIR) {
1500 			error = zfsctl_snapshot(zfsvfs, vpp);
1501 		} else {
1502 			error = ENOENT;
1503 		}
1504 		ZFS_EXIT(zfsvfs);
1505 
1506 		return error;
1507 	}
1508 
1509 	/*
1510 	 * Lookup in ".zfs".
1511 	 */
1512 	if (dnode->sn_id == ZFSCTL_INO_ROOT) {
1513 		if (cnp->cn_namelen == strlen(ZFS_SNAPDIR_NAME) &&
1514 		    strncmp(cnp->cn_nameptr, ZFS_SNAPDIR_NAME,
1515 		    cnp->cn_namelen) == 0) {
1516 			error = zfsctl_snapshot(zfsvfs, vpp);
1517 		} else {
1518 			error = ENOENT;
1519 		}
1520 		ZFS_EXIT(zfsvfs);
1521 
1522 		return error;
1523 	}
1524 
1525 	/*
1526 	 * Lookup in ".zfs/snapshot".
1527 	 */
1528 	if (dnode->sn_id == ZFSCTL_INO_SNAPDIR) {
1529 		error = sfs_lookup_snapshot(dvp, cnp, vpp);
1530 		ZFS_EXIT(zfsvfs);
1531 
1532 		return error;
1533 	}
1534 
1535 	vprint("sfs_lookup: unexpected node for lookup", dvp);
1536 	ZFS_EXIT(zfsvfs);
1537 
1538 	return ENOENT;
1539 }
1540 
1541 static int
sfs_open(void * v)1542 sfs_open(void *v)
1543 {
1544 	struct vop_open_args /* {
1545 		struct vnode *a_vp;
1546 		int a_mode;
1547 		kauth_cred_t a_cred;
1548 	} */ *ap = v;
1549 	zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1550 	int error = 0;
1551 
1552 	SFS_NODE_ASSERT(ap->a_vp);
1553 	ZFS_ENTER(zfsvfs);
1554 
1555 	if (ap->a_mode & FWRITE)
1556 		error = EACCES;
1557 
1558 	ZFS_EXIT(zfsvfs);
1559 
1560 	return error;
1561 }
1562 
1563 static int
sfs_close(void * v)1564 sfs_close(void *v)
1565 {
1566 	struct vop_close_args /* {
1567 		struct vnode *a_vp;
1568 		int a_mode;
1569 		kauth_cred_t a_cred;
1570 	} */ *ap = v;
1571 	zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1572 
1573 	SFS_NODE_ASSERT(ap->a_vp);
1574 	ZFS_ENTER(zfsvfs);
1575 
1576 	ZFS_EXIT(zfsvfs);
1577 
1578 	return 0;
1579 }
1580 
1581 static int
sfs_access(void * v)1582 sfs_access(void *v)
1583 {
1584 	struct vop_access_args /* {
1585 		struct vnode *a_vp;
1586 		int a_mode;
1587 		kauth_cred_t a_cred;
1588 	} */ *ap = v;
1589 	zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1590 	int error = 0;
1591 
1592 	SFS_NODE_ASSERT(ap->a_vp);
1593 	ZFS_ENTER(zfsvfs);
1594 
1595 	if (ap->a_accmode & FWRITE)
1596 		error = EACCES;
1597 
1598 	ZFS_EXIT(zfsvfs);
1599 
1600 	return error;
1601 }
1602 
1603 static int
sfs_getattr(void * v)1604 sfs_getattr(void *v)
1605 {
1606 	struct vop_getattr_args /* {
1607 		struct vnode *a_vp;
1608 		struct vattr *a_vap;
1609 		kauth_cred_t a_cred;
1610 	} */ *ap = v;
1611 	vnode_t *vp = ap->a_vp;
1612 	struct sfs_node *node = VTOSFS(vp);
1613 	struct vattr *vap = ap->a_vap;
1614 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1615 	dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
1616 	timestruc_t now;
1617 	uint64_t snap_count;
1618 	int error;
1619 
1620 	SFS_NODE_ASSERT(vp);
1621 	ZFS_ENTER(zfsvfs);
1622 
1623 	vap->va_type = VDIR;
1624 	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
1625 	    S_IROTH | S_IXOTH;
1626 	vap->va_nlink = 2;
1627 	vap->va_uid = 0;
1628 	vap->va_gid = 0;
1629 	vap->va_fsid = vp->v_vfsp->mnt_stat.f_fsid;
1630 	vap->va_fileid = node->sn_id;
1631 	vap->va_size = 0;
1632 	vap->va_blocksize = 0;
1633 	gethrestime(&now);
1634 	vap->va_atime = now;
1635 	vap->va_ctime = zfsvfs->z_ctldir->zc_cmtime;
1636 	vap->va_mtime = vap->va_ctime;
1637 	vap->va_birthtime = vap->va_ctime;
1638 	vap->va_gen = 0;
1639 	vap->va_flags = 0;
1640 	vap->va_rdev = 0;
1641 	vap->va_bytes = 0;
1642 	vap->va_filerev = 0;
1643 
1644 	switch (node->sn_id){
1645 	case ZFSCTL_INO_ROOT:
1646 		vap->va_nlink += 1; /* snapdir */
1647 		vap->va_size = vap->va_nlink;
1648 		break;
1649 	case ZFSCTL_INO_SNAPDIR:
1650 		if (dsl_dataset_phys(ds)->ds_snapnames_zapobj) {
1651 			error = zap_count(
1652 			    dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
1653 			    dsl_dataset_phys(ds)->ds_snapnames_zapobj,
1654 			    &snap_count);
1655 			if (error)
1656 				return error;
1657 			vap->va_nlink += snap_count;
1658 		}
1659 		vap->va_size = vap->va_nlink;
1660 		break;
1661 	}
1662 
1663 	ZFS_EXIT(zfsvfs);
1664 
1665 	return 0;
1666 }
1667 
1668 static int
sfs_readdir_one(struct vop_readdir_args * ap,struct dirent * dp,const char * name,ino_t ino,off_t * offp)1669 sfs_readdir_one(struct vop_readdir_args *ap, struct dirent *dp,
1670     const char *name, ino_t ino, off_t *offp)
1671 {
1672 	int error;
1673 
1674 	dp->d_fileno = ino;
1675 	dp->d_type = DT_DIR;
1676 	strlcpy(dp->d_name, name, sizeof(dp->d_name));
1677 	dp->d_namlen = strlen(dp->d_name);
1678 	dp->d_reclen = _DIRENT_SIZE(dp);
1679 
1680 	if (ap->a_uio->uio_resid < dp->d_reclen)
1681 		return ENAMETOOLONG;
1682 	if (ap->a_uio->uio_offset > *offp) {
1683 		*offp += dp->d_reclen;
1684 		return 0;
1685 	}
1686 
1687 	error = uiomove(dp, dp->d_reclen, UIO_READ, ap->a_uio);
1688 	if (error)
1689 		return error;
1690 	if (ap->a_ncookies)
1691 		(*ap->a_cookies)[(*ap->a_ncookies)++] = *offp;
1692 	*offp += dp->d_reclen;
1693 
1694 	return 0;
1695 }
1696 
1697 static int
sfs_readdir(void * v)1698 sfs_readdir(void *v)
1699 {
1700 	struct vop_readdir_args /* {
1701 		struct vnode *a_vp;
1702 		struct uio *a_uio;
1703 		kauth_cred_t a_cred;
1704 		int *a_eofflag;
1705 		off_t **a_cookies;
1706 		int *a_ncookies;
1707 	} */ *ap = v;
1708 	vnode_t *vp = ap->a_vp;
1709 	struct sfs_node *node = VTOSFS(vp);
1710 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1711 	struct dirent *dp;
1712 	uint64_t parent;
1713 	off_t offset;
1714 	int error, ncookies;
1715 
1716 	SFS_NODE_ASSERT(ap->a_vp);
1717 	ZFS_ENTER(zfsvfs);
1718 
1719 	parent = node->sn_parent_id == 0 ? zfsvfs->z_root : node->sn_parent_id;
1720 	dp = kmem_alloc(sizeof(*dp), KM_SLEEP);
1721 	if (ap->a_ncookies) {
1722 		ncookies = ap->a_uio->uio_resid / _DIRENT_MINSIZE(dp);
1723 		*ap->a_ncookies = 0;
1724 		*ap->a_cookies = malloc(ncookies * sizeof (off_t),
1725 		    M_TEMP, M_WAITOK);
1726 	}
1727 
1728 	offset = 0;
1729 	error = sfs_readdir_one(ap, dp, ".", node->sn_id, &offset);
1730 	if (error == 0)
1731 		error = sfs_readdir_one(ap, dp, "..", parent, &offset);
1732 	if (error == 0 && node->sn_id == ZFSCTL_INO_ROOT) {
1733 		error = sfs_readdir_one(ap, dp, ZFS_SNAPDIR_NAME,
1734 		    ZFSCTL_INO_SNAPDIR, &offset);
1735 	} else if (error == 0 && node->sn_id == ZFSCTL_INO_SNAPDIR) {
1736 		char snapname[ZFS_MAX_DATASET_NAME_LEN];
1737 		uint64_t cookie, id;
1738 
1739 		cookie = 0;
1740 		for (;;) {
1741 			dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os),
1742 			    FTAG);
1743 			error = dmu_snapshot_list_next(zfsvfs->z_os,
1744 			    sizeof(snapname), snapname, &id, &cookie, NULL);
1745 			dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os),
1746 			    FTAG);
1747 			if (error) {
1748 				if (error == ENOENT)
1749 					error = 0;
1750 				break;
1751 			}
1752 			error = sfs_readdir_one(ap, dp, snapname, id, &offset);
1753 			if (error)
1754 				break;
1755 		}
1756 	}
1757 
1758 	if (ap->a_eofflag && error == 0)
1759 		*ap->a_eofflag = 1;
1760 
1761 	if (error == ENAMETOOLONG)
1762 		error = 0;
1763 
1764 	if (ap->a_ncookies && error) {
1765 		free(*ap->a_cookies, M_TEMP);
1766 		*ap->a_ncookies = 0;
1767 		*ap->a_cookies = NULL;
1768 	}
1769 	kmem_free(dp, sizeof(*dp));
1770 
1771 	ZFS_EXIT(zfsvfs);
1772 
1773 	return error;
1774 }
1775 
1776 static int
sfs_inactive(void * v)1777 sfs_inactive(void *v)
1778 {
1779 	struct vop_inactive_v2_args /* {
1780 		struct vnode *a_vp;
1781 		bool *a_recycle;
1782 	} */ *ap = v;
1783 	vnode_t *vp = ap->a_vp;
1784 	struct sfs_node *node = VTOSFS(vp);
1785 
1786 	SFS_NODE_ASSERT(vp);
1787 
1788 	*ap->a_recycle = (node->sn_parent_id == ZFSCTL_INO_SNAPDIR);
1789 
1790 	return 0;
1791 }
1792 
1793 static int
sfs_reclaim(void * v)1794 sfs_reclaim(void *v)
1795 {
1796 	struct vop_reclaim_v2_args /* {
1797 		struct vnode *a_vp;
1798 	} */ *ap = v;
1799 	vnode_t *vp = ap->a_vp;
1800 	struct sfs_node *node = VTOSFS(vp);
1801 
1802 	SFS_NODE_ASSERT(ap->a_vp);
1803 
1804 	vp->v_data = NULL;
1805 	VOP_UNLOCK(vp, 0);
1806 
1807 	kmem_free(node, sizeof(*node));
1808 
1809 	return 0;
1810 }
1811 
1812 static int
sfs_print(void * v)1813 sfs_print(void *v)
1814 {
1815 	struct vop_print_args /* {
1816 		struct vnode *a_vp;
1817 	} */ *ap = v;
1818 	struct sfs_node *node = VTOSFS(ap->a_vp);
1819 
1820 	SFS_NODE_ASSERT(ap->a_vp);
1821 
1822 	printf("\tid %" PRIu64 ", parent %" PRIu64 "\n",
1823 	    node->sn_id, node->sn_parent_id);
1824 
1825 	return 0;
1826 }
1827 
/*
 * Vnode operation table for control directory nodes.  The sfs_* entries
 * are implemented in this file, the genfs_* entries are generic handlers,
 * and the default entry is vn_default_error.
 */
const struct vnodeopv_entry_desc zfs_sfsop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_parsepath_desc,		genfs_parsepath },
	{ &vop_lookup_desc,		sfs_lookup },
	{ &vop_open_desc,		sfs_open },
	{ &vop_close_desc,		sfs_close },
	{ &vop_access_desc,		sfs_access },
	{ &vop_getattr_desc,		sfs_getattr },
	{ &vop_lock_desc,		genfs_lock },
	{ &vop_unlock_desc,		genfs_unlock },
	{ &vop_readdir_desc,		sfs_readdir },
	{ &vop_inactive_desc,		sfs_inactive },
	{ &vop_reclaim_desc,		sfs_reclaim },
	{ &vop_seek_desc,		genfs_seek },
	{ &vop_putpages_desc,		genfs_null_putpages },
	{ &vop_islocked_desc,		genfs_islocked },
	{ &vop_print_desc,		sfs_print },
	{ &vop_pathconf_desc,		genfs_pathconf },
	{ NULL, NULL }
};

/* Ties the table above to the zfs_sfsop_p vector pointer. */
const struct vnodeopv_desc zfs_sfsop_opv_desc =
	{ &zfs_sfsop_p, zfs_sfsop_entries };
1851 
/*
 * Module initialisation hook for the control directory code.
 * This variant has nothing to set up.
 */
void
zfsctl_init(void)
{
	/* Intentionally empty. */
}
1856 
/*
 * Module teardown hook for the control directory code.
 * This variant has nothing to release.
 */
void
zfsctl_fini(void)
{
	/* Intentionally empty. */
}
1861 
1862 int
zfsctl_loadvnode(vfs_t * vfsp,vnode_t * vp,const void * key,size_t key_len,const void ** new_key)1863 zfsctl_loadvnode(vfs_t *vfsp, vnode_t *vp,
1864     const void *key, size_t key_len, const void **new_key)
1865 {
1866 	struct sfs_node_key node_key;
1867 	struct sfs_node *node;
1868 
1869 	if (key_len != sizeof(node_key))
1870 		return EINVAL;
1871 	if ((vfsp->mnt_iflag & IMNT_UNMOUNT))
1872 		return ENOENT;
1873 
1874 	memcpy(&node_key, key, key_len);
1875 
1876 	node = kmem_alloc(sizeof(*node), KM_SLEEP);
1877 
1878 	node->sn_mounting = NULL;
1879 	node->sn_key = node_key;
1880 
1881 	vp->v_data = node;
1882 	vp->v_op = zfs_sfsop_p;
1883 	vp->v_tag = VT_ZFS;
1884 	vp->v_type = VDIR;
1885 	uvm_vnp_setsize(vp, 0);
1886 
1887 	*new_key = &node->sn_key;
1888 
1889 	return 0;
1890 }
1891 
1892 int
zfsctl_vptofh(vnode_t * vp,fid_t * fidp,size_t * fh_size)1893 zfsctl_vptofh(vnode_t *vp, fid_t *fidp, size_t *fh_size)
1894 {
1895 	struct sfs_node *node = VTOSFS(vp);
1896 	uint64_t object = node->sn_id;
1897 	zfid_short_t *zfid = (zfid_short_t *)fidp;
1898 	int i;
1899 
1900 	SFS_NODE_ASSERT(vp);
1901 
1902 	if (*fh_size < SHORT_FID_LEN) {
1903 		*fh_size = SHORT_FID_LEN;
1904 		return SET_ERROR(E2BIG);
1905 	}
1906 	*fh_size = SHORT_FID_LEN;
1907 
1908 	zfid->zf_len = SHORT_FID_LEN;
1909 	for (i = 0; i < sizeof(zfid->zf_object); i++)
1910 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
1911 
1912 	/* .zfs nodes always have a generation number of 0 */
1913 	for (i = 0; i < sizeof(zfid->zf_gen); i++)
1914 		zfid->zf_gen[i] = 0;
1915 
1916 	return 0;
1917 }
1918 
1919 /*
1920  * Return the ".zfs" vnode.
1921  */
1922 int
zfsctl_root(zfsvfs_t * zfsvfs,vnode_t ** vpp)1923 zfsctl_root(zfsvfs_t *zfsvfs, vnode_t **vpp)
1924 {
1925 	struct sfs_node_key key = {
1926 		.parent_id = 0,
1927 		.id = ZFSCTL_INO_ROOT
1928 	};
1929 
1930 	return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1931 }
1932 
1933 /*
1934  * Return the ".zfs/snapshot" vnode.
1935  */
1936 int
zfsctl_snapshot(zfsvfs_t * zfsvfs,vnode_t ** vpp)1937 zfsctl_snapshot(zfsvfs_t *zfsvfs, vnode_t **vpp)
1938 {
1939 	struct sfs_node_key key = {
1940 		.parent_id = ZFSCTL_INO_ROOT,
1941 		.id = ZFSCTL_INO_SNAPDIR
1942 	};
1943 
1944 	return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1945 }
1946 
1947 void
zfsctl_create(zfsvfs_t * zfsvfs)1948 zfsctl_create(zfsvfs_t *zfsvfs)
1949 {
1950 	vnode_t *vp;
1951 	struct zfsctl_root *zc;
1952 	uint64_t crtime[2];
1953 
1954 	zc = kmem_alloc(sizeof(*zc), KM_SLEEP);
1955 
1956 	VERIFY(0 == VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &vp));
1957 	VERIFY(0 == sa_lookup(VTOZ(vp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
1958 	    &crtime, sizeof(crtime)));
1959 	vput(vp);
1960 
1961 	ZFS_TIME_DECODE(&zc->zc_cmtime, crtime);
1962 
1963 	ASSERT(zfsvfs->z_ctldir == NULL);
1964 	zfsvfs->z_ctldir = zc;
1965 }
1966 
1967 void
zfsctl_destroy(zfsvfs_t * zfsvfs)1968 zfsctl_destroy(zfsvfs_t *zfsvfs)
1969 {
1970 	struct zfsctl_root *zc = zfsvfs->z_ctldir;
1971 
1972 	ASSERT(zfsvfs->z_ctldir);
1973  	zfsvfs->z_ctldir = NULL;
1974 	kmem_free(zc, sizeof(*zc));
1975 }
1976 
1977 int
zfsctl_lookup_objset(vfs_t * vfsp,uint64_t objsetid,zfsvfs_t ** zfsvfsp)1978 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1979 {
1980 	struct sfs_node_key key = {
1981 		.parent_id = ZFSCTL_INO_SNAPDIR,
1982 		.id = objsetid
1983 	};
1984 	vnode_t *vp;
1985 	int error;
1986 
1987 	*zfsvfsp = NULL;
1988 	error = vcache_get(vfsp, &key, sizeof(key), &vp);
1989 	if (error == 0) {
1990 		if (vp->v_mountedhere)
1991 			*zfsvfsp = vp->v_mountedhere->mnt_data;
1992 		vrele(vp);
1993 	}
1994 	if (*zfsvfsp == NULL)
1995 		return SET_ERROR(EINVAL);
1996 	return 0;
1997 }
1998 
1999 int
zfsctl_umount_snapshots(vfs_t * vfsp,int fflags,cred_t * cr)2000 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
2001 {
2002 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
2003 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2004 	struct mount *mp;
2005 	vnode_t *vp;
2006 	struct sfs_node_key key;
2007 	uint64_t cookie;
2008 	int error;
2009 
2010 	ASSERT(zfsvfs->z_ctldir);
2011 
2012 	cookie = 0;
2013 	key.parent_id = ZFSCTL_INO_SNAPDIR;
2014 	for (;;) {
2015 		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
2016 		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
2017 		    snapname, &key.id, &cookie, NULL);
2018 		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
2019 		if (error) {
2020 			if (error == ENOENT)
2021 				error = 0;
2022 			break;
2023 		}
2024 
2025 		error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), &vp);
2026 		if (error == ENOENT)
2027 			continue;
2028 		else if (error)
2029 			break;
2030 
2031 		mp = vp->v_mountedhere;
2032 		if (mp == NULL) {
2033 			vrele(vp);
2034 			continue;
2035 		}
2036 
2037 		error = dounmount(mp, fflags, curthread);
2038 		vrele(vp);
2039 		if (error)
2040 			break;
2041 	}
2042 	ASSERT((fflags & MS_FORCE) == 0 || error == 0);
2043 
2044 	return (error);
2045 }
2046 
2047 boolean_t
zfsctl_is_node(vnode_t * vp)2048 zfsctl_is_node(vnode_t *vp)
2049 {
2050 
2051 	return (vp->v_op == zfs_sfsop_p);
2052 }
2053 #endif /* __NetBSD__ */
2054