1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
25 */
26
27 /*
28 * ZFS control directory (a.k.a. ".zfs")
29 *
30 * This directory provides a common location for all ZFS meta-objects.
31 * Currently, this is only the 'snapshot' directory, but this may expand in the
32 * future. The elements are built using the GFS primitives, as the hierarchy
33 * does not actually exist on disk.
34 *
35 * For 'snapshot', we don't want to have all snapshots always mounted, because
36 * this would take up a huge amount of space in /etc/mnttab. We have three
37 * types of objects:
38 *
39 * ctldir ------> snapshotdir -------> snapshot
40 * |
41 * |
42 * V
43 * mounted fs
44 *
45 * The 'snapshot' node contains just enough information to lookup '..' and act
46 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
47 * perform an automount of the underlying filesystem and return the
48 * corresponding vnode.
49 *
50 * All mounts are handled automatically by the kernel, but unmounts are
51 * (currently) handled from user land. The main reason is that there is no
52 * reliable way to auto-unmount the filesystem when it's "no longer in use".
53 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
54 * unmounts any snapshots within the snapshot directory.
55 *
56 * The '.zfs', '.zfs/snapshot', and all directories created under
57 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
58 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
59 *
60 * File systems mounted ontop of the GFS nodes '.zfs/snapshot/<snapname>'
61 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
62 * However, vnodes within these mounted on file systems have their v_vfsp
63 * fields set to the head filesystem to make NFS happy (see
64 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
65 * so that it cannot be freed until all snapshots have been unmounted.
66 */
67
68 #ifdef __FreeBSD__
69
70 #include <sys/zfs_context.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_ioctl.h>
73 #include <sys/zfs_vfsops.h>
74 #include <sys/namei.h>
75 #include <sys/stat.h>
76 #include <sys/dmu.h>
77 #include <sys/dsl_dataset.h>
78 #include <sys/dsl_destroy.h>
79 #include <sys/dsl_deleg.h>
80 #include <sys/mount.h>
81 #include <sys/zap.h>
82
83 #include "zfs_namecheck.h"
84
85 /*
86 * "Synthetic" filesystem implementation.
87 */
88
89 /*
90 * Assert that A implies B.
91 */
92 #define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) || (B), (msg));
93
94 static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
95
/*
 * A node of the synthetic (".zfs") filesystem.  Nodes are identified by
 * the (sn_parent_id, sn_id) pair -- see sfs_compare_ids() -- and carry
 * their directory-entry name in sn_name.
 */
typedef struct sfs_node {
	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];	/* entry name */
	uint64_t	sn_parent_id;	/* ID of the containing directory */
	uint64_t	sn_id;		/* this node's own ID */
} sfs_node_t;
101
102 /*
103 * Check the parent's ID as well as the node's to account for a chance
104 * that IDs originating from different domains (snapshot IDs, artifical
105 * IDs, znode IDs) may clash.
106 */
107 static int
sfs_compare_ids(struct vnode * vp,void * arg)108 sfs_compare_ids(struct vnode *vp, void *arg)
109 {
110 sfs_node_t *n1 = vp->v_data;
111 sfs_node_t *n2 = arg;
112 bool equal;
113
114 equal = n1->sn_id == n2->sn_id &&
115 n1->sn_parent_id == n2->sn_parent_id;
116
117 /* Zero means equality. */
118 return (!equal);
119 }
120
121 static int
sfs_vnode_get(const struct mount * mp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)122 sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
123 uint64_t id, struct vnode **vpp)
124 {
125 sfs_node_t search;
126 int err;
127
128 search.sn_id = id;
129 search.sn_parent_id = parent_id;
130 err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp,
131 sfs_compare_ids, &search);
132 return (err);
133 }
134
135 static int
sfs_vnode_insert(struct vnode * vp,int flags,uint64_t parent_id,uint64_t id,struct vnode ** vpp)136 sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
137 uint64_t id, struct vnode **vpp)
138 {
139 int err;
140
141 KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
142 err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp,
143 sfs_compare_ids, vp->v_data);
144 return (err);
145 }
146
/* Remove a vnode from the vfs hash; undoes sfs_vnode_insert(). */
static void
sfs_vnode_remove(struct vnode *vp)
{
	vfs_hash_remove(vp);
}
152
/* Callback used by sfs_vgetx() to initialize a freshly allocated vnode. */
typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
154
/*
 * Get (or create) the sfs vnode identified by (parent_id, id).  On
 * success *vpp holds the locked, referenced vnode.  'setup' is invoked
 * on a newly created vnode, while it is exclusively locked, to
 * initialize v_data and lock flags.
 */
static int
sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
    const char *tag, struct vop_vector *vops,
    sfs_vnode_setup_fn setup, void *arg,
    struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	/* Fast path: the vnode may already be hashed. */
	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	/* Allocate a new vnode/inode. */
	error = getnewvnode(tag, mp, vops, &vp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Exclusively lock the vnode while it's being constructed.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		/*
		 * NOTE(review): assumes insmntque() disposes of the vnode
		 * on failure -- confirm against the kernel version in use.
		 */
		*vpp = NULL;
		return (error);
	}

	setup(vp, arg);

	/*
	 * Publish the vnode; if we lost an insertion race, the winner is
	 * returned in *vpp (presumably vfs_hash_insert() discards ours --
	 * verify).
	 */
	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
	if (error != 0 || *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		    "sfs vnode with no data");
		return (error);
	}

	*vpp = vp;
	return (0);
}
200
/* Dump a node's fields to the console; used by the vop_print methods. */
static void
sfs_print_node(sfs_node_t *node)
{
	printf("\tname = %s\n", node->sn_name);
	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
}
208
209 static sfs_node_t *
sfs_alloc_node(size_t size,const char * name,uint64_t parent_id,uint64_t id)210 sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
211 {
212 struct sfs_node *node;
213
214 KASSERT(strlen(name) < sizeof(node->sn_name),
215 ("sfs node name is too long"));
216 KASSERT(size >= sizeof(*node), ("sfs node size is too small"));
217 node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
218 strlcpy(node->sn_name, name, sizeof(node->sn_name));
219 node->sn_parent_id = parent_id;
220 node->sn_id = id;
221
222 return (node);
223 }
224
/* Free a node allocated by sfs_alloc_node(). */
static void
sfs_destroy_node(sfs_node_t *node)
{
	free(node, M_SFSNODES);
}
230
231 static void *
sfs_reclaim_vnode(vnode_t * vp)232 sfs_reclaim_vnode(vnode_t *vp)
233 {
234 sfs_node_t *node;
235 void *data;
236
237 sfs_vnode_remove(vp);
238 data = vp->v_data;
239 vp->v_data = NULL;
240 return (data);
241 }
242
/*
 * Emit the synthetic "." and ".." entries for a directory whose own ID
 * is 'id' and whose parent's ID is 'parent_id'.  Directory offsets are
 * multiples of sizeof(struct dirent): 0 is ".", sizeof(entry) is "..".
 * On success *offp is set past both entries so the caller can append
 * its own entries.
 */
static int
sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
    uio_t *uio, off_t *offp)
{
	struct dirent entry;
	int error;

	/* Reset ncookies for subsequent use of vfs_read_dirent. */
	if (ap->a_ncookies != NULL)
		*ap->a_ncookies = 0;

	/* Require room for at least one full entry. */
	if (uio->uio_resid < sizeof(entry))
		return (SET_ERROR(EINVAL));

	if (uio->uio_offset < 0)
		return (SET_ERROR(EINVAL));
	if (uio->uio_offset == 0) {
		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '\0';
		entry.d_namlen = 1;
		entry.d_reclen = sizeof(entry);
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0)
			return (SET_ERROR(error));
	}

	/* A non-zero offset inside the "." slot is malformed. */
	if (uio->uio_offset < sizeof(entry))
		return (SET_ERROR(EINVAL));
	if (uio->uio_offset == sizeof(entry)) {
		entry.d_fileno = parent_id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '.';
		entry.d_name[2] = '\0';
		entry.d_namlen = 2;
		entry.d_reclen = sizeof(entry);
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0)
			return (SET_ERROR(error));
	}

	if (offp != NULL)
		*offp = 2 * sizeof(entry);
	return (0);
}
290
291
292 /*
293 * .zfs inode namespace
294 *
295 * We need to generate unique inode numbers for all files and directories
296 * within the .zfs pseudo-filesystem. We use the following scheme:
297 *
298 * ENTRY ZFSCTL_INODE
299 * .zfs 1
300 * .zfs/snapshot 2
301 * .zfs/snapshot/<snap> objectid(snap)
302 */
/* A snapshot's inode number is simply its dataset object ID. */
#define	ZFSCTL_INO_SNAP(id)	(id)
304
305 static struct vop_vector zfsctl_ops_root;
306 static struct vop_vector zfsctl_ops_snapdir;
307 static struct vop_vector zfsctl_ops_snapshot;
308 static struct vop_vector zfsctl_ops_shares_dir;
309
/* Module-load initialization hook; nothing to do on this platform. */
void
zfsctl_init(void)
{
}
314
/* Module-unload teardown hook; nothing to do on this platform. */
void
zfsctl_fini(void)
{
}
319
/*
 * Return B_TRUE if vp belongs to the '.zfs' hierarchy, i.e. it uses one
 * of the control-directory vop tables.
 */
boolean_t
zfsctl_is_node(vnode_t *vp)
{
	return (vn_matchops(vp, zfsctl_ops_root) ||
	    vn_matchops(vp, zfsctl_ops_snapdir) ||
	    vn_matchops(vp, zfsctl_ops_snapshot) ||
	    vn_matchops(vp, zfsctl_ops_shares_dir));

}
329
/*
 * The '.zfs' root node.  Embeds the generic sfs node (must stay first so
 * the struct can be cast to sfs_node_t) plus the 'snapshot' child and
 * the creation time reported by getattr.
 */
typedef struct zfsctl_root {
	sfs_node_t	node;		/* generic node; must be first */
	sfs_node_t	*snapdir;	/* the '.zfs/snapshot' child */
	timestruc_t	cmtime;		/* c/mtime reported for '.zfs' */
} zfsctl_root_t;
335
336
337 /*
338 * Create the '.zfs' directory.
339 */
340 void
zfsctl_create(zfsvfs_t * zfsvfs)341 zfsctl_create(zfsvfs_t *zfsvfs)
342 {
343 zfsctl_root_t *dot_zfs;
344 sfs_node_t *snapdir;
345 vnode_t *rvp;
346 uint64_t crtime[2];
347
348 ASSERT(zfsvfs->z_ctldir == NULL);
349
350 snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT,
351 ZFSCTL_INO_SNAPDIR);
352 dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof(*dot_zfs), ".zfs", 0,
353 ZFSCTL_INO_ROOT);
354 dot_zfs->snapdir = snapdir;
355
356 VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
357 VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
358 &crtime, sizeof(crtime)));
359 ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
360 vput(rvp);
361
362 zfsvfs->z_ctldir = dot_zfs;
363 }
364
365 /*
366 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
367 * The nodes must not have any associated vnodes by now as they should be
368 * vflush-ed.
369 */
370 void
zfsctl_destroy(zfsvfs_t * zfsvfs)371 zfsctl_destroy(zfsvfs_t *zfsvfs)
372 {
373 sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
374 sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
375 zfsvfs->z_ctldir = NULL;
376 }
377
/*
 * Trampoline with the vn_vget_ino_gen() callback signature returning the
 * root vnode of 'mp'; used to resolve '..' of '.zfs'.
 */
static int
zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
    struct vnode **vpp)
{
	return (VFS_ROOT(mp, flags, vpp));
}
384
385 static void
zfsctl_common_vnode_setup(vnode_t * vp,void * arg)386 zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
387 {
388 ASSERT_VOP_ELOCKED(vp, __func__);
389
390 /* We support shared locking. */
391 VN_LOCK_ASHARE(vp);
392 vp->v_type = VDIR;
393 vp->v_data = arg;
394 }
395
396 static int
zfsctl_root_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)397 zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
398 struct vnode **vpp)
399 {
400 void *node;
401 int err;
402
403 node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir;
404 err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
405 zfsctl_common_vnode_setup, node, vpp);
406 return (err);
407 }
408
409 static int
zfsctl_snapdir_vnode(struct mount * mp,void * arg __unused,int flags,struct vnode ** vpp)410 zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
411 struct vnode **vpp)
412 {
413 void *node;
414 int err;
415
416 node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir;
417 err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
418 &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
419 return (err);
420 }
421
422 /*
423 * Given a root znode, retrieve the associated .zfs directory.
424 * Add a hold to the vnode and return it.
425 */
426 int
zfsctl_root(zfsvfs_t * zfsvfs,int flags,vnode_t ** vpp)427 zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
428 {
429 vnode_t *vp;
430 int error;
431
432 error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
433 return (error);
434 }
435
436 /*
437 * Common open routine. Disallow any write access.
438 */
439 /* ARGSUSED */
440 static int
zfsctl_common_open(struct vop_open_args * ap)441 zfsctl_common_open(struct vop_open_args *ap)
442 {
443 int flags = ap->a_mode;
444
445 if (flags & FWRITE)
446 return (SET_ERROR(EACCES));
447
448 return (0);
449 }
450
451 /*
452 * Common close routine. Nothing to do here.
453 */
454 /* ARGSUSED */
455 static int
zfsctl_common_close(struct vop_close_args * ap)456 zfsctl_common_close(struct vop_close_args *ap)
457 {
458 return (0);
459 }
460
461 /*
462 * Common access routine. Disallow writes.
463 */
464 /* ARGSUSED */
465 static int
zfsctl_common_access(ap)466 zfsctl_common_access(ap)
467 struct vop_access_args /* {
468 struct vnode *a_vp;
469 accmode_t a_accmode;
470 struct ucred *a_cred;
471 struct thread *a_td;
472 } */ *ap;
473 {
474 accmode_t accmode = ap->a_accmode;
475
476 if (accmode & VWRITE)
477 return (SET_ERROR(EACCES));
478 return (0);
479 }
480
481 /*
482 * Common getattr function. Fill in basic information.
483 */
484 static void
zfsctl_common_getattr(vnode_t * vp,vattr_t * vap)485 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
486 {
487 timestruc_t now;
488 sfs_node_t *node;
489
490 node = vp->v_data;
491
492 vap->va_uid = 0;
493 vap->va_gid = 0;
494 vap->va_rdev = 0;
495 /*
496 * We are a purely virtual object, so we have no
497 * blocksize or allocated blocks.
498 */
499 vap->va_blksize = 0;
500 vap->va_nblocks = 0;
501 vap->va_seq = 0;
502 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
503 vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
504 S_IROTH | S_IXOTH;
505 vap->va_type = VDIR;
506 /*
507 * We live in the now (for atime).
508 */
509 gethrestime(&now);
510 vap->va_atime = now;
511 /* FreeBSD: Reset chflags(2) flags. */
512 vap->va_flags = 0;
513
514 vap->va_nodeid = node->sn_id;
515
516 /* At least '.' and '..'. */
517 vap->va_nlink = 2;
518 }
519
520 /*ARGSUSED*/
521 static int
zfsctl_common_fid(ap)522 zfsctl_common_fid(ap)
523 struct vop_fid_args /* {
524 struct vnode *a_vp;
525 struct fid *a_fid;
526 } */ *ap;
527 {
528 vnode_t *vp = ap->a_vp;
529 fid_t *fidp = (void *)ap->a_fid;
530 sfs_node_t *node = vp->v_data;
531 uint64_t object = node->sn_id;
532 zfid_short_t *zfid;
533 int i;
534
535 zfid = (zfid_short_t *)fidp;
536 zfid->zf_len = SHORT_FID_LEN;
537
538 for (i = 0; i < sizeof(zfid->zf_object); i++)
539 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
540
541 /* .zfs nodes always have a generation number of 0 */
542 for (i = 0; i < sizeof(zfid->zf_gen); i++)
543 zfid->zf_gen[i] = 0;
544
545 return (0);
546 }
547
548 static int
zfsctl_common_reclaim(ap)549 zfsctl_common_reclaim(ap)
550 struct vop_reclaim_args /* {
551 struct vnode *a_vp;
552 struct thread *a_td;
553 } */ *ap;
554 {
555 vnode_t *vp = ap->a_vp;
556
557 (void) sfs_reclaim_vnode(vp);
558 return (0);
559 }
560
561 static int
zfsctl_common_print(ap)562 zfsctl_common_print(ap)
563 struct vop_print_args /* {
564 struct vnode *a_vp;
565 } */ *ap;
566 {
567 sfs_print_node(ap->a_vp->v_data);
568 return (0);
569 }
570
571 /*
572 * Get root directory attributes.
573 */
574 /* ARGSUSED */
575 static int
zfsctl_root_getattr(ap)576 zfsctl_root_getattr(ap)
577 struct vop_getattr_args /* {
578 struct vnode *a_vp;
579 struct vattr *a_vap;
580 struct ucred *a_cred;
581 } */ *ap;
582 {
583 struct vnode *vp = ap->a_vp;
584 struct vattr *vap = ap->a_vap;
585 zfsctl_root_t *node = vp->v_data;
586
587 zfsctl_common_getattr(vp, vap);
588 vap->va_ctime = node->cmtime;
589 vap->va_mtime = vap->va_ctime;
590 vap->va_birthtime = vap->va_ctime;
591 vap->va_nlink += 1; /* snapdir */
592 vap->va_size = vap->va_nlink;
593 return (0);
594 }
595
596 /*
597 * When we lookup "." we still can be asked to lock it
598 * differently, can't we?
599 */
600 int
zfsctl_relock_dot(vnode_t * dvp,int ltype)601 zfsctl_relock_dot(vnode_t *dvp, int ltype)
602 {
603 vref(dvp);
604 if (ltype != VOP_ISLOCKED(dvp)) {
605 if (ltype == LK_EXCLUSIVE)
606 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
607 else /* if (ltype == LK_SHARED) */
608 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
609
610 /* Relock for the "." case may left us with reclaimed vnode. */
611 if ((dvp->v_iflag & VI_DOOMED) != 0) {
612 vrele(dvp);
613 return (SET_ERROR(ENOENT));
614 }
615 }
616 return (0);
617 }
618
619 /*
620 * Special case the handling of "..".
621 */
622 int
zfsctl_root_lookup(ap)623 zfsctl_root_lookup(ap)
624 struct vop_lookup_args /* {
625 struct vnode *a_dvp;
626 struct vnode **a_vpp;
627 struct componentname *a_cnp;
628 } */ *ap;
629 {
630 struct componentname *cnp = ap->a_cnp;
631 vnode_t *dvp = ap->a_dvp;
632 vnode_t **vpp = ap->a_vpp;
633 cred_t *cr = ap->a_cnp->cn_cred;
634 int flags = ap->a_cnp->cn_flags;
635 int lkflags = ap->a_cnp->cn_lkflags;
636 int nameiop = ap->a_cnp->cn_nameiop;
637 int err;
638 int ltype;
639
640 ASSERT(dvp->v_type == VDIR);
641
642 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
643 return (SET_ERROR(ENOTSUP));
644
645 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
646 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
647 if (err == 0)
648 *vpp = dvp;
649 } else if ((flags & ISDOTDOT) != 0) {
650 err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
651 lkflags, vpp);
652 } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
653 err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
654 } else {
655 err = SET_ERROR(ENOENT);
656 }
657 if (err != 0)
658 *vpp = NULL;
659 return (err);
660 }
661
/*
 * Read the '.zfs' directory: ".", "..", and the single 'snapshot'
 * entry.  ".." reports the real root directory's object number.
 */
static int
zfsctl_root_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		int *ncookies;
		u_long **a_cookies;
	} */ *ap;
{
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_root_t *node = vp->v_data;
	uio_t *uio = ap->a_uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	ASSERT(vp->v_type == VDIR);

	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
	    &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}
	/*
	 * NOTE(review): a continuation at an offset past the dot entries
	 * (i.e. after 'snapshot' was already returned) takes this EINVAL
	 * path rather than reporting EOF -- confirm this is intended.
	 */
	if (uio->uio_offset != dots_offset)
		return (SET_ERROR(EINVAL));

	/* Emit the single real entry, the 'snapshot' directory. */
	CTASSERT(sizeof(node->snapdir->sn_name) <= sizeof(entry.d_name));
	entry.d_fileno = node->snapdir->sn_id;
	entry.d_type = DT_DIR;
	strcpy(entry.d_name, node->snapdir->sn_name);
	entry.d_namlen = strlen(entry.d_name);
	entry.d_reclen = sizeof(entry);
	error = vfs_read_dirent(ap, &entry, uio->uio_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG)
			error = 0;
		return (SET_ERROR(error));
	}
	if (eofp != NULL)
		*eofp = 1;
	return (0);
}
710
/*
 * Reverse lookup for '.zfs': report the name ".zfs" and return the
 * parent (the filesystem's real root) held but unlocked.
 */
static int
zfsctl_root_vptocnp(struct vop_vptocnp_args *ap)
{
	static const char dotzfs_name[4] = ".zfs";
	vnode_t *dvp;
	int error;

	if (*ap->a_buflen < sizeof (dotzfs_name))
		return (SET_ERROR(ENOMEM));

	error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL,
	    LK_SHARED, &dvp);
	if (error != 0)
		return (SET_ERROR(error));

	VOP_UNLOCK(dvp, 0);
	*ap->a_vpp = dvp;
	/* The name is copied in front of the current buffer position. */
	*ap->a_buflen -= sizeof (dotzfs_name);
	bcopy(dotzfs_name, ap->a_buf + *ap->a_buflen, sizeof (dotzfs_name));
	return (0);
}
732
/*
 * VOP table for the '.zfs' directory itself.  Unlisted operations fall
 * through to default_vnodeops; writes are rejected in open/access.
 */
static struct vop_vector zfsctl_ops_root = {
	.vop_default =	&default_vnodeops,
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_ioctl =	VOP_EINVAL,
	.vop_getattr =	zfsctl_root_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_root_readdir,
	.vop_lookup =	zfsctl_root_lookup,
	.vop_inactive =	VOP_NULL,
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
	.vop_vptocnp =	zfsctl_root_vptocnp,
};
748
/*
 * Build the full snapshot dataset name "<dataset>@<name>" into zname
 * (of capacity len) for the filesystem that vp belongs to.  Returns
 * ENAMETOOLONG if the result would not fit.
 */
static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	dmu_objset_name(os, zname);
	/* Length is validated here before the unchecked strcat()s below. */
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}
761
762 static int
zfsctl_snapshot_lookup(vnode_t * vp,const char * name,uint64_t * id)763 zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id)
764 {
765 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
766 int err;
767
768 err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
769 return (err);
770 }
771
772 /*
773 * Given a vnode get a root vnode of a filesystem mounted on top of
774 * the vnode, if any. The root vnode is referenced and locked.
775 * If no filesystem is mounted then the orinal vnode remains referenced
776 * and locked. If any error happens the orinal vnode is unlocked and
777 * released.
778 */
779 static int
zfsctl_mounted_here(vnode_t ** vpp,int flags)780 zfsctl_mounted_here(vnode_t **vpp, int flags)
781 {
782 struct mount *mp;
783 int err;
784
785 ASSERT_VOP_LOCKED(*vpp, __func__);
786 ASSERT3S((*vpp)->v_type, ==, VDIR);
787
788 if ((mp = (*vpp)->v_mountedhere) != NULL) {
789 err = vfs_busy(mp, 0);
790 KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
791 KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
792 vput(*vpp);
793 err = VFS_ROOT(mp, flags, vpp);
794 vfs_unbusy(mp);
795 return (err);
796 }
797 return (EJUSTRETURN);
798 }
799
/* Arguments passed to zfsctl_snapshot_vnode_setup() via sfs_vgetx(). */
typedef struct {
	const char *snap_name;
	uint64_t    snap_id;
} snapshot_setup_arg_t;
804
805 static void
zfsctl_snapshot_vnode_setup(vnode_t * vp,void * arg)806 zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg)
807 {
808 snapshot_setup_arg_t *ssa = arg;
809 sfs_node_t *node;
810
811 ASSERT_VOP_ELOCKED(vp, __func__);
812
813 node = sfs_alloc_node(sizeof(sfs_node_t),
814 ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
815 zfsctl_common_vnode_setup(vp, node);
816
817 /* We have to support recursive locking. */
818 VN_LOCK_AREC(vp);
819 }
820
821 /*
822 * Lookup entry point for the 'snapshot' directory. Try to open the
823 * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
824 * Perform a mount of the associated dataset on top of the vnode.
825 */
826 /* ARGSUSED */
827 int
zfsctl_snapdir_lookup(ap)828 zfsctl_snapdir_lookup(ap)
829 struct vop_lookup_args /* {
830 struct vnode *a_dvp;
831 struct vnode **a_vpp;
832 struct componentname *a_cnp;
833 } */ *ap;
834 {
835 vnode_t *dvp = ap->a_dvp;
836 vnode_t **vpp = ap->a_vpp;
837 struct componentname *cnp = ap->a_cnp;
838 char name[NAME_MAX + 1];
839 char fullname[ZFS_MAX_DATASET_NAME_LEN];
840 char *mountpoint;
841 size_t mountpoint_len;
842 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
843 uint64_t snap_id;
844 int nameiop = cnp->cn_nameiop;
845 int lkflags = cnp->cn_lkflags;
846 int flags = cnp->cn_flags;
847 int err;
848
849 ASSERT(dvp->v_type == VDIR);
850
851 if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
852 return (SET_ERROR(ENOTSUP));
853
854 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
855 err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
856 if (err == 0)
857 *vpp = dvp;
858 return (err);
859 }
860 if (flags & ISDOTDOT) {
861 err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
862 vpp);
863 return (err);
864 }
865
866 if (cnp->cn_namelen >= sizeof(name))
867 return (SET_ERROR(ENAMETOOLONG));
868
869 strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
870 err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
871 if (err != 0)
872 return (SET_ERROR(ENOENT));
873
874 for (;;) {
875 snapshot_setup_arg_t ssa;
876
877 ssa.snap_name = name;
878 ssa.snap_id = snap_id;
879 err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
880 snap_id, "zfs", &zfsctl_ops_snapshot,
881 zfsctl_snapshot_vnode_setup, &ssa, vpp);
882 if (err != 0)
883 return (err);
884
885 /* Check if a new vnode has just been created. */
886 if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
887 break;
888
889 /*
890 * The vnode must be referenced at least by this thread and
891 * the mounted snapshot or the thread doing the mounting.
892 * There can be more references from concurrent lookups.
893 */
894 KASSERT(vrefcnt(*vpp) > 1, ("found unreferenced mountpoint"));
895
896 /*
897 * Check if a snapshot is already mounted on top of the vnode.
898 */
899 err = zfsctl_mounted_here(vpp, lkflags);
900 if (err != EJUSTRETURN)
901 return (err);
902
903 #ifdef INVARIANTS
904 /*
905 * If the vnode not covered yet, then the mount operation
906 * must be in progress.
907 */
908 VI_LOCK(*vpp);
909 KASSERT(((*vpp)->v_iflag & VI_MOUNT) != 0,
910 ("snapshot vnode not covered"));
911 VI_UNLOCK(*vpp);
912 #endif
913 vput(*vpp);
914
915 /*
916 * In this situation we can loop on uncontested locks and starve
917 * the thread doing the lengthy, non-trivial mount operation.
918 */
919 kern_yield(PRI_USER);
920 }
921
922 VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof(fullname), fullname));
923
924 mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
925 strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
926 mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
927 (void) snprintf(mountpoint, mountpoint_len,
928 "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
929 dvp->v_vfsp->mnt_stat.f_mntonname, name);
930
931 err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
932 kmem_free(mountpoint, mountpoint_len);
933 if (err == 0) {
934 /*
935 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
936 *
937 * This is where we lie about our v_vfsp in order to
938 * make .zfs/snapshot/<snapname> accessible over NFS
939 * without requiring manual mounts of <snapname>.
940 */
941 ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
942 VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
943
944 /* Clear the root flag (set via VFS_ROOT) as well. */
945 (*vpp)->v_vflag &= ~VV_ROOT;
946 }
947
948 if (err != 0)
949 *vpp = NULL;
950 return (err);
951 }
952
/*
 * Read the '.zfs/snapshot' directory: ".", "..", then one entry per
 * snapshot.  Offsets past the dot entries encode the ZAP cursor used by
 * dmu_snapshot_list_next(), biased by dots_offset.
 */
static int
zfsctl_snapdir_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		int *ncookies;
		u_long **a_cookies;
	} */ *ap;
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	struct dirent entry;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	uio_t *uio = ap->a_uio;
	int *eofp = ap->a_eofflag;
	off_t dots_offset;
	int error;

	ASSERT(vp->v_type == VDIR);

	error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, uio,
	    &dots_offset);
	if (error != 0) {
		if (error == ENAMETOOLONG) /* ran out of destination space */
			error = 0;
		return (error);
	}

	for (;;) {
		uint64_t cookie;
		uint64_t id;

		/* Recover the snapshot-list cursor from the uio offset. */
		cookie = uio->uio_offset - dots_offset;

		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
		    snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
			/* ENOENT means the snapshot list is exhausted. */
			if (error == ENOENT) {
				if (eofp != NULL)
					*eofp = 1;
				error = 0;
			}
			return (error);
		}

		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		strcpy(entry.d_name, snapname);
		entry.d_namlen = strlen(entry.d_name);
		entry.d_reclen = sizeof(entry);
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0) {
			/* Destination buffer full: stop without error. */
			if (error == ENAMETOOLONG)
				error = 0;
			return (SET_ERROR(error));
		}
		/* Store the advanced cursor back into the uio offset. */
		uio->uio_offset = cookie + dots_offset;
	}
	/* NOTREACHED */
}
1017
/*
 * Get attributes of '.zfs/snapshot': the common virtual-directory
 * attributes, the dataset's snapshot-list change time, and one link per
 * existing snapshot.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
	sfs_node_t *node = vp->v_data;
	uint64_t snap_count;
	int err;

	zfsctl_common_getattr(vp, vap);
	vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
	vap->va_mtime = vap->va_ctime;
	vap->va_birthtime = vap->va_ctime;
	/* A zero zapobj means the dataset has never had a snapshot. */
	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
		err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
		if (err != 0)
			return (err);
		vap->va_nlink += snap_count;
	}
	vap->va_size = vap->va_nlink;

	return (0);
}
1050
/*
 * VOP table for '.zfs/snapshot'.  Lookup triggers the snapshot automount
 * (see zfsctl_snapdir_lookup()); writes are rejected in open/access.
 */
static struct vop_vector zfsctl_ops_snapdir = {
	.vop_default =	&default_vnodeops,
	.vop_open =	zfsctl_common_open,
	.vop_close =	zfsctl_common_close,
	.vop_getattr =	zfsctl_snapdir_getattr,
	.vop_access =	zfsctl_common_access,
	.vop_readdir =	zfsctl_snapdir_readdir,
	.vop_lookup =	zfsctl_snapdir_lookup,
	.vop_reclaim =	zfsctl_common_reclaim,
	.vop_fid =	zfsctl_common_fid,
	.vop_print =	zfsctl_common_print,
};
1063
/*
 * A snapshot mountpoint vnode is recycled as soon as it becomes
 * inactive; the VERIFY asserts that recycling succeeds here.
 */
static int
zfsctl_snapshot_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	vnode_t *vp = ap->a_vp;

	VERIFY(vrecycle(vp) == 1);
	return (0);
}
1076
1077 static int
zfsctl_snapshot_reclaim(ap)1078 zfsctl_snapshot_reclaim(ap)
1079 struct vop_reclaim_args /* {
1080 struct vnode *a_vp;
1081 struct thread *a_td;
1082 } */ *ap;
1083 {
1084 vnode_t *vp = ap->a_vp;
1085 void *data = vp->v_data;
1086
1087 sfs_reclaim_vnode(vp);
1088 sfs_destroy_node(data);
1089 return (0);
1090 }
1091
/*
 * Reverse lookup for a snapshot mountpoint: report the snapshot's name
 * and return '.zfs/snapshot' as the parent (held, unlocked).
 */
static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
{
	struct mount *mp;
	vnode_t *dvp;
	vnode_t *vp;
	sfs_node_t *node;
	size_t len;
	int locked;
	int error;

	vp = ap->a_vp;
	node = vp->v_data;
	len = strlen(node->sn_name);
	if (*ap->a_buflen < len)
		return (SET_ERROR(ENOMEM));

	/*
	 * Prevent unmounting of the snapshot while the vnode lock
	 * is not held.  That is not strictly required, but allows
	 * us to assert that an uncovered snapshot vnode is never
	 * "leaked".
	 */
	mp = vp->v_mountedhere;
	if (mp == NULL)
		return (SET_ERROR(ENOENT));
	error = vfs_busy(mp, 0);
	KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));

	/*
	 * We can vput the vnode as we can now depend on the reference owned
	 * by the busied mp.  But we also need to hold the vnode, because
	 * the reference may go after vfs_unbusy() which has to be called
	 * before we can lock the vnode again.
	 */
	locked = VOP_ISLOCKED(vp);
	vhold(vp);
	vput(vp);

	/* Look up .zfs/snapshot, our parent. */
	error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
	if (error == 0) {
		VOP_UNLOCK(dvp, 0);
		*ap->a_vpp = dvp;
		/* The name is copied in front of the buffer position. */
		*ap->a_buflen -= len;
		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
	}
	/* Restore the original lock state before returning. */
	vfs_unbusy(mp);
	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
	return (error);
}
1143
1144 /*
1145 * These VP's should never see the light of day. They should always
1146 * be covered.
1147 */
1148 static struct vop_vector zfsctl_ops_snapshot = {
1149 .vop_default = NULL, /* ensure very restricted access */
1150 .vop_inactive = zfsctl_snapshot_inactive,
1151 .vop_reclaim = zfsctl_snapshot_reclaim,
1152 .vop_vptocnp = zfsctl_snapshot_vptocnp,
1153 .vop_lock1 = vop_stdlock,
1154 .vop_unlock = vop_stdunlock,
1155 .vop_islocked = vop_stdislocked,
1156 .vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */
1157 .vop_print = zfsctl_common_print,
1158 };
1159
1160 int
zfsctl_lookup_objset(vfs_t * vfsp,uint64_t objsetid,zfsvfs_t ** zfsvfsp)1161 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1162 {
1163 struct mount *mp;
1164 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1165 vnode_t *vp;
1166 int error;
1167
1168 ASSERT(zfsvfs->z_ctldir != NULL);
1169 *zfsvfsp = NULL;
1170 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1171 ZFSCTL_INO_SNAPDIR, objsetid, &vp);
1172 if (error == 0 && vp != NULL) {
1173 /*
1174 * XXX Probably need to at least reference, if not busy, the mp.
1175 */
1176 if (vp->v_mountedhere != NULL)
1177 *zfsvfsp = vp->v_mountedhere->mnt_data;
1178 vput(vp);
1179 }
1180 if (*zfsvfsp == NULL)
1181 return (SET_ERROR(EINVAL));
1182 return (0);
1183 }
1184
1185 /*
1186 * Unmount any snapshots for the given filesystem. This is called from
1187 * zfs_umount() - if we have a ctldir, then go through and unmount all the
1188 * snapshots.
1189 */
1190 int
zfsctl_umount_snapshots(vfs_t * vfsp,int fflags,cred_t * cr)1191 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1192 {
1193 char snapname[ZFS_MAX_DATASET_NAME_LEN];
1194 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1195 struct mount *mp;
1196 vnode_t *dvp;
1197 vnode_t *vp;
1198 sfs_node_t *node;
1199 sfs_node_t *snap;
1200 uint64_t cookie;
1201 int error;
1202
1203 ASSERT(zfsvfs->z_ctldir != NULL);
1204
1205 cookie = 0;
1206 for (;;) {
1207 uint64_t id;
1208
1209 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
1210 error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
1211 snapname, &id, &cookie, NULL);
1212 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
1213 if (error != 0) {
1214 if (error == ENOENT)
1215 error = 0;
1216 break;
1217 }
1218
1219 for (;;) {
1220 error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
1221 ZFSCTL_INO_SNAPDIR, id, &vp);
1222 if (error != 0 || vp == NULL)
1223 break;
1224
1225 mp = vp->v_mountedhere;
1226
1227 /*
1228 * v_mountedhere being NULL means that the
1229 * (uncovered) vnode is in a transient state
1230 * (mounting or unmounting), so loop until it
1231 * settles down.
1232 */
1233 if (mp != NULL)
1234 break;
1235 vput(vp);
1236 }
1237 if (error != 0)
1238 break;
1239 if (vp == NULL)
1240 continue; /* no mountpoint, nothing to do */
1241
1242 /*
1243 * The mount-point vnode is kept locked to avoid spurious EBUSY
1244 * from a concurrent umount.
1245 * The vnode lock must have recursive locking enabled.
1246 */
1247 vfs_ref(mp);
1248 error = dounmount(mp, fflags, curthread);
1249 KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
1250 ("extra references after unmount"));
1251 vput(vp);
1252 if (error != 0)
1253 break;
1254 }
1255 KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
1256 ("force unmounting failed"));
1257 return (error);
1258 }
1259
1260 #endif /* __FreeBSD__ */
1261
1262 #ifdef __NetBSD__
1263
1264 #include <sys/malloc.h>
1265 #include <sys/pathname.h>
1266 #include <miscfs/genfs/genfs.h>
1267 #include <sys/zfs_context.h>
1268 #include <sys/zfs_ctldir.h>
1269 #include <sys/dsl_dataset.h>
1270 #include <sys/zap.h>
1271
/* Per-filesystem state for the NetBSD ".zfs" control directory. */
struct zfsctl_root {
	timestruc_t zc_cmtime;	/* crtime of the fs root; see zfsctl_create() */
};

/*
 * vcache key for an sfs node: the parent directory's id plus the
 * node's own id (a ZFSCTL_INO_* constant or a snapshot's objset id).
 */
struct sfs_node_key {
	uint64_t parent_id;
	uint64_t id;
};
/* A node of the synthetic ".zfs" hierarchy, hung off vp->v_data. */
struct sfs_node {
	struct sfs_node_key sn_key;
#define sn_parent_id sn_key.parent_id
#define sn_id sn_key.id
	lwp_t *sn_mounting;	/* lwp mounting this snapshot, or NULL */
};
1286
/* Name of the snapshot directory inside ".zfs". */
#define ZFS_SNAPDIR_NAME "snapshot"

#define VTOSFS(vp) ((struct sfs_node *)((vp)->v_data))

/*
 * Assert that "vp" is a node of the synthetic ".zfs" hierarchy.
 * The asserts reference "vp" directly; the former local "np" was
 * never used and only produced set-but-unused warnings when ASSERT
 * compiles away, so it has been dropped (VTOSFS() is a pure cast,
 * no side effects).
 */
#define SFS_NODE_ASSERT(vp) \
	do { \
		ASSERT((vp)->v_op == zfs_sfsop_p); \
		ASSERT((vp)->v_type == VDIR); \
	} while (/*CONSTCOND*/ 0)

/* Filled in from zfs_sfsop_entries at vnodeops initialization. */
static int (**zfs_sfsop_p)(void *);
1299
1300 /*
1301 * Mount a snapshot. Cannot use do_sys_umount() as it
1302 * doesn't allow its "path" argument from SYSSPACE.
1303 */
1304 static int
sfs_snapshot_mount(vnode_t * vp,const char * snapname)1305 sfs_snapshot_mount(vnode_t *vp, const char *snapname)
1306 {
1307 struct sfs_node *node = VTOSFS(vp);
1308 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1309 vfs_t *vfsp;
1310 char *path, *osname;
1311 int error;
1312 extern int zfs_domount(vfs_t *, char *);
1313
1314 path = PNBUF_GET();
1315 osname = PNBUF_GET();
1316
1317 dmu_objset_name(zfsvfs->z_os, path);
1318 snprintf(osname, MAXPATHLEN, "%s@%s", path, snapname);
1319 snprintf(path, MAXPATHLEN,
1320 "%s/" ZFS_CTLDIR_NAME "/" ZFS_SNAPDIR_NAME "/%s",
1321 vp->v_vfsp->mnt_stat.f_mntonname, snapname);
1322
1323 vfsp = vfs_mountalloc(vp->v_vfsp->mnt_op, vp);
1324 if (vfsp == NULL) {
1325 error = ENOMEM;
1326 goto out;
1327 }
1328 vfsp->mnt_op->vfs_refcount++;
1329 vfsp->mnt_stat.f_owner = 0;
1330 vfsp->mnt_flag = MNT_RDONLY | MNT_NOSUID | MNT_IGNORE;
1331
1332 mutex_enter(vfsp->mnt_updating);
1333
1334 error = zfs_domount(vfsp, osname);
1335 if (error)
1336 goto out;
1337
1338 /* Set f_fsidx from parent to cheat NFSD. */
1339 vfsp->mnt_stat.f_fsidx = vp->v_vfsp->mnt_stat.f_fsidx;
1340
1341 strlcpy(vfsp->mnt_stat.f_mntfromname, osname,
1342 sizeof(vfsp->mnt_stat.f_mntfromname));
1343 set_statvfs_info(path, UIO_SYSSPACE, vfsp->mnt_stat.f_mntfromname,
1344 UIO_SYSSPACE, vfsp->mnt_op->vfs_name, vfsp, curlwp);
1345
1346 error = vfs_set_lowermount(vfsp, vp->v_vfsp);
1347 if (error)
1348 goto out;
1349
1350 mountlist_append(vfsp);
1351 vref(vp);
1352 vp->v_mountedhere = vfsp;
1353
1354 mutex_exit(vfsp->mnt_updating);
1355 (void) VFS_STATVFS(vfsp, &vfsp->mnt_stat);
1356
1357 out:;
1358 if (error && vfsp) {
1359 mutex_exit(vfsp->mnt_updating);
1360 vfs_rele(vfsp);
1361 }
1362 PNBUF_PUT(osname);
1363 PNBUF_PUT(path);
1364
1365 return error;
1366 }
1367
/*
 * Resolve and automount a snapshot: look up the component name in
 * ".zfs/snapshot", mount the snapshot on its sfs node if it is not
 * mounted yet, and return the root vnode of the mounted snapshot
 * (referenced, unlocked) rather than the covered mount point.
 * Returns ERESTART while another lwp is mounting the same snapshot.
 */
static int
sfs_lookup_snapshot(vnode_t *dvp, struct componentname *cnp, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	vnode_t *vp;
	struct sfs_node *node;
	struct sfs_node_key key;
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	int error;

	/* Retrieve the snapshot object id and the to be mounted on vnode. */
	if (cnp->cn_namelen >= sizeof(snapname))
		return ENOENT;

	strlcpy(snapname, cnp->cn_nameptr, cnp->cn_namelen + 1);
	error = dsl_dataset_snap_lookup( dmu_objset_ds(zfsvfs->z_os),
	    snapname, &key.id);
	if (error)
		return error;
	key.parent_id = ZFSCTL_INO_SNAPDIR;
	error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
	if (error)
		return error;

	/* Handle case where the vnode is currently mounting. */
	vp = *vpp;
	mutex_enter(vp->v_interlock);
	node = VTOSFS(vp);
	if (node->sn_mounting) {
		if (node->sn_mounting == curlwp)
			error = 0;	/* this lwp is the mounter itself */
		else
			error = ERESTART;	/* another lwp mounts; retry */
		mutex_exit(vp->v_interlock);
		if (error)
			yield();
		return error;
	}

	/* If not yet mounted mount the snapshot. */
	if (vp->v_mountedhere == NULL) {
		ASSERT(node->sn_mounting == NULL);
		node->sn_mounting = curlwp;
		mutex_exit(vp->v_interlock);

		/* Drop the directory lock across the mount operation. */
		VOP_UNLOCK(dvp, 0);
		error = sfs_snapshot_mount(vp, snapname);
		if (vn_lock(dvp, LK_EXCLUSIVE) != 0) {
			/* dvp went away while unlocked. */
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
			error = ENOENT;
		}

		mutex_enter(vp->v_interlock);
		/* vp may have been reclaimed meanwhile; v_data then NULL. */
		if ((node = VTOSFS(vp)))
			node->sn_mounting = NULL;
		mutex_exit(vp->v_interlock);

		if (error) {
			vrele(vp);
			*vpp = NULL;
			return error;
		}
	} else
		mutex_exit(vp->v_interlock);

	/* Return the mounted root rather than the covered mount point. */
	ASSERT(vp->v_mountedhere);
	error = VFS_ROOT(vp->v_mountedhere, LK_EXCLUSIVE, vpp);
	vrele(vp);
	if (error)
		return error;

	/*
	 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
	 *
	 * Here we make .zfs/snapshot/<snapname> accessible over NFS
	 * without requiring manual mounts of <snapname>.
	 */
	if (((*vpp)->v_vflag & VV_ROOT)) {
		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
		(*vpp)->v_vflag &= ~VV_ROOT;
	}
	VOP_UNLOCK(*vpp, 0);

	return 0;
}
1455
/*
 * VOP_LOOKUP for ".zfs" and ".zfs/snapshot": handles ".", "..", the
 * "snapshot" entry of ".zfs" and snapshot names inside
 * ".zfs/snapshot".  Modifying lookups (CREATE/DELETE/RENAME) are
 * rejected with ENOTSUP.
 */
static int
sfs_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp;
	vnode_t **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	struct sfs_node *dnode = VTOSFS(dvp);
	int error;

	SFS_NODE_ASSERT(dvp);
	ZFS_ENTER(zfsvfs);

	/*
	 * No CREATE, DELETE or RENAME.
	 */
	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop != LOOKUP) {
		ZFS_EXIT(zfsvfs);

		return ENOTSUP;
	}

	/*
	 * Handle DOT and DOTDOT.
	 */
	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		vref(dvp);
		*vpp = dvp;
		ZFS_EXIT(zfsvfs);

		return 0;
	}
	if ((cnp->cn_flags & ISDOTDOT)) {
		if (dnode->sn_parent_id == 0) {
			/* ".." of ".zfs" is the filesystem root vnode. */
			error = vcache_get(zfsvfs->z_vfs,
			    &zfsvfs->z_root, sizeof(zfsvfs->z_root), vpp);
		} else if (dnode->sn_parent_id == ZFSCTL_INO_ROOT) {
			/* ".." of ".zfs/snapshot" is ".zfs". */
			error = zfsctl_root(zfsvfs, vpp);
		} else if (dnode->sn_parent_id == ZFSCTL_INO_SNAPDIR) {
			error = zfsctl_snapshot(zfsvfs, vpp);
		} else {
			error = ENOENT;
		}
		ZFS_EXIT(zfsvfs);

		return error;
	}

	/*
	 * Lookup in ".zfs".
	 */
	if (dnode->sn_id == ZFSCTL_INO_ROOT) {
		if (cnp->cn_namelen == strlen(ZFS_SNAPDIR_NAME) &&
		    strncmp(cnp->cn_nameptr, ZFS_SNAPDIR_NAME,
		    cnp->cn_namelen) == 0) {
			error = zfsctl_snapshot(zfsvfs, vpp);
		} else {
			error = ENOENT;
		}
		ZFS_EXIT(zfsvfs);

		return error;
	}

	/*
	 * Lookup in ".zfs/snapshot".
	 */
	if (dnode->sn_id == ZFSCTL_INO_SNAPDIR) {
		error = sfs_lookup_snapshot(dvp, cnp, vpp);
		ZFS_EXIT(zfsvfs);

		return error;
	}

	vprint("sfs_lookup: unexpected node for lookup", dvp);
	ZFS_EXIT(zfsvfs);

	return ENOENT;
}
1540
1541 static int
sfs_open(void * v)1542 sfs_open(void *v)
1543 {
1544 struct vop_open_args /* {
1545 struct vnode *a_vp;
1546 int a_mode;
1547 kauth_cred_t a_cred;
1548 } */ *ap = v;
1549 zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1550 int error = 0;
1551
1552 SFS_NODE_ASSERT(ap->a_vp);
1553 ZFS_ENTER(zfsvfs);
1554
1555 if (ap->a_mode & FWRITE)
1556 error = EACCES;
1557
1558 ZFS_EXIT(zfsvfs);
1559
1560 return error;
1561 }
1562
1563 static int
sfs_close(void * v)1564 sfs_close(void *v)
1565 {
1566 struct vop_close_args /* {
1567 struct vnode *a_vp;
1568 int a_mode;
1569 kauth_cred_t a_cred;
1570 } */ *ap = v;
1571 zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1572
1573 SFS_NODE_ASSERT(ap->a_vp);
1574 ZFS_ENTER(zfsvfs);
1575
1576 ZFS_EXIT(zfsvfs);
1577
1578 return 0;
1579 }
1580
1581 static int
sfs_access(void * v)1582 sfs_access(void *v)
1583 {
1584 struct vop_access_args /* {
1585 struct vnode *a_vp;
1586 int a_mode;
1587 kauth_cred_t a_cred;
1588 } */ *ap = v;
1589 zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1590 int error = 0;
1591
1592 SFS_NODE_ASSERT(ap->a_vp);
1593 ZFS_ENTER(zfsvfs);
1594
1595 if (ap->a_accmode & FWRITE)
1596 error = EACCES;
1597
1598 ZFS_EXIT(zfsvfs);
1599
1600 return error;
1601 }
1602
1603 static int
sfs_getattr(void * v)1604 sfs_getattr(void *v)
1605 {
1606 struct vop_getattr_args /* {
1607 struct vnode *a_vp;
1608 struct vattr *a_vap;
1609 kauth_cred_t a_cred;
1610 } */ *ap = v;
1611 vnode_t *vp = ap->a_vp;
1612 struct sfs_node *node = VTOSFS(vp);
1613 struct vattr *vap = ap->a_vap;
1614 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1615 dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
1616 timestruc_t now;
1617 uint64_t snap_count;
1618 int error;
1619
1620 SFS_NODE_ASSERT(vp);
1621 ZFS_ENTER(zfsvfs);
1622
1623 vap->va_type = VDIR;
1624 vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
1625 S_IROTH | S_IXOTH;
1626 vap->va_nlink = 2;
1627 vap->va_uid = 0;
1628 vap->va_gid = 0;
1629 vap->va_fsid = vp->v_vfsp->mnt_stat.f_fsid;
1630 vap->va_fileid = node->sn_id;
1631 vap->va_size = 0;
1632 vap->va_blocksize = 0;
1633 gethrestime(&now);
1634 vap->va_atime = now;
1635 vap->va_ctime = zfsvfs->z_ctldir->zc_cmtime;
1636 vap->va_mtime = vap->va_ctime;
1637 vap->va_birthtime = vap->va_ctime;
1638 vap->va_gen = 0;
1639 vap->va_flags = 0;
1640 vap->va_rdev = 0;
1641 vap->va_bytes = 0;
1642 vap->va_filerev = 0;
1643
1644 switch (node->sn_id){
1645 case ZFSCTL_INO_ROOT:
1646 vap->va_nlink += 1; /* snapdir */
1647 vap->va_size = vap->va_nlink;
1648 break;
1649 case ZFSCTL_INO_SNAPDIR:
1650 if (dsl_dataset_phys(ds)->ds_snapnames_zapobj) {
1651 error = zap_count(
1652 dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
1653 dsl_dataset_phys(ds)->ds_snapnames_zapobj,
1654 &snap_count);
1655 if (error)
1656 return error;
1657 vap->va_nlink += snap_count;
1658 }
1659 vap->va_size = vap->va_nlink;
1660 break;
1661 }
1662
1663 ZFS_EXIT(zfsvfs);
1664
1665 return 0;
1666 }
1667
/*
 * Emit one directory entry into the readdir uio.
 *
 * Returns ENAMETOOLONG when the entry does not fit in the remaining
 * uio space; the caller (sfs_readdir) treats that as "buffer full"
 * and maps it to success.  Entries wholly before the current uio
 * offset are skipped but still advance *offp, keeping entry offsets
 * stable across successive calls.
 */
static int
sfs_readdir_one(struct vop_readdir_args *ap, struct dirent *dp,
    const char *name, ino_t ino, off_t *offp)
{
	int error;

	dp->d_fileno = ino;
	dp->d_type = DT_DIR;	/* every ".zfs" node is a directory */
	strlcpy(dp->d_name, name, sizeof(dp->d_name));
	dp->d_namlen = strlen(dp->d_name);
	dp->d_reclen = _DIRENT_SIZE(dp);

	if (ap->a_uio->uio_resid < dp->d_reclen)
		return ENAMETOOLONG;
	/* Skip entries the caller has already consumed. */
	if (ap->a_uio->uio_offset > *offp) {
		*offp += dp->d_reclen;
		return 0;
	}

	error = uiomove(dp, dp->d_reclen, UIO_READ, ap->a_uio);
	if (error)
		return error;
	/* Record this entry's starting offset as its seek cookie. */
	if (ap->a_ncookies)
		(*ap->a_cookies)[(*ap->a_ncookies)++] = *offp;
	*offp += dp->d_reclen;

	return 0;
}
1696
/*
 * VOP_READDIR for ".zfs" and ".zfs/snapshot": emits "." and "..",
 * then either the single "snapshot" entry (for ".zfs") or one entry
 * per snapshot (for ".zfs/snapshot").
 */
static int
sfs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		int *a_eofflag;
		off_t **a_cookies;
		int *a_ncookies;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct sfs_node *node = VTOSFS(vp);
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	struct dirent *dp;
	uint64_t parent;
	off_t offset;
	int error, ncookies;

	SFS_NODE_ASSERT(ap->a_vp);
	ZFS_ENTER(zfsvfs);

	/* Parent id 0 means ".." is the filesystem root. */
	parent = node->sn_parent_id == 0 ? zfsvfs->z_root : node->sn_parent_id;
	dp = kmem_alloc(sizeof(*dp), KM_SLEEP);
	if (ap->a_ncookies) {
		/* Upper bound on how many entries can fit in the uio. */
		ncookies = ap->a_uio->uio_resid / _DIRENT_MINSIZE(dp);
		*ap->a_ncookies = 0;
		*ap->a_cookies = malloc(ncookies * sizeof (off_t),
		    M_TEMP, M_WAITOK);
	}

	offset = 0;
	error = sfs_readdir_one(ap, dp, ".", node->sn_id, &offset);
	if (error == 0)
		error = sfs_readdir_one(ap, dp, "..", parent, &offset);
	if (error == 0 && node->sn_id == ZFSCTL_INO_ROOT) {
		error = sfs_readdir_one(ap, dp, ZFS_SNAPDIR_NAME,
		    ZFSCTL_INO_SNAPDIR, &offset);
	} else if (error == 0 && node->sn_id == ZFSCTL_INO_SNAPDIR) {
		char snapname[ZFS_MAX_DATASET_NAME_LEN];
		uint64_t cookie, id;

		/* Walk the snapshot list under the pool config lock. */
		cookie = 0;
		for (;;) {
			dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os),
			    FTAG);
			error = dmu_snapshot_list_next(zfsvfs->z_os,
			    sizeof(snapname), snapname, &id, &cookie, NULL);
			dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os),
			    FTAG);
			if (error) {
				if (error == ENOENT)
					error = 0;	/* end of list */
				break;
			}
			error = sfs_readdir_one(ap, dp, snapname, id, &offset);
			if (error)
				break;
		}
	}

	/*
	 * Checked before ENAMETOOLONG is mapped below, so a merely-full
	 * buffer does not report EOF.
	 */
	if (ap->a_eofflag && error == 0)
		*ap->a_eofflag = 1;

	/* ENAMETOOLONG from sfs_readdir_one() means "buffer full". */
	if (error == ENAMETOOLONG)
		error = 0;

	if (ap->a_ncookies && error) {
		free(*ap->a_cookies, M_TEMP);
		*ap->a_ncookies = 0;
		*ap->a_cookies = NULL;
	}
	kmem_free(dp, sizeof(*dp));

	ZFS_EXIT(zfsvfs);

	return error;
}
1775
1776 static int
sfs_inactive(void * v)1777 sfs_inactive(void *v)
1778 {
1779 struct vop_inactive_v2_args /* {
1780 struct vnode *a_vp;
1781 bool *a_recycle;
1782 } */ *ap = v;
1783 vnode_t *vp = ap->a_vp;
1784 struct sfs_node *node = VTOSFS(vp);
1785
1786 SFS_NODE_ASSERT(vp);
1787
1788 *ap->a_recycle = (node->sn_parent_id == ZFSCTL_INO_SNAPDIR);
1789
1790 return 0;
1791 }
1792
/*
 * VOP_RECLAIM: detach and free the sfs node.  v_data is cleared while
 * the vnode lock is still held, so code that re-checks VTOSFS() under
 * the interlock (see sfs_lookup_snapshot) observes the node's
 * disappearance; the node itself is freed only after the unlock.
 */
static int
sfs_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct sfs_node *node = VTOSFS(vp);

	SFS_NODE_ASSERT(ap->a_vp);

	vp->v_data = NULL;
	/* reclaim_v2 is entered locked; drop the lock before freeing. */
	VOP_UNLOCK(vp, 0);

	kmem_free(node, sizeof(*node));

	return 0;
}
1811
1812 static int
sfs_print(void * v)1813 sfs_print(void *v)
1814 {
1815 struct vop_print_args /* {
1816 struct vnode *a_vp;
1817 } */ *ap = v;
1818 struct sfs_node *node = VTOSFS(ap->a_vp);
1819
1820 SFS_NODE_ASSERT(ap->a_vp);
1821
1822 printf("\tid %" PRIu64 ", parent %" PRIu64 "\n",
1823 node->sn_id, node->sn_parent_id);
1824
1825 return 0;
1826 }
1827
/*
 * Vnode operations table for the synthetic ".zfs" nodes.  Generic
 * genfs handlers cover locking and other boilerplate; everything
 * sfs-specific is implemented above.
 */
const struct vnodeopv_entry_desc zfs_sfsop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, genfs_parsepath },
	{ &vop_lookup_desc, sfs_lookup },
	{ &vop_open_desc, sfs_open },
	{ &vop_close_desc, sfs_close },
	{ &vop_access_desc, sfs_access },
	{ &vop_getattr_desc, sfs_getattr },
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_readdir_desc, sfs_readdir },
	{ &vop_inactive_desc, sfs_inactive },
	{ &vop_reclaim_desc, sfs_reclaim },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_putpages_desc, genfs_null_putpages },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_print_desc, sfs_print },
	{ &vop_pathconf_desc, genfs_pathconf },
	{ NULL, NULL }
};

/* Registers the table above and fills in zfs_sfsop_p. */
const struct vnodeopv_desc zfs_sfsop_opv_desc =
	{ &zfs_sfsop_p, zfs_sfsop_entries };
1851
/* Module-wide ctldir initialization; nothing to set up on NetBSD. */
void
zfsctl_init(void)
{
}
1856
/* Module-wide ctldir teardown; nothing to release on NetBSD. */
void
zfsctl_fini(void)
{
}
1861
/*
 * vcache loader for ".zfs" nodes: initialize a fresh vnode from its
 * sfs_node_key (parent id + node id).  Rejects keys of the wrong size
 * and refuses to load new nodes while the filesystem is unmounting.
 */
int
zfsctl_loadvnode(vfs_t *vfsp, vnode_t *vp,
    const void *key, size_t key_len, const void **new_key)
{
	struct sfs_node_key node_key;
	struct sfs_node *node;

	if (key_len != sizeof(node_key))
		return EINVAL;
	if ((vfsp->mnt_iflag & IMNT_UNMOUNT))
		return ENOENT;

	memcpy(&node_key, key, key_len);

	node = kmem_alloc(sizeof(*node), KM_SLEEP);

	node->sn_mounting = NULL;
	node->sn_key = node_key;

	vp->v_data = node;
	vp->v_op = zfs_sfsop_p;
	vp->v_tag = VT_ZFS;
	vp->v_type = VDIR;	/* all ".zfs" nodes are directories */
	uvm_vnp_setsize(vp, 0);

	/* The key lives inside the node from now on. */
	*new_key = &node->sn_key;

	return 0;
}
1891
/*
 * Produce an NFS file handle for a ".zfs" node.  The handle is a
 * zfid_short_t carrying the node id serialized one byte at a time
 * (least-significant byte first) and a generation number of 0.
 * Returns E2BIG (with *fh_size updated) if the caller's buffer is
 * too small.
 */
int
zfsctl_vptofh(vnode_t *vp, fid_t *fidp, size_t *fh_size)
{
	struct sfs_node *node = VTOSFS(vp);
	uint64_t object = node->sn_id;
	zfid_short_t *zfid = (zfid_short_t *)fidp;
	int i;

	SFS_NODE_ASSERT(vp);

	if (*fh_size < SHORT_FID_LEN) {
		*fh_size = SHORT_FID_LEN;
		return SET_ERROR(E2BIG);
	}
	*fh_size = SHORT_FID_LEN;

	zfid->zf_len = SHORT_FID_LEN;
	for (i = 0; i < sizeof(zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs nodes always have a generation number of 0 */
	for (i = 0; i < sizeof(zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	return 0;
}
1918
1919 /*
1920 * Return the ".zfs" vnode.
1921 */
1922 int
zfsctl_root(zfsvfs_t * zfsvfs,vnode_t ** vpp)1923 zfsctl_root(zfsvfs_t *zfsvfs, vnode_t **vpp)
1924 {
1925 struct sfs_node_key key = {
1926 .parent_id = 0,
1927 .id = ZFSCTL_INO_ROOT
1928 };
1929
1930 return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1931 }
1932
1933 /*
1934 * Return the ".zfs/snapshot" vnode.
1935 */
1936 int
zfsctl_snapshot(zfsvfs_t * zfsvfs,vnode_t ** vpp)1937 zfsctl_snapshot(zfsvfs_t *zfsvfs, vnode_t **vpp)
1938 {
1939 struct sfs_node_key key = {
1940 .parent_id = ZFSCTL_INO_ROOT,
1941 .id = ZFSCTL_INO_SNAPDIR
1942 };
1943
1944 return vcache_get(zfsvfs->z_vfs, &key, sizeof(key), vpp);
1945 }
1946
/*
 * Create the ".zfs" control state at mount time.  Captures the
 * creation time of the filesystem root so the synthetic directories
 * can report stable c/m/birth times (see sfs_getattr()).
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	vnode_t *vp;
	struct zfsctl_root *zc;
	uint64_t crtime[2];

	zc = kmem_alloc(sizeof(*zc), KM_SLEEP);

	/* Root vnode and its crtime SA must exist on a mountable fs. */
	VERIFY(0 == VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &vp));
	VERIFY(0 == sa_lookup(VTOZ(vp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
	    &crtime, sizeof(crtime)));
	vput(vp);

	ZFS_TIME_DECODE(&zc->zc_cmtime, crtime);

	ASSERT(zfsvfs->z_ctldir == NULL);
	zfsvfs->z_ctldir = zc;
}
1966
1967 void
zfsctl_destroy(zfsvfs_t * zfsvfs)1968 zfsctl_destroy(zfsvfs_t *zfsvfs)
1969 {
1970 struct zfsctl_root *zc = zfsvfs->z_ctldir;
1971
1972 ASSERT(zfsvfs->z_ctldir);
1973 zfsvfs->z_ctldir = NULL;
1974 kmem_free(zc, sizeof(*zc));
1975 }
1976
/*
 * Map a snapshot's objset id to its mounted zfsvfs_t, if the snapshot
 * is currently mounted under ".zfs/snapshot"; EINVAL otherwise.
 *
 * NOTE(review): v_mountedhere is read without busying the mount and
 * the returned zfsvfs_t is not referenced -- same caveat as the "XXX"
 * in the FreeBSD variant of this function.
 */
int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
	struct sfs_node_key key = {
		.parent_id = ZFSCTL_INO_SNAPDIR,
		.id = objsetid
	};
	vnode_t *vp;
	int error;

	*zfsvfsp = NULL;
	error = vcache_get(vfsp, &key, sizeof(key), &vp);
	if (error == 0) {
		if (vp->v_mountedhere)
			*zfsvfsp = vp->v_mountedhere->mnt_data;
		vrele(vp);
	}
	if (*zfsvfsp == NULL)
		return SET_ERROR(EINVAL);
	return 0;
}
1998
/*
 * Unmount all snapshots mounted under ".zfs/snapshot"; called when
 * the parent filesystem is unmounted.  Stops at the first unmount
 * failure; snapshots that are not mounted are simply skipped.
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct mount *mp;
	vnode_t *vp;
	struct sfs_node_key key;
	uint64_t cookie;
	int error;

	ASSERT(zfsvfs->z_ctldir);

	cookie = 0;
	key.parent_id = ZFSCTL_INO_SNAPDIR;
	for (;;) {
		/* Walk the snapshot list under the pool config lock. */
		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
		error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof(snapname),
		    snapname, &key.id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error) {
			if (error == ENOENT)
				error = 0;	/* end of snapshot list */
			break;
		}

		error = vcache_get(zfsvfs->z_vfs, &key, sizeof(key), &vp);
		if (error == ENOENT)
			continue;	/* vnode is gone; skip */
		else if (error)
			break;

		mp = vp->v_mountedhere;
		if (mp == NULL) {
			/* Snapshot exists but is not mounted. */
			vrele(vp);
			continue;
		}

		error = dounmount(mp, fflags, curthread);
		vrele(vp);
		if (error)
			break;
	}
	ASSERT((fflags & MS_FORCE) == 0 || error == 0);

	return (error);
}
2046
2047 boolean_t
zfsctl_is_node(vnode_t * vp)2048 zfsctl_is_node(vnode_t *vp)
2049 {
2050
2051 return (vp->v_op == zfs_sfsop_p);
2052 }
2053 #endif /* __NetBSD__ */
2054