1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24  * All rights reserved.
25  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
26  * Copyright (c) 2014 Integros [integros.com]
27  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28  */
29 
30 /* Portions Copyright 2010 Robert Milkowski */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysmacros.h>
37 #include <sys/kmem.h>
38 #include <sys/acl.h>
39 #include <sys/vnode.h>
40 #include <sys/vfs.h>
41 #include <sys/mntent.h>
42 #include <sys/mount.h>
43 #include <sys/cmn_err.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zfs_vnops.h>
46 #include <sys/zfs_dir.h>
47 #include <sys/zil.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu.h>
50 #include <sys/dsl_prop.h>
51 #include <sys/dsl_dataset.h>
52 #include <sys/dsl_deleg.h>
53 #include <sys/spa.h>
54 #include <sys/zap.h>
55 #include <sys/sa.h>
56 #include <sys/sa_impl.h>
57 #include <sys/policy.h>
58 #include <sys/atomic.h>
59 #include <sys/zfs_ioctl.h>
60 #include <sys/zfs_ctldir.h>
61 #include <sys/zfs_fuid.h>
62 #include <sys/sunddi.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/dsl_dir.h>
65 #include <sys/spa_boot.h>
66 #include <sys/jail.h>
67 #include <ufs/ufs/quota.h>
68 #include <sys/zfs_quota.h>
69 
70 #include "zfs_comutil.h"
71 
72 #ifndef	MNTK_VMSETSIZE_BUG
73 #define	MNTK_VMSETSIZE_BUG	0
74 #endif
75 #ifndef	MNTK_NOMSYNC
76 #define	MNTK_NOMSYNC	8
77 #endif
78 
79 struct mtx zfs_debug_mtx;
80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
81 
82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
83 
84 int zfs_super_owner;
85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
86 	"File system owners can perform privileged operations on file systems");
87 
88 int zfs_debug_level;
89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
90 	"Debug level");
91 
92 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
93 static int zfs_version_acl = ZFS_ACL_VERSION;
94 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
95 	"ZFS_ACL_VERSION");
96 static int zfs_version_spa = SPA_VERSION;
97 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
98 	"SPA_VERSION");
99 static int zfs_version_zpl = ZPL_VERSION;
100 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
101 	"ZPL_VERSION");
102 
103 #if __FreeBSD_version >= 1400018
104 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
105     bool *mp_busy);
106 #else
107 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
108 #endif
109 static int zfs_mount(vfs_t *vfsp);
110 static int zfs_umount(vfs_t *vfsp, int fflag);
111 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
112 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
113 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
114 static int zfs_sync(vfs_t *vfsp, int waitfor);
115 #if __FreeBSD_version >= 1300098
116 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
117     struct ucred **credanonp, int *numsecflavors, int *secflavors);
118 #else
119 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
120     struct ucred **credanonp, int *numsecflavors, int **secflavors);
121 #endif
122 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
123 static void zfs_freevfs(vfs_t *vfsp);
124 
125 struct vfsops zfs_vfsops = {
126 	.vfs_mount =		zfs_mount,
127 	.vfs_unmount =		zfs_umount,
128 #if __FreeBSD_version >= 1300049
129 	.vfs_root =		vfs_cache_root,
130 	.vfs_cachedroot =	zfs_root,
131 #else
132 	.vfs_root =		zfs_root,
133 #endif
134 	.vfs_statfs =		zfs_statfs,
135 	.vfs_vget =		zfs_vget,
136 	.vfs_sync =		zfs_sync,
137 	.vfs_checkexp =		zfs_checkexp,
138 	.vfs_fhtovp =		zfs_fhtovp,
139 	.vfs_quotactl =		zfs_quotactl,
140 };
141 
142 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
143 
144 /*
145  * We need to keep a count of active filesystems.
146  * This is necessary to prevent our module
147  * from being unloaded after a umount -f.
148  */
149 static uint32_t	zfs_active_fs_count = 0;
150 
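/*
 * Check whether a temporary mount option overrides the given dataset
 * property.  If an override is in effect, *val is set to the overridden
 * value and the setpoint is reported as "temporary"; otherwise *val is
 * left alone.  The vfs_t returned by getzfsvfs_impl() is busied, so it
 * must be released with vfs_unbusy() on every path out of the switch.
 */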
151 int
152 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
153     char *setpoint)
154 {
155 	int error;
156 	zfsvfs_t *zfvp;
157 	vfs_t *vfsp;
158 	objset_t *os;
159 	uint64_t tmp = *val;
160 
161 	error = dmu_objset_from_ds(ds, &os);
162 	if (error != 0)
163 		return (error);
164 
165 	error = getzfsvfs_impl(os, &zfvp);
166 	if (error != 0)
167 		return (error);
168 	if (zfvp == NULL)
169 		return (ENOENT);
170 	vfsp = zfvp->z_vfs;
171 	switch (zfs_prop) {
172 	case ZFS_PROP_ATIME:
173 		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
174 			tmp = 0;
175 		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
176 			tmp = 1;
177 		break;
178 	case ZFS_PROP_DEVICES:
179 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
180 			tmp = 0;
181 		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
182 			tmp = 1;
183 		break;
184 	case ZFS_PROP_EXEC:
185 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
186 			tmp = 0;
187 		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
188 			tmp = 1;
189 		break;
190 	case ZFS_PROP_SETUID:
191 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
192 			tmp = 0;
193 		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
194 			tmp = 1;
195 		break;
196 	case ZFS_PROP_READONLY:
197 		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
198 			tmp = 0;
199 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
200 			tmp = 1;
201 		break;
202 	case ZFS_PROP_XATTR:
203 		if (zfvp->z_flags & ZSB_XATTR)
204 			tmp = zfvp->z_xattr;
205 		break;
206 	case ZFS_PROP_NBMAND:
207 		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
208 			tmp = 0;
209 		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
210 			tmp = 1;
211 		break;
212 	default:
213 		vfs_unbusy(vfsp);
214 		return (ENOENT);
215 	}
216 
217 	vfs_unbusy(vfsp);
218 	if (tmp != *val) {
219 		(void) strcpy(setpoint, "temporary");
220 		*val = tmp;
221 	}
222 	return (0);
223 }
224 
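/*
 * Fill in a FreeBSD struct dqblk64 for the given user or group id.  The
 * id is formatted as a hex string (e.g. id 1001 becomes the ZAP key
 * "3e9") and looked up in the dataset's quota and "used" ZAP objects.
 * ZFS tracks a single byte limit per id, so the soft and hard block
 * limits are both reported as that value, and the inode limits are
 * reported as 0 (no limit).
 */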
225 static int
226 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
227 {
228 	int error = 0;
229 	char buf[32];
230 	uint64_t usedobj, quotaobj;
231 	uint64_t quota, used = 0;
232 	timespec_t now;
233 
234 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
235 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
236 
237 	if (quotaobj == 0 || zfsvfs->z_replay) {
238 		error = ENOENT;
239 		goto done;
240 	}
241 	(void) sprintf(buf, "%llx", (longlong_t)id);
242 	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
243 	    buf, sizeof (quota), 1, &quota)) != 0) {
244 		dprintf("%s(%d): quotaobj lookup failed\n",
245 		    __FUNCTION__, __LINE__);
246 		goto done;
247 	}
248 	/*
249 	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
250 	 * So we set them to be the same.
251 	 */
252 	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
253 	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
254 	if (error && error != ENOENT) {
255 		dprintf("%s(%d): usedobj lookup failed: %d\n",
256 		    __FUNCTION__, __LINE__, error);
257 		goto done;
258 	}
259 	dqp->dqb_curblocks = btodb(used);
260 	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
261 	vfs_timestamp(&now);
262 	/*
263 	 * Setting this to 0 causes FreeBSD quota(8) to print
264 	 * the number of days since the epoch, which isn't
265 	 * particularly useful.
266 	 */
267 	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
268 done:
269 	return (error);
270 }
271 
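/*
 * VFS_QUOTACTL entry point.  Decodes the quotactl(2) command and quota
 * type and maps them onto the ZFS userquota/groupquota machinery:
 * Q_GETQUOTA is served by zfs_getquota() above, Q_SETQUOTA is translated
 * into zfs_set_userquota(), Q_QUOTAON is accepted as a no-op, and
 * Q_QUOTAOFF returns ENOTSUP, since quota enforcement cannot be toggled
 * on ZFS.
 */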
272 static int
273 #if __FreeBSD_version >= 1400018
274 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
275 #else
276 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
277 #endif
278 {
279 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
280 	struct thread *td;
281 	int cmd, type, error = 0;
282 	int bitsize;
283 	zfs_userquota_prop_t quota_type;
284 	struct dqblk64 dqblk = { 0 };
285 
286 	td = curthread;
287 	cmd = cmds >> SUBCMDSHIFT;
288 	type = cmds & SUBCMDMASK;
289 
290 	ZFS_ENTER(zfsvfs);
291 	if (id == -1) {
292 		switch (type) {
293 		case USRQUOTA:
294 			id = td->td_ucred->cr_ruid;
295 			break;
296 		case GRPQUOTA:
297 			id = td->td_ucred->cr_rgid;
298 			break;
299 		default:
300 			error = EINVAL;
301 #if __FreeBSD_version < 1400018
302 			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
303 				vfs_unbusy(vfsp);
304 #endif
305 			goto done;
306 		}
307 	}
308 	/*
309 	 * Map BSD type to:
310 	 * ZFS_PROP_USERUSED,
311 	 * ZFS_PROP_USERQUOTA,
312 	 * ZFS_PROP_GROUPUSED,
313 	 * ZFS_PROP_GROUPQUOTA
314 	 */
315 	switch (cmd) {
316 	case Q_SETQUOTA:
317 	case Q_SETQUOTA32:
318 		if (type == USRQUOTA)
319 			quota_type = ZFS_PROP_USERQUOTA;
320 		else if (type == GRPQUOTA)
321 			quota_type = ZFS_PROP_GROUPQUOTA;
322 		else
323 			error = EINVAL;
324 		break;
325 	case Q_GETQUOTA:
326 	case Q_GETQUOTA32:
327 		if (type == USRQUOTA)
328 			quota_type = ZFS_PROP_USERUSED;
329 		else if (type == GRPQUOTA)
330 			quota_type = ZFS_PROP_GROUPUSED;
331 		else
332 			error = EINVAL;
333 		break;
334 	}
335 
336 	/*
337 	 * Depending on the cmd, we may need additional identity
338 	 * information: the ruid and domain (see fuidstr_to_sid()), or a
339 	 * full FUID built with zfs_fuid_create(zfsvfs, id,
340 	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp).  For the commands
341 	 * handled here the numeric id alone is sufficient.
342 	 *
343 	 * Look at zfs_id_overquota() to see how a quota is looked up:
344 	 * zap_lookup(os, quotaobj, fuidstring,
345 	 *     sizeof (long long), 1, &quota)
346 	 *
347 	 * See zfs_set_userquota() to set a quota, and zfs_getquota()
348 	 * above for how the result is returned to userland.
349 	 */
350 	if ((uint32_t)type >= MAXQUOTAS) {
351 		error = EINVAL;
352 		goto done;
353 	}
354 
355 	switch (cmd) {
356 	case Q_GETQUOTASIZE:
357 		bitsize = 64;
358 		error = copyout(&bitsize, arg, sizeof (int));
359 		break;
360 	case Q_QUOTAON:
361 		/* Quotas can't be toggled on ZFS; Q_QUOTAON is a no-op. */
362 		error = 0;
363 #if __FreeBSD_version < 1400018
364 		vfs_unbusy(vfsp);
365 #endif
366 		break;
367 	case Q_QUOTAOFF:
368 		error = ENOTSUP;
369 #if __FreeBSD_version < 1400018
370 		vfs_unbusy(vfsp);
371 #endif
372 		break;
373 	case Q_SETQUOTA:
374 		error = copyin(arg, &dqblk, sizeof (dqblk));
375 		if (error == 0)
376 			error = zfs_set_userquota(zfsvfs, quota_type,
377 			    "", id, dbtob(dqblk.dqb_bhardlimit));
378 		break;
379 	case Q_GETQUOTA:
380 		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
381 		if (error == 0)
382 			error = copyout(&dqblk, arg, sizeof (dqblk));
383 		break;
384 	default:
385 		error = EINVAL;
386 		break;
387 	}
388 done:
389 	ZFS_EXIT(zfsvfs);
390 	return (error);
391 }
392 
393 
394 boolean_t
395 zfs_is_readonly(zfsvfs_t *zfsvfs)
396 {
397 	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
398 }
399 
400 static int
401 zfs_sync(vfs_t *vfsp, int waitfor)
402 {
403 
404 	/*
405 	 * Data integrity is job one.  We don't want a compromised kernel
406 	 * writing to the storage pool, so we never sync during panic.
407 	 */
408 	if (panicstr)
409 		return (0);
410 
411 	/*
412 	 * Ignore the system syncher.  ZFS already commits async data
413 	 * at zfs_txg_timeout intervals.
414 	 */
415 	if (waitfor == MNT_LAZY)
416 		return (0);
417 
418 	if (vfsp != NULL) {
419 		/*
420 		 * Sync a specific filesystem.
421 		 */
422 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
423 		dsl_pool_t *dp;
424 		int error;
425 
426 		error = vfs_stdsync(vfsp, waitfor);
427 		if (error != 0)
428 			return (error);
429 
430 		ZFS_ENTER(zfsvfs);
431 		dp = dmu_objset_pool(zfsvfs->z_os);
432 
433 		/*
434 		 * If the system is shutting down, then skip any
435 		 * filesystems which may exist on a suspended pool.
436 		 */
437 		if (rebooting && spa_suspended(dp->dp_spa)) {
438 			ZFS_EXIT(zfsvfs);
439 			return (0);
440 		}
441 
442 		if (zfsvfs->z_log != NULL)
443 			zil_commit(zfsvfs->z_log, 0);
444 
445 		ZFS_EXIT(zfsvfs);
446 	} else {
447 		/*
448 		 * Sync all ZFS filesystems.  This is what happens when you
449 		 * run sync(8).  Unlike other filesystems, ZFS honors the
450 		 * request by waiting for all pools to commit all dirty data.
451 		 */
452 		spa_sync_allpools();
453 	}
454 
455 	return (0);
456 }
457 
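/*
 * The *_changed_cb() functions below are dsl_prop_register() callbacks.
 * Each one mirrors a change of the corresponding dataset property into
 * the in-memory zfsvfs state and/or the FreeBSD mount flags and options,
 * so that the mount never disagrees with the property value.
 */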
458 static void
459 atime_changed_cb(void *arg, uint64_t newval)
460 {
461 	zfsvfs_t *zfsvfs = arg;
462 
463 	if (newval == TRUE) {
464 		zfsvfs->z_atime = TRUE;
465 		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
466 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
467 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
468 	} else {
469 		zfsvfs->z_atime = FALSE;
470 		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
471 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
472 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
473 	}
474 }
475 
476 static void
477 xattr_changed_cb(void *arg, uint64_t newval)
478 {
479 	zfsvfs_t *zfsvfs = arg;
480 
481 	if (newval == ZFS_XATTR_OFF) {
482 		zfsvfs->z_flags &= ~ZSB_XATTR;
483 	} else {
484 		zfsvfs->z_flags |= ZSB_XATTR;
485 
486 		if (newval == ZFS_XATTR_SA)
487 			zfsvfs->z_xattr_sa = B_TRUE;
488 		else
489 			zfsvfs->z_xattr_sa = B_FALSE;
490 	}
491 }
492 
493 static void
494 blksz_changed_cb(void *arg, uint64_t newval)
495 {
496 	zfsvfs_t *zfsvfs = arg;
497 	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
498 	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
499 	ASSERT(ISP2(newval));
500 
501 	zfsvfs->z_max_blksz = newval;
502 	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
503 }
504 
505 static void
506 readonly_changed_cb(void *arg, uint64_t newval)
507 {
508 	zfsvfs_t *zfsvfs = arg;
509 
510 	if (newval) {
511 		/* XXX locking on vfs_flag? */
512 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
513 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
514 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
515 	} else {
516 		/* XXX locking on vfs_flag? */
517 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
518 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
519 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
520 	}
521 }
522 
523 static void
524 setuid_changed_cb(void *arg, uint64_t newval)
525 {
526 	zfsvfs_t *zfsvfs = arg;
527 
528 	if (newval == FALSE) {
529 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
530 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
531 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
532 	} else {
533 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
534 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
535 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
536 	}
537 }
538 
539 static void
540 exec_changed_cb(void *arg, uint64_t newval)
541 {
542 	zfsvfs_t *zfsvfs = arg;
543 
544 	if (newval == FALSE) {
545 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
546 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
547 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
548 	} else {
549 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
550 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
551 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
552 	}
553 }
554 
555 /*
556  * The nbmand mount option can only be changed at mount time.
557  * We can't allow it to be toggled on a live file system, or incorrect
558  * behavior may be seen from CIFS clients.
559  *
560  * This property isn't registered via dsl_prop_register(), but this callback
561  * will still be called when a file system is first mounted.
562  */
563 static void
564 nbmand_changed_cb(void *arg, uint64_t newval)
565 {
566 	zfsvfs_t *zfsvfs = arg;
567 	if (newval == FALSE) {
568 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
569 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
570 	} else {
571 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
572 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
573 	}
574 }
575 
576 static void
577 snapdir_changed_cb(void *arg, uint64_t newval)
578 {
579 	zfsvfs_t *zfsvfs = arg;
580 
581 	zfsvfs->z_show_ctldir = newval;
582 }
583 
584 static void
585 acl_mode_changed_cb(void *arg, uint64_t newval)
586 {
587 	zfsvfs_t *zfsvfs = arg;
588 
589 	zfsvfs->z_acl_mode = newval;
590 }
591 
592 static void
593 acl_inherit_changed_cb(void *arg, uint64_t newval)
594 {
595 	zfsvfs_t *zfsvfs = arg;
596 
597 	zfsvfs->z_acl_inherit = newval;
598 }
599 
600 static void
601 acl_type_changed_cb(void *arg, uint64_t newval)
602 {
603 	zfsvfs_t *zfsvfs = arg;
604 
605 	zfsvfs->z_acl_type = newval;
606 }
607 
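/*
 * Register the property callbacks above for this mount and re-apply any
 * temporary overrides that were passed as mount options (e.g. "-o ro",
 * "-o noatime"), since registering the callbacks resets everything to
 * the on-disk property values.
 */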
608 static int
609 zfs_register_callbacks(vfs_t *vfsp)
610 {
611 	struct dsl_dataset *ds = NULL;
612 	objset_t *os = NULL;
613 	zfsvfs_t *zfsvfs = NULL;
614 	uint64_t nbmand;
615 	boolean_t readonly = B_FALSE;
616 	boolean_t do_readonly = B_FALSE;
617 	boolean_t setuid = B_FALSE;
618 	boolean_t do_setuid = B_FALSE;
619 	boolean_t exec = B_FALSE;
620 	boolean_t do_exec = B_FALSE;
621 	boolean_t xattr = B_FALSE;
622 	boolean_t atime = B_FALSE;
623 	boolean_t do_atime = B_FALSE;
624 	boolean_t do_xattr = B_FALSE;
625 	int error = 0;
626 
627 	ASSERT3P(vfsp, !=, NULL);
628 	zfsvfs = vfsp->vfs_data;
629 	ASSERT3P(zfsvfs, !=, NULL);
630 	os = zfsvfs->z_os;
631 
632 	/*
633 	 * This function can be called for a snapshot when we update the snapshot's
634 	 * mount point, which isn't really supported.
635 	 */
636 	if (dmu_objset_is_snapshot(os))
637 		return (EOPNOTSUPP);
638 
639 	/*
640 	 * The act of registering our callbacks will destroy any mount
641 	 * options we may have.  In order to enable temporary overrides
642 	 * of mount options, we stash away the current values and
643 	 * restore them after we register the callbacks.
644 	 */
645 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
646 	    !spa_writeable(dmu_objset_spa(os))) {
647 		readonly = B_TRUE;
648 		do_readonly = B_TRUE;
649 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
650 		readonly = B_FALSE;
651 		do_readonly = B_TRUE;
652 	}
653 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
654 		setuid = B_FALSE;
655 		do_setuid = B_TRUE;
656 	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
657 		setuid = B_TRUE;
658 		do_setuid = B_TRUE;
659 	}
660 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
661 		exec = B_FALSE;
662 		do_exec = B_TRUE;
663 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
664 		exec = B_TRUE;
665 		do_exec = B_TRUE;
666 	}
667 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
668 		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
669 		do_xattr = B_TRUE;
670 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
671 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
672 		do_xattr = B_TRUE;
673 	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
674 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
675 		do_xattr = B_TRUE;
676 	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
677 		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
678 		do_xattr = B_TRUE;
679 	}
680 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
681 		atime = B_FALSE;
682 		do_atime = B_TRUE;
683 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
684 		atime = B_TRUE;
685 		do_atime = B_TRUE;
686 	}
687 
688 	/*
689 	 * We need to enter pool configuration here, so that we can use
690 	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
691 	 * dsl_prop_get_integer() can not be used, because it has to acquire
692 	 * spa_namespace_lock and we can not do that because we already hold
693 	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
694 	 * with spa_namespace_lock held and the function calls ZFS vnode
695 	 * operations to write the cache file and thus z_teardown_lock is
696 	 * acquired after spa_namespace_lock.
697 	 */
698 	ds = dmu_objset_ds(os);
699 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
700 
701 	/*
702 	 * nbmand is a special property.  It can only be changed at
703 	 * mount time.
704 	 *
705 	 * Unlike the properties registered below, it is not hooked up via
706 	 * dsl_prop_register(); we consult the mount options and the dataset.
707 	 */
708 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
709 		nbmand = B_FALSE;
710 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
711 		nbmand = B_TRUE;
712 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) {
713 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
714 		return (error);
715 	}
716 
717 	/*
718 	 * Register property callbacks.
719 	 *
720 	 * It would probably be fine to just check for an I/O error from
721 	 * the first prop_register(), but we check the result of every
722 	 * registration to be safe.
723 	 */
724 	error = dsl_prop_register(ds,
725 	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
726 	error = error ? error : dsl_prop_register(ds,
727 	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
728 	error = error ? error : dsl_prop_register(ds,
729 	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
730 	error = error ? error : dsl_prop_register(ds,
731 	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
732 	error = error ? error : dsl_prop_register(ds,
733 	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
734 	error = error ? error : dsl_prop_register(ds,
735 	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
736 	error = error ? error : dsl_prop_register(ds,
737 	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
738 	error = error ? error : dsl_prop_register(ds,
739 	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
740 	error = error ? error : dsl_prop_register(ds,
741 	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
742 	error = error ? error : dsl_prop_register(ds,
743 	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
744 	    zfsvfs);
745 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
746 	if (error)
747 		goto unregister;
748 
749 	/*
750 	 * Invoke our callbacks to restore temporary mount options.
751 	 */
752 	if (do_readonly)
753 		readonly_changed_cb(zfsvfs, readonly);
754 	if (do_setuid)
755 		setuid_changed_cb(zfsvfs, setuid);
756 	if (do_exec)
757 		exec_changed_cb(zfsvfs, exec);
758 	if (do_xattr)
759 		xattr_changed_cb(zfsvfs, xattr);
760 	if (do_atime)
761 		atime_changed_cb(zfsvfs, atime);
762 
763 	nbmand_changed_cb(zfsvfs, nbmand);
764 
765 	return (0);
766 
767 unregister:
768 	dsl_prop_unregister_all(ds, zfsvfs);
769 	return (error);
770 }
771 
772 /*
773  * Associate this zfsvfs with the given objset, which must be owned.
774  * This will cache a bunch of on-disk state from the objset in the
775  * zfsvfs.
776  */
777 static int
778 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
779 {
780 	int error;
781 	uint64_t val;
782 
783 	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
784 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
785 	zfsvfs->z_os = os;
786 
787 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
788 	if (error != 0)
789 		return (error);
790 	if (zfsvfs->z_version >
791 	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
792 		(void) printf("Can't mount a version %lld file system "
793 		    "on a version %lld pool.  Pool must be upgraded to mount "
794 		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
795 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
796 		return (SET_ERROR(ENOTSUP));
797 	}
798 	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
799 	if (error != 0)
800 		return (error);
801 	zfsvfs->z_norm = (int)val;
802 
803 	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
804 	if (error != 0)
805 		return (error);
806 	zfsvfs->z_utf8 = (val != 0);
807 
808 	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
809 	if (error != 0)
810 		return (error);
811 	zfsvfs->z_case = (uint_t)val;
812 
813 	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
814 	if (error != 0)
815 		return (error);
816 	zfsvfs->z_acl_type = (uint_t)val;
817 
818 	/*
819 	 * Fold case on file systems that are always or sometimes case
820 	 * insensitive.
821 	 */
822 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
823 	    zfsvfs->z_case == ZFS_CASE_MIXED)
824 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
825 
826 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
827 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
828 
829 	uint64_t sa_obj = 0;
830 	if (zfsvfs->z_use_sa) {
831 		/* should either have both of these objects or none */
832 		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
833 		    &sa_obj);
834 		if (error != 0)
835 			return (error);
836 
837 		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
838 		if (error == 0 && val == ZFS_XATTR_SA)
839 			zfsvfs->z_xattr_sa = B_TRUE;
840 	}
841 
842 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
843 	    &zfsvfs->z_attr_table);
844 	if (error != 0)
845 		return (error);
846 
847 	if (zfsvfs->z_version >= ZPL_VERSION_SA)
848 		sa_register_update_callback(os, zfs_sa_upgrade);
849 
850 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
851 	    &zfsvfs->z_root);
852 	if (error != 0)
853 		return (error);
854 	ASSERT3U(zfsvfs->z_root, !=, 0);
855 
856 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
857 	    &zfsvfs->z_unlinkedobj);
858 	if (error != 0)
859 		return (error);
860 
861 	error = zap_lookup(os, MASTER_NODE_OBJ,
862 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
863 	    8, 1, &zfsvfs->z_userquota_obj);
864 	if (error == ENOENT)
865 		zfsvfs->z_userquota_obj = 0;
866 	else if (error != 0)
867 		return (error);
868 
869 	error = zap_lookup(os, MASTER_NODE_OBJ,
870 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
871 	    8, 1, &zfsvfs->z_groupquota_obj);
872 	if (error == ENOENT)
873 		zfsvfs->z_groupquota_obj = 0;
874 	else if (error != 0)
875 		return (error);
876 
877 	error = zap_lookup(os, MASTER_NODE_OBJ,
878 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
879 	    8, 1, &zfsvfs->z_projectquota_obj);
880 	if (error == ENOENT)
881 		zfsvfs->z_projectquota_obj = 0;
882 	else if (error != 0)
883 		return (error);
884 
885 	error = zap_lookup(os, MASTER_NODE_OBJ,
886 	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
887 	    8, 1, &zfsvfs->z_userobjquota_obj);
888 	if (error == ENOENT)
889 		zfsvfs->z_userobjquota_obj = 0;
890 	else if (error != 0)
891 		return (error);
892 
893 	error = zap_lookup(os, MASTER_NODE_OBJ,
894 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
895 	    8, 1, &zfsvfs->z_groupobjquota_obj);
896 	if (error == ENOENT)
897 		zfsvfs->z_groupobjquota_obj = 0;
898 	else if (error != 0)
899 		return (error);
900 
901 	error = zap_lookup(os, MASTER_NODE_OBJ,
902 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
903 	    8, 1, &zfsvfs->z_projectobjquota_obj);
904 	if (error == ENOENT)
905 		zfsvfs->z_projectobjquota_obj = 0;
906 	else if (error != 0)
907 		return (error);
908 
909 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
910 	    &zfsvfs->z_fuid_obj);
911 	if (error == ENOENT)
912 		zfsvfs->z_fuid_obj = 0;
913 	else if (error != 0)
914 		return (error);
915 
916 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
917 	    &zfsvfs->z_shares_dir);
918 	if (error == ENOENT)
919 		zfsvfs->z_shares_dir = 0;
920 	else if (error != 0)
921 		return (error);
922 
923 	/*
924 	 * Only use the name cache if we are looking for a
925 	 * name on a file system that does not require normalization
926 	 * or case folding.  We can also look there if we happen to be
927 	 * on a non-normalizing, mixed sensitivity file system IF we
928 	 * are looking for the exact name (which is always the case on
929 	 * FreeBSD).
930 	 */
931 	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
932 	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
933 	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
934 
935 	return (0);
936 }
937 
938 taskq_t *zfsvfs_taskq;
939 
940 static void
941 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
942 {
943 
944 	zfs_unlinked_drain((zfsvfs_t *)context);
945 }
946 
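/*
 * Allocate a new zfsvfs_t, own the named objset (read-only when mounting
 * read-only or when the name refers to a snapshot) and initialize the
 * zfsvfs from the objset's on-disk state via zfsvfs_create_impl().
 */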
947 int
948 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
949 {
950 	objset_t *os;
951 	zfsvfs_t *zfsvfs;
952 	int error;
953 	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
954 
955 	/*
956 	 * XXX: Fix struct statfs so this isn't necessary!
957 	 *
958 	 * The 'osname' is used as the filesystem's special node, which means
959 	 * it must fit in statfs.f_mntfromname, or else it can't be
960 	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
961 	 * 'zfs unmount' to think it's not mounted when it is.
962 	 */
963 	if (strlen(osname) >= MNAMELEN)
964 		return (SET_ERROR(ENAMETOOLONG));
965 
966 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
967 
968 	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
969 	    &os);
970 	if (error != 0) {
971 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
972 		return (error);
973 	}
974 
975 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
976 
977 	return (error);
978 }
979 
980 
981 int
982 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
983 {
984 	int error;
985 
986 	zfsvfs->z_vfs = NULL;
987 	zfsvfs->z_parent = zfsvfs;
988 
989 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
990 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
991 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
992 	    offsetof(znode_t, z_link_node));
993 	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
994 	    zfsvfs_task_unlinked_drain, zfsvfs);
995 	ZFS_TEARDOWN_INIT(zfsvfs);
996 	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
997 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
998 	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
999 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1000 
1001 	error = zfsvfs_init(zfsvfs, os);
1002 	if (error != 0) {
1003 		dmu_objset_disown(os, B_TRUE, zfsvfs);
1004 		*zfvp = NULL;
1005 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
1006 		return (error);
1007 	}
1008 
1009 	*zfvp = zfsvfs;
1010 	return (0);
1011 }
1012 
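/*
 * Finish setting up a zfsvfs before it is exposed to vnode operations:
 * register the property callbacks, open the ZIL, and, when this is a
 * real mount (as opposed to an online recv), drain the unlinked set and
 * replay the intent log.
 */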
1013 static int
1014 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1015 {
1016 	int error;
1017 
1018 	/*
1019 	 * Check for a bad on-disk format version now since we
1020 	 * lied about owning the dataset readonly before.
1021 	 */
1022 	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
1023 	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
1024 		return (SET_ERROR(EROFS));
1025 
1026 	error = zfs_register_callbacks(zfsvfs->z_vfs);
1027 	if (error)
1028 		return (error);
1029 
1030 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1031 
1032 	/*
1033 	 * If we are not mounting (i.e. online recv), then we don't
1034 	 * have to worry about replaying the log as we blocked all
1035 	 * operations out since we closed the ZIL.
1036 	 */
1037 	if (mounting) {
1038 		boolean_t readonly;
1039 
1040 		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
1041 		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
1042 
1043 		/*
1044 		 * During replay we remove the read only flag to
1045 		 * allow replays to succeed.
1046 		 */
1047 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1048 		if (readonly != 0) {
1049 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1050 		} else {
1051 			dsl_dir_t *dd;
1052 			zap_stats_t zs;
1053 
1054 			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
1055 			    &zs) == 0) {
1056 				dataset_kstats_update_nunlinks_kstat(
1057 				    &zfsvfs->z_kstat, zs.zs_num_entries);
1058 				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
1059 				    "num_entries in unlinked set: %llu",
1060 				    (u_longlong_t)zs.zs_num_entries);
1061 			}
1062 
1063 			zfs_unlinked_drain(zfsvfs);
1064 			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
1065 			dd->dd_activity_cancelled = B_FALSE;
1066 		}
1067 
1068 		/*
1069 		 * Parse and replay the intent log.
1070 		 *
1071 		 * Because of ziltest, this must be done after
1072 		 * zfs_unlinked_drain().  (Further note: ziltest
1073 		 * doesn't use readonly mounts, where
1074 		 * zfs_unlinked_drain() isn't called.)  This is because
1075 		 * ziltest causes spa_sync() to think it's committed,
1076 		 * but actually it is not, so the intent log contains
1077 		 * many txg's worth of changes.
1078 		 *
1079 		 * In particular, if object N is in the unlinked set in
1080 		 * the last txg to actually sync, then it could be
1081 		 * actually freed in a later txg and then reallocated
1082 		 * in a yet later txg.  This would write a "create
1083 		 * object N" record to the intent log.  Normally, this
1084 		 * would be fine because the spa_sync() would have
1085 		 * written out the fact that object N is free, before
1086 		 * we could write the "create object N" intent log
1087 		 * record.
1088 		 *
1089 		 * But when we are in ziltest mode, we advance the "open
1090 		 * txg" without actually spa_sync()-ing the changes to
1091 		 * disk.  So we would see that object N is still
1092 		 * allocated and in the unlinked set, and there is an
1093 		 * intent log record saying to allocate it.
1094 		 */
1095 		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1096 			if (zil_replay_disable) {
1097 				zil_destroy(zfsvfs->z_log, B_FALSE);
1098 			} else {
1099 				boolean_t use_nc = zfsvfs->z_use_namecache;
1100 				zfsvfs->z_use_namecache = B_FALSE;
1101 				zfsvfs->z_replay = B_TRUE;
1102 				zil_replay(zfsvfs->z_os, zfsvfs,
1103 				    zfs_replay_vector);
1104 				zfsvfs->z_replay = B_FALSE;
1105 				zfsvfs->z_use_namecache = use_nc;
1106 			}
1107 		}
1108 
1109 		/* restore readonly bit */
1110 		if (readonly != 0)
1111 			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
1112 	}
1113 
1114 	/*
1115 	 * Set the objset user_ptr to track its zfsvfs.
1116 	 */
1117 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1118 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1119 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1120 
1121 	return (0);
1122 }
1123 
1124 void
1125 zfsvfs_free(zfsvfs_t *zfsvfs)
1126 {
1127 	int i;
1128 
1129 	zfs_fuid_destroy(zfsvfs);
1130 
1131 	mutex_destroy(&zfsvfs->z_znodes_lock);
1132 	mutex_destroy(&zfsvfs->z_lock);
1133 	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
1134 	list_destroy(&zfsvfs->z_all_znodes);
1135 	ZFS_TEARDOWN_DESTROY(zfsvfs);
1136 	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
1137 	rw_destroy(&zfsvfs->z_fuid_lock);
1138 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1139 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1140 	dataset_kstats_destroy(&zfsvfs->z_kstat);
1141 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1142 }
1143 
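/*
 * Advertise (or retract) the VFS features that depend on ZPL FUID
 * support, based on the file system version.
 */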
1144 static void
1145 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1146 {
1147 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1148 	if (zfsvfs->z_vfs) {
1149 		if (zfsvfs->z_use_fuids) {
1150 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1151 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1152 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1153 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1154 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1155 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1156 		} else {
1157 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1158 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1159 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1160 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1161 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1162 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1163 		}
1164 	}
1165 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1166 }
1167 
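/*
 * Do the bulk of the mount work: create the zfsvfs and own the objset,
 * derive a unique fsid from the objset, set the FreeBSD mount flags and
 * VFS features, and then either wire the dataset up as a read-only
 * snapshot or run the full zfsvfs_setup() path for a regular file system.
 */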
1168 static int
1169 zfs_domount(vfs_t *vfsp, char *osname)
1170 {
1171 	uint64_t recordsize, fsid_guid;
1172 	int error = 0;
1173 	zfsvfs_t *zfsvfs;
1174 
1175 	ASSERT3P(vfsp, !=, NULL);
1176 	ASSERT3P(osname, !=, NULL);
1177 
1178 	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
1179 	if (error)
1180 		return (error);
1181 	zfsvfs->z_vfs = vfsp;
1182 
1183 	if ((error = dsl_prop_get_integer(osname,
1184 	    "recordsize", &recordsize, NULL)))
1185 		goto out;
1186 	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
1187 	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
1188 
1189 	vfsp->vfs_data = zfsvfs;
1190 	vfsp->mnt_flag |= MNT_LOCAL;
1191 	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1192 	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
1193 	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
1194 	/*
1195 	 * This can cause a loss of coherence between ARC and page cache
1196 	 * on ZoF; it is unclear whether the problem is in FreeBSD or in ZoF.
1197 	 */
1198 	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
1199 	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
1200 	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
1201 
1202 #if defined(_KERNEL) && !defined(KMEM_DEBUG)
1203 	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
1204 #endif
1205 	/*
1206 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
1207 	 * separates our fsid from any other filesystem types, and a
1208 	 * 56-bit objset unique ID.  The objset unique ID is unique to
1209 	 * all objsets open on this system, provided by unique_create().
1210 	 * The 8-bit fs type must be put in the low bits of fsid[1]
1211 	 * because that's where other Solaris filesystems put it.
1212 	 */
1213 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1214 	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
1215 	vfsp->vfs_fsid.val[0] = fsid_guid;
1216 	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
1217 	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);
1218 
1219 	/*
1220 	 * Set features for file system.
1221 	 */
1222 	zfs_set_fuid_feature(zfsvfs);
1223 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1224 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1225 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1226 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1227 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1228 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1229 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1230 	}
1231 	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1232 
1233 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1234 		uint64_t pval;
1235 
1236 		atime_changed_cb(zfsvfs, B_FALSE);
1237 		readonly_changed_cb(zfsvfs, B_TRUE);
1238 		if ((error = dsl_prop_get_integer(osname,
1239 		    "xattr", &pval, NULL)))
1240 			goto out;
1241 		xattr_changed_cb(zfsvfs, pval);
1242 		if ((error = dsl_prop_get_integer(osname,
1243 		    "acltype", &pval, NULL)))
1244 			goto out;
1245 		acl_type_changed_cb(zfsvfs, pval);
1246 		zfsvfs->z_issnap = B_TRUE;
1247 		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1248 
1249 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1250 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1251 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1252 	} else {
1253 		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
1254 			goto out;
1255 	}
1256 
1257 	vfs_mountedfrom(vfsp, osname);
1258 
1259 	if (!zfsvfs->z_issnap)
1260 		zfsctl_create(zfsvfs);
1261 out:
1262 	if (error) {
1263 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1264 		zfsvfs_free(zfsvfs);
1265 	} else {
1266 		atomic_inc_32(&zfs_active_fs_count);
1267 	}
1268 
1269 	return (error);
1270 }
1271 
1272 static void
1273 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1274 {
1275 	objset_t *os = zfsvfs->z_os;
1276 
1277 	if (!dmu_objset_is_snapshot(os))
1278 		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1279 }
1280 
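/*
 * Extract the pool component of a dataset name; e.g. for "tank/home/user"
 * poolname becomes "tank".  The destination buffer is expected to hold at
 * least MAXNAMELEN bytes.
 */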
1281 static int
1282 getpoolname(const char *osname, char *poolname)
1283 {
1284 	char *p;
1285 
1286 	p = strchr(osname, '/');
1287 	if (p == NULL) {
1288 		if (strlen(osname) >= MAXNAMELEN)
1289 			return (ENAMETOOLONG);
1290 		(void) strcpy(poolname, osname);
1291 	} else {
1292 		if (p - osname >= MAXNAMELEN)
1293 			return (ENAMETOOLONG);
1294 		(void) strncpy(poolname, osname, p - osname);
1295 		poolname[p - osname] = '\0';
1296 	}
1297 	return (0);
1298 }
1299 
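/*
 * A leading '!' in the dataset name requests a checkpoint rewind when the
 * root pool is imported; strip it from the name and report it to the
 * caller (see the spa_import_rootpool() call in zfs_mount()).
 */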
1300 static void
1301 fetch_osname_options(char *name, bool *checkpointrewind)
1302 {
1303 
1304 	if (name[0] == '!') {
1305 		*checkpointrewind = true;
1306 		memmove(name, name + 1, strlen(name));
1307 	} else {
1308 		*checkpointrewind = false;
1309 	}
1310 }
1311 
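/*
 * VFS_MOUNT entry point.  Checks mount privilege, falling back to the
 * delegated ZFS "mount" permission plus ownership of (or write access to)
 * the covered vnode; refreshes the temporary properties on remount;
 * imports the root pool on the initial root mount; and otherwise hands
 * off to zfs_domount().
 */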
1312 static int
1313 zfs_mount(vfs_t *vfsp)
1314 {
1315 	kthread_t	*td = curthread;
1316 	vnode_t		*mvp = vfsp->mnt_vnodecovered;
1317 	cred_t		*cr = td->td_ucred;
1318 	char		*osname;
1319 	int		error = 0;
1320 	int		canwrite;
1321 	bool		checkpointrewind;
1322 
1323 	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
1324 		return (SET_ERROR(EINVAL));
1325 
1326 	/*
1327 	 * If full-owner-access is enabled and delegated administration is
1328 	 * turned on, we must set nosuid.
1329 	 */
1330 	if (zfs_super_owner &&
1331 	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
1332 		secpolicy_fs_mount_clearopts(cr, vfsp);
1333 	}
1334 
1335 	fetch_osname_options(osname, &checkpointrewind);
1336 
1337 	/*
1338 	 * Check for mount privilege.
1339 	 *
1340 	 * If we don't have privilege then see if
1341 	 * we have local permission to allow it.
1342 	 */
1343 	error = secpolicy_fs_mount(cr, mvp, vfsp);
1344 	if (error) {
1345 		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
1346 			goto out;
1347 
1348 		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
1349 			vattr_t		vattr;
1350 
1351 			/*
1352 			 * Make sure user is the owner of the mount point
1353 			 * or has sufficient privileges.
1354 			 */
1355 
1356 			vattr.va_mask = AT_UID;
1357 
1358 			vn_lock(mvp, LK_SHARED | LK_RETRY);
1359 			if (VOP_GETATTR(mvp, &vattr, cr)) {
1360 				VOP_UNLOCK1(mvp);
1361 				goto out;
1362 			}
1363 
1364 			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
1365 			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
1366 				VOP_UNLOCK1(mvp);
1367 				goto out;
1368 			}
1369 			VOP_UNLOCK1(mvp);
1370 		}
1371 
1372 		secpolicy_fs_mount_clearopts(cr, vfsp);
1373 	}
1374 
1375 	/*
1376 	 * Refuse to mount a filesystem if we are in a local zone and the
1377 	 * dataset is not visible.
1378 	 */
1379 	if (!INGLOBALZONE(curproc) &&
1380 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1381 		error = SET_ERROR(EPERM);
1382 		goto out;
1383 	}
1384 
1385 	vfsp->vfs_flag |= MNT_NFS4ACLS;
1386 
1387 	/*
1388 	 * When doing a remount, we simply refresh our temporary properties
1389 	 * according to those options set in the current VFS options.
1390 	 */
1391 	if (vfsp->vfs_flag & MS_REMOUNT) {
1392 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
1393 
1394 		/*
1395 		 * Refresh mount options with z_teardown_lock blocking I/O while
1396 		 * the filesystem is in an inconsistent state.
1397 		 * The lock also serializes this code with filesystem
1398 		 * manipulations between entry to zfs_suspend_fs() and return
1399 		 * from zfs_resume_fs().
1400 		 */
1401 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1402 		zfs_unregister_callbacks(zfsvfs);
1403 		error = zfs_register_callbacks(vfsp);
1404 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1405 		goto out;
1406 	}
1407 
1408 	/* Initial root mount: try hard to import the requested root pool. */
1409 	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
1410 	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
1411 		char pname[MAXNAMELEN];
1412 
1413 		error = getpoolname(osname, pname);
1414 		if (error == 0)
1415 			error = spa_import_rootpool(pname, checkpointrewind);
1416 		if (error)
1417 			goto out;
1418 	}
1419 	DROP_GIANT();
1420 	error = zfs_domount(vfsp, osname);
1421 	PICKUP_GIANT();
1422 
1423 out:
1424 	return (error);
1425 }
1426 
1427 static int
1428 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
1429 {
1430 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1431 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1432 
1433 	statp->f_version = STATFS_VERSION;
1434 
1435 	ZFS_ENTER(zfsvfs);
1436 
1437 	dmu_objset_space(zfsvfs->z_os,
1438 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1439 
1440 	/*
1441 	 * The underlying storage pool actually uses multiple block sizes.
1442 	 * We report the fragsize as the smallest block size we support,
1443 	 * and we report our blocksize as the filesystem's maximum blocksize.
1444 	 */
1445 	statp->f_bsize = SPA_MINBLOCKSIZE;
1446 	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
1447 
1448 	/*
1449 	 * The following report "total" blocks of various kinds in the
1450 	 * file system, but reported in terms of f_bsize - the
1451 	 * "fragment" size.
1452 	 */
1453 
1454 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1455 	statp->f_bfree = availbytes / statp->f_bsize;
1456 	statp->f_bavail = statp->f_bfree; /* no root reservation */
1457 
1458 	/*
1459 	 * statvfs() should really be called statufs(), because it assumes
1460 	 * static metadata.  ZFS doesn't preallocate files, so the best
1461 	 * we can do is report the max that could possibly fit in f_files,
1462 	 * and that minus the number actually used in f_ffree.
1463 	 * For f_ffree, report the smaller of the number of objects available
1464 	 * and the number of blocks (each object will take at least a block).
1465 	 */
1466 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1467 	statp->f_files = statp->f_ffree + usedobjs;
1468 
1469 	/*
1470 	 * We're a zfs filesystem.
1471 	 */
1472 	strlcpy(statp->f_fstypename, "zfs",
1473 	    sizeof (statp->f_fstypename));
1474 
1475 	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
1476 	    sizeof (statp->f_mntfromname));
1477 	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
1478 	    sizeof (statp->f_mntonname));
1479 
1480 	statp->f_namemax = MAXNAMELEN - 1;
1481 
1482 	ZFS_EXIT(zfsvfs);
1483 	return (0);
1484 }
1485 
1486 static int
1487 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
1488 {
1489 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1490 	znode_t *rootzp;
1491 	int error;
1492 
1493 	ZFS_ENTER(zfsvfs);
1494 
1495 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1496 	if (error == 0)
1497 		*vpp = ZTOV(rootzp);
1498 
1499 	ZFS_EXIT(zfsvfs);
1500 
1501 	if (error == 0) {
1502 		error = vn_lock(*vpp, flags);
1503 		if (error != 0) {
1504 			VN_RELE(*vpp);
1505 			*vpp = NULL;
1506 		}
1507 	}
1508 	return (error);
1509 }
1510 
1511 /*
1512  * Teardown the zfsvfs::z_os.
1513  *
1514  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1515  * and 'z_teardown_inactive_lock' held.
1516  */
1517 static int
1518 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1519 {
1520 	znode_t	*zp;
1521 	dsl_dir_t *dd;
1522 
1523 	/*
1524 	 * If someone has not already unmounted this file system,
1525 	 * drain the zrele_taskq to ensure all active references to the
1526 	 * zfsvfs_t have been handled; only then can it be safely destroyed.
1527 	 */
1528 	if (zfsvfs->z_os) {
1529 		/*
1530 		 * If we're unmounting we have to wait for the list to
1531 		 * drain completely.
1532 		 *
1533 		 * If we're not unmounting there's no guarantee the list
1534 		 * will drain completely, but zreles run from the taskq
1535 		 * may add the parents of dir-based xattrs to the taskq
1536 		 * so we want to wait for these.
1537 		 *
1538 		 * We can safely read z_nr_znodes without locking because the
1539 		 * VFS has already blocked operations which add to the
1540 		 * z_all_znodes list and thus increment z_nr_znodes.
1541 		 */
1542 		int round = 0;
1543 		while (zfsvfs->z_nr_znodes > 0) {
1544 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
1545 			    dmu_objset_pool(zfsvfs->z_os)), 0);
1546 			if (++round > 1 && !unmounting)
1547 				break;
1548 		}
1549 	}
1550 	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1551 
1552 	if (!unmounting) {
1553 		/*
1554 		 * We purge the parent filesystem's vfsp as the parent
1555 		 * filesystem and all of its snapshots have their vnode's
1556 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
1557 		 * 'z_parent' is self referential for non-snapshots.
1558 		 */
1559 #ifdef FREEBSD_NAMECACHE
1560 #if __FreeBSD_version >= 1300117
1561 		cache_purgevfs(zfsvfs->z_parent->z_vfs);
1562 #else
1563 		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
1564 #endif
1565 #endif
1566 	}
1567 
1568 	/*
1569 	 * Close the zil. NB: Can't close the zil while zfs_inactive
1570 	 * threads are blocked as zil_close can call zfs_inactive.
1571 	 */
1572 	if (zfsvfs->z_log) {
1573 		zil_close(zfsvfs->z_log);
1574 		zfsvfs->z_log = NULL;
1575 	}
1576 
1577 	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);
1578 
1579 	/*
1580 	 * If we are not unmounting (i.e. online recv) and someone already
1581 	 * unmounted this file system while we were doing the switcheroo,
1582 	 * or a reopen of z_os failed then just bail out now.
1583 	 */
1584 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1585 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
1586 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1587 		return (SET_ERROR(EIO));
1588 	}
1589 
1590 	/*
1591 	 * At this point there are no vops active, and any new vops will
1592 	 * fail with EIO since we have z_teardown_lock for writer (only
1593 	 * relevant for forced unmount).
1594 	 *
1595 	 * Release all holds on dbufs.
1596 	 */
1597 	mutex_enter(&zfsvfs->z_znodes_lock);
1598 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1599 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1600 		if (zp->z_sa_hdl != NULL) {
1601 			zfs_znode_dmu_fini(zp);
1602 		}
1603 	}
1604 	mutex_exit(&zfsvfs->z_znodes_lock);
1605 
1606 	/*
1607 	 * If we are unmounting, set the unmounted flag and let new vops
1608 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
1609 	 * other vops will fail with EIO.
1610 	 */
1611 	if (unmounting) {
1612 		zfsvfs->z_unmounted = B_TRUE;
1613 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
1614 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1615 	}
1616 
1617 	/*
1618 	 * z_os will be NULL if there was an error in attempting to reopen
1619 	 * zfsvfs, so just return as the properties had already been
1620 	 * unregistered and cached data had been evicted before.
1621 	 */
1622 	if (zfsvfs->z_os == NULL)
1623 		return (0);
1624 
1625 	/*
1626 	 * Unregister properties.
1627 	 */
1628 	zfs_unregister_callbacks(zfsvfs);
1629 
1630 	/*
1631 	 * Evict cached data
1632 	 */
1633 	if (!zfs_is_readonly(zfsvfs))
1634 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1635 	dmu_objset_evict_dbufs(zfsvfs->z_os);
1636 	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
1637 	dsl_dir_cancel_waiters(dd);
1638 
1639 	return (0);
1640 }
1641 
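/*
 * VFS_UNMOUNT entry point.  Checks unmount privilege (or the delegated
 * "mount" permission), unmounts any snapshots under .zfs, flushes the
 * vnodes, tears down the zfsvfs and finally disowns the objset and
 * destroys the .zfs control directory.
 */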
1642 static int
1643 zfs_umount(vfs_t *vfsp, int fflag)
1644 {
1645 	kthread_t *td = curthread;
1646 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1647 	objset_t *os;
1648 	cred_t *cr = td->td_ucred;
1649 	int ret;
1650 
1651 	ret = secpolicy_fs_unmount(cr, vfsp);
1652 	if (ret) {
1653 		if (dsl_deleg_access((char *)vfsp->vfs_resource,
1654 		    ZFS_DELEG_PERM_MOUNT, cr))
1655 			return (ret);
1656 	}
1657 
1658 	/*
1659 	 * Unmount any snapshots mounted under .zfs before unmounting the
1660 	 * dataset itself.
1661 	 */
1662 	if (zfsvfs->z_ctldir != NULL) {
1663 		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
1664 			return (ret);
1665 	}
1666 
1667 	if (fflag & MS_FORCE) {
1668 		/*
1669 		 * Mark file system as unmounted before calling
1670 		 * vflush(FORCECLOSE). This way we ensure no future vnops
1671 		 * will be called and risk operating on DOOMED vnodes.
1672 		 */
1673 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1674 		zfsvfs->z_unmounted = B_TRUE;
1675 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1676 	}
1677 
1678 	/*
1679 	 * Flush all the files.
1680 	 */
1681 	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
1682 	if (ret != 0)
1683 		return (ret);
1684 	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
1685 	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
1686 		taskqueue_drain(zfsvfs_taskq->tq_queue,
1687 		    &zfsvfs->z_unlinked_drain_task);
1688 
1689 	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
1690 	os = zfsvfs->z_os;
1691 
1692 	/*
1693 	 * z_os will be NULL if there was an error in
1694 	 * attempting to reopen zfsvfs.
1695 	 */
1696 	if (os != NULL) {
1697 		/*
1698 		 * Unset the objset user_ptr.
1699 		 */
1700 		mutex_enter(&os->os_user_ptr_lock);
1701 		dmu_objset_set_user(os, NULL);
1702 		mutex_exit(&os->os_user_ptr_lock);
1703 
1704 		/*
1705 		 * Finally release the objset
1706 		 */
1707 		dmu_objset_disown(os, B_TRUE, zfsvfs);
1708 	}
1709 
1710 	/*
1711 	 * We can now safely destroy the '.zfs' directory node.
1712 	 */
1713 	if (zfsvfs->z_ctldir != NULL)
1714 		zfsctl_destroy(zfsvfs);
1715 	zfs_freevfs(vfsp);
1716 
1717 	return (0);
1718 }
1719 
1720 static int
1721 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
1722 {
1723 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1724 	znode_t		*zp;
1725 	int 		err;
1726 
1727 	/*
1728 	 * zfs_zget() can't operate on virtual entries like .zfs/ or
1729 	 * .zfs/snapshot/ directories; that's why we return EOPNOTSUPP.
1730 	 * This will make NFS switch to LOOKUP instead of using VGET.
1731 	 */
1732 	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
1733 	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
1734 		return (EOPNOTSUPP);
1735 
1736 	ZFS_ENTER(zfsvfs);
1737 	err = zfs_zget(zfsvfs, ino, &zp);
1738 	if (err == 0 && zp->z_unlinked) {
1739 		vrele(ZTOV(zp));
1740 		err = EINVAL;
1741 	}
1742 	if (err == 0)
1743 		*vpp = ZTOV(zp);
1744 	ZFS_EXIT(zfsvfs);
1745 	if (err == 0) {
1746 		err = vn_lock(*vpp, flags);
1747 		if (err != 0)
1748 			vrele(*vpp);
1749 	}
1750 	if (err != 0)
1751 		*vpp = NULL;
1752 	return (err);
1753 }
1754 
1755 static int
1756 #if __FreeBSD_version >= 1300098
1757 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
1758     struct ucred **credanonp, int *numsecflavors, int *secflavors)
1759 #else
1760 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
1761     struct ucred **credanonp, int *numsecflavors, int **secflavors)
1762 #endif
1763 {
1764 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1765 
1766 	/*
1767 	 * If this is a regular file system, vfsp is the same as
1768 	 * zfsvfs->z_parent->z_vfs; but if it is a snapshot,
1769 	 * zfsvfs->z_parent->z_vfs represents the parent file system,
1770 	 * which we have to use here because only that file system
1771 	 * has mnt_export configured.
1772 	 */
1773 	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
1774 	    credanonp, numsecflavors, secflavors));
1775 }
1776 
1777 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN,
1778 	"struct fid smaller than SHORT_FID_LEN");
1779 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN,
1780 	"struct fid smaller than LONG_FID_LEN");
1781 
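/*
 * VFS_FHTOVP entry point.  A short file handle (zfid_short_t) encodes the
 * object number and a truncated generation count; a long handle
 * (zfid_long_t) additionally encodes the objset id and its generation, so
 * that handles pointing into not-yet-mounted snapshots can be resolved
 * through zfsctl_lookup_objset().
 */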
1782 static int
1783 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
1784 {
1785 	struct componentname cn;
1786 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1787 	znode_t		*zp;
1788 	vnode_t		*dvp;
1789 	uint64_t	object = 0;
1790 	uint64_t	fid_gen = 0;
1791 	uint64_t	setgen = 0;
1792 	uint64_t	gen_mask;
1793 	uint64_t	zp_gen;
1794 	int 		i, err;
1795 
1796 	*vpp = NULL;
1797 
1798 	ZFS_ENTER(zfsvfs);
1799 
1800 	/*
1801 	 * On FreeBSD we can get snapshot's mount point or its parent file
1802 	 * system mount point depending if snapshot is already mounted or not.
1803 	 */
1804 	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
1805 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
1806 		uint64_t	objsetid = 0;
1807 
1808 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1809 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1810 
1811 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1812 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1813 
1814 		ZFS_EXIT(zfsvfs);
1815 
1816 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1817 		if (err)
1818 			return (SET_ERROR(EINVAL));
1819 		ZFS_ENTER(zfsvfs);
1820 	}
1821 
1822 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1823 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
1824 
1825 		for (i = 0; i < sizeof (zfid->zf_object); i++)
1826 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1827 
1828 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
1829 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1830 	} else {
1831 		ZFS_EXIT(zfsvfs);
1832 		return (SET_ERROR(EINVAL));
1833 	}
1834 
1835 	if (fidp->fid_len == LONG_FID_LEN && (fid_gen > 1 || setgen != 0)) {
1836 		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
1837 		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
1838 		return (SET_ERROR(EINVAL));
1839 	}
1840 
1841 	/*
1842 	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
1843 	 * directory tree. If the object == zfsvfs->z_shares_dir, then
1844 	 * we are in the .zfs/shares directory tree.
1845 	 */
1846 	if ((fid_gen == 0 &&
1847 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
1848 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
1849 		ZFS_EXIT(zfsvfs);
1850 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
1851 		if (object == ZFSCTL_INO_SNAPDIR) {
1852 			cn.cn_nameptr = "snapshot";
1853 			cn.cn_namelen = strlen(cn.cn_nameptr);
1854 			cn.cn_nameiop = LOOKUP;
1855 			cn.cn_flags = ISLASTCN | LOCKLEAF;
1856 			cn.cn_lkflags = flags;
1857 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1858 			vput(dvp);
1859 		} else if (object == zfsvfs->z_shares_dir) {
1860 			/*
1861 			 * XXX This branch must not be taken,
1862 			 * if it is, then the lookup below will
1863 			 * explode.
1864 			 */
1865 			cn.cn_nameptr = "shares";
1866 			cn.cn_namelen = strlen(cn.cn_nameptr);
1867 			cn.cn_nameiop = LOOKUP;
1868 			cn.cn_flags = ISLASTCN;
1869 			cn.cn_lkflags = flags;
1870 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1871 			vput(dvp);
1872 		} else {
1873 			*vpp = dvp;
1874 		}
1875 		return (err);
1876 	}
1877 
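	/*
	 * After the loop above, 'i' is sizeof (zfid->zf_gen), so gen_mask
	 * covers exactly the generation bits carried in the fid.
	 */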
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen,
	    (u_longlong_t)gen_mask);
	if ((err = zfs_zget(zfsvfs, object, &zp))) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
		vrele(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	err = vn_lock(*vpp, flags);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 * dataset and objset intact so that they can be atomically handed off during
 * a subsequent rollback or recv operation and the resume thereafter.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	return (0);
}

/*
 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 * is an invariant across any of the operations that can be performed while the
 * filesystem is suspended.  Whether the operation succeeded or failed, the
 * preconditions are the same: the relevant objset and associated dataset are
 * owned by zfsvfs, held, and long held on entry.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

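	/*
	 * Re-enable dsl_dir activity waiters (see dsl_dir_cancel_waiters()),
	 * which may have been cancelled while the filesystem was suspended.
	 */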
	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs.  If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't set up the SA framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}

static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}

#ifdef __i386__
static int desiredvnodes_backup;
#include <sys/vmmeter.h>

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#endif

static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit().  If it is equal to desiredvnodes, it means that
	 * desiredvnodes wasn't tuned by the administrator and we can
	 * tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

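/*
 * Restore the pre-ZFS value of desiredvnodes saved by zfs_vnodes_adjust()
 * (i386 only).
 */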
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce the number of vnodes.  The default is calculated with UFS
	 * inodes in mind, which makes it too big for ZFS on i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}

void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

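/*
 * Report whether any ZFS file systems are currently active.
 */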
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}

int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
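	/*
	 * Upgrading to an SA-capable version also creates the SA master
	 * node below, so hold the additional ZAPs that requires.
	 */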
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

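	/*
	 * The tx has already been assigned, so it must be committed even on
	 * error; dmu_tx_abort() is only legal for an unassigned tx.
	 */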
	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property. For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION) {
		pname = ZPL_VERSION_STR;
	} else {
		pname = zfs_prop_to_name(prop);
	}

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_NFSV4;
			break;
		default:
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

/*
 * Return true if the corresponding vfs is marked as being unmounted
 * (MNTK_UNMOUNT is set); otherwise return false.  A true return means
 * a VFS unmount of this file system has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
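/*
 * A dataset was renamed: rewrite the f_mntfromname of every mount whose
 * source is the old name, or a child dataset or snapshot of it (the old
 * name followed by '/' or '@'), to use the new name.
 */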
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif