1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/spa_boot.h> 66 #include <sys/jail.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 struct mtx zfs_debug_mtx; 80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 81 82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 83 84 int zfs_super_owner; 85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 86 "File system owners can perform privileged operation on file systems"); 87 88 int zfs_debug_level; 89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 90 "Debug level"); 91 92 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 93 static int zfs_version_acl = ZFS_ACL_VERSION; 94 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 95 "ZFS_ACL_VERSION"); 96 static int zfs_version_spa = SPA_VERSION; 97 
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
    "SPA_VERSION");
static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
    "ZPL_VERSION");

/*
 * Forward declarations for the VFS operations wired into zfs_vfsops below.
 * Signatures vary with __FreeBSD_version to track KPI changes (quotactl
 * grew an mp_busy argument in 1400018; checkexp's flag types changed in
 * 1300098).
 */
#if __FreeBSD_version >= 1400018
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
    bool *mp_busy);
#else
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
#endif
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
static int zfs_sync(vfs_t *vfsp, int waitfor);
#if __FreeBSD_version >= 1300098
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors);
#else
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors);
#endif
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
static void zfs_freevfs(vfs_t *vfsp);

/* VFS operation vector registered with the kernel for the "zfs" fs type. */
struct vfsops zfs_vfsops = {
	.vfs_mount = zfs_mount,
	.vfs_unmount = zfs_umount,
#if __FreeBSD_version >= 1300049
	/* Newer kernels cache the root vnode; zfs_root is the slow path. */
	.vfs_root = vfs_cache_root,
	.vfs_cachedroot = zfs_root,
#else
	.vfs_root = zfs_root,
#endif
	.vfs_statfs = zfs_statfs,
	.vfs_vget = zfs_vget,
	.vfs_sync = zfs_sync,
	.vfs_checkexp = zfs_checkexp,
	.vfs_fhtovp = zfs_fhtovp,
	.vfs_quotactl = zfs_quotactl,
};

VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);

/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f
 */
static uint32_t zfs_active_fs_count = 0;

/*
 * Report the effective value of a ZFS property that may be temporarily
 * overridden by a mount option (e.g. "mount -o noatime").  If an override
 * is in effect, *val is updated and setpoint is set to "temporary".
 * Returns 0 on success or an errno.  setpoint must be large enough to
 * hold the string "temporary".
 */
int
zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
    char *setpoint)
{
	int error;
	zfsvfs_t *zfvp;
	vfs_t *vfsp;
	objset_t *os;
	uint64_t tmp = *val;	/* start from the caller-supplied value */

	error = dmu_objset_from_ds(ds, &os);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): on success the vfs appears to be busied here —
	 * every exit path past this point calls vfs_unbusy().
	 */
	error = getzfsvfs_impl(os, &zfvp);
	if (error != 0)
		return (error);
	if (zfvp == NULL)
		return (ENOENT);
	vfsp = zfvp->z_vfs;
	switch (zfs_prop) {
	case ZFS_PROP_ATIME:
		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_DEVICES:
		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_EXEC:
		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_SETUID:
		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_READONLY:
		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_XATTR:
		/* xattr carries a mode value, not a boolean. */
		if (zfvp->z_flags & ZSB_XATTR)
			tmp = zfvp->z_xattr;
		break;
	case ZFS_PROP_NBMAND:
		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
			tmp = 1;
		break;
	default:
		vfs_unbusy(vfsp);
		return (ENOENT);
	}

	vfs_unbusy(vfsp);
	if (tmp != *val) {
		/* A mount option differs from the stored property. */
		(void) strcpy(setpoint, "temporary");
		*val = tmp;
	}
	return (0);
}

/*
 * Fill in a FreeBSD struct dqblk64 for the given user or group id from
 * the dataset's {user,group}quota and {user,group}used ZAP objects.
 * Returns 0 on success, ENOENT if no quota is set (or during ZIL replay),
 * or another errno on lookup failure.
 */
static int
zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
{
	int error = 0;
	char buf[32];
	uint64_t usedobj, quotaobj;
	uint64_t quota, used = 0;
	timespec_t now;

	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;

	if (quotaobj == 0 || zfsvfs->z_replay) {
		error = ENOENT;
		goto done;
	}
	/* Quota ZAP entries are keyed by the id rendered in hex. */
	(void) sprintf(buf, "%llx", (longlong_t)id);
	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
	    buf, sizeof (quota), 1, &quota)) != 0) {
		dprintf("%s(%d): quotaobj lookup failed\n",
		    __FUNCTION__, __LINE__);
		goto done;
	}
	/*
	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
	 * So we set them to be the same.
	 */
	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
	if (error && error != ENOENT) {
		/* ENOENT simply means nothing is charged to this id yet. */
		dprintf("%s(%d): usedobj failed; %d\n",
		    __FUNCTION__, __LINE__, error);
		goto done;
	}
	dqp->dqb_curblocks = btodb(used);
	/* ZFS has no inode limits; report them as unlimited/unset. */
	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
	vfs_timestamp(&now);
	/*
	 * Setting this to 0 causes FreeBSD quota(8) to print
	 * the number of days since the epoch, which isn't
	 * particularly useful.
	 */
	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
done:
	return (error);
}

/*
 * VFS_QUOTACTL(9) entry point: translate FreeBSD quotactl(2) commands
 * onto ZFS user/group quota properties.  Only 64-bit get/set of block
 * quotas is supported; quotas can not be turned on/off.
 */
static int
#if __FreeBSD_version >= 1400018
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
#else
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct thread *td;
	int cmd, type, error = 0;
	int bitsize;
	zfs_userquota_prop_t quota_type;
	struct dqblk64 dqblk = { 0 };

	td = curthread;
	cmd = cmds >> SUBCMDSHIFT;
	type = cmds & SUBCMDMASK;

	ZFS_ENTER(zfsvfs);
	if (id == -1) {
		/* id -1 means "the calling thread's real uid/gid". */
		switch (type) {
		case USRQUOTA:
			id = td->td_ucred->cr_ruid;
			break;
		case GRPQUOTA:
			id = td->td_ucred->cr_rgid;
			break;
		default:
			error = EINVAL;
#if __FreeBSD_version < 1400018
			/*
			 * Pre-1400018 the VFS expects quotaon/off to
			 * drop the mount busy reference themselves.
			 */
			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
				vfs_unbusy(vfsp);
#endif
			goto done;
		}
	}
	/*
	 * Map BSD type to:
	 * ZFS_PROP_USERUSED,
	 * ZFS_PROP_USERQUOTA,
	 * ZFS_PROP_GROUPUSED,
	 * ZFS_PROP_GROUPQUOTA
	 */
	switch (cmd) {
	case Q_SETQUOTA:
	case Q_SETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERQUOTA;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPQUOTA;
		else
			error = EINVAL;
		break;
	case Q_GETQUOTA:
	case Q_GETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERUSED;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPUSED;
		else
			error = EINVAL;
		break;
	}

	/*
	 * Depending on the cmd, we may need to get
	 * the ruid and domain (see fuidstr_to_sid?),
	 * the fuid (how?), or other information.
	 * Create fuid using zfs_fuid_create(zfsvfs, id,
	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
	 * I think I can use just the id?
	 *
	 * Look at zfs_id_overquota() to look up a quota.
	 * zap_lookup(something, quotaobj, fuidstring,
	 * sizeof (long long), 1, &quota)
	 *
	 * See zfs_set_userquota() to set a quota.
	 */
	/*
	 * Reject any type other than USRQUOTA/GRPQUOTA; this also keeps
	 * the Q_SETQUOTA/Q_GETQUOTA cases below from ever seeing an
	 * uninitialized quota_type.
	 */
	if ((uint32_t)type >= MAXQUOTAS) {
		error = EINVAL;
		goto done;
	}

	switch (cmd) {
	case Q_GETQUOTASIZE:
		bitsize = 64;
		error = copyout(&bitsize, arg, sizeof (int));
		break;
	case Q_QUOTAON:
		// As far as I can tell, you can't turn quotas on or off on zfs
		error = 0;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_QUOTAOFF:
		error = ENOTSUP;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_SETQUOTA:
		/* Only the hard block limit maps onto a ZFS quota. */
		error = copyin(arg, &dqblk, sizeof (dqblk));
		if (error == 0)
			error = zfs_set_userquota(zfsvfs, quota_type,
			    "", id, dbtob(dqblk.dqb_bhardlimit));
		break;
	case Q_GETQUOTA:
		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
		if (error == 0)
			error = copyout(&dqblk, arg, sizeof (dqblk));
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	ZFS_EXIT(zfsvfs);
	return (error);
}

/* Return B_TRUE iff the file system is mounted read-only. */
boolean_t
zfs_is_readonly(zfsvfs_t *zfsvfs)
{
	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
}

/*
 * VFS_SYNC(9) entry point.  With a vfsp, commit that file system's ZIL;
 * with NULL (global sync(8)), force all pools to sync their dirty data.
 */
static int
zfs_sync(vfs_t *vfsp, int waitfor)
{

	/*
	 * Data integrity is job one. We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (panicstr)
		return (0);

	/*
	 * Ignore the system syncher. ZFS already commits async data
	 * at zfs_txg_timeout intervals.
	 */
	if (waitfor == MNT_LAZY)
		return (0);

	if (vfsp != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		zfsvfs_t *zfsvfs = vfsp->vfs_data;
		dsl_pool_t *dp;
		int error;

		/* Flush dirty vnodes/buffers through the generic layer. */
		error = vfs_stdsync(vfsp, waitfor);
		if (error != 0)
			return (error);

		ZFS_ENTER(zfsvfs);
		dp = dmu_objset_pool(zfsvfs->z_os);

		/*
		 * If the system is shutting down, then skip any
		 * filesystems which may exist on a suspended pool.
		 */
		if (rebooting && spa_suspended(dp->dp_spa)) {
			ZFS_EXIT(zfsvfs);
			return (0);
		}

		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, 0);

		ZFS_EXIT(zfsvfs);
	} else {
		/*
		 * Sync all ZFS filesystems. This is what happens when you
		 * run sync(8). Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}

/*
 * dsl_prop_register() callback: keep the in-core atime state and the
 * mount option list in agreement with the "atime" property.
 */
static void
atime_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == TRUE) {
		zfsvfs->z_atime = TRUE;
		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
	} else {
		zfsvfs->z_atime = FALSE;
		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
	}
}

/*
 * "xattr" property callback: newval is a mode (off/dir/sa), not a
 * boolean, so track both the enable flag and the SA-vs-dir choice.
 */
static void
xattr_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == ZFS_XATTR_OFF) {
		zfsvfs->z_flags &= ~ZSB_XATTR;
	} else {
		zfsvfs->z_flags |= ZSB_XATTR;

		if (newval == ZFS_XATTR_SA)
			zfsvfs->z_xattr_sa = B_TRUE;
		else
			zfsvfs->z_xattr_sa = B_FALSE;
	}
}

/*
 * "recordsize" property callback: the new value has already been
 * validated by the property system (power of two within SPA limits).
 */
static void
blksz_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;
	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
	ASSERT(ISP2(newval));

	zfsvfs->z_max_blksz = newval;
	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
}

/* "readonly" property callback: toggle VFS_RDONLY and the ro/rw options. */
static void
readonly_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval) {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
	} else {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
	}
}

/* "setuid" property callback: toggle VFS_NOSETUID and mount options. */
static void
setuid_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
	}
}

/* "exec" property callback: toggle VFS_NOEXEC and mount options. */
static void
exec_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
	}
}

/*
 * The nbmand mount option can be changed at mount time.
557 * We can't allow it to be toggled on live file systems or incorrect 558 * behavior may be seen from cifs clients 559 * 560 * This property isn't registered via dsl_prop_register(), but this callback 561 * will be called when a file system is first mounted 562 */ 563 static void 564 nbmand_changed_cb(void *arg, uint64_t newval) 565 { 566 zfsvfs_t *zfsvfs = arg; 567 if (newval == FALSE) { 568 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 569 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 570 } else { 571 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 572 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 573 } 574 } 575 576 static void 577 snapdir_changed_cb(void *arg, uint64_t newval) 578 { 579 zfsvfs_t *zfsvfs = arg; 580 581 zfsvfs->z_show_ctldir = newval; 582 } 583 584 static void 585 acl_mode_changed_cb(void *arg, uint64_t newval) 586 { 587 zfsvfs_t *zfsvfs = arg; 588 589 zfsvfs->z_acl_mode = newval; 590 } 591 592 static void 593 acl_inherit_changed_cb(void *arg, uint64_t newval) 594 { 595 zfsvfs_t *zfsvfs = arg; 596 597 zfsvfs->z_acl_inherit = newval; 598 } 599 600 static void 601 acl_type_changed_cb(void *arg, uint64_t newval) 602 { 603 zfsvfs_t *zfsvfs = arg; 604 605 zfsvfs->z_acl_type = newval; 606 } 607 608 static int 609 zfs_register_callbacks(vfs_t *vfsp) 610 { 611 struct dsl_dataset *ds = NULL; 612 objset_t *os = NULL; 613 zfsvfs_t *zfsvfs = NULL; 614 uint64_t nbmand; 615 boolean_t readonly = B_FALSE; 616 boolean_t do_readonly = B_FALSE; 617 boolean_t setuid = B_FALSE; 618 boolean_t do_setuid = B_FALSE; 619 boolean_t exec = B_FALSE; 620 boolean_t do_exec = B_FALSE; 621 boolean_t xattr = B_FALSE; 622 boolean_t atime = B_FALSE; 623 boolean_t do_atime = B_FALSE; 624 boolean_t do_xattr = B_FALSE; 625 int error = 0; 626 627 ASSERT3P(vfsp, !=, NULL); 628 zfsvfs = vfsp->vfs_data; 629 ASSERT3P(zfsvfs, !=, NULL); 630 os = zfsvfs->z_os; 631 632 /* 633 * This function can be called for a snapshot when we update snapshot's 634 * mount point, which 
isn't really supported. 635 */ 636 if (dmu_objset_is_snapshot(os)) 637 return (EOPNOTSUPP); 638 639 /* 640 * The act of registering our callbacks will destroy any mount 641 * options we may have. In order to enable temporary overrides 642 * of mount options, we stash away the current values and 643 * restore them after we register the callbacks. 644 */ 645 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 646 !spa_writeable(dmu_objset_spa(os))) { 647 readonly = B_TRUE; 648 do_readonly = B_TRUE; 649 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 650 readonly = B_FALSE; 651 do_readonly = B_TRUE; 652 } 653 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 654 setuid = B_FALSE; 655 do_setuid = B_TRUE; 656 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 657 setuid = B_TRUE; 658 do_setuid = B_TRUE; 659 } 660 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 661 exec = B_FALSE; 662 do_exec = B_TRUE; 663 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 664 exec = B_TRUE; 665 do_exec = B_TRUE; 666 } 667 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 668 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 669 do_xattr = B_TRUE; 670 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 671 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 672 do_xattr = B_TRUE; 673 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 674 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 675 do_xattr = B_TRUE; 676 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 677 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 678 do_xattr = B_TRUE; 679 } 680 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 681 atime = B_FALSE; 682 do_atime = B_TRUE; 683 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 684 atime = B_TRUE; 685 do_atime = B_TRUE; 686 } 687 688 /* 689 * We need to enter pool configuration here, so that we can use 690 * dsl_prop_get_int_ds() to handle the special nbmand property below. 
691 * dsl_prop_get_integer() can not be used, because it has to acquire 692 * spa_namespace_lock and we can not do that because we already hold 693 * z_teardown_lock. The problem is that spa_write_cachefile() is called 694 * with spa_namespace_lock held and the function calls ZFS vnode 695 * operations to write the cache file and thus z_teardown_lock is 696 * acquired after spa_namespace_lock. 697 */ 698 ds = dmu_objset_ds(os); 699 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 700 701 /* 702 * nbmand is a special property. It can only be changed at 703 * mount time. 704 * 705 * This is weird, but it is documented to only be changeable 706 * at mount time. 707 */ 708 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 709 nbmand = B_FALSE; 710 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 711 nbmand = B_TRUE; 712 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { 713 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 714 return (error); 715 } 716 717 /* 718 * Register property callbacks. 719 * 720 * It would probably be fine to just check for i/o error from 721 * the first prop_register(), but I guess I like to go 722 * overboard... 723 */ 724 error = dsl_prop_register(ds, 725 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 726 error = error ? error : dsl_prop_register(ds, 727 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 728 error = error ? error : dsl_prop_register(ds, 729 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 730 error = error ? error : dsl_prop_register(ds, 731 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 732 error = error ? error : dsl_prop_register(ds, 733 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 734 error = error ? error : dsl_prop_register(ds, 735 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 736 error = error ? 
error : dsl_prop_register(ds, 737 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 738 error = error ? error : dsl_prop_register(ds, 739 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 740 error = error ? error : dsl_prop_register(ds, 741 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 742 error = error ? error : dsl_prop_register(ds, 743 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 744 zfsvfs); 745 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 746 if (error) 747 goto unregister; 748 749 /* 750 * Invoke our callbacks to restore temporary mount options. 751 */ 752 if (do_readonly) 753 readonly_changed_cb(zfsvfs, readonly); 754 if (do_setuid) 755 setuid_changed_cb(zfsvfs, setuid); 756 if (do_exec) 757 exec_changed_cb(zfsvfs, exec); 758 if (do_xattr) 759 xattr_changed_cb(zfsvfs, xattr); 760 if (do_atime) 761 atime_changed_cb(zfsvfs, atime); 762 763 nbmand_changed_cb(zfsvfs, nbmand); 764 765 return (0); 766 767 unregister: 768 dsl_prop_unregister_all(ds, zfsvfs); 769 return (error); 770 } 771 772 /* 773 * Associate this zfsvfs with the given objset, which must be owned. 774 * This will cache a bunch of on-disk state from the objset in the 775 * zfsvfs. 776 */ 777 static int 778 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 779 { 780 int error; 781 uint64_t val; 782 783 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 784 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 785 zfsvfs->z_os = os; 786 787 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 788 if (error != 0) 789 return (error); 790 if (zfsvfs->z_version > 791 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 792 (void) printf("Can't mount a version %lld file system " 793 "on a version %lld pool\n. 
Pool must be upgraded to mount " 794 "this file system.", (u_longlong_t)zfsvfs->z_version, 795 (u_longlong_t)spa_version(dmu_objset_spa(os))); 796 return (SET_ERROR(ENOTSUP)); 797 } 798 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 799 if (error != 0) 800 return (error); 801 zfsvfs->z_norm = (int)val; 802 803 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 804 if (error != 0) 805 return (error); 806 zfsvfs->z_utf8 = (val != 0); 807 808 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 809 if (error != 0) 810 return (error); 811 zfsvfs->z_case = (uint_t)val; 812 813 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 814 if (error != 0) 815 return (error); 816 zfsvfs->z_acl_type = (uint_t)val; 817 818 /* 819 * Fold case on file systems that are always or sometimes case 820 * insensitive. 821 */ 822 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 823 zfsvfs->z_case == ZFS_CASE_MIXED) 824 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 825 826 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 827 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 828 829 uint64_t sa_obj = 0; 830 if (zfsvfs->z_use_sa) { 831 /* should either have both of these objects or none */ 832 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 833 &sa_obj); 834 if (error != 0) 835 return (error); 836 837 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 838 if (error == 0 && val == ZFS_XATTR_SA) 839 zfsvfs->z_xattr_sa = B_TRUE; 840 } 841 842 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 843 &zfsvfs->z_attr_table); 844 if (error != 0) 845 return (error); 846 847 if (zfsvfs->z_version >= ZPL_VERSION_SA) 848 sa_register_update_callback(os, zfs_sa_upgrade); 849 850 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 851 &zfsvfs->z_root); 852 if (error != 0) 853 return (error); 854 ASSERT3U(zfsvfs->z_root, !=, 0); 855 856 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 857 &zfsvfs->z_unlinkedobj); 858 if (error != 0) 859 return 
(error); 860 861 error = zap_lookup(os, MASTER_NODE_OBJ, 862 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 863 8, 1, &zfsvfs->z_userquota_obj); 864 if (error == ENOENT) 865 zfsvfs->z_userquota_obj = 0; 866 else if (error != 0) 867 return (error); 868 869 error = zap_lookup(os, MASTER_NODE_OBJ, 870 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 871 8, 1, &zfsvfs->z_groupquota_obj); 872 if (error == ENOENT) 873 zfsvfs->z_groupquota_obj = 0; 874 else if (error != 0) 875 return (error); 876 877 error = zap_lookup(os, MASTER_NODE_OBJ, 878 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 879 8, 1, &zfsvfs->z_projectquota_obj); 880 if (error == ENOENT) 881 zfsvfs->z_projectquota_obj = 0; 882 else if (error != 0) 883 return (error); 884 885 error = zap_lookup(os, MASTER_NODE_OBJ, 886 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 887 8, 1, &zfsvfs->z_userobjquota_obj); 888 if (error == ENOENT) 889 zfsvfs->z_userobjquota_obj = 0; 890 else if (error != 0) 891 return (error); 892 893 error = zap_lookup(os, MASTER_NODE_OBJ, 894 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 895 8, 1, &zfsvfs->z_groupobjquota_obj); 896 if (error == ENOENT) 897 zfsvfs->z_groupobjquota_obj = 0; 898 else if (error != 0) 899 return (error); 900 901 error = zap_lookup(os, MASTER_NODE_OBJ, 902 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 903 8, 1, &zfsvfs->z_projectobjquota_obj); 904 if (error == ENOENT) 905 zfsvfs->z_projectobjquota_obj = 0; 906 else if (error != 0) 907 return (error); 908 909 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 910 &zfsvfs->z_fuid_obj); 911 if (error == ENOENT) 912 zfsvfs->z_fuid_obj = 0; 913 else if (error != 0) 914 return (error); 915 916 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 917 &zfsvfs->z_shares_dir); 918 if (error == ENOENT) 919 zfsvfs->z_shares_dir = 0; 920 else if (error != 0) 921 return (error); 922 923 /* 924 * Only use the name cache if we are looking for a 925 * name on a file system that 
does not require normalization
	 * or case folding. We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name (which is always the case on
	 * FreeBSD).
	 */
	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));

	return (0);
}

/* Taskq used to run deferred work such as the unlinked-set drain below. */
taskq_t *zfsvfs_taskq;

/* Taskq wrapper so zfs_unlinked_drain() can run asynchronously. */
static void
zfsvfs_task_unlinked_drain(void *context, int pending __unused)
{

	zfs_unlinked_drain((zfsvfs_t *)context);
}

/*
 * Allocate a zfsvfs for the dataset named osname, taking ownership of
 * the underlying objset (read-only for snapshots or if requested).
 * On success *zfvp holds the new zfsvfs; on failure it is freed here.
 */
int
zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
{
	objset_t *os;
	zfsvfs_t *zfsvfs;
	int error;
	/* Snapshots ('@' in the name) are always owned read-only. */
	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));

	/*
	 * XXX: Fix struct statfs so this isn't necessary!
	 *
	 * The 'osname' is used as the filesystem's special node, which means
	 * it must fit in statfs.f_mntfromname, or else it can't be
	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
	 * 'zfs unmount' to think it's not mounted when it is.
	 */
	if (strlen(osname) >= MNAMELEN)
		return (SET_ERROR(ENAMETOOLONG));

	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);

	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
	    &os);
	if (error != 0) {
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	/* zfsvfs_create_impl() frees zfsvfs and disowns os on failure. */
	error = zfsvfs_create_impl(zfvp, zfsvfs, os);

	return (error);
}


/*
 * Initialize the locks/lists of a freshly allocated zfsvfs and bind it
 * to the (already owned) objset os.  On failure the objset is disowned
 * and zfsvfs is freed; on success *zfvp is set.
 */
int
zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;

	zfsvfs->z_vfs = NULL;
	zfsvfs->z_parent = zfsvfs;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
	    zfsvfs_task_unlinked_drain, zfsvfs);
	ZFS_TEARDOWN_INIT(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	error = zfsvfs_init(zfsvfs, os);
	if (error != 0) {
		dmu_objset_disown(os, B_TRUE, zfsvfs);
		*zfvp = NULL;
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	*zfvp = zfsvfs;
	return (0);
}

/*
 * Finish setting up a zfsvfs: register property callbacks, open the
 * ZIL, and (when mounting, as opposed to an online recv) drain the
 * unlinked set and replay the intent log.  Returns 0 or an errno.
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int error;

	/*
	 * Check for a bad on-disk format version now since we
	 * lied about owning the dataset readonly before.
	 */
	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
		return (SET_ERROR(EROFS));

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly != 0) {
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		} else {
			dsl_dir_t *dd;
			zap_stats_t zs;

			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
			    &zs) == 0) {
				dataset_kstats_update_nunlinks_kstat(
				    &zfsvfs->z_kstat, zs.zs_num_entries);
				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
				    "num_entries in unlinked set: %llu",
				    (u_longlong_t)zs.zs_num_entries);
			}

			zfs_unlinked_drain(zfsvfs);
			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
			dd->dd_activity_cancelled = B_FALSE;
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain(). (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.) This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg. This would write a "create
		 * object N" record to the intent log. Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk. So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				/*
				 * Replay bypasses the name cache; save
				 * and restore its enable state.
				 */
				boolean_t use_nc = zfsvfs->z_use_namecache;
				zfsvfs->z_use_namecache = B_FALSE;
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
				zfsvfs->z_use_namecache = use_nc;
			}
		}

		/* restore readonly bit */
		if (readonly != 0)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}

/*
 * Tear down and free a zfsvfs.  The znode list must already be empty
 * and the objset disowned by the caller.
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;

	zfs_fuid_destroy(zfsvfs);

	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
	list_destroy(&zfsvfs->z_all_znodes);
	ZFS_TEARDOWN_DESTROY(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	dataset_kstats_destroy(&zfsvfs->z_kstat);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}

/*
 * Recompute FUID/SA usage for the current ZPL version and advertise or
 * retract the matching VFS features on the mount (if one is attached).
 */
static void
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	if (zfsvfs->z_vfs) {
		if (zfsvfs->z_use_fuids) {
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
		} else {
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
		}
	}
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}

/*
 * Core of zfs_mount(): create the zfsvfs for osname and attach it to
 * vfsp, setting mount flags, fsid and per-mount parameters.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;

	ASSERT3P(vfsp, !=, NULL);
	ASSERT3P(osname, !=, NULL);

	error =
	    zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	if ((error = dsl_prop_get_integer(osname,
	    "recordsize", &recordsize, NULL)))
		goto out;
	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;

	vfsp->vfs_data = zfsvfs;
	vfsp->mnt_flag |= MNT_LOCAL;
	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
	/*
	 * This can cause a loss of coherence between ARC and page cache
	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
	 */
	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;

#if defined(_KERNEL) && !defined(KMEM_DEBUG)
	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
#endif
	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID.  The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
	vfsp->vfs_fsid.val[0] = fsid_guid;
	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);

	/*
	 * Set features for file system.
	 */
	zfs_set_fuid_feature(zfsvfs);
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
	}
	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		/*
		 * Snapshot mounts are read-only, noatime, with sync
		 * disabled; they skip the full zfsvfs_setup() path.
		 */
		uint64_t pval;

		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		if ((error = dsl_prop_get_integer(osname,
		    "acltype", &pval, NULL)))
			goto out;
		acl_type_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
			goto out;
	}

	vfs_mountedfrom(vfsp, osname);

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	/* On failure, disown the objset and free everything we built. */
	if (error) {
		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
		zfsvfs_free(zfsvfs);
	} else {
		atomic_inc_32(&zfs_active_fs_count);
	}

	return (error);
}

/*
 * Unregister the per-dataset property callbacks installed by
 * zfs_register_callbacks().  Snapshots never register any.
 */
static void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
	objset_t *os = zfsvfs->z_os;

	if (!dmu_objset_is_snapshot(os))
		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}

/*
 * Copy the pool component (everything before the first '/') of 'osname'
 * into 'poolname'.  Returns ENAMETOOLONG if it would not fit in
 * MAXNAMELEN, else 0.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	char *p;

	p = strchr(osname, '/');
	if (p == NULL) {
		if (strlen(osname) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(poolname, osname);
	} else {
		if
		    (p - osname >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strncpy(poolname, osname, p - osname);
		poolname[p - osname] = '\0';
	}
	return (0);
}

/*
 * If 'name' begins with '!', strip it in place and request a
 * checkpoint-rewind pool import; otherwise leave the name untouched.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{

	if (name[0] == '!') {
		*checkpointrewind = true;
		memmove(name, name + 1, strlen(name));
	} else {
		*checkpointrewind = false;
	}
}

/*
 * VFS_MOUNT entry point: check privileges and zone visibility, handle
 * remount and root-pool import, then hand off to zfs_domount().
 */
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t *td = curthread;
	vnode_t *mvp = vfsp->mnt_vnodecovered;
	cred_t *cr = td->td_ucred;
	char *osname;
	int error = 0;
	int canwrite;
	bool checkpointrewind;

	/* The dataset name arrives as the "from" mount option. */
	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	fetch_osname_options(osname, &checkpointrewind);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O while
		 * the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/*
	 * Initial root mount: try hard to import the requested root pool.
	 */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}

/*
 * VFS_STATFS entry point: report space and object counts derived from
 * the objset's space accounting.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	statp->f_version = STATFS_VERSION;

	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	statp->f_bfree = availbytes / statp->f_bsize;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata. ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_files = statp->f_ffree + usedobjs;

	/*
	 * We're a zfs filesystem.
	 */
	strlcpy(statp->f_fstypename, "zfs",
	    sizeof (statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof (statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof (statp->f_mntonname));

	statp->f_namemax = MAXNAMELEN - 1;

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * VFS_ROOT entry point: return the root vnode, locked with 'flags'.
 * The vnode lock is taken only after ZFS_EXIT to avoid lock-order
 * issues with the teardown lock.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	ZFS_ENTER(zfsvfs);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
	if (error == 0)
		*vpp = ZTOV(rootzp);

	ZFS_EXIT(zfsvfs);

	if (error == 0) {
		error = vn_lock(*vpp, flags);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}
	}
	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t *zp;
	dsl_dir_t *dd;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but zreles run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zfsvfs->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			/* Non-unmount callers wait at most two rounds. */
			if (++round > 1 && !unmounting)
				break;
		}
	}
	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
#ifdef FREEBSD_NAMECACHE
#if __FreeBSD_version >= 1300117
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#else
		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
#endif
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		if (zp->z_sa_hdl != NULL) {
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock. zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/*
 * VFS_UNMOUNT entry point: check privileges, unmount .zfs snapshots,
 * flush vnodes, tear down the zfsvfs and release the objset.
 */
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/* Delegated ZFS_DELEG_PERM_MOUNT also allows unmount. */
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/* Cancel (or wait out) any pending unlinked-drain task. */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET entry point: translate an inode number into a locked vnode,
 * used by NFS.  Virtual .zfs entries are refused so NFS falls back to
 * LOOKUP (see comment below).
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	/* Znodes on the unlinked (delete) queue must not be handed out. */
	if (err == 0 && zp->z_unlinked) {
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

/*
 * VFS_CHECKEXP entry point: validate an NFS export request.  The
 * signature changed in FreeBSD 1300098 (extflagsp/secflavors types).
 */
static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}

/*
 * Our short/long ZFS file handles must fit inside the generic struct fid.
 * NOTE(review): the assertion messages read backwards — the check fails
 * when struct fid is SMALLER than the FID length; the text says "bigger".
 */
_Static_assert(sizeof (struct fid) >= SHORT_FID_LEN,
	"struct fid bigger than SHORT_FID_LEN");
_Static_assert(sizeof (struct fid) >= LONG_FID_LEN,
	"struct fid bigger than LONG_FID_LEN");

/*
 * VFS_FHTOVP entry point: translate an NFS file handle (short or long
 * ZFS fid) into a locked, referenced vnode.  Long fids additionally
 * encode an objset id used to reach snapshot filesystems.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
{
	struct componentname cn;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	vnode_t *dvp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t setgen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;

		/* Decode little-endian objset id and generation bytes. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* Re-resolve zfsvfs to the snapshot's own filesystem. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (SET_ERROR(EINVAL));
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * NOTE(review): this error path returns without ZFS_EXIT(),
	 * unlike every other error return in this function — looks like
	 * a teardown-lock leak; verify against upstream.
	 */
	if (fidp->fid_len == LONG_FID_LEN && (fid_gen > 1 || setgen != 0)) {
		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
	 * directory tree. If the object == zfsvfs->z_shares_dir, then
	 * we are in the .zfs/shares directory tree.
	 */
	if ((fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
		ZFS_EXIT(zfsvfs);
		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
		if (object == ZFSCTL_INO_SNAPDIR) {
			cn.cn_nameptr = "snapshot";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN | LOCKLEAF;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else if (object == zfsvfs->z_shares_dir) {
			/*
			 * XXX This branch must not be taken,
			 * if it is, then the lookup below will
			 * explode.
			 */
			cn.cn_nameptr = "shares";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else {
			*vpp = dvp;
		}
		/*
		 * NOTE(review): 'err' is only assigned on the LONG_FID
		 * snapshot path above; for a short fid reaching here it
		 * looks uninitialized — confirm against upstream.
		 */
		return (err);
	}

	/* 'i' is sizeof (zf_gen) here, so this masks the stored gen width. */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen,
	    (u_longlong_t)gen_mask);
	if ((err = zfs_zget(zfsvfs, object, &zp))) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	/* A stored generation of 0 is represented as 1 in the fid. */
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
		vrele(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	err = vn_lock(*vpp, flags);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 * dataset and objset intact so that they can be atomically handed off during
 * a subsequent rollback or recv operation and the resume thereafter.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	return (0);
}

/*
 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 * is an invariant across any of the operations that can be performed while the
 * filesystem was suspended.  Whether it succeeded or failed, the preconditions
 * are the same: the relevant objset and associated dataset are owned by
 * zfsvfs, held, and long held on entry.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs. If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}

/*
 * VFS free callback: release the zfsvfs_t and drop the active
 * filesystem count.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}

#ifdef __i386__
static int desiredvnodes_backup;
#include <sys/vmmeter.h>


#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#endif

/*
 * On i386, shrink the system vnode limit if the administrator has not
 * tuned it; the UFS-oriented default is too large for ZFS on i386.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit(). If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/* Undo zfs_vnodes_adjust() on module unload (i386 only). */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/* Module init: set up .zfs control dir, znode cache and the vfs taskq. */
void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}

/* Module fini: tear down everything zfs_init() created, in reverse. */
void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/* Return nonzero while any ZFS filesystem is mounted. */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}

/*
 * Upgrade the on-disk ZPL version of this filesystem to 'newvers'.
 * Downgrades and versions unsupported by the pool's SPA version are
 * rejected.  Creates the SA master node when first crossing
 * ZPL_VERSION_SA.
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	/* No downgrades. */
	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	/* The pool must be new enough to understand the requested version. */
	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property. For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION) {
		pname = ZPL_VERSION_STR;
	} else {
		pname = zfs_prop_to_name(prop);
	}

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_NFSV4;
			break;
		default:
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
/*
 * After a dataset rename from 'oldname' to 'newname', rewrite
 * f_mntfromname of every mounted filesystem that is the renamed dataset
 * itself or one of its descendants ('/') or snapshots ('@').
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		/* Exact match: the renamed dataset itself. */
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		/* Prefix match only counts at a '/' or '@' boundary. */
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif