1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/jail.h> 66 #include <sys/osd.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 struct mtx zfs_debug_mtx; 80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 81 82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 83 84 int zfs_super_owner; 85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 86 "File system owners can perform privileged operation on file systems"); 87 88 int zfs_debug_level; 89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 90 "Debug level"); 91 92 struct zfs_jailparam { 93 int mount_snapshot; 94 }; 95 96 static struct zfs_jailparam zfs_jailparam0 = { 97 .mount_snapshot = 0, 98 }; 99 100 static int zfs_jailparam_slot; 101 102 SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters"); 103 SYSCTL_JAIL_PARAM(_zfs, 
mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I", 104 "Allow mounting snapshots in the .zfs directory for unjailed datasets"); 105 106 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 107 static int zfs_version_acl = ZFS_ACL_VERSION; 108 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 109 "ZFS_ACL_VERSION"); 110 static int zfs_version_spa = SPA_VERSION; 111 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 112 "SPA_VERSION"); 113 static int zfs_version_zpl = ZPL_VERSION; 114 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 115 "ZPL_VERSION"); 116 117 #if __FreeBSD_version >= 1400018 118 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 119 bool *mp_busy); 120 #else 121 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 122 #endif 123 static int zfs_mount(vfs_t *vfsp); 124 static int zfs_umount(vfs_t *vfsp, int fflag); 125 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 126 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 127 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 128 static int zfs_sync(vfs_t *vfsp, int waitfor); 129 #if __FreeBSD_version >= 1300098 130 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 131 struct ucred **credanonp, int *numsecflavors, int *secflavors); 132 #else 133 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 134 struct ucred **credanonp, int *numsecflavors, int **secflavors); 135 #endif 136 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 137 static void zfs_freevfs(vfs_t *vfsp); 138 139 struct vfsops zfs_vfsops = { 140 .vfs_mount = zfs_mount, 141 .vfs_unmount = zfs_umount, 142 #if __FreeBSD_version >= 1300049 143 .vfs_root = vfs_cache_root, 144 .vfs_cachedroot = zfs_root, 145 #else 146 .vfs_root = zfs_root, 147 #endif 148 .vfs_statfs = zfs_statfs, 149 .vfs_vget = 
zfs_vget, 150 .vfs_sync = zfs_sync, 151 .vfs_checkexp = zfs_checkexp, 152 .vfs_fhtovp = zfs_fhtovp, 153 .vfs_quotactl = zfs_quotactl, 154 }; 155 156 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 157 158 /* 159 * We need to keep a count of active fs's. 160 * This is necessary to prevent our module 161 * from being unloaded after a umount -f 162 */ 163 static uint32_t zfs_active_fs_count = 0; 164 165 int 166 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 167 char *setpoint) 168 { 169 int error; 170 zfsvfs_t *zfvp; 171 vfs_t *vfsp; 172 objset_t *os; 173 uint64_t tmp = *val; 174 175 error = dmu_objset_from_ds(ds, &os); 176 if (error != 0) 177 return (error); 178 179 error = getzfsvfs_impl(os, &zfvp); 180 if (error != 0) 181 return (error); 182 if (zfvp == NULL) 183 return (ENOENT); 184 vfsp = zfvp->z_vfs; 185 switch (zfs_prop) { 186 case ZFS_PROP_ATIME: 187 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 188 tmp = 0; 189 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 190 tmp = 1; 191 break; 192 case ZFS_PROP_DEVICES: 193 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 194 tmp = 0; 195 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 196 tmp = 1; 197 break; 198 case ZFS_PROP_EXEC: 199 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 200 tmp = 0; 201 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 202 tmp = 1; 203 break; 204 case ZFS_PROP_SETUID: 205 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 206 tmp = 0; 207 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 208 tmp = 1; 209 break; 210 case ZFS_PROP_READONLY: 211 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 212 tmp = 0; 213 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 214 tmp = 1; 215 break; 216 case ZFS_PROP_XATTR: 217 if (zfvp->z_flags & ZSB_XATTR) 218 tmp = zfvp->z_xattr; 219 break; 220 case ZFS_PROP_NBMAND: 221 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 222 tmp = 0; 223 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 224 tmp = 1; 225 break; 226 default: 227 
vfs_unbusy(vfsp); 228 return (ENOENT); 229 } 230 231 vfs_unbusy(vfsp); 232 if (tmp != *val) { 233 (void) strcpy(setpoint, "temporary"); 234 *val = tmp; 235 } 236 return (0); 237 } 238 239 static int 240 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 241 { 242 int error = 0; 243 char buf[32]; 244 uint64_t usedobj, quotaobj; 245 uint64_t quota, used = 0; 246 timespec_t now; 247 248 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 249 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 250 251 if (quotaobj == 0 || zfsvfs->z_replay) { 252 error = ENOENT; 253 goto done; 254 } 255 (void) sprintf(buf, "%llx", (longlong_t)id); 256 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 257 buf, sizeof (quota), 1, "a)) != 0) { 258 dprintf("%s(%d): quotaobj lookup failed\n", 259 __FUNCTION__, __LINE__); 260 goto done; 261 } 262 /* 263 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 264 * So we set them to be the same. 265 */ 266 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 267 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 268 if (error && error != ENOENT) { 269 dprintf("%s(%d): usedobj failed; %d\n", 270 __FUNCTION__, __LINE__, error); 271 goto done; 272 } 273 dqp->dqb_curblocks = btodb(used); 274 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 275 vfs_timestamp(&now); 276 /* 277 * Setting this to 0 causes FreeBSD quota(8) to print 278 * the number of days since the epoch, which isn't 279 * particularly useful. 
280 */ 281 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 282 done: 283 return (error); 284 } 285 286 static int 287 #if __FreeBSD_version >= 1400018 288 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 289 #else 290 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 291 #endif 292 { 293 zfsvfs_t *zfsvfs = vfsp->vfs_data; 294 struct thread *td; 295 int cmd, type, error = 0; 296 int bitsize; 297 zfs_userquota_prop_t quota_type; 298 struct dqblk64 dqblk = { 0 }; 299 300 td = curthread; 301 cmd = cmds >> SUBCMDSHIFT; 302 type = cmds & SUBCMDMASK; 303 304 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 305 return (error); 306 if (id == -1) { 307 switch (type) { 308 case USRQUOTA: 309 id = td->td_ucred->cr_ruid; 310 break; 311 case GRPQUOTA: 312 id = td->td_ucred->cr_rgid; 313 break; 314 default: 315 error = EINVAL; 316 #if __FreeBSD_version < 1400018 317 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 318 vfs_unbusy(vfsp); 319 #endif 320 goto done; 321 } 322 } 323 /* 324 * Map BSD type to: 325 * ZFS_PROP_USERUSED, 326 * ZFS_PROP_USERQUOTA, 327 * ZFS_PROP_GROUPUSED, 328 * ZFS_PROP_GROUPQUOTA 329 */ 330 switch (cmd) { 331 case Q_SETQUOTA: 332 case Q_SETQUOTA32: 333 if (type == USRQUOTA) 334 quota_type = ZFS_PROP_USERQUOTA; 335 else if (type == GRPQUOTA) 336 quota_type = ZFS_PROP_GROUPQUOTA; 337 else 338 error = EINVAL; 339 break; 340 case Q_GETQUOTA: 341 case Q_GETQUOTA32: 342 if (type == USRQUOTA) 343 quota_type = ZFS_PROP_USERUSED; 344 else if (type == GRPQUOTA) 345 quota_type = ZFS_PROP_GROUPUSED; 346 else 347 error = EINVAL; 348 break; 349 } 350 351 /* 352 * Depending on the cmd, we may need to get 353 * the ruid and domain (see fuidstr_to_sid?), 354 * the fuid (how?), or other information. 355 * Create fuid using zfs_fuid_create(zfsvfs, id, 356 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 357 * I think I can use just the id? 358 * 359 * Look at zfs_id_overquota() to look up a quota. 
360 * zap_lookup(something, quotaobj, fuidstring, 361 * sizeof (long long), 1, "a) 362 * 363 * See zfs_set_userquota() to set a quota. 364 */ 365 if ((uint32_t)type >= MAXQUOTAS) { 366 error = EINVAL; 367 goto done; 368 } 369 370 switch (cmd) { 371 case Q_GETQUOTASIZE: 372 bitsize = 64; 373 error = copyout(&bitsize, arg, sizeof (int)); 374 break; 375 case Q_QUOTAON: 376 // As far as I can tell, you can't turn quotas on or off on zfs 377 error = 0; 378 #if __FreeBSD_version < 1400018 379 vfs_unbusy(vfsp); 380 #endif 381 break; 382 case Q_QUOTAOFF: 383 error = ENOTSUP; 384 #if __FreeBSD_version < 1400018 385 vfs_unbusy(vfsp); 386 #endif 387 break; 388 case Q_SETQUOTA: 389 error = copyin(arg, &dqblk, sizeof (dqblk)); 390 if (error == 0) 391 error = zfs_set_userquota(zfsvfs, quota_type, 392 "", id, dbtob(dqblk.dqb_bhardlimit)); 393 break; 394 case Q_GETQUOTA: 395 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 396 if (error == 0) 397 error = copyout(&dqblk, arg, sizeof (dqblk)); 398 break; 399 default: 400 error = EINVAL; 401 break; 402 } 403 done: 404 zfs_exit(zfsvfs, FTAG); 405 return (error); 406 } 407 408 409 boolean_t 410 zfs_is_readonly(zfsvfs_t *zfsvfs) 411 { 412 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 413 } 414 415 static int 416 zfs_sync(vfs_t *vfsp, int waitfor) 417 { 418 419 /* 420 * Data integrity is job one. We don't want a compromised kernel 421 * writing to the storage pool, so we never sync during panic. 422 */ 423 if (panicstr) 424 return (0); 425 426 /* 427 * Ignore the system syncher. ZFS already commits async data 428 * at zfs_txg_timeout intervals. 429 */ 430 if (waitfor == MNT_LAZY) 431 return (0); 432 433 if (vfsp != NULL) { 434 /* 435 * Sync a specific filesystem. 
436 */ 437 zfsvfs_t *zfsvfs = vfsp->vfs_data; 438 dsl_pool_t *dp; 439 int error; 440 441 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 442 return (error); 443 dp = dmu_objset_pool(zfsvfs->z_os); 444 445 /* 446 * If the system is shutting down, then skip any 447 * filesystems which may exist on a suspended pool. 448 */ 449 if (rebooting && spa_suspended(dp->dp_spa)) { 450 zfs_exit(zfsvfs, FTAG); 451 return (0); 452 } 453 454 if (zfsvfs->z_log != NULL) 455 zil_commit(zfsvfs->z_log, 0); 456 457 zfs_exit(zfsvfs, FTAG); 458 } else { 459 /* 460 * Sync all ZFS filesystems. This is what happens when you 461 * run sync(8). Unlike other filesystems, ZFS honors the 462 * request by waiting for all pools to commit all dirty data. 463 */ 464 spa_sync_allpools(); 465 } 466 467 return (0); 468 } 469 470 static void 471 atime_changed_cb(void *arg, uint64_t newval) 472 { 473 zfsvfs_t *zfsvfs = arg; 474 475 if (newval == TRUE) { 476 zfsvfs->z_atime = TRUE; 477 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 478 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 479 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 480 } else { 481 zfsvfs->z_atime = FALSE; 482 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 483 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 484 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 485 } 486 } 487 488 static void 489 xattr_changed_cb(void *arg, uint64_t newval) 490 { 491 zfsvfs_t *zfsvfs = arg; 492 493 if (newval == ZFS_XATTR_OFF) { 494 zfsvfs->z_flags &= ~ZSB_XATTR; 495 } else { 496 zfsvfs->z_flags |= ZSB_XATTR; 497 498 if (newval == ZFS_XATTR_SA) 499 zfsvfs->z_xattr_sa = B_TRUE; 500 else 501 zfsvfs->z_xattr_sa = B_FALSE; 502 } 503 } 504 505 static void 506 blksz_changed_cb(void *arg, uint64_t newval) 507 { 508 zfsvfs_t *zfsvfs = arg; 509 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 510 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 511 ASSERT(ISP2(newval)); 512 513 zfsvfs->z_max_blksz = newval; 514 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 515 } 
516 517 static void 518 readonly_changed_cb(void *arg, uint64_t newval) 519 { 520 zfsvfs_t *zfsvfs = arg; 521 522 if (newval) { 523 /* XXX locking on vfs_flag? */ 524 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 525 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 526 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 527 } else { 528 /* XXX locking on vfs_flag? */ 529 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 530 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 531 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 532 } 533 } 534 535 static void 536 setuid_changed_cb(void *arg, uint64_t newval) 537 { 538 zfsvfs_t *zfsvfs = arg; 539 540 if (newval == FALSE) { 541 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 542 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 543 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 544 } else { 545 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 546 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 547 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 548 } 549 } 550 551 static void 552 exec_changed_cb(void *arg, uint64_t newval) 553 { 554 zfsvfs_t *zfsvfs = arg; 555 556 if (newval == FALSE) { 557 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 558 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 559 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 560 } else { 561 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 562 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 563 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 564 } 565 } 566 567 /* 568 * The nbmand mount option can be changed at mount time. 
569 * We can't allow it to be toggled on live file systems or incorrect 570 * behavior may be seen from cifs clients 571 * 572 * This property isn't registered via dsl_prop_register(), but this callback 573 * will be called when a file system is first mounted 574 */ 575 static void 576 nbmand_changed_cb(void *arg, uint64_t newval) 577 { 578 zfsvfs_t *zfsvfs = arg; 579 if (newval == FALSE) { 580 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 581 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 582 } else { 583 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 584 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 585 } 586 } 587 588 static void 589 snapdir_changed_cb(void *arg, uint64_t newval) 590 { 591 zfsvfs_t *zfsvfs = arg; 592 593 zfsvfs->z_show_ctldir = newval; 594 } 595 596 static void 597 acl_mode_changed_cb(void *arg, uint64_t newval) 598 { 599 zfsvfs_t *zfsvfs = arg; 600 601 zfsvfs->z_acl_mode = newval; 602 } 603 604 static void 605 acl_inherit_changed_cb(void *arg, uint64_t newval) 606 { 607 zfsvfs_t *zfsvfs = arg; 608 609 zfsvfs->z_acl_inherit = newval; 610 } 611 612 static void 613 acl_type_changed_cb(void *arg, uint64_t newval) 614 { 615 zfsvfs_t *zfsvfs = arg; 616 617 zfsvfs->z_acl_type = newval; 618 } 619 620 static int 621 zfs_register_callbacks(vfs_t *vfsp) 622 { 623 struct dsl_dataset *ds = NULL; 624 objset_t *os = NULL; 625 zfsvfs_t *zfsvfs = NULL; 626 uint64_t nbmand; 627 boolean_t readonly = B_FALSE; 628 boolean_t do_readonly = B_FALSE; 629 boolean_t setuid = B_FALSE; 630 boolean_t do_setuid = B_FALSE; 631 boolean_t exec = B_FALSE; 632 boolean_t do_exec = B_FALSE; 633 boolean_t xattr = B_FALSE; 634 boolean_t atime = B_FALSE; 635 boolean_t do_atime = B_FALSE; 636 boolean_t do_xattr = B_FALSE; 637 int error = 0; 638 639 ASSERT3P(vfsp, !=, NULL); 640 zfsvfs = vfsp->vfs_data; 641 ASSERT3P(zfsvfs, !=, NULL); 642 os = zfsvfs->z_os; 643 644 /* 645 * This function can be called for a snapshot when we update snapshot's 646 * mount point, which 
isn't really supported. 647 */ 648 if (dmu_objset_is_snapshot(os)) 649 return (EOPNOTSUPP); 650 651 /* 652 * The act of registering our callbacks will destroy any mount 653 * options we may have. In order to enable temporary overrides 654 * of mount options, we stash away the current values and 655 * restore them after we register the callbacks. 656 */ 657 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 658 !spa_writeable(dmu_objset_spa(os))) { 659 readonly = B_TRUE; 660 do_readonly = B_TRUE; 661 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 662 readonly = B_FALSE; 663 do_readonly = B_TRUE; 664 } 665 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 666 setuid = B_FALSE; 667 do_setuid = B_TRUE; 668 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 669 setuid = B_TRUE; 670 do_setuid = B_TRUE; 671 } 672 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 673 exec = B_FALSE; 674 do_exec = B_TRUE; 675 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 676 exec = B_TRUE; 677 do_exec = B_TRUE; 678 } 679 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 680 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 681 do_xattr = B_TRUE; 682 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 683 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 684 do_xattr = B_TRUE; 685 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 686 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 687 do_xattr = B_TRUE; 688 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 689 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 690 do_xattr = B_TRUE; 691 } 692 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 693 atime = B_FALSE; 694 do_atime = B_TRUE; 695 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 696 atime = B_TRUE; 697 do_atime = B_TRUE; 698 } 699 700 /* 701 * We need to enter pool configuration here, so that we can use 702 * dsl_prop_get_int_ds() to handle the special nbmand property below. 
703 * dsl_prop_get_integer() can not be used, because it has to acquire 704 * spa_namespace_lock and we can not do that because we already hold 705 * z_teardown_lock. The problem is that spa_write_cachefile() is called 706 * with spa_namespace_lock held and the function calls ZFS vnode 707 * operations to write the cache file and thus z_teardown_lock is 708 * acquired after spa_namespace_lock. 709 */ 710 ds = dmu_objset_ds(os); 711 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 712 713 /* 714 * nbmand is a special property. It can only be changed at 715 * mount time. 716 * 717 * This is weird, but it is documented to only be changeable 718 * at mount time. 719 */ 720 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 721 nbmand = B_FALSE; 722 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 723 nbmand = B_TRUE; 724 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) { 725 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 726 return (error); 727 } 728 729 /* 730 * Register property callbacks. 731 * 732 * It would probably be fine to just check for i/o error from 733 * the first prop_register(), but I guess I like to go 734 * overboard... 735 */ 736 error = dsl_prop_register(ds, 737 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 738 error = error ? error : dsl_prop_register(ds, 739 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 740 error = error ? error : dsl_prop_register(ds, 741 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 742 error = error ? error : dsl_prop_register(ds, 743 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 744 error = error ? error : dsl_prop_register(ds, 745 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 746 error = error ? error : dsl_prop_register(ds, 747 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 748 error = error ? 
error : dsl_prop_register(ds, 749 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 750 error = error ? error : dsl_prop_register(ds, 751 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 752 error = error ? error : dsl_prop_register(ds, 753 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 754 error = error ? error : dsl_prop_register(ds, 755 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 756 zfsvfs); 757 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 758 if (error) 759 goto unregister; 760 761 /* 762 * Invoke our callbacks to restore temporary mount options. 763 */ 764 if (do_readonly) 765 readonly_changed_cb(zfsvfs, readonly); 766 if (do_setuid) 767 setuid_changed_cb(zfsvfs, setuid); 768 if (do_exec) 769 exec_changed_cb(zfsvfs, exec); 770 if (do_xattr) 771 xattr_changed_cb(zfsvfs, xattr); 772 if (do_atime) 773 atime_changed_cb(zfsvfs, atime); 774 775 nbmand_changed_cb(zfsvfs, nbmand); 776 777 return (0); 778 779 unregister: 780 dsl_prop_unregister_all(ds, zfsvfs); 781 return (error); 782 } 783 784 /* 785 * Associate this zfsvfs with the given objset, which must be owned. 786 * This will cache a bunch of on-disk state from the objset in the 787 * zfsvfs. 788 */ 789 static int 790 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 791 { 792 int error; 793 uint64_t val; 794 795 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 796 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 797 zfsvfs->z_os = os; 798 799 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 800 if (error != 0) 801 return (error); 802 if (zfsvfs->z_version > 803 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 804 (void) printf("Can't mount a version %lld file system " 805 "on a version %lld pool\n. 
Pool must be upgraded to mount " 806 "this file system.", (u_longlong_t)zfsvfs->z_version, 807 (u_longlong_t)spa_version(dmu_objset_spa(os))); 808 return (SET_ERROR(ENOTSUP)); 809 } 810 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 811 if (error != 0) 812 return (error); 813 zfsvfs->z_norm = (int)val; 814 815 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 816 if (error != 0) 817 return (error); 818 zfsvfs->z_utf8 = (val != 0); 819 820 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 821 if (error != 0) 822 return (error); 823 zfsvfs->z_case = (uint_t)val; 824 825 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 826 if (error != 0) 827 return (error); 828 zfsvfs->z_acl_type = (uint_t)val; 829 830 /* 831 * Fold case on file systems that are always or sometimes case 832 * insensitive. 833 */ 834 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 835 zfsvfs->z_case == ZFS_CASE_MIXED) 836 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 837 838 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 839 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 840 841 uint64_t sa_obj = 0; 842 if (zfsvfs->z_use_sa) { 843 /* should either have both of these objects or none */ 844 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 845 &sa_obj); 846 if (error != 0) 847 return (error); 848 849 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 850 if (error == 0 && val == ZFS_XATTR_SA) 851 zfsvfs->z_xattr_sa = B_TRUE; 852 } 853 854 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 855 &zfsvfs->z_attr_table); 856 if (error != 0) 857 return (error); 858 859 if (zfsvfs->z_version >= ZPL_VERSION_SA) 860 sa_register_update_callback(os, zfs_sa_upgrade); 861 862 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 863 &zfsvfs->z_root); 864 if (error != 0) 865 return (error); 866 ASSERT3U(zfsvfs->z_root, !=, 0); 867 868 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 869 &zfsvfs->z_unlinkedobj); 870 if (error != 0) 871 return 
(error); 872 873 error = zap_lookup(os, MASTER_NODE_OBJ, 874 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 875 8, 1, &zfsvfs->z_userquota_obj); 876 if (error == ENOENT) 877 zfsvfs->z_userquota_obj = 0; 878 else if (error != 0) 879 return (error); 880 881 error = zap_lookup(os, MASTER_NODE_OBJ, 882 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 883 8, 1, &zfsvfs->z_groupquota_obj); 884 if (error == ENOENT) 885 zfsvfs->z_groupquota_obj = 0; 886 else if (error != 0) 887 return (error); 888 889 error = zap_lookup(os, MASTER_NODE_OBJ, 890 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 891 8, 1, &zfsvfs->z_projectquota_obj); 892 if (error == ENOENT) 893 zfsvfs->z_projectquota_obj = 0; 894 else if (error != 0) 895 return (error); 896 897 error = zap_lookup(os, MASTER_NODE_OBJ, 898 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 899 8, 1, &zfsvfs->z_userobjquota_obj); 900 if (error == ENOENT) 901 zfsvfs->z_userobjquota_obj = 0; 902 else if (error != 0) 903 return (error); 904 905 error = zap_lookup(os, MASTER_NODE_OBJ, 906 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 907 8, 1, &zfsvfs->z_groupobjquota_obj); 908 if (error == ENOENT) 909 zfsvfs->z_groupobjquota_obj = 0; 910 else if (error != 0) 911 return (error); 912 913 error = zap_lookup(os, MASTER_NODE_OBJ, 914 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 915 8, 1, &zfsvfs->z_projectobjquota_obj); 916 if (error == ENOENT) 917 zfsvfs->z_projectobjquota_obj = 0; 918 else if (error != 0) 919 return (error); 920 921 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 922 &zfsvfs->z_fuid_obj); 923 if (error == ENOENT) 924 zfsvfs->z_fuid_obj = 0; 925 else if (error != 0) 926 return (error); 927 928 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 929 &zfsvfs->z_shares_dir); 930 if (error == ENOENT) 931 zfsvfs->z_shares_dir = 0; 932 else if (error != 0) 933 return (error); 934 935 /* 936 * Only use the name cache if we are looking for a 937 * name on a file system that 
does not require normalization 938 * or case folding. We can also look there if we happen to be 939 * on a non-normalizing, mixed sensitivity file system IF we 940 * are looking for the exact name (which is always the case on 941 * FreeBSD). 942 */ 943 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 944 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 945 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 946 947 return (0); 948 } 949 950 taskq_t *zfsvfs_taskq; 951 952 static void 953 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 954 { 955 956 zfs_unlinked_drain((zfsvfs_t *)context); 957 } 958 959 int 960 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 961 { 962 objset_t *os; 963 zfsvfs_t *zfsvfs; 964 int error; 965 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 966 967 /* 968 * XXX: Fix struct statfs so this isn't necessary! 969 * 970 * The 'osname' is used as the filesystem's special node, which means 971 * it must fit in statfs.f_mntfromname, or else it can't be 972 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 973 * 'zfs unmount' to think it's not mounted when it is. 
974 */ 975 if (strlen(osname) >= MNAMELEN) 976 return (SET_ERROR(ENAMETOOLONG)); 977 978 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 979 980 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 981 &os); 982 if (error != 0) { 983 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 984 return (error); 985 } 986 987 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 988 989 return (error); 990 } 991 992 993 int 994 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 995 { 996 int error; 997 998 zfsvfs->z_vfs = NULL; 999 zfsvfs->z_parent = zfsvfs; 1000 1001 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1002 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 1003 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1004 offsetof(znode_t, z_link_node)); 1005 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 1006 zfsvfs_task_unlinked_drain, zfsvfs); 1007 ZFS_TEARDOWN_INIT(zfsvfs); 1008 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 1009 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1010 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1011 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1012 1013 error = zfsvfs_init(zfsvfs, os); 1014 if (error != 0) { 1015 dmu_objset_disown(os, B_TRUE, zfsvfs); 1016 *zfvp = NULL; 1017 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1018 return (error); 1019 } 1020 1021 *zfvp = zfsvfs; 1022 return (0); 1023 } 1024 1025 static int 1026 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1027 { 1028 int error; 1029 1030 /* 1031 * Check for a bad on-disk format version now since we 1032 * lied about owning the dataset readonly before. 
1033 */ 1034 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && 1035 dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) 1036 return (SET_ERROR(EROFS)); 1037 1038 error = zfs_register_callbacks(zfsvfs->z_vfs); 1039 if (error) 1040 return (error); 1041 1042 /* 1043 * If we are not mounting (ie: online recv), then we don't 1044 * have to worry about replaying the log as we blocked all 1045 * operations out since we closed the ZIL. 1046 */ 1047 if (mounting) { 1048 boolean_t readonly; 1049 1050 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); 1051 error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); 1052 if (error) 1053 return (error); 1054 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1055 &zfsvfs->z_kstat.dk_zil_sums); 1056 1057 /* 1058 * During replay we remove the read only flag to 1059 * allow replays to succeed. 1060 */ 1061 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1062 if (readonly != 0) { 1063 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1064 } else { 1065 dsl_dir_t *dd; 1066 zap_stats_t zs; 1067 1068 if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, 1069 &zs) == 0) { 1070 dataset_kstats_update_nunlinks_kstat( 1071 &zfsvfs->z_kstat, zs.zs_num_entries); 1072 dprintf_ds(zfsvfs->z_os->os_dsl_dataset, 1073 "num_entries in unlinked set: %llu", 1074 (u_longlong_t)zs.zs_num_entries); 1075 } 1076 1077 zfs_unlinked_drain(zfsvfs); 1078 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1079 dd->dd_activity_cancelled = B_FALSE; 1080 } 1081 1082 /* 1083 * Parse and replay the intent log. 1084 * 1085 * Because of ziltest, this must be done after 1086 * zfs_unlinked_drain(). (Further note: ziltest 1087 * doesn't use readonly mounts, where 1088 * zfs_unlinked_drain() isn't called.) This is because 1089 * ziltest causes spa_sync() to think it's committed, 1090 * but actually it is not, so the intent log contains 1091 * many txg's worth of changes. 
1092 * 1093 * In particular, if object N is in the unlinked set in 1094 * the last txg to actually sync, then it could be 1095 * actually freed in a later txg and then reallocated 1096 * in a yet later txg. This would write a "create 1097 * object N" record to the intent log. Normally, this 1098 * would be fine because the spa_sync() would have 1099 * written out the fact that object N is free, before 1100 * we could write the "create object N" intent log 1101 * record. 1102 * 1103 * But when we are in ziltest mode, we advance the "open 1104 * txg" without actually spa_sync()-ing the changes to 1105 * disk. So we would see that object N is still 1106 * allocated and in the unlinked set, and there is an 1107 * intent log record saying to allocate it. 1108 */ 1109 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1110 if (zil_replay_disable) { 1111 zil_destroy(zfsvfs->z_log, B_FALSE); 1112 } else { 1113 boolean_t use_nc = zfsvfs->z_use_namecache; 1114 zfsvfs->z_use_namecache = B_FALSE; 1115 zfsvfs->z_replay = B_TRUE; 1116 zil_replay(zfsvfs->z_os, zfsvfs, 1117 zfs_replay_vector); 1118 zfsvfs->z_replay = B_FALSE; 1119 zfsvfs->z_use_namecache = use_nc; 1120 } 1121 } 1122 1123 /* restore readonly bit */ 1124 if (readonly != 0) 1125 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 1126 } else { 1127 ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); 1128 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1129 &zfsvfs->z_kstat.dk_zil_sums); 1130 } 1131 1132 /* 1133 * Set the objset user_ptr to track its zfsvfs. 
1134 */ 1135 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1136 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1137 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1138 1139 return (0); 1140 } 1141 1142 void 1143 zfsvfs_free(zfsvfs_t *zfsvfs) 1144 { 1145 int i; 1146 1147 zfs_fuid_destroy(zfsvfs); 1148 1149 mutex_destroy(&zfsvfs->z_znodes_lock); 1150 mutex_destroy(&zfsvfs->z_lock); 1151 ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); 1152 list_destroy(&zfsvfs->z_all_znodes); 1153 ZFS_TEARDOWN_DESTROY(zfsvfs); 1154 ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); 1155 rw_destroy(&zfsvfs->z_fuid_lock); 1156 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1157 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1158 dataset_kstats_destroy(&zfsvfs->z_kstat); 1159 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1160 } 1161 1162 static void 1163 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1164 { 1165 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1166 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1167 } 1168 1169 static int 1170 zfs_domount(vfs_t *vfsp, char *osname) 1171 { 1172 uint64_t recordsize, fsid_guid; 1173 int error = 0; 1174 zfsvfs_t *zfsvfs; 1175 1176 ASSERT3P(vfsp, !=, NULL); 1177 ASSERT3P(osname, !=, NULL); 1178 1179 error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); 1180 if (error) 1181 return (error); 1182 zfsvfs->z_vfs = vfsp; 1183 1184 if ((error = dsl_prop_get_integer(osname, 1185 "recordsize", &recordsize, NULL))) 1186 goto out; 1187 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1188 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1189 1190 vfsp->vfs_data = zfsvfs; 1191 vfsp->mnt_flag |= MNT_LOCAL; 1192 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1193 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1194 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1195 /* 1196 * This can cause a loss of coherence between ARC and page cache 1197 * on ZoF - unclear if the problem is in FreeBSD or ZoF 1198 */ 1199 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1200 
vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1201 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1202 1203 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1204 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1205 #endif 1206 /* 1207 * The fsid is 64 bits, composed of an 8-bit fs type, which 1208 * separates our fsid from any other filesystem types, and a 1209 * 56-bit objset unique ID. The objset unique ID is unique to 1210 * all objsets open on this system, provided by unique_create(). 1211 * The 8-bit fs type must be put in the low bits of fsid[1] 1212 * because that's where other Solaris filesystems put it. 1213 */ 1214 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1215 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1216 vfsp->vfs_fsid.val[0] = fsid_guid; 1217 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1218 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1219 1220 /* 1221 * Set features for file system. 1222 */ 1223 zfs_set_fuid_feature(zfsvfs); 1224 1225 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1226 uint64_t pval; 1227 1228 atime_changed_cb(zfsvfs, B_FALSE); 1229 readonly_changed_cb(zfsvfs, B_TRUE); 1230 if ((error = dsl_prop_get_integer(osname, 1231 "xattr", &pval, NULL))) 1232 goto out; 1233 xattr_changed_cb(zfsvfs, pval); 1234 if ((error = dsl_prop_get_integer(osname, 1235 "acltype", &pval, NULL))) 1236 goto out; 1237 acl_type_changed_cb(zfsvfs, pval); 1238 zfsvfs->z_issnap = B_TRUE; 1239 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1240 1241 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1242 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1243 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1244 } else { 1245 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1246 goto out; 1247 } 1248 1249 vfs_mountedfrom(vfsp, osname); 1250 1251 if (!zfsvfs->z_issnap) 1252 zfsctl_create(zfsvfs); 1253 out: 1254 if (error) { 1255 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1256 zfsvfs_free(zfsvfs); 1257 } else { 1258 atomic_inc_32(&zfs_active_fs_count); 1259 } 1260 1261 return (error); 1262 } 1263 1264 
static void 1265 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1266 { 1267 objset_t *os = zfsvfs->z_os; 1268 1269 if (!dmu_objset_is_snapshot(os)) 1270 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); 1271 } 1272 1273 static int 1274 getpoolname(const char *osname, char *poolname) 1275 { 1276 char *p; 1277 1278 p = strchr(osname, '/'); 1279 if (p == NULL) { 1280 if (strlen(osname) >= MAXNAMELEN) 1281 return (ENAMETOOLONG); 1282 (void) strcpy(poolname, osname); 1283 } else { 1284 if (p - osname >= MAXNAMELEN) 1285 return (ENAMETOOLONG); 1286 (void) strlcpy(poolname, osname, p - osname + 1); 1287 } 1288 return (0); 1289 } 1290 1291 static void 1292 fetch_osname_options(char *name, bool *checkpointrewind) 1293 { 1294 1295 if (name[0] == '!') { 1296 *checkpointrewind = true; 1297 memmove(name, name + 1, strlen(name)); 1298 } else { 1299 *checkpointrewind = false; 1300 } 1301 } 1302 1303 static int 1304 zfs_mount(vfs_t *vfsp) 1305 { 1306 kthread_t *td = curthread; 1307 vnode_t *mvp = vfsp->mnt_vnodecovered; 1308 cred_t *cr = td->td_ucred; 1309 char *osname; 1310 int error = 0; 1311 int canwrite; 1312 bool checkpointrewind, isctlsnap = false; 1313 1314 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1315 return (SET_ERROR(EINVAL)); 1316 1317 /* 1318 * If full-owner-access is enabled and delegated administration is 1319 * turned on, we must set nosuid. 1320 */ 1321 if (zfs_super_owner && 1322 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1323 secpolicy_fs_mount_clearopts(cr, vfsp); 1324 } 1325 1326 fetch_osname_options(osname, &checkpointrewind); 1327 isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) && 1328 strchr(osname, '@') != NULL); 1329 1330 /* 1331 * Check for mount privilege? 
1332 * 1333 * If we don't have privilege then see if 1334 * we have local permission to allow it 1335 */ 1336 error = secpolicy_fs_mount(cr, mvp, vfsp); 1337 if (error && isctlsnap) { 1338 secpolicy_fs_mount_clearopts(cr, vfsp); 1339 } else if (error) { 1340 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1341 goto out; 1342 1343 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1344 vattr_t vattr; 1345 1346 /* 1347 * Make sure user is the owner of the mount point 1348 * or has sufficient privileges. 1349 */ 1350 1351 vattr.va_mask = AT_UID; 1352 1353 vn_lock(mvp, LK_SHARED | LK_RETRY); 1354 if (VOP_GETATTR(mvp, &vattr, cr)) { 1355 VOP_UNLOCK1(mvp); 1356 goto out; 1357 } 1358 1359 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1360 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1361 VOP_UNLOCK1(mvp); 1362 goto out; 1363 } 1364 VOP_UNLOCK1(mvp); 1365 } 1366 1367 secpolicy_fs_mount_clearopts(cr, vfsp); 1368 } 1369 1370 /* 1371 * Refuse to mount a filesystem if we are in a local zone and the 1372 * dataset is not visible. 1373 */ 1374 if (!INGLOBALZONE(curproc) && 1375 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1376 boolean_t mount_snapshot = B_FALSE; 1377 1378 /* 1379 * Snapshots may be mounted in .zfs for unjailed datasets 1380 * if allowed by the jail param zfs.mount_snapshot. 1381 */ 1382 if (isctlsnap) { 1383 struct prison *pr; 1384 struct zfs_jailparam *zjp; 1385 1386 pr = curthread->td_ucred->cr_prison; 1387 mtx_lock(&pr->pr_mtx); 1388 zjp = osd_jail_get(pr, zfs_jailparam_slot); 1389 mtx_unlock(&pr->pr_mtx); 1390 if (zjp && zjp->mount_snapshot) 1391 mount_snapshot = B_TRUE; 1392 } 1393 if (!mount_snapshot) { 1394 error = SET_ERROR(EPERM); 1395 goto out; 1396 } 1397 } 1398 1399 vfsp->vfs_flag |= MNT_NFS4ACLS; 1400 1401 /* 1402 * When doing a remount, we simply refresh our temporary properties 1403 * according to those options set in the current VFS options. 
1404 */ 1405 if (vfsp->vfs_flag & MS_REMOUNT) { 1406 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1407 1408 /* 1409 * Refresh mount options with z_teardown_lock blocking I/O while 1410 * the filesystem is in an inconsistent state. 1411 * The lock also serializes this code with filesystem 1412 * manipulations between entry to zfs_suspend_fs() and return 1413 * from zfs_resume_fs(). 1414 */ 1415 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1416 zfs_unregister_callbacks(zfsvfs); 1417 error = zfs_register_callbacks(vfsp); 1418 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1419 goto out; 1420 } 1421 1422 /* Initial root mount: try hard to import the requested root pool. */ 1423 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1424 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1425 char pname[MAXNAMELEN]; 1426 1427 error = getpoolname(osname, pname); 1428 if (error == 0) 1429 error = spa_import_rootpool(pname, checkpointrewind); 1430 if (error) 1431 goto out; 1432 } 1433 DROP_GIANT(); 1434 error = zfs_domount(vfsp, osname); 1435 PICKUP_GIANT(); 1436 1437 out: 1438 return (error); 1439 } 1440 1441 static int 1442 zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1443 { 1444 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1445 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1446 int error; 1447 1448 statp->f_version = STATFS_VERSION; 1449 1450 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1451 return (error); 1452 1453 dmu_objset_space(zfsvfs->z_os, 1454 &refdbytes, &availbytes, &usedobjs, &availobjs); 1455 1456 /* 1457 * The underlying storage pool actually uses multiple block sizes. 1458 * We report the fragsize as the smallest block size we support, 1459 * and we report our blocksize as the filesystem's maximum blocksize. 1460 */ 1461 statp->f_bsize = SPA_MINBLOCKSIZE; 1462 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1463 1464 /* 1465 * The following report "total" blocks of various kinds in the 1466 * file system, but reported in terms of f_frsize - the 1467 * "fragment" size. 
1468 */ 1469 1470 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1471 statp->f_bfree = availbytes / statp->f_bsize; 1472 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1473 1474 /* 1475 * statvfs() should really be called statufs(), because it assumes 1476 * static metadata. ZFS doesn't preallocate files, so the best 1477 * we can do is report the max that could possibly fit in f_files, 1478 * and that minus the number actually used in f_ffree. 1479 * For f_ffree, report the smaller of the number of object available 1480 * and the number of blocks (each object will take at least a block). 1481 */ 1482 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1483 statp->f_files = statp->f_ffree + usedobjs; 1484 1485 /* 1486 * We're a zfs filesystem. 1487 */ 1488 strlcpy(statp->f_fstypename, "zfs", 1489 sizeof (statp->f_fstypename)); 1490 1491 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1492 sizeof (statp->f_mntfromname)); 1493 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1494 sizeof (statp->f_mntonname)); 1495 1496 statp->f_namemax = MAXNAMELEN - 1; 1497 1498 zfs_exit(zfsvfs, FTAG); 1499 return (0); 1500 } 1501 1502 static int 1503 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1504 { 1505 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1506 znode_t *rootzp; 1507 int error; 1508 1509 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1510 return (error); 1511 1512 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1513 if (error == 0) 1514 *vpp = ZTOV(rootzp); 1515 1516 zfs_exit(zfsvfs, FTAG); 1517 1518 if (error == 0) { 1519 error = vn_lock(*vpp, flags); 1520 if (error != 0) { 1521 VN_RELE(*vpp); 1522 *vpp = NULL; 1523 } 1524 } 1525 return (error); 1526 } 1527 1528 /* 1529 * Teardown the zfsvfs::z_os. 1530 * 1531 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 1532 * and 'z_teardown_inactive_lock' held. 
1533 */ 1534 static int 1535 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1536 { 1537 znode_t *zp; 1538 dsl_dir_t *dd; 1539 1540 /* 1541 * If someone has not already unmounted this file system, 1542 * drain the zrele_taskq to ensure all active references to the 1543 * zfsvfs_t have been handled only then can it be safely destroyed. 1544 */ 1545 if (zfsvfs->z_os) { 1546 /* 1547 * If we're unmounting we have to wait for the list to 1548 * drain completely. 1549 * 1550 * If we're not unmounting there's no guarantee the list 1551 * will drain completely, but zreles run from the taskq 1552 * may add the parents of dir-based xattrs to the taskq 1553 * so we want to wait for these. 1554 * 1555 * We can safely read z_nr_znodes without locking because the 1556 * VFS has already blocked operations which add to the 1557 * z_all_znodes list and thus increment z_nr_znodes. 1558 */ 1559 int round = 0; 1560 while (zfsvfs->z_nr_znodes > 0) { 1561 taskq_wait_outstanding(dsl_pool_zrele_taskq( 1562 dmu_objset_pool(zfsvfs->z_os)), 0); 1563 if (++round > 1 && !unmounting) 1564 break; 1565 } 1566 } 1567 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1568 1569 if (!unmounting) { 1570 /* 1571 * We purge the parent filesystem's vfsp as the parent 1572 * filesystem and all of its snapshots have their vnode's 1573 * v_vfsp set to the parent's filesystem's vfsp. Note, 1574 * 'z_parent' is self referential for non-snapshots. 1575 */ 1576 #ifdef FREEBSD_NAMECACHE 1577 #if __FreeBSD_version >= 1300117 1578 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1579 #else 1580 cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 1581 #endif 1582 #endif 1583 } 1584 1585 /* 1586 * Close the zil. NB: Can't close the zil while zfs_inactive 1587 * threads are blocked as zil_close can call zfs_inactive. 
1588 */ 1589 if (zfsvfs->z_log) { 1590 zil_close(zfsvfs->z_log); 1591 zfsvfs->z_log = NULL; 1592 } 1593 1594 ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); 1595 1596 /* 1597 * If we are not unmounting (ie: online recv) and someone already 1598 * unmounted this file system while we were doing the switcheroo, 1599 * or a reopen of z_os failed then just bail out now. 1600 */ 1601 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1602 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1603 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1604 return (SET_ERROR(EIO)); 1605 } 1606 1607 /* 1608 * At this point there are no vops active, and any new vops will 1609 * fail with EIO since we have z_teardown_lock for writer (only 1610 * relevant for forced unmount). 1611 * 1612 * Release all holds on dbufs. 1613 */ 1614 mutex_enter(&zfsvfs->z_znodes_lock); 1615 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1616 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1617 if (zp->z_sa_hdl != NULL) { 1618 zfs_znode_dmu_fini(zp); 1619 } 1620 } 1621 mutex_exit(&zfsvfs->z_znodes_lock); 1622 1623 /* 1624 * If we are unmounting, set the unmounted flag and let new vops 1625 * unblock. zfs_inactive will have the unmounted behavior, and all 1626 * other vops will fail with EIO. 1627 */ 1628 if (unmounting) { 1629 zfsvfs->z_unmounted = B_TRUE; 1630 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1631 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1632 } 1633 1634 /* 1635 * z_os will be NULL if there was an error in attempting to reopen 1636 * zfsvfs, so just return as the properties had already been 1637 * unregistered and cached data had been evicted before. 1638 */ 1639 if (zfsvfs->z_os == NULL) 1640 return (0); 1641 1642 /* 1643 * Unregister properties. 
1644 */ 1645 zfs_unregister_callbacks(zfsvfs); 1646 1647 /* 1648 * Evict cached data 1649 */ 1650 if (!zfs_is_readonly(zfsvfs)) 1651 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1652 dmu_objset_evict_dbufs(zfsvfs->z_os); 1653 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1654 dsl_dir_cancel_waiters(dd); 1655 1656 return (0); 1657 } 1658 1659 static int 1660 zfs_umount(vfs_t *vfsp, int fflag) 1661 { 1662 kthread_t *td = curthread; 1663 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1664 objset_t *os; 1665 cred_t *cr = td->td_ucred; 1666 int ret; 1667 1668 ret = secpolicy_fs_unmount(cr, vfsp); 1669 if (ret) { 1670 if (dsl_deleg_access((char *)vfsp->vfs_resource, 1671 ZFS_DELEG_PERM_MOUNT, cr)) 1672 return (ret); 1673 } 1674 1675 /* 1676 * Unmount any snapshots mounted under .zfs before unmounting the 1677 * dataset itself. 1678 */ 1679 if (zfsvfs->z_ctldir != NULL) { 1680 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1681 return (ret); 1682 } 1683 1684 if (fflag & MS_FORCE) { 1685 /* 1686 * Mark file system as unmounted before calling 1687 * vflush(FORCECLOSE). This way we ensure no future vnops 1688 * will be called and risk operating on DOOMED vnodes. 1689 */ 1690 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1691 zfsvfs->z_unmounted = B_TRUE; 1692 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1693 } 1694 1695 /* 1696 * Flush all the files. 1697 */ 1698 ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1699 if (ret != 0) 1700 return (ret); 1701 while (taskqueue_cancel(zfsvfs_taskq->tq_queue, 1702 &zfsvfs->z_unlinked_drain_task, NULL) != 0) 1703 taskqueue_drain(zfsvfs_taskq->tq_queue, 1704 &zfsvfs->z_unlinked_drain_task); 1705 1706 VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); 1707 os = zfsvfs->z_os; 1708 1709 /* 1710 * z_os will be NULL if there was an error in 1711 * attempting to reopen zfsvfs. 1712 */ 1713 if (os != NULL) { 1714 /* 1715 * Unset the objset user_ptr. 
1716 */ 1717 mutex_enter(&os->os_user_ptr_lock); 1718 dmu_objset_set_user(os, NULL); 1719 mutex_exit(&os->os_user_ptr_lock); 1720 1721 /* 1722 * Finally release the objset 1723 */ 1724 dmu_objset_disown(os, B_TRUE, zfsvfs); 1725 } 1726 1727 /* 1728 * We can now safely destroy the '.zfs' directory node. 1729 */ 1730 if (zfsvfs->z_ctldir != NULL) 1731 zfsctl_destroy(zfsvfs); 1732 zfs_freevfs(vfsp); 1733 1734 return (0); 1735 } 1736 1737 static int 1738 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1739 { 1740 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1741 znode_t *zp; 1742 int err; 1743 1744 /* 1745 * zfs_zget() can't operate on virtual entries like .zfs/ or 1746 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1747 * This will make NFS to switch to LOOKUP instead of using VGET. 1748 */ 1749 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 1750 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 1751 return (EOPNOTSUPP); 1752 1753 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1754 return (err); 1755 err = zfs_zget(zfsvfs, ino, &zp); 1756 if (err == 0 && zp->z_unlinked) { 1757 vrele(ZTOV(zp)); 1758 err = EINVAL; 1759 } 1760 if (err == 0) 1761 *vpp = ZTOV(zp); 1762 zfs_exit(zfsvfs, FTAG); 1763 if (err == 0) { 1764 err = vn_lock(*vpp, flags); 1765 if (err != 0) 1766 vrele(*vpp); 1767 } 1768 if (err != 0) 1769 *vpp = NULL; 1770 return (err); 1771 } 1772 1773 static int 1774 #if __FreeBSD_version >= 1300098 1775 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 1776 struct ucred **credanonp, int *numsecflavors, int *secflavors) 1777 #else 1778 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1779 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1780 #endif 1781 { 1782 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1783 1784 /* 1785 * If this is regular file system vfsp is the same as 1786 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1787 * zfsvfs->z_parent->z_vfs represents parent file system 1788 
* which we have to use here, because only this file system 1789 * has mnt_export configured. 1790 */ 1791 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1792 credanonp, numsecflavors, secflavors)); 1793 } 1794 1795 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, 1796 "struct fid bigger than SHORT_FID_LEN"); 1797 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, 1798 "struct fid bigger than LONG_FID_LEN"); 1799 1800 static int 1801 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 1802 { 1803 struct componentname cn; 1804 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1805 znode_t *zp; 1806 vnode_t *dvp; 1807 uint64_t object = 0; 1808 uint64_t fid_gen = 0; 1809 uint64_t setgen = 0; 1810 uint64_t gen_mask; 1811 uint64_t zp_gen; 1812 int i, err; 1813 1814 *vpp = NULL; 1815 1816 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1817 return (err); 1818 1819 /* 1820 * On FreeBSD we can get snapshot's mount point or its parent file 1821 * system mount point depending if snapshot is already mounted or not. 
1822 */ 1823 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1824 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1825 uint64_t objsetid = 0; 1826 1827 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1828 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1829 1830 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1831 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1832 1833 zfs_exit(zfsvfs, FTAG); 1834 1835 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1836 if (err) 1837 return (SET_ERROR(EINVAL)); 1838 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1839 return (err); 1840 } 1841 1842 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1843 zfid_short_t *zfid = (zfid_short_t *)fidp; 1844 1845 for (i = 0; i < sizeof (zfid->zf_object); i++) 1846 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1847 1848 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1849 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1850 } else { 1851 zfs_exit(zfsvfs, FTAG); 1852 return (SET_ERROR(EINVAL)); 1853 } 1854 1855 if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { 1856 zfs_exit(zfsvfs, FTAG); 1857 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", 1858 (u_longlong_t)fid_gen, (u_longlong_t)setgen); 1859 return (SET_ERROR(EINVAL)); 1860 } 1861 1862 /* 1863 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 1864 * directory tree. If the object == zfsvfs->z_shares_dir, then 1865 * we are in the .zfs/shares directory tree. 
1866 */ 1867 if ((fid_gen == 0 && 1868 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 1869 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 1870 zfs_exit(zfsvfs, FTAG); 1871 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 1872 if (object == ZFSCTL_INO_SNAPDIR) { 1873 cn.cn_nameptr = "snapshot"; 1874 cn.cn_namelen = strlen(cn.cn_nameptr); 1875 cn.cn_nameiop = LOOKUP; 1876 cn.cn_flags = ISLASTCN | LOCKLEAF; 1877 cn.cn_lkflags = flags; 1878 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1879 vput(dvp); 1880 } else if (object == zfsvfs->z_shares_dir) { 1881 /* 1882 * XXX This branch must not be taken, 1883 * if it is, then the lookup below will 1884 * explode. 1885 */ 1886 cn.cn_nameptr = "shares"; 1887 cn.cn_namelen = strlen(cn.cn_nameptr); 1888 cn.cn_nameiop = LOOKUP; 1889 cn.cn_flags = ISLASTCN; 1890 cn.cn_lkflags = flags; 1891 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1892 vput(dvp); 1893 } else { 1894 *vpp = dvp; 1895 } 1896 return (err); 1897 } 1898 1899 gen_mask = -1ULL >> (64 - 8 * i); 1900 1901 dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, 1902 (u_longlong_t)fid_gen, 1903 (u_longlong_t)gen_mask); 1904 if ((err = zfs_zget(zfsvfs, object, &zp))) { 1905 zfs_exit(zfsvfs, FTAG); 1906 return (err); 1907 } 1908 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 1909 sizeof (uint64_t)); 1910 zp_gen = zp_gen & gen_mask; 1911 if (zp_gen == 0) 1912 zp_gen = 1; 1913 if (zp->z_unlinked || zp_gen != fid_gen) { 1914 dprintf("znode gen (%llu) != fid gen (%llu)\n", 1915 (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); 1916 vrele(ZTOV(zp)); 1917 zfs_exit(zfsvfs, FTAG); 1918 return (SET_ERROR(EINVAL)); 1919 } 1920 1921 *vpp = ZTOV(zp); 1922 zfs_exit(zfsvfs, FTAG); 1923 err = vn_lock(*vpp, flags); 1924 if (err == 0) 1925 vnode_create_vobject(*vpp, zp->z_size, curthread); 1926 else 1927 *vpp = NULL; 1928 return (err); 1929 } 1930 1931 /* 1932 * Block out VOPs and close zfsvfs_t::z_os 1933 * 1934 * Note, if successful, then we return with the 
'z_teardown_lock' and 1935 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 1936 * dataset and objset intact so that they can be atomically handed off during 1937 * a subsequent rollback or recv operation and the resume thereafter. 1938 */ 1939 int 1940 zfs_suspend_fs(zfsvfs_t *zfsvfs) 1941 { 1942 int error; 1943 1944 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1945 return (error); 1946 1947 return (0); 1948 } 1949 1950 /* 1951 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 1952 * is an invariant across any of the operations that can be performed while the 1953 * filesystem was suspended. Whether it succeeded or failed, the preconditions 1954 * are the same: the relevant objset and associated dataset are owned by 1955 * zfsvfs, held, and long held on entry. 1956 */ 1957 int 1958 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 1959 { 1960 int err; 1961 znode_t *zp; 1962 1963 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 1964 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 1965 1966 /* 1967 * We already own this, so just update the objset_t, as the one we 1968 * had before may have been evicted. 1969 */ 1970 objset_t *os; 1971 VERIFY3P(ds->ds_owner, ==, zfsvfs); 1972 VERIFY(dsl_dataset_long_held(ds)); 1973 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 1974 dsl_pool_config_enter(dp, FTAG); 1975 VERIFY0(dmu_objset_from_ds(ds, &os)); 1976 dsl_pool_config_exit(dp, FTAG); 1977 1978 err = zfsvfs_init(zfsvfs, os); 1979 if (err != 0) 1980 goto bail; 1981 1982 ds->ds_dir->dd_activity_cancelled = B_FALSE; 1983 VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); 1984 1985 zfs_set_fuid_feature(zfsvfs); 1986 1987 /* 1988 * Attempt to re-establish all the active znodes with 1989 * their dbufs. If a zfs_rezget() fails, then we'll let 1990 * any potential callers discover that via zfs_enter_verify_zp 1991 * when they try to use their znode. 
1992 */ 1993 mutex_enter(&zfsvfs->z_znodes_lock); 1994 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1995 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1996 (void) zfs_rezget(zp); 1997 } 1998 mutex_exit(&zfsvfs->z_znodes_lock); 1999 2000 bail: 2001 /* release the VOPs */ 2002 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2003 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2004 2005 if (err) { 2006 /* 2007 * Since we couldn't setup the sa framework, try to force 2008 * unmount this file system. 2009 */ 2010 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 2011 vfs_ref(zfsvfs->z_vfs); 2012 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2013 } 2014 } 2015 return (err); 2016 } 2017 2018 static void 2019 zfs_freevfs(vfs_t *vfsp) 2020 { 2021 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2022 2023 zfsvfs_free(zfsvfs); 2024 2025 atomic_dec_32(&zfs_active_fs_count); 2026 } 2027 2028 #ifdef __i386__ 2029 static int desiredvnodes_backup; 2030 #include <sys/vmmeter.h> 2031 2032 2033 #include <vm/vm_page.h> 2034 #include <vm/vm_object.h> 2035 #include <vm/vm_kern.h> 2036 #include <vm/vm_map.h> 2037 #endif 2038 2039 static void 2040 zfs_vnodes_adjust(void) 2041 { 2042 #ifdef __i386__ 2043 int newdesiredvnodes; 2044 2045 desiredvnodes_backup = desiredvnodes; 2046 2047 /* 2048 * We calculate newdesiredvnodes the same way it is done in 2049 * vntblinit(). If it is equal to desiredvnodes, it means that 2050 * it wasn't tuned by the administrator and we can tune it down. 
*/
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/*
 * On i386, restore desiredvnodes from desiredvnodes_backup (presumably
 * saved by zfs_vnodes_adjust() -- confirm against the part of that function
 * not shown here).  On every other architecture the body is empty.
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/*
 * Module-wide ZPL initialization: announce the ZPL version, set up the
 * .zfs control directory and znode support, adjust the vnode limit,
 * register the ZPL file-info callback for DMU_OST_ZFS objsets and create
 * the "zfsvfs" task queue.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	/* Single-threaded task queue stored in the file-level zfsvfs_taskq. */
	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}

/*
 * Counterpart of zfs_init(): destroy the task queue, tear down the .zfs
 * control directory and znode support, and call zfs_vnodes_adjust_back().
 */
void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/*
 * Return non-zero while zfs_active_fs_count is non-zero, zero otherwise.
 */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 *
 * The caller must hold both teardown locks for writing (asserted below);
 * both are released here.  'ds' must be owned by 'zfsvfs' and long-held
 * (verified below).  Always returns 0.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 *
	 * The result of zfs_umount() is deliberately discarded; z_unmounted
	 * is set regardless of whether the unmount succeeded.
	 * NOTE(review): the flags argument is 0 -- confirm that this is the
	 * intended "force" behaviour.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}

/*
 * Upgrade the ZPL version of a mounted file system to 'newvers'.
 *
 * Returns EINVAL when 'newvers' is outside
 * [ZPL_VERSION_INITIAL, ZPL_VERSION] or lower than the current version,
 * ENOTSUP when the pool's SPA version is older than the one
 * zfs_spa_version_map() gives for 'newvers', the error from
 * dmu_tx_assign() or zap_update() if either fails, and 0 on success.
 *
 * On success the new version is stored under ZPL_VERSION_STR in the master
 * node and mirrored into zfsvfs->z_version and os->os_version.  When the
 * upgrade reaches ZPL_VERSION_SA on a file system not yet using SA, an SA
 * master node object is also created and registered.
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	/* Downgrades are refused. */
	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		/* Extra holds for the SA master node created further down. */
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/* The tx was never assigned, so it is aborted, not committed. */
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		/* The tx is already assigned here, so it is committed. */
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		/* A zap_add() failure is only checked by ASSERT0(). */
		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	/* Logged before z_version is updated so that "from" is the old one. */
	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Read a property stored within the master node.
 *
 * Lookup order: the copy cached in the objset_t (version, normalization,
 * utf8only and case only), then the master node ZAP, then a built-in
 * default.  'os' may be NULL, in which case only the defaults are
 * consulted.  Returns 0 with *value filled in, ENOENT for a property that
 * is unset and has no default here, or another zap_lookup() error.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property. For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	/* ENOENT is the starting value so a NULL 'os' falls to the defaults. */
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION) {
		pname = ZPL_VERSION_STR;
	} else {
		pname = zfs_prop_to_name(prop);
	}

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_NFSV4;
			break;
		default:
			/* No default known here: report ENOENT to the caller. */
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
2303 */ 2304 boolean_t 2305 zfs_get_vfs_flag_unmounted(objset_t *os) 2306 { 2307 zfsvfs_t *zfvp; 2308 boolean_t unmounted = B_FALSE; 2309 2310 ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); 2311 2312 mutex_enter(&os->os_user_ptr_lock); 2313 zfvp = dmu_objset_get_user(os); 2314 if (zfvp != NULL && zfvp->z_vfs != NULL && 2315 (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) 2316 unmounted = B_TRUE; 2317 mutex_exit(&os->os_user_ptr_lock); 2318 2319 return (unmounted); 2320 } 2321 2322 #ifdef _KERNEL 2323 void 2324 zfsvfs_update_fromname(const char *oldname, const char *newname) 2325 { 2326 char tmpbuf[MAXPATHLEN]; 2327 struct mount *mp; 2328 char *fromname; 2329 size_t oldlen; 2330 2331 oldlen = strlen(oldname); 2332 2333 mtx_lock(&mountlist_mtx); 2334 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2335 fromname = mp->mnt_stat.f_mntfromname; 2336 if (strcmp(fromname, oldname) == 0) { 2337 (void) strlcpy(fromname, newname, 2338 sizeof (mp->mnt_stat.f_mntfromname)); 2339 continue; 2340 } 2341 if (strncmp(fromname, oldname, oldlen) == 0 && 2342 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2343 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s", 2344 newname, fromname + oldlen); 2345 (void) strlcpy(fromname, tmpbuf, 2346 sizeof (mp->mnt_stat.f_mntfromname)); 2347 continue; 2348 } 2349 } 2350 mtx_unlock(&mountlist_mtx); 2351 } 2352 #endif 2353 2354 /* 2355 * Find a prison with ZFS info. 2356 * Return the ZFS info and the (locked) prison. 
*/
static struct zfs_jailparam *
zfs_jailparam_find(struct prison *spr, struct prison **prp)
{
	struct prison *pr;
	struct zfs_jailparam *zjp;

	/*
	 * Walk from 'spr' up the pr_parent chain, locking each prison as it
	 * is examined.  A prison that has no ZFS info is unlocked before
	 * moving on to its parent; the prison the loop stops at stays locked.
	 */
	for (pr = spr; ; pr = pr->pr_parent) {
		mtx_lock(&pr->pr_mtx);
		if (pr == &prison0) {
			/* prison0 uses the static zfs_jailparam0 record. */
			zjp = &zfs_jailparam0;
			break;
		}
		zjp = osd_jail_get(pr, zfs_jailparam_slot);
		if (zjp != NULL)
			break;
		mtx_unlock(&pr->pr_mtx);
	}
	*prp = pr;

	return (zjp);
}

/*
 * Ensure a prison has its own ZFS info.  If zjpp is non-null, point it to the
 * ZFS info and lock the prison.
 */
static void
zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
{
	struct prison *ppr;
	struct zfs_jailparam *zjp, *nzjp;
	void **rsv;

	/* If this prison already has ZFS info, return that. */
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr)
		goto done;

	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.
	 *
	 * The ancestor's lock is dropped before the M_WAITOK allocation
	 * (presumably because that allocation may sleep -- confirm).
	 */
	mtx_unlock(&ppr->pr_mtx);
	nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
	rsv = osd_reserve(zfs_jailparam_slot);
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr) {
		/* The prison gained its own record meanwhile; discard ours. */
		free(nzjp, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor. */
	/*
	 * 'pr' is locked while the ancestor 'ppr' is still locked, so the
	 * ancestor's record is copied while its lock is held; the ancestor
	 * is unlocked only after the copy.
	 */
	mtx_lock(&pr->pr_mtx);
	(void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
	(void) memcpy(nzjp, zjp, sizeof (*zjp));
	zjp = nzjp;
	mtx_unlock(&ppr->pr_mtx);
done:
	/* Every path reaches this label with pr->pr_mtx held. */
	if (zjpp != NULL)
		*zjpp = zjp;
	else
		mtx_unlock(&pr->pr_mtx);
}

/*
 * Jail OSD methods for ZFS VFS info.
2424 */ 2425 static int 2426 zfs_jailparam_create(void *obj, void *data) 2427 { 2428 struct prison *pr = obj; 2429 struct vfsoptlist *opts = data; 2430 int jsys; 2431 2432 if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 && 2433 jsys == JAIL_SYS_INHERIT) 2434 return (0); 2435 /* 2436 * Inherit a prison's initial values from its parent 2437 * (different from JAIL_SYS_INHERIT which also inherits changes). 2438 */ 2439 zfs_jailparam_alloc(pr, NULL); 2440 return (0); 2441 } 2442 2443 static int 2444 zfs_jailparam_get(void *obj, void *data) 2445 { 2446 struct prison *ppr, *pr = obj; 2447 struct vfsoptlist *opts = data; 2448 struct zfs_jailparam *zjp; 2449 int jsys, error; 2450 2451 zjp = zfs_jailparam_find(pr, &ppr); 2452 jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 2453 error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys)); 2454 if (error != 0 && error != ENOENT) 2455 goto done; 2456 if (jsys == JAIL_SYS_NEW) { 2457 error = vfs_setopt(opts, "zfs.mount_snapshot", 2458 &zjp->mount_snapshot, sizeof (zjp->mount_snapshot)); 2459 if (error != 0 && error != ENOENT) 2460 goto done; 2461 } else { 2462 /* 2463 * If this prison is inheriting its ZFS info, report 2464 * empty/zero parameters. 2465 */ 2466 static int mount_snapshot = 0; 2467 2468 error = vfs_setopt(opts, "zfs.mount_snapshot", 2469 &mount_snapshot, sizeof (mount_snapshot)); 2470 if (error != 0 && error != ENOENT) 2471 goto done; 2472 } 2473 error = 0; 2474 done: 2475 mtx_unlock(&ppr->pr_mtx); 2476 return (error); 2477 } 2478 2479 static int 2480 zfs_jailparam_set(void *obj, void *data) 2481 { 2482 struct prison *pr = obj; 2483 struct prison *ppr; 2484 struct vfsoptlist *opts = data; 2485 int error, jsys, mount_snapshot; 2486 2487 /* Set the parameters, which should be correct. 
*/ 2488 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2489 if (error == ENOENT) 2490 jsys = -1; 2491 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2492 sizeof (mount_snapshot)); 2493 if (error == ENOENT) 2494 mount_snapshot = -1; 2495 else 2496 jsys = JAIL_SYS_NEW; 2497 if (jsys == JAIL_SYS_NEW) { 2498 /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */ 2499 struct zfs_jailparam *zjp; 2500 2501 /* 2502 * A child jail cannot have more permissions than its parent 2503 */ 2504 if (pr->pr_parent != &prison0) { 2505 zjp = zfs_jailparam_find(pr->pr_parent, &ppr); 2506 mtx_unlock(&ppr->pr_mtx); 2507 if (zjp->mount_snapshot < mount_snapshot) { 2508 return (EPERM); 2509 } 2510 } 2511 zfs_jailparam_alloc(pr, &zjp); 2512 if (mount_snapshot != -1) 2513 zjp->mount_snapshot = mount_snapshot; 2514 mtx_unlock(&pr->pr_mtx); 2515 } else { 2516 /* "zfs=inherit": inherit the parent's ZFS info. */ 2517 mtx_lock(&pr->pr_mtx); 2518 osd_jail_del(pr, zfs_jailparam_slot); 2519 mtx_unlock(&pr->pr_mtx); 2520 } 2521 return (0); 2522 } 2523 2524 static int 2525 zfs_jailparam_check(void *obj __unused, void *data) 2526 { 2527 struct vfsoptlist *opts = data; 2528 int error, jsys, mount_snapshot; 2529 2530 /* Check that the parameters are correct. 
*/ 2531 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2532 if (error != ENOENT) { 2533 if (error != 0) 2534 return (error); 2535 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 2536 return (EINVAL); 2537 } 2538 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2539 sizeof (mount_snapshot)); 2540 if (error != ENOENT) { 2541 if (error != 0) 2542 return (error); 2543 if (mount_snapshot != 0 && mount_snapshot != 1) 2544 return (EINVAL); 2545 } 2546 return (0); 2547 } 2548 2549 static void 2550 zfs_jailparam_destroy(void *data) 2551 { 2552 2553 free(data, M_PRISON); 2554 } 2555 2556 static void 2557 zfs_jailparam_sysinit(void *arg __unused) 2558 { 2559 struct prison *pr; 2560 osd_method_t methods[PR_MAXMETHOD] = { 2561 [PR_METHOD_CREATE] = zfs_jailparam_create, 2562 [PR_METHOD_GET] = zfs_jailparam_get, 2563 [PR_METHOD_SET] = zfs_jailparam_set, 2564 [PR_METHOD_CHECK] = zfs_jailparam_check, 2565 }; 2566 2567 zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods); 2568 /* Copy the defaults to any existing prisons. */ 2569 sx_slock(&allprison_lock); 2570 TAILQ_FOREACH(pr, &allprison, pr_list) 2571 zfs_jailparam_alloc(pr, NULL); 2572 sx_sunlock(&allprison_lock); 2573 } 2574 2575 static void 2576 zfs_jailparam_sysuninit(void *arg __unused) 2577 { 2578 2579 osd_jail_deregister(zfs_jailparam_slot); 2580 } 2581 2582 SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2583 zfs_jailparam_sysinit, NULL); 2584 SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2585 zfs_jailparam_sysuninit, NULL); 2586