/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
 * All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/jail.h> 66 #include <sys/osd.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 struct mtx zfs_debug_mtx; 80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 81 82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 83 84 int zfs_super_owner; 85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 86 "File system owners can perform privileged operation on file systems"); 87 88 int zfs_debug_level; 89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 90 "Debug level"); 91 92 struct zfs_jailparam { 93 int mount_snapshot; 94 }; 95 96 static struct zfs_jailparam zfs_jailparam0 = { 97 .mount_snapshot = 0, 98 }; 99 100 static int zfs_jailparam_slot; 101 102 SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters"); 103 SYSCTL_JAIL_PARAM(_zfs, 
mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I", 104 "Allow mounting snapshots in the .zfs directory for unjailed datasets"); 105 106 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 107 static int zfs_version_acl = ZFS_ACL_VERSION; 108 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 109 "ZFS_ACL_VERSION"); 110 static int zfs_version_spa = SPA_VERSION; 111 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 112 "SPA_VERSION"); 113 static int zfs_version_zpl = ZPL_VERSION; 114 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 115 "ZPL_VERSION"); 116 117 #if __FreeBSD_version >= 1400018 118 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 119 bool *mp_busy); 120 #else 121 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 122 #endif 123 static int zfs_mount(vfs_t *vfsp); 124 static int zfs_umount(vfs_t *vfsp, int fflag); 125 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 126 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 127 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 128 static int zfs_sync(vfs_t *vfsp, int waitfor); 129 #if __FreeBSD_version >= 1300098 130 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 131 struct ucred **credanonp, int *numsecflavors, int *secflavors); 132 #else 133 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 134 struct ucred **credanonp, int *numsecflavors, int **secflavors); 135 #endif 136 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 137 static void zfs_freevfs(vfs_t *vfsp); 138 139 struct vfsops zfs_vfsops = { 140 .vfs_mount = zfs_mount, 141 .vfs_unmount = zfs_umount, 142 #if __FreeBSD_version >= 1300049 143 .vfs_root = vfs_cache_root, 144 .vfs_cachedroot = zfs_root, 145 #else 146 .vfs_root = zfs_root, 147 #endif 148 .vfs_statfs = zfs_statfs, 149 .vfs_vget = 
zfs_vget, 150 .vfs_sync = zfs_sync, 151 .vfs_checkexp = zfs_checkexp, 152 .vfs_fhtovp = zfs_fhtovp, 153 .vfs_quotactl = zfs_quotactl, 154 }; 155 156 #ifdef VFCF_CROSS_COPY_FILE_RANGE 157 VFS_SET(zfs_vfsops, zfs, 158 VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE); 159 #else 160 VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL); 161 #endif 162 163 /* 164 * We need to keep a count of active fs's. 165 * This is necessary to prevent our module 166 * from being unloaded after a umount -f 167 */ 168 static uint32_t zfs_active_fs_count = 0; 169 170 int 171 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 172 char *setpoint) 173 { 174 int error; 175 zfsvfs_t *zfvp; 176 vfs_t *vfsp; 177 objset_t *os; 178 uint64_t tmp = *val; 179 180 error = dmu_objset_from_ds(ds, &os); 181 if (error != 0) 182 return (error); 183 184 error = getzfsvfs_impl(os, &zfvp); 185 if (error != 0) 186 return (error); 187 if (zfvp == NULL) 188 return (ENOENT); 189 vfsp = zfvp->z_vfs; 190 switch (zfs_prop) { 191 case ZFS_PROP_ATIME: 192 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 193 tmp = 0; 194 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 195 tmp = 1; 196 break; 197 case ZFS_PROP_DEVICES: 198 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 199 tmp = 0; 200 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 201 tmp = 1; 202 break; 203 case ZFS_PROP_EXEC: 204 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 205 tmp = 0; 206 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 207 tmp = 1; 208 break; 209 case ZFS_PROP_SETUID: 210 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 211 tmp = 0; 212 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 213 tmp = 1; 214 break; 215 case ZFS_PROP_READONLY: 216 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 217 tmp = 0; 218 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 219 tmp = 1; 220 break; 221 case ZFS_PROP_XATTR: 222 if (zfvp->z_flags & ZSB_XATTR) 223 tmp = zfvp->z_xattr; 224 break; 225 case ZFS_PROP_NBMAND: 226 if 
(vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 227 tmp = 0; 228 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 229 tmp = 1; 230 break; 231 default: 232 vfs_unbusy(vfsp); 233 return (ENOENT); 234 } 235 236 vfs_unbusy(vfsp); 237 if (tmp != *val) { 238 if (setpoint) 239 (void) strcpy(setpoint, "temporary"); 240 *val = tmp; 241 } 242 return (0); 243 } 244 245 static int 246 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 247 { 248 int error = 0; 249 char buf[32]; 250 uint64_t usedobj, quotaobj; 251 uint64_t quota, used = 0; 252 timespec_t now; 253 254 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 255 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 256 257 if (quotaobj == 0 || zfsvfs->z_replay) { 258 error = ENOENT; 259 goto done; 260 } 261 (void) sprintf(buf, "%llx", (longlong_t)id); 262 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 263 buf, sizeof (quota), 1, "a)) != 0) { 264 dprintf("%s(%d): quotaobj lookup failed\n", 265 __FUNCTION__, __LINE__); 266 goto done; 267 } 268 /* 269 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 270 * So we set them to be the same. 271 */ 272 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 273 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 274 if (error && error != ENOENT) { 275 dprintf("%s(%d): usedobj failed; %d\n", 276 __FUNCTION__, __LINE__, error); 277 goto done; 278 } 279 dqp->dqb_curblocks = btodb(used); 280 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 281 vfs_timestamp(&now); 282 /* 283 * Setting this to 0 causes FreeBSD quota(8) to print 284 * the number of days since the epoch, which isn't 285 * particularly useful. 
286 */ 287 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 288 done: 289 return (error); 290 } 291 292 static int 293 #if __FreeBSD_version >= 1400018 294 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 295 #else 296 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 297 #endif 298 { 299 zfsvfs_t *zfsvfs = vfsp->vfs_data; 300 struct thread *td; 301 int cmd, type, error = 0; 302 int bitsize; 303 zfs_userquota_prop_t quota_type; 304 struct dqblk64 dqblk = { 0 }; 305 306 td = curthread; 307 cmd = cmds >> SUBCMDSHIFT; 308 type = cmds & SUBCMDMASK; 309 310 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 311 return (error); 312 if (id == -1) { 313 switch (type) { 314 case USRQUOTA: 315 id = td->td_ucred->cr_ruid; 316 break; 317 case GRPQUOTA: 318 id = td->td_ucred->cr_rgid; 319 break; 320 default: 321 error = EINVAL; 322 #if __FreeBSD_version < 1400018 323 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 324 vfs_unbusy(vfsp); 325 #endif 326 goto done; 327 } 328 } 329 /* 330 * Map BSD type to: 331 * ZFS_PROP_USERUSED, 332 * ZFS_PROP_USERQUOTA, 333 * ZFS_PROP_GROUPUSED, 334 * ZFS_PROP_GROUPQUOTA 335 */ 336 switch (cmd) { 337 case Q_SETQUOTA: 338 case Q_SETQUOTA32: 339 if (type == USRQUOTA) 340 quota_type = ZFS_PROP_USERQUOTA; 341 else if (type == GRPQUOTA) 342 quota_type = ZFS_PROP_GROUPQUOTA; 343 else 344 error = EINVAL; 345 break; 346 case Q_GETQUOTA: 347 case Q_GETQUOTA32: 348 if (type == USRQUOTA) 349 quota_type = ZFS_PROP_USERUSED; 350 else if (type == GRPQUOTA) 351 quota_type = ZFS_PROP_GROUPUSED; 352 else 353 error = EINVAL; 354 break; 355 } 356 357 /* 358 * Depending on the cmd, we may need to get 359 * the ruid and domain (see fuidstr_to_sid?), 360 * the fuid (how?), or other information. 361 * Create fuid using zfs_fuid_create(zfsvfs, id, 362 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 363 * I think I can use just the id? 364 * 365 * Look at zfs_id_overquota() to look up a quota. 
366 * zap_lookup(something, quotaobj, fuidstring, 367 * sizeof (long long), 1, "a) 368 * 369 * See zfs_set_userquota() to set a quota. 370 */ 371 if ((uint32_t)type >= MAXQUOTAS) { 372 error = EINVAL; 373 goto done; 374 } 375 376 switch (cmd) { 377 case Q_GETQUOTASIZE: 378 bitsize = 64; 379 error = copyout(&bitsize, arg, sizeof (int)); 380 break; 381 case Q_QUOTAON: 382 // As far as I can tell, you can't turn quotas on or off on zfs 383 error = 0; 384 #if __FreeBSD_version < 1400018 385 vfs_unbusy(vfsp); 386 #endif 387 break; 388 case Q_QUOTAOFF: 389 error = ENOTSUP; 390 #if __FreeBSD_version < 1400018 391 vfs_unbusy(vfsp); 392 #endif 393 break; 394 case Q_SETQUOTA: 395 error = copyin(arg, &dqblk, sizeof (dqblk)); 396 if (error == 0) 397 error = zfs_set_userquota(zfsvfs, quota_type, 398 "", id, dbtob(dqblk.dqb_bhardlimit)); 399 break; 400 case Q_GETQUOTA: 401 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 402 if (error == 0) 403 error = copyout(&dqblk, arg, sizeof (dqblk)); 404 break; 405 default: 406 error = EINVAL; 407 break; 408 } 409 done: 410 zfs_exit(zfsvfs, FTAG); 411 return (error); 412 } 413 414 415 boolean_t 416 zfs_is_readonly(zfsvfs_t *zfsvfs) 417 { 418 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 419 } 420 421 static int 422 zfs_sync(vfs_t *vfsp, int waitfor) 423 { 424 425 /* 426 * Data integrity is job one. We don't want a compromised kernel 427 * writing to the storage pool, so we never sync during panic. 428 */ 429 if (panicstr) 430 return (0); 431 432 /* 433 * Ignore the system syncher. ZFS already commits async data 434 * at zfs_txg_timeout intervals. 435 */ 436 if (waitfor == MNT_LAZY) 437 return (0); 438 439 if (vfsp != NULL) { 440 /* 441 * Sync a specific filesystem. 
442 */ 443 zfsvfs_t *zfsvfs = vfsp->vfs_data; 444 dsl_pool_t *dp; 445 int error; 446 447 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 448 return (error); 449 dp = dmu_objset_pool(zfsvfs->z_os); 450 451 /* 452 * If the system is shutting down, then skip any 453 * filesystems which may exist on a suspended pool. 454 */ 455 if (rebooting && spa_suspended(dp->dp_spa)) { 456 zfs_exit(zfsvfs, FTAG); 457 return (0); 458 } 459 460 if (zfsvfs->z_log != NULL) 461 zil_commit(zfsvfs->z_log, 0); 462 463 zfs_exit(zfsvfs, FTAG); 464 } else { 465 /* 466 * Sync all ZFS filesystems. This is what happens when you 467 * run sync(8). Unlike other filesystems, ZFS honors the 468 * request by waiting for all pools to commit all dirty data. 469 */ 470 spa_sync_allpools(); 471 } 472 473 return (0); 474 } 475 476 static void 477 atime_changed_cb(void *arg, uint64_t newval) 478 { 479 zfsvfs_t *zfsvfs = arg; 480 481 if (newval == TRUE) { 482 zfsvfs->z_atime = TRUE; 483 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 484 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 485 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 486 } else { 487 zfsvfs->z_atime = FALSE; 488 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 489 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 490 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 491 } 492 } 493 494 static void 495 xattr_changed_cb(void *arg, uint64_t newval) 496 { 497 zfsvfs_t *zfsvfs = arg; 498 499 if (newval == ZFS_XATTR_OFF) { 500 zfsvfs->z_flags &= ~ZSB_XATTR; 501 } else { 502 zfsvfs->z_flags |= ZSB_XATTR; 503 504 if (newval == ZFS_XATTR_SA) 505 zfsvfs->z_xattr_sa = B_TRUE; 506 else 507 zfsvfs->z_xattr_sa = B_FALSE; 508 } 509 } 510 511 static void 512 blksz_changed_cb(void *arg, uint64_t newval) 513 { 514 zfsvfs_t *zfsvfs = arg; 515 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 516 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 517 ASSERT(ISP2(newval)); 518 519 zfsvfs->z_max_blksz = newval; 520 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 521 } 
522 523 static void 524 readonly_changed_cb(void *arg, uint64_t newval) 525 { 526 zfsvfs_t *zfsvfs = arg; 527 528 if (newval) { 529 /* XXX locking on vfs_flag? */ 530 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 531 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 532 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 533 } else { 534 /* XXX locking on vfs_flag? */ 535 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 536 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 537 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 538 } 539 } 540 541 static void 542 setuid_changed_cb(void *arg, uint64_t newval) 543 { 544 zfsvfs_t *zfsvfs = arg; 545 546 if (newval == FALSE) { 547 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 548 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 549 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 550 } else { 551 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 552 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 553 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 554 } 555 } 556 557 static void 558 exec_changed_cb(void *arg, uint64_t newval) 559 { 560 zfsvfs_t *zfsvfs = arg; 561 562 if (newval == FALSE) { 563 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 564 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 565 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 566 } else { 567 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 568 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 569 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 570 } 571 } 572 573 /* 574 * The nbmand mount option can be changed at mount time. 
575 * We can't allow it to be toggled on live file systems or incorrect 576 * behavior may be seen from cifs clients 577 * 578 * This property isn't registered via dsl_prop_register(), but this callback 579 * will be called when a file system is first mounted 580 */ 581 static void 582 nbmand_changed_cb(void *arg, uint64_t newval) 583 { 584 zfsvfs_t *zfsvfs = arg; 585 if (newval == FALSE) { 586 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 587 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 588 } else { 589 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 590 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 591 } 592 } 593 594 static void 595 snapdir_changed_cb(void *arg, uint64_t newval) 596 { 597 zfsvfs_t *zfsvfs = arg; 598 599 zfsvfs->z_show_ctldir = newval; 600 } 601 602 static void 603 acl_mode_changed_cb(void *arg, uint64_t newval) 604 { 605 zfsvfs_t *zfsvfs = arg; 606 607 zfsvfs->z_acl_mode = newval; 608 } 609 610 static void 611 acl_inherit_changed_cb(void *arg, uint64_t newval) 612 { 613 zfsvfs_t *zfsvfs = arg; 614 615 zfsvfs->z_acl_inherit = newval; 616 } 617 618 static void 619 acl_type_changed_cb(void *arg, uint64_t newval) 620 { 621 zfsvfs_t *zfsvfs = arg; 622 623 zfsvfs->z_acl_type = newval; 624 } 625 626 static int 627 zfs_register_callbacks(vfs_t *vfsp) 628 { 629 struct dsl_dataset *ds = NULL; 630 objset_t *os = NULL; 631 zfsvfs_t *zfsvfs = NULL; 632 uint64_t nbmand; 633 boolean_t readonly = B_FALSE; 634 boolean_t do_readonly = B_FALSE; 635 boolean_t setuid = B_FALSE; 636 boolean_t do_setuid = B_FALSE; 637 boolean_t exec = B_FALSE; 638 boolean_t do_exec = B_FALSE; 639 boolean_t xattr = B_FALSE; 640 boolean_t atime = B_FALSE; 641 boolean_t do_atime = B_FALSE; 642 boolean_t do_xattr = B_FALSE; 643 int error = 0; 644 645 ASSERT3P(vfsp, !=, NULL); 646 zfsvfs = vfsp->vfs_data; 647 ASSERT3P(zfsvfs, !=, NULL); 648 os = zfsvfs->z_os; 649 650 /* 651 * This function can be called for a snapshot when we update snapshot's 652 * mount point, which 
isn't really supported. 653 */ 654 if (dmu_objset_is_snapshot(os)) 655 return (EOPNOTSUPP); 656 657 /* 658 * The act of registering our callbacks will destroy any mount 659 * options we may have. In order to enable temporary overrides 660 * of mount options, we stash away the current values and 661 * restore them after we register the callbacks. 662 */ 663 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 664 !spa_writeable(dmu_objset_spa(os))) { 665 readonly = B_TRUE; 666 do_readonly = B_TRUE; 667 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 668 readonly = B_FALSE; 669 do_readonly = B_TRUE; 670 } 671 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 672 setuid = B_FALSE; 673 do_setuid = B_TRUE; 674 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 675 setuid = B_TRUE; 676 do_setuid = B_TRUE; 677 } 678 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 679 exec = B_FALSE; 680 do_exec = B_TRUE; 681 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 682 exec = B_TRUE; 683 do_exec = B_TRUE; 684 } 685 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 686 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 687 do_xattr = B_TRUE; 688 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 689 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 690 do_xattr = B_TRUE; 691 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 692 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 693 do_xattr = B_TRUE; 694 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 695 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 696 do_xattr = B_TRUE; 697 } 698 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 699 atime = B_FALSE; 700 do_atime = B_TRUE; 701 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 702 atime = B_TRUE; 703 do_atime = B_TRUE; 704 } 705 706 /* 707 * We need to enter pool configuration here, so that we can use 708 * dsl_prop_get_int_ds() to handle the special nbmand property below. 
709 * dsl_prop_get_integer() can not be used, because it has to acquire 710 * spa_namespace_lock and we can not do that because we already hold 711 * z_teardown_lock. The problem is that spa_write_cachefile() is called 712 * with spa_namespace_lock held and the function calls ZFS vnode 713 * operations to write the cache file and thus z_teardown_lock is 714 * acquired after spa_namespace_lock. 715 */ 716 ds = dmu_objset_ds(os); 717 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 718 719 /* 720 * nbmand is a special property. It can only be changed at 721 * mount time. 722 * 723 * This is weird, but it is documented to only be changeable 724 * at mount time. 725 */ 726 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 727 nbmand = B_FALSE; 728 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 729 nbmand = B_TRUE; 730 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) { 731 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 732 return (error); 733 } 734 735 /* 736 * Register property callbacks. 737 * 738 * It would probably be fine to just check for i/o error from 739 * the first prop_register(), but I guess I like to go 740 * overboard... 741 */ 742 error = dsl_prop_register(ds, 743 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 744 error = error ? error : dsl_prop_register(ds, 745 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 746 error = error ? error : dsl_prop_register(ds, 747 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 748 error = error ? error : dsl_prop_register(ds, 749 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 750 error = error ? error : dsl_prop_register(ds, 751 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 752 error = error ? error : dsl_prop_register(ds, 753 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 754 error = error ? 
error : dsl_prop_register(ds, 755 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 756 error = error ? error : dsl_prop_register(ds, 757 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 758 error = error ? error : dsl_prop_register(ds, 759 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 760 error = error ? error : dsl_prop_register(ds, 761 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 762 zfsvfs); 763 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 764 if (error) 765 goto unregister; 766 767 /* 768 * Invoke our callbacks to restore temporary mount options. 769 */ 770 if (do_readonly) 771 readonly_changed_cb(zfsvfs, readonly); 772 if (do_setuid) 773 setuid_changed_cb(zfsvfs, setuid); 774 if (do_exec) 775 exec_changed_cb(zfsvfs, exec); 776 if (do_xattr) 777 xattr_changed_cb(zfsvfs, xattr); 778 if (do_atime) 779 atime_changed_cb(zfsvfs, atime); 780 781 nbmand_changed_cb(zfsvfs, nbmand); 782 783 return (0); 784 785 unregister: 786 dsl_prop_unregister_all(ds, zfsvfs); 787 return (error); 788 } 789 790 /* 791 * Associate this zfsvfs with the given objset, which must be owned. 792 * This will cache a bunch of on-disk state from the objset in the 793 * zfsvfs. 794 */ 795 static int 796 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 797 { 798 int error; 799 uint64_t val; 800 801 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 802 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 803 zfsvfs->z_os = os; 804 805 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 806 if (error != 0) 807 return (error); 808 if (zfsvfs->z_version > 809 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 810 (void) printf("Can't mount a version %lld file system " 811 "on a version %lld pool\n. 
Pool must be upgraded to mount " 812 "this file system.", (u_longlong_t)zfsvfs->z_version, 813 (u_longlong_t)spa_version(dmu_objset_spa(os))); 814 return (SET_ERROR(ENOTSUP)); 815 } 816 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 817 if (error != 0) 818 return (error); 819 zfsvfs->z_norm = (int)val; 820 821 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 822 if (error != 0) 823 return (error); 824 zfsvfs->z_utf8 = (val != 0); 825 826 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 827 if (error != 0) 828 return (error); 829 zfsvfs->z_case = (uint_t)val; 830 831 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 832 if (error != 0) 833 return (error); 834 zfsvfs->z_acl_type = (uint_t)val; 835 836 /* 837 * Fold case on file systems that are always or sometimes case 838 * insensitive. 839 */ 840 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 841 zfsvfs->z_case == ZFS_CASE_MIXED) 842 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 843 844 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 845 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 846 847 uint64_t sa_obj = 0; 848 if (zfsvfs->z_use_sa) { 849 /* should either have both of these objects or none */ 850 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 851 &sa_obj); 852 if (error != 0) 853 return (error); 854 855 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 856 if (error == 0 && val == ZFS_XATTR_SA) 857 zfsvfs->z_xattr_sa = B_TRUE; 858 } 859 860 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 861 &zfsvfs->z_attr_table); 862 if (error != 0) 863 return (error); 864 865 if (zfsvfs->z_version >= ZPL_VERSION_SA) 866 sa_register_update_callback(os, zfs_sa_upgrade); 867 868 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 869 &zfsvfs->z_root); 870 if (error != 0) 871 return (error); 872 ASSERT3U(zfsvfs->z_root, !=, 0); 873 874 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 875 &zfsvfs->z_unlinkedobj); 876 if (error != 0) 877 return 
(error); 878 879 error = zap_lookup(os, MASTER_NODE_OBJ, 880 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 881 8, 1, &zfsvfs->z_userquota_obj); 882 if (error == ENOENT) 883 zfsvfs->z_userquota_obj = 0; 884 else if (error != 0) 885 return (error); 886 887 error = zap_lookup(os, MASTER_NODE_OBJ, 888 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 889 8, 1, &zfsvfs->z_groupquota_obj); 890 if (error == ENOENT) 891 zfsvfs->z_groupquota_obj = 0; 892 else if (error != 0) 893 return (error); 894 895 error = zap_lookup(os, MASTER_NODE_OBJ, 896 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 897 8, 1, &zfsvfs->z_projectquota_obj); 898 if (error == ENOENT) 899 zfsvfs->z_projectquota_obj = 0; 900 else if (error != 0) 901 return (error); 902 903 error = zap_lookup(os, MASTER_NODE_OBJ, 904 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 905 8, 1, &zfsvfs->z_userobjquota_obj); 906 if (error == ENOENT) 907 zfsvfs->z_userobjquota_obj = 0; 908 else if (error != 0) 909 return (error); 910 911 error = zap_lookup(os, MASTER_NODE_OBJ, 912 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 913 8, 1, &zfsvfs->z_groupobjquota_obj); 914 if (error == ENOENT) 915 zfsvfs->z_groupobjquota_obj = 0; 916 else if (error != 0) 917 return (error); 918 919 error = zap_lookup(os, MASTER_NODE_OBJ, 920 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 921 8, 1, &zfsvfs->z_projectobjquota_obj); 922 if (error == ENOENT) 923 zfsvfs->z_projectobjquota_obj = 0; 924 else if (error != 0) 925 return (error); 926 927 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 928 &zfsvfs->z_fuid_obj); 929 if (error == ENOENT) 930 zfsvfs->z_fuid_obj = 0; 931 else if (error != 0) 932 return (error); 933 934 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 935 &zfsvfs->z_shares_dir); 936 if (error == ENOENT) 937 zfsvfs->z_shares_dir = 0; 938 else if (error != 0) 939 return (error); 940 941 /* 942 * Only use the name cache if we are looking for a 943 * name on a file system that 
does not require normalization 944 * or case folding. We can also look there if we happen to be 945 * on a non-normalizing, mixed sensitivity file system IF we 946 * are looking for the exact name (which is always the case on 947 * FreeBSD). 948 */ 949 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 950 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 951 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 952 953 return (0); 954 } 955 956 taskq_t *zfsvfs_taskq; 957 958 static void 959 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 960 { 961 962 zfs_unlinked_drain((zfsvfs_t *)context); 963 } 964 965 int 966 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 967 { 968 objset_t *os; 969 zfsvfs_t *zfsvfs; 970 int error; 971 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 972 973 /* 974 * XXX: Fix struct statfs so this isn't necessary! 975 * 976 * The 'osname' is used as the filesystem's special node, which means 977 * it must fit in statfs.f_mntfromname, or else it can't be 978 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 979 * 'zfs unmount' to think it's not mounted when it is. 
980 */ 981 if (strlen(osname) >= MNAMELEN) 982 return (SET_ERROR(ENAMETOOLONG)); 983 984 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 985 986 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 987 &os); 988 if (error != 0) { 989 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 990 return (error); 991 } 992 993 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 994 995 return (error); 996 } 997 998 999 int 1000 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 1001 { 1002 int error; 1003 1004 zfsvfs->z_vfs = NULL; 1005 zfsvfs->z_parent = zfsvfs; 1006 1007 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1008 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 1009 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1010 offsetof(znode_t, z_link_node)); 1011 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 1012 zfsvfs_task_unlinked_drain, zfsvfs); 1013 ZFS_TEARDOWN_INIT(zfsvfs); 1014 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 1015 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1016 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1017 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1018 1019 error = zfsvfs_init(zfsvfs, os); 1020 if (error != 0) { 1021 dmu_objset_disown(os, B_TRUE, zfsvfs); 1022 *zfvp = NULL; 1023 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1024 return (error); 1025 } 1026 1027 *zfvp = zfsvfs; 1028 return (0); 1029 } 1030 1031 static int 1032 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1033 { 1034 int error; 1035 1036 /* 1037 * Check for a bad on-disk format version now since we 1038 * lied about owning the dataset readonly before. 
	 */
	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
		return (SET_ERROR(EROFS));

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
		error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
		if (error)
			return (error);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly != 0) {
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		} else {
			dsl_dir_t *dd;
			zap_stats_t zs;

			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
			    &zs) == 0) {
				dataset_kstats_update_nunlinks_kstat(
				    &zfsvfs->z_kstat, zs.zs_num_entries);
				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
				    "num_entries in unlinked set: %llu",
				    (u_longlong_t)zs.zs_num_entries);
			}

			zfs_unlinked_drain(zfsvfs);
			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
			dd->dd_activity_cancelled = B_FALSE;
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain().  (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.)  This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg.  This would write a "create
		 * object N" record to the intent log.  Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk.  So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				/*
				 * The namecache is disabled for the
				 * duration of replay; z_replay makes
				 * the vnops take their replay paths.
				 */
				boolean_t use_nc = zfsvfs->z_use_namecache;
				zfsvfs->z_use_namecache = B_FALSE;
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
				zfsvfs->z_use_namecache = use_nc;
			}
		}

		/* restore readonly bit */
		if (readonly != 0)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	} else {
		ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}

/*
 * Tear down and free a zfsvfs_t: destroy all the locks and lists
 * created by zfsvfs_create_impl()/zfsvfs_init() and free the structure.
 * The objset must already have been disowned by the caller.
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;

	zfs_fuid_destroy(zfsvfs);

	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	list_destroy(&zfsvfs->z_all_znodes);
	ZFS_TEARDOWN_DESTROY(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	dataset_kstats_destroy(&zfsvfs->z_kstat);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}

/*
 * Recompute the FUID/SA feature flags from the current ZPL version and
 * objset; called at mount, resume and version upgrade.
 */
static void
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}

/*
 * Core of zfs_mount(): create the zfsvfs for 'osname', wire it to the
 * FreeBSD vfs_t (flags, fsid, iosize), and either configure it as a
 * snapshot mount or run full zfsvfs_setup().  On error the objset is
 * disowned and the zfsvfs freed.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;

	ASSERT3P(vfsp, !=, NULL);
	ASSERT3P(osname, !=, NULL);

	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	if ((error = dsl_prop_get_integer(osname,
	    "recordsize", &recordsize, NULL)))
		goto out;
	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;

	vfsp->vfs_data = zfsvfs;
	vfsp->mnt_flag |= MNT_LOCAL;
	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
	/*
	 * This can cause a loss of coherence between ARC and page cache
	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
	 */
	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;

#if defined(_KERNEL) && !defined(KMEM_DEBUG)
	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
#endif
	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID.  The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
	vfsp->vfs_fsid.val[0] = fsid_guid;
	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);

	/*
	 * Set features for file system.
	 */
	zfs_set_fuid_feature(zfsvfs);

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t pval;

		/* Snapshots are always read-only with atime disabled. */
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		if ((error = dsl_prop_get_integer(osname,
		    "acltype", &pval, NULL)))
			goto out;
		acl_type_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
			goto out;
	}

	vfs_mountedfrom(vfsp, osname);

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	if (error) {
		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
		zfsvfs_free(zfsvfs);
	} else {
		atomic_inc_32(&zfs_active_fs_count);
	}

	return (error);
}

/*
 * Unregister the property callbacks installed by
 * zfs_register_callbacks(); no-op for snapshots, which never register.
 */
static void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
	objset_t *os = zfsvfs->z_os;

	if (!dmu_objset_is_snapshot(os))
		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}

/*
 * Copy the pool component of dataset name 'osname' (everything up to
 * the first '/') into 'poolname'.  The caller's buffer must be at least
 * MAXNAMELEN bytes.  Returns 0 or ENAMETOOLONG.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	char *p;

	p = strchr(osname, '/');
	if (p == NULL) {
		if (strlen(osname) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(poolname, osname);
	} else {
		if (p - osname >= MAXNAMELEN)
			return (ENAMETOOLONG);
		/* +1 leaves room for strlcpy's NUL terminator. */
		(void) strlcpy(poolname, osname, p - osname + 1);
	}
	return (0);
}

/*
 * Strip a leading '!' (checkpoint-rewind request) from the dataset
 * name in place and report whether it was present.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{

	if (name[0] == '!') {
		*checkpointrewind = true;
		memmove(name, name + 1, strlen(name));
	} else {
		*checkpointrewind = false;
	}
}

/*
 * VFS_MOUNT entry point: validate privileges and zone/jail visibility,
 * handle the remount case by refreshing property callbacks, import the
 * root pool when this is the initial root mount, and finally call
 * zfs_domount().
 */
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t	*td = curthread;
	vnode_t		*mvp = vfsp->mnt_vnodecovered;
	cred_t		*cr = td->td_ucred;
	char		*osname;
	int		error = 0;
	int		canwrite;
	bool		checkpointrewind, isctlsnap = false;

	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	fetch_osname_options(osname, &checkpointrewind);
	/* A snapshot mounted under a .zfs control directory? */
	isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
	    strchr(osname, '@') != NULL);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error && isctlsnap) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	} else if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		boolean_t mount_snapshot = B_FALSE;

		/*
		 * Snapshots may be mounted in .zfs for unjailed datasets
		 * if allowed by the jail param zfs.mount_snapshot.
		 */
		if (isctlsnap) {
			struct prison *pr;
			struct zfs_jailparam *zjp;

			pr = curthread->td_ucred->cr_prison;
			mtx_lock(&pr->pr_mtx);
			zjp = osd_jail_get(pr, zfs_jailparam_slot);
			mtx_unlock(&pr->pr_mtx);
			if (zjp && zjp->mount_snapshot)
				mount_snapshot = B_TRUE;
		}
		if (!mount_snapshot) {
			error = SET_ERROR(EPERM);
			goto out;
		}
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O
		 * while the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/* Initial root mount: try hard to import the requested root pool. */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}

/*
 * VFS_STATFS entry point: report space and object counts for the
 * mounted dataset in struct statfs.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;
	int error;

	statp->f_version = STATFS_VERSION;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
1473 */ 1474 1475 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1476 statp->f_bfree = availbytes / statp->f_bsize; 1477 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1478 1479 /* 1480 * statvfs() should really be called statufs(), because it assumes 1481 * static metadata. ZFS doesn't preallocate files, so the best 1482 * we can do is report the max that could possibly fit in f_files, 1483 * and that minus the number actually used in f_ffree. 1484 * For f_ffree, report the smaller of the number of object available 1485 * and the number of blocks (each object will take at least a block). 1486 */ 1487 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1488 statp->f_files = statp->f_ffree + usedobjs; 1489 1490 /* 1491 * We're a zfs filesystem. 1492 */ 1493 strlcpy(statp->f_fstypename, "zfs", 1494 sizeof (statp->f_fstypename)); 1495 1496 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1497 sizeof (statp->f_mntfromname)); 1498 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1499 sizeof (statp->f_mntonname)); 1500 1501 statp->f_namemax = MAXNAMELEN - 1; 1502 1503 zfs_exit(zfsvfs, FTAG); 1504 return (0); 1505 } 1506 1507 static int 1508 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1509 { 1510 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1511 znode_t *rootzp; 1512 int error; 1513 1514 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1515 return (error); 1516 1517 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1518 if (error == 0) 1519 *vpp = ZTOV(rootzp); 1520 1521 zfs_exit(zfsvfs, FTAG); 1522 1523 if (error == 0) { 1524 error = vn_lock(*vpp, flags); 1525 if (error != 0) { 1526 VN_RELE(*vpp); 1527 *vpp = NULL; 1528 } 1529 } 1530 return (error); 1531 } 1532 1533 /* 1534 * Teardown the zfsvfs::z_os. 1535 * 1536 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 1537 * and 'z_teardown_inactive_lock' held. 
1538 */ 1539 static int 1540 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1541 { 1542 znode_t *zp; 1543 dsl_dir_t *dd; 1544 1545 /* 1546 * If someone has not already unmounted this file system, 1547 * drain the zrele_taskq to ensure all active references to the 1548 * zfsvfs_t have been handled only then can it be safely destroyed. 1549 */ 1550 if (zfsvfs->z_os) { 1551 /* 1552 * If we're unmounting we have to wait for the list to 1553 * drain completely. 1554 * 1555 * If we're not unmounting there's no guarantee the list 1556 * will drain completely, but zreles run from the taskq 1557 * may add the parents of dir-based xattrs to the taskq 1558 * so we want to wait for these. 1559 * 1560 * We can safely check z_all_znodes for being empty because the 1561 * VFS has already blocked operations which add to it. 1562 */ 1563 int round = 0; 1564 while (!list_is_empty(&zfsvfs->z_all_znodes)) { 1565 taskq_wait_outstanding(dsl_pool_zrele_taskq( 1566 dmu_objset_pool(zfsvfs->z_os)), 0); 1567 if (++round > 1 && !unmounting) 1568 break; 1569 } 1570 } 1571 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1572 1573 if (!unmounting) { 1574 /* 1575 * We purge the parent filesystem's vfsp as the parent 1576 * filesystem and all of its snapshots have their vnode's 1577 * v_vfsp set to the parent's filesystem's vfsp. Note, 1578 * 'z_parent' is self referential for non-snapshots. 1579 */ 1580 #ifdef FREEBSD_NAMECACHE 1581 #if __FreeBSD_version >= 1300117 1582 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1583 #else 1584 cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 1585 #endif 1586 #endif 1587 } 1588 1589 /* 1590 * Close the zil. NB: Can't close the zil while zfs_inactive 1591 * threads are blocked as zil_close can call zfs_inactive. 
1592 */ 1593 if (zfsvfs->z_log) { 1594 zil_close(zfsvfs->z_log); 1595 zfsvfs->z_log = NULL; 1596 } 1597 1598 ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); 1599 1600 /* 1601 * If we are not unmounting (ie: online recv) and someone already 1602 * unmounted this file system while we were doing the switcheroo, 1603 * or a reopen of z_os failed then just bail out now. 1604 */ 1605 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1606 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1607 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1608 return (SET_ERROR(EIO)); 1609 } 1610 1611 /* 1612 * At this point there are no vops active, and any new vops will 1613 * fail with EIO since we have z_teardown_lock for writer (only 1614 * relevant for forced unmount). 1615 * 1616 * Release all holds on dbufs. 1617 */ 1618 mutex_enter(&zfsvfs->z_znodes_lock); 1619 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1620 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1621 if (zp->z_sa_hdl != NULL) { 1622 zfs_znode_dmu_fini(zp); 1623 } 1624 } 1625 mutex_exit(&zfsvfs->z_znodes_lock); 1626 1627 /* 1628 * If we are unmounting, set the unmounted flag and let new vops 1629 * unblock. zfs_inactive will have the unmounted behavior, and all 1630 * other vops will fail with EIO. 1631 */ 1632 if (unmounting) { 1633 zfsvfs->z_unmounted = B_TRUE; 1634 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1635 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1636 } 1637 1638 /* 1639 * z_os will be NULL if there was an error in attempting to reopen 1640 * zfsvfs, so just return as the properties had already been 1641 * unregistered and cached data had been evicted before. 1642 */ 1643 if (zfsvfs->z_os == NULL) 1644 return (0); 1645 1646 /* 1647 * Unregister properties. 
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/*
 * VFS_UNMOUNT entry point: check privileges, unmount any .zfs
 * snapshots, flush vnodes, tear down the zfsvfs and release the objset.
 */
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE).  This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/* Make sure no unlinked-drain task is left queued or running. */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET entry point: translate an inode number to a locked vnode
 * for NFS; virtual .zfs entries are rejected with EOPNOTSUPP.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
	znode_t		*zp;
	int 		err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	zfs_exit(zfsvfs, FTAG);
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

/*
 * VFS_CHECKEXP entry point: NFS export check, delegated to the parent
 * filesystem's vfsp (the signature changed in FreeBSD 1300098).
 */
static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}

/* The fid handling below assumes both fid layouts fit in struct fid. */
_Static_assert(sizeof (struct fid) >= SHORT_FID_LEN,
	"struct fid bigger than SHORT_FID_LEN");
_Static_assert(sizeof (struct fid) >= LONG_FID_LEN,
	"struct fid bigger than LONG_FID_LEN");

/*
 * VFS_FHTOVP entry point: convert an NFS file handle (short or long
 * zfid) into a locked vnode, validating the embedded generation number.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
{
	struct componentname cn;
	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
	znode_t		*zp;
	vnode_t		*dvp;
	uint64_t	object = 0;
	uint64_t	fid_gen = 0;
	uint64_t	setgen = 0;
	uint64_t	gen_mask;
	uint64_t	zp_gen;
	int 		i, err;

	*vpp = NULL;

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
		uint64_t	objsetid = 0;

		/* Decode the little-endian byte arrays from the long fid. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		zfs_exit(zfsvfs, FTAG);

		/* Switch to the snapshot's own zfsvfs. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (SET_ERROR(EINVAL));
		if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
			return (err);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t	*zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	if (fidp->fid_len == LONG_FID_LEN && setgen != 0) {
		zfs_exit(zfsvfs, FTAG);
		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
	 * directory tree.  If the object == zfsvfs->z_shares_dir, then
	 * we are in the .zfs/shares directory tree.
	 */
	if ((fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
		zfs_exit(zfsvfs, FTAG);
		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
		if (object == ZFSCTL_INO_SNAPDIR) {
			cn.cn_nameptr = "snapshot";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN | LOCKLEAF;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else if (object == zfsvfs->z_shares_dir) {
			/*
			 * XXX This branch must not be taken,
			 * if it is, then the lookup below will
			 * explode.
			 */
			cn.cn_nameptr = "shares";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else {
			*vpp = dvp;
		}
		return (err);
	}

	/* 'i' is still the width of zf_gen from the decode loop above. */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen,
	    (u_longlong_t)gen_mask);
	if ((err = zfs_zget(zfsvfs, object, &zp))) {
		zfs_exit(zfsvfs, FTAG);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
		vrele(ZTOV(zp));
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	zfs_exit(zfsvfs, FTAG);
	err = vn_lock(*vpp, flags);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the
 * 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 * dataset and objset intact so that they can be atomically handed off during
 * a subsequent rollback or recv operation and the resume thereafter.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	return (0);
}

/*
 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 * is an invariant across any of the operations that can be performed while the
 * filesystem was suspended.  Whether it succeeded or failed, the preconditions
 * are the same: the relevant objset and associated dataset are owned by
 * zfsvfs, held, and long held on entry.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs.  If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via zfs_enter_verify_zp
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}

/*
 * Free the zfsvfs attached to a vfs_t and drop the active-filesystem
 * count; called from zfs_umount() after teardown.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}

#ifdef __i386__
static int desiredvnodes_backup;
#include <sys/vmmeter.h>


#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#endif

/*
 * On i386, shrink the global desiredvnodes tunable (saved for restore
 * in zfs_vnodes_adjust_back()) unless the administrator already changed
 * it; ZFS vnodes are heavier than the UFS inodes the default assumes.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit().  If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/*
 * Restore the desiredvnodes value saved by zfs_vnodes_adjust().
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

#if __FreeBSD_version >= 1300139
static struct sx zfs_vnlru_lock;
static struct vnode *zfs_vnlru_marker;
#endif
static arc_prune_t *zfs_prune;

/*
 * ARC prune callback: ask the vnode LRU to free up to nr_to_scan
 * ZFS vnodes so their ARC buffers can be reclaimed.
 */
static void
zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
{
	if (nr_to_scan > INT_MAX)
		nr_to_scan = INT_MAX;
#if __FreeBSD_version >= 1300139
	sx_xlock(&zfs_vnlru_lock);
	vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
	sx_xunlock(&zfs_vnlru_lock);
#else
	vnlru_free(nr_to_scan, &zfs_vfsops);
#endif
}

/*
 * Module initialization: set up .zfs support, znode cache, vnode
 * tuning, the zfsvfs taskq and the ARC prune callback.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes.  Originally number of vnodes is calculated
	 * with UFS inode in mind.  We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);

#if __FreeBSD_version >= 1300139
	zfs_vnlru_marker = vnlru_alloc_marker();
	sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
#endif
	zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
}

/*
 * Module teardown: undo everything zfs_init() set up, in reverse order.
 */
void
zfs_fini(void)
{
	arc_remove_prune_callback(zfs_prune);
#if __FreeBSD_version >= 1300139
	vnlru_free_marker(zfs_vnlru_marker);
	sx_destroy(&zfs_vnlru_lock);
#endif

	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/*
 * Return non-zero while any ZFS filesystem is mounted; used to block
 * module unload.
 */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
2176 */ 2177 (void) zfs_umount(zfsvfs->z_vfs, 0); 2178 zfsvfs->z_unmounted = B_TRUE; 2179 return (0); 2180 } 2181 2182 int 2183 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2184 { 2185 int error; 2186 objset_t *os = zfsvfs->z_os; 2187 dmu_tx_t *tx; 2188 2189 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2190 return (SET_ERROR(EINVAL)); 2191 2192 if (newvers < zfsvfs->z_version) 2193 return (SET_ERROR(EINVAL)); 2194 2195 if (zfs_spa_version_map(newvers) > 2196 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2197 return (SET_ERROR(ENOTSUP)); 2198 2199 tx = dmu_tx_create(os); 2200 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2201 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2202 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2203 ZFS_SA_ATTRS); 2204 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2205 } 2206 error = dmu_tx_assign(tx, TXG_WAIT); 2207 if (error) { 2208 dmu_tx_abort(tx); 2209 return (error); 2210 } 2211 2212 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2213 8, 1, &newvers, tx); 2214 2215 if (error) { 2216 dmu_tx_commit(tx); 2217 return (error); 2218 } 2219 2220 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2221 uint64_t sa_obj; 2222 2223 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2224 SPA_VERSION_SA); 2225 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2226 DMU_OT_NONE, 0, tx); 2227 2228 error = zap_add(os, MASTER_NODE_OBJ, 2229 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2230 ASSERT0(error); 2231 2232 VERIFY0(sa_set_sa_object(os, sa_obj)); 2233 sa_register_update_callback(os, zfs_sa_upgrade); 2234 } 2235 2236 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2237 "from %ju to %ju", (uintmax_t)zfsvfs->z_version, 2238 (uintmax_t)newvers); 2239 dmu_tx_commit(tx); 2240 2241 zfsvfs->z_version = newvers; 2242 os->os_version = newvers; 2243 2244 zfs_set_fuid_feature(zfsvfs); 2245 2246 return (0); 2247 } 2248 2249 /* 2250 * Return true if the corresponding vfs's unmounted flag is 
set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	/*
	 * os_user_ptr_lock keeps the objset's user pointer (the zfsvfs)
	 * stable while we peek at the mount's kernel flags.
	 */
	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
/*
 * Rewrite the f_mntfromname of every mounted filesystem whose source is
 * 'oldname', or a descendant ('oldname/...') or snapshot ('oldname@...')
 * of it, to use 'newname' instead.  Walks the global mountlist under
 * mountlist_mtx.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		/* Exact match: replace the whole name. */
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		/* Child dataset or snapshot: splice in the new prefix. */
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif

/*
 * Find a prison with ZFS info.
 * Return the ZFS info and the (locked) prison.
 */
static struct zfs_jailparam *
zfs_jailparam_find(struct prison *spr, struct prison **prp)
{
	struct prison *pr;
	struct zfs_jailparam *zjp;

	/*
	 * Walk up the prison hierarchy from 'spr' until we find a prison
	 * with its own ZFS info; prison0 always terminates the walk with
	 * the static defaults.  The matching prison is returned locked
	 * in *prp — the caller must drop pr_mtx.
	 */
	for (pr = spr; ; pr = pr->pr_parent) {
		mtx_lock(&pr->pr_mtx);
		if (pr == &prison0) {
			zjp = &zfs_jailparam0;
			break;
		}
		zjp = osd_jail_get(pr, zfs_jailparam_slot);
		if (zjp != NULL)
			break;
		mtx_unlock(&pr->pr_mtx);
	}
	*prp = pr;

	return (zjp);
}

/*
 * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the
 * ZFS info and lock the prison.
 */
static void
zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
{
	struct prison *ppr;
	struct zfs_jailparam *zjp, *nzjp;
	void **rsv;

	/* If this prison already has ZFS info, return that. */
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr)
		goto done;

	/*
	 * Allocate a new info record. Then check again, in case something
	 * changed during the allocation.
	 */
	mtx_unlock(&ppr->pr_mtx);
	nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
	rsv = osd_reserve(zfs_jailparam_slot);
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr) {
		/* Lost the race: someone else installed one; discard ours. */
		free(nzjp, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor. */
	mtx_lock(&pr->pr_mtx);
	(void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
	(void) memcpy(nzjp, zjp, sizeof (*zjp));
	zjp = nzjp;
	mtx_unlock(&ppr->pr_mtx);
done:
	if (zjpp != NULL)
		*zjpp = zjp;
	else
		mtx_unlock(&pr->pr_mtx);
}

/*
 * Jail OSD methods for ZFS VFS info.
 */
static int
zfs_jailparam_create(void *obj, void *data)
{
	struct prison *pr = obj;
	struct vfsoptlist *opts = data;
	int jsys;

	/* "zfs=inherit" requested: leave the prison without private info. */
	if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 &&
	    jsys == JAIL_SYS_INHERIT)
		return (0);
	/*
	 * Inherit a prison's initial values from its parent
	 * (different from JAIL_SYS_INHERIT which also inherits changes).
	 */
	zfs_jailparam_alloc(pr, NULL);
	return (0);
}

/* Report a prison's "zfs" / "zfs.mount_snapshot" parameters (jail get). */
static int
zfs_jailparam_get(void *obj, void *data)
{
	struct prison *ppr, *pr = obj;
	struct vfsoptlist *opts = data;
	struct zfs_jailparam *zjp;
	int jsys, error;

	/* zfs_jailparam_find() returns with ppr's pr_mtx held. */
	zjp = zfs_jailparam_find(pr, &ppr);
	jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
	error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error != 0 && error != ENOENT)
		goto done;
	if (jsys == JAIL_SYS_NEW) {
		error = vfs_setopt(opts, "zfs.mount_snapshot",
		    &zjp->mount_snapshot, sizeof (zjp->mount_snapshot));
		if (error != 0 && error != ENOENT)
			goto done;
	} else {
		/*
		 * If this prison is inheriting its ZFS info, report
		 * empty/zero parameters.
		 */
		static int mount_snapshot = 0;

		error = vfs_setopt(opts, "zfs.mount_snapshot",
		    &mount_snapshot, sizeof (mount_snapshot));
		if (error != 0 && error != ENOENT)
			goto done;
	}
	error = 0;
done:
	mtx_unlock(&ppr->pr_mtx);
	return (error);
}

/* Apply "zfs" / "zfs.mount_snapshot" parameters (jail set). */
static int
zfs_jailparam_set(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *ppr;
	struct vfsoptlist *opts = data;
	int error, jsys, mount_snapshot;

	/* Set the parameters, which should be correct.
 */
	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error == ENOENT)
		jsys = -1;
	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
	    sizeof (mount_snapshot));
	if (error == ENOENT)
		mount_snapshot = -1;
	else
		/* Setting any "zfs.*" parameter implies "zfs=new". */
		jsys = JAIL_SYS_NEW;
	switch (jsys) {
	case JAIL_SYS_NEW:
	{
		/* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
		struct zfs_jailparam *zjp;

		/*
		 * A child jail cannot have more permissions than its parent
		 */
		if (pr->pr_parent != &prison0) {
			zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
			mtx_unlock(&ppr->pr_mtx);
			if (zjp->mount_snapshot < mount_snapshot) {
				return (EPERM);
			}
		}
		/* zfs_jailparam_alloc() returns with pr's pr_mtx held. */
		zfs_jailparam_alloc(pr, &zjp);
		if (mount_snapshot != -1)
			zjp->mount_snapshot = mount_snapshot;
		mtx_unlock(&pr->pr_mtx);
		break;
	}
	case JAIL_SYS_INHERIT:
		/* "zfs=inherit": inherit the parent's ZFS info. */
		mtx_lock(&pr->pr_mtx);
		osd_jail_del(pr, zfs_jailparam_slot);
		mtx_unlock(&pr->pr_mtx);
		break;
	case -1:
		/*
		 * If the setting being changed is not ZFS related
		 * then do nothing.
		 */
		break;
	}

	return (0);
}

/* Validate "zfs" / "zfs.mount_snapshot" parameters before they are applied. */
static int
zfs_jailparam_check(void *obj __unused, void *data)
{
	struct vfsoptlist *opts = data;
	int error, jsys, mount_snapshot;

	/* Check that the parameters are correct.
 */
	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
			return (EINVAL);
	}
	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
	    sizeof (mount_snapshot));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		/* mount_snapshot is a boolean knob: only 0 or 1 is valid. */
		if (mount_snapshot != 0 && mount_snapshot != 1)
			return (EINVAL);
	}
	return (0);
}

/* OSD destructor: free a prison's private ZFS info record. */
static void
zfs_jailparam_destroy(void *data)
{

	free(data, M_PRISON);
}

/*
 * Register the jail OSD slot and methods for the ZFS parameters, then
 * give every already-existing prison its own copy of the defaults.
 */
static void
zfs_jailparam_sysinit(void *arg __unused)
{
	struct prison *pr;
	osd_method_t methods[PR_MAXMETHOD] = {
		[PR_METHOD_CREATE] = zfs_jailparam_create,
		[PR_METHOD_GET] = zfs_jailparam_get,
		[PR_METHOD_SET] = zfs_jailparam_set,
		[PR_METHOD_CHECK] = zfs_jailparam_check,
	};

	zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods);
	/* Copy the defaults to any existing prisons. */
	sx_slock(&allprison_lock);
	TAILQ_FOREACH(pr, &allprison, pr_list)
		zfs_jailparam_alloc(pr, NULL);
	sx_sunlock(&allprison_lock);
}

/* Unregister the jail OSD slot registered in zfs_jailparam_sysinit(). */
static void
zfs_jailparam_sysuninit(void *arg __unused)
{

	osd_jail_deregister(zfs_jailparam_slot);
}

SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zfs_jailparam_sysinit, NULL);
SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zfs_jailparam_sysuninit, NULL);