1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/jail.h> 66 #include <sys/osd.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 struct mtx zfs_debug_mtx; 80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 81 82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 83 84 int zfs_super_owner; 85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 86 "File system owners can perform privileged operation on file systems"); 87 88 int zfs_debug_level; 89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 90 "Debug level"); 91 92 int zfs_bclone_enabled = 1; 93 SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN, 94 &zfs_bclone_enabled, 0, "Enable block cloning"); 95 96 struct zfs_jailparam { 97 int mount_snapshot; 98 }; 99 100 static struct zfs_jailparam zfs_jailparam0 = { 101 .mount_snapshot = 0, 102 }; 103 104 static int zfs_jailparam_slot; 105 106 SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters"); 107 SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I", 108 "Allow mounting snapshots in the .zfs directory for unjailed datasets"); 109 110 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 111 static int zfs_version_acl = ZFS_ACL_VERSION; 112 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 113 "ZFS_ACL_VERSION"); 114 static int zfs_version_spa = SPA_VERSION; 115 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 116 "SPA_VERSION"); 117 static int zfs_version_zpl = ZPL_VERSION; 118 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 119 "ZPL_VERSION"); 120 121 #if __FreeBSD_version >= 1400018 122 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 123 bool *mp_busy); 124 #else 125 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 126 #endif 127 static int zfs_mount(vfs_t *vfsp); 128 static int zfs_umount(vfs_t *vfsp, int fflag); 129 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 130 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 131 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 132 static int zfs_sync(vfs_t *vfsp, int waitfor); 133 #if __FreeBSD_version >= 1300098 134 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 135 struct ucred **credanonp, int *numsecflavors, int *secflavors); 136 #else 137 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 138 struct ucred **credanonp, int *numsecflavors, int **secflavors); 139 #endif 140 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 141 static void zfs_freevfs(vfs_t *vfsp); 142 143 struct vfsops zfs_vfsops = { 144 .vfs_mount = zfs_mount, 145 .vfs_unmount = zfs_umount, 146 #if __FreeBSD_version >= 1300049 147 .vfs_root = vfs_cache_root, 148 .vfs_cachedroot = zfs_root, 149 #else 150 .vfs_root = zfs_root, 151 #endif 152 .vfs_statfs = zfs_statfs, 153 .vfs_vget = zfs_vget, 154 .vfs_sync = zfs_sync, 155 .vfs_checkexp = zfs_checkexp, 156 .vfs_fhtovp = zfs_fhtovp, 157 .vfs_quotactl = zfs_quotactl, 158 }; 159 160 #ifdef VFCF_CROSS_COPY_FILE_RANGE 161 VFS_SET(zfs_vfsops, zfs, 162 VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE); 163 #else 164 VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL); 165 #endif 166 167 /* 168 * We need to keep a count of active fs's. 169 * This is necessary to prevent our module 170 * from being unloaded after a umount -f 171 */ 172 static uint32_t zfs_active_fs_count = 0; 173 174 int 175 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 176 char *setpoint) 177 { 178 int error; 179 zfsvfs_t *zfvp; 180 vfs_t *vfsp; 181 objset_t *os; 182 uint64_t tmp = *val; 183 184 error = dmu_objset_from_ds(ds, &os); 185 if (error != 0) 186 return (error); 187 188 error = getzfsvfs_impl(os, &zfvp); 189 if (error != 0) 190 return (error); 191 if (zfvp == NULL) 192 return (ENOENT); 193 vfsp = zfvp->z_vfs; 194 switch (zfs_prop) { 195 case ZFS_PROP_ATIME: 196 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 197 tmp = 0; 198 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 199 tmp = 1; 200 break; 201 case ZFS_PROP_DEVICES: 202 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 203 tmp = 0; 204 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 205 tmp = 1; 206 break; 207 case ZFS_PROP_EXEC: 208 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 209 tmp = 0; 210 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 211 tmp = 1; 212 break; 213 case ZFS_PROP_SETUID: 214 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 215 tmp = 0; 216 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 217 tmp = 1; 218 break; 219 case ZFS_PROP_READONLY: 220 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 221 tmp = 0; 222 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 223 tmp = 1; 224 break; 225 case ZFS_PROP_XATTR: 226 if (zfvp->z_flags & ZSB_XATTR) 227 tmp = zfvp->z_xattr; 228 break; 229 case ZFS_PROP_NBMAND: 230 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 231 tmp = 0; 232 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 233 tmp = 1; 234 break; 235 default: 236 vfs_unbusy(vfsp); 237 return (ENOENT); 238 } 239 240 vfs_unbusy(vfsp); 241 if (tmp != *val) { 242 if (setpoint) 243 (void) strcpy(setpoint, "temporary"); 244 *val = tmp; 245 } 246 return (0); 247 } 248 249 static int 250 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 251 { 252 int error = 0; 253 char buf[32]; 254 uint64_t usedobj, quotaobj; 255 uint64_t quota, used = 0; 256 timespec_t now; 257 258 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 259 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 260 261 if (quotaobj == 0 || zfsvfs->z_replay) { 262 error = ENOENT; 263 goto done; 264 } 265 (void) sprintf(buf, "%llx", (longlong_t)id); 266 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 267 buf, sizeof (quota), 1, "a)) != 0) { 268 dprintf("%s(%d): quotaobj lookup failed\n", 269 __FUNCTION__, __LINE__); 270 goto done; 271 } 272 /* 273 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 274 * So we set them to be the same. 275 */ 276 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 277 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 278 if (error && error != ENOENT) { 279 dprintf("%s(%d): usedobj failed; %d\n", 280 __FUNCTION__, __LINE__, error); 281 goto done; 282 } 283 dqp->dqb_curblocks = btodb(used); 284 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 285 vfs_timestamp(&now); 286 /* 287 * Setting this to 0 causes FreeBSD quota(8) to print 288 * the number of days since the epoch, which isn't 289 * particularly useful. 290 */ 291 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 292 done: 293 return (error); 294 } 295 296 static int 297 #if __FreeBSD_version >= 1400018 298 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 299 #else 300 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 301 #endif 302 { 303 zfsvfs_t *zfsvfs = vfsp->vfs_data; 304 struct thread *td; 305 int cmd, type, error = 0; 306 int bitsize; 307 zfs_userquota_prop_t quota_type; 308 struct dqblk64 dqblk = { 0 }; 309 310 td = curthread; 311 cmd = cmds >> SUBCMDSHIFT; 312 type = cmds & SUBCMDMASK; 313 314 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 315 return (error); 316 if (id == -1) { 317 switch (type) { 318 case USRQUOTA: 319 id = td->td_ucred->cr_ruid; 320 break; 321 case GRPQUOTA: 322 id = td->td_ucred->cr_rgid; 323 break; 324 default: 325 error = EINVAL; 326 #if __FreeBSD_version < 1400018 327 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 328 vfs_unbusy(vfsp); 329 #endif 330 goto done; 331 } 332 } 333 /* 334 * Map BSD type to: 335 * ZFS_PROP_USERUSED, 336 * ZFS_PROP_USERQUOTA, 337 * ZFS_PROP_GROUPUSED, 338 * ZFS_PROP_GROUPQUOTA 339 */ 340 switch (cmd) { 341 case Q_SETQUOTA: 342 case Q_SETQUOTA32: 343 if (type == USRQUOTA) 344 quota_type = ZFS_PROP_USERQUOTA; 345 else if (type == GRPQUOTA) 346 quota_type = ZFS_PROP_GROUPQUOTA; 347 else 348 error = EINVAL; 349 break; 350 case Q_GETQUOTA: 351 case Q_GETQUOTA32: 352 if (type == USRQUOTA) 353 quota_type = ZFS_PROP_USERUSED; 354 else if (type == GRPQUOTA) 355 quota_type = ZFS_PROP_GROUPUSED; 356 else 357 error = EINVAL; 358 break; 359 } 360 361 /* 362 * Depending on the cmd, we may need to get 363 * the ruid and domain (see fuidstr_to_sid?), 364 * the fuid (how?), or other information. 365 * Create fuid using zfs_fuid_create(zfsvfs, id, 366 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 367 * I think I can use just the id? 368 * 369 * Look at zfs_id_overquota() to look up a quota. 370 * zap_lookup(something, quotaobj, fuidstring, 371 * sizeof (long long), 1, "a) 372 * 373 * See zfs_set_userquota() to set a quota. 374 */ 375 if ((uint32_t)type >= MAXQUOTAS) { 376 error = EINVAL; 377 goto done; 378 } 379 380 switch (cmd) { 381 case Q_GETQUOTASIZE: 382 bitsize = 64; 383 error = copyout(&bitsize, arg, sizeof (int)); 384 break; 385 case Q_QUOTAON: 386 // As far as I can tell, you can't turn quotas on or off on zfs 387 error = 0; 388 #if __FreeBSD_version < 1400018 389 vfs_unbusy(vfsp); 390 #endif 391 break; 392 case Q_QUOTAOFF: 393 error = ENOTSUP; 394 #if __FreeBSD_version < 1400018 395 vfs_unbusy(vfsp); 396 #endif 397 break; 398 case Q_SETQUOTA: 399 error = copyin(arg, &dqblk, sizeof (dqblk)); 400 if (error == 0) 401 error = zfs_set_userquota(zfsvfs, quota_type, 402 "", id, dbtob(dqblk.dqb_bhardlimit)); 403 break; 404 case Q_GETQUOTA: 405 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 406 if (error == 0) 407 error = copyout(&dqblk, arg, sizeof (dqblk)); 408 break; 409 default: 410 error = EINVAL; 411 break; 412 } 413 done: 414 zfs_exit(zfsvfs, FTAG); 415 return (error); 416 } 417 418 419 boolean_t 420 zfs_is_readonly(zfsvfs_t *zfsvfs) 421 { 422 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 423 } 424 425 static int 426 zfs_sync(vfs_t *vfsp, int waitfor) 427 { 428 429 /* 430 * Data integrity is job one. We don't want a compromised kernel 431 * writing to the storage pool, so we never sync during panic. 432 */ 433 if (panicstr) 434 return (0); 435 436 /* 437 * Ignore the system syncher. ZFS already commits async data 438 * at zfs_txg_timeout intervals. 439 */ 440 if (waitfor == MNT_LAZY) 441 return (0); 442 443 if (vfsp != NULL) { 444 /* 445 * Sync a specific filesystem. 446 */ 447 zfsvfs_t *zfsvfs = vfsp->vfs_data; 448 dsl_pool_t *dp; 449 int error; 450 451 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 452 return (error); 453 dp = dmu_objset_pool(zfsvfs->z_os); 454 455 /* 456 * If the system is shutting down, then skip any 457 * filesystems which may exist on a suspended pool. 458 */ 459 if (rebooting && spa_suspended(dp->dp_spa)) { 460 zfs_exit(zfsvfs, FTAG); 461 return (0); 462 } 463 464 if (zfsvfs->z_log != NULL) 465 zil_commit(zfsvfs->z_log, 0); 466 467 zfs_exit(zfsvfs, FTAG); 468 } else { 469 /* 470 * Sync all ZFS filesystems. This is what happens when you 471 * run sync(8). Unlike other filesystems, ZFS honors the 472 * request by waiting for all pools to commit all dirty data. 473 */ 474 spa_sync_allpools(); 475 } 476 477 return (0); 478 } 479 480 static void 481 atime_changed_cb(void *arg, uint64_t newval) 482 { 483 zfsvfs_t *zfsvfs = arg; 484 485 if (newval == TRUE) { 486 zfsvfs->z_atime = TRUE; 487 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 488 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 489 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 490 } else { 491 zfsvfs->z_atime = FALSE; 492 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 493 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 494 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 495 } 496 } 497 498 static void 499 xattr_changed_cb(void *arg, uint64_t newval) 500 { 501 zfsvfs_t *zfsvfs = arg; 502 503 if (newval == ZFS_XATTR_OFF) { 504 zfsvfs->z_flags &= ~ZSB_XATTR; 505 } else { 506 zfsvfs->z_flags |= ZSB_XATTR; 507 508 if (newval == ZFS_XATTR_SA) 509 zfsvfs->z_xattr_sa = B_TRUE; 510 else 511 zfsvfs->z_xattr_sa = B_FALSE; 512 } 513 } 514 515 static void 516 blksz_changed_cb(void *arg, uint64_t newval) 517 { 518 zfsvfs_t *zfsvfs = arg; 519 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 520 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 521 ASSERT(ISP2(newval)); 522 523 zfsvfs->z_max_blksz = newval; 524 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 525 } 526 527 static void 528 readonly_changed_cb(void *arg, uint64_t newval) 529 { 530 zfsvfs_t *zfsvfs = arg; 531 532 if (newval) { 533 /* XXX locking on vfs_flag? */ 534 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 535 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 536 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 537 } else { 538 /* XXX locking on vfs_flag? */ 539 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 540 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 541 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 542 } 543 } 544 545 static void 546 setuid_changed_cb(void *arg, uint64_t newval) 547 { 548 zfsvfs_t *zfsvfs = arg; 549 550 if (newval == FALSE) { 551 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 552 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 553 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 554 } else { 555 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 556 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 557 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 558 } 559 } 560 561 static void 562 exec_changed_cb(void *arg, uint64_t newval) 563 { 564 zfsvfs_t *zfsvfs = arg; 565 566 if (newval == FALSE) { 567 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 568 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 569 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 570 } else { 571 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 572 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 573 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 574 } 575 } 576 577 /* 578 * The nbmand mount option can be changed at mount time. 579 * We can't allow it to be toggled on live file systems or incorrect 580 * behavior may be seen from cifs clients 581 * 582 * This property isn't registered via dsl_prop_register(), but this callback 583 * will be called when a file system is first mounted 584 */ 585 static void 586 nbmand_changed_cb(void *arg, uint64_t newval) 587 { 588 zfsvfs_t *zfsvfs = arg; 589 if (newval == FALSE) { 590 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 591 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 592 } else { 593 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 594 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 595 } 596 } 597 598 static void 599 snapdir_changed_cb(void *arg, uint64_t newval) 600 { 601 zfsvfs_t *zfsvfs = arg; 602 603 zfsvfs->z_show_ctldir = newval; 604 } 605 606 static void 607 acl_mode_changed_cb(void *arg, uint64_t newval) 608 { 609 zfsvfs_t *zfsvfs = arg; 610 611 zfsvfs->z_acl_mode = newval; 612 } 613 614 static void 615 acl_inherit_changed_cb(void *arg, uint64_t newval) 616 { 617 zfsvfs_t *zfsvfs = arg; 618 619 zfsvfs->z_acl_inherit = newval; 620 } 621 622 static void 623 acl_type_changed_cb(void *arg, uint64_t newval) 624 { 625 zfsvfs_t *zfsvfs = arg; 626 627 zfsvfs->z_acl_type = newval; 628 } 629 630 static int 631 zfs_register_callbacks(vfs_t *vfsp) 632 { 633 struct dsl_dataset *ds = NULL; 634 objset_t *os = NULL; 635 zfsvfs_t *zfsvfs = NULL; 636 uint64_t nbmand; 637 boolean_t readonly = B_FALSE; 638 boolean_t do_readonly = B_FALSE; 639 boolean_t setuid = B_FALSE; 640 boolean_t do_setuid = B_FALSE; 641 boolean_t exec = B_FALSE; 642 boolean_t do_exec = B_FALSE; 643 boolean_t xattr = B_FALSE; 644 boolean_t atime = B_FALSE; 645 boolean_t do_atime = B_FALSE; 646 boolean_t do_xattr = B_FALSE; 647 int error = 0; 648 649 ASSERT3P(vfsp, !=, NULL); 650 zfsvfs = vfsp->vfs_data; 651 ASSERT3P(zfsvfs, !=, NULL); 652 os = zfsvfs->z_os; 653 654 /* 655 * This function can be called for a snapshot when we update snapshot's 656 * mount point, which isn't really supported. 657 */ 658 if (dmu_objset_is_snapshot(os)) 659 return (EOPNOTSUPP); 660 661 /* 662 * The act of registering our callbacks will destroy any mount 663 * options we may have. In order to enable temporary overrides 664 * of mount options, we stash away the current values and 665 * restore them after we register the callbacks. 666 */ 667 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 668 !spa_writeable(dmu_objset_spa(os))) { 669 readonly = B_TRUE; 670 do_readonly = B_TRUE; 671 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 672 readonly = B_FALSE; 673 do_readonly = B_TRUE; 674 } 675 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 676 setuid = B_FALSE; 677 do_setuid = B_TRUE; 678 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 679 setuid = B_TRUE; 680 do_setuid = B_TRUE; 681 } 682 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 683 exec = B_FALSE; 684 do_exec = B_TRUE; 685 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 686 exec = B_TRUE; 687 do_exec = B_TRUE; 688 } 689 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 690 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 691 do_xattr = B_TRUE; 692 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 693 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 694 do_xattr = B_TRUE; 695 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 696 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 697 do_xattr = B_TRUE; 698 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 699 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 700 do_xattr = B_TRUE; 701 } 702 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 703 atime = B_FALSE; 704 do_atime = B_TRUE; 705 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 706 atime = B_TRUE; 707 do_atime = B_TRUE; 708 } 709 710 /* 711 * We need to enter pool configuration here, so that we can use 712 * dsl_prop_get_int_ds() to handle the special nbmand property below. 713 * dsl_prop_get_integer() can not be used, because it has to acquire 714 * spa_namespace_lock and we can not do that because we already hold 715 * z_teardown_lock. The problem is that spa_write_cachefile() is called 716 * with spa_namespace_lock held and the function calls ZFS vnode 717 * operations to write the cache file and thus z_teardown_lock is 718 * acquired after spa_namespace_lock. 719 */ 720 ds = dmu_objset_ds(os); 721 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 722 723 /* 724 * nbmand is a special property. It can only be changed at 725 * mount time. 726 * 727 * This is weird, but it is documented to only be changeable 728 * at mount time. 729 */ 730 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 731 nbmand = B_FALSE; 732 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 733 nbmand = B_TRUE; 734 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) { 735 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 736 return (error); 737 } 738 739 /* 740 * Register property callbacks. 741 * 742 * It would probably be fine to just check for i/o error from 743 * the first prop_register(), but I guess I like to go 744 * overboard... 745 */ 746 error = dsl_prop_register(ds, 747 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 748 error = error ? error : dsl_prop_register(ds, 749 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 750 error = error ? error : dsl_prop_register(ds, 751 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 752 error = error ? error : dsl_prop_register(ds, 753 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 754 error = error ? error : dsl_prop_register(ds, 755 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 756 error = error ? error : dsl_prop_register(ds, 757 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 758 error = error ? error : dsl_prop_register(ds, 759 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 760 error = error ? error : dsl_prop_register(ds, 761 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 762 error = error ? error : dsl_prop_register(ds, 763 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 764 error = error ? error : dsl_prop_register(ds, 765 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 766 zfsvfs); 767 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 768 if (error) 769 goto unregister; 770 771 /* 772 * Invoke our callbacks to restore temporary mount options. 773 */ 774 if (do_readonly) 775 readonly_changed_cb(zfsvfs, readonly); 776 if (do_setuid) 777 setuid_changed_cb(zfsvfs, setuid); 778 if (do_exec) 779 exec_changed_cb(zfsvfs, exec); 780 if (do_xattr) 781 xattr_changed_cb(zfsvfs, xattr); 782 if (do_atime) 783 atime_changed_cb(zfsvfs, atime); 784 785 nbmand_changed_cb(zfsvfs, nbmand); 786 787 return (0); 788 789 unregister: 790 dsl_prop_unregister_all(ds, zfsvfs); 791 return (error); 792 } 793 794 /* 795 * Associate this zfsvfs with the given objset, which must be owned. 796 * This will cache a bunch of on-disk state from the objset in the 797 * zfsvfs. 798 */ 799 static int 800 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 801 { 802 int error; 803 uint64_t val; 804 805 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 806 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 807 zfsvfs->z_os = os; 808 809 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 810 if (error != 0) 811 return (error); 812 if (zfsvfs->z_version > 813 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 814 (void) printf("Can't mount a version %lld file system " 815 "on a version %lld pool\n. Pool must be upgraded to mount " 816 "this file system.", (u_longlong_t)zfsvfs->z_version, 817 (u_longlong_t)spa_version(dmu_objset_spa(os))); 818 return (SET_ERROR(ENOTSUP)); 819 } 820 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 821 if (error != 0) 822 return (error); 823 zfsvfs->z_norm = (int)val; 824 825 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 826 if (error != 0) 827 return (error); 828 zfsvfs->z_utf8 = (val != 0); 829 830 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 831 if (error != 0) 832 return (error); 833 zfsvfs->z_case = (uint_t)val; 834 835 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 836 if (error != 0) 837 return (error); 838 zfsvfs->z_acl_type = (uint_t)val; 839 840 /* 841 * Fold case on file systems that are always or sometimes case 842 * insensitive. 843 */ 844 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 845 zfsvfs->z_case == ZFS_CASE_MIXED) 846 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 847 848 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 849 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 850 851 uint64_t sa_obj = 0; 852 if (zfsvfs->z_use_sa) { 853 /* should either have both of these objects or none */ 854 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 855 &sa_obj); 856 if (error != 0) 857 return (error); 858 859 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 860 if (error == 0 && val == ZFS_XATTR_SA) 861 zfsvfs->z_xattr_sa = B_TRUE; 862 } 863 864 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 865 &zfsvfs->z_attr_table); 866 if (error != 0) 867 return (error); 868 869 if (zfsvfs->z_version >= ZPL_VERSION_SA) 870 sa_register_update_callback(os, zfs_sa_upgrade); 871 872 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 873 &zfsvfs->z_root); 874 if (error != 0) 875 return (error); 876 ASSERT3U(zfsvfs->z_root, !=, 0); 877 878 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 879 &zfsvfs->z_unlinkedobj); 880 if (error != 0) 881 return (error); 882 883 error = zap_lookup(os, MASTER_NODE_OBJ, 884 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 885 8, 1, &zfsvfs->z_userquota_obj); 886 if (error == ENOENT) 887 zfsvfs->z_userquota_obj = 0; 888 else if (error != 0) 889 return (error); 890 891 error = zap_lookup(os, MASTER_NODE_OBJ, 892 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 893 8, 1, &zfsvfs->z_groupquota_obj); 894 if (error == ENOENT) 895 zfsvfs->z_groupquota_obj = 0; 896 else if (error != 0) 897 return (error); 898 899 error = zap_lookup(os, MASTER_NODE_OBJ, 900 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 901 8, 1, &zfsvfs->z_projectquota_obj); 902 if (error == ENOENT) 903 zfsvfs->z_projectquota_obj = 0; 904 else if (error != 0) 905 return (error); 906 907 error = zap_lookup(os, MASTER_NODE_OBJ, 908 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 909 8, 1, &zfsvfs->z_userobjquota_obj); 910 if (error == ENOENT) 911 zfsvfs->z_userobjquota_obj = 0; 912 else if (error != 0) 913 return (error); 914 915 error = zap_lookup(os, MASTER_NODE_OBJ, 916 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 917 8, 1, &zfsvfs->z_groupobjquota_obj); 918 if (error == ENOENT) 919 zfsvfs->z_groupobjquota_obj = 0; 920 else if (error != 0) 921 return (error); 922 923 error = zap_lookup(os, MASTER_NODE_OBJ, 924 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 925 8, 1, &zfsvfs->z_projectobjquota_obj); 926 if (error == ENOENT) 927 zfsvfs->z_projectobjquota_obj = 0; 928 else if (error != 0) 929 return (error); 930 931 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 932 &zfsvfs->z_fuid_obj); 933 if (error == ENOENT) 934 zfsvfs->z_fuid_obj = 0; 935 else if (error != 0) 936 return (error); 937 938 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 939 &zfsvfs->z_shares_dir); 940 if (error == ENOENT) 941 zfsvfs->z_shares_dir = 0; 942 else if (error != 0) 943 return (error); 944 945 /* 946 * Only use the name cache if we are looking for a 947 * name on a file system that does not require normalization 948 * or case folding. We can also look there if we happen to be 949 * on a non-normalizing, mixed sensitivity file system IF we 950 * are looking for the exact name (which is always the case on 951 * FreeBSD). 952 */ 953 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 954 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 955 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 956 957 return (0); 958 } 959 960 taskq_t *zfsvfs_taskq; 961 962 static void 963 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 964 { 965 966 zfs_unlinked_drain((zfsvfs_t *)context); 967 } 968 969 int 970 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 971 { 972 objset_t *os; 973 zfsvfs_t *zfsvfs; 974 int error; 975 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 976 977 /* 978 * XXX: Fix struct statfs so this isn't necessary! 979 * 980 * The 'osname' is used as the filesystem's special node, which means 981 * it must fit in statfs.f_mntfromname, or else it can't be 982 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 983 * 'zfs unmount' to think it's not mounted when it is. 984 */ 985 if (strlen(osname) >= MNAMELEN) 986 return (SET_ERROR(ENAMETOOLONG)); 987 988 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 989 990 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 991 &os); 992 if (error != 0) { 993 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 994 return (error); 995 } 996 997 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 998 999 return (error); 1000 } 1001 1002 1003 int 1004 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 1005 { 1006 int error; 1007 1008 zfsvfs->z_vfs = NULL; 1009 zfsvfs->z_parent = zfsvfs; 1010 1011 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1012 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 1013 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1014 offsetof(znode_t, z_link_node)); 1015 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 1016 zfsvfs_task_unlinked_drain, zfsvfs); 1017 ZFS_TEARDOWN_INIT(zfsvfs); 1018 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 1019 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1020 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1021 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1022 1023 error = zfsvfs_init(zfsvfs, os); 1024 if (error != 0) { 1025 dmu_objset_disown(os, B_TRUE, zfsvfs); 1026 *zfvp = NULL; 1027 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1028 return (error); 1029 } 1030 1031 *zfvp = zfsvfs; 1032 return (0); 1033 } 1034 1035 static int 1036 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1037 { 1038 int error; 1039 1040 /* 1041 * Check for a bad on-disk format version now since we 1042 * lied about owning the dataset readonly before. 1043 */ 1044 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && 1045 dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) 1046 return (SET_ERROR(EROFS)); 1047 1048 error = zfs_register_callbacks(zfsvfs->z_vfs); 1049 if (error) 1050 return (error); 1051 1052 /* 1053 * If we are not mounting (ie: online recv), then we don't 1054 * have to worry about replaying the log as we blocked all 1055 * operations out since we closed the ZIL. 1056 */ 1057 if (mounting) { 1058 boolean_t readonly; 1059 1060 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); 1061 error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); 1062 if (error) 1063 return (error); 1064 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1065 &zfsvfs->z_kstat.dk_zil_sums); 1066 1067 /* 1068 * During replay we remove the read only flag to 1069 * allow replays to succeed. 1070 */ 1071 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1072 if (readonly != 0) { 1073 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1074 } else { 1075 dsl_dir_t *dd; 1076 zap_stats_t zs; 1077 1078 if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, 1079 &zs) == 0) { 1080 dataset_kstats_update_nunlinks_kstat( 1081 &zfsvfs->z_kstat, zs.zs_num_entries); 1082 dprintf_ds(zfsvfs->z_os->os_dsl_dataset, 1083 "num_entries in unlinked set: %llu", 1084 (u_longlong_t)zs.zs_num_entries); 1085 } 1086 1087 zfs_unlinked_drain(zfsvfs); 1088 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1089 dd->dd_activity_cancelled = B_FALSE; 1090 } 1091 1092 /* 1093 * Parse and replay the intent log. 1094 * 1095 * Because of ziltest, this must be done after 1096 * zfs_unlinked_drain(). (Further note: ziltest 1097 * doesn't use readonly mounts, where 1098 * zfs_unlinked_drain() isn't called.) This is because 1099 * ziltest causes spa_sync() to think it's committed, 1100 * but actually it is not, so the intent log contains 1101 * many txg's worth of changes. 1102 * 1103 * In particular, if object N is in the unlinked set in 1104 * the last txg to actually sync, then it could be 1105 * actually freed in a later txg and then reallocated 1106 * in a yet later txg. This would write a "create 1107 * object N" record to the intent log. Normally, this 1108 * would be fine because the spa_sync() would have 1109 * written out the fact that object N is free, before 1110 * we could write the "create object N" intent log 1111 * record. 1112 * 1113 * But when we are in ziltest mode, we advance the "open 1114 * txg" without actually spa_sync()-ing the changes to 1115 * disk. So we would see that object N is still 1116 * allocated and in the unlinked set, and there is an 1117 * intent log record saying to allocate it. 1118 */ 1119 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1120 if (zil_replay_disable) { 1121 zil_destroy(zfsvfs->z_log, B_FALSE); 1122 } else { 1123 boolean_t use_nc = zfsvfs->z_use_namecache; 1124 zfsvfs->z_use_namecache = B_FALSE; 1125 zfsvfs->z_replay = B_TRUE; 1126 zil_replay(zfsvfs->z_os, zfsvfs, 1127 zfs_replay_vector); 1128 zfsvfs->z_replay = B_FALSE; 1129 zfsvfs->z_use_namecache = use_nc; 1130 } 1131 } 1132 1133 /* restore readonly bit */ 1134 if (readonly != 0) 1135 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 1136 } else { 1137 ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); 1138 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1139 &zfsvfs->z_kstat.dk_zil_sums); 1140 } 1141 1142 /* 1143 * Set the objset user_ptr to track its zfsvfs. 1144 */ 1145 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1146 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1147 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1148 1149 return (0); 1150 } 1151 1152 void 1153 zfsvfs_free(zfsvfs_t *zfsvfs) 1154 { 1155 int i; 1156 1157 zfs_fuid_destroy(zfsvfs); 1158 1159 mutex_destroy(&zfsvfs->z_znodes_lock); 1160 mutex_destroy(&zfsvfs->z_lock); 1161 list_destroy(&zfsvfs->z_all_znodes); 1162 ZFS_TEARDOWN_DESTROY(zfsvfs); 1163 ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); 1164 rw_destroy(&zfsvfs->z_fuid_lock); 1165 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1166 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1167 dataset_kstats_destroy(&zfsvfs->z_kstat); 1168 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1169 } 1170 1171 static void 1172 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1173 { 1174 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1175 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1176 } 1177 1178 static int 1179 zfs_domount(vfs_t *vfsp, char *osname) 1180 { 1181 uint64_t recordsize, fsid_guid; 1182 int error = 0; 1183 zfsvfs_t *zfsvfs; 1184 1185 ASSERT3P(vfsp, !=, NULL); 1186 ASSERT3P(osname, !=, NULL); 1187 1188 error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); 1189 if (error) 1190 return (error); 1191 zfsvfs->z_vfs = vfsp; 1192 1193 if ((error = dsl_prop_get_integer(osname, 1194 "recordsize", &recordsize, NULL))) 1195 goto out; 1196 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1197 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1198 1199 vfsp->vfs_data = zfsvfs; 1200 vfsp->mnt_flag |= MNT_LOCAL; 1201 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1202 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1203 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1204 /* 1205 * This can cause a loss of coherence between ARC and page cache 1206 * on ZoF - unclear if the problem is in FreeBSD or ZoF 1207 */ 1208 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1209 vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1210 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1211 1212 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1213 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1214 #endif 1215 /* 1216 * The fsid is 64 bits, composed of an 8-bit fs type, which 1217 * separates our fsid from any other filesystem types, and a 1218 * 56-bit objset unique ID. The objset unique ID is unique to 1219 * all objsets open on this system, provided by unique_create(). 1220 * The 8-bit fs type must be put in the low bits of fsid[1] 1221 * because that's where other Solaris filesystems put it. 1222 */ 1223 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1224 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1225 vfsp->vfs_fsid.val[0] = fsid_guid; 1226 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1227 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1228 1229 /* 1230 * Set features for file system. 1231 */ 1232 zfs_set_fuid_feature(zfsvfs); 1233 1234 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1235 uint64_t pval; 1236 1237 atime_changed_cb(zfsvfs, B_FALSE); 1238 readonly_changed_cb(zfsvfs, B_TRUE); 1239 if ((error = dsl_prop_get_integer(osname, 1240 "xattr", &pval, NULL))) 1241 goto out; 1242 xattr_changed_cb(zfsvfs, pval); 1243 if ((error = dsl_prop_get_integer(osname, 1244 "acltype", &pval, NULL))) 1245 goto out; 1246 acl_type_changed_cb(zfsvfs, pval); 1247 zfsvfs->z_issnap = B_TRUE; 1248 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1249 1250 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1251 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1252 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1253 } else { 1254 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1255 goto out; 1256 } 1257 1258 vfs_mountedfrom(vfsp, osname); 1259 1260 if (!zfsvfs->z_issnap) 1261 zfsctl_create(zfsvfs); 1262 out: 1263 if (error) { 1264 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1265 zfsvfs_free(zfsvfs); 1266 } else { 1267 atomic_inc_32(&zfs_active_fs_count); 1268 } 1269 1270 return (error); 1271 } 1272 1273 static void 1274 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1275 { 1276 objset_t *os = zfsvfs->z_os; 1277 1278 if (!dmu_objset_is_snapshot(os)) 1279 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); 1280 } 1281 1282 static int 1283 getpoolname(const char *osname, char *poolname) 1284 { 1285 char *p; 1286 1287 p = strchr(osname, '/'); 1288 if (p == NULL) { 1289 if (strlen(osname) >= MAXNAMELEN) 1290 return (ENAMETOOLONG); 1291 (void) strcpy(poolname, osname); 1292 } else { 1293 if (p - osname >= MAXNAMELEN) 1294 return (ENAMETOOLONG); 1295 (void) strlcpy(poolname, osname, p - osname + 1); 1296 } 1297 return (0); 1298 } 1299 1300 static void 1301 fetch_osname_options(char *name, bool *checkpointrewind) 1302 { 1303 1304 if (name[0] == '!') { 1305 *checkpointrewind = true; 1306 memmove(name, name + 1, strlen(name)); 1307 } else { 1308 *checkpointrewind = false; 1309 } 1310 } 1311 1312 static int 1313 zfs_mount(vfs_t *vfsp) 1314 { 1315 kthread_t *td = curthread; 1316 vnode_t *mvp = vfsp->mnt_vnodecovered; 1317 cred_t *cr = td->td_ucred; 1318 char *osname; 1319 int error = 0; 1320 int canwrite; 1321 bool checkpointrewind, isctlsnap = false; 1322 1323 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1324 return (SET_ERROR(EINVAL)); 1325 1326 /* 1327 * If full-owner-access is enabled and delegated administration is 1328 * turned on, we must set nosuid. 1329 */ 1330 if (zfs_super_owner && 1331 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1332 secpolicy_fs_mount_clearopts(cr, vfsp); 1333 } 1334 1335 fetch_osname_options(osname, &checkpointrewind); 1336 isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) && 1337 strchr(osname, '@') != NULL); 1338 1339 /* 1340 * Check for mount privilege? 1341 * 1342 * If we don't have privilege then see if 1343 * we have local permission to allow it 1344 */ 1345 error = secpolicy_fs_mount(cr, mvp, vfsp); 1346 if (error && isctlsnap) { 1347 secpolicy_fs_mount_clearopts(cr, vfsp); 1348 } else if (error) { 1349 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1350 goto out; 1351 1352 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1353 vattr_t vattr; 1354 1355 /* 1356 * Make sure user is the owner of the mount point 1357 * or has sufficient privileges. 1358 */ 1359 1360 vattr.va_mask = AT_UID; 1361 1362 vn_lock(mvp, LK_SHARED | LK_RETRY); 1363 if (VOP_GETATTR(mvp, &vattr, cr)) { 1364 VOP_UNLOCK1(mvp); 1365 goto out; 1366 } 1367 1368 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1369 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1370 VOP_UNLOCK1(mvp); 1371 goto out; 1372 } 1373 VOP_UNLOCK1(mvp); 1374 } 1375 1376 secpolicy_fs_mount_clearopts(cr, vfsp); 1377 } 1378 1379 /* 1380 * Refuse to mount a filesystem if we are in a local zone and the 1381 * dataset is not visible. 1382 */ 1383 if (!INGLOBALZONE(curproc) && 1384 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1385 boolean_t mount_snapshot = B_FALSE; 1386 1387 /* 1388 * Snapshots may be mounted in .zfs for unjailed datasets 1389 * if allowed by the jail param zfs.mount_snapshot. 1390 */ 1391 if (isctlsnap) { 1392 struct prison *pr; 1393 struct zfs_jailparam *zjp; 1394 1395 pr = curthread->td_ucred->cr_prison; 1396 mtx_lock(&pr->pr_mtx); 1397 zjp = osd_jail_get(pr, zfs_jailparam_slot); 1398 mtx_unlock(&pr->pr_mtx); 1399 if (zjp && zjp->mount_snapshot) 1400 mount_snapshot = B_TRUE; 1401 } 1402 if (!mount_snapshot) { 1403 error = SET_ERROR(EPERM); 1404 goto out; 1405 } 1406 } 1407 1408 vfsp->vfs_flag |= MNT_NFS4ACLS; 1409 1410 /* 1411 * When doing a remount, we simply refresh our temporary properties 1412 * according to those options set in the current VFS options. 1413 */ 1414 if (vfsp->vfs_flag & MS_REMOUNT) { 1415 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1416 1417 /* 1418 * Refresh mount options with z_teardown_lock blocking I/O while 1419 * the filesystem is in an inconsistent state. 1420 * The lock also serializes this code with filesystem 1421 * manipulations between entry to zfs_suspend_fs() and return 1422 * from zfs_resume_fs(). 1423 */ 1424 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1425 zfs_unregister_callbacks(zfsvfs); 1426 error = zfs_register_callbacks(vfsp); 1427 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1428 goto out; 1429 } 1430 1431 /* Initial root mount: try hard to import the requested root pool. */ 1432 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1433 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1434 char pname[MAXNAMELEN]; 1435 1436 error = getpoolname(osname, pname); 1437 if (error == 0) 1438 error = spa_import_rootpool(pname, checkpointrewind); 1439 if (error) 1440 goto out; 1441 } 1442 DROP_GIANT(); 1443 error = zfs_domount(vfsp, osname); 1444 PICKUP_GIANT(); 1445 1446 out: 1447 return (error); 1448 } 1449 1450 static int 1451 zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1452 { 1453 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1454 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1455 int error; 1456 1457 statp->f_version = STATFS_VERSION; 1458 1459 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1460 return (error); 1461 1462 dmu_objset_space(zfsvfs->z_os, 1463 &refdbytes, &availbytes, &usedobjs, &availobjs); 1464 1465 /* 1466 * The underlying storage pool actually uses multiple block sizes. 1467 * We report the fragsize as the smallest block size we support, 1468 * and we report our blocksize as the filesystem's maximum blocksize. 1469 */ 1470 statp->f_bsize = SPA_MINBLOCKSIZE; 1471 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1472 1473 /* 1474 * The following report "total" blocks of various kinds in the 1475 * file system, but reported in terms of f_frsize - the 1476 * "fragment" size. 1477 */ 1478 1479 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1480 statp->f_bfree = availbytes / statp->f_bsize; 1481 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1482 1483 /* 1484 * statvfs() should really be called statufs(), because it assumes 1485 * static metadata. ZFS doesn't preallocate files, so the best 1486 * we can do is report the max that could possibly fit in f_files, 1487 * and that minus the number actually used in f_ffree. 1488 * For f_ffree, report the smaller of the number of object available 1489 * and the number of blocks (each object will take at least a block). 1490 */ 1491 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1492 statp->f_files = statp->f_ffree + usedobjs; 1493 1494 /* 1495 * We're a zfs filesystem. 1496 */ 1497 strlcpy(statp->f_fstypename, "zfs", 1498 sizeof (statp->f_fstypename)); 1499 1500 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1501 sizeof (statp->f_mntfromname)); 1502 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1503 sizeof (statp->f_mntonname)); 1504 1505 statp->f_namemax = MAXNAMELEN - 1; 1506 1507 zfs_exit(zfsvfs, FTAG); 1508 return (0); 1509 } 1510 1511 static int 1512 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1513 { 1514 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1515 znode_t *rootzp; 1516 int error; 1517 1518 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1519 return (error); 1520 1521 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1522 if (error == 0) 1523 *vpp = ZTOV(rootzp); 1524 1525 zfs_exit(zfsvfs, FTAG); 1526 1527 if (error == 0) { 1528 error = vn_lock(*vpp, flags); 1529 if (error != 0) { 1530 VN_RELE(*vpp); 1531 *vpp = NULL; 1532 } 1533 } 1534 return (error); 1535 } 1536 1537 /* 1538 * Teardown the zfsvfs::z_os. 1539 * 1540 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 1541 * and 'z_teardown_inactive_lock' held. 1542 */ 1543 static int 1544 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1545 { 1546 znode_t *zp; 1547 dsl_dir_t *dd; 1548 1549 /* 1550 * If someone has not already unmounted this file system, 1551 * drain the zrele_taskq to ensure all active references to the 1552 * zfsvfs_t have been handled only then can it be safely destroyed. 1553 */ 1554 if (zfsvfs->z_os) { 1555 /* 1556 * If we're unmounting we have to wait for the list to 1557 * drain completely. 1558 * 1559 * If we're not unmounting there's no guarantee the list 1560 * will drain completely, but zreles run from the taskq 1561 * may add the parents of dir-based xattrs to the taskq 1562 * so we want to wait for these. 1563 * 1564 * We can safely check z_all_znodes for being empty because the 1565 * VFS has already blocked operations which add to it. 1566 */ 1567 int round = 0; 1568 while (!list_is_empty(&zfsvfs->z_all_znodes)) { 1569 taskq_wait_outstanding(dsl_pool_zrele_taskq( 1570 dmu_objset_pool(zfsvfs->z_os)), 0); 1571 if (++round > 1 && !unmounting) 1572 break; 1573 } 1574 } 1575 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1576 1577 if (!unmounting) { 1578 /* 1579 * We purge the parent filesystem's vfsp as the parent 1580 * filesystem and all of its snapshots have their vnode's 1581 * v_vfsp set to the parent's filesystem's vfsp. Note, 1582 * 'z_parent' is self referential for non-snapshots. 1583 */ 1584 #ifdef FREEBSD_NAMECACHE 1585 #if __FreeBSD_version >= 1300117 1586 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1587 #else 1588 cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 1589 #endif 1590 #endif 1591 } 1592 1593 /* 1594 * Close the zil. NB: Can't close the zil while zfs_inactive 1595 * threads are blocked as zil_close can call zfs_inactive. 1596 */ 1597 if (zfsvfs->z_log) { 1598 zil_close(zfsvfs->z_log); 1599 zfsvfs->z_log = NULL; 1600 } 1601 1602 ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); 1603 1604 /* 1605 * If we are not unmounting (ie: online recv) and someone already 1606 * unmounted this file system while we were doing the switcheroo, 1607 * or a reopen of z_os failed then just bail out now. 1608 */ 1609 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1610 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1611 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1612 return (SET_ERROR(EIO)); 1613 } 1614 1615 /* 1616 * At this point there are no vops active, and any new vops will 1617 * fail with EIO since we have z_teardown_lock for writer (only 1618 * relevant for forced unmount). 1619 * 1620 * Release all holds on dbufs. 1621 */ 1622 mutex_enter(&zfsvfs->z_znodes_lock); 1623 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1624 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1625 if (zp->z_sa_hdl != NULL) { 1626 zfs_znode_dmu_fini(zp); 1627 } 1628 } 1629 mutex_exit(&zfsvfs->z_znodes_lock); 1630 1631 /* 1632 * If we are unmounting, set the unmounted flag and let new vops 1633 * unblock. zfs_inactive will have the unmounted behavior, and all 1634 * other vops will fail with EIO. 1635 */ 1636 if (unmounting) { 1637 zfsvfs->z_unmounted = B_TRUE; 1638 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1639 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1640 } 1641 1642 /* 1643 * z_os will be NULL if there was an error in attempting to reopen 1644 * zfsvfs, so just return as the properties had already been 1645 * unregistered and cached data had been evicted before. 1646 */ 1647 if (zfsvfs->z_os == NULL) 1648 return (0); 1649 1650 /* 1651 * Unregister properties. 1652 */ 1653 zfs_unregister_callbacks(zfsvfs); 1654 1655 /* 1656 * Evict cached data 1657 */ 1658 if (!zfs_is_readonly(zfsvfs)) 1659 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1660 dmu_objset_evict_dbufs(zfsvfs->z_os); 1661 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1662 dsl_dir_cancel_waiters(dd); 1663 1664 return (0); 1665 } 1666 1667 static int 1668 zfs_umount(vfs_t *vfsp, int fflag) 1669 { 1670 kthread_t *td = curthread; 1671 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1672 objset_t *os; 1673 cred_t *cr = td->td_ucred; 1674 int ret; 1675 1676 ret = secpolicy_fs_unmount(cr, vfsp); 1677 if (ret) { 1678 if (dsl_deleg_access((char *)vfsp->vfs_resource, 1679 ZFS_DELEG_PERM_MOUNT, cr)) 1680 return (ret); 1681 } 1682 1683 /* 1684 * Unmount any snapshots mounted under .zfs before unmounting the 1685 * dataset itself. 1686 */ 1687 if (zfsvfs->z_ctldir != NULL) { 1688 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1689 return (ret); 1690 } 1691 1692 if (fflag & MS_FORCE) { 1693 /* 1694 * Mark file system as unmounted before calling 1695 * vflush(FORCECLOSE). This way we ensure no future vnops 1696 * will be called and risk operating on DOOMED vnodes. 1697 */ 1698 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1699 zfsvfs->z_unmounted = B_TRUE; 1700 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1701 } 1702 1703 /* 1704 * Flush all the files. 1705 */ 1706 ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1707 if (ret != 0) 1708 return (ret); 1709 while (taskqueue_cancel(zfsvfs_taskq->tq_queue, 1710 &zfsvfs->z_unlinked_drain_task, NULL) != 0) 1711 taskqueue_drain(zfsvfs_taskq->tq_queue, 1712 &zfsvfs->z_unlinked_drain_task); 1713 1714 VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); 1715 os = zfsvfs->z_os; 1716 1717 /* 1718 * z_os will be NULL if there was an error in 1719 * attempting to reopen zfsvfs. 1720 */ 1721 if (os != NULL) { 1722 /* 1723 * Unset the objset user_ptr. 1724 */ 1725 mutex_enter(&os->os_user_ptr_lock); 1726 dmu_objset_set_user(os, NULL); 1727 mutex_exit(&os->os_user_ptr_lock); 1728 1729 /* 1730 * Finally release the objset 1731 */ 1732 dmu_objset_disown(os, B_TRUE, zfsvfs); 1733 } 1734 1735 /* 1736 * We can now safely destroy the '.zfs' directory node. 1737 */ 1738 if (zfsvfs->z_ctldir != NULL) 1739 zfsctl_destroy(zfsvfs); 1740 zfs_freevfs(vfsp); 1741 1742 return (0); 1743 } 1744 1745 static int 1746 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1747 { 1748 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1749 znode_t *zp; 1750 int err; 1751 1752 /* 1753 * zfs_zget() can't operate on virtual entries like .zfs/ or 1754 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1755 * This will make NFS to switch to LOOKUP instead of using VGET. 1756 */ 1757 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 1758 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 1759 return (EOPNOTSUPP); 1760 1761 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1762 return (err); 1763 err = zfs_zget(zfsvfs, ino, &zp); 1764 if (err == 0 && zp->z_unlinked) { 1765 vrele(ZTOV(zp)); 1766 err = EINVAL; 1767 } 1768 if (err == 0) 1769 *vpp = ZTOV(zp); 1770 zfs_exit(zfsvfs, FTAG); 1771 if (err == 0) { 1772 err = vn_lock(*vpp, flags); 1773 if (err != 0) 1774 vrele(*vpp); 1775 } 1776 if (err != 0) 1777 *vpp = NULL; 1778 return (err); 1779 } 1780 1781 static int 1782 #if __FreeBSD_version >= 1300098 1783 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 1784 struct ucred **credanonp, int *numsecflavors, int *secflavors) 1785 #else 1786 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1787 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1788 #endif 1789 { 1790 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1791 1792 /* 1793 * If this is regular file system vfsp is the same as 1794 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1795 * zfsvfs->z_parent->z_vfs represents parent file system 1796 * which we have to use here, because only this file system 1797 * has mnt_export configured. 1798 */ 1799 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1800 credanonp, numsecflavors, secflavors)); 1801 } 1802 1803 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, 1804 "struct fid bigger than SHORT_FID_LEN"); 1805 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, 1806 "struct fid bigger than LONG_FID_LEN"); 1807 1808 static int 1809 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 1810 { 1811 struct componentname cn; 1812 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1813 znode_t *zp; 1814 vnode_t *dvp; 1815 uint64_t object = 0; 1816 uint64_t fid_gen = 0; 1817 uint64_t setgen = 0; 1818 uint64_t gen_mask; 1819 uint64_t zp_gen; 1820 int i, err; 1821 1822 *vpp = NULL; 1823 1824 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1825 return (err); 1826 1827 /* 1828 * On FreeBSD we can get snapshot's mount point or its parent file 1829 * system mount point depending if snapshot is already mounted or not. 1830 */ 1831 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1832 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1833 uint64_t objsetid = 0; 1834 1835 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1836 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1837 1838 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1839 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1840 1841 zfs_exit(zfsvfs, FTAG); 1842 1843 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1844 if (err) 1845 return (SET_ERROR(EINVAL)); 1846 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1847 return (err); 1848 } 1849 1850 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1851 zfid_short_t *zfid = (zfid_short_t *)fidp; 1852 1853 for (i = 0; i < sizeof (zfid->zf_object); i++) 1854 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1855 1856 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1857 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1858 } else { 1859 zfs_exit(zfsvfs, FTAG); 1860 return (SET_ERROR(EINVAL)); 1861 } 1862 1863 if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { 1864 zfs_exit(zfsvfs, FTAG); 1865 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", 1866 (u_longlong_t)fid_gen, (u_longlong_t)setgen); 1867 return (SET_ERROR(EINVAL)); 1868 } 1869 1870 /* 1871 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 1872 * directory tree. If the object == zfsvfs->z_shares_dir, then 1873 * we are in the .zfs/shares directory tree. 1874 */ 1875 if ((fid_gen == 0 && 1876 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 1877 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 1878 zfs_exit(zfsvfs, FTAG); 1879 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 1880 if (object == ZFSCTL_INO_SNAPDIR) { 1881 cn.cn_nameptr = "snapshot"; 1882 cn.cn_namelen = strlen(cn.cn_nameptr); 1883 cn.cn_nameiop = LOOKUP; 1884 cn.cn_flags = ISLASTCN | LOCKLEAF; 1885 cn.cn_lkflags = flags; 1886 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1887 vput(dvp); 1888 } else if (object == zfsvfs->z_shares_dir) { 1889 /* 1890 * XXX This branch must not be taken, 1891 * if it is, then the lookup below will 1892 * explode. 1893 */ 1894 cn.cn_nameptr = "shares"; 1895 cn.cn_namelen = strlen(cn.cn_nameptr); 1896 cn.cn_nameiop = LOOKUP; 1897 cn.cn_flags = ISLASTCN; 1898 cn.cn_lkflags = flags; 1899 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1900 vput(dvp); 1901 } else { 1902 *vpp = dvp; 1903 } 1904 return (err); 1905 } 1906 1907 gen_mask = -1ULL >> (64 - 8 * i); 1908 1909 dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, 1910 (u_longlong_t)fid_gen, 1911 (u_longlong_t)gen_mask); 1912 if ((err = zfs_zget(zfsvfs, object, &zp))) { 1913 zfs_exit(zfsvfs, FTAG); 1914 return (err); 1915 } 1916 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 1917 sizeof (uint64_t)); 1918 zp_gen = zp_gen & gen_mask; 1919 if (zp_gen == 0) 1920 zp_gen = 1; 1921 if (zp->z_unlinked || zp_gen != fid_gen) { 1922 dprintf("znode gen (%llu) != fid gen (%llu)\n", 1923 (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); 1924 vrele(ZTOV(zp)); 1925 zfs_exit(zfsvfs, FTAG); 1926 return (SET_ERROR(EINVAL)); 1927 } 1928 1929 *vpp = ZTOV(zp); 1930 zfs_exit(zfsvfs, FTAG); 1931 err = vn_lock(*vpp, flags); 1932 if (err == 0) 1933 vnode_create_vobject(*vpp, zp->z_size, curthread); 1934 else 1935 *vpp = NULL; 1936 return (err); 1937 } 1938 1939 /* 1940 * Block out VOPs and close zfsvfs_t::z_os 1941 * 1942 * Note, if successful, then we return with the 'z_teardown_lock' and 1943 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 1944 * dataset and objset intact so that they can be atomically handed off during 1945 * a subsequent rollback or recv operation and the resume thereafter. 1946 */ 1947 int 1948 zfs_suspend_fs(zfsvfs_t *zfsvfs) 1949 { 1950 int error; 1951 1952 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1953 return (error); 1954 1955 return (0); 1956 } 1957 1958 /* 1959 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 1960 * is an invariant across any of the operations that can be performed while the 1961 * filesystem was suspended. Whether it succeeded or failed, the preconditions 1962 * are the same: the relevant objset and associated dataset are owned by 1963 * zfsvfs, held, and long held on entry. 1964 */ 1965 int 1966 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 1967 { 1968 int err; 1969 znode_t *zp; 1970 1971 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 1972 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 1973 1974 /* 1975 * We already own this, so just update the objset_t, as the one we 1976 * had before may have been evicted. 1977 */ 1978 objset_t *os; 1979 VERIFY3P(ds->ds_owner, ==, zfsvfs); 1980 VERIFY(dsl_dataset_long_held(ds)); 1981 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 1982 dsl_pool_config_enter(dp, FTAG); 1983 VERIFY0(dmu_objset_from_ds(ds, &os)); 1984 dsl_pool_config_exit(dp, FTAG); 1985 1986 err = zfsvfs_init(zfsvfs, os); 1987 if (err != 0) 1988 goto bail; 1989 1990 ds->ds_dir->dd_activity_cancelled = B_FALSE; 1991 VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); 1992 1993 zfs_set_fuid_feature(zfsvfs); 1994 1995 /* 1996 * Attempt to re-establish all the active znodes with 1997 * their dbufs. If a zfs_rezget() fails, then we'll let 1998 * any potential callers discover that via zfs_enter_verify_zp 1999 * when they try to use their znode. 2000 */ 2001 mutex_enter(&zfsvfs->z_znodes_lock); 2002 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 2003 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 2004 (void) zfs_rezget(zp); 2005 } 2006 mutex_exit(&zfsvfs->z_znodes_lock); 2007 2008 bail: 2009 /* release the VOPs */ 2010 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2011 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2012 2013 if (err) { 2014 /* 2015 * Since we couldn't setup the sa framework, try to force 2016 * unmount this file system. 2017 */ 2018 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 2019 vfs_ref(zfsvfs->z_vfs); 2020 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2021 } 2022 } 2023 return (err); 2024 } 2025 2026 static void 2027 zfs_freevfs(vfs_t *vfsp) 2028 { 2029 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2030 2031 zfsvfs_free(zfsvfs); 2032 2033 atomic_dec_32(&zfs_active_fs_count); 2034 } 2035 2036 #ifdef __i386__ 2037 static int desiredvnodes_backup; 2038 #include <sys/vmmeter.h> 2039 2040 2041 #include <vm/vm_page.h> 2042 #include <vm/vm_object.h> 2043 #include <vm/vm_kern.h> 2044 #include <vm/vm_map.h> 2045 #endif 2046 2047 static void 2048 zfs_vnodes_adjust(void) 2049 { 2050 #ifdef __i386__ 2051 int newdesiredvnodes; 2052 2053 desiredvnodes_backup = desiredvnodes; 2054 2055 /* 2056 * We calculate newdesiredvnodes the same way it is done in 2057 * vntblinit(). If it is equal to desiredvnodes, it means that 2058 * it wasn't tuned by the administrator and we can tune it down. 2059 */ 2060 newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * 2061 vm_kmem_size / (5 * (sizeof (struct vm_object) + 2062 sizeof (struct vnode)))); 2063 if (newdesiredvnodes == desiredvnodes) 2064 desiredvnodes = (3 * newdesiredvnodes) / 4; 2065 #endif 2066 } 2067 2068 static void 2069 zfs_vnodes_adjust_back(void) 2070 { 2071 2072 #ifdef __i386__ 2073 desiredvnodes = desiredvnodes_backup; 2074 #endif 2075 } 2076 2077 #if __FreeBSD_version >= 1300139 2078 static struct sx zfs_vnlru_lock; 2079 static struct vnode *zfs_vnlru_marker; 2080 #endif 2081 static arc_prune_t *zfs_prune; 2082 2083 static void 2084 zfs_prune_task(uint64_t nr_to_scan, void *arg __unused) 2085 { 2086 if (nr_to_scan > INT_MAX) 2087 nr_to_scan = INT_MAX; 2088 #if __FreeBSD_version >= 1300139 2089 sx_xlock(&zfs_vnlru_lock); 2090 vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker); 2091 sx_xunlock(&zfs_vnlru_lock); 2092 #else 2093 vnlru_free(nr_to_scan, &zfs_vfsops); 2094 #endif 2095 } 2096 2097 void 2098 zfs_init(void) 2099 { 2100 2101 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2102 2103 /* 2104 * Initialize .zfs directory structures 2105 */ 2106 zfsctl_init(); 2107 2108 /* 2109 * Initialize znode cache, vnode ops, etc... 2110 */ 2111 zfs_znode_init(); 2112 2113 /* 2114 * Reduce number of vnodes. Originally number of vnodes is calculated 2115 * with UFS inode in mind. We reduce it here, because it's too big for 2116 * ZFS/i386. 2117 */ 2118 zfs_vnodes_adjust(); 2119 2120 dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); 2121 2122 zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); 2123 2124 #if __FreeBSD_version >= 1300139 2125 zfs_vnlru_marker = vnlru_alloc_marker(); 2126 sx_init(&zfs_vnlru_lock, "zfs vnlru lock"); 2127 #endif 2128 zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL); 2129 } 2130 2131 void 2132 zfs_fini(void) 2133 { 2134 arc_remove_prune_callback(zfs_prune); 2135 #if __FreeBSD_version >= 1300139 2136 vnlru_free_marker(zfs_vnlru_marker); 2137 sx_destroy(&zfs_vnlru_lock); 2138 #endif 2139 2140 taskq_destroy(zfsvfs_taskq); 2141 zfsctl_fini(); 2142 zfs_znode_fini(); 2143 zfs_vnodes_adjust_back(); 2144 } 2145 2146 int 2147 zfs_busy(void) 2148 { 2149 return (zfs_active_fs_count != 0); 2150 } 2151 2152 /* 2153 * Release VOPs and unmount a suspended filesystem. 2154 */ 2155 int 2156 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 2157 { 2158 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 2159 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 2160 2161 /* 2162 * We already own this, so just hold and rele it to update the 2163 * objset_t, as the one we had before may have been evicted. 2164 */ 2165 objset_t *os; 2166 VERIFY3P(ds->ds_owner, ==, zfsvfs); 2167 VERIFY(dsl_dataset_long_held(ds)); 2168 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 2169 dsl_pool_config_enter(dp, FTAG); 2170 VERIFY0(dmu_objset_from_ds(ds, &os)); 2171 dsl_pool_config_exit(dp, FTAG); 2172 zfsvfs->z_os = os; 2173 2174 /* release the VOPs */ 2175 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2176 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2177 2178 /* 2179 * Try to force unmount this file system. 2180 */ 2181 (void) zfs_umount(zfsvfs->z_vfs, 0); 2182 zfsvfs->z_unmounted = B_TRUE; 2183 return (0); 2184 } 2185 2186 int 2187 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2188 { 2189 int error; 2190 objset_t *os = zfsvfs->z_os; 2191 dmu_tx_t *tx; 2192 2193 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2194 return (SET_ERROR(EINVAL)); 2195 2196 if (newvers < zfsvfs->z_version) 2197 return (SET_ERROR(EINVAL)); 2198 2199 if (zfs_spa_version_map(newvers) > 2200 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2201 return (SET_ERROR(ENOTSUP)); 2202 2203 tx = dmu_tx_create(os); 2204 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2205 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2206 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2207 ZFS_SA_ATTRS); 2208 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2209 } 2210 error = dmu_tx_assign(tx, TXG_WAIT); 2211 if (error) { 2212 dmu_tx_abort(tx); 2213 return (error); 2214 } 2215 2216 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2217 8, 1, &newvers, tx); 2218 2219 if (error) { 2220 dmu_tx_commit(tx); 2221 return (error); 2222 } 2223 2224 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2225 uint64_t sa_obj; 2226 2227 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2228 SPA_VERSION_SA); 2229 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2230 DMU_OT_NONE, 0, tx); 2231 2232 error = zap_add(os, MASTER_NODE_OBJ, 2233 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2234 ASSERT0(error); 2235 2236 VERIFY0(sa_set_sa_object(os, sa_obj)); 2237 sa_register_update_callback(os, zfs_sa_upgrade); 2238 } 2239 2240 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2241 "from %ju to %ju", (uintmax_t)zfsvfs->z_version, 2242 (uintmax_t)newvers); 2243 dmu_tx_commit(tx); 2244 2245 zfsvfs->z_version = newvers; 2246 os->os_version = newvers; 2247 2248 zfs_set_fuid_feature(zfsvfs); 2249 2250 return (0); 2251 } 2252 2253 /* 2254 * Return true if the corresponding vfs's unmounted flag is set. 2255 * Otherwise return false. 2256 * If this function returns true we know VFS unmount has been initiated. 2257 */ 2258 boolean_t 2259 zfs_get_vfs_flag_unmounted(objset_t *os) 2260 { 2261 zfsvfs_t *zfvp; 2262 boolean_t unmounted = B_FALSE; 2263 2264 ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); 2265 2266 mutex_enter(&os->os_user_ptr_lock); 2267 zfvp = dmu_objset_get_user(os); 2268 if (zfvp != NULL && zfvp->z_vfs != NULL && 2269 (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) 2270 unmounted = B_TRUE; 2271 mutex_exit(&os->os_user_ptr_lock); 2272 2273 return (unmounted); 2274 } 2275 2276 #ifdef _KERNEL 2277 void 2278 zfsvfs_update_fromname(const char *oldname, const char *newname) 2279 { 2280 char tmpbuf[MAXPATHLEN]; 2281 struct mount *mp; 2282 char *fromname; 2283 size_t oldlen; 2284 2285 oldlen = strlen(oldname); 2286 2287 mtx_lock(&mountlist_mtx); 2288 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2289 fromname = mp->mnt_stat.f_mntfromname; 2290 if (strcmp(fromname, oldname) == 0) { 2291 (void) strlcpy(fromname, newname, 2292 sizeof (mp->mnt_stat.f_mntfromname)); 2293 continue; 2294 } 2295 if (strncmp(fromname, oldname, oldlen) == 0 && 2296 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2297 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s", 2298 newname, fromname + oldlen); 2299 (void) strlcpy(fromname, tmpbuf, 2300 sizeof (mp->mnt_stat.f_mntfromname)); 2301 continue; 2302 } 2303 } 2304 mtx_unlock(&mountlist_mtx); 2305 } 2306 #endif 2307 2308 /* 2309 * Find a prison with ZFS info. 2310 * Return the ZFS info and the (locked) prison. 2311 */ 2312 static struct zfs_jailparam * 2313 zfs_jailparam_find(struct prison *spr, struct prison **prp) 2314 { 2315 struct prison *pr; 2316 struct zfs_jailparam *zjp; 2317 2318 for (pr = spr; ; pr = pr->pr_parent) { 2319 mtx_lock(&pr->pr_mtx); 2320 if (pr == &prison0) { 2321 zjp = &zfs_jailparam0; 2322 break; 2323 } 2324 zjp = osd_jail_get(pr, zfs_jailparam_slot); 2325 if (zjp != NULL) 2326 break; 2327 mtx_unlock(&pr->pr_mtx); 2328 } 2329 *prp = pr; 2330 2331 return (zjp); 2332 } 2333 2334 /* 2335 * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the 2336 * ZFS info and lock the prison. 2337 */ 2338 static void 2339 zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp) 2340 { 2341 struct prison *ppr; 2342 struct zfs_jailparam *zjp, *nzjp; 2343 void **rsv; 2344 2345 /* If this prison already has ZFS info, return that. */ 2346 zjp = zfs_jailparam_find(pr, &ppr); 2347 if (ppr == pr) 2348 goto done; 2349 2350 /* 2351 * Allocate a new info record. Then check again, in case something 2352 * changed during the allocation. 2353 */ 2354 mtx_unlock(&ppr->pr_mtx); 2355 nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK); 2356 rsv = osd_reserve(zfs_jailparam_slot); 2357 zjp = zfs_jailparam_find(pr, &ppr); 2358 if (ppr == pr) { 2359 free(nzjp, M_PRISON); 2360 osd_free_reserved(rsv); 2361 goto done; 2362 } 2363 /* Inherit the initial values from the ancestor. */ 2364 mtx_lock(&pr->pr_mtx); 2365 (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp); 2366 (void) memcpy(nzjp, zjp, sizeof (*zjp)); 2367 zjp = nzjp; 2368 mtx_unlock(&ppr->pr_mtx); 2369 done: 2370 if (zjpp != NULL) 2371 *zjpp = zjp; 2372 else 2373 mtx_unlock(&pr->pr_mtx); 2374 } 2375 2376 /* 2377 * Jail OSD methods for ZFS VFS info. 2378 */ 2379 static int 2380 zfs_jailparam_create(void *obj, void *data) 2381 { 2382 struct prison *pr = obj; 2383 struct vfsoptlist *opts = data; 2384 int jsys; 2385 2386 if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 && 2387 jsys == JAIL_SYS_INHERIT) 2388 return (0); 2389 /* 2390 * Inherit a prison's initial values from its parent 2391 * (different from JAIL_SYS_INHERIT which also inherits changes). 2392 */ 2393 zfs_jailparam_alloc(pr, NULL); 2394 return (0); 2395 } 2396 2397 static int 2398 zfs_jailparam_get(void *obj, void *data) 2399 { 2400 struct prison *ppr, *pr = obj; 2401 struct vfsoptlist *opts = data; 2402 struct zfs_jailparam *zjp; 2403 int jsys, error; 2404 2405 zjp = zfs_jailparam_find(pr, &ppr); 2406 jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 2407 error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys)); 2408 if (error != 0 && error != ENOENT) 2409 goto done; 2410 if (jsys == JAIL_SYS_NEW) { 2411 error = vfs_setopt(opts, "zfs.mount_snapshot", 2412 &zjp->mount_snapshot, sizeof (zjp->mount_snapshot)); 2413 if (error != 0 && error != ENOENT) 2414 goto done; 2415 } else { 2416 /* 2417 * If this prison is inheriting its ZFS info, report 2418 * empty/zero parameters. 2419 */ 2420 static int mount_snapshot = 0; 2421 2422 error = vfs_setopt(opts, "zfs.mount_snapshot", 2423 &mount_snapshot, sizeof (mount_snapshot)); 2424 if (error != 0 && error != ENOENT) 2425 goto done; 2426 } 2427 error = 0; 2428 done: 2429 mtx_unlock(&ppr->pr_mtx); 2430 return (error); 2431 } 2432 2433 static int 2434 zfs_jailparam_set(void *obj, void *data) 2435 { 2436 struct prison *pr = obj; 2437 struct prison *ppr; 2438 struct vfsoptlist *opts = data; 2439 int error, jsys, mount_snapshot; 2440 2441 /* Set the parameters, which should be correct. */ 2442 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2443 if (error == ENOENT) 2444 jsys = -1; 2445 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2446 sizeof (mount_snapshot)); 2447 if (error == ENOENT) 2448 mount_snapshot = -1; 2449 else 2450 jsys = JAIL_SYS_NEW; 2451 switch (jsys) { 2452 case JAIL_SYS_NEW: 2453 { 2454 /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */ 2455 struct zfs_jailparam *zjp; 2456 2457 /* 2458 * A child jail cannot have more permissions than its parent 2459 */ 2460 if (pr->pr_parent != &prison0) { 2461 zjp = zfs_jailparam_find(pr->pr_parent, &ppr); 2462 mtx_unlock(&ppr->pr_mtx); 2463 if (zjp->mount_snapshot < mount_snapshot) { 2464 return (EPERM); 2465 } 2466 } 2467 zfs_jailparam_alloc(pr, &zjp); 2468 if (mount_snapshot != -1) 2469 zjp->mount_snapshot = mount_snapshot; 2470 mtx_unlock(&pr->pr_mtx); 2471 break; 2472 } 2473 case JAIL_SYS_INHERIT: 2474 /* "zfs=inherit": inherit the parent's ZFS info. */ 2475 mtx_lock(&pr->pr_mtx); 2476 osd_jail_del(pr, zfs_jailparam_slot); 2477 mtx_unlock(&pr->pr_mtx); 2478 break; 2479 case -1: 2480 /* 2481 * If the setting being changed is not ZFS related 2482 * then do nothing. 2483 */ 2484 break; 2485 } 2486 2487 return (0); 2488 } 2489 2490 static int 2491 zfs_jailparam_check(void *obj __unused, void *data) 2492 { 2493 struct vfsoptlist *opts = data; 2494 int error, jsys, mount_snapshot; 2495 2496 /* Check that the parameters are correct. */ 2497 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2498 if (error != ENOENT) { 2499 if (error != 0) 2500 return (error); 2501 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 2502 return (EINVAL); 2503 } 2504 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2505 sizeof (mount_snapshot)); 2506 if (error != ENOENT) { 2507 if (error != 0) 2508 return (error); 2509 if (mount_snapshot != 0 && mount_snapshot != 1) 2510 return (EINVAL); 2511 } 2512 return (0); 2513 } 2514 2515 static void 2516 zfs_jailparam_destroy(void *data) 2517 { 2518 2519 free(data, M_PRISON); 2520 } 2521 2522 static void 2523 zfs_jailparam_sysinit(void *arg __unused) 2524 { 2525 struct prison *pr; 2526 osd_method_t methods[PR_MAXMETHOD] = { 2527 [PR_METHOD_CREATE] = zfs_jailparam_create, 2528 [PR_METHOD_GET] = zfs_jailparam_get, 2529 [PR_METHOD_SET] = zfs_jailparam_set, 2530 [PR_METHOD_CHECK] = zfs_jailparam_check, 2531 }; 2532 2533 zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods); 2534 /* Copy the defaults to any existing prisons. */ 2535 sx_slock(&allprison_lock); 2536 TAILQ_FOREACH(pr, &allprison, pr_list) 2537 zfs_jailparam_alloc(pr, NULL); 2538 sx_sunlock(&allprison_lock); 2539 } 2540 2541 static void 2542 zfs_jailparam_sysuninit(void *arg __unused) 2543 { 2544 2545 osd_jail_deregister(zfs_jailparam_slot); 2546 } 2547 2548 SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2549 zfs_jailparam_sysinit, NULL); 2550 SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2551 zfs_jailparam_sysuninit, NULL); 2552