1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/acl.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/mntent.h>
#include <sys/mount.h>
#include <sys/cmn_err.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_dir.h>
#include <sys/zil.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/jail.h>
#include <sys/osd.h>
#include <ufs/ufs/quota.h>
#include <sys/zfs_quota.h>

#include "zfs_comutil.h"

/* Compatibility fallbacks for mount kernel flags absent on older FreeBSD. */
#ifndef	MNTK_VMSETSIZE_BUG
#define	MNTK_VMSETSIZE_BUG	0
#endif
#ifndef	MNTK_NOMSYNC
#define	MNTK_NOMSYNC	8
#endif

struct mtx zfs_debug_mtx;
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);

SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");

int zfs_super_owner;
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
	"File system owners can perform privileged operation on file systems");

int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
	"Debug level");

int zfs_bclone_enabled;
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
	&zfs_bclone_enabled, 0, "Enable block cloning");

/* Per-jail ZFS tunables; instance 0 is the host (non-jailed) default. */
struct zfs_jailparam {
	int mount_snapshot;
};

static struct zfs_jailparam zfs_jailparam0 = {
	.mount_snapshot = 0,
};

/* OSD slot allocated for per-jail zfs_jailparam state. */
static int zfs_jailparam_slot;

SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
	"Allow mounting snapshots in the .zfs directory for unjailed datasets");

/* Read-only sysctls exposing the compiled-in on-disk/ACL/ZPL versions. */
SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
	"ZFS_ACL_VERSION");
static int zfs_version_spa = SPA_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
	"SPA_VERSION");
static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
	"ZPL_VERSION");

/*
 * VFS operation implementations.  The zfs_quotactl() and zfs_checkexp()
 * prototypes vary with the FreeBSD VFS KPI version.
 */
#if __FreeBSD_version >= 1400018
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
    bool *mp_busy);
#else
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
#endif
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
static int zfs_sync(vfs_t *vfsp, int waitfor);
#if __FreeBSD_version >= 1300098
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors);
#else
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors);
#endif
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
static void zfs_freevfs(vfs_t *vfsp);

struct vfsops zfs_vfsops = {
	.vfs_mount = zfs_mount,
	.vfs_unmount = zfs_umount,
#if __FreeBSD_version >= 1300049
	/* Newer kernels cache the root vnode; zfs_root becomes the filler. */
	.vfs_root = vfs_cache_root,
	.vfs_cachedroot = zfs_root,
#else
	.vfs_root = zfs_root,
#endif
	.vfs_statfs = zfs_statfs,
	.vfs_vget = zfs_vget,
	.vfs_sync = zfs_sync,
	.vfs_checkexp = zfs_checkexp,
	.vfs_fhtovp = zfs_fhtovp,
	.vfs_quotactl = zfs_quotactl,
};

#ifdef VFCF_CROSS_COPY_FILE_RANGE
VFS_SET(zfs_vfsops, zfs,
    VFCF_DELEGADMIN | VFCF_JAIL | VFCF_CROSS_COPY_FILE_RANGE);
#else
VFS_SET(zfs_vfsops, zfs, VFCF_DELEGADMIN | VFCF_JAIL);
#endif

/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f
 */
static uint32_t	zfs_active_fs_count = 0;

/*
 * Return the effective value of property 'zfs_prop' for the mounted
 * dataset 'ds', honoring any temporary mount-option override over the
 * stored dataset value in *val.  On an override, *val is replaced and,
 * if 'setpoint' is non-NULL, it is set to the string "temporary".
 * Returns 0 on success or an errno (ENOENT if not mounted or the
 * property has no mount-option mapping).
 */
int
zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
    char *setpoint)
{
	int error;
	zfsvfs_t *zfvp;
	vfs_t *vfsp;
	objset_t *os;
	uint64_t tmp = *val;

	error = dmu_objset_from_ds(ds, &os);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): getzfsvfs_impl() appears to busy the vfs on
	 * success — every exit path below calls vfs_unbusy(); confirm
	 * against its definition.
	 */
	error = getzfsvfs_impl(os, &zfvp);
	if (error != 0)
		return (error);
	if (zfvp == NULL)
		return (ENOENT);
	vfsp = zfvp->z_vfs;
	switch (zfs_prop) {
	case ZFS_PROP_ATIME:
		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_DEVICES:
		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_EXEC:
		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_SETUID:
		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_READONLY:
		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_XATTR:
		/* xattr has no mount option pair; use the cached flag/value. */
		if (zfvp->z_flags & ZSB_XATTR)
			tmp = zfvp->z_xattr;
		break;
	case ZFS_PROP_NBMAND:
		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
			tmp = 1;
		break;
	default:
		vfs_unbusy(vfsp);
		return (ENOENT);
	}

	vfs_unbusy(vfsp);
	if (tmp != *val) {
		/* A mount option overrode the stored property value. */
		if (setpoint)
			(void) strcpy(setpoint, "temporary");
		*val = tmp;
	}
	return (0);
}

/*
 * Fill in a FreeBSD dqblk64 for 'id' (a uid, or a gid when 'isgroup')
 * from the dataset's {user,group}quota and {user,group}used ZAP objects.
 * Returns 0 on success, ENOENT when no quota is set (or during replay),
 * or another errno from the ZAP lookups.
 */
static int
zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
{
	int error = 0;
	char buf[32];
	uint64_t usedobj, quotaobj;
	uint64_t quota, used = 0;
	timespec_t now;

	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;

	if (quotaobj == 0 || zfsvfs->z_replay) {
		error = ENOENT;
		goto done;
	}
	/* Quota ZAP entries are keyed by the id rendered in hex. */
	(void) sprintf(buf, "%llx", (longlong_t)id);
	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
	    buf, sizeof (quota), 1, &quota)) != 0) {
		dprintf("%s(%d): quotaobj lookup failed\n",
		    __FUNCTION__, __LINE__);
		goto done;
	}
	/*
	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
	 * So we set them to be the same.
	 */
	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
	if (error && error != ENOENT) {
		dprintf("%s(%d): usedobj failed; %d\n",
		    __FUNCTION__, __LINE__, error);
		goto done;
	}
	/* Missing "used" entry simply means zero blocks charged so far. */
	dqp->dqb_curblocks = btodb(used);
	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
	vfs_timestamp(&now);
	/*
	 * Setting this to 0 causes FreeBSD quota(8) to print
	 * the number of days since the epoch, which isn't
	 * particularly useful.
	 */
	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
done:
	return (error);
}

/*
 * VFS_QUOTACTL entry point.  Decodes the classic BSD quotactl command
 * word (cmd in the high bits, quota type in the low bits) and maps it
 * onto ZFS user/group quota properties.  Quotas cannot be enabled or
 * disabled on ZFS, so Q_QUOTAON is a no-op and Q_QUOTAOFF fails.
 * Prior to FreeBSD 1400018 the caller expects us to vfs_unbusy() on
 * the QUOTAON/QUOTAOFF paths.
 */
static int
#if __FreeBSD_version >= 1400018
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
#else
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct thread *td;
	int cmd, type, error = 0;
	int bitsize;
	zfs_userquota_prop_t quota_type;
	struct dqblk64 dqblk = { 0 };

	td = curthread;
	cmd = cmds >> SUBCMDSHIFT;
	type = cmds & SUBCMDMASK;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);
	if (id == -1) {
		/* id -1 means "the caller": use the thread's real uid/gid. */
		switch (type) {
		case USRQUOTA:
			id = td->td_ucred->cr_ruid;
			break;
		case GRPQUOTA:
			id = td->td_ucred->cr_rgid;
			break;
		default:
			error = EINVAL;
#if __FreeBSD_version < 1400018
			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
				vfs_unbusy(vfsp);
#endif
			goto done;
		}
	}
	/*
	 * Map BSD type to:
	 * ZFS_PROP_USERUSED,
	 * ZFS_PROP_USERQUOTA,
	 * ZFS_PROP_GROUPUSED,
	 * ZFS_PROP_GROUPQUOTA
	 */
	switch (cmd) {
	case Q_SETQUOTA:
	case Q_SETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERQUOTA;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPQUOTA;
		else
			error = EINVAL;
		break;
	case Q_GETQUOTA:
	case Q_GETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERUSED;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPUSED;
		else
			error = EINVAL;
		break;
	}

	/*
	 * Depending on the cmd, we may need to get
	 * the ruid and domain (see fuidstr_to_sid?),
	 * the fuid (how?), or other information.
	 * Create fuid using zfs_fuid_create(zfsvfs, id,
	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
	 * I think I can use just the id?
	 *
	 * Look at zfs_id_overquota() to look up a quota.
	 * zap_lookup(something, quotaobj, fuidstring,
	 * sizeof (long long), 1, &quota)
	 *
	 * See zfs_set_userquota() to set a quota.
	 */
	if ((uint32_t)type >= MAXQUOTAS) {
		error = EINVAL;
		goto done;
	}

	switch (cmd) {
	case Q_GETQUOTASIZE:
		bitsize = 64;
		error = copyout(&bitsize, arg, sizeof (int));
		break;
	case Q_QUOTAON:
		// As far as I can tell, you can't turn quotas on or off on zfs
		error = 0;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_QUOTAOFF:
		error = ENOTSUP;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_SETQUOTA:
		/* Only the block hard limit is honored; ZFS has no soft limit. */
		error = copyin(arg, &dqblk, sizeof (dqblk));
		if (error == 0)
			error = zfs_set_userquota(zfsvfs, quota_type,
			    "", id, dbtob(dqblk.dqb_bhardlimit));
		break;
	case Q_GETQUOTA:
		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
		if (error == 0)
			error = copyout(&dqblk, arg, sizeof (dqblk));
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	zfs_exit(zfsvfs, FTAG);
	return (error);
}


/* Return B_TRUE when the filesystem is mounted read-only. */
boolean_t
zfs_is_readonly(zfsvfs_t *zfsvfs)
{
	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
}

/*
 * VFS_SYNC entry point.  Commits the given filesystem's ZIL, or all
 * pools when vfsp is NULL (sync(8)).  No-op during panic and for the
 * periodic MNT_LAZY syncer.
 */
static int
zfs_sync(vfs_t *vfsp, int waitfor)
{

	/*
	 * Data integrity is job one. We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (panicstr)
		return (0);

	/*
	 * Ignore the system syncher. ZFS already commits async data
	 * at zfs_txg_timeout intervals.
	 */
	if (waitfor == MNT_LAZY)
		return (0);

	if (vfsp != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		zfsvfs_t *zfsvfs = vfsp->vfs_data;
		dsl_pool_t *dp;
		int error;

		if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
			return (error);
		dp = dmu_objset_pool(zfsvfs->z_os);

		/*
		 * If the system is shutting down, then skip any
		 * filesystems which may exist on a suspended pool.
		 */
		if (rebooting && spa_suspended(dp->dp_spa)) {
			zfs_exit(zfsvfs, FTAG);
			return (0);
		}

		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, 0);

		zfs_exit(zfsvfs, FTAG);
	} else {
		/*
		 * Sync all ZFS filesystems. This is what happens when you
		 * run sync(8). Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}

/*
 * dsl_prop callback: mirror the "atime" property into the cached flag,
 * the MNT_NOATIME vfs flag, and the mount option list.
 */
static void
atime_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == TRUE) {
		zfsvfs->z_atime = TRUE;
		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
	} else {
		zfsvfs->z_atime = FALSE;
		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
	}
}

/*
 * dsl_prop callback: track the "xattr" property (off / dir-based / SA).
 */
static void
xattr_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == ZFS_XATTR_OFF) {
		zfsvfs->z_flags &= ~ZSB_XATTR;
	} else {
		zfsvfs->z_flags |= ZSB_XATTR;

		if (newval == ZFS_XATTR_SA)
			zfsvfs->z_xattr_sa = B_TRUE;
		else
			zfsvfs->z_xattr_sa = B_FALSE;
	}
}

/*
 * dsl_prop callback: track the "recordsize" property; also exported to
 * userland as the statfs f_iosize.
 */
static void
blksz_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;
	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
	ASSERT(ISP2(newval));

	zfsvfs->z_max_blksz = newval;
	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
}

/*
 * dsl_prop callback: mirror the "readonly" property into VFS_RDONLY
 * and the mount option list.
 */
static void
readonly_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval) {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
	} else {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
	}
}

/*
 * dsl_prop callback: mirror the "setuid" property into VFS_NOSETUID
 * and the mount option list.
 */
static void
setuid_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
	}
}

/*
 * dsl_prop callback: mirror the "exec" property into VFS_NOEXEC
 * and the mount option list.
 */
static void
exec_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
	}
}

/*
 * The nbmand mount option can be changed at mount time.
 * We can't allow it to be toggled on live file systems or incorrect
 * behavior may be seen from cifs clients
 *
 * This property isn't registered via dsl_prop_register(), but this callback
 * will be called when a file system is first mounted
 */
static void
nbmand_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;
	if (newval == FALSE) {
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
	} else {
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
	}
}

/* dsl_prop callback: track the "snapdir" property (.zfs visibility). */
static void
snapdir_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_show_ctldir = newval;
}

/* dsl_prop callback: track the "aclmode" property. */
static void
acl_mode_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_mode = newval;
}

/* dsl_prop callback: track the "aclinherit" property. */
static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_inherit = newval;
}

/* dsl_prop callback: track the "acltype" property. */
static void
acl_type_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_type = newval;
}

/*
 * Register all of the property callbacks above with the DSL so that
 * property changes take immediate effect on the mounted filesystem,
 * then re-apply any temporary (mount-option) overrides that the
 * registration clobbered.  Returns 0 or an errno; on failure all
 * callbacks registered so far are unregistered.
 */
static int
zfs_register_callbacks(vfs_t *vfsp)
{
	struct dsl_dataset *ds = NULL;
	objset_t *os = NULL;
	zfsvfs_t *zfsvfs = NULL;
	uint64_t nbmand;
	boolean_t readonly = B_FALSE;
	boolean_t do_readonly = B_FALSE;
	boolean_t setuid = B_FALSE;
	boolean_t do_setuid = B_FALSE;
	boolean_t exec = B_FALSE;
	boolean_t do_exec = B_FALSE;
	boolean_t xattr = B_FALSE;
	boolean_t atime = B_FALSE;
	boolean_t do_atime = B_FALSE;
	boolean_t do_xattr = B_FALSE;
	int error = 0;

	ASSERT3P(vfsp, !=, NULL);
	zfsvfs = vfsp->vfs_data;
	ASSERT3P(zfsvfs, !=, NULL);
	os = zfsvfs->z_os;

	/*
	 * This function can be called for a snapshot when we update snapshot's
	 * mount point, which isn't really supported.
	 */
	if (dmu_objset_is_snapshot(os))
		return (EOPNOTSUPP);

	/*
	 * The act of registering our callbacks will destroy any mount
	 * options we may have. In order to enable temporary overrides
	 * of mount options, we stash away the current values and
	 * restore them after we register the callbacks.
	 */
	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
	    !spa_writeable(dmu_objset_spa(os))) {
		readonly = B_TRUE;
		do_readonly = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
		readonly = B_FALSE;
		do_readonly = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
		setuid = B_FALSE;
		do_setuid = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
		setuid = B_TRUE;
		do_setuid = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
		exec = B_FALSE;
		do_exec = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
		exec = B_TRUE;
		do_exec = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
		do_xattr = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
		atime = B_FALSE;
		do_atime = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
		atime = B_TRUE;
		do_atime = B_TRUE;
	}

	/*
	 * We need to enter pool configuration here, so that we can use
	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
	 * dsl_prop_get_integer() can not be used, because it has to acquire
	 * spa_namespace_lock and we can not do that because we already hold
	 * z_teardown_lock. The problem is that spa_write_cachefile() is called
	 * with spa_namespace_lock held and the function calls ZFS vnode
	 * operations to write the cache file and thus z_teardown_lock is
	 * acquired after spa_namespace_lock.
	 */
	ds = dmu_objset_ds(os);
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);

	/*
	 * nbmand is a special property. It can only be changed at
	 * mount time.
	 *
	 * This is weird, but it is documented to only be changeable
	 * at mount time.
	 */
	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
		nbmand = B_FALSE;
	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
		nbmand = B_TRUE;
	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		return (error);
	}

	/*
	 * Register property callbacks.
	 *
	 * It would probably be fine to just check for i/o error from
	 * the first prop_register(), but I guess I like to go
	 * overboard...
	 */
	error = dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
	    zfsvfs);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
	if (error)
		goto unregister;

	/*
	 * Invoke our callbacks to restore temporary mount options.
	 */
	if (do_readonly)
		readonly_changed_cb(zfsvfs, readonly);
	if (do_setuid)
		setuid_changed_cb(zfsvfs, setuid);
	if (do_exec)
		exec_changed_cb(zfsvfs, exec);
	if (do_xattr)
		xattr_changed_cb(zfsvfs, xattr);
	if (do_atime)
		atime_changed_cb(zfsvfs, atime);

	/* nbmand is not a registered callback; apply it unconditionally. */
	nbmand_changed_cb(zfsvfs, nbmand);

	return (0);

unregister:
	dsl_prop_unregister_all(ds, zfsvfs);
	return (error);
}

/*
 * Associate this zfsvfs with the given objset, which must be owned.
 * This will cache a bunch of on-disk state from the objset in the
 * zfsvfs.
 */
static int
zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;
	uint64_t val;

	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
	zfsvfs->z_os = os;

	/* The ZPL version must be supported by the pool's SPA version. */
	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
	if (error != 0)
		return (error);
	if (zfsvfs->z_version >
	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
		(void) printf("Can't mount a version %lld file system "
		    "on a version %lld pool\n."
		    " Pool must be upgraded to mount "
		    "this file system.", (u_longlong_t)zfsvfs->z_version,
		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
		return (SET_ERROR(ENOTSUP));
	}
	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_norm = (int)val;

	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_utf8 = (val != 0);

	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_case = (uint_t)val;

	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_acl_type = (uint_t)val;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
	    zfsvfs->z_case == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);

	uint64_t sa_obj = 0;
	if (zfsvfs->z_use_sa) {
		/* should either have both of these objects or none */
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
		    &sa_obj);
		if (error != 0)
			return (error);

		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
		if (error == 0 && val == ZFS_XATTR_SA)
			zfsvfs->z_xattr_sa = B_TRUE;
	}

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs->z_attr_table);
	if (error != 0)
		return (error);

	if (zfsvfs->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(os, zfs_sa_upgrade);

	/* Root directory and unlinked ("delete queue") objects must exist. */
	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
	    &zfsvfs->z_root);
	if (error != 0)
		return (error);
	ASSERT3U(zfsvfs->z_root, !=, 0);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
	    &zfsvfs->z_unlinkedobj);
	if (error != 0)
		return (error);

	/*
	 * The quota/FUID/share objects below are all optional: ENOENT
	 * simply means the feature is not in use (object id 0).
	 */
	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
	    8, 1, &zfsvfs->z_userquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
	    8, 1, &zfsvfs->z_groupquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
	    8, 1, &zfsvfs->z_projectquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
	    8, 1, &zfsvfs->z_userobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
	    8, 1, &zfsvfs->z_groupobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
	    8, 1, &zfsvfs->z_projectobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
	    &zfsvfs->z_fuid_obj);
	if (error == ENOENT)
		zfsvfs->z_fuid_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
	    &zfsvfs->z_shares_dir);
	if (error == ENOENT)
		zfsvfs->z_shares_dir = 0;
	else if (error != 0)
		return (error);

	/*
	 * Only use the name cache if we are looking for a
	 * name on a file system that does not require normalization
	 * or case folding. We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name (which is always the case on
	 * FreeBSD).
	 */
	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));

	return (0);
}

/* Taskq used to drain the unlinked set asynchronously (see TASK_INIT use). */
taskq_t *zfsvfs_taskq;

/* Taskqueue shim: forward to zfs_unlinked_drain() for the given zfsvfs. */
static void
zfsvfs_task_unlinked_drain(void *context, int pending __unused)
{

	zfs_unlinked_drain((zfsvfs_t *)context);
}

/*
 * Allocate a zfsvfs and take ownership of the named objset.  Snapshots
 * are always owned read-only.  On success *zfvp is the new zfsvfs.
 */
int
zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
{
	objset_t *os;
	zfsvfs_t *zfsvfs;
	int error;
	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));

	/*
	 * XXX: Fix struct statfs so this isn't necessary!
	 *
	 * The 'osname' is used as the filesystem's special node, which means
	 * it must fit in statfs.f_mntfromname, or else it can't be
	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
	 * 'zfs unmount' to think it's not mounted when it is.
	 */
	if (strlen(osname) >= MNAMELEN)
		return (SET_ERROR(ENAMETOOLONG));

	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);

	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
	    &os);
	if (error != 0) {
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	/* zfsvfs_create_impl() frees zfsvfs and disowns os on failure. */
	error = zfsvfs_create_impl(zfvp, zfsvfs, os);

	return (error);
}


/*
 * Second half of zfsvfs construction: initialize locks/lists/tasks and
 * cache objset state via zfsvfs_init().  Takes ownership of 'zfsvfs'
 * and the owned 'os'; on failure both are released and *zfvp is NULL.
 */
int
zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;

	zfsvfs->z_vfs = NULL;
	zfsvfs->z_parent = zfsvfs;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
	    zfsvfs_task_unlinked_drain, zfsvfs);
	ZFS_TEARDOWN_INIT(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	error = zfsvfs_init(zfsvfs, os);
	if (error != 0) {
		dmu_objset_disown(os, B_TRUE, zfsvfs);
		*zfvp = NULL;
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	*zfvp = zfsvfs;
	return (0);
}

/*
 * Bring a zfsvfs into service: register property callbacks, open the
 * ZIL and, when 'mounting', drain the unlinked set and replay the
 * intent log.  For online recv (!mounting) only the ZIL is reopened.
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int error;

	/*
	 * Check for a bad on-disk format version now since we
	 * lied about owning the dataset readonly before.
	 */
	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
		return (SET_ERROR(EROFS));

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
		error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
		if (error)
			return (error);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly != 0) {
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		} else {
			dsl_dir_t *dd;
			zap_stats_t zs;

			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
			    &zs) == 0) {
				dataset_kstats_update_nunlinks_kstat(
				    &zfsvfs->z_kstat, zs.zs_num_entries);
				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
				    "num_entries in unlinked set: %llu",
				    (u_longlong_t)zs.zs_num_entries);
			}

			zfs_unlinked_drain(zfsvfs);
			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
			dd->dd_activity_cancelled = B_FALSE;
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain(). (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.) This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg. This would write a "create
		 * object N" record to the intent log. Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk. So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				/* Replay bypasses the name cache. */
				boolean_t use_nc = zfsvfs->z_use_namecache;
				zfsvfs->z_use_namecache = B_FALSE;
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
				zfsvfs->z_use_namecache = use_nc;
			}
		}

		/* restore readonly bit */
		if (readonly != 0)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	} else {
		ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
1144 */ 1145 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1146 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1147 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1148 1149 return (0); 1150 } 1151 1152 void 1153 zfsvfs_free(zfsvfs_t *zfsvfs) 1154 { 1155 int i; 1156 1157 zfs_fuid_destroy(zfsvfs); 1158 1159 mutex_destroy(&zfsvfs->z_znodes_lock); 1160 mutex_destroy(&zfsvfs->z_lock); 1161 ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); 1162 list_destroy(&zfsvfs->z_all_znodes); 1163 ZFS_TEARDOWN_DESTROY(zfsvfs); 1164 ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); 1165 rw_destroy(&zfsvfs->z_fuid_lock); 1166 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1167 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1168 dataset_kstats_destroy(&zfsvfs->z_kstat); 1169 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1170 } 1171 1172 static void 1173 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1174 { 1175 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1176 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1177 } 1178 1179 static int 1180 zfs_domount(vfs_t *vfsp, char *osname) 1181 { 1182 uint64_t recordsize, fsid_guid; 1183 int error = 0; 1184 zfsvfs_t *zfsvfs; 1185 1186 ASSERT3P(vfsp, !=, NULL); 1187 ASSERT3P(osname, !=, NULL); 1188 1189 error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); 1190 if (error) 1191 return (error); 1192 zfsvfs->z_vfs = vfsp; 1193 1194 if ((error = dsl_prop_get_integer(osname, 1195 "recordsize", &recordsize, NULL))) 1196 goto out; 1197 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1198 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1199 1200 vfsp->vfs_data = zfsvfs; 1201 vfsp->mnt_flag |= MNT_LOCAL; 1202 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1203 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1204 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1205 /* 1206 * This can cause a loss of coherence between ARC and page cache 1207 * on ZoF - unclear if the problem is in FreeBSD or ZoF 1208 */ 1209 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1210 
vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1211 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1212 1213 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1214 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1215 #endif 1216 /* 1217 * The fsid is 64 bits, composed of an 8-bit fs type, which 1218 * separates our fsid from any other filesystem types, and a 1219 * 56-bit objset unique ID. The objset unique ID is unique to 1220 * all objsets open on this system, provided by unique_create(). 1221 * The 8-bit fs type must be put in the low bits of fsid[1] 1222 * because that's where other Solaris filesystems put it. 1223 */ 1224 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1225 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1226 vfsp->vfs_fsid.val[0] = fsid_guid; 1227 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1228 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1229 1230 /* 1231 * Set features for file system. 1232 */ 1233 zfs_set_fuid_feature(zfsvfs); 1234 1235 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1236 uint64_t pval; 1237 1238 atime_changed_cb(zfsvfs, B_FALSE); 1239 readonly_changed_cb(zfsvfs, B_TRUE); 1240 if ((error = dsl_prop_get_integer(osname, 1241 "xattr", &pval, NULL))) 1242 goto out; 1243 xattr_changed_cb(zfsvfs, pval); 1244 if ((error = dsl_prop_get_integer(osname, 1245 "acltype", &pval, NULL))) 1246 goto out; 1247 acl_type_changed_cb(zfsvfs, pval); 1248 zfsvfs->z_issnap = B_TRUE; 1249 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1250 1251 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1252 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1253 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1254 } else { 1255 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1256 goto out; 1257 } 1258 1259 vfs_mountedfrom(vfsp, osname); 1260 1261 if (!zfsvfs->z_issnap) 1262 zfsctl_create(zfsvfs); 1263 out: 1264 if (error) { 1265 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1266 zfsvfs_free(zfsvfs); 1267 } else { 1268 atomic_inc_32(&zfs_active_fs_count); 1269 } 1270 1271 return (error); 1272 } 1273 1274 
/* Unregister all property callbacks (no-op for snapshots). */
static void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
	objset_t *os = zfsvfs->z_os;

	if (!dmu_objset_is_snapshot(os))
		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}

/*
 * Copy the pool component (everything before the first '/') of 'osname'
 * into 'poolname', which must hold at least MAXNAMELEN bytes.
 * Returns ENAMETOOLONG if the pool name does not fit.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	char *p;

	p = strchr(osname, '/');
	if (p == NULL) {
		if (strlen(osname) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(poolname, osname);
	} else {
		if (p - osname >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strlcpy(poolname, osname, p - osname + 1);
	}
	return (0);
}

/*
 * A leading '!' on the dataset name requests a checkpoint-rewind pool
 * import; strip it from 'name' in place and report it via
 * *checkpointrewind.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{

	if (name[0] == '!') {
		*checkpointrewind = true;
		memmove(name, name + 1, strlen(name));
	} else {
		*checkpointrewind = false;
	}
}

/*
 * VFS_MOUNT() entry point: validate privileges and zone visibility,
 * handle remounts by refreshing property callbacks, import the root pool
 * on an initial root mount, then hand off to zfs_domount().
 */
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t *td = curthread;
	vnode_t *mvp = vfsp->mnt_vnodecovered;
	cred_t *cr = td->td_ucred;
	char *osname;
	int error = 0;
	int canwrite;
	bool checkpointrewind, isctlsnap = false;

	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	fetch_osname_options(osname, &checkpointrewind);
	/* Mounting a snapshot under a .zfs control directory node? */
	isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
	    strchr(osname, '@') != NULL);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error && isctlsnap) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	} else if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		boolean_t mount_snapshot = B_FALSE;

		/*
		 * Snapshots may be mounted in .zfs for unjailed datasets
		 * if allowed by the jail param zfs.mount_snapshot.
		 */
		if (isctlsnap) {
			struct prison *pr;
			struct zfs_jailparam *zjp;

			pr = curthread->td_ucred->cr_prison;
			mtx_lock(&pr->pr_mtx);
			zjp = osd_jail_get(pr, zfs_jailparam_slot);
			mtx_unlock(&pr->pr_mtx);
			if (zjp && zjp->mount_snapshot)
				mount_snapshot = B_TRUE;
		}
		if (!mount_snapshot) {
			error = SET_ERROR(EPERM);
			goto out;
		}
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O while
		 * the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/* Initial root mount: try hard to import the requested root pool. */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}

/*
 * VFS_STATFS() entry point: report space and object counts for the
 * dataset in units of SPA_MINBLOCKSIZE.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;
	int error;

	statp->f_version = STATFS_VERSION;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	statp->f_bfree = availbytes / statp->f_bsize;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_files = statp->f_ffree + usedobjs;

	/*
	 * We're a zfs filesystem.
	 */
	strlcpy(statp->f_fstypename, "zfs",
	    sizeof (statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof (statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof (statp->f_mntonname));

	statp->f_namemax = MAXNAMELEN - 1;

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

/*
 * VFS_ROOT() entry point: return the locked root vnode of the
 * filesystem.  The lock is taken after zfs_exit() to respect lock
 * ordering.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
	if (error == 0)
		*vpp = ZTOV(rootzp);

	zfs_exit(zfsvfs, FTAG);

	if (error == 0) {
		error = vn_lock(*vpp, flags);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}
	}
	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;
	dsl_dir_t *dd;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but zreles run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zfsvfs->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			/* Non-unmount callers only wait two rounds. */
			if (++round > 1 && !unmounting)
				break;
		}
	}
	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
#ifdef FREEBSD_NAMECACHE
#if __FreeBSD_version >= 1300117
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#else
		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
#endif
#endif
	}

	/*
	 * Close the zil.  NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		if (zp->z_sa_hdl != NULL) {
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/*
 * VFS_UNMOUNT() entry point: unmount .zfs snapshots first, flush
 * vnodes, tear down the zfsvfs and release the objset.
 */
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	/* Without unmount privilege, fall back to delegated permission. */
	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE).  This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/* Make sure the unlinked-drain task is not (and will not be) running. */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET() entry point: translate an inode number to a locked vnode,
 * used primarily by NFS.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);
	err = zfs_zget(zfsvfs, ino, &zp);
	/* Objects on the unlinked (deleted) list are not resolvable. */
	if (err == 0 && zp->z_unlinked) {
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	zfs_exit(zfsvfs, FTAG);
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

/*
 * VFS_CHECKEXP() entry point: NFS export check.  The signature changed
 * in FreeBSD 1300098 (extflagsp widened, secflavors flattened).
 */
static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
* which we have to use here, because only this file system 1799 * has mnt_export configured. 1800 */ 1801 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1802 credanonp, numsecflavors, secflavors)); 1803 } 1804 1805 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, 1806 "struct fid bigger than SHORT_FID_LEN"); 1807 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, 1808 "struct fid bigger than LONG_FID_LEN"); 1809 1810 static int 1811 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 1812 { 1813 struct componentname cn; 1814 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1815 znode_t *zp; 1816 vnode_t *dvp; 1817 uint64_t object = 0; 1818 uint64_t fid_gen = 0; 1819 uint64_t setgen = 0; 1820 uint64_t gen_mask; 1821 uint64_t zp_gen; 1822 int i, err; 1823 1824 *vpp = NULL; 1825 1826 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1827 return (err); 1828 1829 /* 1830 * On FreeBSD we can get snapshot's mount point or its parent file 1831 * system mount point depending if snapshot is already mounted or not. 
1832 */ 1833 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1834 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1835 uint64_t objsetid = 0; 1836 1837 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1838 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1839 1840 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1841 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1842 1843 zfs_exit(zfsvfs, FTAG); 1844 1845 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1846 if (err) 1847 return (SET_ERROR(EINVAL)); 1848 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1849 return (err); 1850 } 1851 1852 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1853 zfid_short_t *zfid = (zfid_short_t *)fidp; 1854 1855 for (i = 0; i < sizeof (zfid->zf_object); i++) 1856 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1857 1858 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1859 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1860 } else { 1861 zfs_exit(zfsvfs, FTAG); 1862 return (SET_ERROR(EINVAL)); 1863 } 1864 1865 if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { 1866 zfs_exit(zfsvfs, FTAG); 1867 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", 1868 (u_longlong_t)fid_gen, (u_longlong_t)setgen); 1869 return (SET_ERROR(EINVAL)); 1870 } 1871 1872 /* 1873 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 1874 * directory tree. If the object == zfsvfs->z_shares_dir, then 1875 * we are in the .zfs/shares directory tree. 
1876 */ 1877 if ((fid_gen == 0 && 1878 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 1879 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 1880 zfs_exit(zfsvfs, FTAG); 1881 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 1882 if (object == ZFSCTL_INO_SNAPDIR) { 1883 cn.cn_nameptr = "snapshot"; 1884 cn.cn_namelen = strlen(cn.cn_nameptr); 1885 cn.cn_nameiop = LOOKUP; 1886 cn.cn_flags = ISLASTCN | LOCKLEAF; 1887 cn.cn_lkflags = flags; 1888 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1889 vput(dvp); 1890 } else if (object == zfsvfs->z_shares_dir) { 1891 /* 1892 * XXX This branch must not be taken, 1893 * if it is, then the lookup below will 1894 * explode. 1895 */ 1896 cn.cn_nameptr = "shares"; 1897 cn.cn_namelen = strlen(cn.cn_nameptr); 1898 cn.cn_nameiop = LOOKUP; 1899 cn.cn_flags = ISLASTCN; 1900 cn.cn_lkflags = flags; 1901 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1902 vput(dvp); 1903 } else { 1904 *vpp = dvp; 1905 } 1906 return (err); 1907 } 1908 1909 gen_mask = -1ULL >> (64 - 8 * i); 1910 1911 dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, 1912 (u_longlong_t)fid_gen, 1913 (u_longlong_t)gen_mask); 1914 if ((err = zfs_zget(zfsvfs, object, &zp))) { 1915 zfs_exit(zfsvfs, FTAG); 1916 return (err); 1917 } 1918 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 1919 sizeof (uint64_t)); 1920 zp_gen = zp_gen & gen_mask; 1921 if (zp_gen == 0) 1922 zp_gen = 1; 1923 if (zp->z_unlinked || zp_gen != fid_gen) { 1924 dprintf("znode gen (%llu) != fid gen (%llu)\n", 1925 (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); 1926 vrele(ZTOV(zp)); 1927 zfs_exit(zfsvfs, FTAG); 1928 return (SET_ERROR(EINVAL)); 1929 } 1930 1931 *vpp = ZTOV(zp); 1932 zfs_exit(zfsvfs, FTAG); 1933 err = vn_lock(*vpp, flags); 1934 if (err == 0) 1935 vnode_create_vobject(*vpp, zp->z_size, curthread); 1936 else 1937 *vpp = NULL; 1938 return (err); 1939 } 1940 1941 /* 1942 * Block out VOPs and close zfsvfs_t::z_os 1943 * 1944 * Note, if successful, then we return with the 
 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 * dataset and objset intact so that they can be atomically handed off during
 * a subsequent rollback or recv operation and the resume thereafter.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	return (0);
}

/*
 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 * is an invariant across any of the operations that can be performed while the
 * filesystem was suspended.  Whether it succeeded or failed, the preconditions
 * are the same: the relevant objset and associated dataset are owned by
 * zfsvfs, held, and long held on entry.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs.  If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via zfs_enter_verify_zp
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}

/* Final vfs release: free the zfsvfs_t and drop the active-fs count. */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}

#ifdef __i386__
static int desiredvnodes_backup;
#include <sys/vmmeter.h>


#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#endif

/*
 * On i386 shrink the global desiredvnodes tunable, which was sized with
 * UFS inodes in mind and is too large for ZFS.  No-op elsewhere.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit().  If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/* Restore desiredvnodes to the value saved by zfs_vnodes_adjust(). */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}

void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/* Nonzero while any ZFS filesystem is mounted. */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}

/*
 * Upgrade the on-disk ZPL version of the filesystem to 'newvers'.
 * Downgrades and versions unsupported by the pool are rejected.  When
 * crossing into ZPL_VERSION_SA the SA master node is created in the
 * same transaction.
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
/*
 * After a dataset rename, rewrite f_mntfromname for the renamed mount
 * and for every mount of a descendant dataset or snapshot.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		/* Children and snapshots continue with '/' or '@'. */
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif

/*
 * Find a prison with
ZFS info. 2280 * Return the ZFS info and the (locked) prison. 2281 */ 2282 static struct zfs_jailparam * 2283 zfs_jailparam_find(struct prison *spr, struct prison **prp) 2284 { 2285 struct prison *pr; 2286 struct zfs_jailparam *zjp; 2287 2288 for (pr = spr; ; pr = pr->pr_parent) { 2289 mtx_lock(&pr->pr_mtx); 2290 if (pr == &prison0) { 2291 zjp = &zfs_jailparam0; 2292 break; 2293 } 2294 zjp = osd_jail_get(pr, zfs_jailparam_slot); 2295 if (zjp != NULL) 2296 break; 2297 mtx_unlock(&pr->pr_mtx); 2298 } 2299 *prp = pr; 2300 2301 return (zjp); 2302 } 2303 2304 /* 2305 * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the 2306 * ZFS info and lock the prison. 2307 */ 2308 static void 2309 zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp) 2310 { 2311 struct prison *ppr; 2312 struct zfs_jailparam *zjp, *nzjp; 2313 void **rsv; 2314 2315 /* If this prison already has ZFS info, return that. */ 2316 zjp = zfs_jailparam_find(pr, &ppr); 2317 if (ppr == pr) 2318 goto done; 2319 2320 /* 2321 * Allocate a new info record. Then check again, in case something 2322 * changed during the allocation. 2323 */ 2324 mtx_unlock(&ppr->pr_mtx); 2325 nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK); 2326 rsv = osd_reserve(zfs_jailparam_slot); 2327 zjp = zfs_jailparam_find(pr, &ppr); 2328 if (ppr == pr) { 2329 free(nzjp, M_PRISON); 2330 osd_free_reserved(rsv); 2331 goto done; 2332 } 2333 /* Inherit the initial values from the ancestor. */ 2334 mtx_lock(&pr->pr_mtx); 2335 (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp); 2336 (void) memcpy(nzjp, zjp, sizeof (*zjp)); 2337 zjp = nzjp; 2338 mtx_unlock(&ppr->pr_mtx); 2339 done: 2340 if (zjpp != NULL) 2341 *zjpp = zjp; 2342 else 2343 mtx_unlock(&pr->pr_mtx); 2344 } 2345 2346 /* 2347 * Jail OSD methods for ZFS VFS info. 
2348 */ 2349 static int 2350 zfs_jailparam_create(void *obj, void *data) 2351 { 2352 struct prison *pr = obj; 2353 struct vfsoptlist *opts = data; 2354 int jsys; 2355 2356 if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 && 2357 jsys == JAIL_SYS_INHERIT) 2358 return (0); 2359 /* 2360 * Inherit a prison's initial values from its parent 2361 * (different from JAIL_SYS_INHERIT which also inherits changes). 2362 */ 2363 zfs_jailparam_alloc(pr, NULL); 2364 return (0); 2365 } 2366 2367 static int 2368 zfs_jailparam_get(void *obj, void *data) 2369 { 2370 struct prison *ppr, *pr = obj; 2371 struct vfsoptlist *opts = data; 2372 struct zfs_jailparam *zjp; 2373 int jsys, error; 2374 2375 zjp = zfs_jailparam_find(pr, &ppr); 2376 jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 2377 error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys)); 2378 if (error != 0 && error != ENOENT) 2379 goto done; 2380 if (jsys == JAIL_SYS_NEW) { 2381 error = vfs_setopt(opts, "zfs.mount_snapshot", 2382 &zjp->mount_snapshot, sizeof (zjp->mount_snapshot)); 2383 if (error != 0 && error != ENOENT) 2384 goto done; 2385 } else { 2386 /* 2387 * If this prison is inheriting its ZFS info, report 2388 * empty/zero parameters. 2389 */ 2390 static int mount_snapshot = 0; 2391 2392 error = vfs_setopt(opts, "zfs.mount_snapshot", 2393 &mount_snapshot, sizeof (mount_snapshot)); 2394 if (error != 0 && error != ENOENT) 2395 goto done; 2396 } 2397 error = 0; 2398 done: 2399 mtx_unlock(&ppr->pr_mtx); 2400 return (error); 2401 } 2402 2403 static int 2404 zfs_jailparam_set(void *obj, void *data) 2405 { 2406 struct prison *pr = obj; 2407 struct prison *ppr; 2408 struct vfsoptlist *opts = data; 2409 int error, jsys, mount_snapshot; 2410 2411 /* Set the parameters, which should be correct. 
*/ 2412 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2413 if (error == ENOENT) 2414 jsys = -1; 2415 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2416 sizeof (mount_snapshot)); 2417 if (error == ENOENT) 2418 mount_snapshot = -1; 2419 else 2420 jsys = JAIL_SYS_NEW; 2421 switch (jsys) { 2422 case JAIL_SYS_NEW: 2423 { 2424 /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */ 2425 struct zfs_jailparam *zjp; 2426 2427 /* 2428 * A child jail cannot have more permissions than its parent 2429 */ 2430 if (pr->pr_parent != &prison0) { 2431 zjp = zfs_jailparam_find(pr->pr_parent, &ppr); 2432 mtx_unlock(&ppr->pr_mtx); 2433 if (zjp->mount_snapshot < mount_snapshot) { 2434 return (EPERM); 2435 } 2436 } 2437 zfs_jailparam_alloc(pr, &zjp); 2438 if (mount_snapshot != -1) 2439 zjp->mount_snapshot = mount_snapshot; 2440 mtx_unlock(&pr->pr_mtx); 2441 break; 2442 } 2443 case JAIL_SYS_INHERIT: 2444 /* "zfs=inherit": inherit the parent's ZFS info. */ 2445 mtx_lock(&pr->pr_mtx); 2446 osd_jail_del(pr, zfs_jailparam_slot); 2447 mtx_unlock(&pr->pr_mtx); 2448 break; 2449 case -1: 2450 /* 2451 * If the setting being changed is not ZFS related 2452 * then do nothing. 2453 */ 2454 break; 2455 } 2456 2457 return (0); 2458 } 2459 2460 static int 2461 zfs_jailparam_check(void *obj __unused, void *data) 2462 { 2463 struct vfsoptlist *opts = data; 2464 int error, jsys, mount_snapshot; 2465 2466 /* Check that the parameters are correct. 
*/ 2467 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2468 if (error != ENOENT) { 2469 if (error != 0) 2470 return (error); 2471 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 2472 return (EINVAL); 2473 } 2474 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2475 sizeof (mount_snapshot)); 2476 if (error != ENOENT) { 2477 if (error != 0) 2478 return (error); 2479 if (mount_snapshot != 0 && mount_snapshot != 1) 2480 return (EINVAL); 2481 } 2482 return (0); 2483 } 2484 2485 static void 2486 zfs_jailparam_destroy(void *data) 2487 { 2488 2489 free(data, M_PRISON); 2490 } 2491 2492 static void 2493 zfs_jailparam_sysinit(void *arg __unused) 2494 { 2495 struct prison *pr; 2496 osd_method_t methods[PR_MAXMETHOD] = { 2497 [PR_METHOD_CREATE] = zfs_jailparam_create, 2498 [PR_METHOD_GET] = zfs_jailparam_get, 2499 [PR_METHOD_SET] = zfs_jailparam_set, 2500 [PR_METHOD_CHECK] = zfs_jailparam_check, 2501 }; 2502 2503 zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods); 2504 /* Copy the defaults to any existing prisons. */ 2505 sx_slock(&allprison_lock); 2506 TAILQ_FOREACH(pr, &allprison, pr_list) 2507 zfs_jailparam_alloc(pr, NULL); 2508 sx_sunlock(&allprison_lock); 2509 } 2510 2511 static void 2512 zfs_jailparam_sysuninit(void *arg __unused) 2513 { 2514 2515 osd_jail_deregister(zfs_jailparam_slot); 2516 } 2517 2518 SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2519 zfs_jailparam_sysinit, NULL); 2520 SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2521 zfs_jailparam_sysuninit, NULL); 2522