1 /* $NetBSD: ffs_vfsops.c,v 1.263 2010/12/27 18:49:42 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1991, 1993, 1994 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.263 2010/12/27 18:49:42 hannken Exp $"); 65 66 #if defined(_KERNEL_OPT) 67 #include "opt_ffs.h" 68 #include "opt_quota.h" 69 #include "opt_wapbl.h" 70 #endif 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/namei.h> 75 #include <sys/proc.h> 76 #include <sys/kernel.h> 77 #include <sys/vnode.h> 78 #include <sys/socket.h> 79 #include <sys/mount.h> 80 #include <sys/buf.h> 81 #include <sys/device.h> 82 #include <sys/mbuf.h> 83 #include <sys/file.h> 84 #include <sys/disklabel.h> 85 #include <sys/ioctl.h> 86 #include <sys/errno.h> 87 #include <sys/malloc.h> 88 #include <sys/pool.h> 89 #include <sys/lock.h> 90 #include <sys/sysctl.h> 91 #include <sys/conf.h> 92 #include <sys/kauth.h> 93 #include <sys/wapbl.h> 94 #include <sys/fstrans.h> 95 #include <sys/module.h> 96 97 #include <miscfs/genfs/genfs.h> 98 #include <miscfs/specfs/specdev.h> 99 100 #include <ufs/ufs/quota.h> 101 #include <ufs/ufs/ufsmount.h> 102 #include <ufs/ufs/inode.h> 103 #include <ufs/ufs/dir.h> 104 #include <ufs/ufs/ufs_extern.h> 105 #include <ufs/ufs/ufs_bswap.h> 106 #include <ufs/ufs/ufs_wapbl.h> 107 108 #include <ufs/ffs/fs.h> 109 #include <ufs/ffs/ffs_extern.h> 110 111 MODULE(MODULE_CLASS_VFS, ffs, NULL); 112 113 static int ffs_vfs_fsync(vnode_t *, int); 114 115 static struct sysctllog *ffs_sysctl_log; 116 117 /* how many times ffs_init() was called */ 118 int ffs_initcount = 0; 119 120 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc; 121 extern const struct vnodeopv_desc ffs_specop_opv_desc; 122 extern const struct vnodeopv_desc ffs_fifoop_opv_desc; 123 124 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = { 125 &ffs_vnodeop_opv_desc, 126 &ffs_specop_opv_desc, 127 &ffs_fifoop_opv_desc, 128 NULL, 129 }; 130 131 struct vfsops ffs_vfsops = { 132 MOUNT_FFS, 133 sizeof (struct ufs_args), 134 ffs_mount, 135 ufs_start, 136 ffs_unmount, 137 ufs_root, 138 ufs_quotactl, 139 ffs_statvfs, 140 ffs_sync, 141 ffs_vget, 142 ffs_fhtovp, 143 ffs_vptofh, 144 ffs_init, 145 ffs_reinit, 146 ffs_done, 147 ffs_mountroot, 148 ffs_snapshot, 149 ffs_extattrctl, 150 ffs_suspendctl, 151 genfs_renamelock_enter, 152 genfs_renamelock_exit, 153 ffs_vfs_fsync, 154 ffs_vnodeopv_descs, 155 0, 156 { NULL, NULL }, 157 }; 158 159 static const struct genfs_ops ffs_genfsops = { 160 .gop_size = ffs_gop_size, 161 .gop_alloc = ufs_gop_alloc, 162 .gop_write = genfs_gop_write, 163 .gop_markupdate = ufs_gop_markupdate, 164 }; 165 166 static const struct ufs_ops ffs_ufsops = { 167 .uo_itimes = ffs_itimes, 168 .uo_update = ffs_update, 169 .uo_truncate = ffs_truncate, 170 .uo_valloc = ffs_valloc, 171 .uo_vfree = ffs_vfree, 172 .uo_balloc = ffs_balloc, 173 .uo_unmark_vnode = (void (*)(vnode_t *))nullop, 174 }; 175 176 static int 177 ffs_modcmd(modcmd_t cmd, void *arg) 178 { 179 int error; 180 181 #if 0 182 extern int doasyncfree; 183 #endif 184 extern int ffs_log_changeopt; 185 186 switch (cmd) { 187 case MODULE_CMD_INIT: 188 error = vfs_attach(&ffs_vfsops); 189 if (error != 0) 190 break; 191 192 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 193 CTLFLAG_PERMANENT, 194 CTLTYPE_NODE, "vfs", NULL, 195 NULL, 0, NULL, 0, 196 CTL_VFS, CTL_EOL); 197 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 198 CTLFLAG_PERMANENT, 199 CTLTYPE_NODE, "ffs", 200 SYSCTL_DESCR("Berkeley Fast File System"), 201 NULL, 0, NULL, 0, 202 CTL_VFS, 1, CTL_EOL); 203 204 /* 205 * @@@ should we even bother with these first three? 206 */ 207 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 208 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 209 CTLTYPE_INT, "doclusterread", NULL, 210 sysctl_notavail, 0, NULL, 0, 211 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL); 212 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 213 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 214 CTLTYPE_INT, "doclusterwrite", NULL, 215 sysctl_notavail, 0, NULL, 0, 216 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL); 217 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 218 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 219 CTLTYPE_INT, "doreallocblks", NULL, 220 sysctl_notavail, 0, NULL, 0, 221 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL); 222 #if 0 223 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 224 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 225 CTLTYPE_INT, "doasyncfree", 226 SYSCTL_DESCR("Release dirty blocks asynchronously"), 227 NULL, 0, &doasyncfree, 0, 228 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL); 229 #endif 230 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 231 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 232 CTLTYPE_INT, "log_changeopt", 233 SYSCTL_DESCR("Log changes in optimization strategy"), 234 NULL, 0, &ffs_log_changeopt, 0, 235 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL); 236 break; 237 case MODULE_CMD_FINI: 238 error = vfs_detach(&ffs_vfsops); 239 if (error != 0) 240 break; 241 sysctl_teardown(&ffs_sysctl_log); 242 break; 243 default: 244 error = ENOTTY; 245 break; 246 } 247 248 return (error); 249 } 250 251 pool_cache_t ffs_inode_cache; 252 pool_cache_t ffs_dinode1_cache; 253 pool_cache_t ffs_dinode2_cache; 254 255 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t); 256 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); 257 258 /* 259 * Called by main() when ffs is going to be mounted as root. 260 */ 261 262 int 263 ffs_mountroot(void) 264 { 265 struct fs *fs; 266 struct mount *mp; 267 struct lwp *l = curlwp; /* XXX */ 268 struct ufsmount *ump; 269 int error; 270 271 if (device_class(root_device) != DV_DISK) 272 return (ENODEV); 273 274 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) { 275 vrele(rootvp); 276 return (error); 277 } 278 279 /* 280 * We always need to be able to mount the root file system. 281 */ 282 mp->mnt_flag |= MNT_FORCE; 283 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { 284 vfs_unbusy(mp, false, NULL); 285 vfs_destroy(mp); 286 return (error); 287 } 288 mp->mnt_flag &= ~MNT_FORCE; 289 mutex_enter(&mountlist_lock); 290 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 291 mutex_exit(&mountlist_lock); 292 ump = VFSTOUFS(mp); 293 fs = ump->um_fs; 294 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt)); 295 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); 296 (void)ffs_statvfs(mp, &mp->mnt_stat); 297 vfs_unbusy(mp, false, NULL); 298 setrootfstime((time_t)fs->fs_time); 299 return (0); 300 } 301 302 /* 303 * VFS Operations. 304 * 305 * mount system call 306 */ 307 int 308 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) 309 { 310 struct lwp *l = curlwp; 311 struct vnode *devvp = NULL; 312 struct ufs_args *args = data; 313 struct ufsmount *ump = NULL; 314 struct fs *fs; 315 int error = 0, flags, update; 316 mode_t accessmode; 317 318 if (*data_len < sizeof *args) 319 return EINVAL; 320 321 if (mp->mnt_flag & MNT_GETARGS) { 322 ump = VFSTOUFS(mp); 323 if (ump == NULL) 324 return EIO; 325 args->fspec = NULL; 326 *data_len = sizeof *args; 327 return 0; 328 } 329 330 update = mp->mnt_flag & MNT_UPDATE; 331 332 /* Check arguments */ 333 if (args->fspec != NULL) { 334 /* 335 * Look up the name and verify that it's sane. 336 */ 337 error = namei_simple_user(args->fspec, 338 NSM_FOLLOW_NOEMULROOT, &devvp); 339 if (error != 0) 340 return (error); 341 342 if (!update) { 343 /* 344 * Be sure this is a valid block device 345 */ 346 if (devvp->v_type != VBLK) 347 error = ENOTBLK; 348 else if (bdevsw_lookup(devvp->v_rdev) == NULL) 349 error = ENXIO; 350 } else { 351 /* 352 * Be sure we're still naming the same device 353 * used for our initial mount 354 */ 355 ump = VFSTOUFS(mp); 356 if (devvp != ump->um_devvp) { 357 if (devvp->v_rdev != ump->um_devvp->v_rdev) 358 error = EINVAL; 359 else { 360 vrele(devvp); 361 devvp = ump->um_devvp; 362 vref(devvp); 363 } 364 } 365 } 366 } else { 367 if (!update) { 368 /* New mounts must have a filename for the device */ 369 return (EINVAL); 370 } else { 371 /* Use the extant mount */ 372 ump = VFSTOUFS(mp); 373 devvp = ump->um_devvp; 374 vref(devvp); 375 } 376 } 377 378 /* 379 * If mount by non-root, then verify that user has necessary 380 * permissions on the device. 381 * 382 * Permission to update a mount is checked higher, so here we presume 383 * updating the mount is okay (for example, as far as securelevel goes) 384 * which leaves us with the normal check. 385 */ 386 if (error == 0) { 387 accessmode = VREAD; 388 if (update ? 389 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : 390 (mp->mnt_flag & MNT_RDONLY) == 0) 391 accessmode |= VWRITE; 392 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 393 error = genfs_can_mount(devvp, accessmode, l->l_cred); 394 VOP_UNLOCK(devvp); 395 } 396 397 if (error) { 398 vrele(devvp); 399 return (error); 400 } 401 402 #ifdef WAPBL 403 /* WAPBL can only be enabled on a r/w mount. */ 404 if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) { 405 mp->mnt_flag &= ~MNT_LOG; 406 } 407 #else /* !WAPBL */ 408 mp->mnt_flag &= ~MNT_LOG; 409 #endif /* !WAPBL */ 410 411 if (!update) { 412 int xflags; 413 414 if (mp->mnt_flag & MNT_RDONLY) 415 xflags = FREAD; 416 else 417 xflags = FREAD | FWRITE; 418 error = VOP_OPEN(devvp, xflags, FSCRED); 419 if (error) 420 goto fail; 421 error = ffs_mountfs(devvp, mp, l); 422 if (error) { 423 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 424 (void)VOP_CLOSE(devvp, xflags, NOCRED); 425 VOP_UNLOCK(devvp); 426 goto fail; 427 } 428 429 ump = VFSTOUFS(mp); 430 fs = ump->um_fs; 431 } else { 432 /* 433 * Update the mount. 434 */ 435 436 /* 437 * The initial mount got a reference on this 438 * device, so drop the one obtained via 439 * namei(), above. 440 */ 441 vrele(devvp); 442 443 ump = VFSTOUFS(mp); 444 fs = ump->um_fs; 445 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 446 /* 447 * Changing from r/w to r/o 448 */ 449 flags = WRITECLOSE; 450 if (mp->mnt_flag & MNT_FORCE) 451 flags |= FORCECLOSE; 452 error = ffs_flushfiles(mp, flags, l); 453 if (error == 0) 454 error = UFS_WAPBL_BEGIN(mp); 455 if (error == 0 && 456 ffs_cgupdate(ump, MNT_WAIT) == 0 && 457 fs->fs_clean & FS_WASCLEAN) { 458 if (mp->mnt_flag & MNT_SOFTDEP) 459 fs->fs_flags &= ~FS_DOSOFTDEP; 460 fs->fs_clean = FS_ISCLEAN; 461 (void) ffs_sbupdate(ump, MNT_WAIT); 462 } 463 if (error == 0) 464 UFS_WAPBL_END(mp); 465 if (error) 466 return (error); 467 } 468 469 #ifdef WAPBL 470 if ((mp->mnt_flag & MNT_LOG) == 0) { 471 error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE); 472 if (error) 473 return error; 474 } 475 #endif /* WAPBL */ 476 477 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 478 /* 479 * Finish change from r/w to r/o 480 */ 481 fs->fs_ronly = 1; 482 fs->fs_fmod = 0; 483 } 484 485 if (mp->mnt_flag & MNT_RELOAD) { 486 error = ffs_reload(mp, l->l_cred, l); 487 if (error) 488 return (error); 489 } 490 491 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { 492 /* 493 * Changing from read-only to read/write 494 */ 495 fs->fs_ronly = 0; 496 fs->fs_clean <<= 1; 497 fs->fs_fmod = 1; 498 #ifdef WAPBL 499 if (fs->fs_flags & FS_DOWAPBL) { 500 printf("%s: replaying log to disk\n", 501 fs->fs_fsmnt); 502 KDASSERT(mp->mnt_wapbl_replay); 503 error = wapbl_replay_write(mp->mnt_wapbl_replay, 504 devvp); 505 if (error) { 506 return error; 507 } 508 wapbl_replay_stop(mp->mnt_wapbl_replay); 509 fs->fs_clean = FS_WASCLEAN; 510 } 511 #endif /* WAPBL */ 512 if (fs->fs_snapinum[0] != 0) 513 ffs_snapshot_mount(mp); 514 } 515 516 #ifdef WAPBL 517 error = ffs_wapbl_start(mp); 518 if (error) 519 return error; 520 #endif /* WAPBL */ 521 522 if (args->fspec == NULL) 523 return 0; 524 } 525 526 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, 527 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); 528 if (error == 0) 529 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, 530 sizeof(fs->fs_fsmnt)); 531 fs->fs_flags &= ~FS_DOSOFTDEP; 532 if (fs->fs_fmod != 0) { /* XXX */ 533 int err; 534 535 fs->fs_fmod = 0; 536 if (fs->fs_clean & FS_WASCLEAN) 537 fs->fs_time = time_second; 538 else { 539 printf("%s: file system not clean (fs_clean=%#x); " 540 "please fsck(8)\n", mp->mnt_stat.f_mntfromname, 541 fs->fs_clean); 542 printf("%s: lost blocks %" PRId64 " files %d\n", 543 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks, 544 fs->fs_pendinginodes); 545 } 546 err = UFS_WAPBL_BEGIN(mp); 547 if (err == 0) { 548 (void) ffs_cgupdate(ump, MNT_WAIT); 549 UFS_WAPBL_END(mp); 550 } 551 } 552 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) { 553 printf("%s: `-o softdep' is no longer supported, " 554 "consider `-o log'\n", mp->mnt_stat.f_mntfromname); 555 mp->mnt_flag &= ~MNT_SOFTDEP; 556 } 557 558 return (error); 559 560 fail: 561 vrele(devvp); 562 return (error); 563 } 564 565 /* 566 * Reload all incore data for a filesystem (used after running fsck on 567 * the root filesystem and finding things to fix). The filesystem must 568 * be mounted read-only. 569 * 570 * Things to do to update the mount: 571 * 1) invalidate all cached meta-data. 572 * 2) re-read superblock from disk. 573 * 3) re-read summary information from disk. 574 * 4) invalidate all inactive vnodes. 575 * 5) invalidate all cached file data. 576 * 6) re-read inode data for all active vnodes. 577 */ 578 int 579 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) 580 { 581 struct vnode *vp, *mvp, *devvp; 582 struct inode *ip; 583 void *space; 584 struct buf *bp; 585 struct fs *fs, *newfs; 586 struct partinfo dpart; 587 int i, bsize, blks, error; 588 int32_t *lp; 589 struct ufsmount *ump; 590 daddr_t sblockloc; 591 592 if ((mp->mnt_flag & MNT_RDONLY) == 0) 593 return (EINVAL); 594 595 ump = VFSTOUFS(mp); 596 /* 597 * Step 1: invalidate all cached meta-data. 598 */ 599 devvp = ump->um_devvp; 600 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 601 error = vinvalbuf(devvp, 0, cred, l, 0, 0); 602 VOP_UNLOCK(devvp); 603 if (error) 604 panic("ffs_reload: dirty1"); 605 /* 606 * Step 2: re-read superblock from disk. 607 */ 608 fs = ump->um_fs; 609 610 /* XXX we don't handle possibility that superblock moved. */ 611 error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize, 612 NOCRED, 0, &bp); 613 if (error) { 614 brelse(bp, 0); 615 return (error); 616 } 617 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK); 618 memcpy(newfs, bp->b_data, fs->fs_sbsize); 619 #ifdef FFS_EI 620 if (ump->um_flags & UFS_NEEDSWAP) { 621 ffs_sb_swap((struct fs*)bp->b_data, newfs); 622 fs->fs_flags |= FS_SWAPPED; 623 } else 624 #endif 625 fs->fs_flags &= ~FS_SWAPPED; 626 if ((newfs->fs_magic != FS_UFS1_MAGIC && 627 newfs->fs_magic != FS_UFS2_MAGIC)|| 628 newfs->fs_bsize > MAXBSIZE || 629 newfs->fs_bsize < sizeof(struct fs)) { 630 brelse(bp, 0); 631 free(newfs, M_UFSMNT); 632 return (EIO); /* XXX needs translation */ 633 } 634 /* Store off old fs_sblockloc for fs_oldfscompat_read. */ 635 sblockloc = fs->fs_sblockloc; 636 /* 637 * Copy pointer fields back into superblock before copying in XXX 638 * new superblock. These should really be in the ufsmount. XXX 639 * Note that important parameters (eg fs_ncg) are unchanged. 640 */ 641 newfs->fs_csp = fs->fs_csp; 642 newfs->fs_maxcluster = fs->fs_maxcluster; 643 newfs->fs_contigdirs = fs->fs_contigdirs; 644 newfs->fs_ronly = fs->fs_ronly; 645 newfs->fs_active = fs->fs_active; 646 memcpy(fs, newfs, (u_int)fs->fs_sbsize); 647 brelse(bp, 0); 648 free(newfs, M_UFSMNT); 649 650 /* Recheck for apple UFS filesystem */ 651 ump->um_flags &= ~UFS_ISAPPLEUFS; 652 /* First check to see if this is tagged as an Apple UFS filesystem 653 * in the disklabel 654 */ 655 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) && 656 (dpart.part->p_fstype == FS_APPLEUFS)) { 657 ump->um_flags |= UFS_ISAPPLEUFS; 658 } 659 #ifdef APPLE_UFS 660 else { 661 /* Manually look for an apple ufs label, and if a valid one 662 * is found, then treat it like an Apple UFS filesystem anyway 663 */ 664 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE), 665 APPLEUFS_LABEL_SIZE, cred, 0, &bp); 666 if (error) { 667 brelse(bp, 0); 668 return (error); 669 } 670 error = ffs_appleufs_validate(fs->fs_fsmnt, 671 (struct appleufslabel *)bp->b_data, NULL); 672 if (error == 0) 673 ump->um_flags |= UFS_ISAPPLEUFS; 674 brelse(bp, 0); 675 bp = NULL; 676 } 677 #else 678 if (ump->um_flags & UFS_ISAPPLEUFS) 679 return (EIO); 680 #endif 681 682 if (UFS_MPISAPPLEUFS(ump)) { 683 /* see comment about NeXT below */ 684 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 685 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 686 mp->mnt_iflag |= IMNT_DTYPE; 687 } else { 688 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 689 ump->um_dirblksiz = DIRBLKSIZ; 690 if (ump->um_maxsymlinklen > 0) 691 mp->mnt_iflag |= IMNT_DTYPE; 692 else 693 mp->mnt_iflag &= ~IMNT_DTYPE; 694 } 695 ffs_oldfscompat_read(fs, ump, sblockloc); 696 697 mutex_enter(&ump->um_lock); 698 ump->um_maxfilesize = fs->fs_maxfilesize; 699 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 700 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 701 mp->mnt_stat.f_mntonname, fs->fs_flags, 702 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 703 if ((mp->mnt_flag & MNT_FORCE) == 0) { 704 mutex_exit(&ump->um_lock); 705 return (EINVAL); 706 } 707 } 708 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 709 fs->fs_pendingblocks = 0; 710 fs->fs_pendinginodes = 0; 711 } 712 mutex_exit(&ump->um_lock); 713 714 ffs_statvfs(mp, &mp->mnt_stat); 715 /* 716 * Step 3: re-read summary information from disk. 717 */ 718 blks = howmany(fs->fs_cssize, fs->fs_fsize); 719 space = fs->fs_csp; 720 for (i = 0; i < blks; i += fs->fs_frag) { 721 bsize = fs->fs_bsize; 722 if (i + fs->fs_frag > blks) 723 bsize = (blks - i) * fs->fs_fsize; 724 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize, 725 NOCRED, 0, &bp); 726 if (error) { 727 brelse(bp, 0); 728 return (error); 729 } 730 #ifdef FFS_EI 731 if (UFS_FSNEEDSWAP(fs)) 732 ffs_csum_swap((struct csum *)bp->b_data, 733 (struct csum *)space, bsize); 734 else 735 #endif 736 memcpy(space, bp->b_data, (size_t)bsize); 737 space = (char *)space + bsize; 738 brelse(bp, 0); 739 } 740 if (fs->fs_snapinum[0] != 0) 741 ffs_snapshot_mount(mp); 742 /* 743 * We no longer know anything about clusters per cylinder group. 744 */ 745 if (fs->fs_contigsumsize > 0) { 746 lp = fs->fs_maxcluster; 747 for (i = 0; i < fs->fs_ncg; i++) 748 *lp++ = fs->fs_contigsumsize; 749 } 750 751 /* Allocate a marker vnode. */ 752 if ((mvp = vnalloc(mp)) == NULL) 753 return ENOMEM; 754 /* 755 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() 756 * and vclean() can be called indirectly 757 */ 758 mutex_enter(&mntvnode_lock); 759 loop: 760 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { 761 vmark(mvp, vp); 762 if (vp->v_mount != mp || vismarker(vp)) 763 continue; 764 /* 765 * Step 4: invalidate all inactive vnodes. 766 */ 767 if (vrecycle(vp, &mntvnode_lock, l)) { 768 mutex_enter(&mntvnode_lock); 769 (void)vunmark(mvp); 770 goto loop; 771 } 772 /* 773 * Step 5: invalidate all cached file data. 774 */ 775 mutex_enter(&vp->v_interlock); 776 mutex_exit(&mntvnode_lock); 777 if (vget(vp, LK_EXCLUSIVE)) { 778 (void)vunmark(mvp); 779 goto loop; 780 } 781 if (vinvalbuf(vp, 0, cred, l, 0, 0)) 782 panic("ffs_reload: dirty2"); 783 /* 784 * Step 6: re-read inode data for all active vnodes. 785 */ 786 ip = VTOI(vp); 787 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 788 (int)fs->fs_bsize, NOCRED, 0, &bp); 789 if (error) { 790 brelse(bp, 0); 791 vput(vp); 792 (void)vunmark(mvp); 793 break; 794 } 795 ffs_load_inode(bp, ip, fs, ip->i_number); 796 brelse(bp, 0); 797 vput(vp); 798 mutex_enter(&mntvnode_lock); 799 } 800 mutex_exit(&mntvnode_lock); 801 vnfree(mvp); 802 return (error); 803 } 804 805 /* 806 * Possible superblock locations ordered from most to least likely. 807 */ 808 static const int sblock_try[] = SBLOCKSEARCH; 809 810 /* 811 * Common code for mount and mountroot 812 */ 813 int 814 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) 815 { 816 struct ufsmount *ump; 817 struct buf *bp; 818 struct fs *fs; 819 dev_t dev; 820 struct partinfo dpart; 821 void *space; 822 daddr_t sblockloc, fsblockloc; 823 int blks, fstype; 824 int error, i, bsize, ronly, bset = 0; 825 #ifdef FFS_EI 826 int needswap = 0; /* keep gcc happy */ 827 #endif 828 int32_t *lp; 829 kauth_cred_t cred; 830 u_int32_t sbsize = 8192; /* keep gcc happy*/ 831 int32_t fsbsize; 832 833 dev = devvp->v_rdev; 834 cred = l ? l->l_cred : NOCRED; 835 836 /* Flush out any old buffers remaining from a previous use. */ 837 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 838 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); 839 VOP_UNLOCK(devvp); 840 if (error) 841 return (error); 842 843 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 844 845 bp = NULL; 846 ump = NULL; 847 fs = NULL; 848 sblockloc = 0; 849 fstype = 0; 850 851 error = fstrans_mount(mp); 852 if (error) 853 return error; 854 855 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 856 memset(ump, 0, sizeof *ump); 857 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE); 858 error = ffs_snapshot_init(ump); 859 if (error) 860 goto out; 861 ump->um_ops = &ffs_ufsops; 862 863 #ifdef WAPBL 864 sbagain: 865 #endif 866 /* 867 * Try reading the superblock in each of its possible locations. 868 */ 869 for (i = 0; ; i++) { 870 if (bp != NULL) { 871 brelse(bp, BC_NOCACHE); 872 bp = NULL; 873 } 874 if (sblock_try[i] == -1) { 875 error = EINVAL; 876 fs = NULL; 877 goto out; 878 } 879 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred, 880 0, &bp); 881 if (error) { 882 fs = NULL; 883 goto out; 884 } 885 fs = (struct fs*)bp->b_data; 886 fsblockloc = sblockloc = sblock_try[i]; 887 if (fs->fs_magic == FS_UFS1_MAGIC) { 888 sbsize = fs->fs_sbsize; 889 fstype = UFS1; 890 fsbsize = fs->fs_bsize; 891 #ifdef FFS_EI 892 needswap = 0; 893 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) { 894 sbsize = bswap32(fs->fs_sbsize); 895 fstype = UFS1; 896 fsbsize = bswap32(fs->fs_bsize); 897 needswap = 1; 898 #endif 899 } else if (fs->fs_magic == FS_UFS2_MAGIC) { 900 sbsize = fs->fs_sbsize; 901 fstype = UFS2; 902 fsbsize = fs->fs_bsize; 903 #ifdef FFS_EI 904 needswap = 0; 905 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) { 906 sbsize = bswap32(fs->fs_sbsize); 907 fstype = UFS2; 908 fsbsize = bswap32(fs->fs_bsize); 909 needswap = 1; 910 #endif 911 } else 912 continue; 913 914 915 /* fs->fs_sblockloc isn't defined for old filesystems */ 916 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) { 917 if (sblockloc == SBLOCK_UFS2) 918 /* 919 * This is likely to be the first alternate 920 * in a filesystem with 64k blocks. 921 * Don't use it. 922 */ 923 continue; 924 fsblockloc = sblockloc; 925 } else { 926 fsblockloc = fs->fs_sblockloc; 927 #ifdef FFS_EI 928 if (needswap) 929 fsblockloc = bswap64(fsblockloc); 930 #endif 931 } 932 933 /* Check we haven't found an alternate superblock */ 934 if (fsblockloc != sblockloc) 935 continue; 936 937 /* Validate size of superblock */ 938 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs)) 939 continue; 940 941 /* Check that we can handle the file system blocksize */ 942 if (fsbsize > MAXBSIZE) { 943 printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n", 944 fsbsize, MAXBSIZE); 945 continue; 946 } 947 948 /* Ok seems to be a good superblock */ 949 break; 950 } 951 952 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK); 953 memcpy(fs, bp->b_data, sbsize); 954 ump->um_fs = fs; 955 956 #ifdef FFS_EI 957 if (needswap) { 958 ffs_sb_swap((struct fs*)bp->b_data, fs); 959 fs->fs_flags |= FS_SWAPPED; 960 } else 961 #endif 962 fs->fs_flags &= ~FS_SWAPPED; 963 964 #ifdef WAPBL 965 if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) { 966 error = ffs_wapbl_replay_start(mp, fs, devvp); 967 if (error && (mp->mnt_flag & MNT_FORCE) == 0) 968 goto out; 969 if (!error) { 970 if (!ronly) { 971 /* XXX fsmnt may be stale. */ 972 printf("%s: replaying log to disk\n", 973 fs->fs_fsmnt); 974 error = wapbl_replay_write(mp->mnt_wapbl_replay, 975 devvp); 976 if (error) 977 goto out; 978 wapbl_replay_stop(mp->mnt_wapbl_replay); 979 fs->fs_clean = FS_WASCLEAN; 980 } else { 981 /* XXX fsmnt may be stale */ 982 printf("%s: replaying log to memory\n", 983 fs->fs_fsmnt); 984 } 985 986 /* Force a re-read of the superblock */ 987 brelse(bp, BC_INVAL); 988 bp = NULL; 989 free(fs, M_UFSMNT); 990 fs = NULL; 991 goto sbagain; 992 } 993 } 994 #else /* !WAPBL */ 995 if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) { 996 error = EPERM; 997 goto out; 998 } 999 #endif /* !WAPBL */ 1000 1001 ffs_oldfscompat_read(fs, ump, sblockloc); 1002 ump->um_maxfilesize = fs->fs_maxfilesize; 1003 1004 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 1005 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 1006 mp->mnt_stat.f_mntonname, fs->fs_flags, 1007 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1008 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1009 error = EINVAL; 1010 goto out; 1011 } 1012 } 1013 1014 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 1015 fs->fs_pendingblocks = 0; 1016 fs->fs_pendinginodes = 0; 1017 } 1018 1019 ump->um_fstype = fstype; 1020 if (fs->fs_sbsize < SBLOCKSIZE) 1021 brelse(bp, BC_INVAL); 1022 else 1023 brelse(bp, 0); 1024 bp = NULL; 1025 1026 /* First check to see if this is tagged as an Apple UFS filesystem 1027 * in the disklabel 1028 */ 1029 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) && 1030 (dpart.part->p_fstype == FS_APPLEUFS)) { 1031 ump->um_flags |= UFS_ISAPPLEUFS; 1032 } 1033 #ifdef APPLE_UFS 1034 else { 1035 /* Manually look for an apple ufs label, and if a valid one 1036 * is found, then treat it like an Apple UFS filesystem anyway 1037 */ 1038 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE), 1039 APPLEUFS_LABEL_SIZE, cred, 0, &bp); 1040 if (error) 1041 goto out; 1042 error = ffs_appleufs_validate(fs->fs_fsmnt, 1043 (struct appleufslabel *)bp->b_data, NULL); 1044 if (error == 0) { 1045 ump->um_flags |= UFS_ISAPPLEUFS; 1046 } 1047 brelse(bp, 0); 1048 bp = NULL; 1049 } 1050 #else 1051 if (ump->um_flags & UFS_ISAPPLEUFS) { 1052 error = EINVAL; 1053 goto out; 1054 } 1055 #endif 1056 1057 #if 0 1058 /* 1059 * XXX This code changes the behaviour of mounting dirty filesystems, to 1060 * XXX require "mount -f ..." to mount them. This doesn't match what 1061 * XXX mount(8) describes and is disabled for now. 1062 */ 1063 /* 1064 * If the file system is not clean, don't allow it to be mounted 1065 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set 1066 * for the root file system.) 1067 */ 1068 if (fs->fs_flags & FS_DOWAPBL) { 1069 /* 1070 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL 1071 * bit is set, although there's a window in unmount where it 1072 * could be FS_ISCLEAN 1073 */ 1074 if ((mp->mnt_flag & MNT_FORCE) == 0 && 1075 (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) { 1076 error = EPERM; 1077 goto out; 1078 } 1079 } else 1080 if ((fs->fs_clean & FS_ISCLEAN) == 0 && 1081 (mp->mnt_flag & MNT_FORCE) == 0) { 1082 error = EPERM; 1083 goto out; 1084 } 1085 #endif 1086 1087 /* 1088 * verify that we can access the last block in the fs 1089 * if we're mounting read/write. 1090 */ 1091 1092 if (!ronly) { 1093 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize, 1094 cred, 0, &bp); 1095 if (bp->b_bcount != fs->fs_fsize) 1096 error = EINVAL; 1097 if (error) { 1098 bset = BC_INVAL; 1099 goto out; 1100 } 1101 brelse(bp, BC_INVAL); 1102 bp = NULL; 1103 } 1104 1105 fs->fs_ronly = ronly; 1106 /* Don't bump fs_clean if we're replaying journal */ 1107 if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) 1108 if (ronly == 0) { 1109 fs->fs_clean <<= 1; 1110 fs->fs_fmod = 1; 1111 } 1112 bsize = fs->fs_cssize; 1113 blks = howmany(bsize, fs->fs_fsize); 1114 if (fs->fs_contigsumsize > 0) 1115 bsize += fs->fs_ncg * sizeof(int32_t); 1116 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1117 space = malloc((u_long)bsize, M_UFSMNT, M_WAITOK); 1118 fs->fs_csp = space; 1119 for (i = 0; i < blks; i += fs->fs_frag) { 1120 bsize = fs->fs_bsize; 1121 if (i + fs->fs_frag > blks) 1122 bsize = (blks - i) * fs->fs_fsize; 1123 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize, 1124 cred, 0, &bp); 1125 if (error) { 1126 free(fs->fs_csp, M_UFSMNT); 1127 goto out; 1128 } 1129 #ifdef FFS_EI 1130 if (needswap) 1131 ffs_csum_swap((struct csum *)bp->b_data, 1132 (struct csum *)space, bsize); 1133 else 1134 #endif 1135 memcpy(space, bp->b_data, (u_int)bsize); 1136 1137 space = (char *)space + bsize; 1138 brelse(bp, 0); 1139 bp = NULL; 1140 } 1141 if (fs->fs_contigsumsize > 0) { 1142 fs->fs_maxcluster = lp = space; 1143 for (i = 0; i < fs->fs_ncg; i++) 1144 *lp++ = fs->fs_contigsumsize; 1145 space = lp; 1146 } 1147 bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1148 fs->fs_contigdirs = space; 1149 space = (char *)space + bsize; 1150 memset(fs->fs_contigdirs, 0, bsize); 1151 /* Compatibility for old filesystems - XXX */ 1152 if (fs->fs_avgfilesize <= 0) 1153 fs->fs_avgfilesize = AVFILESIZ; 1154 if (fs->fs_avgfpdir <= 0) 1155 fs->fs_avgfpdir = AFPDIR; 1156 fs->fs_active = NULL; 1157 mp->mnt_data = ump; 1158 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; 1159 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS); 1160 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 1161 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN; 1162 if (UFS_MPISAPPLEUFS(ump)) { 1163 /* NeXT used to keep short symlinks in the inode even 1164 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen 1165 * is probably -1, but we still need to be able to identify 1166 * short symlinks. 1167 */ 1168 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 1169 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 1170 mp->mnt_iflag |= IMNT_DTYPE; 1171 } else { 1172 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 1173 ump->um_dirblksiz = DIRBLKSIZ; 1174 if (ump->um_maxsymlinklen > 0) 1175 mp->mnt_iflag |= IMNT_DTYPE; 1176 else 1177 mp->mnt_iflag &= ~IMNT_DTYPE; 1178 } 1179 mp->mnt_fs_bshift = fs->fs_bshift; 1180 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ 1181 mp->mnt_flag |= MNT_LOCAL; 1182 mp->mnt_iflag |= IMNT_MPSAFE; 1183 #ifdef FFS_EI 1184 if (needswap) 1185 ump->um_flags |= UFS_NEEDSWAP; 1186 #endif 1187 ump->um_mountp = mp; 1188 ump->um_dev = dev; 1189 ump->um_devvp = devvp; 1190 ump->um_nindir = fs->fs_nindir; 1191 ump->um_lognindir = ffs(fs->fs_nindir) - 1; 1192 ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT; 1193 ump->um_seqinc = fs->fs_frag; 1194 for (i = 0; i < MAXQUOTAS; i++) 1195 ump->um_quotas[i] = NULLVP; 1196 devvp->v_specmountpoint = mp; 1197 if (ronly == 0 && fs->fs_snapinum[0] != 0) 1198 ffs_snapshot_mount(mp); 1199 1200 #ifdef WAPBL 1201 if (!ronly) { 1202 KDASSERT(fs->fs_ronly == 0); 1203 /* 1204 * ffs_wapbl_start() needs mp->mnt_stat initialised if it 1205 * needs to create a new log file in-filesystem. 1206 */ 1207 ffs_statvfs(mp, &mp->mnt_stat); 1208 1209 error = ffs_wapbl_start(mp); 1210 if (error) { 1211 free(fs->fs_csp, M_UFSMNT); 1212 goto out; 1213 } 1214 } 1215 #endif /* WAPBL */ 1216 #ifdef UFS_EXTATTR 1217 /* 1218 * Initialize file-backed extended attributes on UFS1 file 1219 * systems. 1220 */ 1221 if (ump->um_fstype == UFS1) { 1222 ufs_extattr_uepm_init(&ump->um_extattr); 1223 #ifdef UFS_EXTATTR_AUTOSTART 1224 /* 1225 * XXX Just ignore errors. Not clear that we should 1226 * XXX fail the mount in this case. 1227 */ 1228 (void) ufs_extattr_autostart(mp, l); 1229 #endif 1230 } 1231 #endif /* UFS_EXTATTR */ 1232 return (0); 1233 out: 1234 #ifdef WAPBL 1235 if (mp->mnt_wapbl_replay) { 1236 wapbl_replay_stop(mp->mnt_wapbl_replay); 1237 wapbl_replay_free(mp->mnt_wapbl_replay); 1238 mp->mnt_wapbl_replay = 0; 1239 } 1240 #endif 1241 1242 fstrans_unmount(mp); 1243 if (fs) 1244 free(fs, M_UFSMNT); 1245 devvp->v_specmountpoint = NULL; 1246 if (bp) 1247 brelse(bp, bset); 1248 if (ump) { 1249 if (ump->um_oldfscompat) 1250 free(ump->um_oldfscompat, M_UFSMNT); 1251 mutex_destroy(&ump->um_lock); 1252 free(ump, M_UFSMNT); 1253 mp->mnt_data = NULL; 1254 } 1255 return (error); 1256 } 1257 1258 /* 1259 * Sanity checks for loading old filesystem superblocks. 1260 * See ffs_oldfscompat_write below for unwound actions. 1261 * 1262 * XXX - Parts get retired eventually. 1263 * Unfortunately new bits get added. 1264 */ 1265 static void 1266 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc) 1267 { 1268 off_t maxfilesize; 1269 int32_t *extrasave; 1270 1271 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1272 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1273 return; 1274 1275 if (!ump->um_oldfscompat) 1276 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t), 1277 M_UFSMNT, M_WAITOK); 1278 1279 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512); 1280 extrasave = ump->um_oldfscompat; 1281 extrasave += 512/sizeof(int32_t); 1282 extrasave[0] = fs->fs_old_npsect; 1283 extrasave[1] = fs->fs_old_interleave; 1284 extrasave[2] = fs->fs_old_trackskew; 1285 1286 /* These fields will be overwritten by their 1287 * original values in fs_oldfscompat_write, so it is harmless 1288 * to modify them here. 1289 */ 1290 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; 1291 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; 1292 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; 1293 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; 1294 1295 fs->fs_maxbsize = fs->fs_bsize; 1296 fs->fs_time = fs->fs_old_time; 1297 fs->fs_size = fs->fs_old_size; 1298 fs->fs_dsize = fs->fs_old_dsize; 1299 fs->fs_csaddr = fs->fs_old_csaddr; 1300 fs->fs_sblockloc = sblockloc; 1301 1302 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL); 1303 1304 if (fs->fs_old_postblformat == FS_42POSTBLFMT) { 1305 fs->fs_old_nrpos = 8; 1306 fs->fs_old_npsect = fs->fs_old_nsect; 1307 fs->fs_old_interleave = 1; 1308 fs->fs_old_trackskew = 0; 1309 } 1310 1311 if (fs->fs_old_inodefmt < FS_44INODEFMT) { 1312 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 1313 fs->fs_qbmask = ~fs->fs_bmask; 1314 fs->fs_qfmask = ~fs->fs_fmask; 1315 } 1316 1317 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; 1318 if (fs->fs_maxfilesize > maxfilesize) 1319 fs->fs_maxfilesize = maxfilesize; 1320 1321 /* Compatibility for old filesystems */ 1322 if (fs->fs_avgfilesize <= 0) 1323 fs->fs_avgfilesize = AVFILESIZ; 1324 if (fs->fs_avgfpdir <= 0) 1325 fs->fs_avgfpdir = AFPDIR; 1326 1327 #if 0 1328 if (bigcgs) { 1329 fs->fs_save_cgsize = fs->fs_cgsize; 1330 fs->fs_cgsize = fs->fs_bsize; 1331 } 1332 #endif 1333 } 1334 1335 /* 1336 * Unwinding superblock updates for old filesystems. 1337 * See ffs_oldfscompat_read above for details. 1338 * 1339 * XXX - Parts get retired eventually. 1340 * Unfortunately new bits get added. 1341 */ 1342 static void 1343 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump) 1344 { 1345 int32_t *extrasave; 1346 1347 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1348 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1349 return; 1350 1351 fs->fs_old_time = fs->fs_time; 1352 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; 1353 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; 1354 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; 1355 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; 1356 fs->fs_old_flags = fs->fs_flags; 1357 1358 #if 0 1359 if (bigcgs) { 1360 fs->fs_cgsize = fs->fs_save_cgsize; 1361 } 1362 #endif 1363 1364 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512); 1365 extrasave = ump->um_oldfscompat; 1366 extrasave += 512/sizeof(int32_t); 1367 fs->fs_old_npsect = extrasave[0]; 1368 fs->fs_old_interleave = extrasave[1]; 1369 fs->fs_old_trackskew = extrasave[2]; 1370 1371 } 1372 1373 /* 1374 * unmount vfs operation 1375 */ 1376 int 1377 ffs_unmount(struct mount *mp, int mntflags) 1378 { 1379 struct lwp *l = curlwp; 1380 struct ufsmount *ump = VFSTOUFS(mp); 1381 struct fs *fs = ump->um_fs; 1382 int error, flags; 1383 #ifdef WAPBL 1384 extern int doforce; 1385 #endif 1386 1387 flags = 0; 1388 if (mntflags & MNT_FORCE) 1389 flags |= FORCECLOSE; 1390 if ((error = ffs_flushfiles(mp, flags, l)) != 0) 1391 return (error); 1392 error = UFS_WAPBL_BEGIN(mp); 1393 if (error == 0) 1394 if (fs->fs_ronly == 0 && 1395 ffs_cgupdate(ump, MNT_WAIT) == 0 && 1396 fs->fs_clean & FS_WASCLEAN) { 1397 fs->fs_clean = FS_ISCLEAN; 1398 fs->fs_fmod = 0; 1399 (void) ffs_sbupdate(ump, MNT_WAIT); 1400 } 1401 if (error == 0) 1402 UFS_WAPBL_END(mp); 1403 #ifdef WAPBL 1404 KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl)); 1405 if (mp->mnt_wapbl_replay) { 1406 KDASSERT(fs->fs_ronly); 1407 wapbl_replay_stop(mp->mnt_wapbl_replay); 1408 wapbl_replay_free(mp->mnt_wapbl_replay); 1409 mp->mnt_wapbl_replay = 0; 1410 } 1411 error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE)); 1412 if (error) { 1413 return error; 1414 } 1415 #endif /* WAPBL */ 1416 #ifdef UFS_EXTATTR 1417 if (ump->um_fstype == UFS1) { 1418 ufs_extattr_stop(mp, l); 1419 ufs_extattr_uepm_destroy(&ump->um_extattr); 1420 } 1421 #endif /* UFS_EXTATTR */ 1422 1423 if (ump->um_devvp->v_type != VBAD) 1424 ump->um_devvp->v_specmountpoint = NULL; 1425 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1426 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE, 1427 NOCRED); 1428 vput(ump->um_devvp); 1429 free(fs->fs_csp, M_UFSMNT); 1430 free(fs, M_UFSMNT); 1431 if (ump->um_oldfscompat != NULL) 1432 free(ump->um_oldfscompat, M_UFSMNT); 1433 mutex_destroy(&ump->um_lock); 1434 ffs_snapshot_fini(ump); 1435 free(ump, M_UFSMNT); 1436 mp->mnt_data = NULL; 1437 mp->mnt_flag &= ~MNT_LOCAL; 1438 fstrans_unmount(mp); 1439 return (0); 1440 } 1441 1442 /* 1443 * Flush out all the files in a filesystem. 1444 */ 1445 int 1446 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l) 1447 { 1448 extern int doforce; 1449 struct ufsmount *ump; 1450 int error; 1451 1452 if (!doforce) 1453 flags &= ~FORCECLOSE; 1454 ump = VFSTOUFS(mp); 1455 #ifdef QUOTA 1456 if (mp->mnt_flag & MNT_QUOTA) { 1457 int i; 1458 if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0) 1459 return (error); 1460 for (i = 0; i < MAXQUOTAS; i++) { 1461 if (ump->um_quotas[i] == NULLVP) 1462 continue; 1463 quotaoff(l, mp, i); 1464 } 1465 /* 1466 * Here we fall through to vflush again to ensure 1467 * that we have gotten rid of all the system vnodes. 1468 */ 1469 } 1470 #endif 1471 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) 1472 return (error); 1473 ffs_snapshot_unmount(mp); 1474 /* 1475 * Flush all the files. 1476 */ 1477 error = vflush(mp, NULLVP, flags); 1478 if (error) 1479 return (error); 1480 /* 1481 * Flush filesystem metadata. 1482 */ 1483 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1484 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0); 1485 VOP_UNLOCK(ump->um_devvp); 1486 if (flags & FORCECLOSE) /* XXXDBJ */ 1487 error = 0; 1488 1489 #ifdef WAPBL 1490 if (error) 1491 return error; 1492 if (mp->mnt_wapbl) { 1493 error = wapbl_flush(mp->mnt_wapbl, 1); 1494 if (flags & FORCECLOSE) 1495 error = 0; 1496 } 1497 #endif 1498 1499 return (error); 1500 } 1501 1502 /* 1503 * Get file system statistics. 1504 */ 1505 int 1506 ffs_statvfs(struct mount *mp, struct statvfs *sbp) 1507 { 1508 struct ufsmount *ump; 1509 struct fs *fs; 1510 1511 ump = VFSTOUFS(mp); 1512 fs = ump->um_fs; 1513 mutex_enter(&ump->um_lock); 1514 sbp->f_bsize = fs->fs_bsize; 1515 sbp->f_frsize = fs->fs_fsize; 1516 sbp->f_iosize = fs->fs_bsize; 1517 sbp->f_blocks = fs->fs_dsize; 1518 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) + 1519 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); 1520 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t) 1521 fs->fs_minfree) / (u_int64_t) 100; 1522 if (sbp->f_bfree > sbp->f_bresvd) 1523 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; 1524 else 1525 sbp->f_bavail = 0; 1526 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; 1527 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; 1528 sbp->f_favail = sbp->f_ffree; 1529 sbp->f_fresvd = 0; 1530 mutex_exit(&ump->um_lock); 1531 copy_statvfs_info(sbp, mp); 1532 1533 return (0); 1534 } 1535 1536 /* 1537 * Go through the disk queues to initiate sandbagged IO; 1538 * go through the inodes to write those that have been modified; 1539 * initiate the writing of the super block if it has been modified. 1540 * 1541 * Note: we are always called with the filesystem marked `MPBUSY'. 1542 */ 1543 int 1544 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) 1545 { 1546 struct vnode *vp, *mvp, *nvp; 1547 struct inode *ip; 1548 struct ufsmount *ump = VFSTOUFS(mp); 1549 struct fs *fs; 1550 int error, allerror = 0; 1551 bool is_suspending; 1552 1553 fs = ump->um_fs; 1554 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 1555 printf("fs = %s\n", fs->fs_fsmnt); 1556 panic("update: rofs mod"); 1557 } 1558 1559 /* Allocate a marker vnode. */ 1560 if ((mvp = vnalloc(mp)) == NULL) 1561 return (ENOMEM); 1562 1563 fstrans_start(mp, FSTRANS_SHARED); 1564 is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING); 1565 /* 1566 * Write back each (modified) inode. 1567 */ 1568 mutex_enter(&mntvnode_lock); 1569 loop: 1570 /* 1571 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() 1572 * and vclean() can be called indirectly 1573 */ 1574 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 1575 nvp = TAILQ_NEXT(vp, v_mntvnodes); 1576 /* 1577 * If the vnode that we are about to sync is no longer 1578 * associated with this mount point, start over. 1579 */ 1580 if (vp->v_mount != mp) 1581 goto loop; 1582 /* 1583 * Don't interfere with concurrent scans of this FS. 1584 */ 1585 if (vismarker(vp)) 1586 continue; 1587 mutex_enter(&vp->v_interlock); 1588 ip = VTOI(vp); 1589 1590 /* 1591 * Skip the vnode/inode if inaccessible. 1592 */ 1593 if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 || 1594 vp->v_type == VNON) { 1595 mutex_exit(&vp->v_interlock); 1596 continue; 1597 } 1598 1599 /* 1600 * We deliberately update inode times here. This will 1601 * prevent a massive queue of updates accumulating, only 1602 * to be handled by a call to unmount. 1603 * 1604 * XXX It would be better to have the syncer trickle these 1605 * out. Adjustment needed to allow registering vnodes for 1606 * sync when the vnode is clean, but the inode dirty. Or 1607 * have ufs itself trickle out inode updates. 1608 * 1609 * If doing a lazy sync, we don't care about metadata or 1610 * data updates, because they are handled by each vnode's 1611 * synclist entry. In this case we are only interested in 1612 * writing back modified inodes. 1613 */ 1614 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | 1615 IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 && 1616 (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) && 1617 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) { 1618 mutex_exit(&vp->v_interlock); 1619 continue; 1620 } 1621 if (vp->v_type == VBLK && is_suspending) { 1622 mutex_exit(&vp->v_interlock); 1623 continue; 1624 } 1625 vmark(mvp, vp); 1626 mutex_exit(&mntvnode_lock); 1627 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT); 1628 if (error) { 1629 mutex_enter(&mntvnode_lock); 1630 nvp = vunmark(mvp); 1631 if (error == ENOENT) { 1632 goto loop; 1633 } 1634 continue; 1635 } 1636 if (waitfor == MNT_LAZY) { 1637 error = UFS_WAPBL_BEGIN(vp->v_mount); 1638 if (!error) { 1639 error = ffs_update(vp, NULL, NULL, 1640 UPDATE_CLOSE); 1641 UFS_WAPBL_END(vp->v_mount); 1642 } 1643 } else { 1644 error = VOP_FSYNC(vp, cred, FSYNC_NOLOG | 1645 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0); 1646 } 1647 if (error) 1648 allerror = error; 1649 vput(vp); 1650 mutex_enter(&mntvnode_lock); 1651 nvp = vunmark(mvp); 1652 } 1653 mutex_exit(&mntvnode_lock); 1654 /* 1655 * Force stale file system control information to be flushed. 1656 */ 1657 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 || 1658 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) { 1659 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1660 if ((error = VOP_FSYNC(ump->um_devvp, cred, 1661 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG, 1662 0, 0)) != 0) 1663 allerror = error; 1664 VOP_UNLOCK(ump->um_devvp); 1665 if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) { 1666 mutex_enter(&mntvnode_lock); 1667 goto loop; 1668 } 1669 } 1670 #ifdef QUOTA 1671 qsync(mp); 1672 #endif 1673 /* 1674 * Write back modified superblock. 1675 */ 1676 if (fs->fs_fmod != 0) { 1677 fs->fs_fmod = 0; 1678 fs->fs_time = time_second; 1679 error = UFS_WAPBL_BEGIN(mp); 1680 if (error) 1681 allerror = error; 1682 else { 1683 if ((error = ffs_cgupdate(ump, waitfor))) 1684 allerror = error; 1685 UFS_WAPBL_END(mp); 1686 } 1687 } 1688 1689 #ifdef WAPBL 1690 if (mp->mnt_wapbl) { 1691 error = wapbl_flush(mp->mnt_wapbl, 0); 1692 if (error) 1693 allerror = error; 1694 } 1695 #endif 1696 1697 fstrans_done(mp); 1698 vnfree(mvp); 1699 return (allerror); 1700 } 1701 1702 /* 1703 * Look up a FFS dinode number to find its incore vnode, otherwise read it 1704 * in from disk. If it is in core, wait for the lock bit to clear, then 1705 * return the inode locked. Detection and handling of mount points must be 1706 * done by the calling routine. 1707 */ 1708 int 1709 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) 1710 { 1711 struct fs *fs; 1712 struct inode *ip; 1713 struct ufsmount *ump; 1714 struct buf *bp; 1715 struct vnode *vp; 1716 dev_t dev; 1717 int error; 1718 1719 ump = VFSTOUFS(mp); 1720 dev = ump->um_dev; 1721 1722 retry: 1723 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) 1724 return (0); 1725 1726 /* Allocate a new vnode/inode. */ 1727 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) { 1728 *vpp = NULL; 1729 return (error); 1730 } 1731 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK); 1732 1733 /* 1734 * If someone beat us to it, put back the freshly allocated 1735 * vnode/inode pair and retry. 1736 */ 1737 mutex_enter(&ufs_hashlock); 1738 if (ufs_ihashget(dev, ino, 0) != NULL) { 1739 mutex_exit(&ufs_hashlock); 1740 ungetnewvnode(vp); 1741 pool_cache_put(ffs_inode_cache, ip); 1742 goto retry; 1743 } 1744 1745 vp->v_vflag |= VV_LOCKSWORK; 1746 1747 /* 1748 * XXX MFS ends up here, too, to allocate an inode. Should we 1749 * XXX create another pool for MFS inodes? 1750 */ 1751 1752 memset(ip, 0, sizeof(struct inode)); 1753 vp->v_data = ip; 1754 ip->i_vnode = vp; 1755 ip->i_ump = ump; 1756 ip->i_fs = fs = ump->um_fs; 1757 ip->i_dev = dev; 1758 ip->i_number = ino; 1759 #ifdef QUOTA 1760 ufsquota_init(ip); 1761 #endif 1762 1763 /* 1764 * Initialize genfs node, we might proceed to destroy it in 1765 * error branches. 1766 */ 1767 genfs_node_init(vp, &ffs_genfsops); 1768 1769 /* 1770 * Put it onto its hash chain and lock it so that other requests for 1771 * this inode will block if they arrive while we are sleeping waiting 1772 * for old data structures to be purged or for the contents of the 1773 * disk portion of this inode to be read. 1774 */ 1775 1776 ufs_ihashins(ip); 1777 mutex_exit(&ufs_hashlock); 1778 1779 /* Read in the disk contents for the inode, copy into the inode. */ 1780 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1781 (int)fs->fs_bsize, NOCRED, 0, &bp); 1782 if (error) { 1783 1784 /* 1785 * The inode does not contain anything useful, so it would 1786 * be misleading to leave it on its hash chain. With mode 1787 * still zero, it will be unlinked and returned to the free 1788 * list by vput(). 1789 */ 1790 1791 vput(vp); 1792 brelse(bp, 0); 1793 *vpp = NULL; 1794 return (error); 1795 } 1796 if (ip->i_ump->um_fstype == UFS1) 1797 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache, 1798 PR_WAITOK); 1799 else 1800 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache, 1801 PR_WAITOK); 1802 ffs_load_inode(bp, ip, fs, ino); 1803 brelse(bp, 0); 1804 1805 /* 1806 * Initialize the vnode from the inode, check for aliases. 1807 * Note that the underlying vnode may have changed. 1808 */ 1809 1810 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 1811 1812 /* 1813 * Finish inode initialization now that aliasing has been resolved. 1814 */ 1815 1816 ip->i_devvp = ump->um_devvp; 1817 vref(ip->i_devvp); 1818 1819 /* 1820 * Ensure that uid and gid are correct. This is a temporary 1821 * fix until fsck has been changed to do the update. 1822 */ 1823 1824 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ 1825 ip->i_uid = ip->i_ffs1_ouid; /* XXX */ 1826 ip->i_gid = ip->i_ffs1_ogid; /* XXX */ 1827 } /* XXX */ 1828 uvm_vnp_setsize(vp, ip->i_size); 1829 *vpp = vp; 1830 return (0); 1831 } 1832 1833 /* 1834 * File handle to vnode 1835 * 1836 * Have to be really careful about stale file handles: 1837 * - check that the inode number is valid 1838 * - call ffs_vget() to get the locked inode 1839 * - check for an unallocated inode (i_mode == 0) 1840 * - check that the given client host has export rights and return 1841 * those rights via. exflagsp and credanonp 1842 */ 1843 int 1844 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 1845 { 1846 struct ufid ufh; 1847 struct fs *fs; 1848 1849 if (fhp->fid_len != sizeof(struct ufid)) 1850 return EINVAL; 1851 1852 memcpy(&ufh, fhp, sizeof(ufh)); 1853 fs = VFSTOUFS(mp)->um_fs; 1854 if (ufh.ufid_ino < ROOTINO || 1855 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg) 1856 return (ESTALE); 1857 return (ufs_fhtovp(mp, &ufh, vpp)); 1858 } 1859 1860 /* 1861 * Vnode pointer to File handle 1862 */ 1863 /* ARGSUSED */ 1864 int 1865 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) 1866 { 1867 struct inode *ip; 1868 struct ufid ufh; 1869 1870 if (*fh_size < sizeof(struct ufid)) { 1871 *fh_size = sizeof(struct ufid); 1872 return E2BIG; 1873 } 1874 ip = VTOI(vp); 1875 *fh_size = sizeof(struct ufid); 1876 memset(&ufh, 0, sizeof(ufh)); 1877 ufh.ufid_len = sizeof(struct ufid); 1878 ufh.ufid_ino = ip->i_number; 1879 ufh.ufid_gen = ip->i_gen; 1880 memcpy(fhp, &ufh, sizeof(ufh)); 1881 return (0); 1882 } 1883 1884 void 1885 ffs_init(void) 1886 { 1887 if (ffs_initcount++ > 0) 1888 return; 1889 1890 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0, 1891 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL); 1892 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0, 1893 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL); 1894 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0, 1895 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL); 1896 ufs_init(); 1897 } 1898 1899 void 1900 ffs_reinit(void) 1901 { 1902 1903 ufs_reinit(); 1904 } 1905 1906 void 1907 ffs_done(void) 1908 { 1909 if (--ffs_initcount > 0) 1910 return; 1911 1912 ufs_done(); 1913 pool_cache_destroy(ffs_dinode2_cache); 1914 pool_cache_destroy(ffs_dinode1_cache); 1915 pool_cache_destroy(ffs_inode_cache); 1916 } 1917 1918 /* 1919 * Write a superblock and associated information back to disk. 1920 */ 1921 int 1922 ffs_sbupdate(struct ufsmount *mp, int waitfor) 1923 { 1924 struct fs *fs = mp->um_fs; 1925 struct buf *bp; 1926 int error = 0; 1927 u_int32_t saveflag; 1928 1929 error = ffs_getblk(mp->um_devvp, 1930 fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK, 1931 fs->fs_sbsize, false, &bp); 1932 if (error) 1933 return error; 1934 saveflag = fs->fs_flags & FS_INTERNAL; 1935 fs->fs_flags &= ~FS_INTERNAL; 1936 1937 memcpy(bp->b_data, fs, fs->fs_sbsize); 1938 1939 ffs_oldfscompat_write((struct fs *)bp->b_data, mp); 1940 #ifdef FFS_EI 1941 if (mp->um_flags & UFS_NEEDSWAP) 1942 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data); 1943 #endif 1944 fs->fs_flags |= saveflag; 1945 1946 if (waitfor == MNT_WAIT) 1947 error = bwrite(bp); 1948 else 1949 bawrite(bp); 1950 return (error); 1951 } 1952 1953 int 1954 ffs_cgupdate(struct ufsmount *mp, int waitfor) 1955 { 1956 struct fs *fs = mp->um_fs; 1957 struct buf *bp; 1958 int blks; 1959 void *space; 1960 int i, size, error = 0, allerror = 0; 1961 1962 allerror = ffs_sbupdate(mp, waitfor); 1963 blks = howmany(fs->fs_cssize, fs->fs_fsize); 1964 space = fs->fs_csp; 1965 for (i = 0; i < blks; i += fs->fs_frag) { 1966 size = fs->fs_bsize; 1967 if (i + fs->fs_frag > blks) 1968 size = (blks - i) * fs->fs_fsize; 1969 error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), 1970 FFS_NOBLK, size, false, &bp); 1971 if (error) 1972 break; 1973 #ifdef FFS_EI 1974 if (mp->um_flags & UFS_NEEDSWAP) 1975 ffs_csum_swap((struct csum*)space, 1976 (struct csum*)bp->b_data, size); 1977 else 1978 #endif 1979 memcpy(bp->b_data, space, (u_int)size); 1980 space = (char *)space + size; 1981 if (waitfor == MNT_WAIT) 1982 error = bwrite(bp); 1983 else 1984 bawrite(bp); 1985 } 1986 if (!allerror && error) 1987 allerror = error; 1988 return (allerror); 1989 } 1990 1991 int 1992 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp, 1993 int attrnamespace, const char *attrname) 1994 { 1995 #ifdef UFS_EXTATTR 1996 /* 1997 * File-backed extended attributes are only supported on UFS1. 1998 * UFS2 has native extended attributes. 1999 */ 2000 if (VFSTOUFS(mp)->um_fstype == UFS1) 2001 return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname)); 2002 #endif 2003 return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname)); 2004 } 2005 2006 int 2007 ffs_suspendctl(struct mount *mp, int cmd) 2008 { 2009 int error; 2010 struct lwp *l = curlwp; 2011 2012 switch (cmd) { 2013 case SUSPEND_SUSPEND: 2014 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0) 2015 return error; 2016 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred); 2017 if (error == 0) 2018 error = fstrans_setstate(mp, FSTRANS_SUSPENDED); 2019 #ifdef WAPBL 2020 if (error == 0 && mp->mnt_wapbl) 2021 error = wapbl_flush(mp->mnt_wapbl, 1); 2022 #endif 2023 if (error != 0) { 2024 (void) fstrans_setstate(mp, FSTRANS_NORMAL); 2025 return error; 2026 } 2027 return 0; 2028 2029 case SUSPEND_RESUME: 2030 return fstrans_setstate(mp, FSTRANS_NORMAL); 2031 2032 default: 2033 return EINVAL; 2034 } 2035 } 2036 2037 /* 2038 * Synch vnode for a mounted file system. This is called for foreign 2039 * vnodes, i.e. non-ffs. 2040 */ 2041 static int 2042 ffs_vfs_fsync(vnode_t *vp, int flags) 2043 { 2044 int error, passes, skipmeta, i, pflags; 2045 buf_t *bp, *nbp; 2046 #ifdef WAPBL 2047 struct mount *mp; 2048 #endif 2049 2050 KASSERT(vp->v_type == VBLK); 2051 KASSERT(vp->v_specmountpoint != NULL); 2052 2053 /* 2054 * Flush all dirty data associated with the vnode. 2055 */ 2056 pflags = PGO_ALLPAGES | PGO_CLEANIT; 2057 if ((flags & FSYNC_WAIT) != 0) 2058 pflags |= PGO_SYNCIO; 2059 mutex_enter(&vp->v_interlock); 2060 error = VOP_PUTPAGES(vp, 0, 0, pflags); 2061 if (error) 2062 return error; 2063 2064 #ifdef WAPBL 2065 mp = vp->v_specmountpoint; 2066 if (mp && mp->mnt_wapbl) { 2067 /* 2068 * Don't bother writing out metadata if the syncer is 2069 * making the request. We will let the sync vnode 2070 * write it out in a single burst through a call to 2071 * VFS_SYNC(). 2072 */ 2073 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0) 2074 return 0; 2075 2076 /* 2077 * Don't flush the log if the vnode being flushed 2078 * contains no dirty buffers that could be in the log. 2079 */ 2080 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 2081 error = wapbl_flush(mp->mnt_wapbl, 0); 2082 if (error) 2083 return error; 2084 } 2085 2086 if ((flags & FSYNC_WAIT) != 0) { 2087 mutex_enter(&vp->v_interlock); 2088 while (vp->v_numoutput) 2089 cv_wait(&vp->v_cv, &vp->v_interlock); 2090 mutex_exit(&vp->v_interlock); 2091 } 2092 2093 return 0; 2094 } 2095 #endif /* WAPBL */ 2096 2097 /* 2098 * Write out metadata for non-logging file systems. XXX This block 2099 * should be simplified now that softdep is gone. 2100 */ 2101 passes = NIADDR + 1; 2102 skipmeta = 0; 2103 if (flags & FSYNC_WAIT) 2104 skipmeta = 1; 2105 2106 loop: 2107 mutex_enter(&bufcache_lock); 2108 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 2109 bp->b_cflags &= ~BC_SCANNED; 2110 } 2111 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 2112 nbp = LIST_NEXT(bp, b_vnbufs); 2113 if (bp->b_cflags & (BC_BUSY | BC_SCANNED)) 2114 continue; 2115 if ((bp->b_oflags & BO_DELWRI) == 0) 2116 panic("ffs_fsync: not dirty"); 2117 if (skipmeta && bp->b_lblkno < 0) 2118 continue; 2119 bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED; 2120 mutex_exit(&bufcache_lock); 2121 /* 2122 * On our final pass through, do all I/O synchronously 2123 * so that we can find out if our flush is failing 2124 * because of write errors. 2125 */ 2126 if (passes > 0 || !(flags & FSYNC_WAIT)) 2127 (void) bawrite(bp); 2128 else if ((error = bwrite(bp)) != 0) 2129 return (error); 2130 /* 2131 * Since we unlocked during the I/O, we need 2132 * to start from a known point. 2133 */ 2134 mutex_enter(&bufcache_lock); 2135 nbp = LIST_FIRST(&vp->v_dirtyblkhd); 2136 } 2137 mutex_exit(&bufcache_lock); 2138 if (skipmeta) { 2139 skipmeta = 0; 2140 goto loop; 2141 } 2142 2143 if ((flags & FSYNC_WAIT) != 0) { 2144 mutex_enter(&vp->v_interlock); 2145 while (vp->v_numoutput) { 2146 cv_wait(&vp->v_cv, &vp->v_interlock); 2147 } 2148 mutex_exit(&vp->v_interlock); 2149 2150 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 2151 /* 2152 * Block devices associated with filesystems may 2153 * have new I/O requests posted for them even if 2154 * the vnode is locked, so no amount of trying will 2155 * get them clean. Thus we give block devices a 2156 * good effort, then just give up. For all other file 2157 * types, go around and try again until it is clean. 2158 */ 2159 if (passes > 0) { 2160 passes--; 2161 goto loop; 2162 } 2163 #ifdef DIAGNOSTIC 2164 if (vp->v_type != VBLK) 2165 vprint("ffs_fsync: dirty", vp); 2166 #endif 2167 } 2168 } 2169 2170 if (error == 0 && (flags & FSYNC_CACHE) != 0) { 2171 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, 2172 kauth_cred_get()); 2173 } 2174 2175 return error; 2176 } 2177