1 /* 2 * Copyright (c) 1989, 1991, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_vfsops.c 8.20 (Berkeley) 06/10/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/namei.h> 13 #include <sys/proc.h> 14 #include <sys/kernel.h> 15 #include <sys/vnode.h> 16 #include <sys/mount.h> 17 #include <sys/buf.h> 18 #include <sys/mbuf.h> 19 #include <sys/file.h> 20 #include <sys/disklabel.h> 21 #include <sys/ioctl.h> 22 #include <sys/errno.h> 23 #include <sys/malloc.h> 24 #include <sys/socket.h> 25 26 #include <miscfs/specfs/specdev.h> 27 28 #include <ufs/ufs/quota.h> 29 #include <ufs/ufs/inode.h> 30 #include <ufs/ufs/ufsmount.h> 31 #include <ufs/ufs/ufs_extern.h> 32 33 #include <ufs/lfs/lfs.h> 34 #include <ufs/lfs/lfs_extern.h> 35 36 int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *)); 37 38 struct vfsops lfs_vfsops = { 39 lfs_mount, 40 ufs_start, 41 lfs_unmount, 42 ufs_root, 43 ufs_quotactl, 44 lfs_statfs, 45 lfs_sync, 46 lfs_vget, 47 lfs_fhtovp, 48 lfs_vptofh, 49 lfs_init, 50 lfs_sysctl, 51 }; 52 53 /* 54 * Called by main() when ufs is going to be mounted as root. 55 */ 56 lfs_mountroot() 57 { 58 extern struct vnode *rootvp; 59 struct fs *fs; 60 struct mount *mp; 61 struct proc *p = curproc; /* XXX */ 62 int error; 63 64 /* 65 * Get vnodes for swapdev and rootdev. 66 */ 67 if ((error = bdevvp(swapdev, &swapdev_vp)) || 68 (error = bdevvp(rootdev, &rootvp))) { 69 printf("lfs_mountroot: can't setup bdevvp's"); 70 return (error); 71 } 72 if (error = vfs_rootmountalloc("lfs", "root_device", &mp)) 73 return (error); 74 if (error = lfs_mountfs(rootvp, mp, p)) { 75 mp->mnt_vfc->vfc_refcount--; 76 vfs_unbusy(mp, p); 77 free(mp, M_MOUNT); 78 return (error); 79 } 80 simple_lock(&mountlist_slock); 81 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 82 simple_unlock(&mountlist_slock); 83 (void)lfs_statfs(mp, &mp->mnt_stat, p); 84 vfs_unbusy(mp, p); 85 return (0); 86 } 87 88 /* 89 * VFS Operations. 90 * 91 * mount system call 92 */ 93 lfs_mount(mp, path, data, ndp, p) 94 register struct mount *mp; 95 char *path; 96 caddr_t data; 97 struct nameidata *ndp; 98 struct proc *p; 99 { 100 struct vnode *devvp; 101 struct ufs_args args; 102 struct ufsmount *ump; 103 register struct lfs *fs; /* LFS */ 104 u_int size; 105 int error; 106 mode_t accessmode; 107 108 if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) 109 return (error); 110 111 /* Until LFS can do NFS right. XXX */ 112 if (args.export.ex_flags & MNT_EXPORTED) 113 return (EINVAL); 114 115 /* 116 * If updating, check whether changing from read-only to 117 * read/write; if there is no device name, that's all we do. 118 */ 119 if (mp->mnt_flag & MNT_UPDATE) { 120 ump = VFSTOUFS(mp); 121 if (fs->lfs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) { 122 /* 123 * If upgrade to read-write by non-root, then verify 124 * that user has necessary permissions on the device. 125 */ 126 if (p->p_ucred->cr_uid != 0) { 127 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, 128 p); 129 if (error = VOP_ACCESS(ump->um_devvp, 130 VREAD | VWRITE, p->p_ucred, p)) { 131 VOP_UNLOCK(ump->um_devvp, 0, p); 132 return (error); 133 } 134 VOP_UNLOCK(ump->um_devvp, 0, p); 135 } 136 fs->lfs_ronly = 0; 137 } 138 if (args.fspec == 0) { 139 /* 140 * Process export requests. 141 */ 142 return (vfs_export(mp, &ump->um_export, &args.export)); 143 } 144 } 145 /* 146 * Not an update, or updating the name: look up the name 147 * and verify that it refers to a sensible block device. 148 */ 149 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 150 if (error = namei(ndp)) 151 return (error); 152 devvp = ndp->ni_vp; 153 if (devvp->v_type != VBLK) { 154 vrele(devvp); 155 return (ENOTBLK); 156 } 157 if (major(devvp->v_rdev) >= nblkdev) { 158 vrele(devvp); 159 return (ENXIO); 160 } 161 /* 162 * If mount by non-root, then verify that user has necessary 163 * permissions on the device. 164 */ 165 if (p->p_ucred->cr_uid != 0) { 166 accessmode = VREAD; 167 if ((mp->mnt_flag & MNT_RDONLY) == 0) 168 accessmode |= VWRITE; 169 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 170 if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) { 171 vput(devvp); 172 return (error); 173 } 174 VOP_UNLOCK(devvp, 0, p); 175 } 176 if ((mp->mnt_flag & MNT_UPDATE) == 0) 177 error = lfs_mountfs(devvp, mp, p); /* LFS */ 178 else { 179 if (devvp != ump->um_devvp) 180 error = EINVAL; /* needs translation */ 181 else 182 vrele(devvp); 183 } 184 if (error) { 185 vrele(devvp); 186 return (error); 187 } 188 ump = VFSTOUFS(mp); 189 fs = ump->um_lfs; /* LFS */ 190 #ifdef NOTLFS /* LFS */ 191 (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); 192 bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); 193 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 194 MNAMELEN); 195 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 196 &size); 197 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 198 (void) ufs_statfs(mp, &mp->mnt_stat, p); 199 #else 200 (void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size); 201 bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size); 202 bcopy((caddr_t)fs->lfs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 203 MNAMELEN); 204 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 205 &size); 206 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 207 (void) lfs_statfs(mp, &mp->mnt_stat, p); 208 #endif 209 return (0); 210 } 211 212 /* 213 * Common code for mount and mountroot 214 * LFS specific 215 */ 216 int 217 lfs_mountfs(devvp, mp, p) 218 register struct vnode *devvp; 219 struct mount *mp; 220 struct proc *p; 221 { 222 extern struct vnode *rootvp; 223 register struct lfs *fs; 224 register struct ufsmount *ump; 225 struct vnode *vp; 226 struct buf *bp; 227 struct partinfo dpart; 228 dev_t dev; 229 int error, i, ronly, size; 230 struct ucred *cred; 231 232 cred = p ? p->p_ucred : NOCRED; 233 /* 234 * Disallow multiple mounts of the same device. 235 * Disallow mounting of a device that is currently in use 236 * (except for root, which might share swap device for miniroot). 237 * Flush out any old buffers remaining from a previous use. 238 */ 239 if (error = vfs_mountedon(devvp)) 240 return (error); 241 if (vcount(devvp) > 1 && devvp != rootvp) 242 return (EBUSY); 243 if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) 244 return (error); 245 246 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 247 if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) 248 return (error); 249 250 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) 251 size = DEV_BSIZE; 252 else { 253 size = dpart.disklab->d_secsize; 254 #ifdef NEVER_USED 255 dpart.part->p_fstype = FS_LFS; 256 dpart.part->p_fsize = fs->lfs_fsize; /* frag size */ 257 dpart.part->p_frag = fs->lfs_frag; /* frags per block */ 258 dpart.part->p_cpg = fs->lfs_segshift; /* segment shift */ 259 #endif 260 } 261 262 /* Don't free random space on error. */ 263 bp = NULL; 264 ump = NULL; 265 266 /* Read in the superblock. */ 267 if (error = bread(devvp, LFS_LABELPAD / size, LFS_SBPAD, cred, &bp)) 268 goto out; 269 fs = (struct lfs *)bp->b_data; 270 271 /* Check the basics. */ 272 if (fs->lfs_magic != LFS_MAGIC || fs->lfs_bsize > MAXBSIZE || 273 fs->lfs_bsize < sizeof(struct lfs)) { 274 error = EINVAL; /* XXX needs translation */ 275 goto out; 276 } 277 278 /* Allocate the mount structure, copy the superblock into it. */ 279 ump = (struct ufsmount *)malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 280 fs = ump->um_lfs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK); 281 bcopy(bp->b_data, fs, sizeof(struct lfs)); 282 if (sizeof(struct lfs) < LFS_SBPAD) /* XXX why? */ 283 bp->b_flags |= B_INVAL; 284 brelse(bp); 285 bp = NULL; 286 287 /* Set up the I/O information */ 288 fs->lfs_iocount = 0; 289 290 /* Set up the ifile and lock aflags */ 291 fs->lfs_doifile = 0; 292 fs->lfs_writer = 0; 293 fs->lfs_dirops = 0; 294 fs->lfs_seglock = 0; 295 296 /* Set the file system readonly/modify bits. */ 297 fs->lfs_ronly = ronly; 298 if (ronly == 0) 299 fs->lfs_fmod = 1; 300 301 /* Initialize the mount structure. */ 302 dev = devvp->v_rdev; 303 mp->mnt_data = (qaddr_t)ump; 304 mp->mnt_stat.f_fsid.val[0] = (long)dev; 305 mp->mnt_stat.f_fsid.val[1] = lfs_mount_type; 306 mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen; 307 mp->mnt_flag |= MNT_LOCAL; 308 ump->um_mountp = mp; 309 ump->um_dev = dev; 310 ump->um_devvp = devvp; 311 ump->um_bptrtodb = 0; 312 ump->um_seqinc = 1 << fs->lfs_fsbtodb; 313 ump->um_nindir = fs->lfs_nindir; 314 for (i = 0; i < MAXQUOTAS; i++) 315 ump->um_quotas[i] = NULLVP; 316 devvp->v_specflags |= SI_MOUNTEDON; 317 318 /* 319 * We use the ifile vnode for almost every operation. Instead of 320 * retrieving it from the hash table each time we retrieve it here, 321 * artificially increment the reference count and keep a pointer 322 * to it in the incore copy of the superblock. 323 */ 324 if (error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) 325 goto out; 326 fs->lfs_ivnode = vp; 327 VREF(vp); 328 vput(vp); 329 330 return (0); 331 out: 332 if (bp) 333 brelse(bp); 334 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); 335 if (ump) { 336 free(ump->um_lfs, M_UFSMNT); 337 free(ump, M_UFSMNT); 338 mp->mnt_data = (qaddr_t)0; 339 } 340 return (error); 341 } 342 343 /* 344 * unmount system call 345 */ 346 lfs_unmount(mp, mntflags, p) 347 struct mount *mp; 348 int mntflags; 349 struct proc *p; 350 { 351 extern int doforce; 352 register struct ufsmount *ump; 353 register struct lfs *fs; 354 int i, error, flags, ronly; 355 356 flags = 0; 357 if (mntflags & MNT_FORCE) 358 flags |= FORCECLOSE; 359 360 ump = VFSTOUFS(mp); 361 fs = ump->um_lfs; 362 #ifdef QUOTA 363 if (mp->mnt_flag & MNT_QUOTA) { 364 if (error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags)) 365 return (error); 366 for (i = 0; i < MAXQUOTAS; i++) { 367 if (ump->um_quotas[i] == NULLVP) 368 continue; 369 quotaoff(p, mp, i); 370 } 371 /* 372 * Here we fall through to vflush again to ensure 373 * that we have gotten rid of all the system vnodes. 374 */ 375 } 376 #endif 377 if (error = vflush(mp, fs->lfs_ivnode, flags)) 378 return (error); 379 fs->lfs_clean = 1; 380 if (error = VFS_SYNC(mp, 1, p->p_ucred, p)) 381 return (error); 382 if (fs->lfs_ivnode->v_dirtyblkhd.lh_first) 383 panic("lfs_unmount: still dirty blocks on ifile vnode\n"); 384 vrele(fs->lfs_ivnode); 385 vgone(fs->lfs_ivnode); 386 387 ronly = !fs->lfs_ronly; 388 ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; 389 error = VOP_CLOSE(ump->um_devvp, 390 ronly ? FREAD : FREAD|FWRITE, NOCRED, p); 391 vrele(ump->um_devvp); 392 free(fs, M_UFSMNT); 393 free(ump, M_UFSMNT); 394 mp->mnt_data = (qaddr_t)0; 395 mp->mnt_flag &= ~MNT_LOCAL; 396 return (error); 397 } 398 399 /* 400 * Get file system statistics. 401 */ 402 lfs_statfs(mp, sbp, p) 403 struct mount *mp; 404 register struct statfs *sbp; 405 struct proc *p; 406 { 407 register struct lfs *fs; 408 register struct ufsmount *ump; 409 410 ump = VFSTOUFS(mp); 411 fs = ump->um_lfs; 412 if (fs->lfs_magic != LFS_MAGIC) 413 panic("lfs_statfs: magic"); 414 sbp->f_bsize = fs->lfs_fsize; 415 sbp->f_iosize = fs->lfs_bsize; 416 sbp->f_blocks = dbtofrags(fs,fs->lfs_dsize); 417 sbp->f_bfree = dbtofrags(fs, fs->lfs_bfree); 418 /* 419 * To compute the available space. Subtract the minimum free 420 * from the total number of blocks in the file system. Set avail 421 * to the smaller of this number and fs->lfs_bfree. 422 */ 423 sbp->f_bavail = fs->lfs_dsize * (100 - fs->lfs_minfree) / 100; 424 sbp->f_bavail = 425 sbp->f_bavail > fs->lfs_bfree ? fs->lfs_bfree : sbp->f_bavail; 426 sbp->f_bavail = dbtofrags(fs, sbp->f_bavail); 427 sbp->f_files = fs->lfs_nfiles; 428 sbp->f_ffree = sbp->f_bfree * INOPB(fs); 429 if (sbp != &mp->mnt_stat) { 430 sbp->f_type = mp->mnt_vfc->vfc_typenum; 431 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 432 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 433 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 434 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 435 } 436 return (0); 437 } 438 439 /* 440 * Go through the disk queues to initiate sandbagged IO; 441 * go through the inodes to write those that have been modified; 442 * initiate the writing of the super block if it has been modified. 443 * 444 * Note: we are always called with the filesystem marked `MPBUSY'. 445 */ 446 lfs_sync(mp, waitfor, cred, p) 447 struct mount *mp; 448 int waitfor; 449 struct ucred *cred; 450 struct proc *p; 451 { 452 int error; 453 454 /* All syncs must be checkpoints until roll-forward is implemented. */ 455 error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0)); 456 #ifdef QUOTA 457 qsync(mp); 458 #endif 459 return (error); 460 } 461 462 /* 463 * Look up an LFS dinode number to find its incore vnode. If not already 464 * in core, read it in from the specified device. Return the inode locked. 465 * Detection and handling of mount points must be done by the calling routine. 466 */ 467 int 468 lfs_vget(mp, ino, vpp) 469 struct mount *mp; 470 ino_t ino; 471 struct vnode **vpp; 472 { 473 register struct lfs *fs; 474 register struct inode *ip; 475 struct buf *bp; 476 struct ifile *ifp; 477 struct vnode *vp; 478 struct ufsmount *ump; 479 ufs_daddr_t daddr; 480 dev_t dev; 481 int error; 482 483 ump = VFSTOUFS(mp); 484 dev = ump->um_dev; 485 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 486 return (0); 487 488 /* Translate the inode number to a disk address. */ 489 fs = ump->um_lfs; 490 if (ino == LFS_IFILE_INUM) 491 daddr = fs->lfs_idaddr; 492 else { 493 LFS_IENTRY(ifp, fs, ino, bp); 494 daddr = ifp->if_daddr; 495 brelse(bp); 496 if (daddr == LFS_UNUSED_DADDR) 497 return (ENOENT); 498 } 499 500 /* Allocate new vnode/inode. */ 501 if (error = lfs_vcreate(mp, ino, &vp)) { 502 *vpp = NULL; 503 return (error); 504 } 505 506 /* 507 * Put it onto its hash chain and lock it so that other requests for 508 * this inode will block if they arrive while we are sleeping waiting 509 * for old data structures to be purged or for the contents of the 510 * disk portion of this inode to be read. 511 */ 512 ip = VTOI(vp); 513 ufs_ihashins(ip); 514 515 /* 516 * XXX 517 * This may not need to be here, logically it should go down with 518 * the i_devvp initialization. 519 * Ask Kirk. 520 */ 521 ip->i_lfs = ump->um_lfs; 522 523 /* Read in the disk contents for the inode, copy into the inode. */ 524 if (error = 525 bread(ump->um_devvp, daddr, (int)fs->lfs_bsize, NOCRED, &bp)) { 526 /* 527 * The inode does not contain anything useful, so it would 528 * be misleading to leave it on its hash chain. With mode 529 * still zero, it will be unlinked and returned to the free 530 * list by vput(). 531 */ 532 vput(vp); 533 brelse(bp); 534 *vpp = NULL; 535 return (error); 536 } 537 ip->i_din = *lfs_ifind(fs, ino, (struct dinode *)bp->b_data); 538 brelse(bp); 539 540 /* 541 * Initialize the vnode from the inode, check for aliases. In all 542 * cases re-init ip, the underlying vnode/inode may have changed. 543 */ 544 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { 545 vput(vp); 546 *vpp = NULL; 547 return (error); 548 } 549 /* 550 * Finish inode initialization now that aliasing has been resolved. 551 */ 552 ip->i_devvp = ump->um_devvp; 553 VREF(ip->i_devvp); 554 *vpp = vp; 555 return (0); 556 } 557 558 /* 559 * File handle to vnode 560 * 561 * Have to be really careful about stale file handles: 562 * - check that the inode number is valid 563 * - call lfs_vget() to get the locked inode 564 * - check for an unallocated inode (i_mode == 0) 565 * - check that the given client host has export rights and return 566 * those rights via. exflagsp and credanonp 567 * 568 * XXX 569 * use ifile to see if inode is allocated instead of reading off disk 570 * what is the relationship between my generational number and the NFS 571 * generational number. 572 */ 573 int 574 lfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) 575 register struct mount *mp; 576 struct fid *fhp; 577 struct mbuf *nam; 578 struct vnode **vpp; 579 int *exflagsp; 580 struct ucred **credanonp; 581 { 582 register struct ufid *ufhp; 583 584 ufhp = (struct ufid *)fhp; 585 if (ufhp->ufid_ino < ROOTINO) 586 return (ESTALE); 587 return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); 588 } 589 590 /* 591 * Vnode pointer to File handle 592 */ 593 /* ARGSUSED */ 594 lfs_vptofh(vp, fhp) 595 struct vnode *vp; 596 struct fid *fhp; 597 { 598 register struct inode *ip; 599 register struct ufid *ufhp; 600 601 ip = VTOI(vp); 602 ufhp = (struct ufid *)fhp; 603 ufhp->ufid_len = sizeof(struct ufid); 604 ufhp->ufid_ino = ip->i_number; 605 ufhp->ufid_gen = ip->i_gen; 606 return (0); 607 } 608 609 /* 610 * Initialize the filesystem, most work done by ufs_init. 611 */ 612 int lfs_mount_type; 613 614 int 615 lfs_init(vfsp) 616 struct vfsconf *vfsp; 617 { 618 619 lfs_mount_type = vfsp->vfc_typenum; 620 return (ufs_init(vfsp)); 621 } 622