1 /* $OpenBSD: ffs_vnops.c,v 1.85 2016/03/01 21:00:56 natano Exp $ */ 2 /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ffs_vnops.c 8.10 (Berkeley) 8/10/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/resourcevar.h> 38 #include <sys/kernel.h> 39 #include <sys/file.h> 40 #include <sys/stat.h> 41 #include <sys/buf.h> 42 #include <sys/mount.h> 43 #include <sys/vnode.h> 44 #include <sys/malloc.h> 45 #include <sys/signalvar.h> 46 #include <sys/pool.h> 47 #include <sys/event.h> 48 #include <sys/specdev.h> 49 50 #include <miscfs/fifofs/fifo.h> 51 52 #include <ufs/ufs/quota.h> 53 #include <ufs/ufs/inode.h> 54 #include <ufs/ufs/dir.h> 55 #include <ufs/ufs/ufs_extern.h> 56 #include <ufs/ufs/ufsmount.h> 57 58 #include <ufs/ffs/fs.h> 59 #include <ufs/ffs/ffs_extern.h> 60 61 struct vops ffs_vops = { 62 .vop_lookup = ufs_lookup, 63 .vop_create = ufs_create, 64 .vop_mknod = ufs_mknod, 65 .vop_open = ufs_open, 66 .vop_close = ufs_close, 67 .vop_access = ufs_access, 68 .vop_getattr = ufs_getattr, 69 .vop_setattr = ufs_setattr, 70 .vop_read = ffs_read, 71 .vop_write = ffs_write, 72 .vop_ioctl = ufs_ioctl, 73 .vop_poll = ufs_poll, 74 .vop_kqfilter = ufs_kqfilter, 75 .vop_revoke = vop_generic_revoke, 76 .vop_fsync = ffs_fsync, 77 .vop_remove = ufs_remove, 78 .vop_link = ufs_link, 79 .vop_rename = ufs_rename, 80 .vop_mkdir = ufs_mkdir, 81 .vop_rmdir = ufs_rmdir, 82 .vop_symlink = ufs_symlink, 83 .vop_readdir = ufs_readdir, 84 .vop_readlink = ufs_readlink, 85 .vop_abortop = vop_generic_abortop, 86 .vop_inactive = ufs_inactive, 87 .vop_reclaim = ffs_reclaim, 88 .vop_lock = ufs_lock, 89 .vop_unlock = ufs_unlock, 90 .vop_bmap = ufs_bmap, 91 .vop_strategy = ufs_strategy, 92 .vop_print = ufs_print, 93 .vop_islocked = ufs_islocked, 94 .vop_pathconf = ufs_pathconf, 95 .vop_advlock = ufs_advlock, 96 .vop_reallocblks = ffs_reallocblks, 97 .vop_bwrite = vop_generic_bwrite 98 }; 99 100 struct vops ffs_specvops = { 101 .vop_close = ufsspec_close, 102 .vop_access = ufs_access, 103 .vop_getattr = ufs_getattr, 104 .vop_setattr = ufs_setattr, 105 .vop_read = ufsspec_read, 106 .vop_write = ufsspec_write, 107 .vop_fsync = ffs_fsync, 108 .vop_inactive = ufs_inactive, 109 .vop_reclaim = ffs_reclaim, 110 .vop_lock = ufs_lock, 111 .vop_unlock = ufs_unlock, 112 .vop_print = ufs_print, 113 .vop_islocked = ufs_islocked, 114 115 /* XXX: Keep in sync with spec_vops */ 116 .vop_lookup = vop_generic_lookup, 117 .vop_create = spec_badop, 118 .vop_mknod = spec_badop, 119 .vop_open = spec_open, 120 .vop_ioctl = spec_ioctl, 121 .vop_poll = spec_poll, 122 .vop_kqfilter = spec_kqfilter, 123 .vop_revoke = vop_generic_revoke, 124 .vop_remove = spec_badop, 125 .vop_link = spec_badop, 126 .vop_rename = spec_badop, 127 .vop_mkdir = spec_badop, 128 .vop_rmdir = spec_badop, 129 .vop_symlink = spec_badop, 130 .vop_readdir = spec_badop, 131 .vop_readlink = spec_badop, 132 .vop_abortop = spec_badop, 133 .vop_bmap = vop_generic_bmap, 134 .vop_strategy = spec_strategy, 135 .vop_pathconf = spec_pathconf, 136 .vop_advlock = spec_advlock, 137 .vop_bwrite = vop_generic_bwrite, 138 }; 139 140 #ifdef FIFO 141 struct vops ffs_fifovops = { 142 .vop_close = ufsfifo_close, 143 .vop_access = ufs_access, 144 .vop_getattr = ufs_getattr, 145 .vop_setattr = ufs_setattr, 146 .vop_read = ufsfifo_read, 147 .vop_write = ufsfifo_write, 148 .vop_fsync = ffs_fsync, 149 .vop_inactive = ufs_inactive, 150 .vop_reclaim = ffsfifo_reclaim, 151 .vop_lock = ufs_lock, 152 .vop_unlock = ufs_unlock, 153 .vop_print = ufs_print, 154 .vop_islocked = ufs_islocked, 155 .vop_bwrite = vop_generic_bwrite, 156 157 /* XXX: Keep in sync with fifo_vops */ 158 .vop_lookup = vop_generic_lookup, 159 .vop_create = fifo_badop, 160 .vop_mknod = fifo_badop, 161 .vop_open = fifo_open, 162 .vop_ioctl = fifo_ioctl, 163 .vop_poll = fifo_poll, 164 .vop_kqfilter = fifo_kqfilter, 165 .vop_revoke = vop_generic_revoke, 166 .vop_remove = fifo_badop, 167 .vop_link = fifo_badop, 168 .vop_rename = fifo_badop, 169 .vop_mkdir = fifo_badop, 170 .vop_rmdir = fifo_badop, 171 .vop_symlink = fifo_badop, 172 .vop_readdir = fifo_badop, 173 .vop_readlink = fifo_badop, 174 .vop_abortop = fifo_badop, 175 .vop_bmap = vop_generic_bmap, 176 .vop_strategy = fifo_badop, 177 .vop_pathconf = fifo_pathconf, 178 .vop_advlock = fifo_advlock 179 }; 180 #endif /* FIFO */ 181 182 /* 183 * Vnode op for reading. 184 */ 185 int 186 ffs_read(void *v) 187 { 188 struct vop_read_args *ap = v; 189 struct vnode *vp; 190 struct inode *ip; 191 struct uio *uio; 192 struct fs *fs; 193 struct buf *bp; 194 daddr_t lbn, nextlbn; 195 off_t bytesinfile; 196 int size, xfersize, blkoffset; 197 mode_t mode; 198 int error; 199 200 vp = ap->a_vp; 201 ip = VTOI(vp); 202 mode = DIP(ip, mode); 203 uio = ap->a_uio; 204 205 #ifdef DIAGNOSTIC 206 if (uio->uio_rw != UIO_READ) 207 panic("ffs_read: mode"); 208 209 if (vp->v_type == VLNK) { 210 if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen || 211 (ip->i_ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) 212 panic("ffs_read: short symlink"); 213 } else if (vp->v_type != VREG && vp->v_type != VDIR) 214 panic("ffs_read: type %d", vp->v_type); 215 #endif 216 fs = ip->i_fs; 217 if (uio->uio_offset < 0) 218 return (EINVAL); 219 if (uio->uio_resid == 0) 220 return (0); 221 222 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 223 if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0) 224 break; 225 lbn = lblkno(fs, uio->uio_offset); 226 nextlbn = lbn + 1; 227 size = fs->fs_bsize; /* WAS blksize(fs, ip, lbn); */ 228 blkoffset = blkoff(fs, uio->uio_offset); 229 xfersize = fs->fs_bsize - blkoffset; 230 if (uio->uio_resid < xfersize) 231 xfersize = uio->uio_resid; 232 if (bytesinfile < xfersize) 233 xfersize = bytesinfile; 234 235 if (lblktosize(fs, nextlbn) >= DIP(ip, size)) 236 error = bread(vp, lbn, size, &bp); 237 else if (lbn - 1 == ip->i_ci.ci_lastr) { 238 error = bread_cluster(vp, lbn, size, &bp); 239 } else 240 error = bread(vp, lbn, size, &bp); 241 242 if (error) 243 break; 244 ip->i_ci.ci_lastr = lbn; 245 246 /* 247 * We should only get non-zero b_resid when an I/O error 248 * has occurred, which should cause us to break above. 249 * However, if the short read did not cause an error, 250 * then we want to ensure that we do not uiomove bad 251 * or uninitialized data. 252 */ 253 size -= bp->b_resid; 254 if (size < xfersize) { 255 if (size == 0) 256 break; 257 xfersize = size; 258 } 259 error = uiomove(bp->b_data + blkoffset, xfersize, uio); 260 if (error) 261 break; 262 brelse(bp); 263 } 264 if (bp != NULL) 265 brelse(bp); 266 if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || 267 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) { 268 ip->i_flag |= IN_ACCESS; 269 } 270 return (error); 271 } 272 273 /* 274 * Vnode op for writing. 275 */ 276 int 277 ffs_write(void *v) 278 { 279 struct vop_write_args *ap = v; 280 struct vnode *vp; 281 struct uio *uio; 282 struct inode *ip; 283 struct fs *fs; 284 struct buf *bp; 285 daddr_t lbn; 286 off_t osize; 287 int blkoffset, error, extended, flags, ioflag, size, xfersize; 288 size_t resid; 289 ssize_t overrun; 290 291 extended = 0; 292 ioflag = ap->a_ioflag; 293 uio = ap->a_uio; 294 vp = ap->a_vp; 295 ip = VTOI(vp); 296 297 #ifdef DIAGNOSTIC 298 if (uio->uio_rw != UIO_WRITE) 299 panic("ffs_write: mode"); 300 #endif 301 302 /* 303 * If writing 0 bytes, succeed and do not change 304 * update time or file offset (standards compliance) 305 */ 306 if (uio->uio_resid == 0) 307 return (0); 308 309 switch (vp->v_type) { 310 case VREG: 311 if (ioflag & IO_APPEND) 312 uio->uio_offset = DIP(ip, size); 313 if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size)) 314 return (EPERM); 315 /* FALLTHROUGH */ 316 case VLNK: 317 break; 318 case VDIR: 319 if ((ioflag & IO_SYNC) == 0) 320 panic("ffs_write: nonsync dir write"); 321 break; 322 default: 323 panic("ffs_write: type"); 324 } 325 326 fs = ip->i_fs; 327 if (uio->uio_offset < 0 || 328 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) 329 return (EFBIG); 330 331 /* do the filesize rlimit check */ 332 if ((error = vn_fsizechk(vp, uio, ioflag, &overrun))) 333 return (error); 334 335 resid = uio->uio_resid; 336 osize = DIP(ip, size); 337 flags = ioflag & IO_SYNC ? B_SYNC : 0; 338 339 for (error = 0; uio->uio_resid > 0;) { 340 lbn = lblkno(fs, uio->uio_offset); 341 blkoffset = blkoff(fs, uio->uio_offset); 342 xfersize = fs->fs_bsize - blkoffset; 343 if (uio->uio_resid < xfersize) 344 xfersize = uio->uio_resid; 345 if (fs->fs_bsize > xfersize) 346 flags |= B_CLRBUF; 347 else 348 flags &= ~B_CLRBUF; 349 350 if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize, 351 ap->a_cred, flags, &bp)) != 0) 352 break; 353 if (uio->uio_offset + xfersize > DIP(ip, size)) { 354 DIP_ASSIGN(ip, size, uio->uio_offset + xfersize); 355 uvm_vnp_setsize(vp, DIP(ip, size)); 356 extended = 1; 357 } 358 (void)uvm_vnp_uncache(vp); 359 360 size = blksize(fs, ip, lbn) - bp->b_resid; 361 if (size < xfersize) 362 xfersize = size; 363 364 error = uiomove(bp->b_data + blkoffset, xfersize, uio); 365 366 if (error != 0) 367 memset(bp->b_data + blkoffset, 0, xfersize); 368 369 #if 0 370 if (ioflag & IO_NOCACHE) 371 bp->b_flags |= B_NOCACHE; 372 #endif 373 if (ioflag & IO_SYNC) 374 (void)bwrite(bp); 375 else if (xfersize + blkoffset == fs->fs_bsize) { 376 bawrite(bp); 377 } else 378 bdwrite(bp); 379 380 if (error || xfersize == 0) 381 break; 382 ip->i_flag |= IN_CHANGE | IN_UPDATE; 383 } 384 /* 385 * If we successfully wrote any data, and we are not the superuser 386 * we clear the setuid and setgid bits as a precaution against 387 * tampering. 388 */ 389 if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) 390 DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID)); 391 if (resid > uio->uio_resid) 392 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); 393 if (error) { 394 if (ioflag & IO_UNIT) { 395 (void)UFS_TRUNCATE(ip, osize, 396 ioflag & IO_SYNC, ap->a_cred); 397 uio->uio_offset -= resid - uio->uio_resid; 398 uio->uio_resid = resid; 399 } 400 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { 401 error = UFS_UPDATE(ip, 1); 402 } 403 /* correct the result for writes clamped by vn_fsizechk() */ 404 uio->uio_resid += overrun; 405 return (error); 406 } 407 408 /* 409 * Synch an open file. 410 */ 411 int 412 ffs_fsync(void *v) 413 { 414 struct vop_fsync_args *ap = v; 415 struct vnode *vp = ap->a_vp; 416 struct buf *bp, *nbp; 417 int s, error, passes, skipmeta; 418 419 if (vp->v_type == VBLK && 420 vp->v_specmountpoint != NULL && 421 (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) 422 softdep_fsync_mountdev(vp, ap->a_waitfor); 423 424 /* 425 * Flush all dirty buffers associated with a vnode. 426 */ 427 passes = NIADDR + 1; 428 skipmeta = 0; 429 if (ap->a_waitfor == MNT_WAIT) 430 skipmeta = 1; 431 s = splbio(); 432 loop: 433 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; 434 bp = LIST_NEXT(bp, b_vnbufs)) 435 bp->b_flags &= ~B_SCANNED; 436 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 437 nbp = LIST_NEXT(bp, b_vnbufs); 438 /* 439 * Reasons to skip this buffer: it has already been considered 440 * on this pass, this pass is the first time through on a 441 * synchronous flush request and the buffer being considered 442 * is metadata, the buffer has dependencies that will cause 443 * it to be redirtied and it has not already been deferred, 444 * or it is already being written. 445 */ 446 if (bp->b_flags & (B_BUSY | B_SCANNED)) 447 continue; 448 if ((bp->b_flags & B_DELWRI) == 0) 449 panic("ffs_fsync: not dirty"); 450 if (skipmeta && bp->b_lblkno < 0) 451 continue; 452 if (ap->a_waitfor != MNT_WAIT && 453 LIST_FIRST(&bp->b_dep) != NULL && 454 (bp->b_flags & B_DEFERRED) == 0 && 455 buf_countdeps(bp, 0, 1)) { 456 bp->b_flags |= B_DEFERRED; 457 continue; 458 } 459 460 bremfree(bp); 461 buf_acquire(bp); 462 bp->b_flags |= B_SCANNED; 463 splx(s); 464 /* 465 * On our final pass through, do all I/O synchronously 466 * so that we can find out if our flush is failing 467 * because of write errors. 468 */ 469 if (passes > 0 || ap->a_waitfor != MNT_WAIT) 470 (void) bawrite(bp); 471 else if ((error = bwrite(bp)) != 0) 472 return (error); 473 s = splbio(); 474 /* 475 * Since we may have slept during the I/O, we need 476 * to start from a known point. 477 */ 478 nbp = LIST_FIRST(&vp->v_dirtyblkhd); 479 } 480 if (skipmeta) { 481 skipmeta = 0; 482 goto loop; 483 } 484 if (ap->a_waitfor == MNT_WAIT) { 485 vwaitforio(vp, 0, "ffs_fsync", 0); 486 487 /* 488 * Ensure that any filesystem metadata associated 489 * with the vnode has been written. 490 */ 491 splx(s); 492 if ((error = softdep_sync_metadata(ap)) != 0) 493 return (error); 494 s = splbio(); 495 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 496 /* 497 * Block devices associated with filesystems may 498 * have new I/O requests posted for them even if 499 * the vnode is locked, so no amount of trying will 500 * get them clean. Thus we give block devices a 501 * good effort, then just give up. For all other file 502 * types, go around and try again until it is clean. 503 */ 504 if (passes > 0) { 505 passes -= 1; 506 goto loop; 507 } 508 #ifdef DIAGNOSTIC 509 if (vp->v_type != VBLK) 510 vprint("ffs_fsync: dirty", vp); 511 #endif 512 } 513 } 514 splx(s); 515 return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); 516 } 517 518 /* 519 * Reclaim an inode so that it can be used for other purposes. 520 */ 521 int 522 ffs_reclaim(void *v) 523 { 524 struct vop_reclaim_args *ap = v; 525 struct vnode *vp = ap->a_vp; 526 struct inode *ip = VTOI(vp); 527 int error; 528 529 if ((error = ufs_reclaim(vp, ap->a_p)) != 0) 530 return (error); 531 532 if (ip->i_din1 != NULL) { 533 #ifdef FFS2 534 if (ip->i_ump->um_fstype == UM_UFS2) 535 pool_put(&ffs_dinode2_pool, ip->i_din2); 536 else 537 #endif 538 pool_put(&ffs_dinode1_pool, ip->i_din1); 539 } 540 541 pool_put(&ffs_ino_pool, ip); 542 543 vp->v_data = NULL; 544 545 return (0); 546 } 547 548 #ifdef FIFO 549 int 550 ffsfifo_reclaim(void *v) 551 { 552 fifo_reclaim(v); 553 return (ffs_reclaim(v)); 554 } 555 #endif 556