1 /* $OpenBSD: ffs_vnops.c,v 1.102 2024/02/03 18:51:58 beck Exp $ */ 2 /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)ffs_vnops.c	8.10 (Berkeley) 8/10/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/event.h>
#include <sys/specdev.h>

#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Vnode operations table for regular FFS vnodes (files, directories,
 * symlinks).  Most operations are shared with UFS; only read, write,
 * fsync and reclaim are FFS-specific.
 */
const struct vops ffs_vops = {
	.vop_lookup	= ufs_lookup,
	.vop_create	= ufs_create,
	.vop_mknod	= ufs_mknod,
	.vop_open	= ufs_open,
	.vop_close	= ufs_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ffs_read,
	.vop_write	= ffs_write,
	.vop_ioctl	= ufs_ioctl,
	.vop_kqfilter	= ufs_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_fsync	= ffs_fsync,
	.vop_remove	= ufs_remove,
	.vop_link	= ufs_link,
	.vop_rename	= ufs_rename,
	.vop_mkdir	= ufs_mkdir,
	.vop_rmdir	= ufs_rmdir,
	.vop_symlink	= ufs_symlink,
	.vop_readdir	= ufs_readdir,
	.vop_readlink	= ufs_readlink,
	.vop_abortop	= vop_generic_abortop,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffs_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_bmap	= ufs_bmap,
	.vop_strategy	= ufs_strategy,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,
	.vop_pathconf	= ufs_pathconf,
	.vop_advlock	= ufs_advlock,
	.vop_bwrite	= vop_generic_bwrite
};

/*
 * Vnode operations table for special (device) vnodes that live on an
 * FFS filesystem: attribute/metadata ops go through UFS/FFS, actual
 * device I/O goes through the spec layer.
 */
const struct vops ffs_specvops = {
	.vop_close	= ufsspec_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ufsspec_read,
	.vop_write	= ufsspec_write,
	.vop_fsync	= ffs_fsync,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffs_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,

	/* XXX: Keep in sync with spec_vops */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= spec_open,
	.vop_ioctl	= spec_ioctl,
	.vop_kqfilter	= spec_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= spec_strategy,
	.vop_pathconf	= spec_pathconf,
	.vop_advlock	= spec_advlock,
	.vop_bwrite	= vop_generic_bwrite,
};

#ifdef FIFO
/*
 * Vnode operations table for FIFO vnodes on an FFS filesystem:
 * attribute/metadata ops through UFS/FFS, pipe semantics through fifofs.
 */
const struct vops ffs_fifovops = {
	.vop_close	= ufsfifo_close,
	.vop_access	= ufs_access,
	.vop_getattr	= ufs_getattr,
	.vop_setattr	= ufs_setattr,
	.vop_read	= ufsfifo_read,
	.vop_write	= ufsfifo_write,
	.vop_fsync	= ffs_fsync,
	.vop_inactive	= ufs_inactive,
	.vop_reclaim	= ffsfifo_reclaim,
	.vop_lock	= ufs_lock,
	.vop_unlock	= ufs_unlock,
	.vop_print	= ufs_print,
	.vop_islocked	= ufs_islocked,
	.vop_bwrite	= vop_generic_bwrite,

	/* XXX: Keep in sync with fifo_vops */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= fifo_open,
	.vop_ioctl	= fifo_ioctl,
	.vop_kqfilter	= fifo_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= vop_generic_badop,
	.vop_pathconf	= fifo_pathconf,
	.vop_advlock	= fifo_advlock
};
#endif /* FIFO */

/*
 * Vnode op for reading.
 *
 * Copies data from the buffer cache into the uio, one filesystem block
 * at a time, issuing cluster read-ahead when the access pattern looks
 * sequential.  Returns 0 on success or an errno.
 */
int
ffs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	int size, xfersize, blkoffset;
	mode_t mode;		/* fetched below but not otherwise used here */
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = DIP(ip, mode);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		/* short symlinks are stored inline in the inode, not read here */
		if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	/* bp is reset to NULL each iteration so the cleanup below is safe */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;	/* WAS blksize(fs, ip, lbn); */
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * Last block of the file: plain read.  Otherwise use
		 * cluster read-ahead when the previous read ended at
		 * lbn - 1 (sequential access) or the request spans
		 * more than this block.
		 */
		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
			error = bread(vp, lbn, size, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr ||
		    uio->uio_resid > xfersize) {
			error = bread_cluster(vp, lbn, size, &bp);
		} else
			error = bread(vp, lbn, size, &bp);

		if (error)
			break;
		ip->i_ci.ci_lastr = lbn;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* release the buffer still held after a break out of the loop */
	if (bp != NULL)
		brelse(bp);
	/*
	 * Mark the access time for update unless the mount is noatime;
	 * with noatime we still do it when the inode is already dirty
	 * (IN_CHANGE/IN_UPDATE), since an update is happening anyway.
	 */
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
	    (ip->i_flag & (IN_CHANGE | IN_UPDATE))) {
		ip->i_flag |= IN_ACCESS;
	}
	return (error);
}

/*
 * Vnode op for writing.
 *
 * Allocates/maps each affected filesystem block via UFS_BUF_ALLOC,
 * copies data in from the uio, and schedules the buffer for write
 * (sync, async, or delayed depending on ioflag and block coverage).
 * On error with IO_UNIT the partial write is rolled back by truncating
 * to the original size.
 */
int
ffs_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn;
	off_t osize;
	int blkoffset, error, extended, flags, ioflag, size, xfersize;
	size_t resid;
	ssize_t overrun;

	extended = 0;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	/*
	 * If writing 0 bytes, succeed and do not change
	 * update time or file offset (standards compliance)
	 */
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = DIP(ip, size);
		/* append-only files may only be written at EOF */
		if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("ffs_write: nonsync dir write");
		break;
	default:
		panic("ffs_write: type %d", vp->v_type);
	}

	fs = ip->i_fs;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	/* remember starting resid/size so we can detect progress and roll back */
	resid = uio->uio_resid;
	osize = DIP(ip, size);
	flags = ioflag & IO_SYNC ? B_SYNC : 0;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		/* partial-block writes need the rest of the block zeroed/read */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
			 ap->a_cred, flags, &bp)) != 0)
			break;
		if (uio->uio_offset + xfersize > DIP(ip, size)) {
			DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
			uvm_vnp_setsize(vp, DIP(ip, size));
			extended = 1;
		}
		(void)uvm_vnp_uncache(vp);

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we don't need to clear buffers that were
		 * allocated with the B_CLRBUF flag set.
		 */
		if (error != 0 && !(flags & B_CLRBUF))
			memset(bp->b_data + blkoffset, 0, xfersize);

		if (ioflag & IO_NOCACHE)
			bp->b_flags |= B_NOCACHE;

		/*
		 * Full blocks are written asynchronously right away;
		 * partial blocks are delayed in case they are extended
		 * by a subsequent write.
		 */
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize) {
			bawrite(bp);
		} else
			bdwrite(bp);

		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0 &&
	    !vnoperm(vp))
		DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID));
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			/* all-or-nothing: undo the partial write */
			(void)UFS_TRUNCATE(ip, osize,
			    ioflag & IO_SYNC, ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		error = UFS_UPDATE(ip, 1);
	}
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;
	return (error);
}

/*
 * Synch an open file.
 *
 * Flushes the vnode's dirty buffer list.  For MNT_WAIT requests, data
 * blocks are pushed first (skipmeta) and then metadata, repeating up to
 * NIADDR + 1 passes until the list is clean (buffers written during a
 * pass can dirty indirect blocks, hence the bounded retries).  Finally
 * the inode itself is updated.
 */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;
	s = splbio();
loop:
	LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
		bp->b_flags &= ~B_SCANNED;
	}
	LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/* metadata buffers carry negative logical block numbers */
		if (skipmeta && bp->b_lblkno < 0)
			continue;

		bremfree(bp);
		buf_acquire(bp);
		bp->b_flags |= B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	/* data pass done; go around again including metadata */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "ffs_fsync", INFSLP);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		/* XXX softdep was here. reconsider this locking dance */
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
510 */ 511 int 512 ffs_reclaim(void *v) 513 { 514 struct vop_reclaim_args *ap = v; 515 struct vnode *vp = ap->a_vp; 516 struct inode *ip = VTOI(vp); 517 int error; 518 519 if ((error = ufs_reclaim(vp)) != 0) 520 return (error); 521 522 if (ip->i_din1 != NULL) { 523 #ifdef FFS2 524 if (ip->i_ump->um_fstype == UM_UFS2) 525 pool_put(&ffs_dinode2_pool, ip->i_din2); 526 else 527 #endif 528 pool_put(&ffs_dinode1_pool, ip->i_din1); 529 } 530 531 pool_put(&ffs_ino_pool, ip); 532 533 vp->v_data = NULL; 534 535 return (0); 536 } 537 538 #ifdef FIFO 539 int 540 ffsfifo_reclaim(void *v) 541 { 542 fifo_reclaim(v); 543 return (ffs_reclaim(v)); 544 } 545 #endif 546