1 /* $NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $");

/*
 * NOTE(review): the LFS_READWRITE switch below renames every extern symbol
 * and field access, so this file appears to be textually #included once by
 * FFS and once by LFS rather than compiled standalone -- which would also
 * explain the RCSID index of 1 (index 0 belonging to the including file).
 * TODO: confirm against ffs_vnops.c / lfs_vnops.c.
 */
#ifdef LFS_READWRITE
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct lfs
#define	I_FS			i_lfs
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	fs_bsize		lfs_bsize
#define	fs_maxfilesize		lfs_maxfilesize
#else
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#endif

/*
 * Vnode op for reading.
 *
 * v points at a struct vop_read_args (vnode, uio describing the user's
 * buffers and starting offset, ioflag, credentials).  Copies data from
 * the file into the uio, advancing uio_offset/uio_resid past whatever
 * was transferred.  Returns 0 or an errno value.
 *
 * Two paths: for FFS regular files, data is copied straight out of the
 * page cache via UBC mappings; everything else (LFS, directories) goes
 * through the buffer cache with read-ahead.
 */
/* ARGSUSED */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	void *win;
	vsize_t bytelen;
	struct buf *bp;
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;
	boolean_t usepc = FALSE;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

#ifdef DIAGNOSTIC
	/* Sanity: correct direction, and only types we know how to read. */
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		/*
		 * A symlink shorter than mnt_maxsymlinklen is stored in the
		 * inode itself and must not reach this code path.
		 */
		if ((int)ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     ip->i_ffs_blocks == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/* Reading at or past EOF transfers nothing, but still marks atime. */
	if (uio->uio_offset >= ip->i_ffs_size) {
		goto out;
	}

#ifndef LFS_READWRITE
	usepc = vp->v_type == VREG;
#endif
	if (usepc) {
		/*
		 * Page-cache path: map a window of the file with UBC,
		 * copy out, release, repeat until resid or EOF.
		 */
		while (uio->uio_resid > 0) {
			bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;

			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);
			if (error)
				break;
		}
		goto out;
	}

	/*
	 * Buffer-cache path.  Read one logical block per iteration,
	 * issuing read-ahead for the next block unless this block
	 * contains EOF.  bp is reset to NULL each iteration; on an early
	 * break it may still hold a buffer, released after the loop.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_ffs_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Transfer is capped by block end, request, and EOF. */
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* A break above can leave the current buffer unreleased. */
	if (bp != NULL)
		brelse(bp);

 out:
	/* Mark the access time; push it to disk now only for IO_SYNC. */
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	}
	return (error);
}

/*
 * Vnode op for writing
 */
/*
 * v points at a struct vop_write_args (vnode, uio describing the source
 * data and target offset, ioflag, credentials).  Returns 0 or an errno
 * value.  On success the inode size/times are updated; on failure the
 * file is truncated back to its original size and the uio is rewound,
 * so a failed write appears never to have happened.
 *
 * Like READ: FFS regular files go through the page cache via UBC;
 * everything else (LFS, directories, symlinks) uses the buffer cache.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct genfs_node *gp;
	FS *fs;
	struct buf *bp;
	struct proc *p;
	struct ucred *cred;
	ufs_daddr_t lbn;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int bsize, aflag;
	int ubc_alloc_flags;
	void *win;
	vsize_t bytelen;
	boolean_t async;
	boolean_t usepc = FALSE;

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	gp = VTOG(vp);

	/* UVM's idea of the size and the inode's must agree on entry. */
	KASSERT(vp->v_size == ip->i_ffs_size);
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_ffs_size;
		/* Append-only files may only be written at EOF. */
		if ((ip->i_ffs_flags & APPEND) && uio->uio_offset != ip->i_ffs_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		/* Directory metadata must always be written synchronously. */
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
	default:
		panic("%s: type", WRITE_S);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	/* Enforce RLIMIT_FSIZE: deliver SIGXFSZ and fail, per POSIX. */
	if (vp->v_type == VREG && p &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	if (uio->uio_resid == 0)
		return (0);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	/* Remember the starting state for rollback/flush at the end. */
	origoff = uio->uio_offset;
	resid = uio->uio_resid;
	osize = ip->i_ffs_size;
	bsize = fs->fs_bsize;
	error = 0;

#ifndef LFS_READWRITE
	usepc = vp->v_type == VREG;
#endif
	if (!usepc) {
		goto bcache;
	}

	/*
	 * [preallocoff, endallocoff) is the page-aligned middle region of
	 * the write that lies wholly beyond the old EOF; blocks there can
	 * be allocated without first initializing pages (see loop below).
	 */
	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - blkoff(fs, nsize);

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
	    lblkno(fs, osize) != lblkno(fs, nsize) &&
	    blkroundup(fs, osize) != osize) {
		error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
		    osize, cred, aflag);
		if (error) {
			goto out;
		}
		if (flags & B_SYNC) {
			vp->v_size = blkroundup(fs, osize);
			simple_lock(&vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & ~(bsize - 1)),
			    round_page(vp->v_size), PGO_CLEANIT | PGO_SYNCIO);
		}
	}

	ubc_alloc_flags = UBC_WRITE;
	while (uio->uio_resid > 0) {
		oldoff = uio->uio_offset;
		blkoffset = blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */

		if (uio->uio_offset < preallocoff ||
		    uio->uio_offset >= endallocoff) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error) {
				break;
			}
			ubc_alloc_flags &= ~UBC_FAULTBUSY;
		} else {
			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
			if (error) {
				break;
			}
			ubc_alloc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    ubc_alloc_flags);
		error = uiomove(win, bytelen, uio);
		ubc_release(win, 0);
		if (error) {
			break;
		}

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
		}

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

		/* Flush each time the write crosses a 64KB boundary. */
		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
			if (error) {
				break;
			}
		}
	}
	/* For IO_SYNC, push everything we touched out synchronously. */
	if (error == 0 && ioflag & IO_SYNC) {
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(bsize - 1)),
		    round_page(blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	goto out;

 bcache:
	/*
	 * Buffer-cache path.  First evict any cached pages over the range
	 * so the page cache and buffer cache don't hold stale copies.
	 */
	simple_lock(&vp->v_interlock);
	VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	while (uio->uio_resid > 0) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		/* Partial-block writes need the rest of the block cleared. */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);

		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_ffs_size) {
			ip->i_ffs_size = uio->uio_offset + xfersize;
			uvm_vnp_setsize(vp, ip->i_ffs_size);
		}
		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			break;
		}
#ifdef LFS_READWRITE
		/*
		 * Reserve segment space for the worst-case indirect-block
		 * chain before the bwrite; the buffer is written even if
		 * the reservation failed so it is not leaked.
		 */
		if (!error)
			error = lfs_reserve(fs, vp, btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		(void)VOP_BWRITE(bp);
		if (!error)
			lfs_reserve(fs, vp, -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
#else
		/* Sync write, async full block, or delayed partial block. */
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
 out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_ffs_mode &= ~(ISUID | ISGID);
	if (error) {
		/* Roll back: restore the old size and rewind the uio. */
		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
		    uio->uio_procp);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	KASSERT(vp->v_size == ip->i_ffs_size);
	return (error);
}