/*	$OpenBSD: nfs_bio.c,v 1.33 2001/12/19 08:58:06 art Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */
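
/*
 * Buffer cache interface for NFS: the bio read (nfs_bioread) and write
 * (nfs_write) paths, cache block allocation (nfs_getcacheblk), dirty
 * buffer invalidation (nfs_vinvalbuf), hand-off of asynchronous requests
 * to the nfsiods (nfs_asyncio), and the routine that turns a buffer into
 * actual read/write rpc's (nfs_doio).
 */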

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
struct nfsstats nfsstats;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	/*
	 * There is no way to modify a symbolic link via NFS or via
	 * VFS, so we don't check if the link was modified
	 */
	if (vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
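			/*
			 * Split the offset into a logical block number (lbn)
			 * and a byte offset within the block (on); bn is the
			 * same block expressed in DEV_BSIZE units for the
			 * buffer cache.
			 */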
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
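			/*
			 * Clamp the transfer to the valid region of the
			 * buffer; bytes past b_validend have not been read
			 * from the server.
			 */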
			diff = (on >= bp->b_validend) ?
			    0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);
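
	/*
	 * (The creds are cached here so that later rpc's for this node,
	 * e.g. writes pushed by an nfsiod that has no user context of
	 * its own, can still be issued with the writer's credentials.)
	 */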

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If IO_SYNC, do bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

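	/*
	 * On an interruptible mount, sleep with PCATCH so a signal can
	 * wake us; nfs_sigintr() then decides whether to give up and
	 * return NULL, otherwise retry getblk() with a bounded 2 * hz
	 * sleep.
	 */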
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i, s;

	if (nfs_numasync == 0)
		return (EIO);
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if ((bp->b_flags & B_READ) == 0) {
				bp->b_flags |= B_WRITEINPROG;
			}

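			/*
			 * Hand the buffer to the idle nfsiod: queue it on
			 * the shared nfs_bufq, claim the daemon by clearing
			 * its nfs_iodwant slot, and wake it up.
			 */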
			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}

	/*
	 * If it is a read, or a write already marked B_WRITEINPROG or
	 * B_NOCACHE, return EIO so the process will call nfs_doio() and
	 * do it synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing it synchronously. Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	splx(s);
	biodone(bp);
	return (0);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
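		/*
		 * Plain async writes may go to the server unstable and be
		 * committed later; sync writes, rewrites of a buffer that
		 * still needs a commit, and B_NOCACHE buffers must reach
		 * stable storage, so use FILESYNC for those.
		 */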
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
		printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		    vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE)
			bp->b_flags |= B_NEEDCOMMIT;
		else
			bp->b_flags &= ~B_NEEDCOMMIT;
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; report the interruption by setting B_EINTR instead.
		 * For the B_ASYNC case, B_EINTR is not relevant, so the rpc
		 * attempt is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}