/*	$OpenBSD: nfs_bio.c,v 1.84 2019/07/25 01:43:21 cheloha Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;
uint32_t nfs_bufqmax, nfs_bufqlen;

struct buf *nfs_getcacheblk(struct vnode *, daddr_t, int, struct proc *);

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 */
	if (np->n_flag & NMODIFIED) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = NULL;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, 1)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = ulmin(biosize - on, uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = ulmin(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			panic("nfsbioread: type %x unexpected", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, n, uio);
		}

		if (vp->v_type == VLNK)
			n = 0;

		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;
	ssize_t overrun;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = ulmin(biosize - on, uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp) {
			error = EINTR;
			goto out;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR) {
				error = EINTR;
				goto out;
			}
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			goto out;
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				goto out;
		} else if ((n + on) == biosize) {
			bp->b_proc = NULL;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

/*out: XXX belongs here??? */
	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

out:
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;

	return (error);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct proc *p)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, INFSLP);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, SEC_TO_NSEC(2));
		}
	} else
		bp = getblk(vp, bn, size, 0, INFSLP);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np = VTONFS(vp);
	uint64_t stimeo;
	int error, sintr;

	stimeo = INFSLP;
	error = sintr = 0;

	if (ISSET(nmp->nm_flag, NFSMNT_INT)) {
		sintr = PCATCH;
		stimeo = SEC_TO_NSEC(2);
	}

	/* First wait for any other process doing a flush to complete. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep_nsec(&np->n_flag, PRIBIO|sintr, "nfsvinval",
		    stimeo);
		if (error && sintr && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/* Now, flush as required. */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, sintr, INFSLP);
	while (error) {
		if (sintr && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, stimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(struct buf *bp, int readahead)
{
	if (nfs_numasync == 0)
		goto out;

	while (nfs_bufqlen > nfs_bufqmax)
		if (readahead)
			goto out;
		else
			tsleep_nsec(&nfs_bufqlen, PRIBIO, "nfs_bufq", INFSLP);

	if ((bp->b_flags & B_READ) == 0) {
		bp->b_flags |= B_WRITEINPROG;
	}

	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
	nfs_bufqlen++;

	wakeup_one(&nfs_bufq);
	return (0);

out:
	nfsstats.forcedsync++;
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp, struct proc *p)
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = ulmin(len, uiop->uio_resid);
						memset((char *)bp->b_data + diff, 0, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (timespeccmp(&np->n_mtime, &np->n_vattr.va_mtime, !=))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			panic("nfs_doio: type %x unexpected", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		bcstats.pendingwrites++;
		bcstats.numwrites++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}