/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	7.27 (Berkeley) 06/19/92
 */

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <machine/endian.h>
#include <vm/vm.h>
#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* True and false, how exciting */
#define	TRUE	1
#define	FALSE	0

/*
 * Vnode op for read using bio.
 * Any similarity to readip() is purely coincidental.
 *
 * Reads cached data for regular files, symlinks and directories through
 * the buffer cache, falling back to direct read RPCs when the NQNFS
 * lease marks the node non-cachable.  Returns 0 on success or an errno.
 *
 *	vp	- vnode to read from (VREG, VLNK or VDIR)
 *	uio	- describes the destination and the offset/resid to read
 *	ioflag	- unused here (kept for the vnode-op interface; see #ifdef lint)
 *	cred	- credentials used for the read RPCs
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	USES_VOP_GETATTR;
	register struct nfsnode *np = VTONFS(vp);
	register int biosize;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
	int n, on;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Directory "offsets" are opaque server cookies, so the negative
	 * check only applies to byte-addressed vnode types.
	 */
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	/*
	 * nm_rsize (not nm_wsize) is used as the buffer-cache block size
	 * everywhere in this file so all blocks in a filesystem match.
	 */
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			/* We wrote the file: flush unless MYWRITE says our
			 * own writes are what moved the modify time. */
			np->n_flag &= ~NMODIFIED;
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			     vp->v_type != VREG)
				vinvalbuf(vp, TRUE);
			np->n_attrstamp = 0;	/* force a fresh getattr RPC */
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			/* Compare server mtime against the one cached at the
			 * last read; mismatch means another client wrote. */
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				vinvalbuf(vp, TRUE);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		NQNFS_CKINVALID(vp, np, NQL_READ)) {
		do {
			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
		} while (error == NQNFS_EXPIRED);
		if (error)
			return (error);
		/*
		 * A lease-revision change (n_lrev != n_brev), or local
		 * modifications to a directory, invalidate cached data.
		 */
		if (QUADNE(np->n_lrev, np->n_brev) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
				np->n_direofoffset = 0;
				cache_purge(vp);	/* drop name cache too */
			}
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
		}
	    }
	    /*
	     * Non-cachable lease: bypass the buffer cache entirely and do
	     * the read as a direct RPC.  NOTE(review): other vnode types
	     * fall through this switch with error still 0 — presumably
	     * unreachable here; confirm callers only pass VREG/VLNK/VDIR.
	     */
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		};
		return (error);
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		/* Split the offset into logical block + offset-in-block.
		 * The mask assumes biosize is a power of two — TODO confirm
		 * nm_rsize is always constrained that way at mount time. */
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		/* Clamp at EOF; at or past EOF return success with 0 moved. */
		diff = np->n_size - uio->uio_offset;
		if (diff <= 0)
			return (error);
		if (diff < n)
			n = diff;
		/* Buffer-cache block numbers are in DEV_BSIZE units. */
		bn = lbn*(biosize/DEV_BSIZE);
		/*
		 * Build the read-ahead list for blocks that lie before EOF.
		 * NOTE(review): loop bound is nm_readahead but rablock[] has
		 * only NFS_MAXRAHEAD slots — presumably nm_readahead is
		 * clamped to NFS_MAXRAHEAD at mount time; verify.
		 */
		for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			rasize[nra] = biosize;
		}
again:
		/* Only bother with read-ahead for forward-moving reads. */
		if (nra > 0 && lbn >= vp->v_lastr)
			error = breadn(vp, bn, biosize, rablock, rasize, nra,
				cred, &bp);
		else
			error = bread(vp, bn, biosize, cred, &bp);
		/*
		 * NOTE(review): bp is dereferenced before the error check
		 * below — assumes bread/breadn always return a buffer even
		 * on error, per the 4.4BSD buffer cache contract; confirm.
		 *
		 * If the buffer carries a valid sub-range that does not
		 * cover [on, on+n), toss it (writing out any dirty region
		 * first) and re-read the whole block.
		 */
		if (bp->b_validend > 0) {
			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
					/* a dirty range implies delayed write */
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					(void) bwrite(bp);
				} else
					brelse(bp);
				goto again;
			}
		} else {
			/* Freshly read buffer: whole transferred range valid. */
			bp->b_validoff = 0;
			bp->b_validend = biosize - bp->b_resid;
		}
		vp->v_lastr = lbn;	/* remember for the read-ahead heuristic */
		/* A short read (b_resid != 0) trims how much we may copy. */
		if (bp->b_resid) {
		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
			(biosize-bp->b_resid-on);
		   n = MIN(n, diff);
		}
		break;
	    case VLNK:
		/* Symlinks are cached as a single NFS_MAXPATHLEN block 0. */
		nfsstats.biocache_readlinks++;
		on = 0;
		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		break;
	    case VDIR:
		/* Directory blocks are keyed by the server cookie (offset). */
		nfsstats.biocache_readdirs++;
		on = 0;
		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		break;
	    };
	    if (error) {
		brelse(bp);
		return (error);
	    }

	    /*
	     * For nqnfs:
	     * Must check for valid lease, since it may have expired while in
	     * bread(). If expired, get a lease.
	     * If data is stale, flush and try again.
	     * nb: If a read rpc is done by bread() or breada() and there is
	     * no valid lease, a get_lease request will be piggy backed.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
			do {
				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if ((np->n_flag & NQNFSNONCACHE) ||
			    QUADNE(np->n_lrev, np->n_brev) ||
			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
				if (vp->v_type == VDIR) {
					np->n_direofoffset = 0;
					cache_purge(vp);
				}
				brelse(bp);
				np->n_flag &= ~NMODIFIED;
				vinvalbuf(vp, TRUE);
				np->n_brev = np->n_lrev;
				/* data was stale: restart the iteration */
				continue;
			}
		} else if ((np->n_flag & NQNFSNONCACHE) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			np->n_direofoffset = 0;
			brelse(bp);
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
			continue;
		}
	    }
	    /* Copy the validated window out to the caller. */
	    if (n > 0)
		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
	    switch (vp->v_type) {
	    case VREG:
		/* Fully consumed block (or EOF reached): age it so the
		 * buffer cache recycles it sooner. */
		if (n+on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		n = 0;	/* a symlink is read in one shot; terminate loop */
		break;
	    case VDIR:
		/* Advance to the next directory cookie stashed in b_blkno
		 * — presumably set by the readdir RPC path; confirm. */
		uio->uio_offset = bp->b_blkno;
		break;
	    };
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}

/*
 * Vnode op for write using bio.
 *
 * Writes through the buffer cache, tracking the dirty sub-range of each
 * block (b_dirtyoff/b_dirtyend) so nfs_writerpc can send only the bytes
 * that changed.  Non-cachable NQNFS leases bypass the cache with a direct
 * write RPC.  Returns 0 on success or an errno; EFBIG (plus SIGXFSZ) when
 * the write would exceed the process file-size rlimit.
 *
 *	ap - vop_write_args: a_vp vnode, a_uio source data, a_ioflag
 *	     IO_APPEND/IO_SYNC flags, a_cred credentials.
 */
nfs_write (ap)
	struct vop_write_args *ap;
{
	USES_VOP_GETATTR;
	register int biosize;
	struct proc *p = ap->a_uio->uio_procp;
	struct buf *bp;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (ap->a_uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (ap->a_uio->uio_segflg == UIO_USERSPACE && ap->a_uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (ap->a_vp->v_type != VREG)
		return (EIO);
	/* Report (once) any asynchronous write error recorded earlier. */
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	/*
	 * Append and synchronous writes must not be satisfied from stale
	 * cached state: flush modified data, and for append refetch the
	 * attributes so uio_offset lands at the true server EOF.
	 */
	if (ap->a_ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(ap->a_vp, TRUE);
		}
		if (ap->a_ioflag & IO_APPEND) {
			np->n_attrstamp = 0;	/* force fresh getattr */
			if (error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, p))
				return (error);
			ap->a_uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(ap->a_vp->v_mount);
	if (ap->a_uio->uio_offset < 0)
		return (EINVAL);
	if (ap->a_uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && ap->a_uio->uio_offset + ap->a_uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	np->n_flag |= NMODIFIED;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(ap->a_vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(ap->a_vp, ap->a_uio, ap->a_cred));
		nfsstats.biocache_writes++;
		/* Logical block + offset-in-block of this chunk; mask assumes
		 * biosize is a power of two — TODO confirm nm_rsize is. */
		lbn = ap->a_uio->uio_offset / biosize;
		on = ap->a_uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), ap->a_uio->uio_resid);
		/* Extending the file: grow the cached size and tell the
		 * vnode pager before the data is copied in. */
		if (ap->a_uio->uio_offset + n > np->n_size) {
			np->n_size = ap->a_uio->uio_offset + n;
			vnode_pager_setsize(ap->a_vp, (u_long)np->n_size);
		}
		bn = lbn * (biosize / DEV_BSIZE);	/* DEV_BSIZE units */
again:
		bp = getblk(ap->a_vp, bn, biosize);
		/* First write through this buffer: pin the write cred. */
		if (bp->b_wcred == NOCRED) {
			crhold(ap->a_cred);
			bp->b_wcred = ap->a_cred;
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (error = bwrite(bp))
				return (error);
			goto again;	/* re-fetch the (now clean) buffer */
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				/* NOTE(review): vinvalbuf here while bp is
				 * held — presumably safe because bp is busy;
				 * confirm against vinvalbuf semantics. */
				vinvalbuf(ap->a_vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		/* Copy the caller's data into the buffer window. */
		if (error = uiomove(bp->b_un.b_addr + on, n, ap->a_uio)) {
			brelse(bp);
			return (error);
		}
		/* Coalesce the new range into the existing dirty range
		 * (the disjoint case was written out above). */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on+n;
		}
		/* Keep the valid range consistent: if it doesn't touch the
		 * dirty range, reset it to the dirty range; else merge. */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 * Otherwise a fully-filled (non-NQNFS) block goes out
		 * asynchronously, and a partial block is delay-written.
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ap->a_ioflag & IO_SYNC)) {
			bp->b_proc = p;
			bwrite(bp);
		} else if ((n+on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_flags |= B_AGE;
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else {
			bp->b_proc = (struct proc *)0;
			bdwrite(bp);
		}
	} while (error == 0 && ap->a_uio->uio_resid > 0 && n != 0);
	return (error);
}