/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	7.23 (Berkeley) 05/04/92
 */

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <machine/endian.h>
#include <vm/vm.h>
#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* True and false, how exciting */
#define	TRUE	1
#define	FALSE	0

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 *
 * Reads from a regular file, symlink, or directory through the buffer
 * cache, revalidating cached data against the server first.  For plain
 * NFS the check is mtime-based (approximate consistency); for NQNFS a
 * read lease is obtained and the cache is flushed when the server's
 * modify revision (n_lrev) has moved past the one the cache was built
 * from (n_brev).
 *
 * vp	  - vnode to read from (VREG, VLNK, or VDIR)
 * uio	  - describes the user's buffer, offset and residual count
 * ioflag - IO_* flags; unused here except to quiet lint
 * cred	  - credentials used for any read rpcs issued
 *
 * Returns 0 or an errno value.  On success uio has been advanced by
 * the number of bytes transferred.
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize;		/* cache block size == nm_rsize */
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
	int n, on;			/* bytes to move / offset in block */

#ifdef lint
	/* self-assignment silences "argument unused" from lint */
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	/* negative offsets are allowed for VDIR: they are opaque cookies */
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the nfs_getattr() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			/* we wrote the file; flush unless MYWRITE says our
			 * own writes are the only mtime changers */
			np->n_flag &= ~NMODIFIED;
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG)
				vinvalbuf(vp, TRUE);
			np->n_attrstamp = 0;	/* force fresh attributes */
			np->n_direofoffset = 0;
			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			/* someone else may have modified it: compare mtimes */
			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				np->n_direofoffset = 0;
				vinvalbuf(vp, TRUE);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		NQNFS_CKINVALID(vp, np, NQL_READ)) {
		do {
			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
		} while (error == NQNFS_EXPIRED);
		if (error)
			return (error);
		/* server revision moved past ours, or a modified directory:
		 * the cached blocks are stale */
		if (QUADNE(np->n_lrev, np->n_brev) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
				np->n_direofoffset = 0;
				cache_purge(vp);	/* name cache too */
			}
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
		}
	    }
	    /*
	     * Non-cachable lease: bypass the buffer cache entirely and
	     * go straight to the wire.
	     */
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		};
		return (error);
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		/* clip the transfer at the cached EOF */
		diff = np->n_size - uio->uio_offset;
		if (diff <= 0)
			return (error);
		if (diff < n)
			n = diff;
		/* buffer cache block numbers are in DEV_BSIZE units */
		bn = lbn*(biosize/DEV_BSIZE);
		/* build the read-ahead list, up to nm_readahead blocks
		 * that lie before EOF */
		for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			rasize[nra] = biosize;
		}
again:
		/* only read ahead when access looks sequential
		 * (at or past the last block read) */
		if (nra > 0 && lbn >= vp->v_lastr)
			error = breadn(vp, bn, biosize, rablock, rasize, nra,
				cred, &bp);
		else
			error = bread(vp, bn, biosize, cred, &bp);
		/*
		 * NOTE(review): bp is dereferenced before the shared
		 * "if (error)" check below the switch; this relies on
		 * bread()/breadn() always returning a buffer -- confirm
		 * against the buffer cache implementation.
		 *
		 * A buffer may be only partially valid (b_validoff ..
		 * b_validend).  If the requested range falls outside the
		 * valid region, toss the buffer (writing it first if it
		 * holds delayed-write data) and re-read the whole block.
		 */
		if (bp->b_validend > 0) {
			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					(void) bwrite(bp);
				} else
					brelse(bp);
				goto again;
			}
		} else {
			/* freshly read block: everything that arrived is valid */
			bp->b_validoff = 0;
			bp->b_validend = biosize - bp->b_resid;
		}
		vp->v_lastr = lbn;	/* remember for sequential heuristic */
		/* short read from the server: clip n to what we got */
		if (bp->b_resid) {
		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
			(biosize-bp->b_resid-on);
		   n = MIN(n, diff);
		}
		break;
	    case VLNK:
		/* the whole symlink target is cached in block 0 */
		nfsstats.biocache_readlinks++;
		on = 0;
		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		break;
	    case VDIR:
		/* uio_offset is used directly as the directory block
		 * cookie; one NFS_DIRBLKSIZ chunk per iteration */
		nfsstats.biocache_readdirs++;
		on = 0;
		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		break;
	    };
	    if (error) {
		brelse(bp);
		return (error);
	    }

	    /*
	     * For nqnfs:
	     * Must check for valid lease, since it may have expired while in
	     * bread(). If expired, get a lease.
	     * If data is stale, flush and try again.
	     * nb: If a read rpc is done by bread() or breada() and there is
	     * no valid lease, a get_lease request will be piggy backed.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
			do {
				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if ((np->n_flag & NQNFSNONCACHE) ||
			    QUADNE(np->n_lrev, np->n_brev) ||
			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
				if (vp->v_type == VDIR) {
					np->n_direofoffset = 0;
					cache_purge(vp);
				}
				/* release bp BEFORE flushing, then retry
				 * the whole iteration */
				brelse(bp);
				np->n_flag &= ~NMODIFIED;
				vinvalbuf(vp, TRUE);
				np->n_brev = np->n_lrev;
				continue;
			}
		} else if ((np->n_flag & NQNFSNONCACHE) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			/* lease still valid but cache was invalidated:
			 * flush and retry (next pass takes the rpc path) */
			np->n_direofoffset = 0;
			brelse(bp);
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
			continue;
		}
	    }
	    /* copy the validated data out to the user */
	    if (n > 0)
		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
	    switch (vp->v_type) {
	    case VREG:
		/* fully consumed or at EOF: age the buffer for early reuse */
		if (n+on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		n = 0;		/* symlinks are read in a single pass */
		break;
	    case VDIR:
		/* advance to the next directory block cookie; presumably
		 * b_blkno was set to it by the readdir rpc -- verify */
		uio->uio_offset = bp->b_blkno;
		break;
	    };
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}

/*
 * Vnode op for write using bio
 *
 * Writes user data into buffer-cache blocks, tracking the dirty byte
 * range (b_dirtyoff .. b_dirtyend) per buffer so that only the bytes
 * actually written are later pushed to the server.  Regular files only.
 *
 * vp	  - vnode to write (must be VREG)
 * uio	  - user's data, offset and residual count
 * ioflag - IO_APPEND and/or IO_SYNC honored here
 * cred	  - credentials; also stashed in b_wcred for the eventual
 *	    asynchronous write rpc
 *
 * Returns 0 or an errno value (EFBIG raises SIGXFSZ first).
 */
nfs_write(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register int biosize;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		/* start from a clean cache for append/sync writes */
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
		}
		if (ioflag & IO_APPEND) {
			/* get the current size from the server, not the
			 * (possibly stale) cached attributes */
			np->n_attrstamp = 0;
			if (error = nfs_getattr(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	np->n_flag |= NMODIFIED;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		/* extending the file: grow the cached size and tell the
		 * vm pager before touching the buffer */
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = getblk(vp, bn, biosize);
		/* remember write credentials for the async write rpc */
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (error = bwrite(bp))
				return (error);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		/* copy the user's bytes into the cache block */
		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
			brelse(bp);
			return (error);
		}
		/* widen the dirty region to cover this write; the earlier
		 * check guarantees it stays contiguous */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on+n;
		}
		/* keep the valid region consistent with the dirty region:
		 * if they are disjoint the old valid range is replaced,
		 * otherwise the two are merged */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 * A full block on plain NFS is written asynchronously
		 * right away; anything else becomes a delayed write.
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			bwrite(bp);
		} else if ((n+on) == biosize &&
			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_flags |= B_AGE;
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else {
			bp->b_proc = (struct proc *)0;
			bdwrite(bp);
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}