xref: /original-bsd/sys/nfs/nfs_bio.c (revision 2932bec8)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_bio.c	7.23 (Berkeley) 05/04/92
11  */
12 
13 #include <sys/param.h>
14 #include <sys/resourcevar.h>
15 #include <sys/proc.h>
16 #include <sys/buf.h>
17 #include <sys/vnode.h>
18 #include <sys/trace.h>
19 #include <sys/mount.h>
20 #include <sys/kernel.h>
21 #include <machine/endian.h>
22 #include <vm/vm.h>
23 #include <nfs/nfsnode.h>
24 #include <nfs/rpcv2.h>
25 #include <nfs/nfsv2.h>
26 #include <nfs/nfs.h>
27 #include <nfs/nfsmount.h>
28 #include <nfs/nqnfs.h>
29 
30 /* Boolean constants; TRUE is the flag argument passed to vinvalbuf() in this file. */
31 #define	TRUE	1
32 #define	FALSE	0
33 
34 /*
35  * Vnode op for read using bio
36  * Any similarity to readip() is purely coincidental
    *
    * Satisfies a read on an nfs vnode of type VREG, VLNK or VDIR by moving
    * data into uio, normally by way of the buffer cache (bread()/breadn()).
    * When the nfsnode is flagged NQNFSNONCACHE the matching read rpc is
    * issued directly instead and its result is returned.
    *
    *	vp	vnode to read from
    *	uio	transfer description; the loop relies on uiomove() updating
    *		uio_offset and uio_resid as data is copied out
    *	ioflag	not used by this routine
    *	cred	credentials handed to the attribute, lease and read calls
    *
    * Returns 0 on success (including a read at or past the cached file
    * size), EINVAL for a negative offset on a non-directory, or the error
    * reported by nfs_getattr(), nqnfs_getlease(), the read rpcs,
    * bread()/breadn() or uiomove().
37  */
38 nfs_bioread(vp, uio, ioflag, cred)
39 	register struct vnode *vp;
40 	register struct uio *uio;
41 	int ioflag;
42 	struct ucred *cred;
43 {
44 	register struct nfsnode *np = VTONFS(vp);
45 	register int biosize;
46 	struct buf *bp;
47 	struct vattr vattr;
48 	struct nfsmount *nmp;
49 	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
50 	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
51 	int n, on;
52 
	/* ioflag is otherwise unused; the self-assignment only quiets lint. */
53 #ifdef lint
54 	ioflag = ioflag;
55 #endif /* lint */
56 #ifdef DIAGNOSTIC
57 	if (uio->uio_rw != UIO_READ)
58 		panic("nfs_read mode");
59 #endif
	/* Nothing requested, nothing to do. */
60 	if (uio->uio_resid == 0)
61 		return (0);
	/* Negative offsets are rejected for everything except directories. */
62 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
63 		return (EINVAL);
64 	nmp = VFSTONFS(vp->v_mount);
	/* Cache blocks are sized by the mount read size. */
65 	biosize = nmp->nm_rsize;
66 	/*
67 	 * For nfs, cache consistency can only be maintained approximately.
68 	 * Although RFC1094 does not specify the criteria, the following is
69 	 * believed to be compatible with the reference port.
70 	 * For nqnfs, full cache consistency is maintained within the loop.
71 	 * For nfs:
72 	 * If the file's modify time on the server has changed since the
73 	 * last read rpc or you have written to the file,
74 	 * you may have lost data cache consistency with the
75 	 * server, so flush all of the file's data out of the cache.
76 	 * Then force a getattr rpc to ensure that you have up to date
77 	 * attributes.
78 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
79 	 * the ones changing the modify time.
80 	 * NB: This implies that cache data can be read when up to
81 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
82 	 * attributes this could be forced by setting n_attrstamp to 0 before
83 	 * the nfs_getattr() call.
84 	 */
85 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
86 		if (np->n_flag & NMODIFIED) {
			/*
			 * Locally modified: drop the flag, flush the cache
			 * unless this is a regular file on a MYWRITE mount,
			 * then refetch attributes and remember the new mtime.
			 */
87 			np->n_flag &= ~NMODIFIED;
88 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
89 			     vp->v_type != VREG)
90 				vinvalbuf(vp, TRUE);
91 			np->n_attrstamp = 0;
92 			np->n_direofoffset = 0;
93 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
94 				return (error);
95 			np->n_mtime = vattr.va_mtime.tv_sec;
96 		} else {
			/*
			 * Not modified locally: flush only if the mtime
			 * returned by nfs_getattr() differs from the one
			 * recorded in the nfsnode.
			 */
97 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
98 				return (error);
99 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
100 				np->n_direofoffset = 0;
101 				vinvalbuf(vp, TRUE);
102 				np->n_mtime = vattr.va_mtime.tv_sec;
103 			}
104 		}
105 	}
	/*
	 * Each pass handles at most one cache block. The loop ends on an
	 * error, when uio_resid reaches 0, or when a pass had n == 0.
	 */
106 	do {
107 
108 	    /*
109 	     * Get a valid lease. If cached data is stale, flush it.
110 	     */
111 	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
112 		NQNFS_CKINVALID(vp, np, NQL_READ)) {
		/* Keep asking while the reply is NQNFS_EXPIRED. */
113 		do {
114 			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
115 		} while (error == NQNFS_EXPIRED);
116 		if (error)
117 			return (error);
		/*
		 * n_lrev differing from n_brev, or a modified directory,
		 * means the cached blocks are discarded; n_brev is then
		 * brought up to n_lrev.
		 */
118 		if (QUADNE(np->n_lrev, np->n_brev) ||
119 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
120 			if (vp->v_type == VDIR) {
121 				np->n_direofoffset = 0;
122 				cache_purge(vp);
123 			}
124 			np->n_flag &= ~NMODIFIED;
125 			vinvalbuf(vp, TRUE);
126 			np->n_brev = np->n_lrev;
127 		}
128 	    }
	    /*
	     * Non-cachable node: bypass the buffer cache and return the
	     * result of the rpc for this vnode type. A type not listed
	     * in the switch returns error unchanged.
	     */
129 	    if (np->n_flag & NQNFSNONCACHE) {
130 		switch (vp->v_type) {
131 		case VREG:
132 			error = nfs_readrpc(vp, uio, cred);
133 			break;
134 		case VLNK:
135 			error = nfs_readlinkrpc(vp, uio, cred);
136 			break;
137 		case VDIR:
138 			error = nfs_readdirrpc(vp, uio, cred);
139 			break;
140 		};
141 		return (error);
142 	    }
143 	    switch (vp->v_type) {
144 	    case VREG:
145 		nfsstats.biocache_reads++;
		/*
		 * lbn is the cache block index, on the byte offset inside
		 * that block and n the byte count to copy from it.
		 * NOTE(review): on uses a mask while lbn uses a division;
		 * they agree only when biosize is a power of two - confirm
		 * nm_rsize is constrained that way.
		 */
146 		lbn = uio->uio_offset / biosize;
147 		on = uio->uio_offset & (biosize-1);
148 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		/* Never copy past the cached file size; stop at or beyond it. */
149 		diff = np->n_size - uio->uio_offset;
150 		if (diff <= 0)
151 			return (error);
152 		if (diff < n)
153 			n = diff;
		/* Block number expressed in DEV_BSIZE units. */
154 		bn = lbn*(biosize/DEV_BSIZE);
		/*
		 * Build the read-ahead list: up to nm_readahead blocks
		 * following lbn that start before n_size.
		 * NOTE(review): rablock[] and rasize[] hold NFS_MAXRAHEAD
		 * entries, so this presumes nm_readahead does not exceed
		 * NFS_MAXRAHEAD - confirm where nm_readahead is set.
		 */
155 		for (nra = 0; nra < nmp->nm_readahead &&
156 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
157 			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
158 			rasize[nra] = biosize;
159 		}
160 again:
		/*
		 * Read ahead only when there are candidate blocks and this
		 * block is not before the last one read (v_lastr).
		 */
161 		if (nra > 0 && lbn >= vp->v_lastr)
162 			error = breadn(vp, bn, biosize, rablock, rasize, nra,
163 				cred, &bp);
164 		else
165 			error = bread(vp, bn, biosize, cred, &bp);
		/*
		 * A buffer with b_validend > 0 records a valid byte range.
		 * If the bytes wanted are not inside it, invalidate the
		 * buffer (writing it first when it has a dirty range) and
		 * read the block again. Otherwise record what was just read
		 * as the valid range.
		 * NOTE(review): bp is examined here before error is tested,
		 * so this relies on bread()/breadn() always storing a
		 * buffer in bp - confirm.
		 */
166 		if (bp->b_validend > 0) {
167 			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
168 				bp->b_flags |= B_INVAL;
169 				if (bp->b_dirtyend > 0) {
170 					if ((bp->b_flags & B_DELWRI) == 0)
171 						panic("nfsbioread");
172 					(void) bwrite(bp);
173 				} else
174 					brelse(bp);
175 				goto again;
176 			}
177 		} else {
178 			bp->b_validoff = 0;
179 			bp->b_validend = biosize - bp->b_resid;
180 		}
181 		vp->v_lastr = lbn;
		/* Short read: clamp n to the bytes present at or after on. */
182 		if (bp->b_resid) {
183 		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
184 			(biosize-bp->b_resid-on);
185 		   n = MIN(n, diff);
186 		}
187 		break;
188 	    case VLNK:
189 		nfsstats.biocache_readlinks++;
		/* The link is read as block 0 with size NFS_MAXPATHLEN. */
190 		on = 0;
191 		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
192 		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
193 		break;
194 	    case VDIR:
195 		nfsstats.biocache_readdirs++;
		/* The directory offset itself is passed as the block number. */
196 		on = 0;
197 		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
198 		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
199 		break;
200 	    };
	    /* A failed read still leaves a buffer to release. */
201 	    if (error) {
202 		brelse(bp);
203 		return (error);
204 	    }
205 
206 	    /*
207 	     * For nqnfs:
208 	     * Must check for valid lease, since it may have expired while in
209 	     * bread(). If expired, get a lease.
210 	     * If data is stale, flush and try again.
211 	     * nb: If a read rpc is done by bread() or breadn() and there is
212 	     *     no valid lease, a get_lease request will be piggy backed.
213 	     */
214 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
215 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
216 			do {
217 				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
218 			} while (error == NQNFS_EXPIRED);
219 			if (error) {
220 				brelse(bp);
221 				return (error);
222 			}
			/*
			 * Stale or non-cachable: release the buffer, flush
			 * and restart the pass. The continue re-tests the
			 * loop condition, so the loop ends here if n is 0.
			 */
223 			if ((np->n_flag & NQNFSNONCACHE) ||
224 			    QUADNE(np->n_lrev, np->n_brev) ||
225 			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
226 				if (vp->v_type == VDIR) {
227 					np->n_direofoffset = 0;
228 					cache_purge(vp);
229 				}
230 				brelse(bp);
231 				np->n_flag &= ~NMODIFIED;
232 				vinvalbuf(vp, TRUE);
233 				np->n_brev = np->n_lrev;
234 				continue;
235 			}
236 		} else if ((np->n_flag & NQNFSNONCACHE) ||
237 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			/*
			 * Lease still valid, but the node is non-cachable
			 * or a modified directory: discard and retry.
			 * n_direofoffset is reset here for every vnode type.
			 */
238 			np->n_direofoffset = 0;
239 			brelse(bp);
240 			np->n_flag &= ~NMODIFIED;
241 			vinvalbuf(vp, TRUE);
242 			np->n_brev = np->n_lrev;
243 			continue;
244 		}
245 	    }
	    /* Copy n bytes starting at offset on within the buffer. */
246 	    if (n > 0)
247 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
248 	    switch (vp->v_type) {
249 	    case VREG:
		/* Set B_AGE once the block end or the file size is reached. */
250 		if (n+on == biosize || uio->uio_offset == np->n_size)
251 			bp->b_flags |= B_AGE;
252 		break;
253 	    case VLNK:
		/* A link takes a single pass; n = 0 ends the loop. */
254 		n = 0;
255 		break;
256 	    case VDIR:
		/*
		 * The next offset is taken from b_blkno.
		 * NOTE(review): presumably the readdir code stores the
		 * following directory offset there - confirm.
		 */
257 		uio->uio_offset = bp->b_blkno;
258 		break;
259 	    };
260 	    brelse(bp);
261 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
262 	return (error);
263 }
264 
265 /*
266  * Vnode op for write using bio
     *
     * Writes the data described by uio to a regular file. Data is copied
     * into buffer cache blocks of nm_rsize bytes and each block is then
     * written synchronously (bwrite), asynchronously (bawrite) or as a
     * delayed write (bdwrite). When the nfsnode is flagged NQNFSNONCACHE
     * at the top of a pass, nfs_writerpc() is called directly instead.
     *
     *	vp	vnode to write; must be of type VREG
     *	uio	transfer description; uio_procp identifies the writer
     *	ioflag	IO_APPEND starts the write at the current file size,
     *		IO_SYNC forces a synchronous write of every block
     *	cred	credentials used for the rpcs and attached to buffers
     *
     * Returns 0 on success, EIO for a non-regular file, EINVAL for a
     * negative offset, EFBIG (after posting SIGXFSZ) when the write would
     * exceed RLIMIT_FSIZE, or the error from nfs_getattr(),
     * nqnfs_getlease(), nfs_writerpc(), uiomove() or bwrite(). A failure
     * of the synchronous bwrite() used for IO_SYNC and non-cachable
     * writes is returned to the caller rather than discarded.
267  */
268 nfs_write(vp, uio, ioflag, cred)
269 	register struct vnode *vp;
270 	register struct uio *uio;
271 	int ioflag;
272 	struct ucred *cred;
273 {
274 	register int biosize;
275 	struct proc *p = uio->uio_procp;
276 	struct buf *bp;
277 	struct nfsnode *np = VTONFS(vp);
278 	struct vattr vattr;
279 	struct nfsmount *nmp;
280 	daddr_t lbn, bn;
281 	int n, on, error = 0;
282 
283 #ifdef DIAGNOSTIC
284 	if (uio->uio_rw != UIO_WRITE)
285 		panic("nfs_write mode");
286 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
287 		panic("nfs_write proc");
288 #endif
	/* Only regular files are written through this path. */
289 	if (vp->v_type != VREG)
290 		return (EIO);
291 	if (ioflag & (IO_APPEND | IO_SYNC)) {
		/* Drop cached data for a node that was modified locally. */
292 		if (np->n_flag & NMODIFIED) {
293 			np->n_flag &= ~NMODIFIED;
294 			vinvalbuf(vp, TRUE);
295 		}
		/*
		 * Append: clear n_attrstamp and refetch attributes, then
		 * start at the current file size.
		 * NOTE(review): vattr is not read afterwards; this relies on
		 * nfs_getattr() refreshing np->n_size - confirm.
		 */
296 		if (ioflag & IO_APPEND) {
297 			np->n_attrstamp = 0;
298 			if (error = nfs_getattr(vp, &vattr, cred, p))
299 				return (error);
300 			uio->uio_offset = np->n_size;
301 		}
302 	}
303 	nmp = VFSTONFS(vp->v_mount);
304 	if (uio->uio_offset < 0)
305 		return (EINVAL);
306 	if (uio->uio_resid == 0)
307 		return (0);
308 	/*
309 	 * Maybe this should be above the vnode op call, but so long as
310 	 * file servers have no limits, i don't think it matters
311 	 */
312 	if (p && uio->uio_offset + uio->uio_resid >
313 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
314 		psignal(p, SIGXFSZ);
315 		return (EFBIG);
316 	}
317 	/*
318 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
319 	 * will be the same size within a filesystem. nfs_writerpc will
320 	 * still use nm_wsize when sizing the rpc's.
321 	 */
322 	biosize = nmp->nm_rsize;
323 	np->n_flag |= NMODIFIED;
	/*
	 * Each pass fills at most one cache block. The loop ends on an
	 * error, when uio_resid reaches 0, or when a pass had n == 0.
	 */
324 	do {
325 
326 		/*
327 		 * Check for a valid write lease.
328 		 * If non-cachable, just do the rpc
329 		 */
330 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
331 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			/* Keep asking while the reply is NQNFS_EXPIRED. */
332 			do {
333 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
334 			} while (error == NQNFS_EXPIRED);
335 			if (error)
336 				return (error);
337 			if (QUADNE(np->n_lrev, np->n_brev) ||
338 			    (np->n_flag & NQNFSNONCACHE)) {
339 				vinvalbuf(vp, TRUE);
340 				np->n_brev = np->n_lrev;
341 			}
342 		}
343 		if (np->n_flag & NQNFSNONCACHE)
344 			return (nfs_writerpc(vp, uio, cred));
345 		nfsstats.biocache_writes++;
		/*
		 * lbn is the cache block index, on the byte offset inside
		 * that block and n the byte count going into it.
		 * NOTE(review): on uses a mask while lbn uses a division;
		 * they agree only when biosize is a power of two - confirm
		 * nm_rsize is constrained that way.
		 */
346 		lbn = uio->uio_offset / biosize;
347 		on = uio->uio_offset & (biosize-1);
348 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		/* Grow the cached file size and tell the pager about it. */
349 		if (uio->uio_offset + n > np->n_size) {
350 			np->n_size = uio->uio_offset + n;
351 			vnode_pager_setsize(vp, (u_long)np->n_size);
352 		}
		/* Block number expressed in DEV_BSIZE units. */
353 		bn = lbn * (biosize / DEV_BSIZE);
354 again:
355 		bp = getblk(vp, bn, biosize);
		/* First writer of this buffer: hold and record its cred. */
356 		if (bp->b_wcred == NOCRED) {
357 			crhold(cred);
358 			bp->b_wcred = cred;
359 		}
360 
361 		/*
362 		 * If the new write will leave a contiguous dirty
363 		 * area, just update the b_dirtyoff and b_dirtyend,
364 		 * otherwise force a write rpc of the old dirty area.
365 		 */
366 		if (bp->b_dirtyend > 0 &&
367 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
368 			bp->b_proc = p;
369 			if (error = bwrite(bp))
370 				return (error);
371 			goto again;
372 		}
373 
374 		/*
375 		 * Check for valid write lease and get one as required.
376 		 * In case getblk() and/or bwrite() delayed us.
377 		 */
378 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
379 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
380 			do {
381 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
382 			} while (error == NQNFS_EXPIRED);
383 			if (error) {
384 				brelse(bp);
385 				return (error);
386 			}
			/*
			 * NOTE(review): vinvalbuf() is called here while bp
			 * is still held, whereas nfs_bioread() releases its
			 * buffer before calling it - confirm this is safe.
			 */
387 			if (QUADNE(np->n_lrev, np->n_brev) ||
388 			    (np->n_flag & NQNFSNONCACHE)) {
389 				vinvalbuf(vp, TRUE);
390 				np->n_brev = np->n_lrev;
391 			}
392 		}
		/* Copy the caller data into the block at offset on. */
393 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
394 			brelse(bp);
395 			return (error);
396 		}
		/* Extend the dirty range, or start one, to cover [on, on+n). */
397 		if (bp->b_dirtyend > 0) {
398 			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
399 			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
400 		} else {
401 			bp->b_dirtyoff = on;
402 			bp->b_dirtyend = on+n;
403 		}
		/*
		 * With no valid range yet, or one that does not touch the
		 * dirty range, the valid range becomes the dirty range;
		 * otherwise it is widened to include the dirty range.
		 */
404 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
405 		    bp->b_validoff > bp->b_dirtyend) {
406 			bp->b_validoff = bp->b_dirtyoff;
407 			bp->b_validend = bp->b_dirtyend;
408 		} else {
409 			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
410 			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
411 		}
412 
413 		/*
414 		 * If the lease is non-cachable or IO_SYNC do bwrite().
415 		 */
416 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			/*
			 * Keep the result: a non-zero error ends the loop
			 * and is returned, so a failed synchronous write is
			 * no longer reported as success.
			 */
417 			bp->b_proc = p;
418 			error = bwrite(bp);
419 		} else if ((n+on) == biosize &&
420 			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			/* Block filled to its end on a non-nqnfs mount. */
421 			bp->b_flags |= B_AGE;
422 			bp->b_proc = (struct proc *)0;
423 			bawrite(bp);
424 		} else {
			/* Partial block or nqnfs mount: delayed write. */
425 			bp->b_proc = (struct proc *)0;
426 			bdwrite(bp);
427 		}
428 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
429 	return (error);
430 }
431