xref: /original-bsd/sys/nfs/nfs_bio.c (revision 68d9582f)
/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	7.27 (Berkeley) 06/19/92
 */

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <machine/endian.h>
#include <vm/vm.h>
#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* True and false, how exciting */
#define	TRUE	1
#define	FALSE	0

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	USES_VOP_GETATTR;
	register struct nfsnode *np = VTONFS(vp);
	register int biosize;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
	int n, on;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes, this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			     vp->v_type != VREG)
				vinvalbuf(vp, TRUE);
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				vinvalbuf(vp, TRUE);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		NQNFS_CKINVALID(vp, np, NQL_READ)) {
		do {
			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
		} while (error == NQNFS_EXPIRED);
		if (error)
			return (error);
		if (QUADNE(np->n_lrev, np->n_brev) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
				np->n_direofoffset = 0;
				cache_purge(vp);
			}
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
		}
	    }
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		}
		return (error);
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
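		/*
		 * Compute the logical block number, the offset within
		 * that block, and the number of bytes that can be
		 * transferred from it, clipped to the file size.
		 */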
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff <= 0)
			return (error);
		if (diff < n)
			n = diff;
		bn = lbn*(biosize/DEV_BSIZE);
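		/*
		 * Build the list of read-ahead blocks, stopping at the
		 * configured read-ahead limit or the end of the file.
		 */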
		for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			rasize[nra] = biosize;
		}
again:
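		/*
		 * Issue the read, with read-ahead only when the access
		 * pattern looks sequential (this block is at or beyond
		 * the last one read on this vnode).
		 */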
		if (nra > 0 && lbn >= vp->v_lastr)
			error = breadn(vp, bn, biosize, rablock, rasize, nra,
				cred, &bp);
		else
			error = bread(vp, bn, biosize, cred, &bp);
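		/*
		 * If the buffer only holds a partially valid range and the
		 * request falls outside it, invalidate the buffer (writing
		 * it first if it has dirty data) and read it again.
		 */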
		if (bp->b_validend > 0) {
			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					(void) bwrite(bp);
				} else
					brelse(bp);
				goto again;
			}
		} else {
			bp->b_validoff = 0;
			bp->b_validend = biosize - bp->b_resid;
		}
		vp->v_lastr = lbn;
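		/*
		 * A short read from the server leaves b_resid nonzero;
		 * trim n so the copy stops at the data actually read.
		 */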
		if (bp->b_resid) {
		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
			(biosize-bp->b_resid-on);
		   n = MIN(n, diff);
		}
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
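		/*
		 * Symbolic links are read as a single buffer of up to
		 * NFS_MAXPATHLEN bytes at logical block 0.
		 */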
		on = 0;
		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
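		/*
		 * Directories are read one NFS_DIRBLKSIZ block at a time,
		 * addressed by the current directory offset cookie.
		 */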
		on = 0;
		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		break;
	    }
	    if (error) {
		brelse(bp);
		return (error);
	    }

	    /*
	     * For nqnfs:
	     * Must check for a valid lease, since it may have expired while in
	     * bread(). If expired, get a lease.
	     * If data is stale, flush and try again.
	     * nb: If a read rpc is done by bread() or breada() and there is
	     *     no valid lease, a get_lease request will be piggybacked.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
			do {
				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if ((np->n_flag & NQNFSNONCACHE) ||
			    QUADNE(np->n_lrev, np->n_brev) ||
			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
				if (vp->v_type == VDIR) {
					np->n_direofoffset = 0;
					cache_purge(vp);
				}
				brelse(bp);
				np->n_flag &= ~NMODIFIED;
				vinvalbuf(vp, TRUE);
				np->n_brev = np->n_lrev;
				continue;
			}
		} else if ((np->n_flag & NQNFSNONCACHE) ||
		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			np->n_direofoffset = 0;
			brelse(bp);
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(vp, TRUE);
			np->n_brev = np->n_lrev;
			continue;
		}
	    }
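	    /*
	     * Copy the requested range out of the cache buffer into the
	     * caller's uio.
	     */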
	    if (n > 0)
		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
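	    /*
	     * Per-type cleanup: age out regular-file blocks that have been
	     * fully consumed, end the loop after a symlink read, and advance
	     * the directory offset to the cookie left in b_blkno.
	     */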
	    switch (vp->v_type) {
	    case VREG:
		if (n+on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		uio->uio_offset = bp->b_blkno;
		break;
	    }
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(ap)
	struct vop_write_args *ap;
{
	USES_VOP_GETATTR;
	register int biosize;
	struct proc *p = ap->a_uio->uio_procp;
	struct buf *bp;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (ap->a_uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (ap->a_uio->uio_segflg == UIO_USERSPACE && ap->a_uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (ap->a_vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ap->a_ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			vinvalbuf(ap->a_vp, TRUE);
		}
		if (ap->a_ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, p))
				return (error);
			ap->a_uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(ap->a_vp->v_mount);
	if (ap->a_uio->uio_offset < 0)
		return (EINVAL);
	if (ap->a_uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && ap->a_uio->uio_offset + ap->a_uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	np->n_flag |= NMODIFIED;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(ap->a_vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(ap->a_vp, ap->a_uio, ap->a_cred));
		nfsstats.biocache_writes++;
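		/*
		 * Compute the logical block number, the offset within that
		 * block, and the byte count for this pass, and extend the
		 * cached file size if the write goes past the current EOF.
		 */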
		lbn = ap->a_uio->uio_offset / biosize;
		on = ap->a_uio->uio_offset & (biosize-1);
		n = MIN((unsigned)(biosize - on), ap->a_uio->uio_resid);
		if (ap->a_uio->uio_offset + n > np->n_size) {
			np->n_size = ap->a_uio->uio_offset + n;
			vnode_pager_setsize(ap->a_vp, (u_long)np->n_size);
		}
		bn = lbn * (biosize / DEV_BSIZE);
again:
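		/*
		 * Get the buffer for this block and remember the write
		 * credential on it for any later asynchronous write.
		 */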
		bp = getblk(ap->a_vp, bn, biosize);
		if (bp->b_wcred == NOCRED) {
			crhold(ap->a_cred);
			bp->b_wcred = ap->a_cred;
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (error = bwrite(bp))
				return (error);
			goto again;
		}

		/*
		 * Check for a valid write lease and get one as required,
		 * in case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (QUADNE(np->n_lrev, np->n_brev) ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(ap->a_vp, TRUE);
				np->n_brev = np->n_lrev;
			}
		}
		if (error = uiomove(bp->b_un.b_addr + on, n, ap->a_uio)) {
			brelse(bp);
			return (error);
		}
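		/*
		 * Merge the newly written range into the buffer's dirty
		 * region. If the old valid region is not contiguous with
		 * the dirty data, it is discarded; otherwise it is extended
		 * to cover the dirty range.
		 */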
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on+n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * If the lease is non-cachable or IO_SYNC, do a synchronous
		 * bwrite(). Otherwise, if the block is now completely filled
		 * and this is not an nqnfs mount, start it writing
		 * asynchronously; in all other cases just mark it dirty and
		 * delay the write.
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ap->a_ioflag & IO_SYNC)) {
			bp->b_proc = p;
			bwrite(bp);
		} else if ((n+on) == biosize &&
			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_flags |= B_AGE;
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else {
			bp->b_proc = (struct proc *)0;
			bdwrite(bp);
		}
	} while (error == 0 && ap->a_uio->uio_resid > 0 && n != 0);
	return (error);
}