/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * $FreeBSD: src/sys/ufs/ufs/ufs_readwrite.c,v 1.65.2.14 2003/04/04 22:21:29 tegge Exp $
 * $DragonFly: src/sys/vfs/ufs/ufs_readwrite.c,v 1.2 2003/06/17 04:29:00 dillon Exp $
 */

#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"

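/*
 * The macros above map the generic names used in this file (READ, WRITE,
 * READ_S, WRITE_S, FS, I_FS, BLKSIZE) onto their FFS-specific counterparts,
 * so the code below implements ffs_read()/ffs_write().  Historically this
 * indirection let the same read/write source be shared by more than one
 * UFS-derived filesystem.
 */
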
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <sys/event.h>
#include <sys/vmmeter.h>
#include "opt_directio.h"

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_pollinfo.vpi_selinfo.si_note, (b))

#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
READ(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp;
	register struct inode *ip;
	register struct uio *uio;
	register FS *fs;
	struct buf *bp;
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	u_short mode;
	int seqcount;
	int ioflag;
	vm_object_t object;

	vp = ap->a_vp;
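	/*
	 * The upper 16 bits of a_ioflag carry the caller's sequential
	 * access hint; it is used below to decide how aggressively to
	 * cluster and read ahead.
	 */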
	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error || workdone)
			return error;
	}
#endif

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	object = vp->v_object;

	bytesinfile = ip->i_size - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

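	/*
	 * If there is a VM object backing this vnode, take a reference on
	 * it for the optimized copy paths below.  Every return path from
	 * here on must drop that reference via vm_object_vndeallocate().
	 */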
	if (object)
		vm_object_reference(object);

#ifdef ENABLE_VFS_IOOPT
	/*
	 * If IO optimisation is turned on, we are NOT a VM based IO
	 * request (i.e. not headed for the buffer cache), but there IS
	 * a vm object associated with the vnode, try to read straight
	 * from the object.
	 */
	if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
		int nread, toread;

		toread = uio->uio_resid;
		if (toread > bytesinfile)
			toread = bytesinfile;
		if (toread >= PAGE_SIZE) {
			/*
			 * Then if it's at least a page in size, try to
			 * get the data from the object using vm tricks.
			 */
			error = uioread(toread, uio, object, &nread);
			if ((uio->uio_resid == 0) || (error != 0)) {
				/*
				 * If we finished or there was an error
				 * then finish up (the reference previously
				 * obtained on object must be released).
				 */
				if ((error == 0 ||
				    uio->uio_resid != orig_resid) &&
				    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
					ip->i_flag |= IN_ACCESS;

				if (object)
					vm_object_vndeallocate(object);
				return error;
			}
		}
	}
#endif

	/*
	 * Ok, so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
#ifdef ENABLE_VFS_IOOPT
		if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
			/*
			 * Obviously we didn't finish above, but we
			 * didn't get an error either, so try the same
			 * trick again; this time we are looping.
			 */
			int nread, toread;
			toread = uio->uio_resid;
			if (toread > bytesinfile)
				toread = bytesinfile;

			/*
			 * Once again, if there isn't enough for a
			 * whole page, don't try optimising.
			 */
			if (toread >= PAGE_SIZE) {
				error = uioread(toread, uio, object, &nread);
				if ((uio->uio_resid == 0) || (error != 0)) {
					/*
					 * If we finished or there was an
					 * error then finish up (the reference
					 * previously obtained on object must
					 * be released).
					 */
					if ((error == 0 ||
					    uio->uio_resid != orig_resid) &&
					    (vp->v_mount->mnt_flag &
					    MNT_NOATIME) == 0)
						ip->i_flag |= IN_ACCESS;
					if (object)
						vm_object_vndeallocate(object);
					return error;
				}
				/*
				 * To get here we didn't finish or err.
				 * If we did get some data,
				 * loop to try another bite.
				 */
				if (nread > 0) {
					continue;
				}
			}
		}
#endif

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block, less the amount of data before
		 * our starting point (duh!).
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
				size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 *
		 * XXX b_resid is only valid when an actual I/O has occurred
		 * and may be incorrect if the buffer is B_CACHE or if the
		 * last op on the buffer was a failed write.  This KASSERT
		 * is a precursor to removing it from the UFS code.
		 */
		KASSERT(bp->b_resid == 0, ("bp->b_resid != 0"));
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

#ifdef ENABLE_VFS_IOOPT
		if (vfs_ioopt && object &&
		    (bp->b_flags & B_VMIO) &&
		    ((blkoffset & PAGE_MASK) == 0) &&
		    ((xfersize & PAGE_MASK) == 0)) {
			/*
			 * If VFS IO optimisation is turned on,
			 * it's an exact page multiple,
			 * and a normal VM based op,
			 * then use uiomoveco().
			 */
			error =
				uiomoveco((char *)bp->b_data + blkoffset,
					(int)xfersize, uio, object);
		} else
#endif
		{
			/*
			 * Otherwise use the general form.
			 */
			error =
				uiomove((char *)bp->b_data + blkoffset,
					(int)xfersize, uio);
		}

		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf; mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error,
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it,
	 * so it must have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if (object)
		vm_object_vndeallocate(object);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for writing.
 */
int
WRITE(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp;
	register struct uio *uio;
	register struct inode *ip;
	register FS *fs;
	struct buf *bp;
	struct proc *p;
	ufs_daddr_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
	vm_object_t object;

	extended = 0;
	seqcount = ap->a_ioflag >> 16;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

	object = vp->v_object;
	if (object)
		vm_object_reference(object);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
			if (object)
				vm_object_vndeallocate(object);
			return (EPERM);
		}
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("%s: dir write", WRITE_S);
		break;
	default:
		panic("%s: type %p %d (%d,%d)", WRITE_S, vp, (int)vp->v_type,
			(int)uio->uio_offset,
			(int)uio->uio_resid
		);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
		if (object)
			vm_object_vndeallocate(object);
		return (EFBIG);
	}
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		if (object)
			vm_object_vndeallocate(object);
		return (EFBIG);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;

	/*
	 * NOTE! These B_ flags are actually balloc-only flags, not buffer
	 * flags.  They are similar to the BA_ flags in -current.
	 */
	if (seqcount > B_SEQMAX)
		flags = B_SEQMAX << B_SEQSHIFT;
	else
		flags = seqcount << B_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= B_SYNC;

	if (object && (object->flags & OBJ_OPT)) {
		vm_freeze_copyopts(object,
			OFF_TO_IDX(uio->uio_offset),
			OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
	}

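	/*
	 * Copy the data in one filesystem block at a time, allocating
	 * backing store for each block with VOP_BALLOC() as we go.
	 */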
	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

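		/*
		 * If this write extends the file, tell the VM system
		 * about the new size before allocating the block so the
		 * pager knows about the pages past the old end of file.
		 */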
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer
		 * size does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;
/* XXX is uio->uio_offset the right thing here? */
		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid and we did not clear garbage
		 * out above, we have to do so here even though the write
		 * covers the entire buffer, in order to avoid an
		 * mmap()/write race where another process may see the
		 * garbage before the uiomove() for the write replaces it.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			extended = 1;
		}

		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC, each buffer is written synchronously.  Otherwise,
		 * if we have a severe page deficiency, write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */

		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_mode &= ~(ISUID | ISGID);
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);

	if (object)
		vm_object_vndeallocate(object);

	return (error);
}


/*
 * get page routine
 */
int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	off_t foff, physoffset;
	int i, size, bsize;
	struct vnode *dp, *vp;
	vm_object_t obj;
	vm_pindex_t pindex, firstindex;
	vm_page_t mreq;
	int bbackwards, bforwards;
	int pbackwards, pforwards;
	int firstpage;
	int reqlblkno;
	daddr_t reqblkno;
	int poff;
	int pcount;
	int rtval;
	int pagesperblock;


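	/*
	 * pcount is the number of pages covered by the request; mreq is
	 * the page the caller actually needs (the others are read-ahead
	 * or read-behind candidates).
	 */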
	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];
	firstindex = ap->a_m[0]->pindex;

	/*
	 * If ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		return VM_PAGER_OK;
	}

	vp = ap->a_vp;
	obj = vp->v_object;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pindex = mreq->pindex;
	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;

	if (bsize < PAGE_SIZE)
		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
						    ap->a_count,
						    ap->a_reqpage);

	/*
	 * foff is the file offset of the required page
	 * reqlblkno is the logical block that contains the page
	 * poff is the index of the page into the logical block
	 */
	reqlblkno = foff / bsize;
	poff = (foff % bsize) / PAGE_SIZE;

	if (VOP_BMAP(vp, reqlblkno, &dp, &reqblkno,
		&bforwards, &bbackwards) || (reqblkno == -1)) {
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage)
				vm_page_free(ap->a_m[i]);
		}
		if (reqblkno == -1) {
			if ((mreq->flags & PG_ZERO) == 0)
				vm_page_zero_fill(mreq);
			vm_page_undirty(mreq);
			mreq->valid = VM_PAGE_BITS_ALL;
			return VM_PAGER_OK;
		} else {
			return VM_PAGER_ERROR;
		}
	}

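	/*
	 * physoffset is the byte offset of the requested page on the
	 * underlying device: the block's device address (reqblkno, in
	 * DEV_BSIZE units) plus the page's offset within the block.
	 */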
	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
	pagesperblock = bsize / PAGE_SIZE;
	/*
	 * find the first page that is contiguous...
	 * note that pbackwards is the number of pages that are contiguous
	 * backwards.
	 */
	firstpage = 0;
	if (ap->a_count) {
		pbackwards = poff + bbackwards * pagesperblock;
		if (ap->a_reqpage > pbackwards) {
			firstpage = ap->a_reqpage - pbackwards;
			for (i = 0; i < firstpage; i++)
				vm_page_free(ap->a_m[i]);
		}

	/*
	 * pforwards is the number of pages that are contiguous
	 * after the current page.
	 */
		pforwards = (pagesperblock - (poff + 1)) +
			bforwards * pagesperblock;
		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
			for (i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
				vm_page_free(ap->a_m[i]);
			pcount = ap->a_reqpage + pforwards + 1;
		}

	/*
	 * number of pages for I/O corrected for the non-contig pages at
	 * the beginning of the array.
	 */
		pcount -= firstpage;
	}

	/*
	 * calculate the size of the transfer
	 */

	size = pcount * PAGE_SIZE;

	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
		obj->un_pager.vnp.vnp_size)
		size = obj->un_pager.vnp.vnp_size -
			IDX_TO_OFF(ap->a_m[firstpage]->pindex);

	physoffset -= foff;
	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
		(ap->a_reqpage - firstpage), physoffset);

	return (rtval);
}

/*
 * put page routine
 *
 * XXX By default, wimp out... note that a_offset is ignored (and always
 * XXX has been).
 */
int
ffs_putpages(ap)
	struct vop_putpages_args *ap;
{
	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
}
759