/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * $FreeBSD: src/sys/ufs/ufs/ufs_readwrite.c,v 1.65.2.14 2003/04/04 22:21:29 tegge Exp $
 * $DragonFly: src/sys/vfs/ufs/ufs_readwrite.c,v 1.9 2003/07/26 22:04:27 rob Exp $
 */

#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs

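/*
 * The BLKSIZE/FS/I_FS macros above parameterize the filesystem-specific
 * names used below.  Historically this file was written so the same
 * read/write logic could be reused by filesystems with different in-core
 * structure names (ext2fs, for instance, kept a near-identical copy with
 * its own definitions); treat that as background, not a guarantee.
 */
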
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <sys/event.h>
#include <sys/vmmeter.h>
#include <vm/vm_page2.h>

#include "opt_directio.h"

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_pollinfo.vpi_selinfo.si_note, (b))

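/*
 * VN_KNOTE posts an event on the vnode's kqueue note list; ffs_write()
 * below uses it to deliver NOTE_WRITE (plus NOTE_EXTEND when the file
 * grows) to any registered EVFILT_VNODE watchers.
 */
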
#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	u_short mode;
	int seqcount;
	int ioflag;
	vm_object_t object;

	vp = ap->a_vp;
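	/*
	 * The upper 16 bits of a_ioflag carry the caller's heuristic
	 * sequential-access count; the low bits are the usual IO_* flags.
	 */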
	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error || workdone)
			return error;
	}
#endif

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	object = vp->v_object;

	bytesinfile = ip->i_size - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	if (object)
		vm_object_reference(object);

#ifdef ENABLE_VFS_IOOPT
	/*
	 * If IO optimisation is turned on,
	 * and we are NOT a VM based IO request,
	 * (i.e. not headed for the buffer cache)
	 * but there IS a vm object associated with it,
	 * then try to satisfy the read straight from the object.
	 */
	if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
		int nread, toread;

		toread = uio->uio_resid;
		if (toread > bytesinfile)
			toread = bytesinfile;
		if (toread >= PAGE_SIZE) {
			/*
			 * Then if it's at least a page in size, try to
			 * get the data from the object using vm tricks
			 */
			error = uioread(toread, uio, object, &nread);
			if ((uio->uio_resid == 0) || (error != 0)) {
				/*
				 * If we finished or there was an error
				 * then finish up (the reference previously
				 * obtained on object must be released).
				 */
				if ((error == 0 ||
				    uio->uio_resid != orig_resid) &&
				    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
					ip->i_flag |= IN_ACCESS;

				if (object)
					vm_object_vndeallocate(object);
				return error;
			}
		}
	}
#endif

	/*
	 * Ok, so we couldn't do it all in one vm trick...
	 * cycle around trying smaller bites instead.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
#ifdef ENABLE_VFS_IOOPT
		if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
			/*
			 * Obviously we didn't finish above, but we
			 * didn't get an error either.  Try the same trick
			 * again, but this time we are looping.
			 */
			int nread, toread;
			toread = uio->uio_resid;
			if (toread > bytesinfile)
				toread = bytesinfile;

			/*
			 * Once again, if there isn't enough for a
			 * whole page, don't try optimising.
			 */
			if (toread >= PAGE_SIZE) {
				error = uioread(toread, uio, object, &nread);
				if ((uio->uio_resid == 0) || (error != 0)) {
					/*
					 * If we finished or there was an
					 * error then finish up (the reference
					 * previously obtained on object must
					 * be released).
					 */
					if ((error == 0 ||
					    uio->uio_resid != orig_resid) &&
					    (vp->v_mount->mnt_flag &
					    MNT_NOATIME) == 0)
						ip->i_flag |= IN_ACCESS;
					if (object)
						vm_object_vndeallocate(object);
					return error;
				}
				/*
				 * To get here we didn't finish or err.
				 * If we did get some data,
				 * loop to try another bite.
				 */
				if (nread > 0) {
					continue;
				}
			}
		}
#endif

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of the buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

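		/*
		 * Worked example with illustrative numbers: for an
		 * 8192-byte fs_bsize and uio_offset = 20580, lbn is 2
		 * (20580 / 8192) and blkoffset is 4196 (20580 % 8192),
		 * so this pass can move at most 8192 - 4196 = 3996 bytes
		 * before the resid/EOF clamps above cut it further.
		 */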
		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
				size, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 *
		 * XXX b_resid is only valid when an actual I/O has occurred
		 * and may be incorrect if the buffer is B_CACHE or if the
		 * last op on the buffer was a failed write.  This KASSERT
		 * is a precursor to removing it from the UFS code.
		 */
		KASSERT(bp->b_resid == 0, ("bp->b_resid != 0"));
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

#ifdef ENABLE_VFS_IOOPT
		if (vfs_ioopt && object &&
		    (bp->b_flags & B_VMIO) &&
		    ((blkoffset & PAGE_MASK) == 0) &&
		    ((xfersize & PAGE_MASK) == 0)) {
			/*
			 * If VFS IO optimisation is turned on,
			 * and it's an exact page multiple,
			 * and a normal VM based op,
			 * then use uiomoveco()
			 */
			error =
				uiomoveco((char *)bp->b_data + blkoffset,
					(int)xfersize, uio, object);
		} else
#endif
		{
			/*
			 * otherwise use the general form
			 */
			error =
				uiomove((char *)bp->b_data + blkoffset,
					(int)xfersize, uio);
		}

		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error,
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it,
	 * so it must have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if (object)
		vm_object_vndeallocate(object);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for writing.
 */
int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	FS *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
	vm_object_t object;
	struct thread *td;

	extended = 0;
	seqcount = ap->a_ioflag >> 16;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

	object = vp->v_object;
	if (object)
		vm_object_reference(object);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
			if (object)
				vm_object_vndeallocate(object);
			return (EPERM);
		}
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
			(int)uio->uio_offset,
			(int)uio->uio_resid
		);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
		if (object)
			vm_object_vndeallocate(object);
		return (EFBIG);
	}
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td && td->td_proc &&
	    uio->uio_offset + uio->uio_resid >
	    td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(td->td_proc, SIGXFSZ);
		if (object)
			vm_object_vndeallocate(object);
		return (EFBIG);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;

	/*
	 * NOTE! These B_ flags are actually balloc-only flags, not buffer
	 * flags.  They are similar to the BA_ flags in -current.
	 */
	if (seqcount > B_SEQMAX)
		flags = B_SEQMAX << B_SEQSHIFT;
	else
		flags = seqcount << B_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= B_SYNC;

	if (object && (object->flags & OBJ_OPT)) {
		vm_freeze_copyopts(object,
			OFF_TO_IDX(uio->uio_offset),
			OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
	}

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

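		/*
		 * Tell the VM/pager about the new end of file up front so
		 * pages beyond the old EOF are acceptable targets before
		 * any data is copied into them.
		 */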
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer
		 * size does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;
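		/*
		 * Illustrative case: writing 100 bytes into an 8192-byte
		 * block covers only part of the buffer, so B_CLRBUF makes
		 * balloc bring the block's current contents in first and
		 * the untouched 8092 bytes survive the partial overwrite.
		 */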
/* XXX is uio->uio_offset the right thing here? */
		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid and we did not clear garbage
		 * out above, we have to do so here even though the write
		 * covers the entire buffer in order to avoid a mmap()/write
		 * race where another process may see the garbage prior to
		 * the uiomove() for a write replacing it.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			extended = 1;
		}

		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */

		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_mode &= ~(ISUID | ISGID);
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);

	if (object)
		vm_object_vndeallocate(object);

	return (error);
}


/*
 * get page routine
 */
int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	off_t foff, physoffset;
	int i, size, bsize;
	struct vnode *dp, *vp;
	vm_object_t obj;
	vm_pindex_t pindex, firstindex;
	vm_page_t mreq;
	int bbackwards, bforwards;
	int pbackwards, pforwards;
	int firstpage;
	int reqlblkno;
	daddr_t reqblkno;
	int poff;
	int pcount;
	int rtval;
	int pagesperblock;


	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];
	firstindex = ap->a_m[0]->pindex;

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		return VM_PAGER_OK;
	}

	vp = ap->a_vp;
	obj = vp->v_object;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pindex = mreq->pindex;
	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;

	if (bsize < PAGE_SIZE)
		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
						    ap->a_count,
						    ap->a_reqpage);

	/*
	 * foff is the file offset of the required page
	 * reqlblkno is the logical block that contains the page
	 * poff is the index of the page into the logical block
	 */
	reqlblkno = foff / bsize;
	poff = (foff % bsize) / PAGE_SIZE;

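	/*
	 * Example with illustrative numbers: bsize = 8192, PAGE_SIZE =
	 * 4096 and pindex = 5 give foff = 20480, reqlblkno = 2 and
	 * poff = 1 (the second page within logical block 2).
	 */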
	if ( VOP_BMAP( vp, reqlblkno, &dp, &reqblkno,
		&bforwards, &bbackwards) || (reqblkno == -1)) {
		for(i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage)
				vm_page_free(ap->a_m[i]);
		}
		if (reqblkno == -1) {
			if ((mreq->flags & PG_ZERO) == 0)
				vm_page_zero_fill(mreq);
			vm_page_undirty(mreq);
			mreq->valid = VM_PAGE_BITS_ALL;
			return VM_PAGER_OK;
		} else {
			return VM_PAGER_ERROR;
		}
	}

	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
	pagesperblock = bsize / PAGE_SIZE;
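	/*
	 * Illustrative numbers: reqblkno = 1000 and poff = 1 give
	 * physoffset = 1000 * DEV_BSIZE + 4096 = 516096 with 4096-byte
	 * pages, and an 8192-byte bsize gives pagesperblock = 2.
	 */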
	/*
	 * find the first page that is contiguous...
	 * note that pbackwards is the number of pages that are contiguous
	 * backwards.
	 */
	firstpage = 0;
	if (ap->a_count) {
		pbackwards = poff + bbackwards * pagesperblock;
		if (ap->a_reqpage > pbackwards) {
			firstpage = ap->a_reqpage - pbackwards;
			for(i=0;i<firstpage;i++)
				vm_page_free(ap->a_m[i]);
		}

	/*
	 * pforwards is the number of pages that are contiguous
	 * after the current page.
	 */
		pforwards = (pagesperblock - (poff + 1)) +
			bforwards * pagesperblock;
		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
			for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
				vm_page_free(ap->a_m[i]);
			pcount = ap->a_reqpage + pforwards + 1;
		}

	/*
	 * number of pages for I/O corrected for the non-contig pages at
	 * the beginning of the array.
	 */
		pcount -= firstpage;
	}

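	/*
	 * Continuing the illustrative numbers (pagesperblock = 2,
	 * poff = 1): with bbackwards = 0, pbackwards = 1, so at most one
	 * earlier page in the array can share this I/O, while
	 * pforwards = 0 + bforwards * 2 bounds the trailing pages.
	 */
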
	/*
	 * calculate the size of the transfer
	 */

	size = pcount * PAGE_SIZE;

	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
		obj->un_pager.vnp.vnp_size)
		size = obj->un_pager.vnp.vnp_size -
			IDX_TO_OFF(ap->a_m[firstpage]->pindex);

	physoffset -= foff;
	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
		(ap->a_reqpage - firstpage), physoffset);

	return (rtval);
}

/*
 * put page routine
 *
 * XXX By default, wimp out... note that a_offset is ignored (and always
 * XXX has been).
 */
int
ffs_putpages(ap)
	struct vop_putpages_args *ap;
{
	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
}