xref: /freebsd/sys/ufs/ffs/ffs_vnops.c (revision a3557ef0)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
62  * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
63  *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
64  */
65 
66 #include <sys/cdefs.h>
67 __FBSDID("$FreeBSD$");
68 
69 #include <sys/param.h>
70 #include <sys/bio.h>
71 #include <sys/systm.h>
72 #include <sys/buf.h>
73 #include <sys/conf.h>
74 #include <sys/extattr.h>
75 #include <sys/kernel.h>
76 #include <sys/limits.h>
77 #include <sys/malloc.h>
78 #include <sys/mount.h>
79 #include <sys/priv.h>
80 #include <sys/rwlock.h>
81 #include <sys/stat.h>
82 #include <sys/sysctl.h>
83 #include <sys/vmmeter.h>
84 #include <sys/vnode.h>
85 
86 #include <vm/vm.h>
87 #include <vm/vm_param.h>
88 #include <vm/vm_extern.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_pager.h>
92 #include <vm/vnode_pager.h>
93 
94 #include <ufs/ufs/extattr.h>
95 #include <ufs/ufs/quota.h>
96 #include <ufs/ufs/inode.h>
97 #include <ufs/ufs/ufs_extern.h>
98 #include <ufs/ufs/ufsmount.h>
99 
100 #include <ufs/ffs/fs.h>
101 #include <ufs/ffs/ffs_extern.h>
102 #include "opt_directio.h"
103 #include "opt_ffs.h"
104 
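/*
 * Evaluates to true if "ptr" is suitably aligned for an object of
 * type "s".
 */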
105 #define	ALIGNED_TO(ptr, s)	\
106 	(((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)
107 
108 #ifdef DIRECTIO
109 extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
110 #endif
111 static vop_fdatasync_t	ffs_fdatasync;
112 static vop_fsync_t	ffs_fsync;
113 static vop_getpages_t	ffs_getpages;
114 static vop_getpages_async_t	ffs_getpages_async;
115 static vop_lock1_t	ffs_lock;
116 #ifdef INVARIANTS
117 static vop_unlock_t	ffs_unlock_debug;
118 #endif
119 static vop_read_t	ffs_read;
120 static vop_write_t	ffs_write;
121 static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
122 static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
123 		    struct ucred *cred);
124 static vop_strategy_t	ffsext_strategy;
125 static vop_closeextattr_t	ffs_closeextattr;
126 static vop_deleteextattr_t	ffs_deleteextattr;
127 static vop_getextattr_t	ffs_getextattr;
128 static vop_listextattr_t	ffs_listextattr;
129 static vop_openextattr_t	ffs_openextattr;
130 static vop_setextattr_t	ffs_setextattr;
131 static vop_vptofh_t	ffs_vptofh;
132 
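/*
 * Two variants of each operation vector are provided: the "1" vectors
 * are used for UFS1 file systems, which have no extended attribute
 * area, while the "2" vectors add the extended attribute operations
 * used by UFS2.
 */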
133 /* Global vfs data structures for ufs. */
134 struct vop_vector ffs_vnodeops1 = {
135 	.vop_default =		&ufs_vnodeops,
136 	.vop_fsync =		ffs_fsync,
137 	.vop_fdatasync =	ffs_fdatasync,
138 	.vop_getpages =		ffs_getpages,
139 	.vop_getpages_async =	ffs_getpages_async,
140 	.vop_lock1 =		ffs_lock,
141 #ifdef INVARIANTS
142 	.vop_unlock =		ffs_unlock_debug,
143 #endif
144 	.vop_read =		ffs_read,
145 	.vop_reallocblks =	ffs_reallocblks,
146 	.vop_write =		ffs_write,
147 	.vop_vptofh =		ffs_vptofh,
148 };
149 VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1);
150 
151 struct vop_vector ffs_fifoops1 = {
152 	.vop_default =		&ufs_fifoops,
153 	.vop_fsync =		ffs_fsync,
154 	.vop_fdatasync =	ffs_fdatasync,
155 	.vop_lock1 =		ffs_lock,
156 #ifdef INVARIANTS
157 	.vop_unlock =		ffs_unlock_debug,
158 #endif
159 	.vop_vptofh =		ffs_vptofh,
160 };
161 VFS_VOP_VECTOR_REGISTER(ffs_fifoops1);
162 
163 /* Global vfs data structures for ufs. */
164 struct vop_vector ffs_vnodeops2 = {
165 	.vop_default =		&ufs_vnodeops,
166 	.vop_fsync =		ffs_fsync,
167 	.vop_fdatasync =	ffs_fdatasync,
168 	.vop_getpages =		ffs_getpages,
169 	.vop_getpages_async =	ffs_getpages_async,
170 	.vop_lock1 =		ffs_lock,
171 #ifdef INVARIANTS
172 	.vop_unlock =		ffs_unlock_debug,
173 #endif
174 	.vop_read =		ffs_read,
175 	.vop_reallocblks =	ffs_reallocblks,
176 	.vop_write =		ffs_write,
177 	.vop_closeextattr =	ffs_closeextattr,
178 	.vop_deleteextattr =	ffs_deleteextattr,
179 	.vop_getextattr =	ffs_getextattr,
180 	.vop_listextattr =	ffs_listextattr,
181 	.vop_openextattr =	ffs_openextattr,
182 	.vop_setextattr =	ffs_setextattr,
183 	.vop_vptofh =		ffs_vptofh,
184 };
185 VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2);
186 
187 struct vop_vector ffs_fifoops2 = {
188 	.vop_default =		&ufs_fifoops,
189 	.vop_fsync =		ffs_fsync,
190 	.vop_fdatasync =	ffs_fdatasync,
191 	.vop_lock1 =		ffs_lock,
192 #ifdef INVARIANTS
193 	.vop_unlock =		ffs_unlock_debug,
194 #endif
195 	.vop_reallocblks =	ffs_reallocblks,
196 	.vop_strategy =		ffsext_strategy,
197 	.vop_closeextattr =	ffs_closeextattr,
198 	.vop_deleteextattr =	ffs_deleteextattr,
199 	.vop_getextattr =	ffs_getextattr,
200 	.vop_listextattr =	ffs_listextattr,
201 	.vop_openextattr =	ffs_openextattr,
202 	.vop_setextattr =	ffs_setextattr,
203 	.vop_vptofh =		ffs_vptofh,
204 };
205 VFS_VOP_VECTOR_REGISTER(ffs_fifoops2);
206 
207 /*
208  * Synch an open file.
209  */
210 /* ARGSUSED */
211 static int
212 ffs_fsync(struct vop_fsync_args *ap)
213 {
214 	struct vnode *vp;
215 	struct bufobj *bo;
216 	int error;
217 
218 	vp = ap->a_vp;
219 	bo = &vp->v_bufobj;
220 retry:
221 	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
222 	if (error)
223 		return (error);
224 	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
225 		error = softdep_fsync(vp);
226 		if (error)
227 			return (error);
228 
229 		/*
230 		 * The softdep_fsync() function may drop vp lock,
231 		 * allowing for dirty buffers to reappear on the
232 		 * bo_dirty list. Recheck and resync as needed.
233 		 */
234 		BO_LOCK(bo);
235 		if ((vp->v_type == VREG || vp->v_type == VDIR) &&
236 		    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
237 			BO_UNLOCK(bo);
238 			goto retry;
239 		}
240 		BO_UNLOCK(bo);
241 	}
242 	if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
243 		return (ENXIO);
244 	return (0);
245 }
246 
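/*
 * Flush out all dirty buffers for a vnode and, unless inhibited,
 * update its inode.  "waitfor" is MNT_WAIT or MNT_NOWAIT.  "flags"
 * may include NO_INO_UPDT, to skip the final inode update, and
 * DATA_ONLY, to write only the data blocks (plus whatever metadata
 * is required to retrieve them), as used by fdatasync(2).
 */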
247 int
248 ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
249 {
250 	struct inode *ip;
251 	struct bufobj *bo;
252 	struct ufsmount *ump;
253 	struct buf *bp, *nbp;
254 	ufs_lbn_t lbn;
255 	int error, passes;
256 	bool still_dirty, wait;
257 
258 	ip = VTOI(vp);
259 	ip->i_flag &= ~IN_NEEDSYNC;
260 	bo = &vp->v_bufobj;
261 	ump = VFSTOUFS(vp->v_mount);
262 
263 	/*
264 	 * When doing MNT_WAIT we must first flush all dependencies
265 	 * on the inode.
266 	 */
267 	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
268 	    (error = softdep_sync_metadata(vp)) != 0) {
269 		if (ffs_fsfail_cleanup(ump, error))
270 			error = 0;
271 		return (error);
272 	}
273 
274 	/*
275 	 * Flush all dirty buffers associated with a vnode.
276 	 */
277 	error = 0;
278 	passes = 0;
279 	wait = false;	/* Always do an async pass first. */
280 	lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
281 	BO_LOCK(bo);
282 loop:
283 	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
284 		bp->b_vflags &= ~BV_SCANNED;
285 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
286 		/*
287 		 * Reasons to skip this buffer: it has already been considered
288 		 * on this pass, the buffer has dependencies that will cause
289 		 * it to be redirtied and it has not already been deferred,
290 		 * or it is already being written.
291 		 */
292 		if ((bp->b_vflags & BV_SCANNED) != 0)
293 			continue;
294 		bp->b_vflags |= BV_SCANNED;
295 		/*
296 		 * Flush indirects in order, if requested.
297 		 *
298 		 * Note that if only datasync is requested, we can
299 		 * skip indirect blocks when softupdates are not
300 		 * active.  Otherwise we must flush them with data,
301 		 * since dependencies prevent data block writes.
302 		 */
303 		if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
304 		    (lbn_level(bp->b_lblkno) >= passes ||
305 		    ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
306 			continue;
307 		if (bp->b_lblkno > lbn)
308 			panic("ffs_syncvnode: syncing truncated data.");
309 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
310 			BO_UNLOCK(bo);
311 		} else if (wait) {
312 			if (BUF_LOCK(bp,
313 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
314 			    BO_LOCKPTR(bo)) != 0) {
315 				bp->b_vflags &= ~BV_SCANNED;
316 				goto next;
317 			}
318 		} else
319 			continue;
320 		if ((bp->b_flags & B_DELWRI) == 0)
321 			panic("ffs_fsync: not dirty");
322 		/*
323 		 * Check for dependencies and potentially complete them.
324 		 */
325 		if (!LIST_EMPTY(&bp->b_dep) &&
326 		    (error = softdep_sync_buf(vp, bp,
327 		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
328 			/* I/O error. */
329 			if (error != EBUSY) {
330 				BUF_UNLOCK(bp);
331 				return (error);
332 			}
333 			/* If we deferred once, don't defer again. */
334 			if ((bp->b_flags & B_DEFERRED) == 0) {
335 				bp->b_flags |= B_DEFERRED;
336 				BUF_UNLOCK(bp);
337 				goto next;
338 			}
339 		}
340 		if (wait) {
341 			bremfree(bp);
342 			error = bwrite(bp);
343 			if (ffs_fsfail_cleanup(ump, error))
344 				error = 0;
345 			if (error != 0)
346 				return (error);
347 		} else if ((bp->b_flags & B_CLUSTEROK) != 0) {
348 			(void) vfs_bio_awrite(bp);
349 		} else {
350 			bremfree(bp);
351 			(void) bawrite(bp);
352 		}
353 next:
354 		/*
355 		 * Since we may have slept during the I/O, we need
356 		 * to start from a known point.
357 		 */
358 		BO_LOCK(bo);
359 		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
360 	}
361 	if (waitfor != MNT_WAIT) {
362 		BO_UNLOCK(bo);
363 		if ((flags & NO_INO_UPDT) != 0)
364 			return (0);
365 		else
366 			return (ffs_update(vp, 0));
367 	}
368 	/* Drain IO to see if we're done. */
369 	bufobj_wwait(bo, 0, 0);
370 	/*
371 	 * Block devices associated with filesystems may have new I/O
372 	 * requests posted for them even if the vnode is locked, so no
373 	 * amount of trying will get them clean.  We make several passes
374 	 * as a best effort.
375 	 *
376 	 * Regular files may need multiple passes to flush all dependency
377 	 * work as it is possible that we must write once per indirect
378 	 * level, once for the leaf, and once for the inode and each of
379 	 * these will be done with one sync and one async pass.
380 	 */
381 	if (bo->bo_dirty.bv_cnt > 0) {
382 		if ((flags & DATA_ONLY) == 0) {
383 			still_dirty = true;
384 		} else {
385 			/*
386 			 * For data-only sync, dirty indirect buffers
387 			 * are ignored.
388 			 */
389 			still_dirty = false;
390 			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
391 				if (bp->b_lblkno > -UFS_NDADDR) {
392 					still_dirty = true;
393 					break;
394 				}
395 			}
396 		}
397 
398 		if (still_dirty) {
399 			/* Write the inode after sync passes to flush deps. */
400 			if (wait && DOINGSOFTDEP(vp) &&
401 			    (flags & NO_INO_UPDT) == 0) {
402 				BO_UNLOCK(bo);
403 				ffs_update(vp, 1);
404 				BO_LOCK(bo);
405 			}
406 			/* switch between sync/async. */
407 			wait = !wait;
408 			if (wait || ++passes < UFS_NIADDR + 2)
409 				goto loop;
410 		}
411 	}
412 	BO_UNLOCK(bo);
413 	error = 0;
414 	if ((flags & DATA_ONLY) == 0) {
415 		if ((flags & NO_INO_UPDT) == 0)
416 			error = ffs_update(vp, 1);
417 		if (DOINGSUJ(vp))
418 			softdep_journal_fsync(VTOI(vp));
419 	} else if ((ip->i_flag & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
420 		error = ffs_update(vp, 1);
421 	}
422 	return (error);
423 }
424 
425 static int
426 ffs_fdatasync(struct vop_fdatasync_args *ap)
427 {
428 
429 	return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
430 }
431 
432 static int
433 ffs_lock(
434 	struct vop_lock1_args /* {
435 		struct vnode *a_vp;
436 		int a_flags;
437 		struct thread *a_td;
438 		char *file;
439 		int line;
440 	} */ *ap)
441 {
442 #ifndef NO_FFS_SNAPSHOT
443 	struct vnode *vp;
444 	int flags;
445 	struct lock *lkp;
446 	int result;
447 
448 	switch (ap->a_flags & LK_TYPE_MASK) {
449 	case LK_SHARED:
450 	case LK_UPGRADE:
451 	case LK_EXCLUSIVE:
452 		vp = ap->a_vp;
453 		flags = ap->a_flags;
454 		for (;;) {
455 #ifdef DEBUG_VFS_LOCKS
456 			VNPASS(vp->v_holdcnt != 0, vp);
457 #endif
458 			lkp = vp->v_vnlock;
459 			result = lockmgr_lock_flags(lkp, flags,
460 			    &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line);
461 			if (lkp == vp->v_vnlock || result != 0)
462 				break;
463 			/*
464 			 * Apparent success, except that the vnode
465 			 * mutated between snapshot file vnode and
466 			 * regular file vnode while this process
467 			 * slept.  The lock currently held is not the
468 			 * right lock.  Release it, and try to get the
469 			 * new lock.
470 			 */
471 			lockmgr_unlock(lkp);
472 			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
473 			    (LK_INTERLOCK | LK_NOWAIT))
474 				return (EBUSY);
475 			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
476 				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
477 			flags &= ~LK_INTERLOCK;
478 		}
479 		break;
480 	default:
481 		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
482 	}
483 	return (result);
484 #else
485 	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
486 #endif
487 }
488 
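/*
 * In INVARIANTS kernels, verify on unlock that an inode whose i_flag
 * bits are processed lazily has its vnode on the per-mount lazy list
 * (VMP_LAZYLIST), so that deferred inode updates are not lost.
 */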
489 #ifdef INVARIANTS
490 static int
491 ffs_unlock_debug(struct vop_unlock_args *ap)
492 {
493 	struct vnode *vp = ap->a_vp;
494 	struct inode *ip = VTOI(vp);
495 
496 	if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
497 		if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
498 			VI_LOCK(vp);
499 			VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp,
500 			    ("%s: modified vnode (%x) not on lazy list",
501 			    __func__, ip->i_flag));
502 			VI_UNLOCK(vp);
503 		}
504 	}
505 	return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
506 }
507 #endif
508 
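/*
 * Satisfy a read from a hole in a sparse file.  The read loop below
 * passes GB_NOSPARSE, so bread_gb() fails with EJUSTRETURN instead of
 * instantiating a buffer over an unallocated block; the requested
 * range is then zero-filled from the preallocated zero_region.
 */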
509 static int
510 ffs_read_hole(struct uio *uio, long xfersize, long *size)
511 {
512 	ssize_t saved_resid, tlen;
513 	int error;
514 
515 	while (xfersize > 0) {
516 		tlen = min(xfersize, ZERO_REGION_SIZE);
517 		saved_resid = uio->uio_resid;
518 		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
519 		    tlen, uio);
520 		if (error != 0)
521 			return (error);
522 		tlen = saved_resid - uio->uio_resid;
523 		xfersize -= tlen;
524 		*size -= tlen;
525 	}
526 	return (0);
527 }
528 
529 /*
530  * Vnode op for reading.
531  */
532 static int
533 ffs_read(
534 	struct vop_read_args /* {
535 		struct vnode *a_vp;
536 		struct uio *a_uio;
537 		int a_ioflag;
538 		struct ucred *a_cred;
539 	} */ *ap)
540 {
541 	struct vnode *vp;
542 	struct inode *ip;
543 	struct uio *uio;
544 	struct fs *fs;
545 	struct buf *bp;
546 	ufs_lbn_t lbn, nextlbn;
547 	off_t bytesinfile;
548 	long size, xfersize, blkoffset;
549 	ssize_t orig_resid;
550 	int bflag, error, ioflag, seqcount;
551 
552 	vp = ap->a_vp;
553 	uio = ap->a_uio;
554 	ioflag = ap->a_ioflag;
555 	if (ap->a_ioflag & IO_EXT)
556 #ifdef notyet
557 		return (ffs_extread(vp, uio, ioflag));
558 #else
559 		panic("ffs_read+IO_EXT");
560 #endif
561 #ifdef DIRECTIO
562 	if ((ioflag & IO_DIRECT) != 0) {
563 		int workdone;
564 
565 		error = ffs_rawread(vp, uio, &workdone);
566 		if (error != 0 || workdone != 0)
567 			return error;
568 	}
569 #endif
570 
571 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
572 	ip = VTOI(vp);
573 
574 #ifdef INVARIANTS
575 	if (uio->uio_rw != UIO_READ)
576 		panic("ffs_read: mode");
577 
578 	if (vp->v_type == VLNK) {
579 		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
580 			panic("ffs_read: short symlink");
581 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
582 		panic("ffs_read: type %d",  vp->v_type);
583 #endif
584 	orig_resid = uio->uio_resid;
585 	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
586 	if (orig_resid == 0)
587 		return (0);
588 	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
589 	fs = ITOFS(ip);
590 	if (uio->uio_offset < ip->i_size &&
591 	    uio->uio_offset >= fs->fs_maxfilesize)
592 		return (EOVERFLOW);
593 
594 	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
595 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
596 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
597 			break;
598 		lbn = lblkno(fs, uio->uio_offset);
599 		nextlbn = lbn + 1;
600 
601 		/*
602 		 * Size of the buffer.  The buffer representing the
603 		 * end of the file is rounded up to the size of
604 		 * the block type (fragment or full block,
605 		 * depending).
606 		 */
607 		size = blksize(fs, ip, lbn);
608 		blkoffset = blkoff(fs, uio->uio_offset);
609 
610 		/*
611 		 * The amount we want to transfer in this iteration is
612 		 * one FS block less the amount of the data before
613 		 * our startpoint (duh!)
614 		 */
615 		xfersize = fs->fs_bsize - blkoffset;
616 
617 		/*
618 		 * But if we actually want less than the block,
619 		 * or the file doesn't have a whole block more of data,
620 		 * then use the lesser number.
621 		 */
622 		if (uio->uio_resid < xfersize)
623 			xfersize = uio->uio_resid;
624 		if (bytesinfile < xfersize)
625 			xfersize = bytesinfile;
626 
627 		if (lblktosize(fs, nextlbn) >= ip->i_size) {
628 			/*
629 			 * Don't do readahead if this is the end of the file.
630 			 */
631 			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
632 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
633 			/*
634 			 * Otherwise if we are allowed to cluster,
635 			 * grab as much as we can.
636 			 *
637 			 * XXX  This may not be a win if we are not
638 			 * doing sequential access.
639 			 */
640 			error = cluster_read(vp, ip->i_size, lbn,
641 			    size, NOCRED, blkoffset + uio->uio_resid,
642 			    seqcount, bflag, &bp);
643 		} else if (seqcount > 1) {
644 			/*
645 			 * If we are NOT allowed to cluster, then
646 			 * if we appear to be acting sequentially,
647 			 * fire off a request for a readahead
648 			 * as well as a read. Note that the 4th and 5th
649 			 * arguments point to arrays of the size specified in
650 			 * the 6th argument.
651 			 */
652 			u_int nextsize = blksize(fs, ip, nextlbn);
653 			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
654 			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
655 		} else {
656 			/*
657 			 * Failing all of the above, just read what the
658 			 * user asked for. Interestingly, the same as
659 			 * the first option above.
660 			 */
661 			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
662 		}
663 		if (error == EJUSTRETURN) {
664 			error = ffs_read_hole(uio, xfersize, &size);
665 			if (error == 0)
666 				continue;
667 		}
668 		if (error != 0) {
669 			brelse(bp);
670 			bp = NULL;
671 			break;
672 		}
673 
674 		/*
675 		 * We should only get non-zero b_resid when an I/O error
676 		 * has occurred, which should cause us to break above.
677 		 * However, if the short read did not cause an error,
678 		 * then we want to ensure that we do not uiomove bad
679 		 * or uninitialized data.
680 		 */
681 		size -= bp->b_resid;
682 		if (size < xfersize) {
683 			if (size == 0)
684 				break;
685 			xfersize = size;
686 		}
687 
688 		if (buf_mapped(bp)) {
689 			error = vn_io_fault_uiomove((char *)bp->b_data +
690 			    blkoffset, (int)xfersize, uio);
691 		} else {
692 			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
693 			    (int)xfersize, uio);
694 		}
695 		if (error)
696 			break;
697 
698 		vfs_bio_brelse(bp, ioflag);
699 	}
700 
701 	/*
702 	 * This can only happen in the case of an error, because the
703 	 * loop above resets bp to NULL on each iteration and on normal
704 	 * completion has not set a new value into it, so it must have
705 	 * come from a 'break' statement.
706 	 */
707 	if (bp != NULL)
708 		vfs_bio_brelse(bp, ioflag);
709 
710 	if ((error == 0 || uio->uio_resid != orig_resid) &&
711 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
712 		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
713 	return (error);
714 }
715 
716 /*
717  * Vnode op for writing.
718  */
719 static int
720 ffs_write(
721 	struct vop_write_args /* {
722 		struct vnode *a_vp;
723 		struct uio *a_uio;
724 		int a_ioflag;
725 		struct ucred *a_cred;
726 	} */ *ap)
727 {
728 	struct vnode *vp;
729 	struct uio *uio;
730 	struct inode *ip;
731 	struct fs *fs;
732 	struct buf *bp;
733 	ufs_lbn_t lbn;
734 	off_t osize;
735 	ssize_t resid;
736 	int seqcount;
737 	int blkoffset, error, flags, ioflag, size, xfersize;
738 
739 	vp = ap->a_vp;
740 	uio = ap->a_uio;
741 	ioflag = ap->a_ioflag;
742 	if (ap->a_ioflag & IO_EXT)
743 #ifdef notyet
744 		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
745 #else
746 		panic("ffs_write+IO_EXT");
747 #endif
748 
749 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
750 	ip = VTOI(vp);
751 
752 #ifdef INVARIANTS
753 	if (uio->uio_rw != UIO_WRITE)
754 		panic("ffs_write: mode");
755 #endif
756 
757 	switch (vp->v_type) {
758 	case VREG:
759 		if (ioflag & IO_APPEND)
760 			uio->uio_offset = ip->i_size;
761 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
762 			return (EPERM);
763 		/* FALLTHROUGH */
764 	case VLNK:
765 		break;
766 	case VDIR:
767 		panic("ffs_write: dir write");
768 		break;
769 	default:
770 		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
771 			(int)uio->uio_offset,
772 			(int)uio->uio_resid
773 		);
774 	}
775 
776 	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
777 	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
778 	fs = ITOFS(ip);
779 	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
780 		return (EFBIG);
781 	/*
782 	 * Maybe this should be above the vnode op call, but so long as
783 	 * file servers have no limits, I don't think it matters.
784 	 */
785 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
786 		return (EFBIG);
787 
788 	resid = uio->uio_resid;
789 	osize = ip->i_size;
790 	if (seqcount > BA_SEQMAX)
791 		flags = BA_SEQMAX << BA_SEQSHIFT;
792 	else
793 		flags = seqcount << BA_SEQSHIFT;
794 	if (ioflag & IO_SYNC)
795 		flags |= IO_SYNC;
796 	flags |= BA_UNMAPPED;
797 
798 	for (error = 0; uio->uio_resid > 0;) {
799 		lbn = lblkno(fs, uio->uio_offset);
800 		blkoffset = blkoff(fs, uio->uio_offset);
801 		xfersize = fs->fs_bsize - blkoffset;
802 		if (uio->uio_resid < xfersize)
803 			xfersize = uio->uio_resid;
804 		if (uio->uio_offset + xfersize > ip->i_size)
805 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
806 
807 		/*
808 		 * We must perform a read-before-write if the transfer size
809 		 * does not cover the entire buffer.
810 		 */
811 		if (fs->fs_bsize > xfersize)
812 			flags |= BA_CLRBUF;
813 		else
814 			flags &= ~BA_CLRBUF;
815 /* XXX is uio->uio_offset the right thing here? */
816 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
817 		    ap->a_cred, flags, &bp);
818 		if (error != 0) {
819 			vnode_pager_setsize(vp, ip->i_size);
820 			break;
821 		}
822 		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
823 			bp->b_flags |= B_NOCACHE;
824 
825 		if (uio->uio_offset + xfersize > ip->i_size) {
826 			ip->i_size = uio->uio_offset + xfersize;
827 			DIP_SET(ip, i_size, ip->i_size);
828 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
829 		}
830 
831 		size = blksize(fs, ip, lbn) - bp->b_resid;
832 		if (size < xfersize)
833 			xfersize = size;
834 
835 		if (buf_mapped(bp)) {
836 			error = vn_io_fault_uiomove((char *)bp->b_data +
837 			    blkoffset, (int)xfersize, uio);
838 		} else {
839 			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
840 			    (int)xfersize, uio);
841 		}
842 		/*
843 		 * If the buffer is not already filled and we encounter an
844 		 * error while trying to fill it, we have to clear out any
845 		 * garbage data from the pages instantiated for the buffer.
846 		 * If we do not, a failed uiomove() during a write can leave
847 		 * the prior contents of the pages exposed to a userland mmap.
848 		 *
849 		 * Note that we need only clear buffers with a transfer size
850 		 * equal to the block size because buffers with a shorter
851 		 * transfer size were cleared above by the call to UFS_BALLOC()
852 		 * with the BA_CLRBUF flag set.
853 		 *
854 		 * If the source region for uiomove identically mmaps the
855 		 * buffer, uiomove() performed the NOP copy, and the buffer
856 		 * content remains valid because the page fault handler
857 		 * validated the pages.
858 		 */
859 		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
860 		    fs->fs_bsize == xfersize)
861 			vfs_bio_clrbuf(bp);
862 
863 		vfs_bio_set_flags(bp, ioflag);
864 
865 		/*
866 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
867 		 * if we have a severe page deficiency write the buffer
868 		 * asynchronously.  Otherwise try to cluster, and if that
869 		 * doesn't do it then either do an async write (if O_DIRECT),
870 		 * or a delayed write (if not).
871 		 */
872 		if (ioflag & IO_SYNC) {
873 			(void)bwrite(bp);
874 		} else if (vm_page_count_severe() ||
875 			    buf_dirty_count_severe() ||
876 			    (ioflag & IO_ASYNC)) {
877 			bp->b_flags |= B_CLUSTEROK;
878 			bawrite(bp);
879 		} else if (xfersize + blkoffset == fs->fs_bsize) {
880 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
881 				bp->b_flags |= B_CLUSTEROK;
882 				cluster_write(vp, bp, ip->i_size, seqcount,
883 				    GB_UNMAPPED);
884 			} else {
885 				bawrite(bp);
886 			}
887 		} else if (ioflag & IO_DIRECT) {
888 			bp->b_flags |= B_CLUSTEROK;
889 			bawrite(bp);
890 		} else {
891 			bp->b_flags |= B_CLUSTEROK;
892 			bdwrite(bp);
893 		}
894 		if (error || xfersize == 0)
895 			break;
896 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
897 	}
898 	/*
899 	 * If we successfully wrote any data and we are not the superuser,
900 	 * we clear the setuid and setgid bits as a precaution against
901 	 * tampering.
902 	 */
903 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
904 	    ap->a_cred) {
905 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
906 			ip->i_mode &= ~(ISUID | ISGID);
907 			DIP_SET(ip, i_mode, ip->i_mode);
908 		}
909 	}
910 	if (error) {
911 		if (ioflag & IO_UNIT) {
912 			(void)ffs_truncate(vp, osize,
913 			    IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
914 			uio->uio_offset -= resid - uio->uio_resid;
915 			uio->uio_resid = resid;
916 		}
917 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
918 		error = ffs_update(vp, 1);
919 		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
920 			error = ENXIO;
921 	}
922 	return (error);
923 }
924 
925 /*
926  * Extended attribute area reading.
927  */
928 static int
929 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
930 {
931 	struct inode *ip;
932 	struct ufs2_dinode *dp;
933 	struct fs *fs;
934 	struct buf *bp;
935 	ufs_lbn_t lbn, nextlbn;
936 	off_t bytesinfile;
937 	long size, xfersize, blkoffset;
938 	ssize_t orig_resid;
939 	int error;
940 
941 	ip = VTOI(vp);
942 	fs = ITOFS(ip);
943 	dp = ip->i_din2;
944 
945 #ifdef INVARIANTS
946 	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
947 		panic("ffs_extread: mode");
948 
949 #endif
950 	orig_resid = uio->uio_resid;
951 	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
952 	if (orig_resid == 0)
953 		return (0);
954 	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
955 
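	/*
	 * Blocks of the external attribute area are addressed with
	 * negative logical block numbers: EA block N is read as logical
	 * block -1 - N, which the UFS block mapping code resolves to
	 * the inode's external block pointers (di_extb[]).
	 */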
956 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
957 		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
958 			break;
959 		lbn = lblkno(fs, uio->uio_offset);
960 		nextlbn = lbn + 1;
961 
962 		/*
963 		 * Size of the buffer.  The buffer representing the
964 		 * end of the file is rounded up to the size of
965 		 * the block type (fragment or full block,
966 		 * depending).
967 		 */
968 		size = sblksize(fs, dp->di_extsize, lbn);
969 		blkoffset = blkoff(fs, uio->uio_offset);
970 
971 		/*
972 		 * The amount we want to transfer in this iteration is
973 		 * one FS block less the amount of the data before
974 		 * our startpoint (duh!)
975 		 */
976 		xfersize = fs->fs_bsize - blkoffset;
977 
978 		/*
979 		 * But if we actually want less than the block,
980 		 * or the file doesn't have a whole block more of data,
981 		 * then use the lesser number.
982 		 */
983 		if (uio->uio_resid < xfersize)
984 			xfersize = uio->uio_resid;
985 		if (bytesinfile < xfersize)
986 			xfersize = bytesinfile;
987 
988 		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
989 			/*
990 			 * Don't do readahead if this is the end of the EA area.
991 			 */
992 			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
993 		} else {
994 			/*
995 			 * If we have a second block, then
996 			 * fire off a request for a readahead
997 			 * as well as a read. Note that the 4th and 5th
998 			 * arguments point to arrays of the size specified in
999 			 * the 6th argument.
1000 			 */
1001 			u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
1002 
1003 			nextlbn = -1 - nextlbn;
1004 			error = breadn(vp, -1 - lbn,
1005 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1006 		}
1007 		if (error) {
1008 			brelse(bp);
1009 			bp = NULL;
1010 			break;
1011 		}
1012 
1013 		/*
1014 		 * We should only get non-zero b_resid when an I/O error
1015 		 * has occurred, which should cause us to break above.
1016 		 * However, if the short read did not cause an error,
1017 		 * then we want to ensure that we do not uiomove bad
1018 		 * or uninitialized data.
1019 		 */
1020 		size -= bp->b_resid;
1021 		if (size < xfersize) {
1022 			if (size == 0)
1023 				break;
1024 			xfersize = size;
1025 		}
1026 
1027 		error = uiomove((char *)bp->b_data + blkoffset,
1028 					(int)xfersize, uio);
1029 		if (error)
1030 			break;
1031 		vfs_bio_brelse(bp, ioflag);
1032 	}
1033 
1034 	/*
1035 	 * This can only happen in the case of an error, because the
1036 	 * loop above resets bp to NULL on each iteration and on normal
1037 	 * completion has not set a new value into it, so it must have
1038 	 * come from a 'break' statement.
1039 	 */
1040 	if (bp != NULL)
1041 		vfs_bio_brelse(bp, ioflag);
1042 	return (error);
1043 }
1044 
1045 /*
1046  * Extended attribute area writing.
1047  */
1048 static int
1049 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1050 {
1051 	struct inode *ip;
1052 	struct ufs2_dinode *dp;
1053 	struct fs *fs;
1054 	struct buf *bp;
1055 	ufs_lbn_t lbn;
1056 	off_t osize;
1057 	ssize_t resid;
1058 	int blkoffset, error, flags, size, xfersize;
1059 
1060 	ip = VTOI(vp);
1061 	fs = ITOFS(ip);
1062 	dp = ip->i_din2;
1063 
1064 #ifdef INVARIANTS
1065 	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1066 		panic("ffs_extwrite: mode");
1067 #endif
1068 
1069 	if (ioflag & IO_APPEND)
1070 		uio->uio_offset = dp->di_extsize;
1071 	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
1072 	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
1073 	if ((uoff_t)uio->uio_offset + uio->uio_resid >
1074 	    UFS_NXADDR * fs->fs_bsize)
1075 		return (EFBIG);
1076 
1077 	resid = uio->uio_resid;
1078 	osize = dp->di_extsize;
1079 	flags = IO_EXT;
1080 	if (ioflag & IO_SYNC)
1081 		flags |= IO_SYNC;
1082 
1083 	for (error = 0; uio->uio_resid > 0;) {
1084 		lbn = lblkno(fs, uio->uio_offset);
1085 		blkoffset = blkoff(fs, uio->uio_offset);
1086 		xfersize = fs->fs_bsize - blkoffset;
1087 		if (uio->uio_resid < xfersize)
1088 			xfersize = uio->uio_resid;
1089 
1090 		/*
1091 		 * We must perform a read-before-write if the transfer size
1092 		 * does not cover the entire buffer.
1093 		 */
1094 		if (fs->fs_bsize > xfersize)
1095 			flags |= BA_CLRBUF;
1096 		else
1097 			flags &= ~BA_CLRBUF;
1098 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1099 		    ucred, flags, &bp);
1100 		if (error != 0)
1101 			break;
1102 		/*
1103 		 * If the buffer is not valid we have to clear out any
1104 		 * garbage data from the pages instantiated for the buffer.
1105 		 * If we do not, a failed uiomove() during a write can leave
1106 		 * the prior contents of the pages exposed to a userland
1107 		 * mmap().  XXX deal with uiomove() errors a better way.
1108 		 */
1109 		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1110 			vfs_bio_clrbuf(bp);
1111 
1112 		if (uio->uio_offset + xfersize > dp->di_extsize) {
1113 			dp->di_extsize = uio->uio_offset + xfersize;
1114 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
1115 		}
1116 
1117 		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1118 		if (size < xfersize)
1119 			xfersize = size;
1120 
1121 		error =
1122 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1123 
1124 		vfs_bio_set_flags(bp, ioflag);
1125 
1126 		/*
1127 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
1128 		 * if we have a severe page deficiency write the buffer
1129 		 * asynchronously.  Otherwise try to cluster, and if that
1130 		 * doesn't do it then either do an async write (if O_DIRECT),
1131 		 * or a delayed write (if not).
1132 		 */
1133 		if (ioflag & IO_SYNC) {
1134 			(void)bwrite(bp);
1135 		} else if (vm_page_count_severe() ||
1136 			    buf_dirty_count_severe() ||
1137 			    xfersize + blkoffset == fs->fs_bsize ||
1138 			    (ioflag & (IO_ASYNC | IO_DIRECT)))
1139 			bawrite(bp);
1140 		else
1141 			bdwrite(bp);
1142 		if (error || xfersize == 0)
1143 			break;
1144 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
1145 	}
1146 	/*
1147 	 * If we successfully wrote any data and we are not the superuser,
1148 	 * we clear the setuid and setgid bits as a precaution against
1149 	 * tampering.
1150 	 */
1151 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
1152 		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
1153 			ip->i_mode &= ~(ISUID | ISGID);
1154 			dp->di_mode = ip->i_mode;
1155 		}
1156 	}
1157 	if (error) {
1158 		if (ioflag & IO_UNIT) {
1159 			(void)ffs_truncate(vp, osize,
1160 			    IO_EXT | (ioflag&IO_SYNC), ucred);
1161 			uio->uio_offset -= resid - uio->uio_resid;
1162 			uio->uio_resid = resid;
1163 		}
1164 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1165 		error = ffs_update(vp, 1);
1166 	return (error);
1167 }
1168 
1169 
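/*
 * The external attribute area is a packed sequence of variable-length
 * records described by struct extattr: a 32-bit ea_length covering the
 * whole record; one-byte ea_namespace, ea_contentpadlen, and
 * ea_namelength fields; the name, padded so that the content that
 * follows is 8-byte aligned; and the content itself, padded at the end
 * by ea_contentpadlen bytes.  The EXTATTR_NEXT(), EXTATTR_CONTENT(),
 * and EXTATTR_CONTENT_SIZE() macros from <ufs/ufs/extattr.h> walk this
 * layout.
 */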
1170 /*
1171  * Helper for retrieving a named extended attribute.
1172  *
1173  * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1174  * the length of the EA, and possibly the pointer to the entry and to the data.
1175  */
1176 static int
1177 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
1178     struct extattr **eapp, u_char **eac)
1179 {
1180 	struct extattr *eap, *eaend;
1181 	size_t nlen;
1182 
1183 	nlen = strlen(name);
1184 	KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
1185 	eap = (struct extattr *)ptr;
1186 	eaend = (struct extattr *)(ptr + length);
1187 	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
1188 		/* make sure this entry is complete */
1189 		if (EXTATTR_NEXT(eap) > eaend)
1190 			break;
1191 		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen ||
1192 		    memcmp(eap->ea_name, name, nlen) != 0)
1193 			continue;
1194 		if (eapp != NULL)
1195 			*eapp = eap;
1196 		if (eac != NULL)
1197 			*eac = EXTATTR_CONTENT(eap);
1198 		return (EXTATTR_CONTENT_SIZE(eap));
1199 	}
1200 	return (-1);
1201 }
1202 
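/*
 * Read the external attribute area of an inode into a freshly
 * allocated buffer, leaving "extra" bytes of unused slack at the end.
 * On success the caller owns the buffer and must release it with
 * free(..., M_TEMP).
 */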
1203 static int
1204 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1205 {
1206 	struct inode *ip;
1207 	struct ufs2_dinode *dp;
1208 	struct fs *fs;
1209 	struct uio luio;
1210 	struct iovec liovec;
1211 	u_int easize;
1212 	int error;
1213 	u_char *eae;
1214 
1215 	ip = VTOI(vp);
1216 	fs = ITOFS(ip);
1217 	dp = ip->i_din2;
1218 	easize = dp->di_extsize;
1219 	if ((uoff_t)easize + extra > UFS_NXADDR * fs->fs_bsize)
1220 		return (EFBIG);
1221 
1222 	eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1223 
1224 	liovec.iov_base = eae;
1225 	liovec.iov_len = easize;
1226 	luio.uio_iov = &liovec;
1227 	luio.uio_iovcnt = 1;
1228 	luio.uio_offset = 0;
1229 	luio.uio_resid = easize;
1230 	luio.uio_segflg = UIO_SYSSPACE;
1231 	luio.uio_rw = UIO_READ;
1232 	luio.uio_td = td;
1233 
1234 	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1235 	if (error) {
1236 		free(eae, M_TEMP);
1237 		return(error);
1238 		return (error);
1239 	*p = eae;
1240 	return (0);
1241 }
1242 
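/*
 * The in-memory copy of the EA area (i_ea_area) is serialized by a
 * simple sleepable lock built on the IN_EA_LOCKED flag: contenders set
 * IN_EA_LOCKWAIT and sleep on i_ea_refs under the vnode interlock
 * until the holder clears the flag and wakes them in ffs_unlock_ea().
 */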
1243 static void
1244 ffs_lock_ea(struct vnode *vp)
1245 {
1246 	struct inode *ip;
1247 
1248 	ip = VTOI(vp);
1249 	VI_LOCK(vp);
1250 	while (ip->i_flag & IN_EA_LOCKED) {
1251 		UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
1252 		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
1253 		    0);
1254 	}
1255 	UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
1256 	VI_UNLOCK(vp);
1257 }
1258 
1259 static void
1260 ffs_unlock_ea(struct vnode *vp)
1261 {
1262 	struct inode *ip;
1263 
1264 	ip = VTOI(vp);
1265 	VI_LOCK(vp);
1266 	if (ip->i_flag & IN_EA_LOCKWAIT)
1267 		wakeup(&ip->i_ea_refs);
1268 	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
1269 	VI_UNLOCK(vp);
1270 }
1271 
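/*
 * Acquire a counted reference (i_ea_refs) to the in-memory EA area,
 * reading it from disk on first open.  Every successful ffs_open_ea()
 * must be paired with an ffs_close_ea(), which writes the area back
 * if committing and frees it once the last reference is dropped.
 */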
1272 static int
1273 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1274 {
1275 	struct inode *ip;
1276 	struct ufs2_dinode *dp;
1277 	int error;
1278 
1279 	ip = VTOI(vp);
1280 
1281 	ffs_lock_ea(vp);
1282 	if (ip->i_ea_area != NULL) {
1283 		ip->i_ea_refs++;
1284 		ffs_unlock_ea(vp);
1285 		return (0);
1286 	}
1287 	dp = ip->i_din2;
1288 	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1289 	if (error) {
1290 		ffs_unlock_ea(vp);
1291 		return (error);
1292 	}
1293 	ip->i_ea_len = dp->di_extsize;
1294 	ip->i_ea_error = 0;
1295 	ip->i_ea_refs++;
1296 	ffs_unlock_ea(vp);
1297 	return (0);
1298 }
1299 
1300 /*
1301  * Vnode extattr transaction commit/abort
1302  */
1303 static int
1304 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1305 {
1306 	struct inode *ip;
1307 	struct uio luio;
1308 	struct iovec liovec;
1309 	int error;
1310 	struct ufs2_dinode *dp;
1311 
1312 	ip = VTOI(vp);
1313 
1314 	ffs_lock_ea(vp);
1315 	if (ip->i_ea_area == NULL) {
1316 		ffs_unlock_ea(vp);
1317 		return (EINVAL);
1318 	}
1319 	dp = ip->i_din2;
1320 	error = ip->i_ea_error;
1321 	if (commit && error == 0) {
1322 		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
1323 		if (cred == NOCRED)
1324 			cred =  vp->v_mount->mnt_cred;
1325 		liovec.iov_base = ip->i_ea_area;
1326 		liovec.iov_len = ip->i_ea_len;
1327 		luio.uio_iov = &liovec;
1328 		luio.uio_iovcnt = 1;
1329 		luio.uio_offset = 0;
1330 		luio.uio_resid = ip->i_ea_len;
1331 		luio.uio_segflg = UIO_SYSSPACE;
1332 		luio.uio_rw = UIO_WRITE;
1333 		luio.uio_td = td;
1334 		/* XXX: I'm not happy about truncating to zero size */
1335 		/* XXX: I'm not happy about truncating to zero size */
1336 		if (ip->i_ea_len < dp->di_extsize)
1337 			error = ffs_truncate(vp, 0, IO_EXT, cred);
		if (error == 0)
1338 			error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1339 	if (--ip->i_ea_refs == 0) {
1340 		free(ip->i_ea_area, M_TEMP);
1341 		ip->i_ea_area = NULL;
1342 		ip->i_ea_len = 0;
1343 		ip->i_ea_error = 0;
1344 	}
1345 	ffs_unlock_ea(vp);
1346 	return (error);
1347 }
1348 
1349 /*
1350  * Vnode extattr strategy routine for fifos.
1351  *
1352  * We need to check for a read or write of the external attributes.
1353  * Otherwise we just fall through and do the usual thing.
1354  */
1355 static int
1356 ffsext_strategy(struct vop_strategy_args *ap)
1357 /*
1358 struct vop_strategy_args {
1359 	struct vnodeop_desc *a_desc;
1360 	struct vnode *a_vp;
1361 	struct buf *a_bp;
1362 };
1363 */
1364 {
1365 	struct vnode *vp;
1366 	daddr_t lbn;
1367 
1368 	vp = ap->a_vp;
1369 	lbn = ap->a_bp->b_lblkno;
1370 	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
1371 		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
1372 	if (vp->v_type == VFIFO)
1373 		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
1374 	panic("spec nodes went here");
1375 }
1376 
1377 /*
1378  * Vnode extattr transaction start (open the EA area).
1379  */
1380 static int
1381 ffs_openextattr(struct vop_openextattr_args *ap)
1382 /*
1383 struct vop_openextattr_args {
1384 	struct vnodeop_desc *a_desc;
1385 	struct vnode *a_vp;
1386 	IN struct ucred *a_cred;
1387 	IN struct thread *a_td;
1388 };
1389 */
1390 {
1391 
1392 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1393 		return (EOPNOTSUPP);
1394 
1395 	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1396 }
1397 
1398 
1399 /*
1400  * Vnode extattr transaction commit/abort
1401  */
1402 static int
1403 ffs_closeextattr(struct vop_closeextattr_args *ap)
1404 /*
1405 struct vop_closeextattr_args {
1406 	struct vnodeop_desc *a_desc;
1407 	struct vnode *a_vp;
1408 	int a_commit;
1409 	IN struct ucred *a_cred;
1410 	IN struct thread *a_td;
1411 };
1412 */
1413 {
1414 
1415 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1416 		return (EOPNOTSUPP);
1417 
1418 	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
1419 		return (EROFS);
1420 
1421 	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1422 }
1423 
1424 /*
1425  * Vnode operation to remove a named attribute.
1426  */
1427 static int
1428 ffs_deleteextattr(struct vop_deleteextattr_args *ap)
1429 /*
1430 vop_deleteextattr {
1431 	IN struct vnode *a_vp;
1432 	IN int a_attrnamespace;
1433 	IN const char *a_name;
1434 	IN struct ucred *a_cred;
1435 	IN struct thread *a_td;
1436 };
1437 */
1438 {
1439 	struct inode *ip;
1440 	struct extattr *eap;
1441 	uint32_t ul;
1442 	int olen, error, i, easize;
1443 	u_char *eae;
1444 	void *tmp;
1445 
1446 	ip = VTOI(ap->a_vp);
1447 
1448 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1449 		return (EOPNOTSUPP);
1450 
1451 	if (strlen(ap->a_name) == 0)
1452 		return (EINVAL);
1453 
1454 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1455 		return (EROFS);
1456 
1457 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1458 	    ap->a_cred, ap->a_td, VWRITE);
1459 	if (error) {
1460 
1461 		/*
1462 		 * ffs_lock_ea is not needed here, because the vnode
1463 		 * must be exclusively locked.
1464 		 */
1465 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1466 			ip->i_ea_error = error;
1467 		return (error);
1468 	}
1469 
1470 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1471 	if (error)
1472 		return (error);
1473 
1474 	/* CEM: delete could be done in-place instead */
1475 	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
1476 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1477 	easize = ip->i_ea_len;
1478 
1479 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1480 	    &eap, NULL);
1481 	if (olen == -1) {
1482 		/* delete but nonexistent */
1483 		free(eae, M_TEMP);
1484 		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1485 		return (ENOATTR);
1486 	}
1487 	ul = eap->ea_length;
1488 	i = (u_char *)EXTATTR_NEXT(eap) - eae;
1489 	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
1490 	easize -= ul;
1491 
1492 	tmp = ip->i_ea_area;
1493 	ip->i_ea_area = eae;
1494 	ip->i_ea_len = easize;
1495 	free(tmp, M_TEMP);
1496 	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1497 	return (error);
1498 }
1499 
1500 /*
1501  * Vnode operation to retrieve a named extended attribute.
1502  */
1503 static int
1504 ffs_getextattr(struct vop_getextattr_args *ap)
1505 /*
1506 vop_getextattr {
1507 	IN struct vnode *a_vp;
1508 	IN int a_attrnamespace;
1509 	IN const char *a_name;
1510 	INOUT struct uio *a_uio;
1511 	OUT size_t *a_size;
1512 	IN struct ucred *a_cred;
1513 	IN struct thread *a_td;
1514 };
1515 */
1516 {
1517 	struct inode *ip;
1518 	u_char *eae, *p;
1519 	unsigned easize;
1520 	int error, ealen;
1521 
1522 	ip = VTOI(ap->a_vp);
1523 
1524 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1525 		return (EOPNOTSUPP);
1526 
1527 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1528 	    ap->a_cred, ap->a_td, VREAD);
1529 	if (error)
1530 		return (error);
1531 
1532 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1533 	if (error)
1534 		return (error);
1535 
1536 	eae = ip->i_ea_area;
1537 	easize = ip->i_ea_len;
1538 
1539 	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1540 	    NULL, &p);
1541 	if (ealen >= 0) {
1542 		error = 0;
1543 		if (ap->a_size != NULL)
1544 			*ap->a_size = ealen;
1545 		else if (ap->a_uio != NULL)
1546 			error = uiomove(p, ealen, ap->a_uio);
1547 	} else
1548 		error = ENOATTR;
1549 
1550 	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1551 	return (error);
1552 }
1553 
1554 /*
1555  * Vnode operation to retrieve extended attributes on a vnode.
1556  */
1557 static int
1558 ffs_listextattr(struct vop_listextattr_args *ap)
1559 /*
1560 vop_listextattr {
1561 	IN struct vnode *a_vp;
1562 	IN int a_attrnamespace;
1563 	INOUT struct uio *a_uio;
1564 	OUT size_t *a_size;
1565 	IN struct ucred *a_cred;
1566 	IN struct thread *a_td;
1567 };
1568 */
1569 {
1570 	struct inode *ip;
1571 	struct extattr *eap, *eaend;
1572 	int error, ealen;
1573 
1574 	ip = VTOI(ap->a_vp);
1575 
1576 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1577 		return (EOPNOTSUPP);
1578 
1579 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1580 	    ap->a_cred, ap->a_td, VREAD);
1581 	if (error)
1582 		return (error);
1583 
1584 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1585 	if (error)
1586 		return (error);
1587 
1588 	error = 0;
1589 	if (ap->a_size != NULL)
1590 		*ap->a_size = 0;
1591 
1592 	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
1593 	eap = (struct extattr *)ip->i_ea_area;
1594 	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
1595 	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
1596 		/* make sure this entry is complete */
1597 		if (EXTATTR_NEXT(eap) > eaend)
1598 			break;
1599 		if (eap->ea_namespace != ap->a_attrnamespace)
1600 			continue;
1601 
1602 		ealen = eap->ea_namelength;
1603 		if (ap->a_size != NULL)
1604 			*ap->a_size += ealen + 1;
1605 		else if (ap->a_uio != NULL)
1606 			error = uiomove(&eap->ea_namelength, ealen + 1,
1607 			    ap->a_uio);
1608 	}
1609 
1610 	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1611 	return (error);
1612 }
1613 
1614 /*
1615  * Vnode operation to set a named attribute.
1616  */
1617 static int
1618 ffs_setextattr(struct vop_setextattr_args *ap)
1619 /*
1620 vop_setextattr {
1621 	IN struct vnode *a_vp;
1622 	IN int a_attrnamespace;
1623 	IN const char *a_name;
1624 	INOUT struct uio *a_uio;
1625 	IN struct ucred *a_cred;
1626 	IN struct thread *a_td;
1627 };
1628 */
1629 {
1630 	struct inode *ip;
1631 	struct fs *fs;
1632 	struct extattr *eap;
1633 	uint32_t ealength, ul;
1634 	ssize_t ealen;
1635 	int olen, eapad1, eapad2, error, i, easize;
1636 	u_char *eae;
1637 	void *tmp;
1638 
1639 	ip = VTOI(ap->a_vp);
1640 	fs = ITOFS(ip);
1641 
1642 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1643 		return (EOPNOTSUPP);
1644 
1645 	if (strlen(ap->a_name) == 0)
1646 		return (EINVAL);
1647 
1648 	/* XXX Deleting EAs by passing a NULL uio is no longer supported. */
1649 	if (ap->a_uio == NULL)
1650 		return (EOPNOTSUPP);
1651 
1652 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1653 		return (EROFS);
1654 
1655 	ealen = ap->a_uio->uio_resid;
1656 	if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
1657 		return (EINVAL);
1658 
1659 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1660 	    ap->a_cred, ap->a_td, VWRITE);
1661 	if (error) {
1662 
1663 		/*
1664 		 * ffs_lock_ea is not needed here, because the vnode
1665 		 * must be exclusively locked.
1666 		 */
1667 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1668 			ip->i_ea_error = error;
1669 		return (error);
1670 	}
1671 
1672 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1673 	if (error)
1674 		return (error);
1675 
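	/*
	 * Size the new record: a 32-bit record length, three one-byte
	 * fields (namespace, content pad length, name length), the name
	 * padded to an 8 byte boundary, and the content padded likewise.
	 */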
1676 	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
1677 	eapad1 = roundup2(ealength, 8) - ealength;
1678 	eapad2 = roundup2(ealen, 8) - ealen;
1679 	ealength += eapad1 + ealen + eapad2;
1680 
1681 	/*
1682 	 * CEM: rewrites of the same size or smaller could be done in-place
1683 	 * instead.  (We don't acquire any fine-grained locks in here either,
1684 	 * so we could also do bigger writes in-place.)
1685 	 */
1686 	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
1687 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1688 	easize = ip->i_ea_len;
1689 
1690 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1691 	    &eap, NULL);
1692 	if (olen == -1) {
1693 		/* new, append at end */
1694 		KASSERT(ALIGNED_TO(eae + easize, struct extattr),
1695 		    ("unaligned"));
1696 		eap = (struct extattr *)(eae + easize);
1697 		easize += ealength;
1698 	} else {
1699 		ul = eap->ea_length;
1700 		i = (u_char *)EXTATTR_NEXT(eap) - eae;
1701 		if (ul != ealength) {
1702 			bcopy(EXTATTR_NEXT(eap), (u_char *)eap + ealength,
1703 			    easize - i);
1704 			easize += (ealength - ul);
1705 		}
1706 	}
1707 	if (easize > lblktosize(fs, UFS_NXADDR)) {
1708 		free(eae, M_TEMP);
1709 		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1710 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1711 			ip->i_ea_error = ENOSPC;
1712 		return (ENOSPC);
1713 	}
1714 	eap->ea_length = ealength;
1715 	eap->ea_namespace = ap->a_attrnamespace;
1716 	eap->ea_contentpadlen = eapad2;
1717 	eap->ea_namelength = strlen(ap->a_name);
1718 	memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
1719 	bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
1720 	error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
1721 	if (error) {
1722 		free(eae, M_TEMP);
1723 		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1724 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1725 			ip->i_ea_error = error;
1726 		return (error);
1727 	}
1728 	bzero((u_char *)EXTATTR_CONTENT(eap) + ealen, eapad2);
1729 
1730 	tmp = ip->i_ea_area;
1731 	ip->i_ea_area = eae;
1732 	ip->i_ea_len = easize;
1733 	free(tmp, M_TEMP);
1734 	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1735 	return (error);
1736 }
1737 
1738 /*
1739  * Vnode pointer to File handle
1740  */
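/*
 * The inode generation number is included in the handle, so that a
 * handle presented after the inode has been freed and reused (e.g.
 * one held by an NFS client) is recognized as stale.
 */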
1741 static int
1742 ffs_vptofh(struct vop_vptofh_args *ap)
1743 /*
1744 vop_vptofh {
1745 	IN struct vnode *a_vp;
1746 	IN struct fid *a_fhp;
1747 };
1748 */
1749 {
1750 	struct inode *ip;
1751 	struct ufid *ufhp;
1752 
1753 	ip = VTOI(ap->a_vp);
1754 	ufhp = (struct ufid *)ap->a_fhp;
1755 	ufhp->ufid_len = sizeof(struct ufid);
1756 	ufhp->ufid_ino = ip->i_number;
1757 	ufhp->ufid_gen = ip->i_gen;
1758 	return (0);
1759 }
1760 
1761 SYSCTL_DECL(_vfs_ffs);
1762 static int use_buf_pager = 1;
1763 SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
1764     "Always use buffer pager instead of bmap");
1765 
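/*
 * Translation callbacks supplied to vfs_bio_getpages(): map a byte
 * offset to a logical block number, and report that block's size.
 */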
1766 static daddr_t
1767 ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
1768 {
1769 
1770 	return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
1771 }
1772 
1773 static int
1774 ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
1775 {
1776 
1777 	return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
1778 }
1779 
1780 static int
1781 ffs_getpages(struct vop_getpages_args *ap)
1782 {
1783 	struct vnode *vp;
1784 	struct ufsmount *um;
1785 
1786 	vp = ap->a_vp;
1787 	um = VFSTOUFS(vp->v_mount);
1788 
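	/*
	 * The generic VM pager can be used only when the device buffer
	 * size does not exceed a page, and then only when the buffer
	 * pager has been disabled through the vfs.ffs.use_buf_pager
	 * sysctl; in every other case read through the buffer cache.
	 */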
1789 	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
1790 		return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
1791 		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
1792 	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
1793 	    ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
1794 }
1795 
1796 static int
1797 ffs_getpages_async(struct vop_getpages_async_args *ap)
1798 {
1799 	struct vnode *vp;
1800 	struct ufsmount *um;
1801 	bool do_iodone;
1802 	int error;
1803 
1804 	vp = ap->a_vp;
1805 	um = VFSTOUFS(vp->v_mount);
1806 	do_iodone = true;
1807 
1808 	if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
1809 		error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
1810 		    ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
1811 		if (error == 0)
1812 			do_iodone = false;
1813 	} else {
1814 		error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
1815 		    ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
1816 		    ffs_gbp_getblksz);
1817 	}
1818 	if (do_iodone && ap->a_iodone != NULL)
1819 		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
1820 
1821 	return (error);
1822 }
1823 
1824