/*	$NetBSD: ufs_readwrite.c,v 1.120 2015/04/12 22:48:38 riastradh Exp $	*/

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_readwrite.c,v 1.120 2015/04/12 22:48:38 riastradh Exp $");

#ifdef LFS_READWRITE
#define	FS			struct lfs
#define	I_FS			i_lfs
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	BUFRD			lfs_bufrd
#define	BUFWR			lfs_bufwr
#define	fs_bsize		lfs_bsize
#define	fs_bmask		lfs_bmask
#define	UFS_WAPBL_BEGIN(mp)	0
#define	UFS_WAPBL_END(mp)	do { } while (0)
#define	UFS_WAPBL_UPDATE(vp, access, modify, flags)	do { } while (0)
#define ufs_blkoff		lfs_blkoff
#define ufs_blksize		lfs_blksize
#define ufs_lblkno		lfs_lblkno
#define ufs_lblktosize		lfs_lblktosize
#define ufs_blkroundup		lfs_blkroundup
#else
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#define	BUFRD			ffs_bufrd
#define	BUFWR			ffs_bufwr
#define ufs_blkoff		ffs_blkoff
#define ufs_blksize		ffs_blksize
#define ufs_lblkno		ffs_lblkno
#define ufs_lblktosize		ffs_lblktosize
#define ufs_blkroundup		ffs_blkroundup
#endif
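
/*
 * Note: this file is not compiled on its own; ffs_vnops.c includes it
 * as-is and lfs_vnops.c includes it with LFS_READWRITE defined, so the
 * macros above select the superblock type and the generated function
 * names for each file system.  LFS does not use the WAPBL journal, so
 * its journalling hooks compile away to no-ops.
 */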

static int	ufs_post_read_update(struct vnode *, int, int);
static int	ufs_post_write_update(struct vnode *, struct uio *, int,
		    kauth_cred_t, off_t, int, int, int);

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct ufsmount *ump;
	vsize_t bytelen;
	int error, ioflag, advice;

	vp = ap->a_vp;
	ip = VTOI(vp);
	ump = ip->i_ump;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	error = 0;

	KASSERT(uio->uio_rw == UIO_READ);
	KASSERT(vp->v_type == VREG || vp->v_type == VDIR);

	/* XXX Eliminate me by refusing directory reads from userland.  */
	if (vp->v_type == VDIR)
		return BUFRD(vp, uio, ioflag, ap->a_cred);
#ifdef LFS_READWRITE
	/* XXX Eliminate me by using ufs_bufio in lfs.  */
	if (vp->v_type == VREG && ip->i_number == LFS_IFILE_INUM)
		return BUFRD(vp, uio, ioflag, ap->a_cred);
#endif
	if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);

#ifndef LFS_READWRITE
	if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT)
		return ffs_snapshot_read(vp, uio, ioflag);
#endif /* !LFS_READWRITE */

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	if (uio->uio_offset >= ip->i_size)
		goto out;

	KASSERT(vp->v_type == VREG);
	advice = IO_ADV_DECODE(ap->a_ioflag);
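	/*
	 * Copy out through the page cache.  Each pass clamps the
	 * transfer length to the bytes remaining before EOF, so a
	 * read that spans the end of file returns only valid data.
	 */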
	while (uio->uio_resid > 0) {
		if (ioflag & IO_DIRECT) {
			genfs_directio(vp, uio, ioflag);
		}
		bytelen = MIN(ip->i_size - uio->uio_offset, uio->uio_resid);
		if (bytelen == 0)
			break;
		error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
		if (error)
			break;
	}

 out:
	error = ufs_post_read_update(vp, ap->a_ioflag, error);
	fstrans_done(vp->v_mount);
	return (error);
}

/*
 * UFS op for reading via the buffer cache
 */
int
BUFRD(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred)
{
	struct inode *ip;
	struct ufsmount *ump;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;

	KASSERT(VOP_ISLOCKED(vp));
	KASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
	KASSERT(uio->uio_rw == UIO_READ);

	ip = VTOI(vp);
	ump = ip->i_ump;
	fs = ip->I_FS;
	error = 0;

	KASSERT(vp->v_type != VLNK || ip->i_size >= ump->um_maxsymlinklen);
	KASSERT(vp->v_type != VLNK || ump->um_maxsymlinklen != 0 ||
	    DIP(ip, blocks) == 0);

	if (uio->uio_offset > ump->um_maxfilesize)
		return EFBIG;
	if (uio->uio_resid == 0)
		return 0;

#ifndef LFS_READWRITE
	KASSERT(!ISSET(ip->i_flags, (SF_SNAPSHOT | SF_SNAPINVAL)));
#endif

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	if (uio->uio_offset >= ip->i_size)
		goto out;

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = ufs_lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = ufs_blksize(fs, ip, lbn);
		blkoffset = ufs_blkoff(fs, uio->uio_offset);
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);
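		/*
		 * Example (assuming an 8192-byte block size and
		 * uio_offset 12000, with large resid and file size):
		 * lbn = 1, blkoffset = 3808, and xfersize =
		 * 8192 - 3808 = 4384, the remainder of block 1.
		 */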

		if (ufs_lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, 0, &bp);
		else {
			int nextsize = ufs_blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, 0, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp, 0);
	}
	if (bp != NULL)
		brelse(bp, 0);

 out:
	error = ufs_post_read_update(vp, ioflag, error);
	fstrans_done(vp->v_mount);
	return (error);
}

static int
ufs_post_read_update(struct vnode *vp, int ioflag, int oerror)
{
	struct inode *ip = VTOI(vp);
	int error = oerror;

	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		if ((ioflag & IO_SYNC) == IO_SYNC) {
			error = UFS_WAPBL_BEGIN(vp->v_mount);
			if (error)
				goto out;
			error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
			UFS_WAPBL_END(vp->v_mount);
		}
	}

out:
	/* Read error overrides any inode update error.  */
	if (oerror)
		error = oerror;
	return error;
}

/*
 * Vnode op for writing.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	FS *fs;
	kauth_cred_t cred;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid;
	int aflag;
	int extended=0;
	vsize_t bytelen;
	bool async;
	struct ufsmount *ump;

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	ump = ip->i_ump;

	KASSERT(vp->v_size == ip->i_size);
	KASSERT(uio->uio_rw == UIO_WRITE);
	KASSERT(vp->v_type == VREG);
	KASSERT(!ISSET(ioflag, IO_JOURNALLOCKED));
	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);

	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->i_size;
	if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
		return (EPERM);

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif
	if (uio->uio_resid == 0)
		return (0);

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	origoff = uio->uio_offset;
	resid = uio->uio_resid;
	osize = ip->i_size;
	error = 0;

	KASSERT(vp->v_type == VREG);

	/*
	 * XXX The entire write operation must occur in a single WAPBL
	 * transaction because it may allocate disk blocks, if
	 * appending or filling holes, which is allowed to happen only
	 * if the write fully succeeds.
	 *
	 * If ubc_uiomove fails in the middle with EFAULT, we can clean
	 * up at the end with UFS_TRUNCATE.  But if the power fails in
	 * the middle, there would be nobody to deallocate the blocks,
	 * without an fsck to globally analyze the file system.
	 *
	 * If the increasingly inaccurately named WAPBL were augmented
	 * with rollback records for block allocations, then we could
	 * split this into multiple transactions and commit the
	 * allocations in the last one.
	 *
	 * But WAPBL doesn't have that notion now, so we'll have to
	 * live with gigantic transactions and WAPBL tentacles in
	 * genfs_getpages/putpages to cope with the possibility that
	 * the transaction may or may not be locked on entry to the
	 * page cache.
	 *
	 * And even if we added that notion to WAPBL, it wouldn't help
	 * us get rid of the tentacles in genfs_getpages/putpages
	 * because we'd have to interoperate with old implementations
	 * that assume they can replay the log without fsck.
	 */
	error = UFS_WAPBL_BEGIN(vp->v_mount);
	if (error) {
		fstrans_done(vp->v_mount);
		return error;
	}

#ifdef LFS_READWRITE
	async = true;
	lfs_availwait(fs, btofsb(fs, uio->uio_resid));
	lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* LFS_READWRITE */

	preallocoff = round_page(ufs_blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - ufs_blkoff(fs, nsize);
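
	/*
	 * [preallocoff, endallocoff) brackets the region where this
	 * write lands on whole, newly allocated blocks beyond the old
	 * end of file: preallocoff is the first block- and page-rounded
	 * offset past both osize and the write start, and endallocoff
	 * is the new size rounded down to a block boundary.  Writes
	 * inside that window overwrite entire blocks, so their pages
	 * need not be initialized before the copy.
	 */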

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && ufs_lblkno(fs, osize) < UFS_NDADDR &&
	    ufs_lblkno(fs, osize) != ufs_lblkno(fs, nsize) &&
	    ufs_blkroundup(fs, osize) != osize) {
		off_t eob;

		eob = ufs_blkroundup(fs, osize);
		uvm_vnp_setwritesize(vp, eob);
		error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag);
		if (error)
			goto out;
		if (flags & B_SYNC) {
			mutex_enter(vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
			    round_page(eob),
			    PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
		}
	}

	while (uio->uio_resid > 0) {
		int ubc_flags = UBC_WRITE;
		bool overwrite; /* if we're overwriting a whole block */
		off_t newoff;

		if (ioflag & IO_DIRECT) {
			genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED);
		}

		oldoff = uio->uio_offset;
		blkoffset = ufs_blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		if (bytelen == 0) {
			break;
		}

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */
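		/*
		 * Also treat as whole-block overwrites the case where
		 * the file is not memory-mapped and the write starts
		 * on both a block and a page boundary: the full blocks
		 * covered by the transfer can then be faulted in busy
		 * (UBC_FAULTBUSY below) without reading or zeroing
		 * their old contents.
		 */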
		overwrite = uio->uio_offset >= preallocoff &&
		    uio->uio_offset < endallocoff;
		if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 &&
		    ufs_blkoff(fs, uio->uio_offset) == 0 &&
		    (uio->uio_offset & PAGE_MASK) == 0) {
			vsize_t len;

			len = trunc_page(bytelen);
			len -= ufs_blkoff(fs, len);
			if (len > 0) {
				overwrite = true;
				bytelen = len;
			}
		}

		newoff = oldoff + bytelen;
		if (vp->v_size < newoff) {
			uvm_vnp_setwritesize(vp, newoff);
		}

		if (!overwrite) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error)
				break;
		} else {
			genfs_node_wrlock(vp);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			genfs_node_unlock(vp);
			if (error)
				break;
			ubc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
		    IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp));

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 *
		 * we should update the size even when uiomove failed.
		 */

		if (vp->v_size < newoff) {
			uvm_vnp_setsize(vp, newoff);
			extended = 1;
		}

		if (error)
			break;

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */
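		/*
		 * In the FFS case, unless the mount is async, each time
		 * the write crosses a 64KB boundary (oldoff >> 16
		 * changes) the completed 64KB window is pushed out with
		 * an asynchronous, lazy VOP_PUTPAGES so dirty pages do
		 * not accumulate without bound during a large write.
		 */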

#ifndef LFS_READWRITE
		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			mutex_enter(vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16,
			    PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY);
			if (error)
				break;
		}
#endif
	}
	if (error == 0 && ioflag & IO_SYNC) {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask),
		    round_page(ufs_blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
	}

out:
	error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid,
	    extended, error);
	UFS_WAPBL_END(vp->v_mount);
	fstrans_done(vp->v_mount);

	return (error);
}

/*
 * UFS op for writing via the buffer cache
 */
int
BUFWR(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred)
{
	struct inode *ip;
	struct ufsmount *ump;
	FS *fs;
	int flags;
	struct buf *bp;
	off_t osize;
	int resid, xfersize, size, blkoffset;
	daddr_t lbn;
	int extended=0;
	int error;
#ifdef LFS_READWRITE
	bool need_unreserve = false;
#endif

	KASSERT(ISSET(ioflag, IO_NODELOCKED));
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
	KASSERT(vp->v_type != VDIR || ISSET(ioflag, IO_SYNC));
	KASSERT(uio->uio_rw == UIO_WRITE);
	KASSERT(ISSET(ioflag, IO_JOURNALLOCKED));
	UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);

	ip = VTOI(vp);
	ump = ip->i_ump;
	fs = ip->I_FS;

	KASSERT(vp->v_size == ip->i_size);

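	/*
	 * The bound is written with a subtraction on the right-hand
	 * side so that uio_offset + uio_resid cannot overflow before
	 * the comparison is made.
	 */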
	if (uio->uio_offset < 0 ||
	    uio->uio_resid > ump->um_maxfilesize ||
	    uio->uio_offset > (ump->um_maxfilesize - uio->uio_resid))
		return EFBIG;
#ifdef LFS_READWRITE
	KASSERT(vp != fs->lfs_ivnode);
#endif
	if (uio->uio_resid == 0)
		return 0;

	fstrans_start(vp->v_mount, FSTRANS_SHARED);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	resid = uio->uio_resid;
	osize = ip->i_size;
	error = 0;

	KASSERT(vp->v_type != VREG);

#ifdef LFS_READWRITE
	lfs_availwait(fs, btofsb(fs, uio->uio_resid));
	lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* LFS_READWRITE */

	/* XXX Should never have pages cached here.  */
	KASSERT(vp->v_uobj.uo_npages == 0);
	while (uio->uio_resid > 0) {
		lbn = ufs_lblkno(fs, uio->uio_offset);
		blkoffset = ufs_blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
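		/*
		 * For a partial-block write, B_CLRBUF asks UFS_BALLOC
		 * to supply valid contents (by reading or zeroing) for
		 * the bytes we will not overwrite; a full-block write
		 * can skip that work.
		 */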
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

#ifdef LFS_READWRITE
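		/*
		 * Worst case, this write dirties one data block plus
		 * up to UFS_NIADDR indirect blocks, so reserve that
		 * much segment space before allocating.
		 */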
		error = lfs_reserve(fs, vp, NULL,
		    btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
		if (error)
			break;
		need_unreserve = true;
#endif
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize, cred, flags,
		    &bp);

		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_ASSIGN(ip, size, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			extended = 1;
		}
		size = ufs_blksize(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			brelse(bp, BC_INVAL);
			break;
		}
#ifdef LFS_READWRITE
		(void)VOP_BWRITE(bp->b_vp, bp);
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
		need_unreserve = false;
#else
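		/*
		 * FFS write-out policy: a synchronous write goes to
		 * disk before we return, a completely filled block is
		 * started asynchronously, and a partial block is
		 * delayed in the hope that the rest of it will be
		 * written soon.
		 */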
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
#ifdef LFS_READWRITE
	if (need_unreserve) {
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
	}
#endif

	error = ufs_post_write_update(vp, uio, ioflag, cred, osize, resid,
	    extended, error);
	fstrans_done(vp->v_mount);

	return (error);
}

static int
ufs_post_write_update(struct vnode *vp, struct uio *uio, int ioflag,
    kauth_cred_t cred, off_t osize, int resid, int extended, int oerror)
{
	struct inode *ip = VTOI(vp);
	int error = oerror;

	/* Trigger ctime and mtime updates, and atime if MNT_RELATIME.  */
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (vp->v_mount->mnt_flag & MNT_RELATIME)
		ip->i_flag |= IN_ACCESS;

	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && cred) {
		if (ip->i_mode & ISUID) {
			if (kauth_authorize_vnode(cred,
			    KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) {
				ip->i_mode &= ~ISUID;
				DIP_ASSIGN(ip, mode, ip->i_mode);
			}
		}

		if (ip->i_mode & ISGID) {
			if (kauth_authorize_vnode(cred,
			    KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) {
				ip->i_mode &= ~ISGID;
				DIP_ASSIGN(ip, mode, ip->i_mode);
			}
		}
	}

	/* If we successfully wrote anything, notify kevent listeners.  */
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));

	/*
	 * Update the size on disk: truncate back to original size on
	 * error, or reflect the new size on success.
	 */
	if (error) {
		(void) UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, cred);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	else
		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);

	/* Make sure the vnode uvm size matches the inode file size.  */
	KASSERT(vp->v_size == ip->i_size);

	/* Write error overrides any inode update error.  */
	if (oerror)
		error = oerror;
	return error;
}