xref: /netbsd/sys/ufs/ufs/ufs_readwrite.c (revision bf9ec67e)
1 /*	$NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.42 2002/03/25 02:23:56 chs Exp $");
40 
/*
 * This file provides one shared implementation of the read and write
 * vnode operations for two filesystems.  It is presumably #included
 * twice by other translation units (hence __KERNEL_RCSID(1, ...) above):
 * once with LFS_READWRITE defined (producing lfs_read/lfs_write against
 * "struct lfs") and once without (producing ffs_read/ffs_write against
 * "struct fs").  The macros below select the names, superblock type,
 * and field spellings for each flavor. -- TODO confirm the including
 * files; they are outside this source.
 */
#ifdef LFS_READWRITE
#define	BLKSIZE(a, b, c)	blksize(a, b, c)	/* size of logical block c */
#define	FS			struct lfs		/* superblock type */
#define	I_FS			i_lfs			/* inode's superblock pointer field */
#define	READ			lfs_read		/* name the read op compiles to */
#define	READ_S			"lfs_read"		/* same, as a string for panic() */
#define	WRITE			lfs_write		/* name the write op compiles to */
#define	WRITE_S			"lfs_write"		/* same, as a string for panic() */
#define	fs_bsize		lfs_bsize		/* map FFS superblock field names */
#define	fs_maxfilesize		lfs_maxfilesize		/* ... onto their struct lfs equivalents */
#else
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#endif
60 
/*
 * Vnode op for reading.
 *
 * Transfers data from the file backing ap->a_vp into ap->a_uio.
 * Returns 0 on success or an errno value (e.g. EFBIG for an offset
 * beyond the filesystem's maximum file size, or an I/O error from the
 * buffer cache).  On any read, the inode is flagged IN_ACCESS for a
 * later atime update unless the mount has MNT_NOATIME.
 *
 * Two transfer paths exist:
 *   - page-cache (UBC) path: used for FFS regular files; maps windows
 *     of the file with ubc_alloc() and copies with uiomove().
 *   - buffer-cache path: used for directories, symlinks, and all LFS
 *     reads; goes block-by-block through bread()/breadn().
 */
/* ARGSUSED */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	void *win;		/* UBC mapping window */
	vsize_t bytelen;	/* length of the current UBC window */
	struct buf *bp;		/* current buffer-cache buffer */
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;	/* bytes remaining before EOF */
	long size, xfersize, blkoffset;
	int error;
	boolean_t usepc = FALSE;	/* TRUE => use the page-cache path */

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		/*
		 * Symlinks shorter than mnt_maxsymlinklen are stored in
		 * the inode itself and must not be read via this op.
		 */
		if ((int)ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     ip->i_ffs_blocks == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset >= ip->i_ffs_size) {
		/* At or past EOF: transfer nothing, but still handle atime. */
		goto out;
	}

#ifndef LFS_READWRITE
	/* FFS regular-file data lives in the unified page cache. */
	usepc = vp->v_type == VREG;
#endif
	if (usepc) {
		/*
		 * Page-cache path: repeatedly map a window of the file
		 * and copy out of it, clamping each window so we never
		 * read past EOF.
		 */
		while (uio->uio_resid > 0) {
			bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;

			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
					&bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);
			if (error)
				break;
		}
		goto out;
	}

	/*
	 * Buffer-cache path: read one logical block per iteration,
	 * issuing read-ahead for the next block when it is still
	 * inside the file.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_ffs_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Transfer no more than: rest of block, request, or EOF. */
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* A buffer is still held if the loop exited via break. */
	if (bp != NULL)
		brelse(bp);

 out:
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		/* For IO_SYNC reads, push the timestamp update to disk now. */
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	}
	return (error);
}
183 
/*
 * Vnode op for writing.
 *
 * Transfers data from ap->a_uio into the file backing ap->a_vp,
 * allocating backing blocks as needed.  Enforces IO_APPEND, the
 * append-only inode flag, fs_maxfilesize, and RLIMIT_FSIZE (raising
 * SIGXFSZ when exceeded).  On error, any partial growth is rolled
 * back with VOP_TRUNCATE and the uio is rewound; on success with
 * IO_SYNC, the inode update is flushed synchronously.
 *
 * Two transfer paths exist:
 *   - page-cache (UBC) path: FFS regular files; allocates blocks via
 *     ufs_balloc_range()/GOP_ALLOC, then copies through ubc_alloc()
 *     windows.
 *   - buffer-cache path ("bcache"): directories, symlinks, and all
 *     LFS writes; goes block-by-block through VOP_BALLOC and
 *     bwrite()/bawrite()/bdwrite() (or VOP_BWRITE with lfs_reserve()
 *     bracketing for LFS).
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct genfs_node *gp;
	FS *fs;
	struct buf *bp;
	struct proc *p;
	struct ucred *cred;
	ufs_daddr_t lbn;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int bsize, aflag;
	int ubc_alloc_flags;	/* UBC_WRITE, plus UBC_FAULTBUSY when extending */
	void *win;		/* UBC mapping window */
	vsize_t bytelen;
	boolean_t async;	/* mount is MNT_ASYNC */
	boolean_t usepc = FALSE;	/* TRUE => use the page-cache path */

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	gp = VTOG(vp);

	KASSERT(vp->v_size == ip->i_ffs_size);
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_ffs_size;
		/* An append-only file may only be written at EOF. */
		if ((ip->i_ffs_flags & APPEND) && uio->uio_offset != ip->i_ffs_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		/* Directory writes must always be synchronous. */
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
	default:
		panic("%s: type", WRITE_S);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	if (uio->uio_resid == 0)
		return (0);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	origoff = uio->uio_offset;	/* saved for rollback / final flush */
	resid = uio->uio_resid;		/* saved to detect partial progress */
	osize = ip->i_ffs_size;		/* original size, restored on error */
	bsize = fs->fs_bsize;
	error = 0;

#ifndef LFS_READWRITE
	/* FFS regular-file data goes through the unified page cache. */
	usepc = vp->v_type == VREG;
#endif
	if (!usepc) {
		goto bcache;
	}

	/*
	 * [preallocoff, endallocoff) is the block-aligned region of this
	 * write that extends the file; inside it we can allocate blocks
	 * without first zeroing pages (the data is unreachable until the
	 * write completes).  Outside it we must use ufs_balloc_range().
	 */
	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - blkoff(fs, nsize);

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
	    lblkno(fs, osize) != lblkno(fs, nsize) &&
	    blkroundup(fs, osize) != osize) {
		error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
		    osize, cred, aflag);
		if (error) {
			goto out;
		}
		if (flags & B_SYNC) {
			/* Synchronously flush the expanded fragment's pages. */
			vp->v_size = blkroundup(fs, osize);
			simple_lock(&vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & ~(bsize - 1)),
			    round_page(vp->v_size), PGO_CLEANIT | PGO_SYNCIO);
		}
	}

	ubc_alloc_flags = UBC_WRITE;
	while (uio->uio_resid > 0) {
		oldoff = uio->uio_offset;
		blkoffset = blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */

		if (uio->uio_offset < preallocoff ||
		    uio->uio_offset >= endallocoff) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error) {
				break;
			}
			/* Pages were initialized; take faults normally. */
			ubc_alloc_flags &= ~UBC_FAULTBUSY;
		} else {
			/* Extending: allocate under the genfs glock only. */
			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
			if (error) {
				break;
			}
			ubc_alloc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    ubc_alloc_flags);
		error = uiomove(win, bytelen, uio);
		ubc_release(win, 0);
		if (error) {
			break;
		}

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
		}

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			/* Crossed a 64KB boundary: clean the finished chunk. */
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
			if (error) {
				break;
			}
		}
	}
	if (error == 0 && ioflag & IO_SYNC) {
		/* Synchronous write: flush everything we touched. */
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(bsize - 1)),
		    round_page(blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	goto out;

 bcache:
	/*
	 * Buffer-cache path.  First purge any cached pages over the
	 * affected range so the page cache and buffer cache don't
	 * hold conflicting copies.
	 */
	simple_lock(&vp->v_interlock);
	VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	while (uio->uio_resid > 0) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		/* Partial-block writes must read/clear the rest of the block. */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);

		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_ffs_size) {
			/* Grow the file; keep UVM's size in sync. */
			ip->i_ffs_size = uio->uio_offset + xfersize;
			uvm_vnp_setsize(vp, ip->i_ffs_size);
		}
		/* Clamp to what the buffer actually holds (short I/O). */
		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			break;
		}
#ifdef LFS_READWRITE
		/* Reserve worst-case indirect-block space around the write. */
		if (!error)
			error = lfs_reserve(fs, vp, btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		(void)VOP_BWRITE(bp);
		if (!error)
			lfs_reserve(fs, vp, -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
#else
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			/* Full block finished: start the write immediately. */
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_ffs_mode &= ~(ISUID | ISGID);
	if (error) {
		/* Roll back partial growth and rewind the uio. */
		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
		    uio->uio_procp);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	KASSERT(vp->v_size == ip->i_ffs_size);
	return (error);
}
460