/*	$OpenBSD: vfs_vnops.c,v 1.27 2000/11/21 21:49:57 provos Exp $	*/
/*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.5 (Berkeley) 12/8/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/cdio.h>

#include <vm/vm.h>

#if defined(UVM)
#include <uvm/uvm_extern.h>
#endif

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>

int	vn_read __P((struct file *fp, off_t *off, struct uio *uio,
	    struct ucred *cred));
int	vn_write __P((struct file *fp, off_t *off, struct uio *uio,
	    struct ucred *cred));
int	vn_select __P((struct file *fp, int which, struct proc *p));
int	vn_closefile __P((struct file *fp, struct proc *p));
int	vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
	    struct proc *p));

struct	fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };

static int	filt_nullattach(struct knote *kn);
static int	filt_vnattach(struct knote *kn);
static void	filt_vndetach(struct knote *kn);
static int	filt_vnode(struct knote *kn, long hint);
static int	filt_vnread(struct knote *kn, long hint);

struct filterops vn_filtops =
	{ 1, filt_vnattach, filt_vndetach, filt_vnode };

/*
 * XXX
 * filt_vnread is ufs-specific, so the attach routine should really
 * switch out to different filterops based on the vn filetype
 */
struct filterops vn_rwfiltops[] = {
	{ 1, filt_vnattach, filt_vndetach, filt_vnread },
	{ 1, filt_nullattach, NULL, NULL },
};

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
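 * On success the open vnode is returned in ndp->ni_vp, referenced and
 * locked; the caller is responsible for unlocking it and for the
 * eventual vn_close().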
 */
int
vn_open(ndp, fmode, cmode)
	register struct nameidata *ndp;
	int fmode, cmode;
{
	register struct vnode *vp;
	register struct proc *p = ndp->ni_cnd.cn_proc;
	register struct ucred *cred = p->p_ucred;
	struct vattr va;
	int error;

	if ((fmode & (FREAD|FWRITE)) == 0)
		return (EINVAL);
	if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC)
		return (EINVAL);
	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		if ((error = namei(ndp)) != 0)
			return (error);

		if (ndp->ni_vp == NULL) {
			VATTR_NULL(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
					   &ndp->ni_cnd, &va);
			if (error)
				return (error);
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
		if ((error = namei(ndp)) != 0)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		if (fmode & FREAD) {
			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
				goto bad;
		}
		if (fmode & FWRITE) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			if ((error = vn_writechk(vp)) != 0 ||
			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
				goto bad;
		}
	}
	if (fmode & O_TRUNC) {
		VOP_UNLOCK(vp, 0, p);				/* XXX */
		VOP_LEASE(vp, p, cred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
		goto bad;
	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	vput(vp);
	return (error);
}

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
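 * A vnode that currently backs executing program text (VTEXT) may only
 * be written if its cached pages can be discarded; otherwise ETXTBSY
 * is returned.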
 */
int
vn_writechk(vp)
	register struct vnode *vp;
{

	/*
	 * Disallow write attempts on read-only file systems;
	 * unless the file is a socket or a block or character
	 * device resident on the file system.
	 */
	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
		switch (vp->v_type) {
		case VREG: case VDIR: case VLNK:
			return (EROFS);
		case VNON: case VCHR: case VSOCK:
		case VFIFO: case VBAD: case VBLK:
			break;
		}
	}
	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
#if defined(UVM)
	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
		return (ETXTBSY);
#else
	if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
		return (ETXTBSY);
#endif
	return (0);
}

/*
 * Vnode close call
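 * Drops the write count taken in vn_open() when the file was open for
 * writing, calls VOP_CLOSE() and releases the reference on the vnode.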
 */
int
vn_close(vp, flags, cred, p)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	error = VOP_CLOSE(vp, flags, cred, p);
	vrele(vp);
	return (error);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
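 *
 * Example (sketch only; `buf', `len' and `resid' are caller-supplied):
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, len, (off_t)0,
 *	    UIO_SYSSPACE, 0, p->p_ucred, &resid, p);
 *
 * With IO_NODELOCKED clear, vn_rdwr() locks and unlocks the vnode
 * around the transfer itself.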
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	size_t *aresid;
	struct proc *p;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_procp = p;
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode read routine.
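 * Transfers data at the file offset *poff and advances *poff by the
 * number of bytes actually read.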
 */
int
vn_read(fp, poff, uio, cred)
	struct file *fp;
	off_t *poff;
	struct uio *uio;
	struct ucred *cred;
{
	register struct vnode *vp = (struct vnode *)fp->f_data;
	int error = 0;
	size_t count;
	struct proc *p = uio->uio_procp;

	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	uio->uio_offset = *poff;
	count = uio->uio_resid;
	if (vp->v_type != VDIR)
		error = VOP_READ(vp, uio,
		    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
	*poff += count - uio->uio_resid;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode write routine.
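 * Advances *poff by the amount written; for append-mode writes the
 * offset left behind by VOP_WRITE() (the new end of file) is stored
 * instead.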
 */
int
vn_write(fp, poff, uio, cred)
	struct file *fp;
	off_t *poff;
	struct uio *uio;
	struct ucred *cred;
{
	register struct vnode *vp = (struct vnode *)fp->f_data;
	struct proc *p = uio->uio_procp;
	int error, ioflag = IO_UNIT;
	size_t count;

	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_flag & FFSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	uio->uio_offset = *poff;
	count = uio->uio_resid;
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if (ioflag & IO_APPEND)
		*poff = uio->uio_offset;
	else
		*poff += count - uio->uio_resid;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode stat routine.
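 * Fetches the attributes with VOP_GETATTR() and translates them into
 * the struct stat expected by userland.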
 */
int
vn_stat(vp, sb, p)
	struct vnode *vp;
	register struct stat *sb;
	struct proc *p;
{
	struct vattr va;
	int error;
	u_short mode;

	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	sb->st_dev = va.va_fsid;
	sb->st_ino = va.va_fileid;
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = va.va_nlink;
	sb->st_uid = va.va_uid;
	sb->st_gid = va.va_gid;
	sb->st_rdev = va.va_rdev;
	sb->st_size = va.va_size;
	sb->st_atimespec = va.va_atime;
	sb->st_mtimespec = va.va_mtime;
	sb->st_ctimespec = va.va_ctime;
	sb->st_blksize = va.va_blocksize;
	sb->st_flags = va.va_flags;
	sb->st_gen = va.va_gen;
	sb->st_blocks = va.va_bytes / S_BLKSIZE;
	return (0);
}

/*
 * File table vnode ioctl routine.
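 * Regular files and directories handle only a few generic requests here
 * (FIONREAD, FIBMAP, FIONBIO, FIOASYNC); character, block and fifo
 * vnodes pass the request on to VOP_IOCTL().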
 */
int
vn_ioctl(fp, com, data, p)
	struct file *fp;
	u_long com;
	caddr_t data;
	struct proc *p;
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIBMAP)
			return VOP_IOCTL(vp, com, data, fp->f_flag,
					 p->p_ucred, p);
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into... */

	default:
		return (ENOTTY);

	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		if (error == 0 && com == TIOCSCTTY) {
			if (p->p_session->s_ttyvp)
				vrele(p->p_session->s_ttyvp);
			p->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}

/*
 * File table vnode select routine.
 */
int
vn_select(fp, which, p)
	struct file *fp;
	int which;
	struct proc *p;
{

	return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
			   fp->f_cred, p));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
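 * If the vnode is being cleaned out (VXLOCK), sleep until the cleaning
 * is done and either retry (LK_RETRY) or fail with ENOENT.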
 */
int
vn_lock(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	do {
		if ((flags & LK_INTERLOCK) == 0)
			simple_lock(&vp->v_interlock);
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
			error = ENOENT;
		} else {
			error = VOP_LOCK(vp, flags | LK_INTERLOCK | LK_CANRECURSE, p);
			if (error == 0)
				return (error);
		}
		flags &= ~LK_INTERLOCK;
	} while (flags & LK_RETRY);
	return (error);
}

/*
 * File table vnode close routine.
 */
int
vn_closefile(fp, p)
	struct file *fp;
	struct proc *p;
{

	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
		fp->f_cred, p));
}

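/*
 * Attach a knote to a vnode by linking it onto the knote list hanging
 * off the vnode's select/kqueue information.
 */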
static int
filt_vnattach(struct knote *kn)
{
	struct vnode *vp;

	if (kn->kn_fp->f_type != DTYPE_VNODE)
		return (EBADF);

	vp = (struct vnode *)kn->kn_fp->f_data;

	/*
	 * XXX
	 * this is a hack simply to cause the filter attach to fail
	 * for non-ufs filesystems, until the support for them is done.
	 */
	if ((vp)->v_tag != VT_UFS || (vp)->v_type == VFIFO)
		return (EOPNOTSUPP);

	simple_lock(&vp->v_selectinfo.vsi_lock);
	SLIST_INSERT_HEAD(&vp->v_selectinfo.vsi_selinfo.si_note, kn, kn_selnext);
	simple_unlock(&vp->v_selectinfo.vsi_lock);

	return (0);
}

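/*
 * Detach a knote from a vnode, undoing the list insertion done at
 * attach time.
 */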
static void
filt_vndetach(struct knote *kn)
{
	struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;

	simple_lock(&vp->v_selectinfo.vsi_lock);
	SLIST_REMOVE(&vp->v_selectinfo.vsi_selinfo.si_note,
	    kn, knote, kn_selnext);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
}

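/*
 * Generic vnode filter: latch the events the caller subscribed to and
 * report whether any of them are pending.
 */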
static int
filt_vnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}

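/*
 * Attach routine for vnode types that have no read filter support yet;
 * always fails.
 */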
static int
filt_nullattach(struct knote *kn)
{
	return (ENXIO);
}

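/*
 * Read filter: report how much data is left between the current file
 * offset and the end of the file (UFS only, see the XXX above
 * vn_rwfiltops).
 */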
/*ARGSUSED*/
static int
filt_vnread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
	struct inode *ip = VTOI(vp);

	kn->kn_data = ip->i_ffs_size - kn->kn_fp->f_offset;
	return (kn->kn_data != 0);
}