xref: /dragonfly/sys/kern/vfs_syscalls.c (revision 1de703da)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
40  * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.2 2003/06/17 04:28:42 dillon Exp $
41  */
42 
43 /* For 4.3 integer FS ID compatibility */
44 #include "opt_compat.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/buf.h>
49 #include <sys/sysent.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/unistd.h>
61 #include <sys/vnode.h>
62 #include <sys/proc.h>
63 #include <sys/dirent.h>
64 #include <sys/extattr.h>
65 
66 #include <machine/limits.h>
67 #include <miscfs/union/union.h>
68 #include <sys/sysctl.h>
69 #include <vm/vm.h>
70 #include <vm/vm_object.h>
71 #include <vm/vm_zone.h>
72 #include <vm/vm_page.h>
73 
74 static int change_dir __P((struct nameidata *ndp, struct proc *p));
75 static void checkdirs __P((struct vnode *olddp));
76 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
77 static int getutimes __P((const struct timeval *, struct timespec *));
78 static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
79 static int setfmode __P((struct proc *, struct vnode *, int));
80 static int setfflags __P((struct proc *, struct vnode *, int));
81 static int setutimes __P((struct proc *, struct vnode *,
82     const struct timespec *, int));
83 static int	usermount = 0;	/* if 1, non-root can mount fs. */
84 
85 int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
86 
87 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
88 
89 /*
90  * Virtual File System System Calls
91  */
92 
93 /*
94  * Mount a file system.
95  */
96 #ifndef _SYS_SYSPROTO_H_
97 struct mount_args {
98 	char	*type;
99 	char	*path;
100 	int	flags;
101 	caddr_t	data;
102 };
103 #endif
104 /* ARGSUSED */
105 int
106 mount(p, uap)
107 	struct proc *p;
108 	register struct mount_args /* {
109 		syscallarg(char *) type;
110 		syscallarg(char *) path;
111 		syscallarg(int) flags;
112 		syscallarg(caddr_t) data;
113 	} */ *uap;
114 {
115 	struct vnode *vp;
116 	struct mount *mp;
117 	struct vfsconf *vfsp;
118 	int error, flag = 0, flag2 = 0;
119 	struct vattr va;
120 #ifdef COMPAT_43
121 	u_long fstypenum;
122 #endif
123 	struct nameidata nd;
124 	char fstypename[MFSNAMELEN];
125 
126 	if (usermount == 0 && (error = suser(p)))
127 		return (error);
128 	/*
129 	 * Do not allow NFS export by non-root users.
130 	 */
131 	if (SCARG(uap, flags) & MNT_EXPORTED) {
132 		error = suser(p);
133 		if (error)
134 			return (error);
135 	}
136 	/*
137 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
138 	 */
139 	if (suser_xxx(p->p_ucred, 0, 0))
140 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
141 	/*
142 	 * Get vnode to be covered
143 	 */
144 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
145 	    SCARG(uap, path), p);
146 	if ((error = namei(&nd)) != 0)
147 		return (error);
148 	NDFREE(&nd, NDF_ONLY_PNBUF);
149 	vp = nd.ni_vp;
150 	if (SCARG(uap, flags) & MNT_UPDATE) {
151 		if ((vp->v_flag & VROOT) == 0) {
152 			vput(vp);
153 			return (EINVAL);
154 		}
155 		mp = vp->v_mount;
156 		flag = mp->mnt_flag;
157 		flag2 = mp->mnt_kern_flag;
158 		/*
159 		 * We only allow the filesystem to be reloaded if it
160 		 * is currently mounted read-only.
161 		 */
162 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
163 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
164 			vput(vp);
165 			return (EOPNOTSUPP);	/* Needs translation */
166 		}
167 		/*
168 		 * Only root, or the user that did the original mount is
169 		 * permitted to update it.
170 		 */
171 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
172 		    (error = suser(p))) {
173 			vput(vp);
174 			return (error);
175 		}
176 		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
177 			vput(vp);
178 			return (EBUSY);
179 		}
180 		simple_lock(&vp->v_interlock);
181 		if ((vp->v_flag & VMOUNT) != 0 ||
182 		    vp->v_mountedhere != NULL) {
183 			simple_unlock(&vp->v_interlock);
184 			vfs_unbusy(mp, p);
185 			vput(vp);
186 			return (EBUSY);
187 		}
188 		vp->v_flag |= VMOUNT;
189 		simple_unlock(&vp->v_interlock);
190 		mp->mnt_flag |=
191 		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
192 		VOP_UNLOCK(vp, 0, p);
193 		goto update;
194 	}
195 	/*
196 	 * If the user is not root, ensure that they own the directory
197 	 * onto which we are attempting to mount.
198 	 */
199 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
200 	    (va.va_uid != p->p_ucred->cr_uid &&
201 	     (error = suser(p)))) {
202 		vput(vp);
203 		return (error);
204 	}
205 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
206 		vput(vp);
207 		return (error);
208 	}
209 	if (vp->v_type != VDIR) {
210 		vput(vp);
211 		return (ENOTDIR);
212 	}
213 #ifdef COMPAT_43
214 	/*
215 	 * Historically filesystem types were identified by number. If we
216 	 * get an integer for the filesystem type instead of a string, we
217 	 * check to see if it matches one of the historic filesystem types.
218 	 */
219 	fstypenum = (uintptr_t)SCARG(uap, type);
220 	if (fstypenum < maxvfsconf) {
221 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
222 			if (vfsp->vfc_typenum == fstypenum)
223 				break;
224 		if (vfsp == NULL) {
225 			vput(vp);
226 			return (ENODEV);
227 		}
228 		strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN);
229 	} else
230 #endif /* COMPAT_43 */
231 	if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) {
232 		vput(vp);
233 		return (error);
234 	}
235 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
236 		if (!strcmp(vfsp->vfc_name, fstypename))
237 			break;
238 	if (vfsp == NULL) {
239 		linker_file_t lf;
240 
241 		/* Only load modules for root (very important!) */
242 		if ((error = suser(p)) != 0) {
243 			vput(vp);
244 			return error;
245 		}
246 		error = linker_load_file(fstypename, &lf);
247 		if (error || lf == NULL) {
248 			vput(vp);
249 			if (lf == NULL)
250 				error = ENODEV;
251 			return error;
252 		}
253 		lf->userrefs++;
254 		/* lookup again, see if the VFS was loaded */
255 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
256 			if (!strcmp(vfsp->vfc_name, fstypename))
257 				break;
258 		if (vfsp == NULL) {
259 			lf->userrefs--;
260 			linker_file_unload(lf);
261 			vput(vp);
262 			return (ENODEV);
263 		}
264 	}
265 	simple_lock(&vp->v_interlock);
266 	if ((vp->v_flag & VMOUNT) != 0 ||
267 	    vp->v_mountedhere != NULL) {
268 		simple_unlock(&vp->v_interlock);
269 		vput(vp);
270 		return (EBUSY);
271 	}
272 	vp->v_flag |= VMOUNT;
273 	simple_unlock(&vp->v_interlock);
274 
275 	/*
276 	 * Allocate and initialize the filesystem.
277 	 */
278 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
279 	bzero((char *)mp, (u_long)sizeof(struct mount));
280 	TAILQ_INIT(&mp->mnt_nvnodelist);
281 	TAILQ_INIT(&mp->mnt_reservedvnlist);
282 	mp->mnt_nvnodelistsize = 0;
283 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
284 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
285 	mp->mnt_op = vfsp->vfc_vfsops;
286 	mp->mnt_vfc = vfsp;
287 	vfsp->vfc_refcount++;
288 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
289 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
290 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
291 	mp->mnt_vnodecovered = vp;
292 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
293 	mp->mnt_iosize_max = DFLTPHYS;
294 	VOP_UNLOCK(vp, 0, p);
295 update:
296 	/*
297 	 * Set the mount level flags.
298 	 */
299 	if (SCARG(uap, flags) & MNT_RDONLY)
300 		mp->mnt_flag |= MNT_RDONLY;
301 	else if (mp->mnt_flag & MNT_RDONLY)
302 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
303 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
304 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
305 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
306 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
307 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
308 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
309 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
310 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
311 	/*
312 	 * Mount the filesystem.
313 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
314 	 * get.  No freeing of cn_pnbuf.
315 	 */
316 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
317 	if (mp->mnt_flag & MNT_UPDATE) {
318 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
319 			mp->mnt_flag &= ~MNT_RDONLY;
320 		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
321 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
322 		if (error) {
323 			mp->mnt_flag = flag;
324 			mp->mnt_kern_flag = flag2;
325 		}
326 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
327 			if (mp->mnt_syncer == NULL)
328 				error = vfs_allocate_syncvnode(mp);
329 		} else {
330 			if (mp->mnt_syncer != NULL)
331 				vrele(mp->mnt_syncer);
332 			mp->mnt_syncer = NULL;
333 		}
334 		vfs_unbusy(mp, p);
335 		simple_lock(&vp->v_interlock);
336 		vp->v_flag &= ~VMOUNT;
337 		simple_unlock(&vp->v_interlock);
338 		vrele(vp);
339 		return (error);
340 	}
341 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
342 	/*
343 	 * Put the new filesystem on the mount list after root.
344 	 */
345 	cache_purge(vp);
346 	if (!error) {
347 		simple_lock(&vp->v_interlock);
348 		vp->v_flag &= ~VMOUNT;
349 		vp->v_mountedhere = mp;
350 		simple_unlock(&vp->v_interlock);
351 		simple_lock(&mountlist_slock);
352 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
353 		simple_unlock(&mountlist_slock);
354 		checkdirs(vp);
355 		VOP_UNLOCK(vp, 0, p);
356 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
357 			error = vfs_allocate_syncvnode(mp);
358 		vfs_unbusy(mp, p);
359 		if ((error = VFS_START(mp, 0, p)) != 0)
360 			vrele(vp);
361 	} else {
362 		simple_lock(&vp->v_interlock);
363 		vp->v_flag &= ~VMOUNT;
364 		simple_unlock(&vp->v_interlock);
365 		mp->mnt_vfc->vfc_refcount--;
366 		vfs_unbusy(mp, p);
367 		free((caddr_t)mp, M_MOUNT);
368 		vput(vp);
369 	}
370 	return (error);
371 }
372 
373 /*
374  * Scan all active processes to see if any of them have a current
375  * or root directory onto which the new filesystem has just been
376  * mounted. If so, replace them with the new mount point.
377  */
378 static void
379 checkdirs(olddp)
380 	struct vnode *olddp;
381 {
382 	struct filedesc *fdp;
383 	struct vnode *newdp;
384 	struct proc *p;
385 
386 	if (olddp->v_usecount == 1)
387 		return;
388 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
389 		panic("mount: lost mount");
390 	LIST_FOREACH(p, &allproc, p_list) {
391 		fdp = p->p_fd;
392 		if (fdp->fd_cdir == olddp) {
393 			vrele(fdp->fd_cdir);
394 			VREF(newdp);
395 			fdp->fd_cdir = newdp;
396 		}
397 		if (fdp->fd_rdir == olddp) {
398 			vrele(fdp->fd_rdir);
399 			VREF(newdp);
400 			fdp->fd_rdir = newdp;
401 		}
402 	}
403 	if (rootvnode == olddp) {
404 		vrele(rootvnode);
405 		VREF(newdp);
406 		rootvnode = newdp;
407 	}
408 	vput(newdp);
409 }
410 
411 /*
412  * Unmount a file system.
413  *
414  * Note: unmount takes a path to the vnode mounted on as argument,
415  * not special file (as before).
416  */
417 #ifndef _SYS_SYSPROTO_H_
418 struct unmount_args {
419 	char	*path;
420 	int	flags;
421 };
422 #endif
423 /* ARGSUSED */
424 int
425 unmount(p, uap)
426 	struct proc *p;
427 	register struct unmount_args /* {
428 		syscallarg(char *) path;
429 		syscallarg(int) flags;
430 	} */ *uap;
431 {
432 	register struct vnode *vp;
433 	struct mount *mp;
434 	int error;
435 	struct nameidata nd;
436 
437 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
438 	    SCARG(uap, path), p);
439 	if ((error = namei(&nd)) != 0)
440 		return (error);
441 	vp = nd.ni_vp;
442 	NDFREE(&nd, NDF_ONLY_PNBUF);
443 	mp = vp->v_mount;
444 
445 	/*
446 	 * Only root, or the user that did the original mount is
447 	 * permitted to unmount this filesystem.
448 	 */
449 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
450 	    (error = suser(p))) {
451 		vput(vp);
452 		return (error);
453 	}
454 
455 	/*
456 	 * Don't allow unmounting the root file system.
457 	 */
458 	if (mp->mnt_flag & MNT_ROOTFS) {
459 		vput(vp);
460 		return (EINVAL);
461 	}
462 
463 	/*
464 	 * Must be the root of the filesystem
465 	 */
466 	if ((vp->v_flag & VROOT) == 0) {
467 		vput(vp);
468 		return (EINVAL);
469 	}
470 	vput(vp);
471 	return (dounmount(mp, SCARG(uap, flags), p));
472 }
473 
474 /*
475  * Do the actual file system unmount.
476  */
477 int
478 dounmount(mp, flags, p)
479 	register struct mount *mp;
480 	int flags;
481 	struct proc *p;
482 {
483 	struct vnode *coveredvp;
484 	int error;
485 	int async_flag;
486 
487 	simple_lock(&mountlist_slock);
488 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
489 		simple_unlock(&mountlist_slock);
490 		return (EBUSY);
491 	}
492 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
493 	/* Allow filesystems to detect that a forced unmount is in progress. */
494 	if (flags & MNT_FORCE)
495 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
496 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
497 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_slock, p);
498 	if (error) {
499 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
500 		if (mp->mnt_kern_flag & MNTK_MWAIT)
501 			wakeup((caddr_t)mp);
502 		return (error);
503 	}
504 
505 	if (mp->mnt_flag & MNT_EXPUBLIC)
506 		vfs_setpublicfs(NULL, NULL, NULL);
507 
508 	vfs_msync(mp, MNT_WAIT);
509 	async_flag = mp->mnt_flag & MNT_ASYNC;
510 	mp->mnt_flag &=~ MNT_ASYNC;
511 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
512 	if (mp->mnt_syncer != NULL)
513 		vrele(mp->mnt_syncer);
514 	if (((mp->mnt_flag & MNT_RDONLY) ||
515 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
516 	    (flags & MNT_FORCE))
517 		error = VFS_UNMOUNT(mp, flags, p);
518 	simple_lock(&mountlist_slock);
519 	if (error) {
520 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
521 			(void) vfs_allocate_syncvnode(mp);
522 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
523 		mp->mnt_flag |= async_flag;
524 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
525 		    &mountlist_slock, p);
526 		if (mp->mnt_kern_flag & MNTK_MWAIT)
527 			wakeup((caddr_t)mp);
528 		return (error);
529 	}
530 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
531 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
532 		coveredvp->v_mountedhere = (struct mount *)0;
533 		vrele(coveredvp);
534 	}
535 	mp->mnt_vfc->vfc_refcount--;
536 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
537 		panic("unmount: dangling vnode");
538 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p);
539 	if (mp->mnt_kern_flag & MNTK_MWAIT)
540 		wakeup((caddr_t)mp);
541 	free((caddr_t)mp, M_MOUNT);
542 	return (0);
543 }
544 
545 /*
546  * Sync each mounted filesystem.
547  */
548 #ifndef _SYS_SYSPROTO_H_
549 struct sync_args {
550         int     dummy;
551 };
552 #endif
553 
554 #ifdef DEBUG
555 static int syncprt = 0;
556 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
557 #endif
558 
559 /* ARGSUSED */
560 int
561 sync(p, uap)
562 	struct proc *p;
563 	struct sync_args *uap;
564 {
565 	register struct mount *mp, *nmp;
566 	int asyncflag;
567 
568 	simple_lock(&mountlist_slock);
569 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
570 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
571 			nmp = TAILQ_NEXT(mp, mnt_list);
572 			continue;
573 		}
574 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
575 			asyncflag = mp->mnt_flag & MNT_ASYNC;
576 			mp->mnt_flag &= ~MNT_ASYNC;
577 			vfs_msync(mp, MNT_NOWAIT);
578 			VFS_SYNC(mp, MNT_NOWAIT,
579 				((p != NULL) ? p->p_ucred : NOCRED), p);
580 			mp->mnt_flag |= asyncflag;
581 		}
582 		simple_lock(&mountlist_slock);
583 		nmp = TAILQ_NEXT(mp, mnt_list);
584 		vfs_unbusy(mp, p);
585 	}
586 	simple_unlock(&mountlist_slock);
587 #if 0
588 /*
589  * XXX don't call vfs_bufstats() yet because that routine
590  * was not imported in the Lite2 merge.
591  */
592 #ifdef DIAGNOSTIC
593 	if (syncprt)
594 		vfs_bufstats();
595 #endif /* DIAGNOSTIC */
596 #endif
597 	return (0);
598 }
599 
600 /* XXX PRISON: could be per prison flag */
601 static int prison_quotas;
602 #if 0
603 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
604 #endif
605 
606 /*
607  * Change filesystem quotas.
608  */
609 #ifndef _SYS_SYSPROTO_H_
610 struct quotactl_args {
611 	char *path;
612 	int cmd;
613 	int uid;
614 	caddr_t arg;
615 };
616 #endif
617 /* ARGSUSED */
618 int
619 quotactl(p, uap)
620 	struct proc *p;
621 	register struct quotactl_args /* {
622 		syscallarg(char *) path;
623 		syscallarg(int) cmd;
624 		syscallarg(int) uid;
625 		syscallarg(caddr_t) arg;
626 	} */ *uap;
627 {
628 	register struct mount *mp;
629 	int error;
630 	struct nameidata nd;
631 
632 	if (p->p_prison && !prison_quotas)
633 		return (EPERM);
634 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
635 	if ((error = namei(&nd)) != 0)
636 		return (error);
637 	mp = nd.ni_vp->v_mount;
638 	NDFREE(&nd, NDF_ONLY_PNBUF);
639 	vrele(nd.ni_vp);
640 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
641 	    SCARG(uap, arg), p));
642 }
643 
644 /*
645  * Get filesystem statistics.
646  */
647 #ifndef _SYS_SYSPROTO_H_
648 struct statfs_args {
649 	char *path;
650 	struct statfs *buf;
651 };
652 #endif
653 /* ARGSUSED */
654 int
655 statfs(p, uap)
656 	struct proc *p;
657 	register struct statfs_args /* {
658 		syscallarg(char *) path;
659 		syscallarg(struct statfs *) buf;
660 	} */ *uap;
661 {
662 	register struct mount *mp;
663 	register struct statfs *sp;
664 	int error;
665 	struct nameidata nd;
666 	struct statfs sb;
667 
668 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
669 	if ((error = namei(&nd)) != 0)
670 		return (error);
671 	mp = nd.ni_vp->v_mount;
672 	sp = &mp->mnt_stat;
673 	NDFREE(&nd, NDF_ONLY_PNBUF);
674 	vrele(nd.ni_vp);
675 	error = VFS_STATFS(mp, sp, p);
676 	if (error)
677 		return (error);
678 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
679 	if (suser_xxx(p->p_ucred, 0, 0)) {
680 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
681 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
682 		sp = &sb;
683 	}
684 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
685 }
686 
687 /*
688  * Get filesystem statistics.
689  */
690 #ifndef _SYS_SYSPROTO_H_
691 struct fstatfs_args {
692 	int fd;
693 	struct statfs *buf;
694 };
695 #endif
696 /* ARGSUSED */
697 int
698 fstatfs(p, uap)
699 	struct proc *p;
700 	register struct fstatfs_args /* {
701 		syscallarg(int) fd;
702 		syscallarg(struct statfs *) buf;
703 	} */ *uap;
704 {
705 	struct file *fp;
706 	struct mount *mp;
707 	register struct statfs *sp;
708 	int error;
709 	struct statfs sb;
710 
711 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
712 		return (error);
713 	mp = ((struct vnode *)fp->f_data)->v_mount;
714 	if (mp == NULL)
715 		return (EBADF);
716 	sp = &mp->mnt_stat;
717 	error = VFS_STATFS(mp, sp, p);
718 	if (error)
719 		return (error);
720 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
721 	if (suser_xxx(p->p_ucred, 0, 0)) {
722 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
723 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
724 		sp = &sb;
725 	}
726 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
727 }
728 
729 /*
730  * Get statistics on all filesystems.
731  */
732 #ifndef _SYS_SYSPROTO_H_
733 struct getfsstat_args {
734 	struct statfs *buf;
735 	long bufsize;
736 	int flags;
737 };
738 #endif
739 int
740 getfsstat(p, uap)
741 	struct proc *p;
742 	register struct getfsstat_args /* {
743 		syscallarg(struct statfs *) buf;
744 		syscallarg(long) bufsize;
745 		syscallarg(int) flags;
746 	} */ *uap;
747 {
748 	register struct mount *mp, *nmp;
749 	register struct statfs *sp;
750 	caddr_t sfsp;
751 	long count, maxcount, error;
752 
753 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
754 	sfsp = (caddr_t)SCARG(uap, buf);
755 	count = 0;
756 	simple_lock(&mountlist_slock);
757 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
758 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
759 			nmp = TAILQ_NEXT(mp, mnt_list);
760 			continue;
761 		}
762 		if (sfsp && count < maxcount) {
763 			sp = &mp->mnt_stat;
764 			/*
765 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
766 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
767 			 * overrides MNT_WAIT.
768 			 */
769 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
770 			    (SCARG(uap, flags) & MNT_WAIT)) &&
771 			    (error = VFS_STATFS(mp, sp, p))) {
772 				simple_lock(&mountlist_slock);
773 				nmp = TAILQ_NEXT(mp, mnt_list);
774 				vfs_unbusy(mp, p);
775 				continue;
776 			}
777 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
778 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
779 			if (error) {
780 				vfs_unbusy(mp, p);
781 				return (error);
782 			}
783 			sfsp += sizeof(*sp);
784 		}
785 		count++;
786 		simple_lock(&mountlist_slock);
787 		nmp = TAILQ_NEXT(mp, mnt_list);
788 		vfs_unbusy(mp, p);
789 	}
790 	simple_unlock(&mountlist_slock);
791 	if (sfsp && count > maxcount)
792 		p->p_retval[0] = maxcount;
793 	else
794 		p->p_retval[0] = count;
795 	return (0);
796 }
797 
798 /*
799  * Change current working directory to a given file descriptor.
800  */
801 #ifndef _SYS_SYSPROTO_H_
802 struct fchdir_args {
803 	int	fd;
804 };
805 #endif
806 /* ARGSUSED */
807 int
808 fchdir(p, uap)
809 	struct proc *p;
810 	struct fchdir_args /* {
811 		syscallarg(int) fd;
812 	} */ *uap;
813 {
814 	register struct filedesc *fdp = p->p_fd;
815 	struct vnode *vp, *tdp;
816 	struct mount *mp;
817 	struct file *fp;
818 	int error;
819 
820 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
821 		return (error);
822 	vp = (struct vnode *)fp->f_data;
823 	VREF(vp);
824 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
825 	if (vp->v_type != VDIR)
826 		error = ENOTDIR;
827 	else
828 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
829 	while (!error && (mp = vp->v_mountedhere) != NULL) {
830 		if (vfs_busy(mp, 0, 0, p))
831 			continue;
832 		error = VFS_ROOT(mp, &tdp);
833 		vfs_unbusy(mp, p);
834 		if (error)
835 			break;
836 		vput(vp);
837 		vp = tdp;
838 	}
839 	if (error) {
840 		vput(vp);
841 		return (error);
842 	}
843 	VOP_UNLOCK(vp, 0, p);
844 	vrele(fdp->fd_cdir);
845 	fdp->fd_cdir = vp;
846 	return (0);
847 }
848 
849 /*
850  * Change current working directory (``.'').
851  */
852 #ifndef _SYS_SYSPROTO_H_
853 struct chdir_args {
854 	char	*path;
855 };
856 #endif
857 /* ARGSUSED */
858 int
859 chdir(p, uap)
860 	struct proc *p;
861 	struct chdir_args /* {
862 		syscallarg(char *) path;
863 	} */ *uap;
864 {
865 	register struct filedesc *fdp = p->p_fd;
866 	int error;
867 	struct nameidata nd;
868 
869 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
870 	    SCARG(uap, path), p);
871 	if ((error = change_dir(&nd, p)) != 0)
872 		return (error);
873 	NDFREE(&nd, NDF_ONLY_PNBUF);
874 	vrele(fdp->fd_cdir);
875 	fdp->fd_cdir = nd.ni_vp;
876 	return (0);
877 }
878 
879 /*
880  * Helper function for raised chroot(2) security function:  Refuse if
881  * any filedescriptors are open directories.
882  */
883 static int
884 chroot_refuse_vdir_fds(fdp)
885 	struct filedesc *fdp;
886 {
887 	struct vnode *vp;
888 	struct file *fp;
889 	int error;
890 	int fd;
891 
892 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
893 		error = getvnode(fdp, fd, &fp);
894 		if (error)
895 			continue;
896 		vp = (struct vnode *)fp->f_data;
897 		if (vp->v_type != VDIR)
898 			continue;
899 		return(EPERM);
900 	}
901 	return (0);
902 }
903 
904 /*
905  * This sysctl determines if we will allow a process to chroot(2) if it
906  * has a directory open:
907  *	0: disallowed for all processes.
908  *	1: allowed for processes that were not already chroot(2)'ed.
909  *	2: allowed for all processes.
910  */
911 
912 static int chroot_allow_open_directories = 1;
913 
914 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
915      &chroot_allow_open_directories, 0, "");
916 
917 /*
918  * Change notion of root (``/'') directory.
919  */
920 #ifndef _SYS_SYSPROTO_H_
921 struct chroot_args {
922 	char	*path;
923 };
924 #endif
925 /* ARGSUSED */
926 int
927 chroot(p, uap)
928 	struct proc *p;
929 	struct chroot_args /* {
930 		syscallarg(char *) path;
931 	} */ *uap;
932 {
933 	register struct filedesc *fdp = p->p_fd;
934 	int error;
935 	struct nameidata nd;
936 
937 	error = suser_xxx(0, p, PRISON_ROOT);
938 	if (error)
939 		return (error);
940 	if (chroot_allow_open_directories == 0 ||
941 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
942 		error = chroot_refuse_vdir_fds(fdp);
943 	if (error)
944 		return (error);
945 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
946 	    SCARG(uap, path), p);
947 	if ((error = change_dir(&nd, p)) != 0)
948 		return (error);
949 	NDFREE(&nd, NDF_ONLY_PNBUF);
950 	vrele(fdp->fd_rdir);
951 	fdp->fd_rdir = nd.ni_vp;
952 	if (!fdp->fd_jdir) {
953 		fdp->fd_jdir = nd.ni_vp;
954                 VREF(fdp->fd_jdir);
955 	}
956 	return (0);
957 }
958 
959 /*
960  * Common routine for chroot and chdir.
961  */
962 static int
963 change_dir(ndp, p)
964 	register struct nameidata *ndp;
965 	struct proc *p;
966 {
967 	struct vnode *vp;
968 	int error;
969 
970 	error = namei(ndp);
971 	if (error)
972 		return (error);
973 	vp = ndp->ni_vp;
974 	if (vp->v_type != VDIR)
975 		error = ENOTDIR;
976 	else
977 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
978 	if (error)
979 		vput(vp);
980 	else
981 		VOP_UNLOCK(vp, 0, p);
982 	return (error);
983 }
984 
985 /*
986  * Check permissions, allocate an open file structure,
987  * and call the device open routine if any.
988  */
989 #ifndef _SYS_SYSPROTO_H_
990 struct open_args {
991 	char	*path;
992 	int	flags;
993 	int	mode;
994 };
995 #endif
996 int
997 open(p, uap)
998 	struct proc *p;
999 	register struct open_args /* {
1000 		syscallarg(char *) path;
1001 		syscallarg(int) flags;
1002 		syscallarg(int) mode;
1003 	} */ *uap;
1004 {
1005 	register struct filedesc *fdp = p->p_fd;
1006 	register struct file *fp;
1007 	register struct vnode *vp;
1008 	int cmode, flags, oflags;
1009 	struct file *nfp;
1010 	int type, indx, error;
1011 	struct flock lf;
1012 	struct nameidata nd;
1013 
1014 	oflags = SCARG(uap, flags);
1015 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1016 		return (EINVAL);
1017 	flags = FFLAGS(oflags);
1018 	error = falloc(p, &nfp, &indx);
1019 	if (error)
1020 		return (error);
1021 	fp = nfp;
1022 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1023 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1024 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1025 	/*
1026 	 * Bump the ref count to prevent another process from closing
1027 	 * the descriptor while we are blocked in vn_open()
1028 	 */
1029 	fhold(fp);
1030 	error = vn_open(&nd, flags, cmode);
1031 	if (error) {
1032 		/*
1033 		 * release our own reference
1034 		 */
1035 		fdrop(fp, p);
1036 
1037 		/*
1038 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1039 		 * responsible for dropping the old contents of ofiles[indx]
1040 		 * if it succeeds.
1041 		 */
1042 		if ((error == ENODEV || error == ENXIO) &&
1043 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1044 		    (error =
1045 			dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) {
1046 			p->p_retval[0] = indx;
1047 			return (0);
1048 		}
1049 		/*
1050 		 * Clean up the descriptor, but only if another thread hadn't
1051 		 * replaced or closed it.
1052 		 */
1053 		if (fdp->fd_ofiles[indx] == fp) {
1054 			fdp->fd_ofiles[indx] = NULL;
1055 			fdrop(fp, p);
1056 		}
1057 
1058 		if (error == ERESTART)
1059 			error = EINTR;
1060 		return (error);
1061 	}
1062 	p->p_dupfd = 0;
1063 	NDFREE(&nd, NDF_ONLY_PNBUF);
1064 	vp = nd.ni_vp;
1065 
1066 	/*
1067 	 * There should be 2 references on the file, one from the descriptor
1068 	 * table, and one for us.
1069 	 *
1070 	 * Handle the case where someone closed the file (via its file
1071 	 * descriptor) while we were blocked.  The end result should look
1072 	 * like opening the file succeeded but it was immediately closed.
1073 	 */
1074 	if (fp->f_count == 1) {
1075 		KASSERT(fdp->fd_ofiles[indx] != fp,
1076 		    ("Open file descriptor lost all refs"));
1077 		VOP_UNLOCK(vp, 0, p);
1078 		vn_close(vp, flags & FMASK, fp->f_cred, p);
1079 		fdrop(fp, p);
1080 		p->p_retval[0] = indx;
1081 		return 0;
1082 	}
1083 
1084 	fp->f_data = (caddr_t)vp;
1085 	fp->f_flag = flags & FMASK;
1086 	fp->f_ops = &vnops;
1087 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1088 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1089 		lf.l_whence = SEEK_SET;
1090 		lf.l_start = 0;
1091 		lf.l_len = 0;
1092 		if (flags & O_EXLOCK)
1093 			lf.l_type = F_WRLCK;
1094 		else
1095 			lf.l_type = F_RDLCK;
1096 		type = F_FLOCK;
1097 		if ((flags & FNONBLOCK) == 0)
1098 			type |= F_WAIT;
1099 		VOP_UNLOCK(vp, 0, p);
1100 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
1101 			/*
1102 			 * lock request failed.  Normally close the descriptor
1103 			 * but handle the case where someone might have dup()d
1104 			 * it when we weren't looking.  One reference is
1105 			 * owned by the descriptor array, the other by us.
1106 			 */
1107 			if (fdp->fd_ofiles[indx] == fp) {
1108 				fdp->fd_ofiles[indx] = NULL;
1109 				fdrop(fp, p);
1110 			}
1111 			fdrop(fp, p);
1112 			return (error);
1113 		}
1114 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1115 		fp->f_flag |= FHASLOCK;
1116 	}
1117 	/* assert that vn_open created a backing object if one is needed */
1118 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1119 		("open: vmio vnode has no backing object after vn_open"));
1120 	VOP_UNLOCK(vp, 0, p);
1121 
1122 	/*
1123 	 * release our private reference, leaving the one associated with the
1124 	 * descriptor table intact.
1125 	 */
1126 	fdrop(fp, p);
1127 	p->p_retval[0] = indx;
1128 	return (0);
1129 }
1130 
1131 #ifdef COMPAT_43
1132 /*
1133  * Create a file.
1134  */
1135 #ifndef _SYS_SYSPROTO_H_
1136 struct ocreat_args {
1137 	char	*path;
1138 	int	mode;
1139 };
1140 #endif
1141 int
1142 ocreat(p, uap)
1143 	struct proc *p;
1144 	register struct ocreat_args /* {
1145 		syscallarg(char *) path;
1146 		syscallarg(int) mode;
1147 	} */ *uap;
1148 {
1149 	struct open_args /* {
1150 		syscallarg(char *) path;
1151 		syscallarg(int) flags;
1152 		syscallarg(int) mode;
1153 	} */ nuap;
1154 
1155 	SCARG(&nuap, path) = SCARG(uap, path);
1156 	SCARG(&nuap, mode) = SCARG(uap, mode);
1157 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1158 	return (open(p, &nuap));
1159 }
1160 #endif /* COMPAT_43 */
1161 
1162 /*
1163  * Create a special file.
1164  */
1165 #ifndef _SYS_SYSPROTO_H_
1166 struct mknod_args {
1167 	char	*path;
1168 	int	mode;
1169 	int	dev;
1170 };
1171 #endif
1172 /* ARGSUSED */
1173 int
1174 mknod(p, uap)
1175 	struct proc *p;
1176 	register struct mknod_args /* {
1177 		syscallarg(char *) path;
1178 		syscallarg(int) mode;
1179 		syscallarg(int) dev;
1180 	} */ *uap;
1181 {
1182 	register struct vnode *vp;
1183 	struct vattr vattr;
1184 	int error;
1185 	int whiteout = 0;
1186 	struct nameidata nd;
1187 
1188 	switch (SCARG(uap, mode) & S_IFMT) {
1189 	case S_IFCHR:
1190 	case S_IFBLK:
1191 		error = suser(p);
1192 		break;
1193 	default:
1194 		error = suser_xxx(0, p, PRISON_ROOT);
1195 		break;
1196 	}
1197 	if (error)
1198 		return (error);
1199 	bwillwrite();
1200 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1201 	if ((error = namei(&nd)) != 0)
1202 		return (error);
1203 	vp = nd.ni_vp;
1204 	if (vp != NULL)
1205 		error = EEXIST;
1206 	else {
1207 		VATTR_NULL(&vattr);
1208 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1209 		vattr.va_rdev = SCARG(uap, dev);
1210 		whiteout = 0;
1211 
1212 		switch (SCARG(uap, mode) & S_IFMT) {
1213 		case S_IFMT:	/* used by badsect to flag bad sectors */
1214 			vattr.va_type = VBAD;
1215 			break;
1216 		case S_IFCHR:
1217 			vattr.va_type = VCHR;
1218 			break;
1219 		case S_IFBLK:
1220 			vattr.va_type = VBLK;
1221 			break;
1222 		case S_IFWHT:
1223 			whiteout = 1;
1224 			break;
1225 		default:
1226 			error = EINVAL;
1227 			break;
1228 		}
1229 	}
1230 	if (!error) {
1231 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1232 		if (whiteout)
1233 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1234 		else {
1235 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1236 						&nd.ni_cnd, &vattr);
1237 			if (error == 0)
1238 				vput(nd.ni_vp);
1239 		}
1240 		NDFREE(&nd, NDF_ONLY_PNBUF);
1241 		vput(nd.ni_dvp);
1242 	} else {
1243 		NDFREE(&nd, NDF_ONLY_PNBUF);
1244 		if (nd.ni_dvp == vp)
1245 			vrele(nd.ni_dvp);
1246 		else
1247 			vput(nd.ni_dvp);
1248 		if (vp)
1249 			vrele(vp);
1250 	}
1251 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1252 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1253 	return (error);
1254 }
1255 
1256 /*
1257  * Create a named pipe.
1258  */
1259 #ifndef _SYS_SYSPROTO_H_
1260 struct mkfifo_args {
1261 	char	*path;
1262 	int	mode;
1263 };
1264 #endif
1265 /* ARGSUSED */
1266 int
1267 mkfifo(p, uap)
1268 	struct proc *p;
1269 	register struct mkfifo_args /* {
1270 		syscallarg(char *) path;
1271 		syscallarg(int) mode;
1272 	} */ *uap;
1273 {
1274 	struct vattr vattr;
1275 	int error;
1276 	struct nameidata nd;
1277 
1278 	bwillwrite();
1279 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1280 	if ((error = namei(&nd)) != 0)
1281 		return (error);
1282 	if (nd.ni_vp != NULL) {
1283 		NDFREE(&nd, NDF_ONLY_PNBUF);
1284 		if (nd.ni_dvp == nd.ni_vp)
1285 			vrele(nd.ni_dvp);
1286 		else
1287 			vput(nd.ni_dvp);
1288 		vrele(nd.ni_vp);
1289 		return (EEXIST);
1290 	}
1291 	VATTR_NULL(&vattr);
1292 	vattr.va_type = VFIFO;
1293 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1294 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1295 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1296 	if (error == 0)
1297 		vput(nd.ni_vp);
1298 	NDFREE(&nd, NDF_ONLY_PNBUF);
1299 	vput(nd.ni_dvp);
1300 	return (error);
1301 }
1302 
1303 /*
1304  * Make a hard file link.
1305  */
1306 #ifndef _SYS_SYSPROTO_H_
1307 struct link_args {
1308 	char	*path;
1309 	char	*link;
1310 };
1311 #endif
1312 /* ARGSUSED */
1313 int
1314 link(p, uap)
1315 	struct proc *p;
1316 	register struct link_args /* {
1317 		syscallarg(char *) path;
1318 		syscallarg(char *) link;
1319 	} */ *uap;
1320 {
1321 	register struct vnode *vp;
1322 	struct nameidata nd;
1323 	int error;
1324 
1325 	bwillwrite();
1326 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1327 	if ((error = namei(&nd)) != 0)
1328 		return (error);
1329 	NDFREE(&nd, NDF_ONLY_PNBUF);
1330 	vp = nd.ni_vp;
1331 	if (vp->v_type == VDIR)
1332 		error = EPERM;		/* POSIX */
1333 	else {
1334 		NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1335 		error = namei(&nd);
1336 		if (!error) {
1337 			if (nd.ni_vp != NULL) {
1338 				if (nd.ni_vp)
1339 					vrele(nd.ni_vp);
1340 				error = EEXIST;
1341 			} else {
1342 				VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
1343 				    LEASE_WRITE);
1344 				VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1345 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1346 			}
1347 			NDFREE(&nd, NDF_ONLY_PNBUF);
1348 			if (nd.ni_dvp == nd.ni_vp)
1349 				vrele(nd.ni_dvp);
1350 			else
1351 				vput(nd.ni_dvp);
1352 		}
1353 	}
1354 	vrele(vp);
1355 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1356 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1357 	return (error);
1358 }
1359 
1360 /*
1361  * Make a symbolic link.
1362  */
1363 #ifndef _SYS_SYSPROTO_H_
1364 struct symlink_args {
1365 	char	*path;
1366 	char	*link;
1367 };
1368 #endif
1369 /* ARGSUSED */
1370 int
1371 symlink(p, uap)
1372 	struct proc *p;
1373 	register struct symlink_args /* {
1374 		syscallarg(char *) path;
1375 		syscallarg(char *) link;
1376 	} */ *uap;
1377 {
1378 	struct vattr vattr;
1379 	char *path;
1380 	int error;
1381 	struct nameidata nd;
1382 
1383 	path = zalloc(namei_zone);
1384 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1385 		goto out;
1386 	bwillwrite();
1387 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1388 	if ((error = namei(&nd)) != 0)
1389 		goto out;
1390 	if (nd.ni_vp) {
1391 		NDFREE(&nd, NDF_ONLY_PNBUF);
1392 		if (nd.ni_dvp == nd.ni_vp)
1393 			vrele(nd.ni_dvp);
1394 		else
1395 			vput(nd.ni_dvp);
1396 		vrele(nd.ni_vp);
1397 		error = EEXIST;
1398 		goto out;
1399 	}
1400 	VATTR_NULL(&vattr);
1401 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1402 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1403 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1404 	NDFREE(&nd, NDF_ONLY_PNBUF);
1405 	if (error == 0)
1406 		vput(nd.ni_vp);
1407 	vput(nd.ni_dvp);
1408 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1409 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1410 out:
1411 	zfree(namei_zone, path);
1412 	return (error);
1413 }
1414 
1415 /*
1416  * Delete a whiteout from the filesystem.
1417  */
1418 /* ARGSUSED */
1419 int
1420 undelete(p, uap)
1421 	struct proc *p;
1422 	register struct undelete_args /* {
1423 		syscallarg(char *) path;
1424 	} */ *uap;
1425 {
1426 	int error;
1427 	struct nameidata nd;
1428 
1429 	bwillwrite();
1430 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1431 	    SCARG(uap, path), p);
1432 	error = namei(&nd);
1433 	if (error)
1434 		return (error);
1435 
1436 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1437 		NDFREE(&nd, NDF_ONLY_PNBUF);
1438 		if (nd.ni_dvp == nd.ni_vp)
1439 			vrele(nd.ni_dvp);
1440 		else
1441 			vput(nd.ni_dvp);
1442 		if (nd.ni_vp)
1443 			vrele(nd.ni_vp);
1444 		return (EEXIST);
1445 	}
1446 
1447 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1448 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1449 	NDFREE(&nd, NDF_ONLY_PNBUF);
1450 	vput(nd.ni_dvp);
1451 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1452 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1453 	return (error);
1454 }
1455 
1456 /*
1457  * Delete a name from the filesystem.
1458  */
1459 #ifndef _SYS_SYSPROTO_H_
1460 struct unlink_args {
1461 	char	*path;
1462 };
1463 #endif
1464 /* ARGSUSED */
1465 int
1466 unlink(p, uap)
1467 	struct proc *p;
1468 	struct unlink_args /* {
1469 		syscallarg(char *) path;
1470 	} */ *uap;
1471 {
1472 	register struct vnode *vp;
1473 	int error;
1474 	struct nameidata nd;
1475 
1476 	bwillwrite();
1477 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1478 	if ((error = namei(&nd)) != 0)
1479 		return (error);
1480 	vp = nd.ni_vp;
1481 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1482 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1483 
1484 	if (vp->v_type == VDIR)
1485 		error = EPERM;		/* POSIX */
1486 	else {
1487 		/*
1488 		 * The root of a mounted filesystem cannot be deleted.
1489 		 *
1490 		 * XXX: can this only be a VDIR case?
1491 		 */
1492 		if (vp->v_flag & VROOT)
1493 			error = EBUSY;
1494 	}
1495 
1496 	if (!error) {
1497 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1498 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1499 	}
1500 	NDFREE(&nd, NDF_ONLY_PNBUF);
1501 	if (nd.ni_dvp == vp)
1502 		vrele(nd.ni_dvp);
1503 	else
1504 		vput(nd.ni_dvp);
1505 	if (vp != NULLVP)
1506 		vput(vp);
1507 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1508 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1509 	return (error);
1510 }
1511 
1512 /*
1513  * Reposition read/write file offset.
1514  */
1515 #ifndef _SYS_SYSPROTO_H_
1516 struct lseek_args {
1517 	int	fd;
1518 	int	pad;
1519 	off_t	offset;
1520 	int	whence;
1521 };
1522 #endif
1523 int
1524 lseek(p, uap)
1525 	struct proc *p;
1526 	register struct lseek_args /* {
1527 		syscallarg(int) fd;
1528 		syscallarg(int) pad;
1529 		syscallarg(off_t) offset;
1530 		syscallarg(int) whence;
1531 	} */ *uap;
1532 {
1533 	struct ucred *cred = p->p_ucred;
1534 	register struct filedesc *fdp = p->p_fd;
1535 	register struct file *fp;
1536 	struct vattr vattr;
1537 	int error;
1538 
1539 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1540 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1541 		return (EBADF);
1542 	if (fp->f_type != DTYPE_VNODE)
1543 		return (ESPIPE);
1544 	switch (SCARG(uap, whence)) {
1545 	case L_INCR:
1546 		fp->f_offset += SCARG(uap, offset);
1547 		break;
1548 	case L_XTND:
1549 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1550 		if (error)
1551 			return (error);
1552 		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1553 		break;
1554 	case L_SET:
1555 		fp->f_offset = SCARG(uap, offset);
1556 		break;
1557 	default:
1558 		return (EINVAL);
1559 	}
1560 	*(off_t *)(p->p_retval) = fp->f_offset;
1561 	return (0);
1562 }
1563 
1564 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1565 /*
1566  * Reposition read/write file offset.
1567  */
1568 #ifndef _SYS_SYSPROTO_H_
1569 struct olseek_args {
1570 	int	fd;
1571 	long	offset;
1572 	int	whence;
1573 };
1574 #endif
1575 int
1576 olseek(p, uap)
1577 	struct proc *p;
1578 	register struct olseek_args /* {
1579 		syscallarg(int) fd;
1580 		syscallarg(long) offset;
1581 		syscallarg(int) whence;
1582 	} */ *uap;
1583 {
1584 	struct lseek_args /* {
1585 		syscallarg(int) fd;
1586 		syscallarg(int) pad;
1587 		syscallarg(off_t) offset;
1588 		syscallarg(int) whence;
1589 	} */ nuap;
1590 	int error;
1591 
1592 	SCARG(&nuap, fd) = SCARG(uap, fd);
1593 	SCARG(&nuap, offset) = SCARG(uap, offset);
1594 	SCARG(&nuap, whence) = SCARG(uap, whence);
1595 	error = lseek(p, &nuap);
1596 	return (error);
1597 }
#endif /* COMPAT_43 || COMPAT_SUNOS */
1599 
1600 /*
1601  * Check access permissions.
1602  */
1603 #ifndef _SYS_SYSPROTO_H_
1604 struct access_args {
1605 	char	*path;
1606 	int	flags;
1607 };
1608 #endif
1609 int
1610 access(p, uap)
1611 	struct proc *p;
1612 	register struct access_args /* {
1613 		syscallarg(char *) path;
1614 		syscallarg(int) flags;
1615 	} */ *uap;
1616 {
1617 	struct ucred *cred, *tmpcred;
1618 	register struct vnode *vp;
1619 	int error, flags;
1620 	struct nameidata nd;
1621 
1622 	cred = p->p_ucred;
1623 	/*
1624 	 * Create and modify a temporary credential instead of one that
1625 	 * is potentially shared.  This could also mess up socket
1626 	 * buffer accounting which can run in an interrupt context.
1627 	 */
1628 	tmpcred = crdup(cred);
1629 	tmpcred->cr_uid = p->p_cred->p_ruid;
1630 	tmpcred->cr_groups[0] = p->p_cred->p_rgid;
1631 	p->p_ucred = tmpcred;
1632 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1633 	    SCARG(uap, path), p);
1634 	if ((error = namei(&nd)) != 0)
1635 		goto out1;
1636 	vp = nd.ni_vp;
1637 
1638 	/* Flags == 0 means only check for existence. */
1639 	if (SCARG(uap, flags)) {
1640 		flags = 0;
1641 		if (SCARG(uap, flags) & R_OK)
1642 			flags |= VREAD;
1643 		if (SCARG(uap, flags) & W_OK)
1644 			flags |= VWRITE;
1645 		if (SCARG(uap, flags) & X_OK)
1646 			flags |= VEXEC;
1647 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1648 			error = VOP_ACCESS(vp, flags, tmpcred, p);
1649 	}
1650 	NDFREE(&nd, NDF_ONLY_PNBUF);
1651 	vput(vp);
1652 out1:
1653 	p->p_ucred = cred;
1654 	crfree(tmpcred);
1655 	return (error);
1656 }
1657 
1658 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1659 /*
1660  * Get file status; this version follows links.
1661  */
1662 #ifndef _SYS_SYSPROTO_H_
1663 struct ostat_args {
1664 	char	*path;
1665 	struct ostat *ub;
1666 };
1667 #endif
1668 /* ARGSUSED */
1669 int
1670 ostat(p, uap)
1671 	struct proc *p;
1672 	register struct ostat_args /* {
1673 		syscallarg(char *) path;
1674 		syscallarg(struct ostat *) ub;
1675 	} */ *uap;
1676 {
1677 	struct stat sb;
1678 	struct ostat osb;
1679 	int error;
1680 	struct nameidata nd;
1681 
1682 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1683 	    SCARG(uap, path), p);
1684 	if ((error = namei(&nd)) != 0)
1685 		return (error);
1686 	NDFREE(&nd, NDF_ONLY_PNBUF);
1687 	error = vn_stat(nd.ni_vp, &sb, p);
1688 	vput(nd.ni_vp);
1689 	if (error)
1690 		return (error);
1691 	cvtstat(&sb, &osb);
1692 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1693 	return (error);
1694 }
1695 
1696 /*
1697  * Get file status; this version does not follow links.
1698  */
1699 #ifndef _SYS_SYSPROTO_H_
1700 struct olstat_args {
1701 	char	*path;
1702 	struct ostat *ub;
1703 };
1704 #endif
1705 /* ARGSUSED */
1706 int
1707 olstat(p, uap)
1708 	struct proc *p;
1709 	register struct olstat_args /* {
1710 		syscallarg(char *) path;
1711 		syscallarg(struct ostat *) ub;
1712 	} */ *uap;
1713 {
1714 	struct vnode *vp;
1715 	struct stat sb;
1716 	struct ostat osb;
1717 	int error;
1718 	struct nameidata nd;
1719 
1720 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1721 	    SCARG(uap, path), p);
1722 	if ((error = namei(&nd)) != 0)
1723 		return (error);
1724 	vp = nd.ni_vp;
1725 	error = vn_stat(vp, &sb, p);
1726 	NDFREE(&nd, NDF_ONLY_PNBUF);
1727 	vput(vp);
1728 	if (error)
1729 		return (error);
1730 	cvtstat(&sb, &osb);
1731 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1732 	return (error);
1733 }
1734 
1735 /*
1736  * Convert from an old to a new stat structure.
1737  */
1738 void
1739 cvtstat(st, ost)
1740 	struct stat *st;
1741 	struct ostat *ost;
1742 {
1743 
1744 	ost->st_dev = st->st_dev;
1745 	ost->st_ino = st->st_ino;
1746 	ost->st_mode = st->st_mode;
1747 	ost->st_nlink = st->st_nlink;
1748 	ost->st_uid = st->st_uid;
1749 	ost->st_gid = st->st_gid;
1750 	ost->st_rdev = st->st_rdev;
1751 	if (st->st_size < (quad_t)1 << 32)
1752 		ost->st_size = st->st_size;
1753 	else
1754 		ost->st_size = -2;
1755 	ost->st_atime = st->st_atime;
1756 	ost->st_mtime = st->st_mtime;
1757 	ost->st_ctime = st->st_ctime;
1758 	ost->st_blksize = st->st_blksize;
1759 	ost->st_blocks = st->st_blocks;
1760 	ost->st_flags = st->st_flags;
1761 	ost->st_gen = st->st_gen;
1762 }
1763 #endif /* COMPAT_43 || COMPAT_SUNOS */
1764 
1765 /*
1766  * Get file status; this version follows links.
1767  */
1768 #ifndef _SYS_SYSPROTO_H_
1769 struct stat_args {
1770 	char	*path;
1771 	struct stat *ub;
1772 };
1773 #endif
1774 /* ARGSUSED */
1775 int
1776 stat(p, uap)
1777 	struct proc *p;
1778 	register struct stat_args /* {
1779 		syscallarg(char *) path;
1780 		syscallarg(struct stat *) ub;
1781 	} */ *uap;
1782 {
1783 	struct stat sb;
1784 	int error;
1785 	struct nameidata nd;
1786 
1787 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1788 	    SCARG(uap, path), p);
1789 	if ((error = namei(&nd)) != 0)
1790 		return (error);
1791 	error = vn_stat(nd.ni_vp, &sb, p);
1792 	NDFREE(&nd, NDF_ONLY_PNBUF);
1793 	vput(nd.ni_vp);
1794 	if (error)
1795 		return (error);
1796 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1797 	return (error);
1798 }
1799 
1800 /*
1801  * Get file status; this version does not follow links.
1802  */
1803 #ifndef _SYS_SYSPROTO_H_
1804 struct lstat_args {
1805 	char	*path;
1806 	struct stat *ub;
1807 };
1808 #endif
1809 /* ARGSUSED */
1810 int
1811 lstat(p, uap)
1812 	struct proc *p;
1813 	register struct lstat_args /* {
1814 		syscallarg(char *) path;
1815 		syscallarg(struct stat *) ub;
1816 	} */ *uap;
1817 {
1818 	int error;
1819 	struct vnode *vp;
1820 	struct stat sb;
1821 	struct nameidata nd;
1822 
1823 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1824 	    SCARG(uap, path), p);
1825 	if ((error = namei(&nd)) != 0)
1826 		return (error);
1827 	vp = nd.ni_vp;
1828 	error = vn_stat(vp, &sb, p);
1829 	NDFREE(&nd, NDF_ONLY_PNBUF);
1830 	vput(vp);
1831 	if (error)
1832 		return (error);
1833 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1834 	return (error);
1835 }
1836 
1837 void
1838 cvtnstat(sb, nsb)
1839 	struct stat *sb;
1840 	struct nstat *nsb;
1841 {
1842 	nsb->st_dev = sb->st_dev;
1843 	nsb->st_ino = sb->st_ino;
1844 	nsb->st_mode = sb->st_mode;
1845 	nsb->st_nlink = sb->st_nlink;
1846 	nsb->st_uid = sb->st_uid;
1847 	nsb->st_gid = sb->st_gid;
1848 	nsb->st_rdev = sb->st_rdev;
1849 	nsb->st_atimespec = sb->st_atimespec;
1850 	nsb->st_mtimespec = sb->st_mtimespec;
1851 	nsb->st_ctimespec = sb->st_ctimespec;
1852 	nsb->st_size = sb->st_size;
1853 	nsb->st_blocks = sb->st_blocks;
1854 	nsb->st_blksize = sb->st_blksize;
1855 	nsb->st_flags = sb->st_flags;
1856 	nsb->st_gen = sb->st_gen;
1857 	nsb->st_qspare[0] = sb->st_qspare[0];
1858 	nsb->st_qspare[1] = sb->st_qspare[1];
1859 }
1860 
1861 #ifndef _SYS_SYSPROTO_H_
1862 struct nstat_args {
1863 	char	*path;
1864 	struct nstat *ub;
1865 };
1866 #endif
1867 /* ARGSUSED */
1868 int
1869 nstat(p, uap)
1870 	struct proc *p;
1871 	register struct nstat_args /* {
1872 		syscallarg(char *) path;
1873 		syscallarg(struct nstat *) ub;
1874 	} */ *uap;
1875 {
1876 	struct stat sb;
1877 	struct nstat nsb;
1878 	int error;
1879 	struct nameidata nd;
1880 
1881 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1882 	    SCARG(uap, path), p);
1883 	if ((error = namei(&nd)) != 0)
1884 		return (error);
1885 	NDFREE(&nd, NDF_ONLY_PNBUF);
1886 	error = vn_stat(nd.ni_vp, &sb, p);
1887 	vput(nd.ni_vp);
1888 	if (error)
1889 		return (error);
1890 	cvtnstat(&sb, &nsb);
1891 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1892 	return (error);
1893 }
1894 
1895 /*
1896  * Get file status; this version does not follow links.
1897  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument block for nlstat(): the path to look up and the user buffer
 * that receives the struct nstat result.
 */
struct nlstat_args {
	char	*path;
	struct nstat *ub;
};
#endif
1903 #endif
1904 /* ARGSUSED */
1905 int
1906 nlstat(p, uap)
1907 	struct proc *p;
1908 	register struct nlstat_args /* {
1909 		syscallarg(char *) path;
1910 		syscallarg(struct nstat *) ub;
1911 	} */ *uap;
1912 {
1913 	int error;
1914 	struct vnode *vp;
1915 	struct stat sb;
1916 	struct nstat nsb;
1917 	struct nameidata nd;
1918 
1919 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1920 	    SCARG(uap, path), p);
1921 	if ((error = namei(&nd)) != 0)
1922 		return (error);
1923 	vp = nd.ni_vp;
1924 	NDFREE(&nd, NDF_ONLY_PNBUF);
1925 	error = vn_stat(vp, &sb, p);
1926 	vput(vp);
1927 	if (error)
1928 		return (error);
1929 	cvtnstat(&sb, &nsb);
1930 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1931 	return (error);
1932 }
1933 
1934 /*
1935  * Get configurable pathname variables.
1936  */
1937 #ifndef _SYS_SYSPROTO_H_
1938 struct pathconf_args {
1939 	char	*path;
1940 	int	name;
1941 };
1942 #endif
1943 /* ARGSUSED */
1944 int
1945 pathconf(p, uap)
1946 	struct proc *p;
1947 	register struct pathconf_args /* {
1948 		syscallarg(char *) path;
1949 		syscallarg(int) name;
1950 	} */ *uap;
1951 {
1952 	int error;
1953 	struct nameidata nd;
1954 
1955 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1956 	    SCARG(uap, path), p);
1957 	if ((error = namei(&nd)) != 0)
1958 		return (error);
1959 	NDFREE(&nd, NDF_ONLY_PNBUF);
1960 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
1961 	vput(nd.ni_vp);
1962 	return (error);
1963 }
1964 
1965 /*
1966  * Return target name of a symbolic link.
1967  */
1968 #ifndef _SYS_SYSPROTO_H_
1969 struct readlink_args {
1970 	char	*path;
1971 	char	*buf;
1972 	int	count;
1973 };
1974 #endif
1975 /* ARGSUSED */
1976 int
1977 readlink(p, uap)
1978 	struct proc *p;
1979 	register struct readlink_args /* {
1980 		syscallarg(char *) path;
1981 		syscallarg(char *) buf;
1982 		syscallarg(int) count;
1983 	} */ *uap;
1984 {
1985 	register struct vnode *vp;
1986 	struct iovec aiov;
1987 	struct uio auio;
1988 	int error;
1989 	struct nameidata nd;
1990 
1991 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1992 	    SCARG(uap, path), p);
1993 	if ((error = namei(&nd)) != 0)
1994 		return (error);
1995 	NDFREE(&nd, NDF_ONLY_PNBUF);
1996 	vp = nd.ni_vp;
1997 	if (vp->v_type != VLNK)
1998 		error = EINVAL;
1999 	else {
2000 		aiov.iov_base = SCARG(uap, buf);
2001 		aiov.iov_len = SCARG(uap, count);
2002 		auio.uio_iov = &aiov;
2003 		auio.uio_iovcnt = 1;
2004 		auio.uio_offset = 0;
2005 		auio.uio_rw = UIO_READ;
2006 		auio.uio_segflg = UIO_USERSPACE;
2007 		auio.uio_procp = p;
2008 		auio.uio_resid = SCARG(uap, count);
2009 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2010 	}
2011 	vput(vp);
2012 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2013 	return (error);
2014 }
2015 
2016 static int
2017 setfflags(p, vp, flags)
2018 	struct proc *p;
2019 	struct vnode *vp;
2020 	int flags;
2021 {
2022 	int error;
2023 	struct vattr vattr;
2024 
2025 	/*
2026 	 * Prevent non-root users from setting flags on devices.  When
2027 	 * a device is reused, users can retain ownership of the device
2028 	 * if they are allowed to set flags and programs assume that
2029 	 * chown can't fail when done as root.
2030 	 */
2031 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2032 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
2033 		return (error);
2034 
2035 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2036 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2037 	VATTR_NULL(&vattr);
2038 	vattr.va_flags = flags;
2039 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2040 	VOP_UNLOCK(vp, 0, p);
2041 	return (error);
2042 }
2043 
2044 /*
2045  * Change flags of a file given a path name.
2046  */
2047 #ifndef _SYS_SYSPROTO_H_
2048 struct chflags_args {
2049 	char	*path;
2050 	int	flags;
2051 };
2052 #endif
2053 /* ARGSUSED */
2054 int
2055 chflags(p, uap)
2056 	struct proc *p;
2057 	register struct chflags_args /* {
2058 		syscallarg(char *) path;
2059 		syscallarg(int) flags;
2060 	} */ *uap;
2061 {
2062 	int error;
2063 	struct nameidata nd;
2064 
2065 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2066 	if ((error = namei(&nd)) != 0)
2067 		return (error);
2068 	NDFREE(&nd, NDF_ONLY_PNBUF);
2069 	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2070 	vrele(nd.ni_vp);
2071 	return error;
2072 }
2073 
2074 /*
2075  * Change flags of a file given a file descriptor.
2076  */
2077 #ifndef _SYS_SYSPROTO_H_
2078 struct fchflags_args {
2079 	int	fd;
2080 	int	flags;
2081 };
2082 #endif
2083 /* ARGSUSED */
2084 int
2085 fchflags(p, uap)
2086 	struct proc *p;
2087 	register struct fchflags_args /* {
2088 		syscallarg(int) fd;
2089 		syscallarg(int) flags;
2090 	} */ *uap;
2091 {
2092 	struct file *fp;
2093 	int error;
2094 
2095 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2096 		return (error);
2097 	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2098 }
2099 
2100 static int
2101 setfmode(p, vp, mode)
2102 	struct proc *p;
2103 	struct vnode *vp;
2104 	int mode;
2105 {
2106 	int error;
2107 	struct vattr vattr;
2108 
2109 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2110 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2111 	VATTR_NULL(&vattr);
2112 	vattr.va_mode = mode & ALLPERMS;
2113 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2114 	VOP_UNLOCK(vp, 0, p);
2115 	return error;
2116 }
2117 
2118 /*
2119  * Change mode of a file given path name.
2120  */
2121 #ifndef _SYS_SYSPROTO_H_
2122 struct chmod_args {
2123 	char	*path;
2124 	int	mode;
2125 };
2126 #endif
2127 /* ARGSUSED */
2128 int
2129 chmod(p, uap)
2130 	struct proc *p;
2131 	register struct chmod_args /* {
2132 		syscallarg(char *) path;
2133 		syscallarg(int) mode;
2134 	} */ *uap;
2135 {
2136 	int error;
2137 	struct nameidata nd;
2138 
2139 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2140 	if ((error = namei(&nd)) != 0)
2141 		return (error);
2142 	NDFREE(&nd, NDF_ONLY_PNBUF);
2143 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2144 	vrele(nd.ni_vp);
2145 	return error;
2146 }
2147 
2148 /*
2149  * Change mode of a file given path name (don't follow links.)
2150  */
2151 #ifndef _SYS_SYSPROTO_H_
2152 struct lchmod_args {
2153 	char	*path;
2154 	int	mode;
2155 };
2156 #endif
2157 /* ARGSUSED */
2158 int
2159 lchmod(p, uap)
2160 	struct proc *p;
2161 	register struct lchmod_args /* {
2162 		syscallarg(char *) path;
2163 		syscallarg(int) mode;
2164 	} */ *uap;
2165 {
2166 	int error;
2167 	struct nameidata nd;
2168 
2169 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2170 	if ((error = namei(&nd)) != 0)
2171 		return (error);
2172 	NDFREE(&nd, NDF_ONLY_PNBUF);
2173 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2174 	vrele(nd.ni_vp);
2175 	return error;
2176 }
2177 
2178 /*
2179  * Change mode of a file given a file descriptor.
2180  */
2181 #ifndef _SYS_SYSPROTO_H_
2182 struct fchmod_args {
2183 	int	fd;
2184 	int	mode;
2185 };
2186 #endif
2187 /* ARGSUSED */
2188 int
2189 fchmod(p, uap)
2190 	struct proc *p;
2191 	register struct fchmod_args /* {
2192 		syscallarg(int) fd;
2193 		syscallarg(int) mode;
2194 	} */ *uap;
2195 {
2196 	struct file *fp;
2197 	int error;
2198 
2199 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2200 		return (error);
2201 	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2202 }
2203 
2204 static int
2205 setfown(p, vp, uid, gid)
2206 	struct proc *p;
2207 	struct vnode *vp;
2208 	uid_t uid;
2209 	gid_t gid;
2210 {
2211 	int error;
2212 	struct vattr vattr;
2213 
2214 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2215 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2216 	VATTR_NULL(&vattr);
2217 	vattr.va_uid = uid;
2218 	vattr.va_gid = gid;
2219 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2220 	VOP_UNLOCK(vp, 0, p);
2221 	return error;
2222 }
2223 
2224 /*
2225  * Set ownership given a path name.
2226  */
2227 #ifndef _SYS_SYSPROTO_H_
2228 struct chown_args {
2229 	char	*path;
2230 	int	uid;
2231 	int	gid;
2232 };
2233 #endif
2234 /* ARGSUSED */
2235 int
2236 chown(p, uap)
2237 	struct proc *p;
2238 	register struct chown_args /* {
2239 		syscallarg(char *) path;
2240 		syscallarg(int) uid;
2241 		syscallarg(int) gid;
2242 	} */ *uap;
2243 {
2244 	int error;
2245 	struct nameidata nd;
2246 
2247 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2248 	if ((error = namei(&nd)) != 0)
2249 		return (error);
2250 	NDFREE(&nd, NDF_ONLY_PNBUF);
2251 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2252 	vrele(nd.ni_vp);
2253 	return (error);
2254 }
2255 
2256 /*
2257  * Set ownership given a path name, do not cross symlinks.
2258  */
2259 #ifndef _SYS_SYSPROTO_H_
2260 struct lchown_args {
2261 	char	*path;
2262 	int	uid;
2263 	int	gid;
2264 };
2265 #endif
2266 /* ARGSUSED */
2267 int
2268 lchown(p, uap)
2269 	struct proc *p;
2270 	register struct lchown_args /* {
2271 		syscallarg(char *) path;
2272 		syscallarg(int) uid;
2273 		syscallarg(int) gid;
2274 	} */ *uap;
2275 {
2276 	int error;
2277 	struct nameidata nd;
2278 
2279 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2280 	if ((error = namei(&nd)) != 0)
2281 		return (error);
2282 	NDFREE(&nd, NDF_ONLY_PNBUF);
2283 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2284 	vrele(nd.ni_vp);
2285 	return (error);
2286 }
2287 
2288 /*
2289  * Set ownership given a file descriptor.
2290  */
2291 #ifndef _SYS_SYSPROTO_H_
2292 struct fchown_args {
2293 	int	fd;
2294 	int	uid;
2295 	int	gid;
2296 };
2297 #endif
2298 /* ARGSUSED */
2299 int
2300 fchown(p, uap)
2301 	struct proc *p;
2302 	register struct fchown_args /* {
2303 		syscallarg(int) fd;
2304 		syscallarg(int) uid;
2305 		syscallarg(int) gid;
2306 	} */ *uap;
2307 {
2308 	struct file *fp;
2309 	int error;
2310 
2311 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2312 		return (error);
2313 	return setfown(p, (struct vnode *)fp->f_data,
2314 		SCARG(uap, uid), SCARG(uap, gid));
2315 }
2316 
2317 static int
2318 getutimes(usrtvp, tsp)
2319 	const struct timeval *usrtvp;
2320 	struct timespec *tsp;
2321 {
2322 	struct timeval tv[2];
2323 	int error;
2324 
2325 	if (usrtvp == NULL) {
2326 		microtime(&tv[0]);
2327 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2328 		tsp[1] = tsp[0];
2329 	} else {
2330 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2331 			return (error);
2332 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2333 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2334 	}
2335 	return 0;
2336 }
2337 
2338 static int
2339 setutimes(p, vp, ts, nullflag)
2340 	struct proc *p;
2341 	struct vnode *vp;
2342 	const struct timespec *ts;
2343 	int nullflag;
2344 {
2345 	int error;
2346 	struct vattr vattr;
2347 
2348 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2349 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2350 	VATTR_NULL(&vattr);
2351 	vattr.va_atime = ts[0];
2352 	vattr.va_mtime = ts[1];
2353 	if (nullflag)
2354 		vattr.va_vaflags |= VA_UTIMES_NULL;
2355 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2356 	VOP_UNLOCK(vp, 0, p);
2357 	return error;
2358 }
2359 
2360 /*
2361  * Set the access and modification times of a file.
2362  */
2363 #ifndef _SYS_SYSPROTO_H_
2364 struct utimes_args {
2365 	char	*path;
2366 	struct	timeval *tptr;
2367 };
2368 #endif
2369 /* ARGSUSED */
2370 int
2371 utimes(p, uap)
2372 	struct proc *p;
2373 	register struct utimes_args /* {
2374 		syscallarg(char *) path;
2375 		syscallarg(struct timeval *) tptr;
2376 	} */ *uap;
2377 {
2378 	struct timespec ts[2];
2379 	struct timeval *usrtvp;
2380 	int error;
2381 	struct nameidata nd;
2382 
2383 	usrtvp = SCARG(uap, tptr);
2384 	if ((error = getutimes(usrtvp, ts)) != 0)
2385 		return (error);
2386 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2387 	if ((error = namei(&nd)) != 0)
2388 		return (error);
2389 	NDFREE(&nd, NDF_ONLY_PNBUF);
2390 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2391 	vrele(nd.ni_vp);
2392 	return (error);
2393 }
2394 
2395 /*
 * Set the access and modification times of a file, without following a
 * trailing symbolic link (the lookup uses NOFOLLOW).
2397  */
2398 #ifndef _SYS_SYSPROTO_H_
2399 struct lutimes_args {
2400 	char	*path;
2401 	struct	timeval *tptr;
2402 };
2403 #endif
2404 /* ARGSUSED */
2405 int
2406 lutimes(p, uap)
2407 	struct proc *p;
2408 	register struct lutimes_args /* {
2409 		syscallarg(char *) path;
2410 		syscallarg(struct timeval *) tptr;
2411 	} */ *uap;
2412 {
2413 	struct timespec ts[2];
2414 	struct timeval *usrtvp;
2415 	int error;
2416 	struct nameidata nd;
2417 
2418 	usrtvp = SCARG(uap, tptr);
2419 	if ((error = getutimes(usrtvp, ts)) != 0)
2420 		return (error);
2421 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2422 	if ((error = namei(&nd)) != 0)
2423 		return (error);
2424 	NDFREE(&nd, NDF_ONLY_PNBUF);
2425 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2426 	vrele(nd.ni_vp);
2427 	return (error);
2428 }
2429 
2430 /*
 * Set the access and modification times of the file referenced by an open
 * file descriptor.
2432  */
2433 #ifndef _SYS_SYSPROTO_H_
2434 struct futimes_args {
2435 	int	fd;
2436 	struct	timeval *tptr;
2437 };
2438 #endif
2439 /* ARGSUSED */
2440 int
2441 futimes(p, uap)
2442 	struct proc *p;
2443 	register struct futimes_args /* {
2444 		syscallarg(int ) fd;
2445 		syscallarg(struct timeval *) tptr;
2446 	} */ *uap;
2447 {
2448 	struct timespec ts[2];
2449 	struct file *fp;
2450 	struct timeval *usrtvp;
2451 	int error;
2452 
2453 	usrtvp = SCARG(uap, tptr);
2454 	if ((error = getutimes(usrtvp, ts)) != 0)
2455 		return (error);
2456 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2457 		return (error);
2458 	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2459 }
2460 
2461 /*
2462  * Truncate a file given its path name.
2463  */
2464 #ifndef _SYS_SYSPROTO_H_
2465 struct truncate_args {
2466 	char	*path;
2467 	int	pad;
2468 	off_t	length;
2469 };
2470 #endif
2471 /* ARGSUSED */
2472 int
2473 truncate(p, uap)
2474 	struct proc *p;
2475 	register struct truncate_args /* {
2476 		syscallarg(char *) path;
2477 		syscallarg(int) pad;
2478 		syscallarg(off_t) length;
2479 	} */ *uap;
2480 {
2481 	register struct vnode *vp;
2482 	struct vattr vattr;
2483 	int error;
2484 	struct nameidata nd;
2485 
2486 	if (uap->length < 0)
2487 		return(EINVAL);
2488 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2489 	if ((error = namei(&nd)) != 0)
2490 		return (error);
2491 	vp = nd.ni_vp;
2492 	NDFREE(&nd, NDF_ONLY_PNBUF);
2493 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2494 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2495 	if (vp->v_type == VDIR)
2496 		error = EISDIR;
2497 	else if ((error = vn_writechk(vp)) == 0 &&
2498 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2499 		VATTR_NULL(&vattr);
2500 		vattr.va_size = SCARG(uap, length);
2501 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2502 	}
2503 	vput(vp);
2504 	return (error);
2505 }
2506 
2507 /*
2508  * Truncate a file given a file descriptor.
2509  */
2510 #ifndef _SYS_SYSPROTO_H_
2511 struct ftruncate_args {
2512 	int	fd;
2513 	int	pad;
2514 	off_t	length;
2515 };
2516 #endif
2517 /* ARGSUSED */
2518 int
2519 ftruncate(p, uap)
2520 	struct proc *p;
2521 	register struct ftruncate_args /* {
2522 		syscallarg(int) fd;
2523 		syscallarg(int) pad;
2524 		syscallarg(off_t) length;
2525 	} */ *uap;
2526 {
2527 	struct vattr vattr;
2528 	struct vnode *vp;
2529 	struct file *fp;
2530 	int error;
2531 
2532 	if (uap->length < 0)
2533 		return(EINVAL);
2534 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2535 		return (error);
2536 	if ((fp->f_flag & FWRITE) == 0)
2537 		return (EINVAL);
2538 	vp = (struct vnode *)fp->f_data;
2539 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2540 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2541 	if (vp->v_type == VDIR)
2542 		error = EISDIR;
2543 	else if ((error = vn_writechk(vp)) == 0) {
2544 		VATTR_NULL(&vattr);
2545 		vattr.va_size = SCARG(uap, length);
2546 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2547 	}
2548 	VOP_UNLOCK(vp, 0, p);
2549 	return (error);
2550 }
2551 
2552 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2553 /*
2554  * Truncate a file given its path name.
2555  */
2556 #ifndef _SYS_SYSPROTO_H_
2557 struct otruncate_args {
2558 	char	*path;
2559 	long	length;
2560 };
2561 #endif
2562 /* ARGSUSED */
2563 int
2564 otruncate(p, uap)
2565 	struct proc *p;
2566 	register struct otruncate_args /* {
2567 		syscallarg(char *) path;
2568 		syscallarg(long) length;
2569 	} */ *uap;
2570 {
2571 	struct truncate_args /* {
2572 		syscallarg(char *) path;
2573 		syscallarg(int) pad;
2574 		syscallarg(off_t) length;
2575 	} */ nuap;
2576 
2577 	SCARG(&nuap, path) = SCARG(uap, path);
2578 	SCARG(&nuap, length) = SCARG(uap, length);
2579 	return (truncate(p, &nuap));
2580 }
2581 
2582 /*
2583  * Truncate a file given a file descriptor.
2584  */
2585 #ifndef _SYS_SYSPROTO_H_
2586 struct oftruncate_args {
2587 	int	fd;
2588 	long	length;
2589 };
2590 #endif
2591 /* ARGSUSED */
2592 int
2593 oftruncate(p, uap)
2594 	struct proc *p;
2595 	register struct oftruncate_args /* {
2596 		syscallarg(int) fd;
2597 		syscallarg(long) length;
2598 	} */ *uap;
2599 {
2600 	struct ftruncate_args /* {
2601 		syscallarg(int) fd;
2602 		syscallarg(int) pad;
2603 		syscallarg(off_t) length;
2604 	} */ nuap;
2605 
2606 	SCARG(&nuap, fd) = SCARG(uap, fd);
2607 	SCARG(&nuap, length) = SCARG(uap, length);
2608 	return (ftruncate(p, &nuap));
2609 }
2610 #endif /* COMPAT_43 || COMPAT_SUNOS */
2611 
2612 /*
2613  * Sync an open file.
2614  */
2615 #ifndef _SYS_SYSPROTO_H_
2616 struct fsync_args {
2617 	int	fd;
2618 };
2619 #endif
2620 /* ARGSUSED */
2621 int
2622 fsync(p, uap)
2623 	struct proc *p;
2624 	struct fsync_args /* {
2625 		syscallarg(int) fd;
2626 	} */ *uap;
2627 {
2628 	register struct vnode *vp;
2629 	struct file *fp;
2630 	vm_object_t obj;
2631 	int error;
2632 
2633 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2634 		return (error);
2635 	vp = (struct vnode *)fp->f_data;
2636 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2637 	if (VOP_GETVOBJECT(vp, &obj) == 0)
2638 		vm_object_page_clean(obj, 0, 0, 0);
2639 	if ((error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p)) == 0 &&
2640 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
2641 	    bioops.io_fsync)
2642 		error = (*bioops.io_fsync)(vp);
2643 	VOP_UNLOCK(vp, 0, p);
2644 	return (error);
2645 }
2646 
2647 /*
2648  * Rename files.  Source and destination must either both be directories,
2649  * or both not be directories.  If target is a directory, it must be empty.
2650  */
2651 #ifndef _SYS_SYSPROTO_H_
2652 struct rename_args {
2653 	char	*from;
2654 	char	*to;
2655 };
2656 #endif
/*
 * rename(from, to)
 *
 * Looks up both names, rejects directory/non-directory mismatches
 * (ENOTDIR / EISDIR) and a source that is the target's parent directory
 * (EINVAL), then calls VOP_RENAME().  When source and target resolve to
 * the same vnode the internal marker -1 is used to skip VOP_RENAME() and
 * the call returns 0.
 *
 * Vnode bookkeeping: on the success path nothing but the two start
 * directories is released here after VOP_RENAME(); on the failure path
 * every vnode obtained from the two lookups is released explicitly.
 * NOTE(review): this presumes VOP_RENAME() disposes of the four vnodes it
 * is given - confirm against the VOP_RENAME contract.
 */
/* ARGSUSED */
int
rename(p, uap)
	struct proc *p;
	register struct rename_args /* {
		syscallarg(char *) from;
		syscallarg(char *) to;
	} */ *uap;
{
	register struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;

	bwillwrite();
	/* Source lookup: keep the parent (WANTPARENT) and the start directory. */
	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    SCARG(uap, from), p);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;
	/* Target lookup: parent and leaf (if it exists) are requested locked. */
	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
	    UIO_USERSPACE, SCARG(uap, to), p);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		/* Only the source side holds anything at this point. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	if (tvp != NULL) {
		/* An existing target must be of the same kind as the source. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory that will contain the target. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 * The -1 assigned here overrides any EINVAL set just above and is
	 * turned into a 0 return at the end of the function.
	 */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
		if (fromnd.ni_dvp != tdvp) {
			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
		}
		if (tvp) {
			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
		}
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Failure (or nothing to do): undo both lookups by hand.
		 * tdvp gets vrele() rather than vput() when it is the same
		 * vnode as tvp, since tvp's vput() below covers the unlock.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* SAVESTART left a reference on each start directory. */
	vrele(tond.ni_startdir);
	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* -1 is the internal "same vnode, nothing to do" marker. */
	if (error == -1)
		return (0);
	return (error);
}
2745 
2746 /*
2747  * Make a directory file.
2748  */
2749 #ifndef _SYS_SYSPROTO_H_
2750 struct mkdir_args {
2751 	char	*path;
2752 	int	mode;
2753 };
2754 #endif
2755 /* ARGSUSED */
2756 int
2757 mkdir(p, uap)
2758 	struct proc *p;
2759 	register struct mkdir_args /* {
2760 		syscallarg(char *) path;
2761 		syscallarg(int) mode;
2762 	} */ *uap;
2763 {
2764 	register struct vnode *vp;
2765 	struct vattr vattr;
2766 	int error;
2767 	struct nameidata nd;
2768 
2769 	bwillwrite();
2770 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2771 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2772 	if ((error = namei(&nd)) != 0)
2773 		return (error);
2774 	vp = nd.ni_vp;
2775 	if (vp != NULL) {
2776 		NDFREE(&nd, NDF_ONLY_PNBUF);
2777 		if (nd.ni_dvp == vp)
2778 			vrele(nd.ni_dvp);
2779 		else
2780 			vput(nd.ni_dvp);
2781 		vrele(vp);
2782 		return (EEXIST);
2783 	}
2784 	VATTR_NULL(&vattr);
2785 	vattr.va_type = VDIR;
2786 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2787 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2788 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2789 	NDFREE(&nd, NDF_ONLY_PNBUF);
2790 	vput(nd.ni_dvp);
2791 	if (!error)
2792 		vput(nd.ni_vp);
2793 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2794 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2795 	return (error);
2796 }
2797 
2798 /*
2799  * Remove a directory file.
2800  */
2801 #ifndef _SYS_SYSPROTO_H_
2802 struct rmdir_args {
2803 	char	*path;
2804 };
2805 #endif
2806 /* ARGSUSED */
2807 int
2808 rmdir(p, uap)
2809 	struct proc *p;
2810 	struct rmdir_args /* {
2811 		syscallarg(char *) path;
2812 	} */ *uap;
2813 {
2814 	register struct vnode *vp;
2815 	int error;
2816 	struct nameidata nd;
2817 
2818 	bwillwrite();
2819 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2820 	    SCARG(uap, path), p);
2821 	if ((error = namei(&nd)) != 0)
2822 		return (error);
2823 	vp = nd.ni_vp;
2824 	if (vp->v_type != VDIR) {
2825 		error = ENOTDIR;
2826 		goto out;
2827 	}
2828 	/*
2829 	 * No rmdir "." please.
2830 	 */
2831 	if (nd.ni_dvp == vp) {
2832 		error = EINVAL;
2833 		goto out;
2834 	}
2835 	/*
2836 	 * The root of a mounted filesystem cannot be deleted.
2837 	 */
2838 	if (vp->v_flag & VROOT)
2839 		error = EBUSY;
2840 	else {
2841 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2842 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2843 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2844 	}
2845 out:
2846 	NDFREE(&nd, NDF_ONLY_PNBUF);
2847 	if (nd.ni_dvp == vp)
2848 		vrele(nd.ni_dvp);
2849 	else
2850 		vput(nd.ni_dvp);
2851 	if (vp != NULLVP)
2852 		vput(vp);
2853 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2854 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2855 	return (error);
2856 }
2857 
2858 #ifdef COMPAT_43
2859 /*
2860  * Read a block of directory entries in a file system independent format.
2861  */
2862 #ifndef _SYS_SYSPROTO_H_
2863 struct ogetdirentries_args {
2864 	int	fd;
2865 	char	*buf;
2866 	u_int	count;
2867 	long	*basep;
2868 };
2869 #endif
/*
 * ogetdirentries(fd, buf, count, basep)
 *
 * Compatibility directory read.  Reads up to count bytes of directory
 * entries from descriptor fd starting at the file offset, stores the
 * starting offset through basep and reports the number of bytes delivered
 * in p_retval[0].
 *
 * Errors returned directly by this routine: EINVAL for count > 64KB or a
 * non-directory vnode, EBADF for a descriptor without FREAD, EIO for an
 * entry with a zero record length; other errors come from getvnode(),
 * VOP_READDIR(), uiomove(), union_dircheckp() and copyout().
 *
 * Unless the host is not little-endian and the mount has
 * mnt_maxsymlinklen <= 0, the entries are first read into a kernel
 * buffer so that the d_type/d_namlen fields of each entry can be
 * rewritten before the data is moved to the user buffer.
 */
int
ogetdirentries(p, uap)
	struct proc *p;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
unionread:
	/* Re-entered with a different vp when a union layer is exhausted. */
	if (vp->v_type != VDIR)
		return (EINVAL);
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	/* loff remembers where this read started; it is reported via basep. */
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			/* No per-entry fix-up needed: read straight to the user. */
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer of the same size
		 * (kuio is a copy of auio retargeted at dirbuf) so the
		 * entries can be patched before being copied out.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* readcnt = bytes VOP_READDIR() actually produced. */
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero d_reclen would loop forever: give up. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, p);
	if (error)
		return (error);
	/* Nothing was transferred: see whether another layer should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			/* -1 from the hook means "vp was switched, read again". */
			error = union_dircheckp(p, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				return (error);
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: repoint the open file
			 * at the covered vnode, rewind and read that.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	/* Unlike getdirentries(), basep is written unconditionally here. */
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
2988 #endif /* COMPAT_43 */
2989 
2990 /*
2991  * Read a block of directory entries in a file system independent format.
2992  */
2993 #ifndef _SYS_SYSPROTO_H_
2994 struct getdirentries_args {
2995 	int	fd;
2996 	char	*buf;
2997 	u_int	count;
2998 	long	*basep;
2999 };
3000 #endif
3001 int
3002 getdirentries(p, uap)
3003 	struct proc *p;
3004 	register struct getdirentries_args /* {
3005 		syscallarg(int) fd;
3006 		syscallarg(char *) buf;
3007 		syscallarg(u_int) count;
3008 		syscallarg(long *) basep;
3009 	} */ *uap;
3010 {
3011 	struct vnode *vp;
3012 	struct file *fp;
3013 	struct uio auio;
3014 	struct iovec aiov;
3015 	long loff;
3016 	int error, eofflag;
3017 
3018 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3019 		return (error);
3020 	if ((fp->f_flag & FREAD) == 0)
3021 		return (EBADF);
3022 	vp = (struct vnode *)fp->f_data;
3023 unionread:
3024 	if (vp->v_type != VDIR)
3025 		return (EINVAL);
3026 	aiov.iov_base = SCARG(uap, buf);
3027 	aiov.iov_len = SCARG(uap, count);
3028 	auio.uio_iov = &aiov;
3029 	auio.uio_iovcnt = 1;
3030 	auio.uio_rw = UIO_READ;
3031 	auio.uio_segflg = UIO_USERSPACE;
3032 	auio.uio_procp = p;
3033 	auio.uio_resid = SCARG(uap, count);
3034 	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3035 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3036 	loff = auio.uio_offset = fp->f_offset;
3037 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3038 	fp->f_offset = auio.uio_offset;
3039 	VOP_UNLOCK(vp, 0, p);
3040 	if (error)
3041 		return (error);
3042 	if (SCARG(uap, count) == auio.uio_resid) {
3043 		if (union_dircheckp) {
3044 			error = union_dircheckp(p, &vp, fp);
3045 			if (error == -1)
3046 				goto unionread;
3047 			if (error)
3048 				return (error);
3049 		}
3050 		if ((vp->v_flag & VROOT) &&
3051 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3052 			struct vnode *tvp = vp;
3053 			vp = vp->v_mount->mnt_vnodecovered;
3054 			VREF(vp);
3055 			fp->f_data = (caddr_t) vp;
3056 			fp->f_offset = 0;
3057 			vrele(tvp);
3058 			goto unionread;
3059 		}
3060 	}
3061 	if (SCARG(uap, basep) != NULL) {
3062 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3063 		    sizeof(long));
3064 	}
3065 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3066 	return (error);
3067 }
3068 #ifndef _SYS_SYSPROTO_H_
3069 struct getdents_args {
3070 	int fd;
3071 	char *buf;
3072 	size_t count;
3073 };
3074 #endif
3075 int
3076 getdents(p, uap)
3077 	struct proc *p;
3078 	register struct getdents_args /* {
3079 		syscallarg(int) fd;
3080 		syscallarg(char *) buf;
3081 		syscallarg(u_int) count;
3082 	} */ *uap;
3083 {
3084 	struct getdirentries_args ap;
3085 	ap.fd = uap->fd;
3086 	ap.buf = uap->buf;
3087 	ap.count = uap->count;
3088 	ap.basep = NULL;
3089 	return getdirentries(p, &ap);
3090 }
3091 
3092 /*
3093  * Set the mode mask for creation of filesystem nodes.
3094  *
3095  * MP SAFE
3096  */
3097 #ifndef _SYS_SYSPROTO_H_
3098 struct umask_args {
3099 	int	newmask;
3100 };
3101 #endif
3102 int
3103 umask(p, uap)
3104 	struct proc *p;
3105 	struct umask_args /* {
3106 		syscallarg(int) newmask;
3107 	} */ *uap;
3108 {
3109 	register struct filedesc *fdp;
3110 
3111 	fdp = p->p_fd;
3112 	p->p_retval[0] = fdp->fd_cmask;
3113 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3114 	return (0);
3115 }
3116 
3117 /*
3118  * Void all references to file by ripping underlying filesystem
3119  * away from vnode.
3120  */
3121 #ifndef _SYS_SYSPROTO_H_
3122 struct revoke_args {
3123 	char	*path;
3124 };
3125 #endif
3126 /* ARGSUSED */
3127 int
3128 revoke(p, uap)
3129 	struct proc *p;
3130 	register struct revoke_args /* {
3131 		syscallarg(char *) path;
3132 	} */ *uap;
3133 {
3134 	register struct vnode *vp;
3135 	struct vattr vattr;
3136 	int error;
3137 	struct nameidata nd;
3138 
3139 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3140 	if ((error = namei(&nd)) != 0)
3141 		return (error);
3142 	vp = nd.ni_vp;
3143 	NDFREE(&nd, NDF_ONLY_PNBUF);
3144 	if (vp->v_type != VCHR && vp->v_type != VBLK) {
3145 		error = EINVAL;
3146 		goto out;
3147 	}
3148 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3149 		goto out;
3150 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3151 	    (error = suser_xxx(0, p, PRISON_ROOT)))
3152 		goto out;
3153 	if (vcount(vp) > 1)
3154 		VOP_REVOKE(vp, REVOKEALL);
3155 out:
3156 	vrele(vp);
3157 	return (error);
3158 }
3159 
3160 /*
3161  * Convert a user file descriptor to a kernel file entry.
3162  */
3163 int
3164 getvnode(fdp, fd, fpp)
3165 	struct filedesc *fdp;
3166 	int fd;
3167 	struct file **fpp;
3168 {
3169 	struct file *fp;
3170 
3171 	if ((u_int)fd >= fdp->fd_nfiles ||
3172 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3173 		return (EBADF);
3174 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3175 		return (EINVAL);
3176 	*fpp = fp;
3177 	return (0);
3178 }
3179 /*
3180  * Get (NFS) file handle
3181  */
3182 #ifndef _SYS_SYSPROTO_H_
3183 struct getfh_args {
3184 	char	*fname;
3185 	fhandle_t *fhp;
3186 };
3187 #endif
3188 int
3189 getfh(p, uap)
3190 	struct proc *p;
3191 	register struct getfh_args *uap;
3192 {
3193 	struct nameidata nd;
3194 	fhandle_t fh;
3195 	register struct vnode *vp;
3196 	int error;
3197 
3198 	/*
3199 	 * Must be super user
3200 	 */
3201 	error = suser(p);
3202 	if (error)
3203 		return (error);
3204 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3205 	error = namei(&nd);
3206 	if (error)
3207 		return (error);
3208 	NDFREE(&nd, NDF_ONLY_PNBUF);
3209 	vp = nd.ni_vp;
3210 	bzero(&fh, sizeof(fh));
3211 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3212 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3213 	vput(vp);
3214 	if (error)
3215 		return (error);
3216 	error = copyout(&fh, uap->fhp, sizeof (fh));
3217 	return (error);
3218 }
3219 
3220 /*
3221  * syscall for the rpc.lockd to use to translate a NFS file handle into
3222  * an open descriptor.
3223  *
3224  * warning: do not remove the suser() call or this becomes one giant
3225  * security hole.
3226  */
3227 #ifndef _SYS_SYSPROTO_H_
3228 struct fhopen_args {
3229 	const struct fhandle *u_fhp;
3230 	int flags;
3231 };
3232 #endif
/*
 * fhopen(u_fhp, flags)
 *
 * Open the file identified by the file handle at u_fhp and return a new
 * descriptor in p_retval[0].  The caller must pass suser().  flags must
 * request read and/or write access and must not contain O_CREAT (EINVAL).
 *
 * The handle is resolved with vfs_getvfs() (ESTALE if no mount matches)
 * and VFS_FHTOVP(); the middle part of the function repeats the checks
 * the code itself labels "from vn_open".  Symbolic links are refused with
 * EMLINK and sockets with EOPNOTSUPP.  O_EXLOCK / O_SHLOCK are honoured
 * by taking an advisory lock on the whole file after the descriptor has
 * been allocated.
 *
 * NOTE(review): the failure exits after VOP_OPEN() has succeeded
 * (vfs_object_create() and falloc() failing) go to `bad' without a
 * matching VOP_CLOSE() - confirm whether that is intended.
 */
int
fhopen(p, uap)
	struct proc *p;
	struct fhopen_args /* {
		syscallarg(const struct fhandle *) u_fhp;
		syscallarg(int) flags;
	} */ *uap;
{
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(p);
	if (error)
		return (error);

	fmode = FFLAGS(SCARG(uap, flags));
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Accumulate the access bits to check from the open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/* The vnode is unlocked around VOP_LEASE() and relocked. */
		VOP_UNLOCK(vp, 0, p);				/* XXX */
		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(p, &nfp, &indx)) != 0) {
		/* Undo the write count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op.
	 */
	fhold(fp);
	/* From here on the file structure carries the vnode. */
	nfp->f_data = (caddr_t)vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* l_start = 0 with l_len = 0: lock from offset 0 onwards. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped while the advisory lock is requested. */
		VOP_UNLOCK(vp, 0, p);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Normally close the descriptor
			 * but handle the case where someone might have dup()d
			 * or close()d it when we weren't looking.
			 */
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				fdrop(fp, p);
			}

			/*
			 * release our private reference.
			 */
			fdrop(fp, p);
			/* No vput() here: vp is unlocked and fp->f_data holds it. */
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		fp->f_flag |= FHASLOCK;
	}
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, p, p->p_ucred);

	/* Success: unlock the vnode, drop the extra hold, return the index. */
	VOP_UNLOCK(vp, 0, p);
	fdrop(fp, p);
	p->p_retval[0] = indx;
	return (0);

bad:
	/* Common failure exit for paths that still hold vp locked. */
	vput(vp);
	return (error);
}
3398 
3399 #ifndef _SYS_SYSPROTO_H_
3400 struct fhstat_args {
3401 	struct fhandle *u_fhp;
3402 	struct stat *sb;
3403 };
3404 #endif
3405 int
3406 fhstat(p, uap)
3407 	struct proc *p;
3408 	register struct fhstat_args /* {
3409 		syscallarg(struct fhandle *) u_fhp;
3410 		syscallarg(struct stat *) sb;
3411 	} */ *uap;
3412 {
3413 	struct stat sb;
3414 	fhandle_t fh;
3415 	struct mount *mp;
3416 	struct vnode *vp;
3417 	int error;
3418 
3419 	/*
3420 	 * Must be super user
3421 	 */
3422 	error = suser(p);
3423 	if (error)
3424 		return (error);
3425 
3426 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3427 	if (error)
3428 		return (error);
3429 
3430 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3431 		return (ESTALE);
3432 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3433 		return (error);
3434 	error = vn_stat(vp, &sb, p);
3435 	vput(vp);
3436 	if (error)
3437 		return (error);
3438 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3439 	return (error);
3440 }
3441 
3442 #ifndef _SYS_SYSPROTO_H_
3443 struct fhstatfs_args {
3444 	struct fhandle *u_fhp;
3445 	struct statfs *buf;
3446 };
3447 #endif
3448 int
3449 fhstatfs(p, uap)
3450 	struct proc *p;
3451 	struct fhstatfs_args /* {
3452 		syscallarg(struct fhandle) *u_fhp;
3453 		syscallarg(struct statfs) *buf;
3454 	} */ *uap;
3455 {
3456 	struct statfs *sp;
3457 	struct mount *mp;
3458 	struct vnode *vp;
3459 	struct statfs sb;
3460 	fhandle_t fh;
3461 	int error;
3462 
3463 	/*
3464 	 * Must be super user
3465 	 */
3466 	if ((error = suser(p)))
3467 		return (error);
3468 
3469 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3470 		return (error);
3471 
3472 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3473 		return (ESTALE);
3474 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3475 		return (error);
3476 	mp = vp->v_mount;
3477 	sp = &mp->mnt_stat;
3478 	vput(vp);
3479 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3480 		return (error);
3481 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3482 	if (suser_xxx(p->p_ucred, 0, 0)) {
3483 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3484 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3485 		sp = &sb;
3486 	}
3487 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3488 }
3489 
3490 /*
3491  * Syscall to push extended attribute configuration information into the
3492  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3493  * a command (int cmd), and attribute name and misc data.  For now, the
3494  * attribute name is left in userspace for consumption by the VFS_op.
3495  * It will probably be changed to be copied into sysspace by the
3496  * syscall in the future, once issues with various consumers of the
3497  * attribute code have raised their hands.
3498  *
3499  * Currently this is used only by UFS Extended Attributes.
3500  */
3501 int
3502 extattrctl(p, uap)
3503 	struct proc *p;
3504 	struct extattrctl_args *uap;
3505 {
3506 	struct nameidata nd;
3507 	struct mount *mp;
3508 	int error;
3509 
3510 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3511 	if ((error = namei(&nd)) != 0)
3512 		return (error);
3513 	mp = nd.ni_vp->v_mount;
3514 	NDFREE(&nd, 0);
3515 	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
3516 	    SCARG(uap, arg), p));
3517 }
3518 
3519 /*
3520  * Syscall to set a named extended attribute on a file or directory.
3521  * Accepts attribute name, and a uio structure pointing to the data to set.
3522  * The uio is consumed in the style of writev().  The real work happens
3523  * in VOP_SETEXTATTR().
3524  */
3525 int
3526 extattr_set_file(p, uap)
3527 	struct proc *p;
3528 	struct extattr_set_file_args *uap;
3529 {
3530 	struct nameidata nd;
3531 	struct uio auio;
3532 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3533 	char attrname[EXTATTR_MAXNAMELEN];
3534 	u_int iovlen, cnt;
3535 	int error, i;
3536 
3537 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3538 	if (error)
3539 		return (error);
3540 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3541 	    SCARG(uap, path), p);
3542 	if ((error = namei(&nd)) != 0)
3543 		return(error);
3544 	iovlen = uap->iovcnt * sizeof(struct iovec);
3545 	if (uap->iovcnt > UIO_SMALLIOV) {
3546 		if (uap->iovcnt > UIO_MAXIOV) {
3547 			error = EINVAL;
3548 			goto done;
3549 		}
3550 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3551 		needfree = iov;
3552 	} else
3553 		iov = aiov;
3554 	auio.uio_iov = iov;
3555 	auio.uio_iovcnt = uap->iovcnt;
3556 	auio.uio_rw = UIO_WRITE;
3557 	auio.uio_segflg = UIO_USERSPACE;
3558 	auio.uio_procp = p;
3559 	auio.uio_offset = 0;
3560 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3561 		goto done;
3562 	auio.uio_resid = 0;
3563 	for (i = 0; i < uap->iovcnt; i++) {
3564 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3565 			error = EINVAL;
3566 			goto done;
3567 		}
3568 		auio.uio_resid += iov->iov_len;
3569 		iov++;
3570 	}
3571 	cnt = auio.uio_resid;
3572 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3573 	    p);
3574 	cnt -= auio.uio_resid;
3575 	p->p_retval[0] = cnt;
3576 done:
3577 	if (needfree)
3578 		FREE(needfree, M_IOV);
3579 	NDFREE(&nd, 0);
3580 	return (error);
3581 }
3582 
3583 /*
3584  * Syscall to get a named extended attribute on a file or directory.
3585  * Accepts attribute name, and a uio structure pointing to a buffer for the
3586  * data.  The uio is consumed in the style of readv().  The real work
3587  * happens in VOP_GETEXTATTR();
3588  */
3589 int
3590 extattr_get_file(p, uap)
3591 	struct proc *p;
3592 	struct extattr_get_file_args *uap;
3593 {
3594 	struct nameidata nd;
3595 	struct uio auio;
3596 	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
3597 	char attrname[EXTATTR_MAXNAMELEN];
3598 	u_int iovlen, cnt;
3599 	int error, i;
3600 
3601 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3602 	if (error)
3603 		return (error);
3604 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3605 	    SCARG(uap, path), p);
3606 	if ((error = namei(&nd)) != 0)
3607 		return (error);
3608 	iovlen = uap->iovcnt * sizeof (struct iovec);
3609 	if (uap->iovcnt > UIO_SMALLIOV) {
3610 		if (uap->iovcnt > UIO_MAXIOV) {
3611 			NDFREE(&nd, 0);
3612 			return (EINVAL);
3613 		}
3614 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3615 		needfree = iov;
3616 	} else {
3617 		iov = aiov;
3618 		needfree = NULL;
3619 	}
3620 	auio.uio_iov = iov;
3621 	auio.uio_iovcnt = uap->iovcnt;
3622 	auio.uio_rw = UIO_READ;
3623 	auio.uio_segflg = UIO_USERSPACE;
3624 	auio.uio_procp = p;
3625 	auio.uio_offset = 0;
3626 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3627 		goto done;
3628 	auio.uio_resid = 0;
3629 	for (i = 0; i < uap->iovcnt; i++) {
3630 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3631 			error = EINVAL;
3632 			goto done;
3633 		}
3634 		auio.uio_resid += iov->iov_len;
3635 		iov++;
3636 	}
3637 	cnt = auio.uio_resid;
3638 	error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3639 	    p);
3640 	cnt -= auio.uio_resid;
3641 	p->p_retval[0] = cnt;
3642 done:
3643 	if (needfree)
3644 		FREE(needfree, M_IOV);
3645 	NDFREE(&nd, 0);
3646 	return(error);
3647 }
3648 
3649 /*
3650  * Syscall to delete a named extended attribute from a file or directory.
3651  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
3652  */
3653 int
3654 extattr_delete_file(p, uap)
3655 	struct proc *p;
3656 	struct extattr_delete_file_args *uap;
3657 {
3658 	struct nameidata nd;
3659 	char attrname[EXTATTR_MAXNAMELEN];
3660 	int	error;
3661 
3662 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3663 	if (error)
3664 		return(error);
3665 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3666 	    SCARG(uap, path), p);
3667 	if ((error = namei(&nd)) != 0)
3668 		return(error);
3669 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
3670 	    p);
3671 	NDFREE(&nd, 0);
3672 	return(error);
3673 }
3674