xref: /dragonfly/sys/kern/vfs_syscalls.c (revision 17b61719)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
40  * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.38 2004/08/17 18:57:32 dillon Exp $
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/buf.h>
46 #include <sys/conf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/sysproto.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/linker.h>
56 #include <sys/stat.h>
57 #include <sys/unistd.h>
58 #include <sys/vnode.h>
59 #include <sys/proc.h>
60 #include <sys/namei.h>
61 #include <sys/dirent.h>
62 #include <sys/extattr.h>
63 #include <sys/kern_syscall.h>
64 
65 #include <machine/limits.h>
66 #include <vfs/union/union.h>
67 #include <sys/sysctl.h>
68 #include <vm/vm.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_zone.h>
71 #include <vm/vm_page.h>
72 
73 #include <sys/file2.h>
74 
75 static int checkvp_chdir (struct vnode *vn, struct thread *td);
76 static void checkdirs (struct vnode *olddp);
77 static int chroot_refuse_vdir_fds (struct filedesc *fdp);
78 static int getutimes (const struct timeval *, struct timespec *);
79 static int setfown (struct vnode *, uid_t, gid_t);
80 static int setfmode (struct vnode *, int);
81 static int setfflags (struct vnode *, int);
82 static int setutimes (struct vnode *, const struct timespec *, int);
83 static int	usermount = 0;	/* if 1, non-root can mount fs. */
84 
85 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);
86 
87 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
88 
89 /*
90  * Virtual File System System Calls
91  */
92 
93 /*
94  * Mount a file system.
95  */
96 /*
97  * mount_args(char *type, char *path, int flags, caddr_t data)
98  */
99 /* ARGSUSED */
100 int
101 mount(struct mount_args *uap)
102 {
103 	struct thread *td = curthread;
104 	struct proc *p = td->td_proc;
105 	struct vnode *vp;
106 	struct mount *mp;
107 	struct vfsconf *vfsp;
108 	int error, flag = 0, flag2 = 0;
109 	struct vattr va;
110 	struct nameidata nd;
111 	char fstypename[MFSNAMELEN];
112 	lwkt_tokref vlock;
113 	lwkt_tokref ilock;
114 
115 	KKASSERT(p);
116 	if (p->p_ucred->cr_prison != NULL)
117 		return (EPERM);
118 	if (usermount == 0 && (error = suser(td)))
119 		return (error);
120 	/*
121 	 * Do not allow NFS export by non-root users.
122 	 */
123 	if (SCARG(uap, flags) & MNT_EXPORTED) {
124 		error = suser(td);
125 		if (error)
126 			return (error);
127 	}
128 	/*
129 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
130 	 */
131 	if (suser(td))
132 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
133 	/*
134 	 * Get vnode to be covered
135 	 */
136 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
137 	    SCARG(uap, path), td);
138 	if ((error = namei(&nd)) != 0)
139 		return (error);
140 	NDFREE(&nd, NDF_ONLY_PNBUF);
141 	vp = nd.ni_vp;
142 	if (SCARG(uap, flags) & MNT_UPDATE) {
143 		if ((vp->v_flag & VROOT) == 0) {
144 			vput(vp);
145 			return (EINVAL);
146 		}
147 		mp = vp->v_mount;
148 		flag = mp->mnt_flag;
149 		flag2 = mp->mnt_kern_flag;
150 		/*
151 		 * We only allow the filesystem to be reloaded if it
152 		 * is currently mounted read-only.
153 		 */
154 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
155 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
156 			vput(vp);
157 			return (EOPNOTSUPP);	/* Needs translation */
158 		}
159 		/*
160 		 * Only root, or the user that did the original mount is
161 		 * permitted to update it.
162 		 */
163 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
164 		    (error = suser(td))) {
165 			vput(vp);
166 			return (error);
167 		}
168 		if (vfs_busy(mp, LK_NOWAIT, NULL, td)) {
169 			vput(vp);
170 			return (EBUSY);
171 		}
172 		lwkt_gettoken(&vlock, vp->v_interlock);
173 		if ((vp->v_flag & VMOUNT) != 0 ||
174 		    vp->v_mountedhere != NULL) {
175 			lwkt_reltoken(&vlock);
176 			vfs_unbusy(mp, td);
177 			vput(vp);
178 			return (EBUSY);
179 		}
180 		vp->v_flag |= VMOUNT;
181 		lwkt_reltoken(&vlock);
182 		mp->mnt_flag |=
183 		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
184 		VOP_UNLOCK(vp, NULL, 0, td);
185 		goto update;
186 	}
187 	/*
188 	 * If the user is not root, ensure that they own the directory
189 	 * onto which we are attempting to mount.
190 	 */
191 	if ((error = VOP_GETATTR(vp, &va, td)) ||
192 	    (va.va_uid != p->p_ucred->cr_uid &&
193 	     (error = suser(td)))) {
194 		vput(vp);
195 		return (error);
196 	}
197 	if ((error = vinvalbuf(vp, V_SAVE, td, 0, 0)) != 0) {
198 		vput(vp);
199 		return (error);
200 	}
201 	if (vp->v_type != VDIR) {
202 		vput(vp);
203 		return (ENOTDIR);
204 	}
205 	if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) {
206 		vput(vp);
207 		return (error);
208 	}
209 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
210 		if (!strcmp(vfsp->vfc_name, fstypename))
211 			break;
212 	if (vfsp == NULL) {
213 		linker_file_t lf;
214 
215 		/* Only load modules for root (very important!) */
216 		if ((error = suser(td)) != 0) {
217 			vput(vp);
218 			return error;
219 		}
220 		error = linker_load_file(fstypename, &lf);
221 		if (error || lf == NULL) {
222 			vput(vp);
223 			if (lf == NULL)
224 				error = ENODEV;
225 			return error;
226 		}
227 		lf->userrefs++;
228 		/* lookup again, see if the VFS was loaded */
229 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
230 			if (!strcmp(vfsp->vfc_name, fstypename))
231 				break;
232 		if (vfsp == NULL) {
233 			lf->userrefs--;
234 			linker_file_unload(lf);
235 			vput(vp);
236 			return (ENODEV);
237 		}
238 	}
239 	lwkt_gettoken(&vlock, vp->v_interlock);
240 	if ((vp->v_flag & VMOUNT) != 0 ||
241 	    vp->v_mountedhere != NULL) {
242 		lwkt_reltoken(&vlock);
243 		vput(vp);
244 		return (EBUSY);
245 	}
246 	vp->v_flag |= VMOUNT;
247 	lwkt_reltoken(&vlock);
248 
249 	/*
250 	 * Allocate and initialize the filesystem.
251 	 */
252 	mp = malloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
253 	TAILQ_INIT(&mp->mnt_nvnodelist);
254 	TAILQ_INIT(&mp->mnt_reservedvnlist);
255 	mp->mnt_nvnodelistsize = 0;
256 	lockinit(&mp->mnt_lock, 0, "vfslock", 0, LK_NOPAUSE);
257 	vfs_busy(mp, LK_NOWAIT, NULL, td);
258 	mp->mnt_op = vfsp->vfc_vfsops;
259 	mp->mnt_vfc = vfsp;
260 	vfsp->vfc_refcount++;
261 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
262 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
263 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
264 	mp->mnt_vnodecovered = vp;
265 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
266 	mp->mnt_iosize_max = DFLTPHYS;
267 	VOP_UNLOCK(vp, NULL, 0, td);
268 update:
269 	/*
270 	 * Set the mount level flags.
271 	 */
272 	if (SCARG(uap, flags) & MNT_RDONLY)
273 		mp->mnt_flag |= MNT_RDONLY;
274 	else if (mp->mnt_flag & MNT_RDONLY)
275 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
276 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
277 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
278 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
279 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
280 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
281 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
282 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
283 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
284 	/*
285 	 * Mount the filesystem.
286 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
287 	 * get.  No freeing of cn_pnbuf.
288 	 */
289 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, td);
290 	if (mp->mnt_flag & MNT_UPDATE) {
291 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
292 			mp->mnt_flag &= ~MNT_RDONLY;
293 		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
294 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
295 		if (error) {
296 			mp->mnt_flag = flag;
297 			mp->mnt_kern_flag = flag2;
298 		}
299 		vfs_unbusy(mp, td);
300 		lwkt_gettoken(&vlock, vp->v_interlock);
301 		vp->v_flag &= ~VMOUNT;
302 		lwkt_reltoken(&vlock);
303 		vrele(vp);
304 		return (error);
305 	}
306 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
307 	/*
308 	 * Put the new filesystem on the mount list after root.
309 	 */
310 	cache_purge(vp);
311 	if (!error) {
312 		lwkt_gettoken(&vlock, vp->v_interlock);
313 		vp->v_flag &= ~VMOUNT;
314 		vp->v_mountedhere = mp;
315 		lwkt_reltoken(&vlock);
316 		lwkt_gettoken(&ilock, &mountlist_token);
317 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
318 		lwkt_reltoken(&ilock);
319 		checkdirs(vp);
320 		VOP_UNLOCK(vp, NULL, 0, td);
321 		error = vfs_allocate_syncvnode(mp);
322 		vfs_unbusy(mp, td);
323 		if ((error = VFS_START(mp, 0, td)) != 0)
324 			vrele(vp);
325 	} else {
326 		vfs_rm_vnodeops(&mp->mnt_vn_ops);
327 		vfs_rm_vnodeops(&mp->mnt_vn_spec_ops);
328 		vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops);
329 		lwkt_gettoken(&vlock, vp->v_interlock);
330 		vp->v_flag &= ~VMOUNT;
331 		lwkt_reltoken(&vlock);
332 		mp->mnt_vfc->vfc_refcount--;
333 		vfs_unbusy(mp, td);
334 		free(mp, M_MOUNT);
335 		vput(vp);
336 	}
337 	return (error);
338 }
339 
340 /*
341  * Scan all active processes to see if any of them have a current
342  * or root directory onto which the new filesystem has just been
343  * mounted. If so, replace them with the new mount point.
344  */
345 static void
346 checkdirs(struct vnode *olddp)
347 {
348 	struct filedesc *fdp;
349 	struct vnode *newdp;
350 	struct proc *p;
351 
352 	if (olddp->v_usecount == 1)
353 		return;
354 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
355 		panic("mount: lost mount");
356 	FOREACH_PROC_IN_SYSTEM(p) {
357 		fdp = p->p_fd;
358 		if (fdp->fd_cdir == olddp) {
359 			vrele(fdp->fd_cdir);
360 			vref(newdp);
361 			fdp->fd_cdir = newdp;
362 		}
363 		if (fdp->fd_rdir == olddp) {
364 			vrele(fdp->fd_rdir);
365 			vref(newdp);
366 			fdp->fd_rdir = newdp;
367 		}
368 	}
369 	if (rootvnode == olddp) {
370 		vrele(rootvnode);
371 		vref(newdp);
372 		rootvnode = newdp;
373 		vfs_cache_setroot(rootvnode);
374 	}
375 	vput(newdp);
376 }
377 
378 /*
379  * Unmount a file system.
380  *
381  * Note: unmount takes a path to the vnode mounted on as argument,
382  * not special file (as before).
383  */
384 /*
385  * umount_args(char *path, int flags)
386  */
387 /* ARGSUSED */
388 int
389 unmount(struct unmount_args *uap)
390 {
391 	struct thread *td = curthread;
392 	struct proc *p = td->td_proc;
393 	struct vnode *vp;
394 	struct mount *mp;
395 	int error;
396 	struct nameidata nd;
397 
398 	KKASSERT(p);
399 	if (p->p_ucred->cr_prison != NULL)
400 		return (EPERM);
401 	if (usermount == 0 && (error = suser(td)))
402 		return (error);
403 
404 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
405 	    SCARG(uap, path), td);
406 	if ((error = namei(&nd)) != 0)
407 		return (error);
408 	vp = nd.ni_vp;
409 	NDFREE(&nd, NDF_ONLY_PNBUF);
410 	mp = vp->v_mount;
411 
412 	/*
413 	 * Only root, or the user that did the original mount is
414 	 * permitted to unmount this filesystem.
415 	 */
416 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
417 	    (error = suser(td))) {
418 		vput(vp);
419 		return (error);
420 	}
421 
422 	/*
423 	 * Don't allow unmounting the root file system.
424 	 */
425 	if (mp->mnt_flag & MNT_ROOTFS) {
426 		vput(vp);
427 		return (EINVAL);
428 	}
429 
430 	/*
431 	 * Must be the root of the filesystem
432 	 */
433 	if ((vp->v_flag & VROOT) == 0) {
434 		vput(vp);
435 		return (EINVAL);
436 	}
437 	vput(vp);
438 	return (dounmount(mp, SCARG(uap, flags), td));
439 }
440 
441 /*
442  * Do the actual file system unmount.
443  */
444 int
445 dounmount(struct mount *mp, int flags, struct thread *td)
446 {
447 	struct vnode *coveredvp;
448 	int error;
449 	int async_flag;
450 	lwkt_tokref ilock;
451 
452 	lwkt_gettoken(&ilock, &mountlist_token);
453 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
454 		lwkt_reltoken(&ilock);
455 		return (EBUSY);
456 	}
457 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
458 	/* Allow filesystems to detect that a forced unmount is in progress. */
459 	if (flags & MNT_FORCE)
460 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
461 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
462 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &ilock, td);
463 	if (error) {
464 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
465 		if (mp->mnt_kern_flag & MNTK_MWAIT)
466 			wakeup(mp);
467 		return (error);
468 	}
469 
470 	if (mp->mnt_flag & MNT_EXPUBLIC)
471 		vfs_setpublicfs(NULL, NULL, NULL);
472 
473 	vfs_msync(mp, MNT_WAIT);
474 	async_flag = mp->mnt_flag & MNT_ASYNC;
475 	mp->mnt_flag &=~ MNT_ASYNC;
476 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
477 	if (mp->mnt_syncer != NULL)
478 		vrele(mp->mnt_syncer);
479 	if (((mp->mnt_flag & MNT_RDONLY) ||
480 	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
481 	    (flags & MNT_FORCE))
482 		error = VFS_UNMOUNT(mp, flags, td);
483 	lwkt_gettokref(&ilock);
484 	if (error) {
485 		if (mp->mnt_syncer == NULL)
486 			vfs_allocate_syncvnode(mp);
487 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
488 		mp->mnt_flag |= async_flag;
489 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
490 		    &ilock, td);
491 		if (mp->mnt_kern_flag & MNTK_MWAIT)
492 			wakeup(mp);
493 		return (error);
494 	}
495 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
496 
497 	/*
498 	 * Remove any installed vnode ops here so the individual VFSs don't
499 	 * have to.
500 	 */
501 	vfs_rm_vnodeops(&mp->mnt_vn_ops);
502 	vfs_rm_vnodeops(&mp->mnt_vn_spec_ops);
503 	vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops);
504 
505 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
506 		coveredvp->v_mountedhere = NULL;
507 		vrele(coveredvp);
508 	}
509 	mp->mnt_vfc->vfc_refcount--;
510 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
511 		panic("unmount: dangling vnode");
512 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &ilock, td);
513 	if (mp->mnt_kern_flag & MNTK_MWAIT)
514 		wakeup(mp);
515 	free(mp, M_MOUNT);
516 	return (0);
517 }
518 
519 /*
520  * Sync each mounted filesystem.
521  */
522 
523 #ifdef DEBUG
524 static int syncprt = 0;
525 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
526 #endif /* DEBUG */
527 
528 /* ARGSUSED */
529 int
530 sync(struct sync_args *uap)
531 {
532 	struct thread *td = curthread;
533 	struct mount *mp, *nmp;
534 	lwkt_tokref ilock;
535 	int asyncflag;
536 
537 	lwkt_gettoken(&ilock, &mountlist_token);
538 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
539 		if (vfs_busy(mp, LK_NOWAIT, &ilock, td)) {
540 			nmp = TAILQ_NEXT(mp, mnt_list);
541 			continue;
542 		}
543 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
544 			asyncflag = mp->mnt_flag & MNT_ASYNC;
545 			mp->mnt_flag &= ~MNT_ASYNC;
546 			vfs_msync(mp, MNT_NOWAIT);
547 			VFS_SYNC(mp, MNT_NOWAIT, td);
548 			mp->mnt_flag |= asyncflag;
549 		}
550 		lwkt_gettokref(&ilock);
551 		nmp = TAILQ_NEXT(mp, mnt_list);
552 		vfs_unbusy(mp, td);
553 	}
554 	lwkt_reltoken(&ilock);
555 /*
556  * print out buffer pool stat information on each sync() call.
557  */
558 #ifdef DEBUG
559 	if (syncprt)
560 		vfs_bufstats();
561 #endif /* DEBUG */
562 	return (0);
563 }
564 
565 /* XXX PRISON: could be per prison flag */
566 static int prison_quotas;
567 #if 0
568 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
569 #endif
570 
571 /*
572  *  quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
573  *
574  * Change filesystem quotas.
575  */
576 /* ARGSUSED */
577 int
578 quotactl(struct quotactl_args *uap)
579 {
580 	struct thread *td = curthread;
581 	struct proc *p = td->td_proc;
582 	struct mount *mp;
583 	int error;
584 	struct nameidata nd;
585 
586 	KKASSERT(p);
587 	if (p->p_ucred->cr_prison && !prison_quotas)
588 		return (EPERM);
589 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE,
590 	    SCARG(uap, path), td);
591 	if ((error = namei(&nd)) != 0)
592 		return (error);
593 	mp = nd.ni_vp->v_mount;
594 	NDFREE(&nd, NDF_ONLY_PNBUF);
595 	vrele(nd.ni_vp);
596 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
597 	    SCARG(uap, arg), td));
598 }
599 
600 int
601 kern_statfs(struct nameidata *nd, struct statfs *buf)
602 {
603 	struct thread *td = curthread;
604 	struct mount *mp;
605 	struct statfs *sp;
606 	int error;
607 
608 	error = namei(nd);
609 	if (error)
610 		return (error);
611 	mp = nd->ni_vp->v_mount;
612 	sp = &mp->mnt_stat;
613 	NDFREE(nd, NDF_ONLY_PNBUF);
614 	vrele(nd->ni_vp);
615 	error = VFS_STATFS(mp, sp, td);
616 	if (error)
617 		return (error);
618 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
619 	bcopy(sp, buf, sizeof(*buf));
620 	/* Only root should have access to the fsid's. */
621 	if (suser(td))
622 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
623 	return (0);
624 }
625 
626 /*
627  * statfs_args(char *path, struct statfs *buf)
628  *
629  * Get filesystem statistics.
630  */
631 int
632 statfs(struct statfs_args *uap)
633 {
634 	struct thread *td = curthread;
635 	struct nameidata nd;
636 	struct statfs buf;
637 	int error;
638 
639 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
640 
641 	error = kern_statfs(&nd, &buf);
642 
643 	if (error == 0)
644 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
645 	return (error);
646 }
647 
648 int
649 kern_fstatfs(int fd, struct statfs *buf)
650 {
651 	struct thread *td = curthread;
652 	struct proc *p = td->td_proc;
653 	struct file *fp;
654 	struct mount *mp;
655 	struct statfs *sp;
656 	int error;
657 
658 	KKASSERT(p);
659 	error = getvnode(p->p_fd, fd, &fp);
660 	if (error)
661 		return (error);
662 	mp = ((struct vnode *)fp->f_data)->v_mount;
663 	if (mp == NULL)
664 		return (EBADF);
665 	sp = &mp->mnt_stat;
666 	error = VFS_STATFS(mp, sp, td);
667 	if (error)
668 		return (error);
669 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
670 	bcopy(sp, buf, sizeof(*buf));
671 	/* Only root should have access to the fsid's. */
672 	if (suser(td))
673 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
674 	return (0);
675 }
676 
677 /*
678  * fstatfs_args(int fd, struct statfs *buf)
679  *
680  * Get filesystem statistics.
681  */
682 int
683 fstatfs(struct fstatfs_args *uap)
684 {
685 	struct statfs buf;
686 	int error;
687 
688 	error = kern_fstatfs(uap->fd, &buf);
689 
690 	if (error == 0)
691 		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
692 	return (error);
693 }
694 
695 /*
696  * getfsstat_args(struct statfs *buf, long bufsize, int flags)
697  *
698  * Get statistics on all filesystems.
699  */
700 /* ARGSUSED */
701 int
702 getfsstat(struct getfsstat_args *uap)
703 {
704 	struct thread *td = curthread;
705 	struct mount *mp, *nmp;
706 	struct statfs *sp;
707 	caddr_t sfsp;
708 	lwkt_tokref ilock;
709 	long count, maxcount, error;
710 
711 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
712 	sfsp = (caddr_t)SCARG(uap, buf);
713 	count = 0;
714 	lwkt_gettoken(&ilock, &mountlist_token);
715 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
716 		if (vfs_busy(mp, LK_NOWAIT, &ilock, td)) {
717 			nmp = TAILQ_NEXT(mp, mnt_list);
718 			continue;
719 		}
720 		if (sfsp && count < maxcount) {
721 			sp = &mp->mnt_stat;
722 			/*
723 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
724 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
725 			 * overrides MNT_WAIT.
726 			 */
727 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
728 			    (SCARG(uap, flags) & MNT_WAIT)) &&
729 			    (error = VFS_STATFS(mp, sp, td))) {
730 				lwkt_gettokref(&ilock);
731 				nmp = TAILQ_NEXT(mp, mnt_list);
732 				vfs_unbusy(mp, td);
733 				continue;
734 			}
735 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
736 			error = copyout(sp, sfsp, sizeof(*sp));
737 			if (error) {
738 				vfs_unbusy(mp, td);
739 				return (error);
740 			}
741 			sfsp += sizeof(*sp);
742 		}
743 		count++;
744 		lwkt_gettokref(&ilock);
745 		nmp = TAILQ_NEXT(mp, mnt_list);
746 		vfs_unbusy(mp, td);
747 	}
748 	lwkt_reltoken(&ilock);
749 	if (sfsp && count > maxcount)
750 		uap->sysmsg_result = maxcount;
751 	else
752 		uap->sysmsg_result = count;
753 	return (0);
754 }
755 
756 /*
757  * fchdir_args(int fd)
758  *
759  * Change current working directory to a given file descriptor.
760  */
761 /* ARGSUSED */
762 int
763 fchdir(struct fchdir_args *uap)
764 {
765 	struct thread *td = curthread;
766 	struct proc *p = td->td_proc;
767 	struct filedesc *fdp = p->p_fd;
768 	struct vnode *vp, *tdp;
769 	struct mount *mp;
770 	struct file *fp;
771 	int error;
772 
773 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
774 		return (error);
775 	vp = (struct vnode *)fp->f_data;
776 	vref(vp);
777 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
778 	if (vp->v_type != VDIR)
779 		error = ENOTDIR;
780 	else
781 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td);
782 	while (!error && (mp = vp->v_mountedhere) != NULL) {
783 		if (vfs_busy(mp, 0, NULL, td))
784 			continue;
785 		error = VFS_ROOT(mp, &tdp);
786 		vfs_unbusy(mp, td);
787 		if (error)
788 			break;
789 		vput(vp);
790 		vp = tdp;
791 	}
792 	if (error) {
793 		vput(vp);
794 		return (error);
795 	}
796 	VOP_UNLOCK(vp, NULL, 0, td);
797 	vrele(fdp->fd_cdir);
798 	fdp->fd_cdir = vp;
799 	return (0);
800 }
801 
802 int
803 kern_chdir(struct nameidata *nd)
804 {
805 	struct thread *td = curthread;
806 	struct proc *p = td->td_proc;
807 	struct filedesc *fdp = p->p_fd;
808 	int error;
809 
810 	if ((error = namei(nd)) != 0)
811 		return (error);
812 	if ((error = checkvp_chdir(nd->ni_vp, td)) == 0) {
813 		vrele(fdp->fd_cdir);
814 		fdp->fd_cdir = nd->ni_vp;
815 		vref(fdp->fd_cdir);
816 	}
817 	NDFREE(nd, ~(NDF_NO_FREE_PNBUF | NDF_NO_VP_PUT));
818 	return (error);
819 }
820 
821 /*
822  * chdir_args(char *path)
823  *
824  * Change current working directory (``.'').
825  */
826 int
827 chdir(struct chdir_args *uap)
828 {
829 	struct thread *td = curthread;
830 	struct nameidata nd;
831 	int error;
832 
833 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
834 	    uap->path, td);
835 
836 	error = kern_chdir(&nd);
837 
838 	return (error);
839 }
840 
841 /*
842  * Helper function for raised chroot(2) security function:  Refuse if
843  * any filedescriptors are open directories.
844  */
845 static int
846 chroot_refuse_vdir_fds(fdp)
847 	struct filedesc *fdp;
848 {
849 	struct vnode *vp;
850 	struct file *fp;
851 	int error;
852 	int fd;
853 
854 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
855 		error = getvnode(fdp, fd, &fp);
856 		if (error)
857 			continue;
858 		vp = (struct vnode *)fp->f_data;
859 		if (vp->v_type != VDIR)
860 			continue;
861 		return(EPERM);
862 	}
863 	return (0);
864 }
865 
866 /*
867  * This sysctl determines if we will allow a process to chroot(2) if it
868  * has a directory open:
869  *	0: disallowed for all processes.
870  *	1: allowed for processes that were not already chroot(2)'ed.
871  *	2: allowed for all processes.
872  */
873 
874 static int chroot_allow_open_directories = 1;
875 
876 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
877      &chroot_allow_open_directories, 0, "");
878 
879 /*
880  * Chroot to the specified vnode.  vp must be locked and referenced on
881  * call, and will be left locked and referenced on return.  This routine
882  * may acquire additional refs on the vnode when associating it with
883  * the process's root and/or jail dirs.
884  */
885 int
886 kern_chroot(struct vnode *vp)
887 {
888 	struct thread *td = curthread;
889 	struct proc *p = td->td_proc;
890 	struct filedesc *fdp = p->p_fd;
891 	int error;
892 
893 	/*
894 	 * Only root can chroot
895 	 */
896 	if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)
897 		return (error);
898 
899 	/*
900 	 * Disallow open directory descriptors (fchdir() breakouts).
901 	 */
902 	if (chroot_allow_open_directories == 0 ||
903 	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
904 		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
905 			return (error);
906 	}
907 
908 	/*
909 	 * Check the validity of vp as a directory to change to and
910 	 * associate it with rdir/jdir.
911 	 */
912 	if ((error = checkvp_chdir(vp, td)) == 0) {
913 		vrele(fdp->fd_rdir);
914 		fdp->fd_rdir = vp;
915 		vref(fdp->fd_rdir);
916 		if (fdp->fd_jdir == NULL) {
917 			fdp->fd_jdir = vp;
918 			vref(fdp->fd_jdir);
919 		}
920 	}
921 	return (error);
922 }
923 
924 /*
925  * chroot_args(char *path)
926  *
927  * Change notion of root (``/'') directory.
928  */
929 /* ARGSUSED */
930 int
931 chroot(struct chroot_args *uap)
932 {
933 	struct thread *td = curthread;
934 	struct nameidata nd;
935 	int error;
936 
937 	KKASSERT(td->td_proc);
938 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
939 		SCARG(uap, path), td);
940 	if ((error = namei(&nd)) == 0) {
941 		error = kern_chroot(nd.ni_vp);
942 		NDFREE(&nd, ~(NDF_NO_FREE_PNBUF | NDF_NO_VP_PUT));
943 	}
944 	return (error);
945 }
946 
947 /*
948  * Common routine for chroot and chdir.  Given a locked, referenced vnode,
949  * determine whether it is legal to chdir to the vnode.  The vnode's state
950  * is not changed by this call.
951  */
952 int
953 checkvp_chdir(struct vnode *vp, struct thread *td)
954 {
955 	int error;
956 
957 	if (vp->v_type != VDIR)
958 		error = ENOTDIR;
959 	else
960 		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
961 	return (error);
962 }
963 
964 int
965 kern_open(struct nameidata *nd, int oflags, int mode, int *res)
966 {
967 	struct thread *td = curthread;
968 	struct proc *p = td->td_proc;
969 	struct filedesc *fdp = p->p_fd;
970 	struct file *fp;
971 	struct vnode *vp;
972 	int cmode, flags;
973 	struct file *nfp;
974 	int type, indx, error;
975 	struct flock lf;
976 
977 	if ((oflags & O_ACCMODE) == O_ACCMODE)
978 		return (EINVAL);
979 	flags = FFLAGS(oflags);
980 	error = falloc(p, &nfp, &indx);
981 	if (error)
982 		return (error);
983 	fp = nfp;
984 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
985 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
986 	/*
987 	 * Bump the ref count to prevent another process from closing
988 	 * the descriptor while we are blocked in vn_open()
989 	 */
990 	fhold(fp);
991 	error = vn_open(nd, flags, cmode);
992 	if (error) {
993 		/*
994 		 * release our own reference
995 		 */
996 		fdrop(fp, td);
997 
998 		/*
999 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1000 		 * responsible for dropping the old contents of ofiles[indx]
1001 		 * if it succeeds.
1002 		 */
1003 		if ((error == ENODEV || error == ENXIO) &&
1004 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1005 		    (error =
1006 			dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
1007 			*res = indx;
1008 			return (0);
1009 		}
1010 		/*
1011 		 * Clean up the descriptor, but only if another thread hadn't
1012 		 * replaced or closed it.
1013 		 */
1014 		if (fdp->fd_ofiles[indx] == fp) {
1015 			fdp->fd_ofiles[indx] = NULL;
1016 			fdrop(fp, td);
1017 		}
1018 
1019 		if (error == ERESTART)
1020 			error = EINTR;
1021 		return (error);
1022 	}
1023 	p->p_dupfd = 0;
1024 	NDFREE(nd, NDF_ONLY_PNBUF);
1025 	vp = nd->ni_vp;
1026 
1027 	/*
1028 	 * There should be 2 references on the file, one from the descriptor
1029 	 * table, and one for us.
1030 	 *
1031 	 * Handle the case where someone closed the file (via its file
1032 	 * descriptor) while we were blocked.  The end result should look
1033 	 * like opening the file succeeded but it was immediately closed.
1034 	 */
1035 	if (fp->f_count == 1) {
1036 		KASSERT(fdp->fd_ofiles[indx] != fp,
1037 		    ("Open file descriptor lost all refs"));
1038 		VOP_UNLOCK(vp, NULL, 0, td);
1039 		vn_close(vp, flags & FMASK, td);
1040 		fdrop(fp, td);
1041 		*res = indx;
1042 		return 0;
1043 	}
1044 
1045 	fp->f_data = (caddr_t)vp;
1046 	fp->f_flag = flags & FMASK;
1047 	fp->f_ops = &vnops;
1048 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1049 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1050 		lf.l_whence = SEEK_SET;
1051 		lf.l_start = 0;
1052 		lf.l_len = 0;
1053 		if (flags & O_EXLOCK)
1054 			lf.l_type = F_WRLCK;
1055 		else
1056 			lf.l_type = F_RDLCK;
1057 		type = F_FLOCK;
1058 		if ((flags & FNONBLOCK) == 0)
1059 			type |= F_WAIT;
1060 		VOP_UNLOCK(vp, NULL, 0, td);
1061 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
1062 			/*
1063 			 * lock request failed.  Normally close the descriptor
1064 			 * but handle the case where someone might have dup()d
1065 			 * it when we weren't looking.  One reference is
1066 			 * owned by the descriptor array, the other by us.
1067 			 */
1068 			if (fdp->fd_ofiles[indx] == fp) {
1069 				fdp->fd_ofiles[indx] = NULL;
1070 				fdrop(fp, td);
1071 			}
1072 			fdrop(fp, td);
1073 			return (error);
1074 		}
1075 		vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
1076 		fp->f_flag |= FHASLOCK;
1077 	}
1078 	/* assert that vn_open created a backing object if one is needed */
1079 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1080 		("open: vmio vnode has no backing object after vn_open"));
1081 	VOP_UNLOCK(vp, NULL, 0, td);
1082 
1083 	/*
1084 	 * release our private reference, leaving the one associated with the
1085 	 * descriptor table intact.
1086 	 */
1087 	fdrop(fp, td);
1088 	*res = indx;
1089 	return (0);
1090 }
1091 
1092 /*
1093  * open_args(char *path, int flags, int mode)
1094  *
1095  * Check permissions, allocate an open file structure,
1096  * and call the device open routine if any.
1097  */
1098 int
1099 open(struct open_args *uap)
1100 {
1101 	struct thread *td = curthread;
1102 	struct nameidata nd;
1103 	int error;
1104 
1105 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
1106 
1107 	error = kern_open(&nd, uap->flags, uap->mode, &uap->sysmsg_result);
1108 
1109 	return (error);
1110 }
1111 
1112 int
1113 kern_mknod(struct nameidata *nd, int mode, int dev)
1114 {
1115 	struct thread *td = curthread;
1116 	struct proc *p = td->td_proc;
1117 	struct vnode *vp;
1118 	struct vattr vattr;
1119 	int error;
1120 	int whiteout = 0;
1121 
1122 	KKASSERT(p);
1123 
1124 	switch (mode & S_IFMT) {
1125 	case S_IFCHR:
1126 	case S_IFBLK:
1127 		error = suser(td);
1128 		break;
1129 	default:
1130 		error = suser_cred(p->p_ucred, PRISON_ROOT);
1131 		break;
1132 	}
1133 	if (error)
1134 		return (error);
1135 	bwillwrite();
1136 	error = namei(nd);
1137 	if (error)
1138 		return (error);
1139 	vp = nd->ni_vp;
1140 	if (vp != NULL)
1141 		error = EEXIST;
1142 	else {
1143 		VATTR_NULL(&vattr);
1144 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1145 		vattr.va_rdev = dev;
1146 		whiteout = 0;
1147 
1148 		switch (mode & S_IFMT) {
1149 		case S_IFMT:	/* used by badsect to flag bad sectors */
1150 			vattr.va_type = VBAD;
1151 			break;
1152 		case S_IFCHR:
1153 			vattr.va_type = VCHR;
1154 			break;
1155 		case S_IFBLK:
1156 			vattr.va_type = VBLK;
1157 			break;
1158 		case S_IFWHT:
1159 			whiteout = 1;
1160 			break;
1161 		default:
1162 			error = EINVAL;
1163 			break;
1164 		}
1165 	}
1166 	if (error == 0) {
1167 		VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
1168 		if (whiteout)
1169 			error = VOP_WHITEOUT(nd->ni_dvp, NCPNULL,
1170 			    &nd->ni_cnd, NAMEI_CREATE);
1171 		else {
1172 			error = VOP_MKNOD(nd->ni_dvp, NCPNULL, &nd->ni_vp,
1173 			    &nd->ni_cnd, &vattr);
1174 			if (error == 0)
1175 				vput(nd->ni_vp);
1176 		}
1177 		NDFREE(nd, NDF_ONLY_PNBUF);
1178 		vput(nd->ni_dvp);
1179 	} else {
1180 		NDFREE(nd, NDF_ONLY_PNBUF);
1181 		if (nd->ni_dvp == vp)
1182 			vrele(nd->ni_dvp);
1183 		else
1184 			vput(nd->ni_dvp);
1185 		if (vp)
1186 			vrele(vp);
1187 	}
1188 	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "mknod");
1189 	ASSERT_VOP_UNLOCKED(nd->ni_vp, "mknod");
1190 	return (error);
1191 }
1192 
1193 /*
1194  * mknod_args(char *path, int mode, int dev)
1195  *
1196  * Create a special file.
1197  */
1198 int
1199 mknod(struct mknod_args *uap)
1200 {
1201 	struct thread *td = curthread;
1202 	struct nameidata nd;
1203 	int error;
1204 
1205 	NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
1206 	    td);
1207 
1208 	error = kern_mknod(&nd, uap->mode, uap->dev);
1209 
1210 	return (error);
1211 }
1212 
1213 int
1214 kern_mkfifo(struct nameidata *nd, int mode)
1215 {
1216 	struct thread *td = curthread;
1217 	struct proc *p = td->td_proc;
1218 	struct vattr vattr;
1219 	int error;
1220 
1221 	bwillwrite();
1222 	error = namei(nd);
1223 	if (error)
1224 		return (error);
1225 	if (nd->ni_vp != NULL) {
1226 		NDFREE(nd, NDF_ONLY_PNBUF);
1227 		if (nd->ni_dvp == nd->ni_vp)
1228 			vrele(nd->ni_dvp);
1229 		else
1230 			vput(nd->ni_dvp);
1231 		vrele(nd->ni_vp);
1232 		return (EEXIST);
1233 	}
1234 	VATTR_NULL(&vattr);
1235 	vattr.va_type = VFIFO;
1236 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1237 	VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
1238 	error = VOP_MKNOD(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd, &vattr);
1239 	if (error == 0)
1240 		vput(nd->ni_vp);
1241 	NDFREE(nd, NDF_ONLY_PNBUF);
1242 	vput(nd->ni_dvp);
1243 	return (error);
1244 }
1245 
1246 /*
1247  * mkfifo_args(char *path, int mode)
1248  *
1249  * Create a named pipe.
1250  */
1251 int
1252 mkfifo(struct mkfifo_args *uap)
1253 {
1254 	struct thread *td = curthread;
1255 	struct nameidata nd;
1256 	int error;
1257 
1258 	NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
1259 	    td);
1260 
1261 	error = kern_mkfifo(&nd, uap->mode);
1262 
1263 	return (error);
1264 }
1265 
1266 int
1267 kern_link(struct nameidata *nd, struct nameidata *linknd)
1268 {
1269 	struct thread *td = curthread;
1270 	struct proc *p = td->td_proc;
1271 	struct vnode *vp;
1272 	int error;
1273 
1274 	bwillwrite();
1275 	error = namei(nd);
1276 	if (error)
1277 		return (error);
1278 	NDFREE(nd, NDF_ONLY_PNBUF);
1279 	vp = nd->ni_vp;
1280 	if (vp->v_type == VDIR)
1281 		error = EPERM;		/* POSIX */
1282 	else {
1283 		error = namei(linknd);
1284 		if (error == 0) {
1285 			if (linknd->ni_vp != NULL) {
1286 				if (linknd->ni_vp)
1287 					vrele(linknd->ni_vp);
1288 				error = EEXIST;
1289 			} else {
1290 				VOP_LEASE(linknd->ni_dvp, td, p->p_ucred,
1291 				    LEASE_WRITE);
1292 				VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1293 				error = VOP_LINK(linknd->ni_dvp, NCPNULL, vp,
1294 				    &linknd->ni_cnd);
1295 			}
1296 			NDFREE(linknd, NDF_ONLY_PNBUF);
1297 			if (linknd->ni_dvp == linknd->ni_vp)
1298 				vrele(linknd->ni_dvp);
1299 			else
1300 				vput(linknd->ni_dvp);
1301 			ASSERT_VOP_UNLOCKED(linknd->ni_dvp, "link");
1302 			ASSERT_VOP_UNLOCKED(linknd->ni_vp, "link");
1303 		}
1304 	}
1305 	vrele(vp);
1306 	return (error);
1307 }
1308 
1309 /*
1310  * link_args(char *path, char *link)
1311  *
1312  * Make a hard file link.
1313  */
1314 int
1315 link(struct link_args *uap)
1316 {
1317 	struct thread *td = curthread;
1318 	struct nameidata nd, linknd;
1319 	int error;
1320 
1321 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_NOOBJ, UIO_USERSPACE,
1322 	    uap->path, td);
1323 	NDINIT(&linknd, NAMEI_CREATE, CNP_LOCKPARENT | CNP_NOOBJ,
1324 	    UIO_USERSPACE, uap->link, td);
1325 
1326 	error = kern_link(&nd, &linknd);
1327 
1328 	return (error);
1329 }
1330 
1331 int
1332 kern_symlink(char *path, struct nameidata *nd)
1333 {
1334 	struct thread *td = curthread;
1335 	struct proc *p = td->td_proc;
1336 	struct vattr vattr;
1337 	int error;
1338 
1339 	bwillwrite();
1340 	error = namei(nd);
1341 	if (error)
1342 		return (error);
1343 	if (nd->ni_vp) {
1344 		NDFREE(nd, NDF_ONLY_PNBUF);
1345 		if (nd->ni_dvp == nd->ni_vp)
1346 			vrele(nd->ni_dvp);
1347 		else
1348 			vput(nd->ni_dvp);
1349 		vrele(nd->ni_vp);
1350 		return (EEXIST);
1351 	}
1352 	VATTR_NULL(&vattr);
1353 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1354 	VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
1355 	error = VOP_SYMLINK(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd,
1356 	    &vattr, path);
1357 	NDFREE(nd, NDF_ONLY_PNBUF);
1358 	if (error == 0)
1359 		vput(nd->ni_vp);
1360 	vput(nd->ni_dvp);
1361 	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "symlink");
1362 	ASSERT_VOP_UNLOCKED(nd->ni_vp, "symlink");
1363 
1364 	return (error);
1365 }
1366 
1367 /*
1368  * symlink(char *path, char *link)
1369  *
1370  * Make a symbolic link.
1371  */
1372 int
1373 symlink(struct symlink_args *uap)
1374 {
1375 	struct thread *td = curthread;
1376 	struct nameidata nd;
1377 	char *path;
1378 	int error;
1379 
1380 	path = zalloc(namei_zone);
1381 	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1382 	if (error == 0) {
1383 		NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT | CNP_NOOBJ,
1384 			UIO_USERSPACE, uap->link, td);
1385 		error = kern_symlink(path, &nd);
1386 	}
1387 	zfree(namei_zone, path);
1388 	return (error);
1389 }
1390 
1391 /*
1392  * undelete_args(char *path)
1393  *
1394  * Delete a whiteout from the filesystem.
1395  */
1396 /* ARGSUSED */
1397 int
1398 undelete(struct undelete_args *uap)
1399 {
1400 	struct thread *td = curthread;
1401 	struct proc *p = td->td_proc;
1402 	int error;
1403 	struct nameidata nd;
1404 
1405 	bwillwrite();
1406 	NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT | CNP_DOWHITEOUT, UIO_USERSPACE,
1407 	    SCARG(uap, path), td);
1408 	error = namei(&nd);
1409 	if (error)
1410 		return (error);
1411 
1412 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & CNP_ISWHITEOUT)) {
1413 		NDFREE(&nd, NDF_ONLY_PNBUF);
1414 		if (nd.ni_dvp == nd.ni_vp)
1415 			vrele(nd.ni_dvp);
1416 		else
1417 			vput(nd.ni_dvp);
1418 		if (nd.ni_vp)
1419 			vrele(nd.ni_vp);
1420 		return (EEXIST);
1421 	}
1422 
1423 	VOP_LEASE(nd.ni_dvp, td, p->p_ucred, LEASE_WRITE);
1424 	error = VOP_WHITEOUT(nd.ni_dvp, NCPNULL, &nd.ni_cnd, NAMEI_DELETE);
1425 	NDFREE(&nd, NDF_ONLY_PNBUF);
1426 	vput(nd.ni_dvp);
1427 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1428 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1429 	return (error);
1430 }
1431 
1432 int
1433 kern_unlink(struct nameidata *nd)
1434 {
1435 	struct thread *td = curthread;
1436 	struct proc *p = td->td_proc;
1437 	struct vnode *vp;
1438 	int error;
1439 
1440 	bwillwrite();
1441 	error = namei(nd);
1442 	if (error)
1443 		return (error);
1444 	vp = nd->ni_vp;
1445 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1446 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
1447 
1448 	if (vp->v_type == VDIR)
1449 		error = EPERM;		/* POSIX */
1450 	else {
1451 		/*
1452 		 * The root of a mounted filesystem cannot be deleted.
1453 		 *
1454 		 * XXX: can this only be a VDIR case?
1455 		 */
1456 		if (vp->v_flag & VROOT)
1457 			error = EBUSY;
1458 	}
1459 
1460 	if (error == 0) {
1461 		VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
1462 		error = VOP_REMOVE(nd->ni_dvp, NCPNULL, vp, &nd->ni_cnd);
1463 	}
1464 	NDFREE(nd, NDF_ONLY_PNBUF);
1465 	if (nd->ni_dvp == vp)
1466 		vrele(nd->ni_dvp);
1467 	else
1468 		vput(nd->ni_dvp);
1469 	if (vp != NULLVP)
1470 		vput(vp);
1471 	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "unlink");
1472 	ASSERT_VOP_UNLOCKED(nd->ni_vp, "unlink");
1473 	return (error);
1474 }
1475 
1476 /*
1477  * unlink_args(char *path)
1478  *
1479  * Delete a name from the filesystem.
1480  */
1481 int
1482 unlink(struct unlink_args *uap)
1483 {
1484 	struct thread *td = curthread;
1485 	struct nameidata nd;
1486 	int error;
1487 
1488 	NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
1489 	    td);
1490 
1491 	error = kern_unlink(&nd);
1492 
1493 	return (error);
1494 }
1495 
1496 int
1497 kern_lseek(int fd, off_t offset, int whence, off_t *res)
1498 {
1499 	struct thread *td = curthread;
1500 	struct proc *p = td->td_proc;
1501 	struct filedesc *fdp = p->p_fd;
1502 	struct file *fp;
1503 	struct vattr vattr;
1504 	int error;
1505 
1506 	if (fd >= fdp->fd_nfiles ||
1507 	    (fp = fdp->fd_ofiles[fd]) == NULL)
1508 		return (EBADF);
1509 	if (fp->f_type != DTYPE_VNODE)
1510 		return (ESPIPE);
1511 	switch (whence) {
1512 	case L_INCR:
1513 		fp->f_offset += offset;
1514 		break;
1515 	case L_XTND:
1516 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, td);
1517 		if (error)
1518 			return (error);
1519 		fp->f_offset = offset + vattr.va_size;
1520 		break;
1521 	case L_SET:
1522 		fp->f_offset = offset;
1523 		break;
1524 	default:
1525 		return (EINVAL);
1526 	}
1527 	*res = fp->f_offset;
1528 	return (0);
1529 }
1530 
1531 /*
1532  * lseek_args(int fd, int pad, off_t offset, int whence)
1533  *
1534  * Reposition read/write file offset.
1535  */
1536 int
1537 lseek(struct lseek_args *uap)
1538 {
1539 	int error;
1540 
1541 	error = kern_lseek(uap->fd, uap->offset, uap->whence,
1542 	    &uap->sysmsg_offset);
1543 
1544 	return (error);
1545 }
1546 
1547 int
1548 kern_access(struct nameidata *nd, int aflags)
1549 {
1550 	struct thread *td = curthread;
1551 	struct proc *p = td->td_proc;
1552 	struct ucred *cred, *tmpcred;
1553 	struct vnode *vp;
1554 	int error, flags;
1555 
1556 	cred = p->p_ucred;
1557 	/*
1558 	 * Create and modify a temporary credential instead of one that
1559 	 * is potentially shared.  This could also mess up socket
1560 	 * buffer accounting which can run in an interrupt context.
1561 	 */
1562 	tmpcred = crdup(cred);
1563 	tmpcred->cr_uid = p->p_ucred->cr_ruid;
1564 	tmpcred->cr_groups[0] = p->p_ucred->cr_rgid;
1565 	p->p_ucred = tmpcred;
1566 	nd->ni_cnd.cn_cred = tmpcred;
1567 	error = namei(nd);
1568 	if (error)
1569 		goto out1;
1570 	vp = nd->ni_vp;
1571 
1572 	/* Flags == 0 means only check for existence. */
1573 	if (aflags) {
1574 		flags = 0;
1575 		if (aflags & R_OK)
1576 			flags |= VREAD;
1577 		if (aflags & W_OK)
1578 			flags |= VWRITE;
1579 		if (aflags & X_OK)
1580 			flags |= VEXEC;
1581 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1582 			error = VOP_ACCESS(vp, flags, tmpcred, td);
1583 	}
1584 	NDFREE(nd, NDF_ONLY_PNBUF);
1585 	vput(vp);
1586 out1:
1587 	p->p_ucred = cred;
1588 	crfree(tmpcred);
1589 	return (error);
1590 }
1591 
1592 /*
1593  * access_args(char *path, int flags)
1594  *
1595  * Check access permissions.
1596  */
1597 int
1598 access(struct access_args *uap)
1599 {
1600 	struct thread *td = curthread;
1601 	struct nameidata nd;
1602 	int error;
1603 
1604 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ,
1605 	    UIO_USERSPACE, uap->path, td);
1606 
1607 	error = kern_access(&nd, uap->flags);
1608 
1609 	return (error);
1610 }
1611 
1612 int
1613 kern_stat(struct nameidata *nd, struct stat *st)
1614 {
1615 	struct thread *td = curthread;
1616 	int error;
1617 
1618 	error = namei(nd);
1619 	if (error)
1620 		return (error);
1621 	error = vn_stat(nd->ni_vp, st, td);
1622 	NDFREE(nd, NDF_ONLY_PNBUF);
1623 	vput(nd->ni_vp);
1624 	return (error);
1625 }
1626 
1627 /*
1628  * stat_args(char *path, struct stat *ub)
1629  *
1630  * Get file status; this version follows links.
1631  */
1632 int
1633 stat(struct stat_args *uap)
1634 {
1635 	struct thread *td = curthread;
1636 	struct nameidata nd;
1637 	struct stat st;
1638 	int error;
1639 
1640 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ,
1641 	    UIO_USERSPACE, uap->path, td);
1642 
1643 	error = kern_stat(&nd, &st);
1644 
1645 	if (error == 0)
1646 		error = copyout(&st, uap->ub, sizeof(*uap->ub));
1647 	return (error);
1648 }
1649 
1650 /*
1651  * lstat_args(char *path, struct stat *ub)
1652  *
1653  * Get file status; this version does not follow links.
1654  */
1655 int
1656 lstat(struct lstat_args *uap)
1657 {
1658 	struct thread *td = curthread;
1659 	struct nameidata nd;
1660 	struct stat st;
1661 	int error;
1662 
1663 	NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ,
1664 	    UIO_USERSPACE, SCARG(uap, path), td);
1665 
1666 	error = kern_stat(&nd, &st);
1667 
1668 	if (error == 0)
1669 		error = copyout(&st, uap->ub, sizeof(*uap->ub));
1670 	return (error);
1671 }
1672 
1673 void
1674 cvtnstat(sb, nsb)
1675 	struct stat *sb;
1676 	struct nstat *nsb;
1677 {
1678 	nsb->st_dev = sb->st_dev;
1679 	nsb->st_ino = sb->st_ino;
1680 	nsb->st_mode = sb->st_mode;
1681 	nsb->st_nlink = sb->st_nlink;
1682 	nsb->st_uid = sb->st_uid;
1683 	nsb->st_gid = sb->st_gid;
1684 	nsb->st_rdev = sb->st_rdev;
1685 	nsb->st_atimespec = sb->st_atimespec;
1686 	nsb->st_mtimespec = sb->st_mtimespec;
1687 	nsb->st_ctimespec = sb->st_ctimespec;
1688 	nsb->st_size = sb->st_size;
1689 	nsb->st_blocks = sb->st_blocks;
1690 	nsb->st_blksize = sb->st_blksize;
1691 	nsb->st_flags = sb->st_flags;
1692 	nsb->st_gen = sb->st_gen;
1693 	nsb->st_qspare[0] = sb->st_qspare[0];
1694 	nsb->st_qspare[1] = sb->st_qspare[1];
1695 }
1696 
1697 /*
1698  * nstat_args(char *path, struct nstat *ub)
1699  */
1700 /* ARGSUSED */
1701 int
1702 nstat(struct nstat_args *uap)
1703 {
1704 	struct thread *td = curthread;
1705 	struct stat sb;
1706 	struct nstat nsb;
1707 	int error;
1708 	struct nameidata nd;
1709 
1710 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ,
1711 	    UIO_USERSPACE, SCARG(uap, path), td);
1712 	if ((error = namei(&nd)) != 0)
1713 		return (error);
1714 	NDFREE(&nd, NDF_ONLY_PNBUF);
1715 	error = vn_stat(nd.ni_vp, &sb, td);
1716 	vput(nd.ni_vp);
1717 	if (error)
1718 		return (error);
1719 	cvtnstat(&sb, &nsb);
1720 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1721 	return (error);
1722 }
1723 
1724 /*
1725  * lstat_args(char *path, struct stat *ub)
1726  *
1727  * Get file status; this version does not follow links.
1728  */
1729 /* ARGSUSED */
1730 int
1731 nlstat(struct nlstat_args *uap)
1732 {
1733 	struct thread *td = curthread;
1734 	int error;
1735 	struct vnode *vp;
1736 	struct stat sb;
1737 	struct nstat nsb;
1738 	struct nameidata nd;
1739 
1740 	NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ,
1741 	    UIO_USERSPACE, SCARG(uap, path), td);
1742 	if ((error = namei(&nd)) != 0)
1743 		return (error);
1744 	vp = nd.ni_vp;
1745 	NDFREE(&nd, NDF_ONLY_PNBUF);
1746 	error = vn_stat(vp, &sb, td);
1747 	vput(vp);
1748 	if (error)
1749 		return (error);
1750 	cvtnstat(&sb, &nsb);
1751 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1752 	return (error);
1753 }
1754 
1755 /*
1756  * pathconf_Args(char *path, int name)
1757  *
1758  * Get configurable pathname variables.
1759  */
1760 /* ARGSUSED */
1761 int
1762 pathconf(struct pathconf_args *uap)
1763 {
1764 	struct thread *td = curthread;
1765 	int error;
1766 	struct nameidata nd;
1767 
1768 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ,
1769 	    UIO_USERSPACE, SCARG(uap, path), td);
1770 	if ((error = namei(&nd)) != 0)
1771 		return (error);
1772 	NDFREE(&nd, NDF_ONLY_PNBUF);
1773 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), uap->sysmsg_fds);
1774 	vput(nd.ni_vp);
1775 	return (error);
1776 }
1777 
1778 /*
1779  * XXX: daver
1780  * kern_readlink isn't properly split yet.  There is a copyin burried
1781  * in VOP_READLINK().
1782  */
1783 int
1784 kern_readlink(struct nameidata *nd, char *buf, int count, int *res)
1785 {
1786 	struct thread *td = curthread;
1787 	struct proc *p = td->td_proc;
1788 	struct vnode *vp;
1789 	struct iovec aiov;
1790 	struct uio auio;
1791 	int error;
1792 
1793 	error = namei(nd);
1794 	if (error)
1795 		return (error);
1796 	NDFREE(nd, NDF_ONLY_PNBUF);
1797 	vp = nd->ni_vp;
1798 	if (vp->v_type != VLNK)
1799 		error = EINVAL;
1800 	else {
1801 		aiov.iov_base = buf;
1802 		aiov.iov_len = count;
1803 		auio.uio_iov = &aiov;
1804 		auio.uio_iovcnt = 1;
1805 		auio.uio_offset = 0;
1806 		auio.uio_rw = UIO_READ;
1807 		auio.uio_segflg = UIO_USERSPACE;
1808 		auio.uio_td = td;
1809 		auio.uio_resid = count;
1810 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1811 	}
1812 	vput(vp);
1813 	*res = count - auio.uio_resid;
1814 	return (error);
1815 }
1816 
1817 /*
1818  * readlink_args(char *path, char *buf, int count)
1819  *
1820  * Return target name of a symbolic link.
1821  */
1822 int
1823 readlink(struct readlink_args *uap)
1824 {
1825 	struct thread *td = curthread;
1826 	struct nameidata nd;
1827 	int error;
1828 
1829 	NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ, UIO_USERSPACE,
1830 	    uap->path, td);
1831 
1832 	error = kern_readlink(&nd, uap->buf, uap->count,
1833 	    &uap->sysmsg_result);
1834 
1835 	return (error);
1836 }
1837 
1838 static int
1839 setfflags(struct vnode *vp, int flags)
1840 {
1841 	struct thread *td = curthread;
1842 	struct proc *p = td->td_proc;
1843 	int error;
1844 	struct vattr vattr;
1845 
1846 	/*
1847 	 * Prevent non-root users from setting flags on devices.  When
1848 	 * a device is reused, users can retain ownership of the device
1849 	 * if they are allowed to set flags and programs assume that
1850 	 * chown can't fail when done as root.
1851 	 */
1852 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
1853 	    ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0))
1854 		return (error);
1855 
1856 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1857 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
1858 	VATTR_NULL(&vattr);
1859 	vattr.va_flags = flags;
1860 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
1861 	VOP_UNLOCK(vp, NULL, 0, td);
1862 	return (error);
1863 }
1864 
1865 /*
1866  * chflags(char *path, int flags)
1867  *
1868  * Change flags of a file given a path name.
1869  */
1870 /* ARGSUSED */
1871 int
1872 chflags(struct chflags_args *uap)
1873 {
1874 	struct thread *td = curthread;
1875 	int error;
1876 	struct nameidata nd;
1877 
1878 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE,
1879 	    SCARG(uap, path), td);
1880 	if ((error = namei(&nd)) != 0)
1881 		return (error);
1882 	NDFREE(&nd, NDF_ONLY_PNBUF);
1883 	error = setfflags(nd.ni_vp, SCARG(uap, flags));
1884 	vrele(nd.ni_vp);
1885 	return error;
1886 }
1887 
1888 /*
1889  * fchflags_args(int fd, int flags)
1890  *
1891  * Change flags of a file given a file descriptor.
1892  */
1893 /* ARGSUSED */
1894 int
1895 fchflags(struct fchflags_args *uap)
1896 {
1897 	struct thread *td = curthread;
1898 	struct proc *p = td->td_proc;
1899 	struct file *fp;
1900 	int error;
1901 
1902 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1903 		return (error);
1904 	return setfflags((struct vnode *) fp->f_data, SCARG(uap, flags));
1905 }
1906 
1907 static int
1908 setfmode(struct vnode *vp, int mode)
1909 {
1910 	struct thread *td = curthread;
1911 	struct proc *p = td->td_proc;
1912 	int error;
1913 	struct vattr vattr;
1914 
1915 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1916 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
1917 	VATTR_NULL(&vattr);
1918 	vattr.va_mode = mode & ALLPERMS;
1919 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
1920 	VOP_UNLOCK(vp, NULL, 0, td);
1921 	return error;
1922 }
1923 
1924 int
1925 kern_chmod(struct nameidata *nd, int mode)
1926 {
1927 	int error;
1928 
1929 	error = namei(nd);
1930 	if (error)
1931 		return (error);
1932 	NDFREE(nd, NDF_ONLY_PNBUF);
1933 	error = setfmode(nd->ni_vp, mode);
1934 	vrele(nd->ni_vp);
1935 	return error;
1936 }
1937 
1938 /*
1939  * chmod_args(char *path, int mode)
1940  *
1941  * Change mode of a file given path name.
1942  */
1943 /* ARGSUSED */
1944 int
1945 chmod(struct chmod_args *uap)
1946 {
1947 	struct thread *td = curthread;
1948 	struct nameidata nd;
1949 	int error;
1950 
1951 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
1952 
1953 	error = kern_chmod(&nd, uap->mode);
1954 
1955 	return (error);
1956 }
1957 
1958 /*
1959  * lchmod_args(char *path, int mode)
1960  *
1961  * Change mode of a file given path name (don't follow links.)
1962  */
1963 /* ARGSUSED */
1964 int
1965 lchmod(struct lchmod_args *uap)
1966 {
1967 	struct thread *td = curthread;
1968 	int error;
1969 	struct nameidata nd;
1970 
1971 	NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, SCARG(uap, path), td);
1972 	if ((error = namei(&nd)) != 0)
1973 		return (error);
1974 	NDFREE(&nd, NDF_ONLY_PNBUF);
1975 	error = setfmode(nd.ni_vp, SCARG(uap, mode));
1976 	vrele(nd.ni_vp);
1977 	return error;
1978 }
1979 
1980 /*
1981  * fchmod_args(int fd, int mode)
1982  *
1983  * Change mode of a file given a file descriptor.
1984  */
1985 /* ARGSUSED */
1986 int
1987 fchmod(struct fchmod_args *uap)
1988 {
1989 	struct thread *td = curthread;
1990 	struct proc *p = td->td_proc;
1991 	struct file *fp;
1992 	int error;
1993 
1994 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1995 		return (error);
1996 	return setfmode((struct vnode *)fp->f_data, SCARG(uap, mode));
1997 }
1998 
1999 static int
2000 setfown(struct vnode *vp, uid_t uid, gid_t gid)
2001 {
2002 	struct thread *td = curthread;
2003 	struct proc *p = td->td_proc;
2004 	int error;
2005 	struct vattr vattr;
2006 
2007 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
2008 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
2009 	VATTR_NULL(&vattr);
2010 	vattr.va_uid = uid;
2011 	vattr.va_gid = gid;
2012 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
2013 	VOP_UNLOCK(vp, NULL, 0, td);
2014 	return error;
2015 }
2016 
2017 int
2018 kern_chown(struct nameidata *nd, int uid, int gid)
2019 {
2020 	int error;
2021 
2022 	error = namei(nd);
2023 	if (error)
2024 		return (error);
2025 	NDFREE(nd, NDF_ONLY_PNBUF);
2026 	error = setfown(nd->ni_vp, uid, gid);
2027 	vrele(nd->ni_vp);
2028 	return (error);
2029 }
2030 
2031 /*
2032  * chown(char *path, int uid, int gid)
2033  *
2034  * Set ownership given a path name.
2035  */
2036 int
2037 chown(struct chown_args *uap)
2038 {
2039 	struct thread *td = curthread;
2040 	struct nameidata nd;
2041 	int error;
2042 
2043 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
2044 
2045 	error = kern_chown(&nd, uap->uid, uap->gid);
2046 
2047 	return (error);
2048 }
2049 
2050 /*
2051  * lchown_args(char *path, int uid, int gid)
2052  *
2053  * Set ownership given a path name, do not cross symlinks.
2054  */
2055 int
2056 lchown(struct lchown_args *uap)
2057 {
2058 	struct thread *td = curthread;
2059 	int error;
2060 	struct nameidata nd;
2061 
2062 	NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, uap->path, td);
2063 
2064 	error = kern_chown(&nd, uap->uid, uap->gid);
2065 
2066 	return (error);
2067 }
2068 
2069 /*
2070  * fchown_args(int fd, int uid, int gid)
2071  *
2072  * Set ownership given a file descriptor.
2073  */
2074 /* ARGSUSED */
2075 int
2076 fchown(struct fchown_args *uap)
2077 {
2078 	struct thread *td = curthread;
2079 	struct proc *p = td->td_proc;
2080 	struct file *fp;
2081 	int error;
2082 
2083 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2084 		return (error);
2085 	return setfown((struct vnode *)fp->f_data,
2086 		SCARG(uap, uid), SCARG(uap, gid));
2087 }
2088 
2089 static int
2090 getutimes(const struct timeval *tvp, struct timespec *tsp)
2091 {
2092 	struct timeval tv[2];
2093 
2094 	if (tvp == NULL) {
2095 		microtime(&tv[0]);
2096 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2097 		tsp[1] = tsp[0];
2098 	} else {
2099 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2100 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2101 	}
2102 	return 0;
2103 }
2104 
2105 static int
2106 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag)
2107 {
2108 	struct thread *td = curthread;
2109 	struct proc *p = td->td_proc;
2110 	int error;
2111 	struct vattr vattr;
2112 
2113 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
2114 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
2115 	VATTR_NULL(&vattr);
2116 	vattr.va_atime = ts[0];
2117 	vattr.va_mtime = ts[1];
2118 	if (nullflag)
2119 		vattr.va_vaflags |= VA_UTIMES_NULL;
2120 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
2121 	VOP_UNLOCK(vp, NULL, 0, td);
2122 	return error;
2123 }
2124 
2125 int
2126 kern_utimes(struct nameidata *nd, struct timeval *tptr)
2127 {
2128 	struct timespec ts[2];
2129 	int error;
2130 
2131 	error = getutimes(tptr, ts);
2132 	if (error)
2133 		return (error);
2134 	error = namei(nd);
2135 	if (error)
2136 		return (error);
2137 	NDFREE(nd, NDF_ONLY_PNBUF);
2138 	error = setutimes(nd->ni_vp, ts, tptr == NULL);
2139 	vrele(nd->ni_vp);
2140 	return (error);
2141 }
2142 
2143 /*
2144  * utimes_args(char *path, struct timeval *tptr)
2145  *
2146  * Set the access and modification times of a file.
2147  */
2148 int
2149 utimes(struct utimes_args *uap)
2150 {
2151 	struct thread *td = curthread;
2152 	struct timeval tv[2];
2153 	struct nameidata nd;
2154 	int error;
2155 
2156 	if (uap->tptr) {
2157  		error = copyin(uap->tptr, tv, sizeof(tv));
2158 		if (error)
2159 			return (error);
2160 	}
2161 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
2162 
2163 	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
2164 
2165 	return (error);
2166 }
2167 
2168 /*
2169  * lutimes_args(char *path, struct timeval *tptr)
2170  *
2171  * Set the access and modification times of a file.
2172  */
2173 int
2174 lutimes(struct lutimes_args *uap)
2175 {
2176 	struct thread *td = curthread;
2177 	struct timeval tv[2];
2178 	struct nameidata nd;
2179 	int error;
2180 
2181 	if (uap->tptr) {
2182 		error = copyin(uap->tptr, tv, sizeof(tv));
2183 		if (error)
2184 			return (error);
2185 	}
2186 	NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, uap->path, td);
2187 
2188 	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
2189 
2190 	return (error);
2191 }
2192 
2193 int
2194 kern_futimes(int fd, struct timeval *tptr)
2195 {
2196 	struct thread *td = curthread;
2197 	struct proc *p = td->td_proc;
2198 	struct timespec ts[2];
2199 	struct file *fp;
2200 	int error;
2201 
2202 	error = getutimes(tptr, ts);
2203 	if (error)
2204 		return (error);
2205 	error = getvnode(p->p_fd, fd, &fp);
2206 	if (error)
2207 		return (error);
2208 	error =  setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
2209 	return (error);
2210 }
2211 
2212 /*
2213  * futimes_args(int fd, struct timeval *tptr)
2214  *
2215  * Set the access and modification times of a file.
2216  */
2217 int
2218 futimes(struct futimes_args *uap)
2219 {
2220 	struct timeval tv[2];
2221 	int error;
2222 
2223 	if (uap->tptr) {
2224 		error = copyin(uap->tptr, tv, sizeof(tv));
2225 		if (error)
2226 			return (error);
2227 	}
2228 
2229 	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
2230 
2231 	return (error);
2232 }
2233 
2234 int
2235 kern_truncate(struct nameidata* nd, off_t length)
2236 {
2237 	struct thread *td = curthread;
2238 	struct proc *p = td->td_proc;
2239 	struct vnode *vp;
2240 	struct vattr vattr;
2241 	int error;
2242 
2243 	if (length < 0)
2244 		return(EINVAL);
2245 	if ((error = namei(nd)) != 0)
2246 		return (error);
2247 	vp = nd->ni_vp;
2248 	NDFREE(nd, NDF_ONLY_PNBUF);
2249 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
2250 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
2251 	if (vp->v_type == VDIR)
2252 		error = EISDIR;
2253 	else if ((error = vn_writechk(vp)) == 0 &&
2254 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, td)) == 0) {
2255 		VATTR_NULL(&vattr);
2256 		vattr.va_size = length;
2257 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
2258 	}
2259 	vput(vp);
2260 	return (error);
2261 }
2262 
2263 /*
2264  * truncate(char *path, int pad, off_t length)
2265  *
2266  * Truncate a file given its path name.
2267  */
2268 int
2269 truncate(struct truncate_args *uap)
2270 {
2271 	struct thread *td = curthread;
2272 	struct nameidata nd;
2273 	int error;
2274 
2275 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);
2276 
2277 	error = kern_truncate(&nd, uap->length);
2278 
2279 	return error;
2280 }
2281 
2282 int
2283 kern_ftruncate(int fd, off_t length)
2284 {
2285 	struct thread *td = curthread;
2286 	struct proc *p = td->td_proc;
2287 	struct vattr vattr;
2288 	struct vnode *vp;
2289 	struct file *fp;
2290 	int error;
2291 
2292 	if (length < 0)
2293 		return(EINVAL);
2294 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2295 		return (error);
2296 	if ((fp->f_flag & FWRITE) == 0)
2297 		return (EINVAL);
2298 	vp = (struct vnode *)fp->f_data;
2299 	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
2300 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
2301 	if (vp->v_type == VDIR)
2302 		error = EISDIR;
2303 	else if ((error = vn_writechk(vp)) == 0) {
2304 		VATTR_NULL(&vattr);
2305 		vattr.va_size = length;
2306 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2307 	}
2308 	VOP_UNLOCK(vp, NULL, 0, td);
2309 	return (error);
2310 }
2311 
2312 /*
2313  * ftruncate_args(int fd, int pad, off_t length)
2314  *
2315  * Truncate a file given a file descriptor.
2316  */
2317 int
2318 ftruncate(struct ftruncate_args *uap)
2319 {
2320 	int error;
2321 
2322 	error = kern_ftruncate(uap->fd, uap->length);
2323 
2324 	return (error);
2325 }
2326 
2327 /*
2328  * fsync(int fd)
2329  *
2330  * Sync an open file.
2331  */
2332 /* ARGSUSED */
2333 int
2334 fsync(struct fsync_args *uap)
2335 {
2336 	struct thread *td = curthread;
2337 	struct proc *p = td->td_proc;
2338 	struct vnode *vp;
2339 	struct file *fp;
2340 	vm_object_t obj;
2341 	int error;
2342 
2343 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2344 		return (error);
2345 	vp = (struct vnode *)fp->f_data;
2346 	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
2347 	if (VOP_GETVOBJECT(vp, &obj) == 0)
2348 		vm_object_page_clean(obj, 0, 0, 0);
2349 	if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) == 0 &&
2350 	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
2351 	    bioops.io_fsync)
2352 		error = (*bioops.io_fsync)(vp);
2353 	VOP_UNLOCK(vp, NULL, 0, td);
2354 	return (error);
2355 }
2356 
/*
 * Rename the object described by fromnd to the name described by tond.
 *
 * Both nameidata structures must already be initialized by the caller
 * (rename() in this file shows the flags used by the system call).  Both
 * lookups are performed here.
 *
 * Returns 0 on success or an errno value.  Internally error == -1 is used
 * as a "source and target are the same vnode, nothing to do" marker; it is
 * converted to 0 before returning.
 */
int
kern_rename(struct nameidata *fromnd, struct nameidata *tond)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *tvp, *fvp, *tdvp;
	int error;

	bwillwrite();
	error = namei(fromnd);
	if (error)
		return (error);
	fvp = fromnd->ni_vp;
	/* Tell the target lookup that the new name will be a directory. */
	if (fromnd->ni_vp->v_type == VDIR)
		tond->ni_cnd.cn_flags |= CNP_WILLBEDIR;
	error = namei(tond);
	if (error) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		/* Only the source lookup succeeded; undo just that one. */
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd->ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond->ni_dvp;
	tvp = tond->ni_vp;
	/*
	 * If the target exists, source and target must agree on being a
	 * directory or not.  The jumps to "out" carry a non-zero error, so
	 * the cleanup branch there is the one that runs.
	 */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory that will hold the target. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 * Note that this test runs even when EINVAL was just set above and
	 * replaces it with -1.
	 */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		VOP_LEASE(tdvp, td, p->p_ucred, LEASE_WRITE);
		if (fromnd->ni_dvp != tdvp) {
			VOP_LEASE(fromnd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
		}
		if (tvp) {
			VOP_LEASE(tvp, td, p->p_ucred, LEASE_WRITE);
		}
		/*
		 * None of the four vnodes is released by this function on
		 * this path.
		 * NOTE(review): presumably VOP_RENAME consumes them - confirm.
		 */
		error = VOP_RENAME(fromnd->ni_dvp, NCPNULL, fromnd->ni_vp,
		    &fromnd->ni_cnd, tond->ni_dvp, NCPNULL, tond->ni_vp,
		    &tond->ni_cnd);
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		NDFREE(tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Nothing was renamed: release everything by hand.  The
		 * target side is dropped with vput() (vrele() for tdvp when
		 * it is the same vnode as tvp), the source side with vrele().
		 */
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		NDFREE(tond, NDF_ONLY_PNBUF);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromnd->ni_dvp);
		vrele(fvp);
	}
	vrele(tond->ni_startdir);
	ASSERT_VOP_UNLOCKED(fromnd->ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(fromnd->ni_vp, "rename");
	ASSERT_VOP_UNLOCKED(tond->ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(tond->ni_vp, "rename");
out1:
	if (fromnd->ni_startdir)
		vrele(fromnd->ni_startdir);
	/* Map the internal "nothing to do" marker to success. */
	if (error == -1)
		return (0);
	return (error);
}
2439 
2440 /*
2441  * rename_args(char *from, char *to)
2442  *
2443  * Rename files.  Source and destination must either both be directories,
2444  * or both not be directories.  If target is a directory, it must be empty.
2445  */
2446 int
2447 rename(struct rename_args *uap)
2448 {
2449 	struct thread *td = curthread;
2450 	struct nameidata fromnd, tond;
2451 	int error;
2452 
2453 	NDINIT(&fromnd, NAMEI_DELETE, CNP_WANTPARENT | CNP_SAVESTART,
2454 		UIO_USERSPACE, uap->from, td);
2455 	NDINIT(&tond, NAMEI_RENAME,
2456 	    CNP_LOCKPARENT | CNP_LOCKLEAF | CNP_NOCACHE |
2457 	     CNP_SAVESTART | CNP_NOOBJ,
2458 	    UIO_USERSPACE, uap->to, td);
2459 
2460 	error = kern_rename(&fromnd, &tond);
2461 
2462 	return (error);
2463 }
2464 
2465 int
2466 kern_mkdir(struct nameidata *nd, int mode)
2467 {
2468 	struct thread *td = curthread;
2469 	struct proc *p = td->td_proc;
2470 	struct vnode *vp;
2471 	struct vattr vattr;
2472 	int error;
2473 
2474 	bwillwrite();
2475 	nd->ni_cnd.cn_flags |= CNP_WILLBEDIR;
2476 	error = namei(nd);
2477 	if (error)
2478 		return (error);
2479 	vp = nd->ni_vp;
2480 	if (vp) {
2481 		NDFREE(nd, NDF_ONLY_PNBUF);
2482 		if (nd->ni_dvp == vp)
2483 			vrele(nd->ni_dvp);
2484 		else
2485 			vput(nd->ni_dvp);
2486 		vrele(vp);
2487 		return (EEXIST);
2488 	}
2489 	VATTR_NULL(&vattr);
2490 	vattr.va_type = VDIR;
2491 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2492 	VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
2493 	error = VOP_MKDIR(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd,
2494 	    &vattr);
2495 	NDFREE(nd, NDF_ONLY_PNBUF);
2496 	vput(nd->ni_dvp);
2497 	if (error == 0)
2498 		vput(nd->ni_vp);
2499 	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "mkdir");
2500 	ASSERT_VOP_UNLOCKED(nd->ni_vp, "mkdir");
2501 	return (error);
2502 }
2503 
2504 /*
2505  * mkdir_args(char *path, int mode)
2506  *
2507  * Make a directory file.
2508  */
2509 /* ARGSUSED */
2510 int
2511 mkdir(struct mkdir_args *uap)
2512 {
2513 	struct thread *td = curthread;
2514 	struct nameidata nd;
2515 	int error;
2516 
2517 	NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
2518 	    td);
2519 
2520 	error = kern_mkdir(&nd, uap->mode);
2521 
2522 	return (error);
2523 }
2524 
2525 int
2526 kern_rmdir(struct nameidata *nd)
2527 {
2528 	struct thread *td = curthread;
2529 	struct proc *p = td->td_proc;
2530 	struct vnode *vp;
2531 	int error;
2532 
2533 	bwillwrite();
2534 	error = namei(nd);
2535 	if (error)
2536 		return (error);
2537 	vp = nd->ni_vp;
2538 	if (vp->v_type != VDIR) {
2539 		error = ENOTDIR;
2540 		goto out;
2541 	}
2542 	/*
2543 	 * No rmdir "." please.
2544 	 */
2545 	if (nd->ni_dvp == vp) {
2546 		error = EINVAL;
2547 		goto out;
2548 	}
2549 	/*
2550 	 * The root of a mounted filesystem cannot be deleted.
2551 	 */
2552 	if (vp->v_flag & VROOT)
2553 		error = EBUSY;
2554 	else {
2555 		VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
2556 		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
2557 		error = VOP_RMDIR(nd->ni_dvp, NCPNULL, nd->ni_vp,
2558 		    &nd->ni_cnd);
2559 	}
2560 out:
2561 	NDFREE(nd, NDF_ONLY_PNBUF);
2562 	if (nd->ni_dvp == vp)
2563 		vrele(nd->ni_dvp);
2564 	else
2565 		vput(nd->ni_dvp);
2566 	if (vp != NULLVP)
2567 		vput(vp);
2568 	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "rmdir");
2569 	ASSERT_VOP_UNLOCKED(nd->ni_vp, "rmdir");
2570 	return (error);
2571 }
2572 
2573 /*
2574  * rmdir_args(char *path)
2575  *
2576  * Remove a directory file.
2577  */
2578 /* ARGSUSED */
2579 int
2580 rmdir(struct rmdir_args *uap)
2581 {
2582 	struct thread *td = curthread;
2583 	struct nameidata nd;
2584 	int error;
2585 
2586 	NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT | CNP_LOCKLEAF,
2587 	    UIO_USERSPACE, uap->path, td);
2588 
2589 	error = kern_rmdir(&nd);
2590 
2591 	return (error);
2592 }
2593 
/*
 * Read directory entries from the directory open on descriptor fd into
 * the user-space buffer buf (at most count bytes), starting at the
 * descriptor's current offset.
 *
 * On success *res receives the number of bytes transferred and, if basep
 * is not NULL, *basep receives the file offset at which the read started.
 * The descriptor's offset is advanced by the read.
 *
 * Returns EBADF if the descriptor is not open for reading, EINVAL if the
 * vnode is not a directory, or any error from getvnode(), VOP_READDIR()
 * or the union directory hook.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	long loff;
	int error, eofflag;

	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
	/*
	 * Re-entered whenever the union handling below switches the
	 * descriptor to a different vnode; the uio is rebuilt from scratch
	 * each time.
	 */
unionread:
	if (vp->v_type != VDIR)
		return (EINVAL);
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = count;
	/* vn_lock(vp, NULL, LK_SHARED | LK_RETRY, td); */
	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read started so it can be reported via basep. */
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	fp->f_offset = auio.uio_offset;
	VOP_UNLOCK(vp, NULL, 0, td);
	if (error)
		return (error);
	/* Nothing was transferred: see whether another layer should be read. */
	if (count == auio.uio_resid) {
		/*
		 * Optional hook (may be NULL).  A return of -1 asks for the
		 * read to be retried with the possibly updated vp; any other
		 * non-zero value is returned as an error.
		 */
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				return (error);
		}
		/*
		 * At the root of a MNT_UNION mount, continue with the vnode
		 * the mount covers: point the descriptor at it, restart at
		 * offset 0 and swap the vnode references.
		 */
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			vref(vp);
			fp->f_data = (caddr_t)vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	if (basep) {
		*basep = loff;
	}
	*res = count - auio.uio_resid;
	return (error);
}
2655 
2656 /*
2657  * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
2658  *
2659  * Read a block of directory entries in a file system independent format.
2660  */
2661 int
2662 getdirentries(struct getdirentries_args *uap)
2663 {
2664 	long base;
2665 	int error;
2666 
2667 	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
2668 	    &uap->sysmsg_result);
2669 
2670 	if (error == 0)
2671 		error = copyout(&base, uap->basep, sizeof(*uap->basep));
2672 	return (error);
2673 }
2674 
2675 /*
2676  * getdents_args(int fd, char *buf, size_t count)
2677  */
2678 int
2679 getdents(struct getdents_args *uap)
2680 {
2681 	int error;
2682 
2683 	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
2684 	    &uap->sysmsg_result);
2685 
2686 	return (error);
2687 }
2688 
2689 /*
2690  * umask(int newmask)
2691  *
2692  * Set the mode mask for creation of filesystem nodes.
2693  *
2694  * MP SAFE
2695  */
2696 int
2697 umask(struct umask_args *uap)
2698 {
2699 	struct thread *td = curthread;
2700 	struct proc *p = td->td_proc;
2701 	struct filedesc *fdp;
2702 
2703 	fdp = p->p_fd;
2704 	uap->sysmsg_result = fdp->fd_cmask;
2705 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
2706 	return (0);
2707 }
2708 
2709 /*
2710  * revoke(char *path)
2711  *
2712  * Void all references to file by ripping underlying filesystem
2713  * away from vnode.
2714  */
2715 /* ARGSUSED */
2716 int
2717 revoke(struct revoke_args *uap)
2718 {
2719 	struct thread *td = curthread;
2720 	struct proc *p = td->td_proc;
2721 	struct vnode *vp;
2722 	struct vattr vattr;
2723 	int error;
2724 	struct nameidata nd;
2725 
2726 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2727 	if ((error = namei(&nd)) != 0)
2728 		return (error);
2729 	vp = nd.ni_vp;
2730 	NDFREE(&nd, NDF_ONLY_PNBUF);
2731 	if (vp->v_type != VCHR && vp->v_type != VBLK) {
2732 		error = EINVAL;
2733 		goto out;
2734 	}
2735 	if ((error = VOP_GETATTR(vp, &vattr, td)) != 0)
2736 		goto out;
2737 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2738 	    (error = suser_cred(p->p_ucred, PRISON_ROOT)))
2739 		goto out;
2740 	if (count_udev(vp->v_udev) > 0)
2741 		VOP_REVOKE(vp, REVOKEALL);
2742 out:
2743 	vrele(vp);
2744 	return (error);
2745 }
2746 
2747 /*
2748  * Convert a user file descriptor to a kernel file entry.
2749  */
2750 int
2751 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2752 {
2753 	struct file *fp;
2754 
2755 	if ((u_int)fd >= fdp->fd_nfiles ||
2756 	    (fp = fdp->fd_ofiles[fd]) == NULL)
2757 		return (EBADF);
2758 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
2759 		return (EINVAL);
2760 	*fpp = fp;
2761 	return (0);
2762 }
2763 /*
2764  * getfh_args(char *fname, fhandle_t *fhp)
2765  *
2766  * Get (NFS) file handle
2767  */
2768 int
2769 getfh(struct getfh_args *uap)
2770 {
2771 	struct thread *td = curthread;
2772 	struct nameidata nd;
2773 	fhandle_t fh;
2774 	struct vnode *vp;
2775 	int error;
2776 
2777 	/*
2778 	 * Must be super user
2779 	 */
2780 	error = suser(td);
2781 	if (error)
2782 		return (error);
2783 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, uap->fname, td);
2784 	error = namei(&nd);
2785 	if (error)
2786 		return (error);
2787 	NDFREE(&nd, NDF_ONLY_PNBUF);
2788 	vp = nd.ni_vp;
2789 	bzero(&fh, sizeof(fh));
2790 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2791 	error = VFS_VPTOFH(vp, &fh.fh_fid);
2792 	vput(vp);
2793 	if (error)
2794 		return (error);
2795 	error = copyout(&fh, uap->fhp, sizeof (fh));
2796 	return (error);
2797 }
2798 
/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * The handle is copied in from user space, mapped to a mount and then to
 * a vnode, and the vnode is opened with the checks copied from vn_open.
 * On success the new descriptor index is returned as the syscall result.
 * O_CREAT, and flags requesting neither read nor write, are rejected
 * with EINVAL; an unknown filesystem id yields ESTALE.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 */
int
fhopen(struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(SCARG(uap, flags));
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the access mode to check from the requested open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, p->p_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/* The vnode is unlocked around VOP_LEASE and then relocked. */
		VOP_UNLOCK(vp, NULL, 0, td);			/* XXX */
		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
		vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, p->p_ucred, td);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, p->p_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 * NOTE(review): no VOP_CLOSE is issued on the failure paths that
	 * follow the successful VOP_OPEN above - confirm this is intended.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(p, &nfp, &indx)) != 0) {
		/* Undo the writer count taken above before bailing out. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op.
	 */
	fhold(fp);
	/* From here on the file entry points at the vnode. */
	nfp->f_data = (caddr_t)vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is not kept locked across the advisory lock call. */
		VOP_UNLOCK(vp, NULL, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Normally close the descriptor
			 * but handle the case where someone might have dup()d
			 * or close()d it when we weren't looking.
			 */
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				fdrop(fp, td);
			}

			/*
			 * release our private reference.
			 */
			fdrop(fp, td);
			return (error);
		}
		vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	/* Regular files without a VM object get one now; the result is ignored. */
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td);

	VOP_UNLOCK(vp, NULL, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	uap->sysmsg_result = indx;
	return (0);

bad:
	/* Error before a file entry took over the vnode: release it here. */
	vput(vp);
	return (error);
}
2970 
2971 /*
2972  * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
2973  */
2974 int
2975 fhstat(struct fhstat_args *uap)
2976 {
2977 	struct thread *td = curthread;
2978 	struct stat sb;
2979 	fhandle_t fh;
2980 	struct mount *mp;
2981 	struct vnode *vp;
2982 	int error;
2983 
2984 	/*
2985 	 * Must be super user
2986 	 */
2987 	error = suser(td);
2988 	if (error)
2989 		return (error);
2990 
2991 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
2992 	if (error)
2993 		return (error);
2994 
2995 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
2996 		return (ESTALE);
2997 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
2998 		return (error);
2999 	error = vn_stat(vp, &sb, td);
3000 	vput(vp);
3001 	if (error)
3002 		return (error);
3003 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3004 	return (error);
3005 }
3006 
3007 /*
3008  * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
3009  */
3010 int
3011 fhstatfs(struct fhstatfs_args *uap)
3012 {
3013 	struct thread *td = curthread;
3014 	struct statfs *sp;
3015 	struct mount *mp;
3016 	struct vnode *vp;
3017 	struct statfs sb;
3018 	fhandle_t fh;
3019 	int error;
3020 
3021 	/*
3022 	 * Must be super user
3023 	 */
3024 	if ((error = suser(td)))
3025 		return (error);
3026 
3027 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3028 		return (error);
3029 
3030 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3031 		return (ESTALE);
3032 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3033 		return (error);
3034 	mp = vp->v_mount;
3035 	sp = &mp->mnt_stat;
3036 	vput(vp);
3037 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3038 		return (error);
3039 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3040 	if (suser(td)) {
3041 		bcopy(sp, &sb, sizeof(sb));
3042 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3043 		sp = &sb;
3044 	}
3045 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3046 }
3047 
3048 /*
3049  * Syscall to push extended attribute configuration information into the
3050  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3051  * a command (int cmd), and attribute name and misc data.  For now, the
3052  * attribute name is left in userspace for consumption by the VFS_op.
3053  * It will probably be changed to be copied into sysspace by the
3054  * syscall in the future, once issues with various consumers of the
3055  * attribute code have raised their hands.
3056  *
3057  * Currently this is used only by UFS Extended Attributes.
3058  */
3059 int
3060 extattrctl(struct extattrctl_args *uap)
3061 {
3062 	struct thread *td = curthread;
3063 	struct nameidata nd;
3064 	struct mount *mp;
3065 	int error;
3066 
3067 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3068 	if ((error = namei(&nd)) != 0)
3069 		return (error);
3070 	mp = nd.ni_vp->v_mount;
3071 	NDFREE(&nd, 0);
3072 	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
3073 	    SCARG(uap, arg), td));
3074 }
3075 
3076 /*
3077  * Syscall to set a named extended attribute on a file or directory.
3078  * Accepts attribute name, and a uio structure pointing to the data to set.
3079  * The uio is consumed in the style of writev().  The real work happens
3080  * in VOP_SETEXTATTR().
3081  */
3082 int
3083 extattr_set_file(struct extattr_set_file_args *uap)
3084 {
3085 	struct thread *td = curthread;
3086 	struct proc *p = td->td_proc;
3087 	struct nameidata nd;
3088 	struct uio auio;
3089 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3090 	char attrname[EXTATTR_MAXNAMELEN];
3091 	u_int iovlen, cnt;
3092 	int error, i;
3093 
3094 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3095 	if (error)
3096 		return (error);
3097 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
3098 	    SCARG(uap, path), td);
3099 	if ((error = namei(&nd)) != 0)
3100 		return(error);
3101 	iovlen = uap->iovcnt * sizeof(struct iovec);
3102 	if (uap->iovcnt > UIO_SMALLIOV) {
3103 		if (uap->iovcnt > UIO_MAXIOV) {
3104 			error = EINVAL;
3105 			goto done;
3106 		}
3107 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3108 		needfree = iov;
3109 	} else
3110 		iov = aiov;
3111 	auio.uio_iov = iov;
3112 	auio.uio_iovcnt = uap->iovcnt;
3113 	auio.uio_rw = UIO_WRITE;
3114 	auio.uio_segflg = UIO_USERSPACE;
3115 	auio.uio_td = td;
3116 	auio.uio_offset = 0;
3117 	if ((error = copyin(uap->iovp, iov, iovlen)))
3118 		goto done;
3119 	auio.uio_resid = 0;
3120 	for (i = 0; i < uap->iovcnt; i++) {
3121 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3122 			error = EINVAL;
3123 			goto done;
3124 		}
3125 		auio.uio_resid += iov->iov_len;
3126 		iov++;
3127 	}
3128 	cnt = auio.uio_resid;
3129 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, td);
3130 	cnt -= auio.uio_resid;
3131 	uap->sysmsg_result = cnt;
3132 done:
3133 	if (needfree)
3134 		FREE(needfree, M_IOV);
3135 	NDFREE(&nd, 0);
3136 	return (error);
3137 }
3138 
3139 /*
3140  * Syscall to get a named extended attribute on a file or directory.
3141  * Accepts attribute name, and a uio structure pointing to a buffer for the
3142  * data.  The uio is consumed in the style of readv().  The real work
3143  * happens in VOP_GETEXTATTR();
3144  */
3145 int
3146 extattr_get_file(struct extattr_get_file_args *uap)
3147 {
3148 	struct thread *td = curthread;
3149 	struct proc *p = td->td_proc;
3150 	struct nameidata nd;
3151 	struct uio auio;
3152 	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
3153 	char attrname[EXTATTR_MAXNAMELEN];
3154 	u_int iovlen, cnt;
3155 	int error, i;
3156 
3157 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3158 	if (error)
3159 		return (error);
3160 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
3161 	    SCARG(uap, path), td);
3162 	if ((error = namei(&nd)) != 0)
3163 		return (error);
3164 	iovlen = uap->iovcnt * sizeof (struct iovec);
3165 	if (uap->iovcnt > UIO_SMALLIOV) {
3166 		if (uap->iovcnt > UIO_MAXIOV) {
3167 			NDFREE(&nd, 0);
3168 			return (EINVAL);
3169 		}
3170 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3171 		needfree = iov;
3172 	} else {
3173 		iov = aiov;
3174 		needfree = NULL;
3175 	}
3176 	auio.uio_iov = iov;
3177 	auio.uio_iovcnt = uap->iovcnt;
3178 	auio.uio_rw = UIO_READ;
3179 	auio.uio_segflg = UIO_USERSPACE;
3180 	auio.uio_td = td;
3181 	auio.uio_offset = 0;
3182 	if ((error = copyin(uap->iovp, iov, iovlen)))
3183 		goto done;
3184 	auio.uio_resid = 0;
3185 	for (i = 0; i < uap->iovcnt; i++) {
3186 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3187 			error = EINVAL;
3188 			goto done;
3189 		}
3190 		auio.uio_resid += iov->iov_len;
3191 		iov++;
3192 	}
3193 	cnt = auio.uio_resid;
3194 	error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, td);
3195 	cnt -= auio.uio_resid;
3196 	uap->sysmsg_result = cnt;
3197 done:
3198 	if (needfree)
3199 		FREE(needfree, M_IOV);
3200 	NDFREE(&nd, 0);
3201 	return(error);
3202 }
3203 
3204 /*
3205  * Syscall to delete a named extended attribute from a file or directory.
3206  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
3207  */
3208 int
3209 extattr_delete_file(struct extattr_delete_file_args *uap)
3210 {
3211 	struct thread *td = curthread;
3212 	struct proc *p = td->td_proc;
3213 	struct nameidata nd;
3214 	char attrname[EXTATTR_MAXNAMELEN];
3215 	int	error;
3216 
3217 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3218 	if (error)
3219 		return(error);
3220 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE,
3221 	    SCARG(uap, path), td);
3222 	if ((error = namei(&nd)) != 0)
3223 		return(error);
3224 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_ucred, td);
3225 	NDFREE(&nd, 0);
3226 	return(error);
3227 }
3228 
/*
 * Print statistics describing the current state of the buffer pool.
 * This can be toggled by the system control option debug.syncprt.
 *
 * For each of the first three buffer queues, prints the number of
 * buffers on the queue followed by a breakdown by buffer size in
 * PAGE_SIZE steps (only sizes that actually occur are listed).
 */
#ifdef DEBUG
void
vfs_bufstats(void)
{
	static char *bname[3] = { "LOCKED", "LRU", "AGE" };
	int counts[(MAXBSIZE / PAGE_SIZE) + 1];
	struct bqueues *queue;
	struct buf *bp;
	int spl, qidx, slot, total;

	for (qidx = 0, queue = bufqueues; queue < &bufqueues[3];
	    qidx++, queue++) {
		total = 0;
		for (slot = 0; slot <= MAXBSIZE / PAGE_SIZE; slot++)
			counts[slot] = 0;

		/* Walk the queue with buffer I/O interrupts blocked. */
		spl = splbio();
		TAILQ_FOREACH(bp, queue, b_freelist) {
			counts[bp->b_bufsize / PAGE_SIZE]++;
			total++;
		}
		splx(spl);

		printf("%s: total-%d", bname[qidx], total);
		for (slot = 0; slot <= MAXBSIZE / PAGE_SIZE; slot++) {
			if (counts[slot] != 0)
				printf(", %d-%d", slot * PAGE_SIZE, counts[slot]);
		}
		printf("\n");
	}
}
#endif
3261