xref: /openbsd/sys/kern/vfs_syscalls.c (revision 8932bfb7)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.177 2011/07/18 00:16:54 matthew Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/uio.h>
51 #include <sys/malloc.h>
52 #include <sys/pool.h>
53 #include <sys/dirent.h>
54 #include <sys/dkio.h>
55 #include <sys/disklabel.h>
56 #include <sys/ktrace.h>
57 
58 #include <sys/syscallargs.h>
59 
60 #include <uvm/uvm_extern.h>
61 #include <sys/sysctl.h>
62 
/* Declared extern here; its definition is not in the part of the file shown. */
63 extern int suid_clear;
64 int	usermount = 0;		/* sysctl: by default, users may not mount */
65 
/* Common routine for chroot and chdir. */
66 static int change_dir(struct nameidata *, struct proc *);
67 
/* Retargets process current/root directories after a mount. */
68 void checkdirs(struct vnode *);
69 
/* Copies a struct statfs to user space, zeroing f_fsid for non-root callers. */
70 int copyout_statfs(struct statfs *, void *, struct proc *);
71 
72 int getdirentries_internal(struct proc *, int, char *, int, off_t *,
73     register_t *);
74 
/*
 * Internal do*at() workers.  doopenat() and domknodat() are called by both
 * the plain and the *at() syscall entry points in this file; the remaining
 * workers presumably follow the same pattern (their callers are outside
 * this view -- TODO confirm).
 */
75 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
76 int domknodat(struct proc *, int, const char *, mode_t, dev_t, register_t *);
77 int domkfifoat(struct proc *, int, const char *, mode_t, register_t *);
78 int dolinkat(struct proc *, int, const char *, int, const char *, int,
79     register_t *);
80 int dosymlinkat(struct proc *, const char *, int, const char *, register_t *);
81 int dounlinkat(struct proc *, int, const char *, int, register_t *);
82 int dofaccessat(struct proc *, int, const char *, int, int, register_t *);
83 int dofstatat(struct proc *, int, const char *, struct stat *, int,
84     register_t *);
85 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
86     register_t *);
87 int dofchmodat(struct proc *, int, const char *, mode_t, int, register_t *);
88 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int,
89     register_t *);
90 int dorenameat(struct proc *, int, const char *, int, const char *,
91     register_t *);
92 int domkdirat(struct proc *, int, const char *, mode_t, register_t *);
93 int doutimensat(struct proc *, int, const char *, struct timespec [2],
94     int, register_t *);
95 int dovutimens(struct proc *, struct vnode *, struct timespec [2],
96     register_t *);
97 int dofutimens(struct proc *, int, struct timespec [2], register_t *);
98 
99 /*
100  * Virtual File System System Calls
101  */
102 
103 /*
104  * Mount a file system.
 *
 * Syscall arguments (see the sys_mount_args layout below): the file
 * system type name, the mount point path, the mount flags and an opaque
 * data pointer that is passed on to VFS_MOUNT() untouched.
 *
 * With MNT_UPDATE in flags the path must name the root vnode of an
 * existing mount, whose flags are then changed in place.  Otherwise a new
 * struct mount is allocated and attached to the directory named by path.
 *
 * Returns 0 on success or an errno value.
105  */
106 /* ARGSUSED */
107 int
108 sys_mount(struct proc *p, void *v, register_t *retval)
109 {
110 	struct sys_mount_args /* {
111 		syscallarg(const char *) type;
112 		syscallarg(const char *) path;
113 		syscallarg(int) flags;
114 		syscallarg(void *) data;
115 	} */ *uap = v;
116 	struct vnode *vp;
117 	struct mount *mp;
118 	int error, mntflag = 0;	/* mntflag: flags of the mount being updated, saved for rollback */
119 	char fstypename[MFSNAMELEN];
120 	char fspath[MNAMELEN];
121 	struct vattr va;
122 	struct nameidata nd;
123 	struct vfsconf *vfsp;
124 	int flags = SCARG(uap, flags);
125 
	/* While usermount is 0 the caller must pass the suser() check. */
126 	if (usermount == 0 && (error = suser(p, 0)))
127 		return (error);
128 
129 	/*
130 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
131 	 */
132 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
133 	if (error)
134 		return(error);
135 
136 	/*
137 	 * Get vnode to be covered
	 * (looked up from the kernel copy of the path, hence UIO_SYSSPACE).
138 	 */
139 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
140 	if ((error = namei(&nd)) != 0)
141 		return (error);
142 	vp = nd.ni_vp;
143 	if (flags & MNT_UPDATE) {
		/* Updating: vp has to be the root of an existing mount. */
144 		if ((vp->v_flag & VROOT) == 0) {
145 			vput(vp);
146 			return (EINVAL);
147 		}
148 		mp = vp->v_mount;
149 		mntflag = mp->mnt_flag;
150 		/*
151 		 * We only allow the filesystem to be reloaded if it
152 		 * is currently mounted read-only.
153 		 */
154 		if ((flags & MNT_RELOAD) &&
155 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
156 			vput(vp);
157 			return (EOPNOTSUPP);	/* Needs translation */
158 		}
159 
160 		/*
161 		 * Only root, or the user that did the original mount is
162 		 * permitted to update it.
163 		 */
164 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
165 		    (error = suser(p, 0))) {
166 			vput(vp);
167 			return (error);
168 		}
169 		/*
170 		 * Do not allow NFS export by non-root users. Silently
171 		 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and
172 		 * inherit MNT_NOEXEC from the mount point.
173 		 */
174 		if (suser(p, 0) != 0) {
175 			if (flags & MNT_EXPORTED) {
176 				vput(vp);
177 				return (EPERM);
178 			}
179 			flags |= MNT_NOSUID | MNT_NODEV;
180 			if (mntflag & MNT_NOEXEC)
181 				flags |= MNT_NOEXEC;
182 		}
183 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
184 			vput(vp);
185 			return (error);
186 		}
		/* Record the requested operation in the mount itself. */
187 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
188 		goto update;
189 	}
190 	/*
191 	 * If the user is not root, ensure that they own the directory
192 	 * onto which we are attempting to mount.
193 	 */
194 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
195 	    (va.va_uid != p->p_ucred->cr_uid &&
196 	    (error = suser(p, 0)))) {
197 		vput(vp);
198 		return (error);
199 	}
200 	/*
201 	 * Do not allow NFS export by non-root users. Silently
202 	 * enforce MNT_NOSUID and MNT_NODEV for non-root users, and inherit
203 	 * MNT_NOEXEC from the mount point.
204 	 */
205 	if (suser(p, 0) != 0) {
206 		if (flags & MNT_EXPORTED) {
207 			vput(vp);
208 			return (EPERM);
209 		}
210 		flags |= MNT_NOSUID | MNT_NODEV;
211 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
212 			flags |= MNT_NOEXEC;
213 	}
214 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
215 		vput(vp);
216 		return (error);
217 	}
218 	if (vp->v_type != VDIR) {
219 		vput(vp);
220 		return (ENOTDIR);
221 	}
222 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
223 	if (error) {
224 		vput(vp);
225 		return (error);
226 	}
	/* Find the configured file system whose name matches the request. */
227 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
228 		if (!strcmp(vfsp->vfc_name, fstypename))
229 			break;
230 	}
231 
232 	if (vfsp == NULL) {
233 		vput(vp);
234 		return (EOPNOTSUPP);
235 	}
236 
	/* Refuse if something is already mounted on this vnode. */
237 	if (vp->v_mountedhere != NULL) {
238 		vput(vp);
239 		return (EBUSY);
240 	}
241 
242 	/*
243 	 * Allocate and initialize the file system.
	 * M_ZERO means every field not assigned below starts out as zero.
244 	 */
245 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
246 		M_MOUNT, M_WAITOK|M_ZERO);
247 	(void) vfs_busy(mp, VB_READ|VB_NOWAIT);
248 	mp->mnt_op = vfsp->vfc_vfsops;
249 	mp->mnt_vfc = vfsp;
250 	mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK);
251 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
252 	mp->mnt_vnodecovered = vp;
253 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
254 update:
255 	/*
256 	 * Set the mount level flags.
	 * Reached both for a fresh mount and, via goto, for MNT_UPDATE.
	 * Asking for read-write on a read-only mount only sets
	 * MNT_WANTRDWR here; MNT_RDONLY itself is cleared after VFS_MOUNT().
257 	 */
258 	if (flags & MNT_RDONLY)
259 		mp->mnt_flag |= MNT_RDONLY;
260 	else if (mp->mnt_flag & MNT_RDONLY)
261 		mp->mnt_flag |= MNT_WANTRDWR;
262 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
263 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP | MNT_NOATIME |
264 	    MNT_FORCE);
265 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
266 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_SOFTDEP |
267 	    MNT_NOATIME | MNT_FORCE);
268 	/*
269 	 * Mount the filesystem.
270 	 */
271 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
272 	if (!error) {
273 		mp->mnt_stat.f_ctime = time_second;
274 	}
275 	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: drop the per-operation flags and, if
		 * VFS_MOUNT() failed, put back the flags saved in mntflag.
		 */
276 		vput(vp);
277 		if (mp->mnt_flag & MNT_WANTRDWR)
278 			mp->mnt_flag &= ~MNT_RDONLY;
279 		mp->mnt_flag &=~
280 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
281 		if (error)
282 			mp->mnt_flag = mntflag;
283 
		/*
		 * Keep the syncer vnode in step with the read-only state.
		 * NOTE(review): if VFS_MOUNT() failed and the restored mount
		 * is read-write without a syncer, the assignment below
		 * replaces that failure with vfs_allocate_syncvnode()'s
		 * result -- confirm this is intended.
		 */
284  		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
285  			if (mp->mnt_syncer == NULL)
286  				error = vfs_allocate_syncvnode(mp);
287  		} else {
288  			if (mp->mnt_syncer != NULL)
289  				vgone(mp->mnt_syncer);
290  			mp->mnt_syncer = NULL;
291  		}
292 
293 		vfs_unbusy(mp);
294 		return (error);
295 	}
296 
	/* New mount: hook it onto the covered vnode (undone below on error). */
297 	vp->v_mountedhere = mp;
298 
299 	/*
300 	 * Put the new filesystem on the mount list after root.
301 	 */
302 	cache_purge(vp);
303 	if (!error) {
304 		vfsp->vfc_refcount++;
305 		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
306 		checkdirs(vp);
307 		VOP_UNLOCK(vp, 0, p);
		/*
		 * NOTE(review): the value stored in error here is overwritten
		 * by the VFS_START() assignment below before it is examined.
		 */
308  		if ((mp->mnt_flag & MNT_RDONLY) == 0)
309  			error = vfs_allocate_syncvnode(mp);
310 		vfs_unbusy(mp);
311 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
312 		if ((error = VFS_START(mp, 0, p)) != 0)
313 			vrele(vp);
314 	} else {
		/* VFS_MOUNT() failed: detach, free the mount and drop vp. */
315 		mp->mnt_vnodecovered->v_mountedhere = NULL;
316 		vfs_unbusy(mp);
317 		free(mp, M_MOUNT);
318 		vput(vp);
319 	}
320 	return (error);
321 }
322 
323 /*
324  * Scan all active processes to see if any of them have a current
325  * or root directory onto which the new filesystem has just been
326  * mounted. If so, replace them with the new mount point.
327  */
328 void
329 checkdirs(struct vnode *olddp)
330 {
331 	struct filedesc *fdp;
332 	struct vnode *newdp, *vp;
333 	struct proc *p;
334 
335 	if (olddp->v_usecount == 1)
336 		return;
337 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
338 		panic("mount: lost mount");
339 again:
340 	LIST_FOREACH(p, &allproc, p_list) {
341 		fdp = p->p_fd;
342 		if (fdp->fd_cdir == olddp) {
343 			vp = fdp->fd_cdir;
344 			vref(newdp);
345 			fdp->fd_cdir = newdp;
346 			if (vrele(vp))
347 				goto again;
348 		}
349 		if (fdp->fd_rdir == olddp) {
350 			vp = fdp->fd_rdir;
351 			vref(newdp);
352 			fdp->fd_rdir = newdp;
353 			if (vrele(vp))
354 				goto again;
355 		}
356 	}
357 	if (rootvnode == olddp) {
358 		vrele(rootvnode);
359 		vref(newdp);
360 		rootvnode = newdp;
361 	}
362 	vput(newdp);
363 }
364 
365 /*
366  * Unmount a file system.
367  *
368  * Note: unmount takes a path to the vnode mounted on as argument,
369  * not special file (as before).
370  */
371 /* ARGSUSED */
372 int
373 sys_unmount(struct proc *p, void *v, register_t *retval)
374 {
375 	struct sys_unmount_args /* {
376 		syscallarg(const char *) path;
377 		syscallarg(int) flags;
378 	} */ *uap = v;
379 	struct vnode *vp;
380 	struct mount *mp;
381 	int error;
382 	struct nameidata nd;
383 
384 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
385 	    SCARG(uap, path), p);
386 	if ((error = namei(&nd)) != 0)
387 		return (error);
388 	vp = nd.ni_vp;
389 	mp = vp->v_mount;
390 
391 	/*
392 	 * Only root, or the user that did the original mount is
393 	 * permitted to unmount this filesystem.
394 	 */
395 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
396 	    (error = suser(p, 0))) {
397 		vput(vp);
398 		return (error);
399 	}
400 
401 	/*
402 	 * Don't allow unmounting the root file system.
403 	 */
404 	if (mp->mnt_flag & MNT_ROOTFS) {
405 		vput(vp);
406 		return (EINVAL);
407 	}
408 
409 	/*
410 	 * Must be the root of the filesystem
411 	 */
412 	if ((vp->v_flag & VROOT) == 0) {
413 		vput(vp);
414 		return (EINVAL);
415 	}
416 	vput(vp);
417 
418 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
419 		return (EBUSY);
420 
421 	return (dounmount(mp, SCARG(uap, flags), p, vp));
422 }
423 
424 /*
425  * Do the actual file system unmount.
426  */
427 int
428 dounmount(struct mount *mp, int flags, struct proc *p, struct vnode *olddp)
429 {
430 	struct vnode *coveredvp;
431 	int error;
432 	int hadsyncer = 0;
433 
434  	mp->mnt_flag &=~ MNT_ASYNC;
435  	cache_purgevfs(mp);	/* remove cache entries for this file sys */
436  	if (mp->mnt_syncer != NULL) {
437 		hadsyncer = 1;
438  		vgone(mp->mnt_syncer);
439 		mp->mnt_syncer = NULL;
440 	}
441 	if (((mp->mnt_flag & MNT_RDONLY) ||
442 	    (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
443  	    (flags & MNT_FORCE))
444  		error = VFS_UNMOUNT(mp, flags, p);
445 
446  	if (error && error != EIO && !(flags & MNT_DOOMED)) {
447  		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
448  			(void) vfs_allocate_syncvnode(mp);
449 		vfs_unbusy(mp);
450 		return (error);
451 	}
452 
453 	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
454 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
455 		coveredvp->v_mountedhere = NULL;
456  		vrele(coveredvp);
457  	}
458 
459 	mp->mnt_vfc->vfc_refcount--;
460 
461 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
462 		panic("unmount: dangling vnode");
463 
464 	vfs_unbusy(mp);
465 	free(mp, M_MOUNT);
466 
467 	return (0);
468 }
469 
470 /*
471  * Sync each mounted filesystem.
472  */
473 #ifdef DEBUG
474 int syncprt = 0;
475 struct ctldebug debug0 = { "syncprt", &syncprt };
476 #endif
477 
478 /* ARGSUSED */
479 int
480 sys_sync(struct proc *p, void *v, register_t *retval)
481 {
482 	struct mount *mp, *nmp;
483 	int asyncflag;
484 
485 	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
486 	    mp = nmp) {
487 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
488 			nmp = CIRCLEQ_PREV(mp, mnt_list);
489 			continue;
490 		}
491 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
492 			asyncflag = mp->mnt_flag & MNT_ASYNC;
493 			mp->mnt_flag &= ~MNT_ASYNC;
494 			uvm_vnp_sync(mp);
495 			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
496 			if (asyncflag)
497 				mp->mnt_flag |= MNT_ASYNC;
498 		}
499 		nmp = CIRCLEQ_PREV(mp, mnt_list);
500 		vfs_unbusy(mp);
501 	}
502 
503 	return (0);
504 }
505 
506 /*
507  * Change filesystem quotas.
508  */
509 /* ARGSUSED */
510 int
511 sys_quotactl(struct proc *p, void *v, register_t *retval)
512 {
513 	struct sys_quotactl_args /* {
514 		syscallarg(const char *) path;
515 		syscallarg(int) cmd;
516 		syscallarg(int) uid;
517 		syscallarg(char *) arg;
518 	} */ *uap = v;
519 	struct mount *mp;
520 	int error;
521 	struct nameidata nd;
522 
523 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
524 	if ((error = namei(&nd)) != 0)
525 		return (error);
526 	mp = nd.ni_vp->v_mount;
527 	vrele(nd.ni_vp);
528 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
529 	    SCARG(uap, arg), p));
530 }
531 
532 int
533 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
534 {
535 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
536 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
537 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
538 	char *s, *d;
539 	int error;
540 
541 	/* Don't let non-root see filesystem id (for NFS security) */
542 	if (suser(p, 0)) {
543 		fsid_t fsid;
544 
545 		s = (char *)sp;
546 		d = (char *)uaddr;
547 
548 		memset(&fsid, 0, sizeof(fsid));
549 
550 		if ((error = copyout(s, d, co_sz1)) != 0)
551 			return (error);
552 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
553 			return (error);
554 		return (copyout(s + co_off2, d + co_off2, co_sz2));
555 	}
556 
557 	return (copyout(sp, uaddr, sizeof(*sp)));
558 }
559 
560 /*
561  * Get filesystem statistics.
562  */
563 /* ARGSUSED */
564 int
565 sys_statfs(struct proc *p, void *v, register_t *retval)
566 {
567 	struct sys_statfs_args /* {
568 		syscallarg(const char *) path;
569 		syscallarg(struct statfs *) buf;
570 	} */ *uap = v;
571 	struct mount *mp;
572 	struct statfs *sp;
573 	int error;
574 	struct nameidata nd;
575 
576 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
577 	if ((error = namei(&nd)) != 0)
578 		return (error);
579 	mp = nd.ni_vp->v_mount;
580 	sp = &mp->mnt_stat;
581 	vrele(nd.ni_vp);
582 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
583 		return (error);
584 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
585 
586 	return (copyout_statfs(sp, SCARG(uap, buf), p));
587 }
588 
589 /*
590  * Get filesystem statistics.
591  */
592 /* ARGSUSED */
593 int
594 sys_fstatfs(struct proc *p, void *v, register_t *retval)
595 {
596 	struct sys_fstatfs_args /* {
597 		syscallarg(int) fd;
598 		syscallarg(struct statfs *) buf;
599 	} */ *uap = v;
600 	struct file *fp;
601 	struct mount *mp;
602 	struct statfs *sp;
603 	int error;
604 
605 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
606 		return (error);
607 	mp = ((struct vnode *)fp->f_data)->v_mount;
608 	if (!mp) {
609 		FRELE(fp);
610 		return (ENOENT);
611 	}
612 	sp = &mp->mnt_stat;
613 	error = VFS_STATFS(mp, sp, p);
614 	FRELE(fp);
615 	if (error)
616 		return (error);
617 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
618 
619 	return (copyout_statfs(sp, SCARG(uap, buf), p));
620 }
621 
622 /*
623  * Get statistics on all filesystems.
624  */
625 int
626 sys_getfsstat(struct proc *p, void *v, register_t *retval)
627 {
628 	struct sys_getfsstat_args /* {
629 		syscallarg(struct statfs *) buf;
630 		syscallarg(size_t) bufsize;
631 		syscallarg(int) flags;
632 	} */ *uap = v;
633 	struct mount *mp, *nmp;
634 	struct statfs *sp;
635 	struct statfs *sfsp;
636 	size_t count, maxcount;
637 	int error, flags = SCARG(uap, flags);
638 
639 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
640 	sfsp = SCARG(uap, buf);
641 	count = 0;
642 
643 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
644 	    mp = nmp) {
645 		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
646 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
647 			continue;
648 		}
649 		if (sfsp && count < maxcount) {
650 			sp = &mp->mnt_stat;
651 
652 			/* Refresh stats unless MNT_NOWAIT is specified */
653 			if (flags != MNT_NOWAIT &&
654 			    flags != MNT_LAZY &&
655 			    (flags == MNT_WAIT ||
656 			    flags == 0) &&
657 			    (error = VFS_STATFS(mp, sp, p))) {
658 				nmp = CIRCLEQ_NEXT(mp, mnt_list);
659 				vfs_unbusy(mp);
660  				continue;
661 			}
662 
663 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
664 #if notyet
665 			if (mp->mnt_flag & MNT_SOFTDEP)
666 				sp->f_eflags = STATFS_SOFTUPD;
667 #endif
668 			error = (copyout_statfs(sp, sfsp, p));
669 			if (error) {
670 				vfs_unbusy(mp);
671 				return (error);
672 			}
673 			sfsp++;
674 		}
675 		count++;
676 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
677 		vfs_unbusy(mp);
678 	}
679 
680 	if (sfsp && count > maxcount)
681 		*retval = maxcount;
682 	else
683 		*retval = count;
684 
685 	return (0);
686 }
687 
688 /*
689  * Change current working directory to a given file descriptor.
690  */
691 /* ARGSUSED */
692 int
693 sys_fchdir(struct proc *p, void *v, register_t *retval)
694 {
695 	struct sys_fchdir_args /* {
696 		syscallarg(int) fd;
697 	} */ *uap = v;
698 	struct filedesc *fdp = p->p_fd;
699 	struct vnode *vp, *tdp;
700 	struct mount *mp;
701 	struct file *fp;
702 	int error;
703 
704 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
705 		return (EBADF);
706 	vp = (struct vnode *)fp->f_data;
707 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR)
708 		return (ENOTDIR);
709 	vref(vp);
710 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
711 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
712 
713 	while (!error && (mp = vp->v_mountedhere) != NULL) {
714 		if (vfs_busy(mp, VB_READ|VB_WAIT))
715 			continue;
716 		error = VFS_ROOT(mp, &tdp);
717 		vfs_unbusy(mp);
718 		if (error)
719 			break;
720 		vput(vp);
721 		vp = tdp;
722 	}
723 	if (error) {
724 		vput(vp);
725 		return (error);
726 	}
727 	VOP_UNLOCK(vp, 0, p);
728 	vrele(fdp->fd_cdir);
729 	fdp->fd_cdir = vp;
730 	return (0);
731 }
732 
733 /*
734  * Change current working directory (``.'').
735  */
736 /* ARGSUSED */
737 int
738 sys_chdir(struct proc *p, void *v, register_t *retval)
739 {
740 	struct sys_chdir_args /* {
741 		syscallarg(const char *) path;
742 	} */ *uap = v;
743 	struct filedesc *fdp = p->p_fd;
744 	int error;
745 	struct nameidata nd;
746 
747 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
748 	    SCARG(uap, path), p);
749 	if ((error = change_dir(&nd, p)) != 0)
750 		return (error);
751 	vrele(fdp->fd_cdir);
752 	fdp->fd_cdir = nd.ni_vp;
753 	return (0);
754 }
755 
756 /*
757  * Change notion of root (``/'') directory.
758  */
759 /* ARGSUSED */
760 int
761 sys_chroot(struct proc *p, void *v, register_t *retval)
762 {
763 	struct sys_chroot_args /* {
764 		syscallarg(const char *) path;
765 	} */ *uap = v;
766 	struct filedesc *fdp = p->p_fd;
767 	int error;
768 	struct nameidata nd;
769 
770 	if ((error = suser(p, 0)) != 0)
771 		return (error);
772 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
773 	    SCARG(uap, path), p);
774 	if ((error = change_dir(&nd, p)) != 0)
775 		return (error);
776 	if (fdp->fd_rdir != NULL) {
777 		/*
778 		 * A chroot() done inside a changed root environment does
779 		 * an automatic chdir to avoid the out-of-tree experience.
780 		 */
781 		vrele(fdp->fd_rdir);
782 		vrele(fdp->fd_cdir);
783 		vref(nd.ni_vp);
784 		fdp->fd_cdir = nd.ni_vp;
785 	}
786 	fdp->fd_rdir = nd.ni_vp;
787 	return (0);
788 }
789 
790 /*
791  * Common routine for chroot and chdir.
792  */
793 static int
794 change_dir(struct nameidata *ndp, struct proc *p)
795 {
796 	struct vnode *vp;
797 	int error;
798 
799 	if ((error = namei(ndp)) != 0)
800 		return (error);
801 	vp = ndp->ni_vp;
802 	if (vp->v_type != VDIR)
803 		error = ENOTDIR;
804 	else
805 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
806 	if (error)
807 		vput(vp);
808 	else
809 		VOP_UNLOCK(vp, 0, p);
810 	return (error);
811 }
812 
813 /*
814  * Check permissions, allocate an open file structure,
815  * and call the device open routine if any.
816  */
817 int
818 sys_open(struct proc *p, void *v, register_t *retval)
819 {
820 	struct sys_open_args /* {
821 		syscallarg(const char *) path;
822 		syscallarg(int) flags;
823 		syscallarg(mode_t) mode;
824 	} */ *uap = v;
825 
826 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
827 	    SCARG(uap, mode), retval));
828 }
829 
830 int
831 sys_openat(struct proc *p, void *v, register_t *retval)
832 {
833 	struct sys_openat_args /* {
834 		syscallarg(int) fd;
835 		syscallarg(const char *) path;
836 		syscallarg(int) flags;
837 		syscallarg(mode_t) mode;
838 	} */ *uap = v;
839 
840 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
841 	    SCARG(uap, flags), SCARG(uap, mode), retval));
842 }
843 
/*
 * Common implementation behind sys_open() and sys_openat().
 *
 * p       calling process
 * fd      directory descriptor handed to NDINITAT(); sys_open() passes
 *         AT_FDCWD
 * path    user-space pathname
 * oflags  open flags as supplied by the caller, converted with FFLAGS()
 * mode    creation mode; masked with fd_cmask and ALLPERMS, S_ISTXT removed
 * retval  receives the index of the new descriptor on success
 *
 * Returns 0 on success or an errno value.  The descriptor table lock is
 * taken with fdplock() at entry and released at the out: label on every
 * path.
 */
844 int
845 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
846     register_t *retval)
847 {
848 	struct filedesc *fdp = p->p_fd;
849 	struct file *fp;
850 	struct vnode *vp;
851 	struct vattr vattr;
852 	int flags, cmode;
853 	int type, indx, error, localtrunc = 0;
854 	struct flock lf;
855 	struct nameidata nd;
856 
857 	fdplock(fdp);
858 
859 	if ((error = falloc(p, &fp, &indx)) != 0)
860 		goto out;
861 
862 	flags = FFLAGS(oflags);
863 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
864 	NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
865 	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * When a lock is requested together with O_TRUNC, O_TRUNC is hidden
	 * from vn_open() and the truncation is done further down, after the
	 * lock has been obtained.
	 */
866 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
867 		localtrunc = 1;
868 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
869 	}
870 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/*
		 * ENODEV/ENXIO with p_dupfd no longer negative: try
		 * dupfdopen() on p_dupfd instead.  If that works the file
		 * allocated above is closed and indx is returned.
		 */
871 		if ((error == ENODEV || error == ENXIO) &&
872 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
873 		    (error =
874 			dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
875 			closef(fp, p);
876 			*retval = indx;
877 			goto out;
878 		}
		/* ERESTART is reported to the caller as EINTR. */
879 		if (error == ERESTART)
880 			error = EINTR;
881 		fdremove(fdp, indx);
882 		closef(fp, p);
883 		goto out;
884 	}
885 	p->p_dupfd = 0;
886 	vp = nd.ni_vp;
	/* Turn the allocated file into a vnode-backed file. */
887 	fp->f_flag = flags & FMASK;
888 	fp->f_type = DTYPE_VNODE;
889 	fp->f_ops = &vnops;
890 	fp->f_data = vp;
891 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * Whole-file lock (start 0, length 0): write lock for
		 * O_EXLOCK, read lock otherwise; F_WAIT is added unless
		 * FNONBLOCK is set.
		 */
892 		lf.l_whence = SEEK_SET;
893 		lf.l_start = 0;
894 		lf.l_len = 0;
895 		if (flags & O_EXLOCK)
896 			lf.l_type = F_WRLCK;
897 		else
898 			lf.l_type = F_RDLCK;
899 		type = F_FLOCK;
900 		if ((flags & FNONBLOCK) == 0)
901 			type |= F_WAIT;
		/* The vnode is unlocked around VOP_ADVLOCK() and relocked after. */
902 		VOP_UNLOCK(vp, 0, p);
903 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
904 		if (error) {
905 			/* closef will vn_close the file for us. */
906 			fdremove(fdp, indx);
907 			closef(fp, p);
908 			goto out;
909 		}
910 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
911 		fp->f_flag |= FHASLOCK;
912 	}
913 	if (localtrunc) {
		/* Deferred O_TRUNC: check the preconditions, then set size to 0. */
914 		if ((fp->f_flag & FWRITE) == 0)
915 			error = EACCES;
916 		else if (vp->v_mount->mnt_flag & MNT_RDONLY)
917 			error = EROFS;
918 		else if (vp->v_type == VDIR)
919 			error = EISDIR;
920 		else if ((error = vn_writechk(vp)) == 0) {
921 			VATTR_NULL(&vattr);
922 			vattr.va_size = 0;
923 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
924 		}
925 		if (error) {
926 			VOP_UNLOCK(vp, 0, p);
927 			/* closef will close the file for us. */
928 			fdremove(fdp, indx);
929 			closef(fp, p);
930 			goto out;
931 		}
932 	}
933 	VOP_UNLOCK(vp, 0, p);
934 	if (flags & O_CLOEXEC)
935 		fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
936 	*retval = indx;
937 	FILE_SET_MATURE(fp);
938 out:
939 	fdpunlock(fdp);
940 	return (error);
941 }
942 
943 /*
944  * Get file handle system call
945  */
946 int
947 sys_getfh(struct proc *p, void *v, register_t *retval)
948 {
949 	struct sys_getfh_args /* {
950 		syscallarg(const char *) fname;
951 		syscallarg(fhandle_t *) fhp;
952 	} */ *uap = v;
953 	struct vnode *vp;
954 	fhandle_t fh;
955 	int error;
956 	struct nameidata nd;
957 
958 	/*
959 	 * Must be super user
960 	 */
961 	error = suser(p, 0);
962 	if (error)
963 		return (error);
964 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
965 	    SCARG(uap, fname), p);
966 	error = namei(&nd);
967 	if (error)
968 		return (error);
969 	vp = nd.ni_vp;
970 	bzero(&fh, sizeof(fh));
971 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
972 	error = VFS_VPTOFH(vp, &fh.fh_fid);
973 	vput(vp);
974 	if (error)
975 		return (error);
976 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
977 	return (error);
978 }
979 
980 /*
981  * Open a file given a file handle.
982  *
983  * Check permissions, allocate an open file structure,
984  * and call the device open routine if any.
 *
 * Superuser only.  The flags must include FREAD or FWRITE (after FFLAGS()
 * conversion) and must not include O_CREAT, otherwise EINVAL is returned.
 * On success *retval holds the index of the new descriptor.
 *
 * Every failure after fdplock() goes through the bad: label, which
 * removes and closes the allocated file (if any), disposes of vp with
 * vput() when vp is still non-NULL, and releases the descriptor table lock.
985  */
986 int
987 sys_fhopen(struct proc *p, void *v, register_t *retval)
988 {
989 	struct sys_fhopen_args /* {
990 		syscallarg(const fhandle_t *) fhp;
991 		syscallarg(int) flags;
992 	} */ *uap = v;
993 	struct filedesc *fdp = p->p_fd;
994 	struct file *fp;
995 	struct vnode *vp = NULL;
996 	struct mount *mp;
997 	struct ucred *cred = p->p_ucred;
998 	int flags;
999 	int type, indx, error=0;
1000 	struct flock lf;
1001 	struct vattr va;
1002 	fhandle_t fh;
1003 
1004 	/*
1005 	 * Must be super user
1006 	 */
1007 	if ((error = suser(p, 0)))
1008 		return (error);
1009 
1010 	flags = FFLAGS(SCARG(uap, flags));
1011 	if ((flags & (FREAD | FWRITE)) == 0)
1012 		return (EINVAL);
1013 	if ((flags & O_CREAT))
1014 		return (EINVAL);
1015 
1016 	fdplock(fdp);
1017 	if ((error = falloc(p, &fp, &indx)) != 0) {
1018 		fp = NULL;	/* tells bad: there is no file to tear down */
1019 		goto bad;
1020 	}
1021 
1022 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1023 		goto bad;
1024 
	/* Find the mount named by the handle's fsid, then the vnode in it. */
1025 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1026 		error = ESTALE;
1027 		goto bad;
1028 	}
1029 
1030 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1031 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1032 		goto bad;
1033 	}
1034 
1035 	/* Now do an effective vn_open */
1036 
1037 	if (vp->v_type == VSOCK) {
1038 		error = EOPNOTSUPP;
1039 		goto bad;
1040 	}
1041 	if (flags & FREAD) {
1042 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1043 			goto bad;
1044 	}
1045 	if (flags & (FWRITE | O_TRUNC)) {
		/* Directories cannot be opened for writing or truncated. */
1046 		if (vp->v_type == VDIR) {
1047 			error = EISDIR;
1048 			goto bad;
1049 		}
1050 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1051 		    (error = vn_writechk(vp)) != 0)
1052 			goto bad;
1053 	}
1054 	if (flags & O_TRUNC) {
		/* Truncate by setting only the size attribute to 0. */
1055 		VATTR_NULL(&va);
1056 		va.va_size = 0;
1057 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1058 			goto bad;
1059 	}
1060 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1061 		goto bad;
1062 	if (flags & FWRITE)
1063 		vp->v_writecount++;
1064 
1065 	/* done with modified vn_open, now finish what sys_open does. */
1066 
1067 	fp->f_flag = flags & FMASK;
1068 	fp->f_type = DTYPE_VNODE;
1069 	fp->f_ops = &vnops;
1070 	fp->f_data = vp;
1071 	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * Whole-file lock (start 0, length 0): write lock for
		 * O_EXLOCK, read lock otherwise; F_WAIT is added unless
		 * FNONBLOCK is set.
		 */
1072 		lf.l_whence = SEEK_SET;
1073 		lf.l_start = 0;
1074 		lf.l_len = 0;
1075 		if (flags & O_EXLOCK)
1076 			lf.l_type = F_WRLCK;
1077 		else
1078 			lf.l_type = F_RDLCK;
1079 		type = F_FLOCK;
1080 		if ((flags & FNONBLOCK) == 0)
1081 			type |= F_WAIT;
		/* The vnode is unlocked around VOP_ADVLOCK() and relocked after. */
1082 		VOP_UNLOCK(vp, 0, p);
1083 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1084 		if (error) {
1085 			vp = NULL;	/* closef will vn_close the file */
1086 			goto bad;
1087 		}
1088 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1089 		fp->f_flag |= FHASLOCK;
1090 	}
1091 	VOP_UNLOCK(vp, 0, p);
1092 	*retval = indx;
1093 	FILE_SET_MATURE(fp);
1094 
1095 	fdpunlock(fdp);
1096 	return (0);
1097 
1098 bad:
1099 	if (fp) {
1100 		fdremove(fdp, indx);
1101 		closef(fp, p);
1102 		if (vp != NULL)
1103 			vput(vp);
1104 	}
1105 	fdpunlock(fdp);
1106 	return (error);
1107 }
1108 
1109 /* ARGSUSED */
1110 int
1111 sys_fhstat(struct proc *p, void *v, register_t *retval)
1112 {
1113 	struct sys_fhstat_args /* {
1114 		syscallarg(const fhandle_t *) fhp;
1115 		syscallarg(struct stat *) sb;
1116 	} */ *uap = v;
1117 	struct stat sb;
1118 	int error;
1119 	fhandle_t fh;
1120 	struct mount *mp;
1121 	struct vnode *vp;
1122 
1123 	/*
1124 	 * Must be super user
1125 	 */
1126 	if ((error = suser(p, 0)))
1127 		return (error);
1128 
1129 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1130 		return (error);
1131 
1132 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1133 		return (ESTALE);
1134 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1135 		return (error);
1136 	error = vn_stat(vp, &sb, p);
1137 	vput(vp);
1138 	if (error)
1139 		return (error);
1140 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1141 	return (error);
1142 }
1143 
1144 /* ARGSUSED */
1145 int
1146 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1147 {
1148 	struct sys_fhstatfs_args /* {
1149 		syscallarg(const fhandle_t *) fhp;
1150 		syscallarg(struct statfs *) buf;
1151 	} */ *uap = v;
1152 	struct statfs *sp;
1153 	fhandle_t fh;
1154 	struct mount *mp;
1155 	struct vnode *vp;
1156 	int error;
1157 
1158 	/*
1159 	 * Must be super user
1160 	 */
1161 	if ((error = suser(p, 0)))
1162 		return (error);
1163 
1164 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1165 		return (error);
1166 
1167 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1168 		return (ESTALE);
1169 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1170 		return (error);
1171 	mp = vp->v_mount;
1172 	sp = &mp->mnt_stat;
1173 	vput(vp);
1174 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1175 		return (error);
1176 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1177 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1178 }
1179 
1180 /*
1181  * Create a special file.
1182  */
1183 /* ARGSUSED */
1184 int
1185 sys_mknod(struct proc *p, void *v, register_t *retval)
1186 {
1187 	struct sys_mknod_args /* {
1188 		syscallarg(const char *) path;
1189 		syscallarg(mode_t) mode;
1190 		syscallarg(int) dev;
1191 	} */ *uap = v;
1192 
1193 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1194 	    SCARG(uap, dev), retval));
1195 }
1196 
1197 int
1198 sys_mknodat(struct proc *p, void *v, register_t *retval)
1199 {
1200 	struct sys_mknodat_args /* {
1201 		syscallarg(int) fd;
1202 		syscallarg(const char *) path;
1203 		syscallarg(mode_t) mode;
1204 		syscallarg(dev_t) dev;
1205 	} */ *uap = v;
1206 
1207 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1208 	    SCARG(uap, mode), SCARG(uap, dev), retval));
1209 }
1210 
1211 int
1212 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev,
1213     register_t *retval)
1214 {
1215 	struct vnode *vp;
1216 	struct vattr vattr;
1217 	int error;
1218 	struct nameidata nd;
1219 
1220 	if ((error = suser(p, 0)) != 0)
1221 		return (error);
1222 	if (p->p_fd->fd_rdir)
1223 		return (EINVAL);
1224 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1225 	if ((error = namei(&nd)) != 0)
1226 		return (error);
1227 	vp = nd.ni_vp;
1228 	if (vp != NULL)
1229 		error = EEXIST;
1230 	else {
1231 		VATTR_NULL(&vattr);
1232 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1233 		vattr.va_rdev = dev;
1234 
1235 		switch (mode & S_IFMT) {
1236 		case S_IFMT:	/* used by badsect to flag bad sectors */
1237 			vattr.va_type = VBAD;
1238 			break;
1239 		case S_IFCHR:
1240 			vattr.va_type = VCHR;
1241 			break;
1242 		case S_IFBLK:
1243 			vattr.va_type = VBLK;
1244 			break;
1245 		default:
1246 			error = EINVAL;
1247 			break;
1248 		}
1249 	}
1250 	if (!error) {
1251 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1252 	} else {
1253 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1254 		if (nd.ni_dvp == vp)
1255 			vrele(nd.ni_dvp);
1256 		else
1257 			vput(nd.ni_dvp);
1258 		if (vp)
1259 			vrele(vp);
1260 	}
1261 	return (error);
1262 }
1263 
1264 /*
1265  * Create a named pipe.
1266  */
1267 /* ARGSUSED */
1268 int
1269 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1270 {
1271 	struct sys_mkfifo_args /* {
1272 		syscallarg(const char *) path;
1273 		syscallarg(mode_t) mode;
1274 	} */ *uap = v;
1275 
1276 	return (domkfifoat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1277 	    retval));
1278 }
1279 
1280 int
1281 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1282 {
1283 	struct sys_mkfifoat_args /* {
1284 		syscallarg(int) fd;
1285 		syscallarg(const char *) path;
1286 		syscallarg(mode_t) mode;
1287 	} */ *uap = v;
1288 
1289 	return (domkfifoat(p, SCARG(uap, fd), SCARG(uap, path),
1290 	    SCARG(uap, mode), retval));
1291 }
1292 
1293 int
1294 domkfifoat(struct proc *p, int fd, const char *path, mode_t mode, register_t *retval)
1295 {
1296 #ifndef FIFO
1297 	return (EOPNOTSUPP);
1298 #else
1299 	struct vattr vattr;
1300 	int error;
1301 	struct nameidata nd;
1302 
1303 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1304 	if ((error = namei(&nd)) != 0)
1305 		return (error);
1306 	if (nd.ni_vp != NULL) {
1307 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1308 		if (nd.ni_dvp == nd.ni_vp)
1309 			vrele(nd.ni_dvp);
1310 		else
1311 			vput(nd.ni_dvp);
1312 		vrele(nd.ni_vp);
1313 		return (EEXIST);
1314 	}
1315 	VATTR_NULL(&vattr);
1316 	vattr.va_type = VFIFO;
1317 	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1318 	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
1319 #endif /* FIFO */
1320 }
1321 
1322 /*
1323  * Make a hard file link.
1324  */
1325 /* ARGSUSED */
1326 int
1327 sys_link(struct proc *p, void *v, register_t *retval)
1328 {
1329 	struct sys_link_args /* {
1330 		syscallarg(const char *) path;
1331 		syscallarg(const char *) link;
1332 	} */ *uap = v;
1333 
1334 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1335 	    SCARG(uap, link), AT_SYMLINK_FOLLOW, retval));
1336 }
1337 
1338 int
1339 sys_linkat(struct proc *p, void *v, register_t *retval)
1340 {
1341 	struct sys_linkat_args /* {
1342 		syscallarg(int) fd1;
1343 		syscallarg(const char *) path1;
1344 		syscallarg(int) fd2;
1345 		syscallarg(const char *) path2;
1346 		syscallarg(int) flag;
1347 	} */ *uap = v;
1348 
1349 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1350 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag), retval));
1351 }
1352 
1353 int
1354 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1355     const char *path2, int flag, register_t *retval)
1356 {
1357 	struct vnode *vp;
1358 	struct nameidata nd;
1359 	int error, follow;
1360 	int flags;
1361 
1362 	if (flag & ~AT_SYMLINK_FOLLOW)
1363 		return (EINVAL);
1364 
1365 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1366 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1367 	if ((error = namei(&nd)) != 0)
1368 		return (error);
1369 	vp = nd.ni_vp;
1370 
1371 	flags = LOCKPARENT;
1372 	if (vp->v_type == VDIR) {
1373 		flags |= STRIPSLASHES;
1374 	}
1375 
1376 	NDINITAT(&nd, CREATE, flags, UIO_USERSPACE, fd2, path2, p);
1377 	if ((error = namei(&nd)) != 0)
1378 		goto out;
1379 	if (nd.ni_vp) {
1380 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1381 		if (nd.ni_dvp == nd.ni_vp)
1382 			vrele(nd.ni_dvp);
1383 		else
1384 			vput(nd.ni_dvp);
1385 		vrele(nd.ni_vp);
1386 		error = EEXIST;
1387 		goto out;
1388 	}
1389 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1390 out:
1391 	vrele(vp);
1392 	return (error);
1393 }
1394 
1395 /*
1396  * Make a symbolic link.
1397  */
1398 /* ARGSUSED */
1399 int
1400 sys_symlink(struct proc *p, void *v, register_t *retval)
1401 {
1402 	struct sys_symlink_args /* {
1403 		syscallarg(const char *) path;
1404 		syscallarg(const char *) link;
1405 	} */ *uap = v;
1406 
1407 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
1408 	    retval));
1409 }
1410 
1411 int
1412 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1413 {
1414 	struct sys_symlinkat_args /* {
1415 		syscallarg(const char *) path;
1416 		syscallarg(int) fd;
1417 		syscallarg(const char *) link;
1418 	} */ *uap = v;
1419 
1420 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1421 	    SCARG(uap, link), retval));
1422 }
1423 
1424 int
1425 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link,
1426     register_t *retval)
1427 {
1428 	struct vattr vattr;
1429 	char *path;
1430 	int error;
1431 	struct nameidata nd;
1432 
1433 	path = pool_get(&namei_pool, PR_WAITOK);
1434 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1435 	if (error)
1436 		goto out;
1437 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1438 	if ((error = namei(&nd)) != 0)
1439 		goto out;
1440 	if (nd.ni_vp) {
1441 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1442 		if (nd.ni_dvp == nd.ni_vp)
1443 			vrele(nd.ni_dvp);
1444 		else
1445 			vput(nd.ni_dvp);
1446 		vrele(nd.ni_vp);
1447 		error = EEXIST;
1448 		goto out;
1449 	}
1450 	VATTR_NULL(&vattr);
1451 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1452 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1453 out:
1454 	pool_put(&namei_pool, path);
1455 	return (error);
1456 }
1457 
1458 /*
1459  * Delete a name from the filesystem.
1460  */
1461 /* ARGSUSED */
1462 int
1463 sys_unlink(struct proc *p, void *v, register_t *retval)
1464 {
1465 	struct sys_unlink_args /* {
1466 		syscallarg(const char *) path;
1467 	} */ *uap = v;
1468 
1469 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0, retval));
1470 }
1471 
1472 int
1473 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1474 {
1475 	struct sys_unlinkat_args /* {
1476 		syscallarg(int) fd;
1477 		syscallarg(const char *) path;
1478 		syscallarg(int) flag;
1479 	} */ *uap = v;
1480 
1481 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1482 	    SCARG(uap, flag), retval));
1483 }
1484 
1485 int
1486 dounlinkat(struct proc *p, int fd, const char *path, int flag,
1487     register_t *retval)
1488 {
1489 	struct vnode *vp;
1490 	int error;
1491 	struct nameidata nd;
1492 
1493 	if (flag & ~AT_REMOVEDIR)
1494 		return (EINVAL);
1495 
1496 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1497 	    fd, path, p);
1498 	if ((error = namei(&nd)) != 0)
1499 		return (error);
1500 	vp = nd.ni_vp;
1501 
1502 	if (flag & AT_REMOVEDIR) {
1503 		if (vp->v_type != VDIR) {
1504 			error = ENOTDIR;
1505 			goto out;
1506 		}
1507 		/*
1508 		 * No rmdir "." please.
1509 		 */
1510 		if (nd.ni_dvp == vp) {
1511 			error = EBUSY;
1512 			goto out;
1513 		}
1514 	}
1515 
1516 	/*
1517 	 * The root of a mounted filesystem cannot be deleted.
1518 	 */
1519 	if (vp->v_flag & VROOT)
1520 		error = EBUSY;
1521 out:
1522 	if (!error) {
1523 		if (flag & AT_REMOVEDIR) {
1524 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1525 		} else {
1526 			(void)uvm_vnp_uncache(vp);
1527 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1528 		}
1529 	} else {
1530 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1531 		if (nd.ni_dvp == vp)
1532 			vrele(nd.ni_dvp);
1533 		else
1534 			vput(nd.ni_dvp);
1535 		vput(vp);
1536 	}
1537 	return (error);
1538 }
1539 
1540 /*
1541  * Reposition read/write file offset.
1542  */
1543 int
1544 sys_lseek(struct proc *p, void *v, register_t *retval)
1545 {
1546 	struct sys_lseek_args /* {
1547 		syscallarg(int) fd;
1548 		syscallarg(int) pad;
1549 		syscallarg(off_t) offset;
1550 		syscallarg(int) whence;
1551 	} */ *uap = v;
1552 	struct ucred *cred = p->p_ucred;
1553 	struct filedesc *fdp = p->p_fd;
1554 	struct file *fp;
1555 	struct vattr vattr;
1556 	struct vnode *vp;
1557 	off_t offarg, newoff;
1558 	int error, special;
1559 
1560 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1561 		return (EBADF);
1562 	if (fp->f_type != DTYPE_VNODE)
1563 		return (ESPIPE);
1564 	vp = (struct vnode *)fp->f_data;
1565 	if (vp->v_type == VFIFO)
1566 		return (ESPIPE);
1567 	FREF(fp);
1568 	if (vp->v_type == VCHR)
1569 		special = 1;
1570 	else
1571 		special = 0;
1572 	offarg = SCARG(uap, offset);
1573 
1574 	switch (SCARG(uap, whence)) {
1575 	case SEEK_CUR:
1576 		newoff = fp->f_offset + offarg;
1577 		break;
1578 	case SEEK_END:
1579 		error = VOP_GETATTR(vp, &vattr, cred, p);
1580 		if (error)
1581 			goto bad;
1582 		newoff = offarg + (off_t)vattr.va_size;
1583 		break;
1584 	case SEEK_SET:
1585 		newoff = offarg;
1586 		break;
1587 	default:
1588 		error = EINVAL;
1589 		goto bad;
1590 	}
1591 	if (!special) {
1592 		if (newoff < 0) {
1593 			error = EINVAL;
1594 			goto bad;
1595 		}
1596 	}
1597 	*(off_t *)retval = fp->f_offset = newoff;
1598 	fp->f_seek++;
1599 	error = 0;
1600  bad:
1601 	FRELE(fp);
1602 	return (error);
1603 }
1604 
1605 /*
1606  * Check access permissions.
1607  */
1608 int
1609 sys_access(struct proc *p, void *v, register_t *retval)
1610 {
1611 	struct sys_access_args /* {
1612 		syscallarg(const char *) path;
1613 		syscallarg(int) flags;
1614 	} */ *uap = v;
1615 
1616 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1617 	    SCARG(uap, flags), 0, retval));
1618 }
1619 
1620 int
1621 sys_faccessat(struct proc *p, void *v, register_t *retval)
1622 {
1623 	struct sys_faccessat_args /* {
1624 		syscallarg(int) fd;
1625 		syscallarg(const char *) path;
1626 		syscallarg(int) amode;
1627 		syscallarg(int) flag;
1628 	} */ *uap = v;
1629 
1630 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1631 	    SCARG(uap, amode), SCARG(uap, flag), retval));
1632 }
1633 
1634 int
1635 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag,
1636     register_t *retval)
1637 {
1638 	struct vnode *vp;
1639 	int error;
1640 	struct nameidata nd;
1641 
1642 	if (amode & ~(R_OK | W_OK | X_OK))
1643 		return (EINVAL);
1644 	if (flag & ~AT_EACCESS)
1645 		return (EINVAL);
1646 
1647 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1648 	if ((error = namei(&nd)) != 0)
1649 		return (error);
1650 	vp = nd.ni_vp;
1651 
1652 	/* Flags == 0 means only check for existence. */
1653 	if (amode) {
1654 		struct ucred *cred = p->p_ucred;
1655 		int vflags = 0;
1656 
1657 		crhold(cred);
1658 
1659 		if (!(flag & AT_EACCESS)) {
1660 			cred = crcopy(cred);
1661 			cred->cr_uid = p->p_cred->p_ruid;
1662 			cred->cr_gid = p->p_cred->p_rgid;
1663 		}
1664 
1665 		if (amode & R_OK)
1666 			vflags |= VREAD;
1667 		if (amode & W_OK)
1668 			vflags |= VWRITE;
1669 		if (amode & X_OK)
1670 			vflags |= VEXEC;
1671 
1672 		error = VOP_ACCESS(vp, vflags, cred, p);
1673 		if (!error && (vflags & VWRITE))
1674 			error = vn_writechk(vp);
1675 
1676 		crfree(cred);
1677 	}
1678 	vput(vp);
1679 	return (error);
1680 }
1681 
1682 /*
1683  * Get file status; this version follows links.
1684  */
1685 /* ARGSUSED */
1686 int
1687 sys_stat(struct proc *p, void *v, register_t *retval)
1688 {
1689 	struct sys_stat_args /* {
1690 		syscallarg(const char *) path;
1691 		syscallarg(struct stat *) ub;
1692 	} */ *uap = v;
1693 
1694 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0,
1695 	    retval));
1696 }
1697 
1698 int
1699 sys_fstatat(struct proc *p, void *v, register_t *retval)
1700 {
1701 	struct sys_fstatat_args /* {
1702 		syscallarg(int) fd;
1703 		syscallarg(const char *) path;
1704 		syscallarg(struct stat *) buf;
1705 		syscallarg(int) flag;
1706 	} */ *uap = v;
1707 
1708 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
1709 	    SCARG(uap, buf), SCARG(uap, flag), retval));
1710 }
1711 
1712 int
1713 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf,
1714     int flag, register_t *retval)
1715 {
1716 	struct stat sb;
1717 	int error, follow;
1718 	struct nameidata nd;
1719 
1720 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1721 		return (EINVAL);
1722 
1723 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1724 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1725 	if ((error = namei(&nd)) != 0)
1726 		return (error);
1727 	error = vn_stat(nd.ni_vp, &sb, p);
1728 	vput(nd.ni_vp);
1729 	if (error)
1730 		return (error);
1731 	/* Don't let non-root see generation numbers (for NFS security) */
1732 	if (suser(p, 0))
1733 		sb.st_gen = 0;
1734 	error = copyout(&sb, buf, sizeof(sb));
1735 #ifdef KTRACE
1736 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
1737 		ktrstat(p, &sb);
1738 #endif
1739 	return (error);
1740 }
1741 
1742 /*
1743  * Get file status; this version does not follow links.
1744  */
1745 /* ARGSUSED */
1746 int
1747 sys_lstat(struct proc *p, void *v, register_t *retval)
1748 {
1749 	struct sys_lstat_args /* {
1750 		syscallarg(const char *) path;
1751 		syscallarg(struct stat *) ub;
1752 	} */ *uap = v;
1753 
1754 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
1755 	    AT_SYMLINK_NOFOLLOW, retval));
1756 }
1757 
1758 /*
1759  * Get configurable pathname variables.
1760  */
1761 /* ARGSUSED */
1762 int
1763 sys_pathconf(struct proc *p, void *v, register_t *retval)
1764 {
1765 	struct sys_pathconf_args /* {
1766 		syscallarg(const char *) path;
1767 		syscallarg(int) name;
1768 	} */ *uap = v;
1769 	int error;
1770 	struct nameidata nd;
1771 
1772 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1773 	    SCARG(uap, path), p);
1774 	if ((error = namei(&nd)) != 0)
1775 		return (error);
1776 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
1777 	vput(nd.ni_vp);
1778 	return (error);
1779 }
1780 
1781 /*
1782  * Return target name of a symbolic link.
1783  */
1784 /* ARGSUSED */
1785 int
1786 sys_readlink(struct proc *p, void *v, register_t *retval)
1787 {
1788 	struct sys_readlink_args /* {
1789 		syscallarg(const char *) path;
1790 		syscallarg(char *) buf;
1791 		syscallarg(size_t) count;
1792 	} */ *uap = v;
1793 
1794 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
1795 	    SCARG(uap, count), retval));
1796 }
1797 
1798 int
1799 sys_readlinkat(struct proc *p, void *v, register_t *retval)
1800 {
1801 	struct sys_readlinkat_args /* {
1802 		syscallarg(int) fd;
1803 		syscallarg(const char *) path;
1804 		syscallarg(char *) buf;
1805 		syscallarg(size_t) count;
1806 	} */ *uap = v;
1807 
1808 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1809 	    SCARG(uap, buf), SCARG(uap, count), retval));
1810 }
1811 
1812 int
1813 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
1814     size_t count, register_t *retval)
1815 {
1816 	struct vnode *vp;
1817 	struct iovec aiov;
1818 	struct uio auio;
1819 	int error;
1820 	struct nameidata nd;
1821 
1822 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1823 	if ((error = namei(&nd)) != 0)
1824 		return (error);
1825 	vp = nd.ni_vp;
1826 	if (vp->v_type != VLNK)
1827 		error = EINVAL;
1828 	else {
1829 		aiov.iov_base = buf;
1830 		aiov.iov_len = count;
1831 		auio.uio_iov = &aiov;
1832 		auio.uio_iovcnt = 1;
1833 		auio.uio_offset = 0;
1834 		auio.uio_rw = UIO_READ;
1835 		auio.uio_segflg = UIO_USERSPACE;
1836 		auio.uio_procp = p;
1837 		auio.uio_resid = count;
1838 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1839 	}
1840 	vput(vp);
1841 	*retval = count - auio.uio_resid;
1842 	return (error);
1843 }
1844 
1845 /*
1846  * Change flags of a file given a path name.
1847  */
1848 /* ARGSUSED */
1849 int
1850 sys_chflags(struct proc *p, void *v, register_t *retval)
1851 {
1852 	struct sys_chflags_args /* {
1853 		syscallarg(const char *) path;
1854 		syscallarg(u_int) flags;
1855 	} */ *uap = v;
1856 	struct vnode *vp;
1857 	struct vattr vattr;
1858 	int error;
1859 	struct nameidata nd;
1860 	u_int flags = SCARG(uap, flags);
1861 
1862 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1863 	if ((error = namei(&nd)) != 0)
1864 		return (error);
1865 	vp = nd.ni_vp;
1866 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1867 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1868 		error = EROFS;
1869 	else if (flags == VNOVAL)
1870 		error = EINVAL;
1871 	else {
1872 		if (suser(p, 0)) {
1873 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
1874 				goto out;
1875 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1876 				error = EINVAL;
1877 				goto out;
1878 			}
1879 		}
1880 		VATTR_NULL(&vattr);
1881 		vattr.va_flags = flags;
1882 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1883 	}
1884 out:
1885 	vput(vp);
1886 	return (error);
1887 }
1888 
1889 /*
1890  * Change flags of a file given a file descriptor.
1891  */
1892 /* ARGSUSED */
1893 int
1894 sys_fchflags(struct proc *p, void *v, register_t *retval)
1895 {
1896 	struct sys_fchflags_args /* {
1897 		syscallarg(int) fd;
1898 		syscallarg(u_int) flags;
1899 	} */ *uap = v;
1900 	struct vattr vattr;
1901 	struct vnode *vp;
1902 	struct file *fp;
1903 	int error;
1904 	u_int flags = SCARG(uap, flags);
1905 
1906 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
1907 		return (error);
1908 	vp = (struct vnode *)fp->f_data;
1909 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1910 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
1911 		error = EROFS;
1912 	else if (flags == VNOVAL)
1913 		error = EINVAL;
1914 	else {
1915 		if (suser(p, 0)) {
1916 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
1917 			    != 0)
1918 				goto out;
1919 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
1920 				error = EINVAL;
1921 				goto out;
1922 			}
1923 		}
1924 		VATTR_NULL(&vattr);
1925 		vattr.va_flags = flags;
1926 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1927 	}
1928 out:
1929 	VOP_UNLOCK(vp, 0, p);
1930 	FRELE(fp);
1931 	return (error);
1932 }
1933 
1934 /*
1935  * Change mode of a file given path name.
1936  */
1937 /* ARGSUSED */
1938 int
1939 sys_chmod(struct proc *p, void *v, register_t *retval)
1940 {
1941 	struct sys_chmod_args /* {
1942 		syscallarg(const char *) path;
1943 		syscallarg(mode_t) mode;
1944 	} */ *uap = v;
1945 
1946 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1947 	    0, retval));
1948 }
1949 
1950 int
1951 sys_fchmodat(struct proc *p, void *v, register_t *retval)
1952 {
1953 	struct sys_fchmodat_args /* {
1954 		syscallarg(int) fd;
1955 		syscallarg(const char *) path;
1956 		syscallarg(mode_t) mode;
1957 		syscallarg(int) flag;
1958 	} */ *uap = v;
1959 
1960 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
1961 	    SCARG(uap, mode), SCARG(uap, flag), retval));
1962 }
1963 
1964 int
1965 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag,
1966     register_t *retval)
1967 {
1968 	struct vnode *vp;
1969 	struct vattr vattr;
1970 	int error, follow;
1971 	struct nameidata nd;
1972 
1973 	if (mode & ~(S_IFMT | ALLPERMS))
1974 		return (EINVAL);
1975 	if (flag & ~AT_SYMLINK_NOFOLLOW)
1976 		return (EINVAL);
1977 
1978 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
1979 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
1980 	if ((error = namei(&nd)) != 0)
1981 		return (error);
1982 	vp = nd.ni_vp;
1983 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1984 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1985 		error = EROFS;
1986 	else {
1987 		VATTR_NULL(&vattr);
1988 		vattr.va_mode = mode & ALLPERMS;
1989 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
1990 	}
1991 	vput(vp);
1992 	return (error);
1993 }
1994 
1995 /*
1996  * Change mode of a file given a file descriptor.
1997  */
1998 /* ARGSUSED */
1999 int
2000 sys_fchmod(struct proc *p, void *v, register_t *retval)
2001 {
2002 	struct sys_fchmod_args /* {
2003 		syscallarg(int) fd;
2004 		syscallarg(mode_t) mode;
2005 	} */ *uap = v;
2006 	struct vattr vattr;
2007 	struct vnode *vp;
2008 	struct file *fp;
2009 	int error;
2010 
2011 	if (SCARG(uap, mode) & ~(S_IFMT | ALLPERMS))
2012 		return (EINVAL);
2013 
2014 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2015 		return (error);
2016 	vp = (struct vnode *)fp->f_data;
2017 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2018 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2019 		error = EROFS;
2020 	else {
2021 		VATTR_NULL(&vattr);
2022 		vattr.va_mode = SCARG(uap, mode) & ALLPERMS;
2023 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2024 	}
2025 	VOP_UNLOCK(vp, 0, p);
2026 	FRELE(fp);
2027 	return (error);
2028 }
2029 
2030 /*
2031  * Set ownership given a path name.
2032  */
2033 /* ARGSUSED */
2034 int
2035 sys_chown(struct proc *p, void *v, register_t *retval)
2036 {
2037 	struct sys_chown_args /* {
2038 		syscallarg(const char *) path;
2039 		syscallarg(uid_t) uid;
2040 		syscallarg(gid_t) gid;
2041 	} */ *uap = v;
2042 
2043 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2044 	    SCARG(uap, gid), 0, retval));
2045 }
2046 
2047 int
2048 sys_fchownat(struct proc *p, void *v, register_t *retval)
2049 {
2050 	struct sys_fchownat_args /* {
2051 		syscallarg(int) fd;
2052 		syscallarg(const char *) path;
2053 		syscallarg(uid_t) uid;
2054 		syscallarg(gid_t) gid;
2055 		syscallarg(int) flag;
2056 	} */ *uap = v;
2057 
2058 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2059 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag), retval));
2060 }
2061 
2062 int
2063 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2064     int flag, register_t *retval)
2065 {
2066 	struct vnode *vp;
2067 	struct vattr vattr;
2068 	int error, follow;
2069 	struct nameidata nd;
2070 	mode_t mode;
2071 
2072 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2073 		return (EINVAL);
2074 
2075 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2076 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2077 	if ((error = namei(&nd)) != 0)
2078 		return (error);
2079 	vp = nd.ni_vp;
2080 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2081 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2082 		error = EROFS;
2083 	else {
2084 		if ((uid != -1 || gid != -1) &&
2085 		    (suser(p, 0) || suid_clear)) {
2086 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2087 			if (error)
2088 				goto out;
2089 			mode = vattr.va_mode & ~(VSUID | VSGID);
2090 			if (mode == vattr.va_mode)
2091 				mode = VNOVAL;
2092 		}
2093 		else
2094 			mode = VNOVAL;
2095 		VATTR_NULL(&vattr);
2096 		vattr.va_uid = uid;
2097 		vattr.va_gid = gid;
2098 		vattr.va_mode = mode;
2099 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2100 	}
2101 out:
2102 	vput(vp);
2103 	return (error);
2104 }
2105 
2106 /*
2107  * Set ownership given a path name, without following links.
2108  */
2109 /* ARGSUSED */
2110 int
2111 sys_lchown(struct proc *p, void *v, register_t *retval)
2112 {
2113 	struct sys_lchown_args /* {
2114 		syscallarg(const char *) path;
2115 		syscallarg(uid_t) uid;
2116 		syscallarg(gid_t) gid;
2117 	} */ *uap = v;
2118 	struct vnode *vp;
2119 	struct vattr vattr;
2120 	int error;
2121 	struct nameidata nd;
2122 	mode_t mode;
2123 	uid_t uid = SCARG(uap, uid);
2124 	gid_t gid = SCARG(uap, gid);
2125 
2126 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2127 	if ((error = namei(&nd)) != 0)
2128 		return (error);
2129 	vp = nd.ni_vp;
2130 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2131 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2132 		error = EROFS;
2133 	else {
2134 		if ((uid != -1 || gid != -1) &&
2135 		    (suser(p, 0) || suid_clear)) {
2136 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2137 			if (error)
2138 				goto out;
2139 			mode = vattr.va_mode & ~(VSUID | VSGID);
2140 			if (mode == vattr.va_mode)
2141 				mode = VNOVAL;
2142 		}
2143 		else
2144 			mode = VNOVAL;
2145 		VATTR_NULL(&vattr);
2146 		vattr.va_uid = uid;
2147 		vattr.va_gid = gid;
2148 		vattr.va_mode = mode;
2149 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2150 	}
2151 out:
2152 	vput(vp);
2153 	return (error);
2154 }
2155 
2156 /*
2157  * Set ownership given a file descriptor.
2158  */
2159 /* ARGSUSED */
2160 int
2161 sys_fchown(struct proc *p, void *v, register_t *retval)
2162 {
2163 	struct sys_fchown_args /* {
2164 		syscallarg(int) fd;
2165 		syscallarg(uid_t) uid;
2166 		syscallarg(gid_t) gid;
2167 	} */ *uap = v;
2168 	struct vnode *vp;
2169 	struct vattr vattr;
2170 	int error;
2171 	struct file *fp;
2172 	mode_t mode;
2173 	uid_t uid = SCARG(uap, uid);
2174 	gid_t gid = SCARG(uap, gid);
2175 
2176 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2177 		return (error);
2178 	vp = (struct vnode *)fp->f_data;
2179 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2180 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2181 		error = EROFS;
2182 	else {
2183 		if ((uid != -1 || gid != -1) &&
2184 		    (suser(p, 0) || suid_clear)) {
2185 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2186 			if (error)
2187 				goto out;
2188 			mode = vattr.va_mode & ~(VSUID | VSGID);
2189 			if (mode == vattr.va_mode)
2190 				mode = VNOVAL;
2191 		} else
2192 			mode = VNOVAL;
2193 		VATTR_NULL(&vattr);
2194 		vattr.va_uid = uid;
2195 		vattr.va_gid = gid;
2196 		vattr.va_mode = mode;
2197 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2198 	}
2199 out:
2200 	VOP_UNLOCK(vp, 0, p);
2201 	FRELE(fp);
2202 	return (error);
2203 }
2204 
2205 /*
2206  * Set the access and modification times given a path name.
2207  */
2208 /* ARGSUSED */
2209 int
2210 sys_utimes(struct proc *p, void *v, register_t *retval)
2211 {
2212 	struct sys_utimes_args /* {
2213 		syscallarg(const char *) path;
2214 		syscallarg(const struct timeval *) tptr;
2215 	} */ *uap = v;
2216 
2217 	struct timespec ts[2];
2218 	struct timeval tv[2];
2219 	const struct timeval *tvp;
2220 	int error;
2221 
2222 	tvp = SCARG(uap, tptr);
2223 	if (tvp != NULL) {
2224 		error = copyin(tvp, tv, sizeof(tv));
2225 		if (error)
2226 			return (error);
2227 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2228 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2229 	} else
2230 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2231 
2232 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0, retval));
2233 }
2234 
2235 int
2236 sys_utimensat(struct proc *p, void *v, register_t *retval)
2237 {
2238 	struct sys_utimensat_args /* {
2239 		syscallarg(int) fd;
2240 		syscallarg(const char *) path;
2241 		syscallarg(const struct timespec *) times;
2242 		syscallarg(int) flag;
2243 	} */ *uap = v;
2244 
2245 	struct timespec ts[2];
2246 	const struct timespec *tsp;
2247 	int error;
2248 
2249 	tsp = SCARG(uap, times);
2250 	if (tsp != NULL) {
2251 		error = copyin(tsp, ts, sizeof(ts));
2252 		if (error)
2253 			return (error);
2254 	} else
2255 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2256 
2257 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2258 	    SCARG(uap, flag), retval));
2259 }
2260 
2261 int
2262 doutimensat(struct proc *p, int fd, const char *path,
2263     struct timespec ts[2], int flag, register_t *retval)
2264 {
2265 	struct vnode *vp;
2266 	int error, follow;
2267 	struct nameidata nd;
2268 
2269 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2270 		return (EINVAL);
2271 
2272 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2273 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2274 	if ((error = namei(&nd)) != 0)
2275 		return (error);
2276 	vp = nd.ni_vp;
2277 
2278 	return (dovutimens(p, vp, ts, retval));
2279 }
2280 
2281 int
2282 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2],
2283     register_t *retval)
2284 {
2285 	struct vattr vattr;
2286 	struct timespec now;
2287 	int error;
2288 
2289 	VATTR_NULL(&vattr);
2290 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2291 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2292 			vattr.va_vaflags |= VA_UTIMES_NULL;
2293 
2294 		getnanotime(&now);
2295 		if (ts[0].tv_nsec == UTIME_NOW)
2296 			ts[0] = now;
2297 		if (ts[1].tv_nsec == UTIME_NOW)
2298 			ts[1] = now;
2299 	}
2300 
2301 	/*
2302 	 * XXX: Ideally the filesystem code would check tv_nsec ==
2303 	 * UTIME_OMIT instead of tv_sec == VNOVAL, but until then we
2304 	 * need to fudge tv_sec if it happens to equal VNOVAL.
2305 	 */
2306 	if (ts[0].tv_nsec == UTIME_OMIT)
2307 		ts[0].tv_sec = VNOVAL;
2308 	else if (ts[0].tv_sec == VNOVAL)
2309 		ts[0].tv_sec = VNOVAL - 1;
2310 
2311 	if (ts[1].tv_nsec == UTIME_OMIT)
2312 		ts[1].tv_sec = VNOVAL;
2313 	else if (ts[1].tv_sec == VNOVAL)
2314 		ts[1].tv_sec = VNOVAL - 1;
2315 
2316 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2317 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2318 		error = EROFS;
2319 	else {
2320 		vattr.va_atime = ts[0];
2321 		vattr.va_mtime = ts[1];
2322 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2323 	}
2324 	vput(vp);
2325 	return (error);
2326 }
2327 
2328 /*
2329  * Set the access and modification times given a file descriptor.
2330  */
2331 /* ARGSUSED */
2332 int
2333 sys_futimes(struct proc *p, void *v, register_t *retval)
2334 {
2335 	struct sys_futimes_args /* {
2336 		syscallarg(int) fd;
2337 		syscallarg(const struct timeval *) tptr;
2338 	} */ *uap = v;
2339 	struct timeval tv[2];
2340 	struct timespec ts[2];
2341 	const struct timeval *tvp;
2342 	int error;
2343 
2344 	tvp = SCARG(uap, tptr);
2345 	if (tvp != NULL) {
2346 		error = copyin(tvp, tv, sizeof(tv));
2347 		if (error)
2348 			return (error);
2349 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2350 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2351 	} else
2352 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2353 
2354 	return (dofutimens(p, SCARG(uap, fd), ts, retval));
2355 }
2356 
2357 int
2358 sys_futimens(struct proc *p, void *v, register_t *retval)
2359 {
2360 	struct sys_futimens_args /* {
2361 		syscallarg(int) fd;
2362 		syscallarg(const struct timespec *) times;
2363 	} */ *uap = v;
2364 	struct timespec ts[2];
2365 	const struct timespec *tsp;
2366 	int error;
2367 
2368 	tsp = SCARG(uap, times);
2369 	if (tsp != NULL) {
2370 		error = copyin(tsp, ts, sizeof(ts));
2371 		if (error)
2372 			return (error);
2373 	} else
2374 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2375 
2376 	return (dofutimens(p, SCARG(uap, fd), ts, retval));
2377 }
2378 
2379 int
2380 dofutimens(struct proc *p, int fd, struct timespec ts[2], register_t *retval)
2381 {
2382 	struct file *fp;
2383 	struct vnode *vp;
2384 	int error;
2385 
2386 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2387 		return (error);
2388 	vp = (struct vnode *)fp->f_data;
2389 	vref(vp);
2390 	FRELE(fp);
2391 
2392 	return (dovutimens(p, vp, ts, retval));
2393 }
2394 
2395 /*
2396  * Truncate a file given its path name.
2397  */
2398 /* ARGSUSED */
2399 int
2400 sys_truncate(struct proc *p, void *v, register_t *retval)
2401 {
2402 	struct sys_truncate_args /* {
2403 		syscallarg(const char *) path;
2404 		syscallarg(int) pad;
2405 		syscallarg(off_t) length;
2406 	} */ *uap = v;
2407 	struct vnode *vp;
2408 	struct vattr vattr;
2409 	int error;
2410 	struct nameidata nd;
2411 
2412 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2413 	if ((error = namei(&nd)) != 0)
2414 		return (error);
2415 	vp = nd.ni_vp;
2416 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2417 	if (vp->v_type == VDIR)
2418 		error = EISDIR;
2419 	else if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0 &&
2420 	    (error = vn_writechk(vp)) == 0) {
2421 		VATTR_NULL(&vattr);
2422 		vattr.va_size = SCARG(uap, length);
2423 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2424 	}
2425 	vput(vp);
2426 	return (error);
2427 }
2428 
2429 /*
2430  * Truncate a file given a file descriptor.
2431  */
2432 /* ARGSUSED */
2433 int
2434 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2435 {
2436 	struct sys_ftruncate_args /* {
2437 		syscallarg(int) fd;
2438 		syscallarg(int) pad;
2439 		syscallarg(off_t) length;
2440 	} */ *uap = v;
2441 	struct vattr vattr;
2442 	struct vnode *vp;
2443 	struct file *fp;
2444 	off_t len;
2445 	int error;
2446 
2447 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2448 		return (error);
2449 	len = SCARG(uap, length);
2450 	if ((fp->f_flag & FWRITE) == 0 || len < 0) {
2451 		error = EINVAL;
2452 		goto bad;
2453 	}
2454 	vp = (struct vnode *)fp->f_data;
2455 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2456 	if (vp->v_type == VDIR)
2457 		error = EISDIR;
2458 	else if ((error = vn_writechk(vp)) == 0) {
2459 		VATTR_NULL(&vattr);
2460 		vattr.va_size = len;
2461 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2462 	}
2463 	VOP_UNLOCK(vp, 0, p);
2464 bad:
2465 	FRELE(fp);
2466 	return (error);
2467 }
2468 
2469 /*
2470  * Sync an open file.
2471  */
2472 /* ARGSUSED */
2473 int
2474 sys_fsync(struct proc *p, void *v, register_t *retval)
2475 {
2476 	struct sys_fsync_args /* {
2477 		syscallarg(int) fd;
2478 	} */ *uap = v;
2479 	struct vnode *vp;
2480 	struct file *fp;
2481 	int error;
2482 
2483 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2484 		return (error);
2485 	vp = (struct vnode *)fp->f_data;
2486 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2487 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2488 #ifdef FFS_SOFTUPDATES
2489 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2490 		error = softdep_fsync(vp);
2491 #endif
2492 
2493 	VOP_UNLOCK(vp, 0, p);
2494 	FRELE(fp);
2495 	return (error);
2496 }
2497 
2498 /*
2499  * Rename files.  Source and destination must either both be directories,
2500  * or both not be directories.  If target is a directory, it must be empty.
2501  */
2502 /* ARGSUSED */
2503 int
2504 sys_rename(struct proc *p, void *v, register_t *retval)
2505 {
2506 	struct sys_rename_args /* {
2507 		syscallarg(const char *) from;
2508 		syscallarg(const char *) to;
2509 	} */ *uap = v;
2510 
2511 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2512 	    SCARG(uap, to), retval));
2513 }
2514 
2515 int
2516 sys_renameat(struct proc *p, void *v, register_t *retval)
2517 {
2518 	struct sys_renameat_args /* {
2519 		syscallarg(int) fromfd;
2520 		syscallarg(const char *) from;
2521 		syscallarg(int) tofd;
2522 		syscallarg(const char *) to;
2523 	} */ *uap = v;
2524 
2525 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2526 	    SCARG(uap, tofd), SCARG(uap, to), retval));
2527 }
2528 
2529 int
2530 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2531     const char *to, register_t *retval)
2532 {
2533 	struct vnode *tvp, *fvp, *tdvp;
2534 	struct nameidata fromnd, tond;
2535 	int error;
2536 	int flags;
2537 
2538 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2539 	    fromfd, from, p);
2540 	if ((error = namei(&fromnd)) != 0)
2541 		return (error);
2542 	fvp = fromnd.ni_vp;
2543 
2544 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2545 	/*
2546 	 * rename("foo/", "bar/");  is  OK
2547 	 */
2548 	if (fvp->v_type == VDIR)
2549 		flags |= STRIPSLASHES;
2550 
2551 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2552 	if ((error = namei(&tond)) != 0) {
2553 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2554 		vrele(fromnd.ni_dvp);
2555 		vrele(fvp);
2556 		goto out1;
2557 	}
2558 	tdvp = tond.ni_dvp;
2559 	tvp = tond.ni_vp;
2560 	if (tvp != NULL) {
2561 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2562 			error = ENOTDIR;
2563 			goto out;
2564 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2565 			error = EISDIR;
2566 			goto out;
2567 		}
2568 	}
2569 	if (fvp == tdvp)
2570 		error = EINVAL;
2571 	/*
2572 	 * If source is the same as the destination (that is the
2573 	 * same inode number)
2574 	 */
2575 	if (fvp == tvp)
2576 		error = -1;
2577 out:
2578 	if (!error) {
2579 		if (tvp) {
2580 			(void)uvm_vnp_uncache(tvp);
2581 		}
2582 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2583 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2584 	} else {
2585 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2586 		if (tdvp == tvp)
2587 			vrele(tdvp);
2588 		else
2589 			vput(tdvp);
2590 		if (tvp)
2591 			vput(tvp);
2592 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2593 		vrele(fromnd.ni_dvp);
2594 		vrele(fvp);
2595 	}
2596 	vrele(tond.ni_startdir);
2597 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
2598 out1:
2599 	if (fromnd.ni_startdir)
2600 		vrele(fromnd.ni_startdir);
2601 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
2602 	if (error == -1)
2603 		return (0);
2604 	return (error);
2605 }
2606 
2607 /*
2608  * Make a directory file.
2609  */
2610 /* ARGSUSED */
2611 int
2612 sys_mkdir(struct proc *p, void *v, register_t *retval)
2613 {
2614 	struct sys_mkdir_args /* {
2615 		syscallarg(const char *) path;
2616 		syscallarg(mode_t) mode;
2617 	} */ *uap = v;
2618 
2619 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2620 	    retval));
2621 }
2622 
2623 int
2624 sys_mkdirat(struct proc *p, void *v, register_t *retval)
2625 {
2626 	struct sys_mkdirat_args /* {
2627 		syscallarg(int) fd;
2628 		syscallarg(const char *) path;
2629 		syscallarg(mode_t) mode;
2630 	} */ *uap = v;
2631 
2632 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
2633 	    SCARG(uap, mode), retval));
2634 }
2635 
2636 int
2637 domkdirat(struct proc *p, int fd, const char *path, mode_t mode,
2638     register_t *retval)
2639 {
2640 	struct vnode *vp;
2641 	struct vattr vattr;
2642 	int error;
2643 	struct nameidata nd;
2644 
2645 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
2646 	    fd, path, p);
2647 	if ((error = namei(&nd)) != 0)
2648 		return (error);
2649 	vp = nd.ni_vp;
2650 	if (vp != NULL) {
2651 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2652 		if (nd.ni_dvp == vp)
2653 			vrele(nd.ni_dvp);
2654 		else
2655 			vput(nd.ni_dvp);
2656 		vrele(vp);
2657 		return (EEXIST);
2658 	}
2659 	VATTR_NULL(&vattr);
2660 	vattr.va_type = VDIR;
2661 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2662 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2663 	if (!error)
2664 		vput(nd.ni_vp);
2665 	return (error);
2666 }
2667 
2668 /*
2669  * Remove a directory file.
2670  */
2671 /* ARGSUSED */
2672 int
2673 sys_rmdir(struct proc *p, void *v, register_t *retval)
2674 {
2675 	struct sys_rmdir_args /* {
2676 		syscallarg(const char *) path;
2677 	} */ *uap = v;
2678 
2679 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR,
2680 	    retval));
2681 }
2682 
2683 /*
2684  * Read a block of directory entries in a file system independent format.
2685  */
2686 int
2687 getdirentries_internal(struct proc *p, int fd, char *buf, int count,
2688     off_t *basep, register_t *retval)
2689 {
2690 	struct vnode *vp;
2691 	struct file *fp;
2692 	struct uio auio;
2693 	struct iovec aiov;
2694 	int error, eofflag;
2695 
2696 	if (count < 0)
2697 		return EINVAL;
2698 	if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
2699 		return (error);
2700 	if ((fp->f_flag & FREAD) == 0) {
2701 		error = EBADF;
2702 		goto bad;
2703 	}
2704 	if (fp->f_offset < 0) {
2705 		error = EINVAL;
2706 		goto bad;
2707 	}
2708 	vp = (struct vnode *)fp->f_data;
2709 	if (vp->v_type != VDIR) {
2710 		error = EINVAL;
2711 		goto bad;
2712 	}
2713 	aiov.iov_base = buf;
2714 	aiov.iov_len = count;
2715 	auio.uio_iov = &aiov;
2716 	auio.uio_iovcnt = 1;
2717 	auio.uio_rw = UIO_READ;
2718 	auio.uio_segflg = UIO_USERSPACE;
2719 	auio.uio_procp = p;
2720 	auio.uio_resid = count;
2721 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2722 	*basep = auio.uio_offset = fp->f_offset;
2723 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0);
2724 	fp->f_offset = auio.uio_offset;
2725 	VOP_UNLOCK(vp, 0, p);
2726 	if (error)
2727 		goto bad;
2728 	*retval = count - auio.uio_resid;
2729 bad:
2730 	FRELE(fp);
2731 	return (error);
2732 }
2733 
2734 int
2735 sys_getdirentries(struct proc *p, void *v, register_t *retval)
2736 {
2737 	struct sys_getdirentries_args /* {
2738 		syscallarg(int) fd;
2739 		syscallarg(char *) buf;
2740 		syscallarg(int) count;
2741 		syscallarg(off_t *) basep;
2742 	} */ *uap = v;
2743 	int error;
2744 	off_t off;
2745 
2746 	error = getdirentries_internal(p, SCARG(uap, fd), SCARG(uap, buf),
2747 	    SCARG(uap, count), &off, retval);
2748 	if (!error)
2749 		error = copyout(&off, SCARG(uap, basep), sizeof(off_t));
2750 	return error;
2751 }
2752 
#ifdef COMPAT_O48
/*
 * COMPAT_O48 variant of getdirentries(2): identical to the native call
 * except that the starting offset is converted to a long before being
 * copied out to basep.
 */
int
compat_o48_sys_getdirentries(struct proc *p, void *v, register_t *retval)
{
	struct compat_o48_sys_getdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(int) count;
		syscallarg(long *) basep;
	} */ *uap = v;
	off_t base;
	long lbase;
	int error;

	error = getdirentries_internal(p, SCARG(uap, fd), SCARG(uap, buf),
	    SCARG(uap, count), &base, retval);
	if (error != 0)
		return (error);

	lbase = (long)base;
	return (copyout(&lbase, SCARG(uap, basep), sizeof(lbase)));
}
#endif
2775 
2776 /*
2777  * Set the mode mask for creation of filesystem nodes.
2778  */
2779 int
2780 sys_umask(struct proc *p, void *v, register_t *retval)
2781 {
2782 	struct sys_umask_args /* {
2783 		syscallarg(mode_t) newmask;
2784 	} */ *uap = v;
2785 	struct filedesc *fdp;
2786 
2787 	fdp = p->p_fd;
2788 	*retval = fdp->fd_cmask;
2789 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
2790 	return (0);
2791 }
2792 
2793 /*
2794  * Void all references to file by ripping underlying filesystem
2795  * away from vnode.
2796  */
2797 /* ARGSUSED */
2798 int
2799 sys_revoke(struct proc *p, void *v, register_t *retval)
2800 {
2801 	struct sys_revoke_args /* {
2802 		syscallarg(const char *) path;
2803 	} */ *uap = v;
2804 	struct vnode *vp;
2805 	struct vattr vattr;
2806 	int error;
2807 	struct nameidata nd;
2808 
2809 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2810 	if ((error = namei(&nd)) != 0)
2811 		return (error);
2812 	vp = nd.ni_vp;
2813 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2814 		goto out;
2815 	if (p->p_ucred->cr_uid != vattr.va_uid &&
2816 	    (error = suser(p, 0)))
2817 		goto out;
2818 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
2819 		VOP_REVOKE(vp, REVOKEALL);
2820 out:
2821 	vrele(vp);
2822 	return (error);
2823 }
2824 
2825 /*
2826  * Convert a user file descriptor to a kernel file entry.
2827  *
2828  * On return *fpp is FREF:ed.
2829  */
2830 int
2831 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
2832 {
2833 	struct file *fp;
2834 	struct vnode *vp;
2835 
2836 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2837 		return (EBADF);
2838 
2839 	if (fp->f_type != DTYPE_VNODE)
2840 		return (EINVAL);
2841 
2842 	vp = (struct vnode *)fp->f_data;
2843 	if (vp->v_type == VBAD)
2844 		return (EBADF);
2845 
2846 	FREF(fp);
2847 	*fpp = fp;
2848 
2849 	return (0);
2850 }
2851 
2852 /*
2853  * Positional read system call.
2854  */
2855 int
2856 sys_pread(struct proc *p, void *v, register_t *retval)
2857 {
2858 	struct sys_pread_args /* {
2859 		syscallarg(int) fd;
2860 		syscallarg(void *) buf;
2861 		syscallarg(size_t) nbyte;
2862 		syscallarg(int) pad;
2863 		syscallarg(off_t) offset;
2864 	} */ *uap = v;
2865 	struct iovec iov;
2866 	struct filedesc *fdp = p->p_fd;
2867 	struct file *fp;
2868 	struct vnode *vp;
2869 	off_t offset;
2870 	int fd = SCARG(uap, fd);
2871 
2872 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2873 		return (EBADF);
2874 	if ((fp->f_flag & FREAD) == 0)
2875 		return (EBADF);
2876 
2877 	vp = (struct vnode *)fp->f_data;
2878 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2879 		return (ESPIPE);
2880 	}
2881 
2882 	iov.iov_base = SCARG(uap, buf);
2883 	iov.iov_len = SCARG(uap, nbyte);
2884 
2885 	offset = SCARG(uap, offset);
2886 
2887 	FREF(fp);
2888 
2889 	/* dofilereadv() will FRELE the descriptor for us */
2890 	return (dofilereadv(p, fd, fp, &iov, 1, 0, &offset, retval));
2891 }
2892 
2893 /*
2894  * Positional scatter read system call.
2895  */
2896 int
2897 sys_preadv(struct proc *p, void *v, register_t *retval)
2898 {
2899 	struct sys_preadv_args /* {
2900 		syscallarg(int) fd;
2901 		syscallarg(const struct iovec *) iovp;
2902 		syscallarg(int) iovcnt;
2903 		syscallarg(int) pad;
2904 		syscallarg(off_t) offset;
2905 	} */ *uap = v;
2906 	struct filedesc *fdp = p->p_fd;
2907 	struct file *fp;
2908 	struct vnode *vp;
2909 	off_t offset;
2910 	int fd = SCARG(uap, fd);
2911 
2912 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2913 		return (EBADF);
2914 	if ((fp->f_flag & FREAD) == 0)
2915 		return (EBADF);
2916 
2917 	vp = (struct vnode *)fp->f_data;
2918 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2919 		return (ESPIPE);
2920 	}
2921 
2922 	FREF(fp);
2923 
2924 	offset = SCARG(uap, offset);
2925 
2926 	/* dofilereadv() will FRELE the descriptor for us */
2927 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
2928 	    &offset, retval));
2929 }
2930 
2931 /*
2932  * Positional write system call.
2933  */
2934 int
2935 sys_pwrite(struct proc *p, void *v, register_t *retval)
2936 {
2937 	struct sys_pwrite_args /* {
2938 		syscallarg(int) fd;
2939 		syscallarg(const void *) buf;
2940 		syscallarg(size_t) nbyte;
2941 		syscallarg(int) pad;
2942 		syscallarg(off_t) offset;
2943 	} */ *uap = v;
2944 	struct iovec iov;
2945 	struct filedesc *fdp = p->p_fd;
2946 	struct file *fp;
2947 	struct vnode *vp;
2948 	off_t offset;
2949 	int fd = SCARG(uap, fd);
2950 
2951 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2952 		return (EBADF);
2953 	if ((fp->f_flag & FWRITE) == 0)
2954 		return (EBADF);
2955 
2956 	vp = (struct vnode *)fp->f_data;
2957 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2958 		return (ESPIPE);
2959 	}
2960 
2961 	iov.iov_base = (void *)SCARG(uap, buf);
2962 	iov.iov_len = SCARG(uap, nbyte);
2963 
2964 	FREF(fp);
2965 
2966 	offset = SCARG(uap, offset);
2967 
2968 	/* dofilewrite() will FRELE the descriptor for us */
2969 	return (dofilewritev(p, fd, fp, &iov, 1, 0, &offset, retval));
2970 }
2971 
2972 /*
2973  * Positional gather write system call.
2974  */
2975 int
2976 sys_pwritev(struct proc *p, void *v, register_t *retval)
2977 {
2978 	struct sys_pwritev_args /* {
2979 		syscallarg(int) fd;
2980 		syscallarg(const struct iovec *) iovp;
2981 		syscallarg(int) iovcnt;
2982 		syscallarg(int) pad;
2983 		syscallarg(off_t) offset;
2984 	} */ *uap = v;
2985 	struct filedesc *fdp = p->p_fd;
2986 	struct file *fp;
2987 	struct vnode *vp;
2988 	off_t offset;
2989 	int fd = SCARG(uap, fd);
2990 
2991 	if ((fp = fd_getfile(fdp, fd)) == NULL)
2992 		return (EBADF);
2993 	if ((fp->f_flag & FWRITE) == 0)
2994 		return (EBADF);
2995 
2996 	vp = (struct vnode *)fp->f_data;
2997 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2998 		return (ESPIPE);
2999 	}
3000 
3001 	FREF(fp);
3002 
3003 	offset = SCARG(uap, offset);
3004 
3005 	/* dofilewritev() will FRELE the descriptor for us */
3006 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
3007 	    1, &offset, retval));
3008 }
3009