xref: /openbsd/sys/kern/vfs_syscalls.c (revision af61481e)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.370 2024/11/05 06:03:19 jsg Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <sys/lock.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51 #include <sys/proc.h>
52 #include <sys/pledge.h>
53 #include <sys/uio.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/ktrace.h>
57 #include <sys/unistd.h>
58 #include <sys/specdev.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 
62 #include <sys/syscallargs.h>
63 
64 extern int suid_clear;
65 
66 static int change_dir(struct nameidata *, struct proc *);
67 
68 void checkdirs(struct vnode *);
69 
70 int copyout_statfs(struct statfs *, void *, struct proc *);
71 
72 int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
73 int domknodat(struct proc *, int, const char *, mode_t, dev_t);
74 int dolinkat(struct proc *, int, const char *, int, const char *, int);
75 int dosymlinkat(struct proc *, const char *, int, const char *);
76 int dounlinkat(struct proc *, int, const char *, int);
77 int dofaccessat(struct proc *, int, const char *, int, int);
78 int dofstatat(struct proc *, int, const char *, struct stat *, int);
79 int dopathconfat(struct proc *, int, const char *, int, int, register_t *);
80 int doreadlinkat(struct proc *, int, const char *, char *, size_t,
81     register_t *);
82 int dochflagsat(struct proc *, int, const char *, u_int, int);
83 int dovchflags(struct proc *, struct vnode *, u_int);
84 int dofchmodat(struct proc *, int, const char *, mode_t, int);
85 int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
86 int dorenameat(struct proc *, int, const char *, int, const char *);
87 int domkdirat(struct proc *, int, const char *, mode_t);
88 int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
89 int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
90 int dofutimens(struct proc *, int, struct timespec [2]);
91 int dounmount_leaf(struct mount *, int, struct proc *);
92 
93 /*
94  * Virtual File System System Calls
95  */
96 
97 /*
98  * Mount a file system.
99  */
100 int
sys_mount(struct proc * p,void * v,register_t * retval)101 sys_mount(struct proc *p, void *v, register_t *retval)
102 {
103 	struct sys_mount_args /* {
104 		syscallarg(const char *) type;
105 		syscallarg(const char *) path;
106 		syscallarg(int) flags;
107 		syscallarg(void *) data;
108 	} */ *uap = v;
109 	struct vnode *vp;
110 	struct mount *mp;
111 	int error, mntflag = 0;
112 	char fstypename[MFSNAMELEN];
113 	char fspath[MNAMELEN];
114 	struct nameidata nd;
115 	struct vfsconf *vfsp;
116 	int flags = SCARG(uap, flags);
117 	void *args = NULL;
118 
119 	if ((error = suser(p)))
120 		return (error);
121 
122 	/*
123 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
124 	 */
125 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
126 	if (error)
127 		return(error);
128 
129 	/*
130 	 * Get vnode to be covered
131 	 */
132 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
133 	if ((error = namei(&nd)) != 0)
134 		goto fail;
135 	vp = nd.ni_vp;
136 	if (flags & MNT_UPDATE) {
137 		if ((vp->v_flag & VROOT) == 0) {
138 			vput(vp);
139 			error = EINVAL;
140 			goto fail;
141 		}
142 		mp = vp->v_mount;
143 		vfsp = mp->mnt_vfc;
144 
145 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
146 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
147 		if (error) {
148 			vput(vp);
149 			goto fail;
150 		}
151 
152 		mntflag = mp->mnt_flag;
153 		/*
154 		 * We only allow the filesystem to be reloaded if it
155 		 * is currently mounted read-only.
156 		 */
157 		if ((flags & MNT_RELOAD) &&
158 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
159 			vput(vp);
160 			error = EOPNOTSUPP;	/* Needs translation */
161 			goto fail;
162 		}
163 
164 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
165 			vput(vp);
166 			goto fail;
167 		}
168 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
169 		goto update;
170 	}
171 	/*
172 	 * Do not allow disabling of permission checks unless exec and access to
173 	 * device files is disabled too.
174 	 */
175 	if ((flags & MNT_NOPERM) &&
176 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
177 		vput(vp);
178 		error = EPERM;
179 		goto fail;
180 	}
181 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
182 		vput(vp);
183 		goto fail;
184 	}
185 	if (vp->v_type != VDIR) {
186 		vput(vp);
187 		goto fail;
188 	}
189 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
190 	if (error) {
191 		vput(vp);
192 		goto fail;
193 	}
194 	vfsp = vfs_byname(fstypename);
195 	if (vfsp == NULL) {
196 		vput(vp);
197 		error = EOPNOTSUPP;
198 		goto fail;
199 	}
200 
201 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
202 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
203 	if (error) {
204 		vput(vp);
205 		goto fail;
206 	}
207 
208 	if (vp->v_mountedhere != NULL) {
209 		vput(vp);
210 		error = EBUSY;
211 		goto fail;
212 	}
213 
214 	/*
215 	 * Allocate and initialize the file system.
216 	 */
217 	mp = vfs_mount_alloc(vp, vfsp);
218 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
219 
220 update:
221 	/* Ensure that the parent mountpoint does not get unmounted. */
222 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
223 	if (error) {
224 		if (mp->mnt_flag & MNT_UPDATE) {
225 			mp->mnt_flag = mntflag;
226 			vfs_unbusy(mp);
227 		} else {
228 			vfs_unbusy(mp);
229 			vfs_mount_free(mp);
230 		}
231 		vput(vp);
232 		goto fail;
233 	}
234 
235 	/*
236 	 * Set the mount level flags.
237 	 */
238 	if (flags & MNT_RDONLY)
239 		mp->mnt_flag |= MNT_RDONLY;
240 	else if (mp->mnt_flag & MNT_RDONLY)
241 		mp->mnt_flag |= MNT_WANTRDWR;
242 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
243 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
244 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
245 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | MNT_NOPERM |
246 	    MNT_FORCE);
247 	/*
248 	 * Mount the filesystem.
249 	 */
250 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
251 	if (!error) {
252 		mp->mnt_stat.f_ctime = gettime();
253 	}
254 	if (mp->mnt_flag & MNT_UPDATE) {
255 		vfs_unbusy(vp->v_mount);
256 		vput(vp);
257 		if (mp->mnt_flag & MNT_WANTRDWR)
258 			mp->mnt_flag &= ~MNT_RDONLY;
259 		mp->mnt_flag &= ~MNT_OP_FLAGS;
260 		if (error)
261 			mp->mnt_flag = mntflag;
262 
263 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
264 			if (mp->mnt_syncer == NULL)
265 				error = vfs_allocate_syncvnode(mp);
266 		} else {
267 			if (mp->mnt_syncer != NULL)
268 				vgone(mp->mnt_syncer);
269 			mp->mnt_syncer = NULL;
270 		}
271 
272 		vfs_unbusy(mp);
273 		goto fail;
274 	}
275 
276 	mp->mnt_flag &= ~MNT_OP_FLAGS;
277 	vp->v_mountedhere = mp;
278 
279 	/*
280 	 * Put the new filesystem on the mount list after root.
281 	 */
282 	cache_purge(vp);
283 	if (!error) {
284 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
285 		checkdirs(vp);
286 		vfs_unbusy(vp->v_mount);
287 		VOP_UNLOCK(vp);
288 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
289 			error = vfs_allocate_syncvnode(mp);
290 		vfs_unbusy(mp);
291 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
292 		if ((error = VFS_START(mp, 0, p)) != 0)
293 			vrele(vp);
294 	} else {
295 		mp->mnt_vnodecovered->v_mountedhere = NULL;
296 		vfs_unbusy(mp);
297 		vfs_mount_free(mp);
298 		vfs_unbusy(vp->v_mount);
299 		vput(vp);
300 	}
301 fail:
302 	if (args)
303 		free(args, M_TEMP, vfsp->vfc_datasize);
304 	return (error);
305 }
306 
307 /*
308  * Scan all active processes to see if any of them have a current
309  * or root directory onto which the new filesystem has just been
310  * mounted. If so, replace them with the new mount point, keeping
311  * track of how many were replaced.  That's the number of references
312  * the old vnode had that we've replaced, so finish by vrele()'ing
313  * it that many times.  This puts off any possible sleeping until
314  * we've finished walking the allprocess list.
315  */
316 void
checkdirs(struct vnode * olddp)317 checkdirs(struct vnode *olddp)
318 {
319 	struct filedesc *fdp;
320 	struct vnode *newdp;
321 	struct process *pr;
322 	u_int  free_count = 0;
323 
324 	if (olddp->v_usecount == 1)
325 		return;
326 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
327 		panic("mount: lost mount");
328 	LIST_FOREACH(pr, &allprocess, ps_list) {
329 		fdp = pr->ps_fd;
330 		if (fdp->fd_cdir == olddp) {
331 			free_count++;
332 			vref(newdp);
333 			fdp->fd_cdir = newdp;
334 		}
335 		if (fdp->fd_rdir == olddp) {
336 			free_count++;
337 			vref(newdp);
338 			fdp->fd_rdir = newdp;
339 		}
340 	}
341 	if (rootvnode == olddp) {
342 		free_count++;
343 		vref(newdp);
344 		rootvnode = newdp;
345 	}
346 	while (free_count-- > 0)
347 		vrele(olddp);
348 	vput(newdp);
349 }
350 
351 /*
352  * Unmount a file system.
353  *
354  * Note: unmount takes a path to the vnode mounted on as argument,
355  * not special file (as before).
356  */
357 int
sys_unmount(struct proc * p,void * v,register_t * retval)358 sys_unmount(struct proc *p, void *v, register_t *retval)
359 {
360 	struct sys_unmount_args /* {
361 		syscallarg(const char *) path;
362 		syscallarg(int) flags;
363 	} */ *uap = v;
364 	struct vnode *vp;
365 	struct mount *mp;
366 	int error;
367 	struct nameidata nd;
368 
369 	if ((error = suser(p)) != 0)
370 		return (error);
371 
372 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
373 	    SCARG(uap, path), p);
374 	if ((error = namei(&nd)) != 0)
375 		return (error);
376 	vp = nd.ni_vp;
377 	mp = vp->v_mount;
378 
379 	/*
380 	 * Don't allow unmounting the root file system.
381 	 */
382 	if (mp->mnt_flag & MNT_ROOTFS) {
383 		vput(vp);
384 		return (EINVAL);
385 	}
386 
387 	/*
388 	 * Must be the root of the filesystem
389 	 */
390 	if ((vp->v_flag & VROOT) == 0) {
391 		vput(vp);
392 		return (EINVAL);
393 	}
394 	vput(vp);
395 
396 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
397 		return (EBUSY);
398 
399 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
400 }
401 
402 /*
403  * Do the actual file system unmount.
404  */
405 int
dounmount(struct mount * mp,int flags,struct proc * p)406 dounmount(struct mount *mp, int flags, struct proc *p)
407 {
408 	SLIST_HEAD(, mount) mplist;
409 	struct mount *nmp;
410 	int error;
411 
412 	SLIST_INIT(&mplist);
413 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
414 
415 	/*
416 	 * Collect nested mount points. This takes advantage of the mount list
417 	 * being ordered - nested mount points come after their parent.
418 	 */
419 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
420 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
421 			if (mp->mnt_vnodecovered == NULLVP ||
422 			    mp->mnt_vnodecovered->v_mount != nmp)
423 				continue;
424 
425 			if ((flags & MNT_FORCE) == 0) {
426 				error = EBUSY;
427 				goto err;
428 			}
429 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
430 			if (error) {
431 				if ((flags & MNT_DOOMED)) {
432 					/*
433 					 * If the mount point was busy due to
434 					 * being unmounted, it has been removed
435 					 * from the mount list already.
436 					 * Restart the iteration from the last
437 					 * collected busy entry.
438 					 */
439 					mp = SLIST_FIRST(&mplist);
440 					break;
441 				}
442 				goto err;
443 			}
444 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
445 			break;
446 		}
447 	}
448 
449 	/*
450 	 * Nested mount points cannot appear during this loop as mounting
451 	 * requires a read lock for the parent mount point.
452 	 */
453 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
454 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
455 		error = dounmount_leaf(mp, flags, p);
456 		if (error)
457 			goto err;
458 	}
459 	return (0);
460 
461 err:
462 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
463 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
464 		vfs_unbusy(mp);
465 	}
466 	return (error);
467 }
468 
469 int
dounmount_leaf(struct mount * mp,int flags,struct proc * p)470 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
471 {
472 	struct vnode *coveredvp;
473 	struct vnode *vp, *nvp;
474 	int error;
475 	int hadsyncer = 0;
476 
477 	mp->mnt_flag &=~ MNT_ASYNC;
478 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
479 	if (mp->mnt_syncer != NULL) {
480 		hadsyncer = 1;
481 		vgone(mp->mnt_syncer);
482 		mp->mnt_syncer = NULL;
483 	}
484 
485 	/*
486 	 * Before calling file system unmount, make sure
487 	 * all unveils to vnodes in here are dropped.
488 	 */
489 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
490 		unveil_removevnode(vp);
491 	}
492 
493 	if (((mp->mnt_flag & MNT_RDONLY) ||
494 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
495 	    (flags & MNT_FORCE))
496 		error = VFS_UNMOUNT(mp, flags, p);
497 
498 	if (error && !(flags & MNT_DOOMED)) {
499 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
500 			(void) vfs_allocate_syncvnode(mp);
501 		vfs_unbusy(mp);
502 		return (error);
503 	}
504 
505 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
506 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
507 		coveredvp->v_mountedhere = NULL;
508 		vrele(coveredvp);
509 	}
510 
511 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
512 		panic("unmount: dangling vnode");
513 
514 	vfs_unbusy(mp);
515 	vfs_mount_free(mp);
516 
517 	return (0);
518 }
519 
520 /*
521  * Sync each mounted filesystem.
522  */
523 int
sys_sync(struct proc * p,void * v,register_t * retval)524 sys_sync(struct proc *p, void *v, register_t *retval)
525 {
526 	struct mount *mp;
527 	int asyncflag;
528 
529 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
530 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
531 			continue;
532 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
533 			asyncflag = mp->mnt_flag & MNT_ASYNC;
534 			mp->mnt_flag &= ~MNT_ASYNC;
535 			uvm_vnp_sync(mp);
536 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
537 			if (asyncflag)
538 				mp->mnt_flag |= MNT_ASYNC;
539 		}
540 		vfs_unbusy(mp);
541 	}
542 
543 	return (0);
544 }
545 
546 /*
547  * Change filesystem quotas.
548  */
549 int
sys_quotactl(struct proc * p,void * v,register_t * retval)550 sys_quotactl(struct proc *p, void *v, register_t *retval)
551 {
552 	struct sys_quotactl_args /* {
553 		syscallarg(const char *) path;
554 		syscallarg(int) cmd;
555 		syscallarg(int) uid;
556 		syscallarg(char *) arg;
557 	} */ *uap = v;
558 	struct mount *mp;
559 	int error;
560 	struct nameidata nd;
561 
562 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
563 	if ((error = namei(&nd)) != 0)
564 		return (error);
565 	mp = nd.ni_vp->v_mount;
566 	vrele(nd.ni_vp);
567 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
568 	    SCARG(uap, arg), p));
569 }
570 
571 int
copyout_statfs(struct statfs * sp,void * uaddr,struct proc * p)572 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
573 {
574 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
575 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
576 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
577 	char *s, *d;
578 	int error;
579 
580 	/* Don't let non-root see filesystem id (for NFS security) */
581 	if (suser(p)) {
582 		fsid_t fsid;
583 
584 		s = (char *)sp;
585 		d = (char *)uaddr;
586 
587 		memset(&fsid, 0, sizeof(fsid));
588 
589 		if ((error = copyout(s, d, co_sz1)) != 0)
590 			return (error);
591 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
592 			return (error);
593 		return (copyout(s + co_off2, d + co_off2, co_sz2));
594 	}
595 
596 	return (copyout(sp, uaddr, sizeof(*sp)));
597 }
598 
599 /*
600  * Get filesystem statistics.
601  */
602 int
sys_statfs(struct proc * p,void * v,register_t * retval)603 sys_statfs(struct proc *p, void *v, register_t *retval)
604 {
605 	struct sys_statfs_args /* {
606 		syscallarg(const char *) path;
607 		syscallarg(struct statfs *) buf;
608 	} */ *uap = v;
609 	struct mount *mp;
610 	struct statfs *sp;
611 	int error;
612 	struct nameidata nd;
613 
614 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
615 	    SCARG(uap, path), p);
616 	nd.ni_pledge = PLEDGE_RPATH;
617 	nd.ni_unveil = UNVEIL_READ;
618 	if ((error = namei(&nd)) != 0)
619 		return (error);
620 	mp = nd.ni_vp->v_mount;
621 	sp = &mp->mnt_stat;
622 	vrele(nd.ni_vp);
623 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
624 		return (error);
625 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
626 
627 	return (copyout_statfs(sp, SCARG(uap, buf), p));
628 }
629 
630 /*
631  * Get filesystem statistics.
632  */
633 int
sys_fstatfs(struct proc * p,void * v,register_t * retval)634 sys_fstatfs(struct proc *p, void *v, register_t *retval)
635 {
636 	struct sys_fstatfs_args /* {
637 		syscallarg(int) fd;
638 		syscallarg(struct statfs *) buf;
639 	} */ *uap = v;
640 	struct file *fp;
641 	struct mount *mp;
642 	struct statfs *sp;
643 	int error;
644 
645 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
646 		return (error);
647 	mp = ((struct vnode *)fp->f_data)->v_mount;
648 	if (!mp) {
649 		FRELE(fp, p);
650 		return (ENOENT);
651 	}
652 	sp = &mp->mnt_stat;
653 	error = VFS_STATFS(mp, sp, p);
654 	FRELE(fp, p);
655 	if (error)
656 		return (error);
657 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
658 
659 	return (copyout_statfs(sp, SCARG(uap, buf), p));
660 }
661 
662 /*
663  * Get statistics on all filesystems.
664  */
665 int
sys_getfsstat(struct proc * p,void * v,register_t * retval)666 sys_getfsstat(struct proc *p, void *v, register_t *retval)
667 {
668 	struct sys_getfsstat_args /* {
669 		syscallarg(struct statfs *) buf;
670 		syscallarg(size_t) bufsize;
671 		syscallarg(int) flags;
672 	} */ *uap = v;
673 	struct mount *mp;
674 	struct statfs *sp;
675 	struct statfs *sfsp;
676 	size_t count, maxcount;
677 	int error, flags = SCARG(uap, flags);
678 
679 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
680 	sfsp = SCARG(uap, buf);
681 	count = 0;
682 
683 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
684 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
685 			continue;
686 		if (sfsp && count < maxcount) {
687 			sp = &mp->mnt_stat;
688 
689 			/* Refresh stats unless MNT_NOWAIT is specified */
690 			if (flags != MNT_NOWAIT &&
691 			    flags != MNT_LAZY &&
692 			    (flags == MNT_WAIT ||
693 			    flags == 0) &&
694 			    (error = VFS_STATFS(mp, sp, p))) {
695 				vfs_unbusy(mp);
696 				continue;
697 			}
698 
699 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
700 			error = (copyout_statfs(sp, sfsp, p));
701 			if (error) {
702 				vfs_unbusy(mp);
703 				return (error);
704 			}
705 			sfsp++;
706 		}
707 		count++;
708 		vfs_unbusy(mp);
709 	}
710 
711 	if (sfsp && count > maxcount)
712 		*retval = maxcount;
713 	else
714 		*retval = count;
715 
716 	return (0);
717 }
718 
719 /*
720  * Change current working directory to a given file descriptor.
721  */
722 int
sys_fchdir(struct proc * p,void * v,register_t * retval)723 sys_fchdir(struct proc *p, void *v, register_t *retval)
724 {
725 	struct sys_fchdir_args /* {
726 		syscallarg(int) fd;
727 	} */ *uap = v;
728 	struct filedesc *fdp = p->p_fd;
729 	struct vnode *vp, *tdp, *old_cdir;
730 	struct mount *mp;
731 	struct file *fp;
732 	int error;
733 
734 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
735 		return (EBADF);
736 	vp = fp->f_data;
737 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
738 		FRELE(fp, p);
739 		return (ENOTDIR);
740 	}
741 	vref(vp);
742 	FRELE(fp, p);
743 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
744 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
745 
746 	while (!error && (mp = vp->v_mountedhere) != NULL) {
747 		if (vfs_busy(mp, VB_READ|VB_WAIT))
748 			continue;
749 		error = VFS_ROOT(mp, &tdp);
750 		vfs_unbusy(mp);
751 		if (error)
752 			break;
753 		vput(vp);
754 		vp = tdp;
755 	}
756 	if (error) {
757 		vput(vp);
758 		return (error);
759 	}
760 	VOP_UNLOCK(vp);
761 	old_cdir = fdp->fd_cdir;
762 	fdp->fd_cdir = vp;
763 	vrele(old_cdir);
764 	return (0);
765 }
766 
767 /*
768  * Change current working directory (``.'').
769  */
770 int
sys_chdir(struct proc * p,void * v,register_t * retval)771 sys_chdir(struct proc *p, void *v, register_t *retval)
772 {
773 	struct sys_chdir_args /* {
774 		syscallarg(const char *) path;
775 	} */ *uap = v;
776 	struct filedesc *fdp = p->p_fd;
777 	struct vnode *old_cdir;
778 	int error;
779 	struct nameidata nd;
780 
781 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
782 	    SCARG(uap, path), p);
783 	nd.ni_pledge = PLEDGE_RPATH;
784 	nd.ni_unveil = UNVEIL_READ;
785 	if ((error = change_dir(&nd, p)) != 0)
786 		return (error);
787 	old_cdir = fdp->fd_cdir;
788 	fdp->fd_cdir = nd.ni_vp;
789 	vrele(old_cdir);
790 	return (0);
791 }
792 
793 /*
794  * Change notion of root (``/'') directory.
795  */
796 int
sys_chroot(struct proc * p,void * v,register_t * retval)797 sys_chroot(struct proc *p, void *v, register_t *retval)
798 {
799 	struct sys_chroot_args /* {
800 		syscallarg(const char *) path;
801 	} */ *uap = v;
802 	struct filedesc *fdp = p->p_fd;
803 	struct vnode *old_cdir, *old_rdir;
804 	int error;
805 	struct nameidata nd;
806 
807 	if ((error = suser(p)) != 0)
808 		return (error);
809 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
810 	    SCARG(uap, path), p);
811 	if ((error = change_dir(&nd, p)) != 0)
812 		return (error);
813 	if (fdp->fd_rdir != NULL) {
814 		/*
815 		 * A chroot() done inside a changed root environment does
816 		 * an automatic chdir to avoid the out-of-tree experience.
817 		 */
818 		vref(nd.ni_vp);
819 		old_rdir = fdp->fd_rdir;
820 		old_cdir = fdp->fd_cdir;
821 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
822 		vrele(old_rdir);
823 		vrele(old_cdir);
824 	} else
825 		fdp->fd_rdir = nd.ni_vp;
826 	atomic_setbits_int(&p->p_p->ps_flags, PS_CHROOT);
827 	return (0);
828 }
829 
830 /*
831  * Common routine for chroot and chdir.
832  */
833 static int
change_dir(struct nameidata * ndp,struct proc * p)834 change_dir(struct nameidata *ndp, struct proc *p)
835 {
836 	struct vnode *vp;
837 	int error;
838 
839 	if ((error = namei(ndp)) != 0)
840 		return (error);
841 	vp = ndp->ni_vp;
842 	if (vp->v_type != VDIR)
843 		error = ENOTDIR;
844 	else
845 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
846 	if (error)
847 		vput(vp);
848 	else
849 		VOP_UNLOCK(vp);
850 	return (error);
851 }
852 
853 int
sys___realpath(struct proc * p,void * v,register_t * retval)854 sys___realpath(struct proc *p, void *v, register_t *retval)
855 {
856 	struct sys___realpath_args /* {
857 		syscallarg(const char *) pathname;
858 		syscallarg(char *) resolved;
859 	} */ *uap = v;
860 	char *pathname;
861 	char *rpbuf;
862 	struct nameidata nd;
863 	size_t pathlen;
864 	int error = 0;
865 
866 	if (SCARG(uap, pathname) == NULL)
867 		return (EINVAL);
868 
869 	pathname = pool_get(&namei_pool, PR_WAITOK);
870 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
871 
872 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
873 	    &pathlen)))
874 		goto end;
875 
876 	if (pathlen == 1) { /* empty string "" */
877 		error = ENOENT;
878 		goto end;
879 	}
880 	if (pathlen < 2) {
881 		error = EINVAL;
882 		goto end;
883 	}
884 
885 	/* Get cwd for relative path if needed, prepend to rpbuf */
886 	rpbuf[0] = '\0';
887 	if (pathname[0] != '/') {
888 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
889 		char *cwdbuf, *bp;
890 
891 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
892 
893 		/* vfs_getcwd_common fills this in backwards */
894 		bp = &cwdbuf[cwdlen - 1];
895 		*bp = '\0';
896 
897 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
898 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
899 
900 		if (error) {
901 			free(cwdbuf, M_TEMP, cwdlen);
902 			goto end;
903 		}
904 
905 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
906 			free(cwdbuf, M_TEMP, cwdlen);
907 			error = ENAMETOOLONG;
908 			goto end;
909 		}
910 
911 		free(cwdbuf, M_TEMP, cwdlen);
912 	}
913 
914 	NDINIT(&nd, LOOKUP, FOLLOW | SAVENAME | REALPATH, UIO_SYSSPACE,
915 	    pathname, p);
916 
917 	nd.ni_cnd.cn_rpbuf = rpbuf;
918 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
919 
920 	nd.ni_pledge = PLEDGE_RPATH;
921 	nd.ni_unveil = UNVEIL_READ;
922 	if ((error = namei(&nd)) != 0)
923 		goto end;
924 
925 	/* release reference from namei */
926 	if (nd.ni_vp)
927 		vrele(nd.ni_vp);
928 
929 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
930 	    MAXPATHLEN, NULL);
931 
932 #ifdef KTRACE
933 	if (KTRPOINT(p, KTR_NAMEI))
934 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
935 #endif
936 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
937 end:
938 	pool_put(&namei_pool, rpbuf);
939 	pool_put(&namei_pool, pathname);
940 	return (error);
941 }
942 
943 int
sys_unveil(struct proc * p,void * v,register_t * retval)944 sys_unveil(struct proc *p, void *v, register_t *retval)
945 {
946 	struct sys_unveil_args /* {
947 		syscallarg(const char *) path;
948 		syscallarg(const char *) permissions;
949 	} */ *uap = v;
950 	struct process *pr = p->p_p;
951 	char *pathname, *c;
952 	struct nameidata nd;
953 	size_t pathlen;
954 	char permissions[5];
955 	int error, allow;
956 
957 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
958 		pr->ps_uvdone = 1;
959 		return (0);
960 	}
961 
962 	if (pr->ps_uvdone != 0)
963 		return EPERM;
964 
965 	error = copyinstr(SCARG(uap, permissions), permissions,
966 	    sizeof(permissions), NULL);
967 	if (error)
968 		return (error);
969 	pathname = pool_get(&namei_pool, PR_WAITOK);
970 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
971 	if (error)
972 		goto end;
973 
974 #ifdef KTRACE
975 	if (KTRPOINT(p, KTR_STRUCT))
976 		ktrstruct(p, "unveil", permissions, strlen(permissions));
977 #endif
978 	if (pathlen < 2) {
979 		error = EINVAL;
980 		goto end;
981 	}
982 
983 	/* find root "/" or "//" */
984 	for (c = pathname; *c != '\0'; c++) {
985 		if (*c != '/')
986 			break;
987 	}
988 	if (*c == '\0')
989 		/* root directory */
990 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
991 		    UIO_SYSSPACE, pathname, p);
992 	else
993 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
994 		    UIO_SYSSPACE, pathname, p);
995 
996 	nd.ni_pledge = PLEDGE_UNVEIL;
997 	if ((error = namei(&nd)) != 0)
998 		goto end;
999 
1000 	/*
1001 	 * XXX Any access to the file or directory will allow us to
1002 	 * pledge path it
1003 	 */
1004 	allow = ((nd.ni_vp &&
1005 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1006 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1007 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1008 	    (nd.ni_dvp &&
1009 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1010 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1011 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1012 
1013 	/* release lock from namei, but keep ref */
1014 	if (nd.ni_vp)
1015 		VOP_UNLOCK(nd.ni_vp);
1016 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1017 		VOP_UNLOCK(nd.ni_dvp);
1018 
1019 	if (allow)
1020 		error = unveil_add(p, &nd, permissions);
1021 	else
1022 		error = EPERM;
1023 
1024 	/* release vref from namei, but not vref from unveil_add */
1025 	if (nd.ni_vp)
1026 		vrele(nd.ni_vp);
1027 	if (nd.ni_dvp)
1028 		vrele(nd.ni_dvp);
1029 
1030 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1031 end:
1032 	pool_put(&namei_pool, pathname);
1033 
1034 	return (error);
1035 }
1036 
1037 /*
1038  * Check permissions, allocate an open file structure,
1039  * and call the device open routine if any.
1040  */
1041 int
sys_open(struct proc * p,void * v,register_t * retval)1042 sys_open(struct proc *p, void *v, register_t *retval)
1043 {
1044 	struct sys_open_args /* {
1045 		syscallarg(const char *) path;
1046 		syscallarg(int) flags;
1047 		syscallarg(mode_t) mode;
1048 	} */ *uap = v;
1049 
1050 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1051 	    SCARG(uap, mode), retval));
1052 }
1053 
1054 int
sys_openat(struct proc * p,void * v,register_t * retval)1055 sys_openat(struct proc *p, void *v, register_t *retval)
1056 {
1057 	struct sys_openat_args /* {
1058 		syscallarg(int) fd;
1059 		syscallarg(const char *) path;
1060 		syscallarg(int) flags;
1061 		syscallarg(mode_t) mode;
1062 	} */ *uap = v;
1063 
1064 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1065 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1066 }
1067 
1068 int
doopenat(struct proc * p,int fd,const char * path,int oflags,mode_t mode,register_t * retval)1069 doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
1070     register_t *retval)
1071 {
1072 	struct filedesc *fdp = p->p_fd;
1073 	struct file *fp;
1074 	struct vnode *vp;
1075 	struct vattr vattr;
1076 	int flags, cloexec, cmode;
1077 	int type, indx, error, localtrunc = 0;
1078 	struct flock lf;
1079 	struct nameidata nd;
1080 	uint64_t ni_pledge = 0;
1081 	u_char ni_unveil = 0;
1082 
1083 	if (oflags & (O_EXLOCK | O_SHLOCK)) {
1084 		error = pledge_flock(p);
1085 		if (error != 0)
1086 			return (error);
1087 	}
1088 
1089 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1090 
1091 	fdplock(fdp);
1092 	if ((error = falloc(p, &fp, &indx)) != 0) {
1093 		fdpunlock(fdp);
1094 		return (error);
1095 	}
1096 	fdpunlock(fdp);
1097 
1098 	flags = FFLAGS(oflags);
1099 	if (flags & FREAD) {
1100 		ni_pledge |= PLEDGE_RPATH;
1101 		ni_unveil |= UNVEIL_READ;
1102 	}
1103 	if (flags & FWRITE) {
1104 		ni_pledge |= PLEDGE_WPATH;
1105 		ni_unveil |= UNVEIL_WRITE;
1106 	}
1107 	if (oflags & O_CREAT) {
1108 		ni_pledge |= PLEDGE_CPATH;
1109 		ni_unveil |= UNVEIL_CREATE;
1110 	}
1111 
1112 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1113 	if ((p->p_p->ps_flags & PS_PLEDGE))
1114 		cmode &= ACCESSPERMS;
1115 	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
1116 	nd.ni_pledge = ni_pledge;
1117 	nd.ni_unveil = ni_unveil;
1118 	p->p_dupfd = -1;			/* XXX check for fdopen */
1119 	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
1120 		localtrunc = 1;
1121 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
1122 	}
1123 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1124 		fdplock(fdp);
1125 		if (error == ENODEV &&
1126 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1127 		    (error =
1128 			dupfdopen(p, indx, flags)) == 0) {
1129 			fdpunlock(fdp);
1130 			closef(fp, p);
1131 			*retval = indx;
1132 			return (error);
1133 		}
1134 		if (error == ERESTART)
1135 			error = EINTR;
1136 		fdremove(fdp, indx);
1137 		fdpunlock(fdp);
1138 		closef(fp, p);
1139 		return (error);
1140 	}
1141 	p->p_dupfd = 0;
1142 	vp = nd.ni_vp;
1143 	fp->f_flag = flags & FMASK;
1144 	fp->f_type = DTYPE_VNODE;
1145 	fp->f_ops = &vnops;
1146 	fp->f_data = vp;
1147 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1148 		lf.l_whence = SEEK_SET;
1149 		lf.l_start = 0;
1150 		lf.l_len = 0;
1151 		if (flags & O_EXLOCK)
1152 			lf.l_type = F_WRLCK;
1153 		else
1154 			lf.l_type = F_RDLCK;
1155 		type = F_FLOCK;
1156 		if ((flags & FNONBLOCK) == 0)
1157 			type |= F_WAIT;
1158 		VOP_UNLOCK(vp);
1159 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1160 		if (error) {
1161 			fdplock(fdp);
1162 			/* closef will vn_close the file for us. */
1163 			fdremove(fdp, indx);
1164 			fdpunlock(fdp);
1165 			closef(fp, p);
1166 			return (error);
1167 		}
1168 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1169 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1170 	}
1171 	if (localtrunc) {
1172 		if ((fp->f_flag & FWRITE) == 0)
1173 			error = EACCES;
1174 		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
1175 			error = EROFS;
1176 		else if (vp->v_type == VDIR)
1177 			error = EISDIR;
1178 		else if ((error = vn_writechk(vp)) == 0) {
1179 			vattr_null(&vattr);
1180 			vattr.va_size = 0;
1181 			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
1182 		}
1183 		if (error) {
1184 			VOP_UNLOCK(vp);
1185 			fdplock(fdp);
1186 			/* closef will close the file for us. */
1187 			fdremove(fdp, indx);
1188 			fdpunlock(fdp);
1189 			closef(fp, p);
1190 			return (error);
1191 		}
1192 	}
1193 	VOP_UNLOCK(vp);
1194 	*retval = indx;
1195 	fdplock(fdp);
1196 	fdinsert(fdp, indx, cloexec, fp);
1197 	fdpunlock(fdp);
1198 	FRELE(fp, p);
1199 	return (error);
1200 }
1201 
1202 /*
1203  * Open a new created file (in /tmp) suitable for mmaping.
1204  */
1205 int
sys___tmpfd(struct proc * p,void * v,register_t * retval)1206 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1207 {
1208 	struct sys___tmpfd_args /* {
1209 		syscallarg(int) flags;
1210 	} */ *uap = v;
1211 	struct filedesc *fdp = p->p_fd;
1212 	struct file *fp;
1213 	struct vnode *vp;
1214 	int oflags = SCARG(uap, flags);
1215 	int flags, cloexec, cmode;
1216 	int indx, error;
1217 	unsigned int i;
1218 	struct nameidata nd;
1219 	char path[64];
1220 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1221 
1222 	/* most flags are hardwired */
1223 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1224 
1225 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1226 
1227 	fdplock(fdp);
1228 	if ((error = falloc(p, &fp, &indx)) != 0) {
1229 		fdpunlock(fdp);
1230 		return (error);
1231 	}
1232 	fdpunlock(fdp);
1233 
1234 	flags = FFLAGS(oflags);
1235 
1236 	arc4random_buf(path, sizeof(path));
1237 	memcpy(path, "/tmp/", 5);
1238 	for (i = 5; i < sizeof(path) - 1; i++)
1239 		path[i] = letters[(unsigned char)path[i] & 63];
1240 	path[sizeof(path)-1] = 0;
1241 
1242 	cmode = 0600;
1243 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1244 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1245 		if (error == ERESTART)
1246 			error = EINTR;
1247 		fdplock(fdp);
1248 		fdremove(fdp, indx);
1249 		fdpunlock(fdp);
1250 		closef(fp, p);
1251 		return (error);
1252 	}
1253 	vp = nd.ni_vp;
1254 	fp->f_flag = flags & FMASK;
1255 	fp->f_type = DTYPE_VNODE;
1256 	fp->f_ops = &vnops;
1257 	fp->f_data = vp;
1258 	VOP_UNLOCK(vp);
1259 	*retval = indx;
1260 	fdplock(fdp);
1261 	fdinsert(fdp, indx, cloexec, fp);
1262 	fdpunlock(fdp);
1263 	FRELE(fp, p);
1264 
1265 	/* unlink it */
1266 	/* XXX
1267 	 * there is a wee race here, although it is mostly inconsequential.
1268 	 * perhaps someday we can create a file like object without a name...
1269 	 */
1270 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1271 	    AT_FDCWD, path, p);
1272 	if ((error = namei(&nd)) != 0) {
1273 		printf("can't unlink temp file! %d\n", error);
1274 		error = 0;
1275 	} else {
1276 		vp = nd.ni_vp;
1277 		uvm_vnp_uncache(vp);
1278 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1279 		if (error) {
1280 			printf("error removing vop: %d\n", error);
1281 			error = 0;
1282 		}
1283 	}
1284 
1285 	return (error);
1286 }
1287 
1288 /*
1289  * Get file handle system call
1290  */
1291 int
sys_getfh(struct proc * p,void * v,register_t * retval)1292 sys_getfh(struct proc *p, void *v, register_t *retval)
1293 {
1294 	struct sys_getfh_args /* {
1295 		syscallarg(const char *) fname;
1296 		syscallarg(fhandle_t *) fhp;
1297 	} */ *uap = v;
1298 	struct vnode *vp;
1299 	fhandle_t fh;
1300 	int error;
1301 	struct nameidata nd;
1302 
1303 	/*
1304 	 * Must be super user
1305 	 */
1306 	error = suser(p);
1307 	if (error)
1308 		return (error);
1309 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1310 	    SCARG(uap, fname), p);
1311 	error = namei(&nd);
1312 	if (error)
1313 		return (error);
1314 	vp = nd.ni_vp;
1315 	memset(&fh, 0, sizeof(fh));
1316 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1317 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1318 	vput(vp);
1319 	if (error)
1320 		return (error);
1321 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1322 	return (error);
1323 }
1324 
1325 /*
1326  * Open a file given a file handle.
1327  *
1328  * Check permissions, allocate an open file structure,
1329  * and call the device open routine if any.
1330  */
1331 int
sys_fhopen(struct proc * p,void * v,register_t * retval)1332 sys_fhopen(struct proc *p, void *v, register_t *retval)
1333 {
1334 	struct sys_fhopen_args /* {
1335 		syscallarg(const fhandle_t *) fhp;
1336 		syscallarg(int) flags;
1337 	} */ *uap = v;
1338 	struct filedesc *fdp = p->p_fd;
1339 	struct file *fp;
1340 	struct vnode *vp = NULL;
1341 	struct mount *mp;
1342 	struct ucred *cred = p->p_ucred;
1343 	int flags, cloexec;
1344 	int type, indx, error=0;
1345 	struct flock lf;
1346 	struct vattr va;
1347 	fhandle_t fh;
1348 
1349 	/*
1350 	 * Must be super user
1351 	 */
1352 	if ((error = suser(p)))
1353 		return (error);
1354 
1355 	flags = FFLAGS(SCARG(uap, flags));
1356 	if ((flags & (FREAD | FWRITE)) == 0)
1357 		return (EINVAL);
1358 	if ((flags & O_CREAT))
1359 		return (EINVAL);
1360 
1361 	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1362 
1363 	fdplock(fdp);
1364 	if ((error = falloc(p, &fp, &indx)) != 0) {
1365 		fdpunlock(fdp);
1366 		fp = NULL;
1367 		goto bad;
1368 	}
1369 	fdpunlock(fdp);
1370 
1371 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1372 		goto bad;
1373 
1374 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
1375 		error = ESTALE;
1376 		goto bad;
1377 	}
1378 
1379 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
1380 		vp = NULL;	/* most likely unnecessary sanity for bad: */
1381 		goto bad;
1382 	}
1383 
1384 	/* Now do an effective vn_open */
1385 
1386 	if (vp->v_type == VSOCK) {
1387 		error = EOPNOTSUPP;
1388 		goto bad;
1389 	}
1390 	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
1391 		error = ENOTDIR;
1392 		goto bad;
1393 	}
1394 	if (flags & FREAD) {
1395 		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
1396 			goto bad;
1397 	}
1398 	if (flags & (FWRITE | O_TRUNC)) {
1399 		if (vp->v_type == VDIR) {
1400 			error = EISDIR;
1401 			goto bad;
1402 		}
1403 		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
1404 		    (error = vn_writechk(vp)) != 0)
1405 			goto bad;
1406 	}
1407 	if (flags & O_TRUNC) {
1408 		vattr_null(&va);
1409 		va.va_size = 0;
1410 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
1411 			goto bad;
1412 	}
1413 	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
1414 		goto bad;
1415 	if (flags & FWRITE)
1416 		vp->v_writecount++;
1417 
1418 	/* done with modified vn_open, now finish what sys_open does. */
1419 
1420 	fp->f_flag = flags & FMASK;
1421 	fp->f_type = DTYPE_VNODE;
1422 	fp->f_ops = &vnops;
1423 	fp->f_data = vp;
1424 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1425 		lf.l_whence = SEEK_SET;
1426 		lf.l_start = 0;
1427 		lf.l_len = 0;
1428 		if (flags & O_EXLOCK)
1429 			lf.l_type = F_WRLCK;
1430 		else
1431 			lf.l_type = F_RDLCK;
1432 		type = F_FLOCK;
1433 		if ((flags & FNONBLOCK) == 0)
1434 			type |= F_WAIT;
1435 		VOP_UNLOCK(vp);
1436 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
1437 		if (error) {
1438 			vp = NULL;	/* closef will vn_close the file */
1439 			goto bad;
1440 		}
1441 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1442 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
1443 	}
1444 	VOP_UNLOCK(vp);
1445 	*retval = indx;
1446 	fdplock(fdp);
1447 	fdinsert(fdp, indx, cloexec, fp);
1448 	fdpunlock(fdp);
1449 	FRELE(fp, p);
1450 	return (0);
1451 
1452 bad:
1453 	if (fp) {
1454 		fdplock(fdp);
1455 		fdremove(fdp, indx);
1456 		fdpunlock(fdp);
1457 		closef(fp, p);
1458 		if (vp != NULL)
1459 			vput(vp);
1460 	}
1461 	return (error);
1462 }
1463 
1464 int
sys_fhstat(struct proc * p,void * v,register_t * retval)1465 sys_fhstat(struct proc *p, void *v, register_t *retval)
1466 {
1467 	struct sys_fhstat_args /* {
1468 		syscallarg(const fhandle_t *) fhp;
1469 		syscallarg(struct stat *) sb;
1470 	} */ *uap = v;
1471 	struct stat sb;
1472 	int error;
1473 	fhandle_t fh;
1474 	struct mount *mp;
1475 	struct vnode *vp;
1476 
1477 	/*
1478 	 * Must be super user
1479 	 */
1480 	if ((error = suser(p)))
1481 		return (error);
1482 
1483 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1484 		return (error);
1485 
1486 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1487 		return (ESTALE);
1488 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1489 		return (error);
1490 	error = vn_stat(vp, &sb, p);
1491 	vput(vp);
1492 	if (error)
1493 		return (error);
1494 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1495 	return (error);
1496 }
1497 
1498 int
sys_fhstatfs(struct proc * p,void * v,register_t * retval)1499 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1500 {
1501 	struct sys_fhstatfs_args /* {
1502 		syscallarg(const fhandle_t *) fhp;
1503 		syscallarg(struct statfs *) buf;
1504 	} */ *uap = v;
1505 	struct statfs *sp;
1506 	fhandle_t fh;
1507 	struct mount *mp;
1508 	struct vnode *vp;
1509 	int error;
1510 
1511 	/*
1512 	 * Must be super user
1513 	 */
1514 	if ((error = suser(p)))
1515 		return (error);
1516 
1517 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1518 		return (error);
1519 
1520 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1521 		return (ESTALE);
1522 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1523 		return (error);
1524 	mp = vp->v_mount;
1525 	sp = &mp->mnt_stat;
1526 	vput(vp);
1527 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1528 		return (error);
1529 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1530 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1531 }
1532 
1533 /*
1534  * Create a special file or named pipe.
1535  */
1536 int
sys_mknod(struct proc * p,void * v,register_t * retval)1537 sys_mknod(struct proc *p, void *v, register_t *retval)
1538 {
1539 	struct sys_mknod_args /* {
1540 		syscallarg(const char *) path;
1541 		syscallarg(mode_t) mode;
1542 		syscallarg(int) dev;
1543 	} */ *uap = v;
1544 
1545 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1546 	    SCARG(uap, dev)));
1547 }
1548 
1549 int
sys_mknodat(struct proc * p,void * v,register_t * retval)1550 sys_mknodat(struct proc *p, void *v, register_t *retval)
1551 {
1552 	struct sys_mknodat_args /* {
1553 		syscallarg(int) fd;
1554 		syscallarg(const char *) path;
1555 		syscallarg(mode_t) mode;
1556 		syscallarg(dev_t) dev;
1557 	} */ *uap = v;
1558 
1559 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1560 	    SCARG(uap, mode), SCARG(uap, dev)));
1561 }
1562 
1563 int
domknodat(struct proc * p,int fd,const char * path,mode_t mode,dev_t dev)1564 domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
1565 {
1566 	struct vnode *vp;
1567 	struct vattr vattr;
1568 	int error;
1569 	struct nameidata nd;
1570 
1571 	if (dev == VNOVAL)
1572 		return (EINVAL);
1573 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
1574 	nd.ni_pledge = PLEDGE_DPATH;
1575 	nd.ni_unveil = UNVEIL_CREATE;
1576 	if ((error = namei(&nd)) != 0)
1577 		return (error);
1578 	vp = nd.ni_vp;
1579 	if (!S_ISFIFO(mode) || dev != 0) {
1580 		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
1581 			goto out;
1582 		if (p->p_fd->fd_rdir) {
1583 			error = EINVAL;
1584 			goto out;
1585 		}
1586 	}
1587 	if (vp != NULL)
1588 		error = EEXIST;
1589 	else {
1590 		vattr_null(&vattr);
1591 		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
1592 		if ((p->p_p->ps_flags & PS_PLEDGE))
1593 			vattr.va_mode &= ACCESSPERMS;
1594 		vattr.va_rdev = dev;
1595 
1596 		switch (mode & S_IFMT) {
1597 		case S_IFMT:	/* used by badsect to flag bad sectors */
1598 			vattr.va_type = VBAD;
1599 			break;
1600 		case S_IFCHR:
1601 			vattr.va_type = VCHR;
1602 			break;
1603 		case S_IFBLK:
1604 			vattr.va_type = VBLK;
1605 			break;
1606 		case S_IFIFO:
1607 #ifndef FIFO
1608 			error = EOPNOTSUPP;
1609 			break;
1610 #else
1611 			if (dev == 0) {
1612 				vattr.va_type = VFIFO;
1613 				break;
1614 			}
1615 			/* FALLTHROUGH */
1616 #endif /* FIFO */
1617 		default:
1618 			error = EINVAL;
1619 			break;
1620 		}
1621 	}
1622 out:
1623 	if (!error) {
1624 		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1625 		vput(nd.ni_dvp);
1626 	} else {
1627 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1628 		if (nd.ni_dvp == vp)
1629 			vrele(nd.ni_dvp);
1630 		else
1631 			vput(nd.ni_dvp);
1632 		if (vp)
1633 			vrele(vp);
1634 	}
1635 	return (error);
1636 }
1637 
1638 /*
1639  * Create a named pipe.
1640  */
1641 int
sys_mkfifo(struct proc * p,void * v,register_t * retval)1642 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1643 {
1644 	struct sys_mkfifo_args /* {
1645 		syscallarg(const char *) path;
1646 		syscallarg(mode_t) mode;
1647 	} */ *uap = v;
1648 
1649 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1650 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1651 }
1652 
1653 int
sys_mkfifoat(struct proc * p,void * v,register_t * retval)1654 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1655 {
1656 	struct sys_mkfifoat_args /* {
1657 		syscallarg(int) fd;
1658 		syscallarg(const char *) path;
1659 		syscallarg(mode_t) mode;
1660 	} */ *uap = v;
1661 
1662 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1663 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1664 }
1665 
1666 /*
1667  * Make a hard file link.
1668  */
1669 int
sys_link(struct proc * p,void * v,register_t * retval)1670 sys_link(struct proc *p, void *v, register_t *retval)
1671 {
1672 	struct sys_link_args /* {
1673 		syscallarg(const char *) path;
1674 		syscallarg(const char *) link;
1675 	} */ *uap = v;
1676 
1677 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1678 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1679 }
1680 
1681 int
sys_linkat(struct proc * p,void * v,register_t * retval)1682 sys_linkat(struct proc *p, void *v, register_t *retval)
1683 {
1684 	struct sys_linkat_args /* {
1685 		syscallarg(int) fd1;
1686 		syscallarg(const char *) path1;
1687 		syscallarg(int) fd2;
1688 		syscallarg(const char *) path2;
1689 		syscallarg(int) flag;
1690 	} */ *uap = v;
1691 
1692 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1693 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1694 }
1695 
1696 int
dolinkat(struct proc * p,int fd1,const char * path1,int fd2,const char * path2,int flag)1697 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1698     const char *path2, int flag)
1699 {
1700 	struct vnode *vp;
1701 	struct nameidata nd;
1702 	int error, follow;
1703 
1704 	if (flag & ~AT_SYMLINK_FOLLOW)
1705 		return (EINVAL);
1706 
1707 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1708 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1709 	nd.ni_pledge = PLEDGE_RPATH;
1710 	nd.ni_unveil = UNVEIL_READ;
1711 	if ((error = namei(&nd)) != 0)
1712 		return (error);
1713 	vp = nd.ni_vp;
1714 
1715 	if (vp->v_type == VDIR) {
1716 		error = EPERM;
1717 		goto out;
1718 	}
1719 
1720 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd2, path2, p);
1721 	nd.ni_pledge = PLEDGE_CPATH;
1722 	nd.ni_unveil = UNVEIL_CREATE;
1723 	if ((error = namei(&nd)) != 0)
1724 		goto out;
1725 	if (nd.ni_vp) {
1726 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1727 		if (nd.ni_dvp == nd.ni_vp)
1728 			vrele(nd.ni_dvp);
1729 		else
1730 			vput(nd.ni_dvp);
1731 		vrele(nd.ni_vp);
1732 		error = EEXIST;
1733 		goto out;
1734 	}
1735 
1736 	/* No cross-mount links! */
1737 	if (nd.ni_dvp->v_mount != vp->v_mount) {
1738 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1739 		vput(nd.ni_dvp);
1740 		error = EXDEV;
1741 		goto out;
1742 	}
1743 
1744 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1745 out:
1746 	vrele(vp);
1747 	return (error);
1748 }
1749 
1750 /*
1751  * Make a symbolic link.
1752  */
1753 int
sys_symlink(struct proc * p,void * v,register_t * retval)1754 sys_symlink(struct proc *p, void *v, register_t *retval)
1755 {
1756 	struct sys_symlink_args /* {
1757 		syscallarg(const char *) path;
1758 		syscallarg(const char *) link;
1759 	} */ *uap = v;
1760 
1761 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1762 }
1763 
1764 int
sys_symlinkat(struct proc * p,void * v,register_t * retval)1765 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1766 {
1767 	struct sys_symlinkat_args /* {
1768 		syscallarg(const char *) path;
1769 		syscallarg(int) fd;
1770 		syscallarg(const char *) link;
1771 	} */ *uap = v;
1772 
1773 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1774 	    SCARG(uap, link)));
1775 }
1776 
1777 int
dosymlinkat(struct proc * p,const char * upath,int fd,const char * link)1778 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1779 {
1780 	struct vattr vattr;
1781 	char *path;
1782 	int error;
1783 	struct nameidata nd;
1784 
1785 	path = pool_get(&namei_pool, PR_WAITOK);
1786 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1787 	if (error)
1788 		goto out;
1789 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1790 	nd.ni_pledge = PLEDGE_CPATH;
1791 	nd.ni_unveil = UNVEIL_CREATE;
1792 	if ((error = namei(&nd)) != 0)
1793 		goto out;
1794 	if (nd.ni_vp) {
1795 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1796 		if (nd.ni_dvp == nd.ni_vp)
1797 			vrele(nd.ni_dvp);
1798 		else
1799 			vput(nd.ni_dvp);
1800 		vrele(nd.ni_vp);
1801 		error = EEXIST;
1802 		goto out;
1803 	}
1804 	vattr_null(&vattr);
1805 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1806 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1807 out:
1808 	pool_put(&namei_pool, path);
1809 	return (error);
1810 }
1811 
1812 /*
1813  * Delete a name from the filesystem.
1814  */
1815 int
sys_unlink(struct proc * p,void * v,register_t * retval)1816 sys_unlink(struct proc *p, void *v, register_t *retval)
1817 {
1818 	struct sys_unlink_args /* {
1819 		syscallarg(const char *) path;
1820 	} */ *uap = v;
1821 
1822 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1823 }
1824 
1825 int
sys_unlinkat(struct proc * p,void * v,register_t * retval)1826 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1827 {
1828 	struct sys_unlinkat_args /* {
1829 		syscallarg(int) fd;
1830 		syscallarg(const char *) path;
1831 		syscallarg(int) flag;
1832 	} */ *uap = v;
1833 
1834 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1835 	    SCARG(uap, flag)));
1836 }
1837 
1838 int
dounlinkat(struct proc * p,int fd,const char * path,int flag)1839 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1840 {
1841 	struct vnode *vp;
1842 	int error;
1843 	struct nameidata nd;
1844 
1845 	if (flag & ~AT_REMOVEDIR)
1846 		return (EINVAL);
1847 
1848 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1849 	    fd, path, p);
1850 	nd.ni_pledge = PLEDGE_CPATH;
1851 	nd.ni_unveil = UNVEIL_CREATE;
1852 	if ((error = namei(&nd)) != 0)
1853 		return (error);
1854 	vp = nd.ni_vp;
1855 
1856 	if (flag & AT_REMOVEDIR) {
1857 		if (vp->v_type != VDIR) {
1858 			error = ENOTDIR;
1859 			goto out;
1860 		}
1861 		/*
1862 		 * No rmdir "." please.
1863 		 */
1864 		if (nd.ni_dvp == vp) {
1865 			error = EINVAL;
1866 			goto out;
1867 		}
1868 		/*
1869 		 * A mounted on directory cannot be deleted.
1870 		 */
1871 		if (vp->v_mountedhere != NULL) {
1872 			error = EBUSY;
1873 			goto out;
1874 		}
1875 	}
1876 
1877 	/*
1878 	 * The root of a mounted filesystem cannot be deleted.
1879 	 */
1880 	if (vp->v_flag & VROOT)
1881 		error = EBUSY;
1882 out:
1883 	if (!error) {
1884 		if (flag & AT_REMOVEDIR) {
1885 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1886 		} else {
1887 			(void)uvm_vnp_uncache(vp);
1888 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1889 		}
1890 	} else {
1891 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1892 		if (nd.ni_dvp == vp)
1893 			vrele(nd.ni_dvp);
1894 		else
1895 			vput(nd.ni_dvp);
1896 		vput(vp);
1897 	}
1898 	return (error);
1899 }
1900 
1901 /*
1902  * Reposition read/write file offset.
1903  */
1904 int
sys_lseek(struct proc * p,void * v,register_t * retval)1905 sys_lseek(struct proc *p, void *v, register_t *retval)
1906 {
1907 	struct sys_lseek_args /* {
1908 		syscallarg(int) fd;
1909 		syscallarg(off_t) offset;
1910 		syscallarg(int) whence;
1911 	} */ *uap = v;
1912 	struct filedesc *fdp = p->p_fd;
1913 	struct file *fp;
1914 	off_t offset;
1915 	int error;
1916 
1917 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1918 		return (EBADF);
1919 	if (fp->f_ops->fo_seek == NULL) {
1920 		error = ESPIPE;
1921 		goto bad;
1922 	}
1923 	offset = SCARG(uap, offset);
1924 
1925 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1926 	if (error)
1927 		goto bad;
1928 
1929 	*(off_t *)retval = offset;
1930 	mtx_enter(&fp->f_mtx);
1931 	fp->f_seek++;
1932 	mtx_leave(&fp->f_mtx);
1933 	error = 0;
1934  bad:
1935 	FRELE(fp, p);
1936 	return (error);
1937 }
1938 
1939 /*
1940  * Check access permissions.
1941  */
1942 int
sys_access(struct proc * p,void * v,register_t * retval)1943 sys_access(struct proc *p, void *v, register_t *retval)
1944 {
1945 	struct sys_access_args /* {
1946 		syscallarg(const char *) path;
1947 		syscallarg(int) amode;
1948 	} */ *uap = v;
1949 
1950 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1951 	    SCARG(uap, amode), 0));
1952 }
1953 
1954 int
sys_faccessat(struct proc * p,void * v,register_t * retval)1955 sys_faccessat(struct proc *p, void *v, register_t *retval)
1956 {
1957 	struct sys_faccessat_args /* {
1958 		syscallarg(int) fd;
1959 		syscallarg(const char *) path;
1960 		syscallarg(int) amode;
1961 		syscallarg(int) flag;
1962 	} */ *uap = v;
1963 
1964 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1965 	    SCARG(uap, amode), SCARG(uap, flag)));
1966 }
1967 
1968 int
dofaccessat(struct proc * p,int fd,const char * path,int amode,int flag)1969 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1970 {
1971 	struct vnode *vp;
1972 	struct ucred *newcred, *oldcred;
1973 	struct nameidata nd;
1974 	int vflags = 0, error;
1975 
1976 	if (amode & ~(R_OK | W_OK | X_OK))
1977 		return (EINVAL);
1978 	if (flag & ~AT_EACCESS)
1979 		return (EINVAL);
1980 
1981 	newcred = NULL;
1982 	oldcred = p->p_ucred;
1983 
1984 	/*
1985 	 * If access as real ids was requested and they really differ,
1986 	 * give the thread new creds with them reset
1987 	 */
1988 	if ((flag & AT_EACCESS) == 0 &&
1989 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1990 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1991 		p->p_ucred = newcred = crdup(oldcred);
1992 		newcred->cr_uid = newcred->cr_ruid;
1993 		newcred->cr_gid = newcred->cr_rgid;
1994 	}
1995 
1996 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
1997 	nd.ni_pledge = PLEDGE_RPATH;
1998 	nd.ni_unveil = UNVEIL_READ;
1999 	if (amode & R_OK)
2000 		vflags |= VREAD;
2001 	if (amode & W_OK) {
2002 		vflags |= VWRITE;
2003 		nd.ni_unveil |= UNVEIL_WRITE;
2004 	}
2005 	if (amode & X_OK)
2006 		vflags |= VEXEC;
2007 	if ((error = namei(&nd)) != 0)
2008 		goto out;
2009 	vp = nd.ni_vp;
2010 
2011 	/* Flags == 0 means only check for existence. */
2012 	if (amode) {
2013 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2014 		if (!error && (vflags & VWRITE))
2015 			error = vn_writechk(vp);
2016 	}
2017 	vput(vp);
2018 out:
2019 	if (newcred != NULL) {
2020 		p->p_ucred = oldcred;
2021 		crfree(newcred);
2022 	}
2023 	return (error);
2024 }
2025 
2026 /*
2027  * Get file status; this version follows links.
2028  */
2029 int
sys_stat(struct proc * p,void * v,register_t * retval)2030 sys_stat(struct proc *p, void *v, register_t *retval)
2031 {
2032 	struct sys_stat_args /* {
2033 		syscallarg(const char *) path;
2034 		syscallarg(struct stat *) ub;
2035 	} */ *uap = v;
2036 
2037 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2038 }
2039 
2040 int
sys_fstatat(struct proc * p,void * v,register_t * retval)2041 sys_fstatat(struct proc *p, void *v, register_t *retval)
2042 {
2043 	struct sys_fstatat_args /* {
2044 		syscallarg(int) fd;
2045 		syscallarg(const char *) path;
2046 		syscallarg(struct stat *) buf;
2047 		syscallarg(int) flag;
2048 	} */ *uap = v;
2049 
2050 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2051 	    SCARG(uap, buf), SCARG(uap, flag)));
2052 }
2053 
2054 int
dofstatat(struct proc * p,int fd,const char * path,struct stat * buf,int flag)2055 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2056 {
2057 	struct stat sb;
2058 	int error, follow;
2059 	struct nameidata nd;
2060 
2061 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2062 		return (EINVAL);
2063 
2064 
2065 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2066 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2067 	nd.ni_pledge = PLEDGE_RPATH;
2068 	nd.ni_unveil = UNVEIL_READ;
2069 	if ((error = namei(&nd)) != 0)
2070 		return (error);
2071 	error = vn_stat(nd.ni_vp, &sb, p);
2072 	vput(nd.ni_vp);
2073 	if (error)
2074 		return (error);
2075 	/* Don't let non-root see generation numbers (for NFS security) */
2076 	if (suser(p))
2077 		sb.st_gen = 0;
2078 	error = copyout(&sb, buf, sizeof(sb));
2079 #ifdef KTRACE
2080 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2081 		ktrstat(p, &sb);
2082 #endif
2083 	return (error);
2084 }
2085 
2086 /*
2087  * Get file status; this version does not follow links.
2088  */
2089 int
sys_lstat(struct proc * p,void * v,register_t * retval)2090 sys_lstat(struct proc *p, void *v, register_t *retval)
2091 {
2092 	struct sys_lstat_args /* {
2093 		syscallarg(const char *) path;
2094 		syscallarg(struct stat *) ub;
2095 	} */ *uap = v;
2096 
2097 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2098 	    AT_SYMLINK_NOFOLLOW));
2099 }
2100 
2101 /*
2102  * Get configurable pathname variables.
2103  */
2104 int
sys_pathconf(struct proc * p,void * v,register_t * retval)2105 sys_pathconf(struct proc *p, void *v, register_t *retval)
2106 {
2107 	struct sys_pathconf_args /* {
2108 		syscallarg(const char *) path;
2109 		syscallarg(int) name;
2110 	} */ *uap = v;
2111 
2112 	return dopathconfat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, name),
2113 	    0, retval);
2114 }
2115 
2116 int
sys_pathconfat(struct proc * p,void * v,register_t * retval)2117 sys_pathconfat(struct proc *p, void *v, register_t *retval)
2118 {
2119 	struct sys_pathconfat_args /* {
2120 		syscallarg(int) fd;
2121 		syscallarg(const char *) path;
2122 		syscallarg(int) name;
2123 		syscallarg(int) flag;
2124 	} */ *uap = v;
2125 
2126 	return dopathconfat(p, SCARG(uap, fd), SCARG(uap, path),
2127 	    SCARG(uap, name), SCARG(uap, flag), retval);
2128 }
2129 
2130 int
dopathconfat(struct proc * p,int fd,const char * path,int name,int flag,register_t * retval)2131 dopathconfat(struct proc *p, int fd, const char *path, int name, int flag,
2132     register_t *retval)
2133 {
2134 	int follow, error;
2135 	struct nameidata nd;
2136 
2137 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2138 		return EINVAL;
2139 
2140 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2141 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2142 	nd.ni_pledge = PLEDGE_RPATH;
2143 	nd.ni_unveil = UNVEIL_READ;
2144 	if ((error = namei(&nd)) != 0)
2145 		return (error);
2146 	error = VOP_PATHCONF(nd.ni_vp, name, retval);
2147 	vput(nd.ni_vp);
2148 	return (error);
2149 }
2150 
2151 /*
2152  * Return target name of a symbolic link.
2153  */
2154 int
sys_readlink(struct proc * p,void * v,register_t * retval)2155 sys_readlink(struct proc *p, void *v, register_t *retval)
2156 {
2157 	struct sys_readlink_args /* {
2158 		syscallarg(const char *) path;
2159 		syscallarg(char *) buf;
2160 		syscallarg(size_t) count;
2161 	} */ *uap = v;
2162 
2163 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2164 	    SCARG(uap, count), retval));
2165 }
2166 
2167 int
sys_readlinkat(struct proc * p,void * v,register_t * retval)2168 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2169 {
2170 	struct sys_readlinkat_args /* {
2171 		syscallarg(int) fd;
2172 		syscallarg(const char *) path;
2173 		syscallarg(char *) buf;
2174 		syscallarg(size_t) count;
2175 	} */ *uap = v;
2176 
2177 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2178 	    SCARG(uap, buf), SCARG(uap, count), retval));
2179 }
2180 
2181 int
doreadlinkat(struct proc * p,int fd,const char * path,char * buf,size_t count,register_t * retval)2182 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2183     size_t count, register_t *retval)
2184 {
2185 	struct vnode *vp;
2186 	struct iovec aiov;
2187 	struct uio auio;
2188 	int error;
2189 	struct nameidata nd;
2190 
2191 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2192 	nd.ni_pledge = PLEDGE_RPATH;
2193 	nd.ni_unveil = UNVEIL_READ;
2194 	if ((error = namei(&nd)) != 0)
2195 		return (error);
2196 	vp = nd.ni_vp;
2197 	if (vp->v_type != VLNK)
2198 		error = EINVAL;
2199 	else {
2200 		aiov.iov_base = buf;
2201 		aiov.iov_len = count;
2202 		auio.uio_iov = &aiov;
2203 		auio.uio_iovcnt = 1;
2204 		auio.uio_offset = 0;
2205 		auio.uio_rw = UIO_READ;
2206 		auio.uio_segflg = UIO_USERSPACE;
2207 		auio.uio_procp = p;
2208 		auio.uio_resid = count;
2209 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2210 		*retval = count - auio.uio_resid;
2211 	}
2212 	vput(vp);
2213 	return (error);
2214 }
2215 
2216 /*
2217  * Change flags of a file given a path name.
2218  */
2219 int
sys_chflags(struct proc * p,void * v,register_t * retval)2220 sys_chflags(struct proc *p, void *v, register_t *retval)
2221 {
2222 	struct sys_chflags_args /* {
2223 		syscallarg(const char *) path;
2224 		syscallarg(u_int) flags;
2225 	} */ *uap = v;
2226 
2227 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2228 	    SCARG(uap, flags), 0));
2229 }
2230 
2231 int
sys_chflagsat(struct proc * p,void * v,register_t * retval)2232 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2233 {
2234 	struct sys_chflagsat_args /* {
2235 		syscallarg(int) fd;
2236 		syscallarg(const char *) path;
2237 		syscallarg(u_int) flags;
2238 		syscallarg(int) atflags;
2239 	} */ *uap = v;
2240 
2241 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2242 	    SCARG(uap, flags), SCARG(uap, atflags)));
2243 }
2244 
2245 int
dochflagsat(struct proc * p,int fd,const char * path,u_int flags,int atflags)2246 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2247 {
2248 	struct nameidata nd;
2249 	int error, follow;
2250 
2251 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2252 		return (EINVAL);
2253 
2254 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2255 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2256 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2257 	nd.ni_unveil = UNVEIL_WRITE;
2258 	if ((error = namei(&nd)) != 0)
2259 		return (error);
2260 	return (dovchflags(p, nd.ni_vp, flags));
2261 }
2262 
2263 /*
2264  * Change flags of a file given a file descriptor.
2265  */
2266 int
sys_fchflags(struct proc * p,void * v,register_t * retval)2267 sys_fchflags(struct proc *p, void *v, register_t *retval)
2268 {
2269 	struct sys_fchflags_args /* {
2270 		syscallarg(int) fd;
2271 		syscallarg(u_int) flags;
2272 	} */ *uap = v;
2273 	struct file *fp;
2274 	struct vnode *vp;
2275 	int error;
2276 
2277 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2278 		return (error);
2279 	vp = fp->f_data;
2280 	vref(vp);
2281 	FRELE(fp, p);
2282 	return (dovchflags(p, vp, SCARG(uap, flags)));
2283 }
2284 
2285 int
dovchflags(struct proc * p,struct vnode * vp,u_int flags)2286 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2287 {
2288 	struct vattr vattr;
2289 	int error;
2290 
2291 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2292 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2293 		error = EROFS;
2294 	else if (flags == VNOVAL)
2295 		error = EINVAL;
2296 	else {
2297 		if (suser(p)) {
2298 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2299 			    != 0)
2300 				goto out;
2301 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2302 				error = EINVAL;
2303 				goto out;
2304 			}
2305 		}
2306 		vattr_null(&vattr);
2307 		vattr.va_flags = flags;
2308 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2309 	}
2310 out:
2311 	vput(vp);
2312 	return (error);
2313 }
2314 
2315 /*
2316  * Change mode of a file given path name.
2317  */
2318 int
sys_chmod(struct proc * p,void * v,register_t * retval)2319 sys_chmod(struct proc *p, void *v, register_t *retval)
2320 {
2321 	struct sys_chmod_args /* {
2322 		syscallarg(const char *) path;
2323 		syscallarg(mode_t) mode;
2324 	} */ *uap = v;
2325 
2326 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2327 }
2328 
2329 int
sys_fchmodat(struct proc * p,void * v,register_t * retval)2330 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2331 {
2332 	struct sys_fchmodat_args /* {
2333 		syscallarg(int) fd;
2334 		syscallarg(const char *) path;
2335 		syscallarg(mode_t) mode;
2336 		syscallarg(int) flag;
2337 	} */ *uap = v;
2338 
2339 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2340 	    SCARG(uap, mode), SCARG(uap, flag)));
2341 }
2342 
2343 int
dofchmodat(struct proc * p,int fd,const char * path,mode_t mode,int flag)2344 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2345 {
2346 	struct vnode *vp;
2347 	struct vattr vattr;
2348 	int error, follow;
2349 	struct nameidata nd;
2350 
2351 	if (mode & ~(S_IFMT | ALLPERMS))
2352 		return (EINVAL);
2353 	if ((p->p_p->ps_flags & PS_PLEDGE))
2354 		mode &= ACCESSPERMS;
2355 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2356 		return (EINVAL);
2357 
2358 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2359 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2360 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2361 	nd.ni_unveil = UNVEIL_WRITE;
2362 	if ((error = namei(&nd)) != 0)
2363 		return (error);
2364 	vp = nd.ni_vp;
2365 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2366 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2367 		error = EROFS;
2368 	else {
2369 		vattr_null(&vattr);
2370 		vattr.va_mode = mode & ALLPERMS;
2371 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2372 	}
2373 	vput(vp);
2374 	return (error);
2375 }
2376 
2377 /*
2378  * Change mode of a file given a file descriptor.
2379  */
2380 int
sys_fchmod(struct proc * p,void * v,register_t * retval)2381 sys_fchmod(struct proc *p, void *v, register_t *retval)
2382 {
2383 	struct sys_fchmod_args /* {
2384 		syscallarg(int) fd;
2385 		syscallarg(mode_t) mode;
2386 	} */ *uap = v;
2387 	struct vattr vattr;
2388 	struct vnode *vp;
2389 	struct file *fp;
2390 	mode_t mode = SCARG(uap, mode);
2391 	int error;
2392 
2393 	if (mode & ~(S_IFMT | ALLPERMS))
2394 		return (EINVAL);
2395 	if ((p->p_p->ps_flags & PS_PLEDGE))
2396 		mode &= ACCESSPERMS;
2397 
2398 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2399 		return (error);
2400 	vp = fp->f_data;
2401 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2402 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2403 		error = EROFS;
2404 	else {
2405 		vattr_null(&vattr);
2406 		vattr.va_mode = mode & ALLPERMS;
2407 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2408 	}
2409 	VOP_UNLOCK(vp);
2410 	FRELE(fp, p);
2411 	return (error);
2412 }
2413 
2414 /*
2415  * Set ownership given a path name.
2416  */
2417 int
sys_chown(struct proc * p,void * v,register_t * retval)2418 sys_chown(struct proc *p, void *v, register_t *retval)
2419 {
2420 	struct sys_chown_args /* {
2421 		syscallarg(const char *) path;
2422 		syscallarg(uid_t) uid;
2423 		syscallarg(gid_t) gid;
2424 	} */ *uap = v;
2425 
2426 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2427 	    SCARG(uap, gid), 0));
2428 }
2429 
2430 int
sys_fchownat(struct proc * p,void * v,register_t * retval)2431 sys_fchownat(struct proc *p, void *v, register_t *retval)
2432 {
2433 	struct sys_fchownat_args /* {
2434 		syscallarg(int) fd;
2435 		syscallarg(const char *) path;
2436 		syscallarg(uid_t) uid;
2437 		syscallarg(gid_t) gid;
2438 		syscallarg(int) flag;
2439 	} */ *uap = v;
2440 
2441 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2442 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2443 }
2444 
2445 int
dofchownat(struct proc * p,int fd,const char * path,uid_t uid,gid_t gid,int flag)2446 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2447     int flag)
2448 {
2449 	struct vnode *vp;
2450 	struct vattr vattr;
2451 	int error, follow;
2452 	struct nameidata nd;
2453 	mode_t mode;
2454 
2455 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2456 		return (EINVAL);
2457 
2458 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2459 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2460 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2461 	nd.ni_unveil = UNVEIL_WRITE;
2462 	if ((error = namei(&nd)) != 0)
2463 		return (error);
2464 	vp = nd.ni_vp;
2465 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2466 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2467 		error = EROFS;
2468 	else {
2469 		if ((error = pledge_chown(p, uid, gid)))
2470 			goto out;
2471 		if ((uid != -1 || gid != -1) &&
2472 		    !vnoperm(vp) &&
2473 		    (suser(p) || atomic_load_int(&suid_clear))) {
2474 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2475 			if (error)
2476 				goto out;
2477 			mode = vattr.va_mode & ~(VSUID | VSGID);
2478 			if (mode == vattr.va_mode)
2479 				mode = VNOVAL;
2480 		} else
2481 			mode = VNOVAL;
2482 		vattr_null(&vattr);
2483 		vattr.va_uid = uid;
2484 		vattr.va_gid = gid;
2485 		vattr.va_mode = mode;
2486 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2487 	}
2488 out:
2489 	vput(vp);
2490 	return (error);
2491 }
2492 
2493 /*
2494  * Set ownership given a path name, without following links.
2495  */
2496 int
sys_lchown(struct proc * p,void * v,register_t * retval)2497 sys_lchown(struct proc *p, void *v, register_t *retval)
2498 {
2499 	struct sys_lchown_args /* {
2500 		syscallarg(const char *) path;
2501 		syscallarg(uid_t) uid;
2502 		syscallarg(gid_t) gid;
2503 	} */ *uap = v;
2504 	struct vnode *vp;
2505 	struct vattr vattr;
2506 	int error;
2507 	struct nameidata nd;
2508 	mode_t mode;
2509 	uid_t uid = SCARG(uap, uid);
2510 	gid_t gid = SCARG(uap, gid);
2511 
2512 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2513 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2514 	nd.ni_unveil = UNVEIL_WRITE;
2515 	if ((error = namei(&nd)) != 0)
2516 		return (error);
2517 	vp = nd.ni_vp;
2518 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2519 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2520 		error = EROFS;
2521 	else {
2522 		if ((error = pledge_chown(p, uid, gid)))
2523 			goto out;
2524 		if ((uid != -1 || gid != -1) &&
2525 		    !vnoperm(vp) &&
2526 		    (suser(p) || atomic_load_int(&suid_clear))) {
2527 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2528 			if (error)
2529 				goto out;
2530 			mode = vattr.va_mode & ~(VSUID | VSGID);
2531 			if (mode == vattr.va_mode)
2532 				mode = VNOVAL;
2533 		} else
2534 			mode = VNOVAL;
2535 		vattr_null(&vattr);
2536 		vattr.va_uid = uid;
2537 		vattr.va_gid = gid;
2538 		vattr.va_mode = mode;
2539 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2540 	}
2541 out:
2542 	vput(vp);
2543 	return (error);
2544 }
2545 
2546 /*
2547  * Set ownership given a file descriptor.
2548  */
2549 int
sys_fchown(struct proc * p,void * v,register_t * retval)2550 sys_fchown(struct proc *p, void *v, register_t *retval)
2551 {
2552 	struct sys_fchown_args /* {
2553 		syscallarg(int) fd;
2554 		syscallarg(uid_t) uid;
2555 		syscallarg(gid_t) gid;
2556 	} */ *uap = v;
2557 	struct vnode *vp;
2558 	struct vattr vattr;
2559 	int error;
2560 	struct file *fp;
2561 	mode_t mode;
2562 	uid_t uid = SCARG(uap, uid);
2563 	gid_t gid = SCARG(uap, gid);
2564 
2565 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2566 		return (error);
2567 	vp = fp->f_data;
2568 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2569 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2570 		error = EROFS;
2571 	else {
2572 		if ((error = pledge_chown(p, uid, gid)))
2573 			goto out;
2574 		if ((uid != -1 || gid != -1) &&
2575 		    !vnoperm(vp) &&
2576 		    (suser(p) || atomic_load_int(&suid_clear))) {
2577 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2578 			if (error)
2579 				goto out;
2580 			mode = vattr.va_mode & ~(VSUID | VSGID);
2581 			if (mode == vattr.va_mode)
2582 				mode = VNOVAL;
2583 		} else
2584 			mode = VNOVAL;
2585 		vattr_null(&vattr);
2586 		vattr.va_uid = uid;
2587 		vattr.va_gid = gid;
2588 		vattr.va_mode = mode;
2589 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2590 	}
2591 out:
2592 	VOP_UNLOCK(vp);
2593 	FRELE(fp, p);
2594 	return (error);
2595 }
2596 
2597 /*
2598  * Set the access and modification times given a path name.
2599  */
2600 int
sys_utimes(struct proc * p,void * v,register_t * retval)2601 sys_utimes(struct proc *p, void *v, register_t *retval)
2602 {
2603 	struct sys_utimes_args /* {
2604 		syscallarg(const char *) path;
2605 		syscallarg(const struct timeval *) tptr;
2606 	} */ *uap = v;
2607 
2608 	struct timespec ts[2];
2609 	struct timeval tv[2];
2610 	const struct timeval *tvp;
2611 	int error;
2612 
2613 	tvp = SCARG(uap, tptr);
2614 	if (tvp != NULL) {
2615 		error = copyin(tvp, tv, sizeof(tv));
2616 		if (error)
2617 			return (error);
2618 #ifdef KTRACE
2619 		if (KTRPOINT(p, KTR_STRUCT))
2620 			ktrabstimeval(p, &tv);
2621 #endif
2622 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2623 			return (EINVAL);
2624 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2625 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2626 	} else
2627 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2628 
2629 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2630 }
2631 
2632 int
sys_utimensat(struct proc * p,void * v,register_t * retval)2633 sys_utimensat(struct proc *p, void *v, register_t *retval)
2634 {
2635 	struct sys_utimensat_args /* {
2636 		syscallarg(int) fd;
2637 		syscallarg(const char *) path;
2638 		syscallarg(const struct timespec *) times;
2639 		syscallarg(int) flag;
2640 	} */ *uap = v;
2641 
2642 	struct timespec ts[2];
2643 	const struct timespec *tsp;
2644 	int error, i;
2645 
2646 	tsp = SCARG(uap, times);
2647 	if (tsp != NULL) {
2648 		error = copyin(tsp, ts, sizeof(ts));
2649 		if (error)
2650 			return (error);
2651 		for (i = 0; i < nitems(ts); i++) {
2652 			if (ts[i].tv_nsec == UTIME_NOW)
2653 				continue;
2654 			if (ts[i].tv_nsec == UTIME_OMIT)
2655 				continue;
2656 #ifdef KTRACE
2657 			if (KTRPOINT(p, KTR_STRUCT))
2658 				ktrabstimespec(p, &ts[i]);
2659 #endif
2660 			if (!timespecisvalid(&ts[i]))
2661 				return (EINVAL);
2662 		}
2663 	} else
2664 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2665 
2666 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2667 	    SCARG(uap, flag)));
2668 }
2669 
2670 int
doutimensat(struct proc * p,int fd,const char * path,struct timespec ts[2],int flag)2671 doutimensat(struct proc *p, int fd, const char *path,
2672     struct timespec ts[2], int flag)
2673 {
2674 	struct vnode *vp;
2675 	int error, follow;
2676 	struct nameidata nd;
2677 
2678 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2679 		return (EINVAL);
2680 
2681 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2682 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2683 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2684 	nd.ni_unveil = UNVEIL_WRITE;
2685 	if ((error = namei(&nd)) != 0)
2686 		return (error);
2687 	vp = nd.ni_vp;
2688 
2689 	return (dovutimens(p, vp, ts));
2690 }
2691 
2692 int
dovutimens(struct proc * p,struct vnode * vp,struct timespec ts[2])2693 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2694 {
2695 	struct vattr vattr;
2696 	struct timespec now;
2697 	int error;
2698 
2699 #ifdef KTRACE
2700 	/* if they're both UTIME_NOW, then don't report either */
2701 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2702 	    KTRPOINT(p, KTR_STRUCT)) {
2703 		ktrabstimespec(p, &ts[0]);
2704 		ktrabstimespec(p, &ts[1]);
2705 	}
2706 #endif
2707 
2708 	vattr_null(&vattr);
2709 
2710 	/*  make sure ctime is updated even if neither mtime nor atime is */
2711 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2712 
2713 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2714 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2715 			vattr.va_vaflags |= VA_UTIMES_NULL;
2716 
2717 		getnanotime(&now);
2718 		if (ts[0].tv_nsec == UTIME_NOW)
2719 			ts[0] = now;
2720 		if (ts[1].tv_nsec == UTIME_NOW)
2721 			ts[1] = now;
2722 	}
2723 
2724 	if (ts[0].tv_nsec != UTIME_OMIT)
2725 		vattr.va_atime = ts[0];
2726 	if (ts[1].tv_nsec != UTIME_OMIT)
2727 		vattr.va_mtime = ts[1];
2728 
2729 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2730 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2731 		error = EROFS;
2732 	else
2733 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2734 	vput(vp);
2735 	return (error);
2736 }
2737 
2738 /*
2739  * Set the access and modification times given a file descriptor.
2740  */
2741 int
sys_futimes(struct proc * p,void * v,register_t * retval)2742 sys_futimes(struct proc *p, void *v, register_t *retval)
2743 {
2744 	struct sys_futimes_args /* {
2745 		syscallarg(int) fd;
2746 		syscallarg(const struct timeval *) tptr;
2747 	} */ *uap = v;
2748 	struct timeval tv[2];
2749 	struct timespec ts[2];
2750 	const struct timeval *tvp;
2751 	int error;
2752 
2753 	tvp = SCARG(uap, tptr);
2754 	if (tvp != NULL) {
2755 		error = copyin(tvp, tv, sizeof(tv));
2756 		if (error)
2757 			return (error);
2758 #ifdef KTRACE
2759 		if (KTRPOINT(p, KTR_STRUCT)) {
2760 			ktrabstimeval(p, &tv[0]);
2761 			ktrabstimeval(p, &tv[1]);
2762 		}
2763 #endif
2764 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2765 			return (EINVAL);
2766 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2767 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2768 	} else
2769 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2770 
2771 	return (dofutimens(p, SCARG(uap, fd), ts));
2772 }
2773 
2774 int
sys_futimens(struct proc * p,void * v,register_t * retval)2775 sys_futimens(struct proc *p, void *v, register_t *retval)
2776 {
2777 	struct sys_futimens_args /* {
2778 		syscallarg(int) fd;
2779 		syscallarg(const struct timespec *) times;
2780 	} */ *uap = v;
2781 	struct timespec ts[2];
2782 	const struct timespec *tsp;
2783 	int error, i;
2784 
2785 	tsp = SCARG(uap, times);
2786 	if (tsp != NULL) {
2787 		error = copyin(tsp, ts, sizeof(ts));
2788 		if (error)
2789 			return (error);
2790 		for (i = 0; i < nitems(ts); i++) {
2791 			if (ts[i].tv_nsec == UTIME_NOW)
2792 				continue;
2793 			if (ts[i].tv_nsec == UTIME_OMIT)
2794 				continue;
2795 #ifdef KTRACE
2796 			if (KTRPOINT(p, KTR_STRUCT))
2797 				ktrabstimespec(p, &ts[i]);
2798 #endif
2799 			if (!timespecisvalid(&ts[i]))
2800 				return (EINVAL);
2801 		}
2802 	} else
2803 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2804 
2805 	return (dofutimens(p, SCARG(uap, fd), ts));
2806 }
2807 
2808 int
dofutimens(struct proc * p,int fd,struct timespec ts[2])2809 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2810 {
2811 	struct file *fp;
2812 	struct vnode *vp;
2813 	int error;
2814 
2815 	if ((error = getvnode(p, fd, &fp)) != 0)
2816 		return (error);
2817 	vp = fp->f_data;
2818 	vref(vp);
2819 	FRELE(fp, p);
2820 
2821 	return (dovutimens(p, vp, ts));
2822 }
2823 
2824 /*
2825  * Truncate a file given a vnode.
2826  */
2827 int
dotruncate(struct proc * p,struct vnode * vp,off_t len)2828 dotruncate(struct proc *p, struct vnode *vp, off_t len)
2829 {
2830 	struct vattr vattr;
2831 	int error;
2832 
2833 	if (len < 0)
2834 		return EINVAL;
2835 	if (vp->v_type == VDIR)
2836 		return EISDIR;
2837 	if ((error = vn_writechk(vp)) != 0)
2838 		return error;
2839 	if (vp->v_type == VREG && len > lim_cur_proc(p, RLIMIT_FSIZE)) {
2840 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2841 			return error;
2842 		if (len > vattr.va_size) {
2843 			/* if extending over the limit, send signal and fail */
2844 			psignal(p, SIGXFSZ);
2845 			return EFBIG;
2846 		}
2847 	}
2848 	vattr_null(&vattr);
2849 	vattr.va_size = len;
2850 	return VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2851 }
2852 
2853 /*
2854  * Truncate a file given its path name.
2855  */
2856 int
sys_truncate(struct proc * p,void * v,register_t * retval)2857 sys_truncate(struct proc *p, void *v, register_t *retval)
2858 {
2859 	struct sys_truncate_args /* {
2860 		syscallarg(const char *) path;
2861 		syscallarg(off_t) length;
2862 	} */ *uap = v;
2863 	struct vnode *vp;
2864 	int error;
2865 	struct nameidata nd;
2866 
2867 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2868 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2869 	nd.ni_unveil = UNVEIL_WRITE;
2870 	if ((error = namei(&nd)) != 0)
2871 		return (error);
2872 	vp = nd.ni_vp;
2873 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2874 	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0)
2875 		error = dotruncate(p, vp, SCARG(uap, length));
2876 	vput(vp);
2877 	return (error);
2878 }
2879 
2880 /*
2881  * Truncate a file given a file descriptor.
2882  */
2883 int
sys_ftruncate(struct proc * p,void * v,register_t * retval)2884 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2885 {
2886 	struct sys_ftruncate_args /* {
2887 		syscallarg(int) fd;
2888 		syscallarg(off_t) length;
2889 	} */ *uap = v;
2890 	struct vnode *vp;
2891 	struct file *fp;
2892 	int error;
2893 
2894 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2895 		return (error);
2896 	if ((fp->f_flag & FWRITE) == 0) {
2897 		error = EINVAL;
2898 		goto bad;
2899 	}
2900 	vp = fp->f_data;
2901 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2902 	error = dotruncate(p, vp, SCARG(uap, length));
2903 	VOP_UNLOCK(vp);
2904 bad:
2905 	FRELE(fp, p);
2906 	return (error);
2907 }
2908 
2909 /*
2910  * Sync an open file.
2911  */
2912 int
sys_fsync(struct proc * p,void * v,register_t * retval)2913 sys_fsync(struct proc *p, void *v, register_t *retval)
2914 {
2915 	struct sys_fsync_args /* {
2916 		syscallarg(int) fd;
2917 	} */ *uap = v;
2918 	struct vnode *vp;
2919 	struct file *fp;
2920 	int error;
2921 
2922 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2923 		return (error);
2924 	vp = fp->f_data;
2925 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2926 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2927 
2928 	VOP_UNLOCK(vp);
2929 	FRELE(fp, p);
2930 	return (error);
2931 }
2932 
2933 /*
2934  * Rename files.  Source and destination must either both be directories,
2935  * or both not be directories.  If target is a directory, it must be empty.
2936  */
2937 int
sys_rename(struct proc * p,void * v,register_t * retval)2938 sys_rename(struct proc *p, void *v, register_t *retval)
2939 {
2940 	struct sys_rename_args /* {
2941 		syscallarg(const char *) from;
2942 		syscallarg(const char *) to;
2943 	} */ *uap = v;
2944 
2945 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2946 	    SCARG(uap, to)));
2947 }
2948 
2949 int
sys_renameat(struct proc * p,void * v,register_t * retval)2950 sys_renameat(struct proc *p, void *v, register_t *retval)
2951 {
2952 	struct sys_renameat_args /* {
2953 		syscallarg(int) fromfd;
2954 		syscallarg(const char *) from;
2955 		syscallarg(int) tofd;
2956 		syscallarg(const char *) to;
2957 	} */ *uap = v;
2958 
2959 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2960 	    SCARG(uap, tofd), SCARG(uap, to)));
2961 }
2962 
2963 int
dorenameat(struct proc * p,int fromfd,const char * from,int tofd,const char * to)2964 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2965     const char *to)
2966 {
2967 	struct vnode *tvp, *fvp, *tdvp;
2968 	struct nameidata fromnd, tond;
2969 	int error;
2970 	int flags;
2971 
2972 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2973 	    fromfd, from, p);
2974 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2975 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2976 	if ((error = namei(&fromnd)) != 0)
2977 		return (error);
2978 	fvp = fromnd.ni_vp;
2979 
2980 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2981 	/*
2982 	 * rename("foo/", "bar/");  is  OK
2983 	 */
2984 	if (fvp->v_type == VDIR)
2985 		flags |= STRIPSLASHES;
2986 
2987 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2988 	tond.ni_pledge = PLEDGE_CPATH;
2989 	tond.ni_unveil = UNVEIL_CREATE;
2990 	if ((error = namei(&tond)) != 0) {
2991 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2992 		vrele(fromnd.ni_dvp);
2993 		vrele(fvp);
2994 		goto out1;
2995 	}
2996 	tdvp = tond.ni_dvp;
2997 	tvp = tond.ni_vp;
2998 	if (tvp != NULL) {
2999 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3000 			error = ENOTDIR;
3001 			goto out;
3002 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3003 			error = EISDIR;
3004 			goto out;
3005 		}
3006 	}
3007 	if (fvp == tdvp)
3008 		error = EINVAL;
3009 	/*
3010 	 * If source is the same as the destination (that is the
3011 	 * same inode number)
3012 	 */
3013 	if (fvp == tvp)
3014 		error = -1;
3015 out:
3016 	if (!error) {
3017 		if (tvp) {
3018 			(void)uvm_vnp_uncache(tvp);
3019 		}
3020 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3021 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3022 	} else {
3023 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3024 		if (tdvp == tvp)
3025 			vrele(tdvp);
3026 		else
3027 			vput(tdvp);
3028 		if (tvp)
3029 			vput(tvp);
3030 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3031 		vrele(fromnd.ni_dvp);
3032 		vrele(fvp);
3033 	}
3034 	vrele(tond.ni_startdir);
3035 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
3036 out1:
3037 	if (fromnd.ni_startdir)
3038 		vrele(fromnd.ni_startdir);
3039 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
3040 	if (error == -1)
3041 		return (0);
3042 	return (error);
3043 }
3044 
3045 /*
3046  * Make a directory file.
3047  */
3048 int
sys_mkdir(struct proc * p,void * v,register_t * retval)3049 sys_mkdir(struct proc *p, void *v, register_t *retval)
3050 {
3051 	struct sys_mkdir_args /* {
3052 		syscallarg(const char *) path;
3053 		syscallarg(mode_t) mode;
3054 	} */ *uap = v;
3055 
3056 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3057 }
3058 
3059 int
sys_mkdirat(struct proc * p,void * v,register_t * retval)3060 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3061 {
3062 	struct sys_mkdirat_args /* {
3063 		syscallarg(int) fd;
3064 		syscallarg(const char *) path;
3065 		syscallarg(mode_t) mode;
3066 	} */ *uap = v;
3067 
3068 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3069 	    SCARG(uap, mode)));
3070 }
3071 
3072 int
domkdirat(struct proc * p,int fd,const char * path,mode_t mode)3073 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3074 {
3075 	struct vnode *vp;
3076 	struct vattr vattr;
3077 	int error;
3078 	struct nameidata nd;
3079 
3080 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3081 	    fd, path, p);
3082 	nd.ni_pledge = PLEDGE_CPATH;
3083 	nd.ni_unveil = UNVEIL_CREATE;
3084 	if ((error = namei(&nd)) != 0)
3085 		return (error);
3086 	vp = nd.ni_vp;
3087 	if (vp != NULL) {
3088 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3089 		if (nd.ni_dvp == vp)
3090 			vrele(nd.ni_dvp);
3091 		else
3092 			vput(nd.ni_dvp);
3093 		vrele(vp);
3094 		return (EEXIST);
3095 	}
3096 	vattr_null(&vattr);
3097 	vattr.va_type = VDIR;
3098 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3099 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3100 	if (!error)
3101 		vput(nd.ni_vp);
3102 	return (error);
3103 }
3104 
3105 /*
3106  * Remove a directory file.
3107  */
3108 int
sys_rmdir(struct proc * p,void * v,register_t * retval)3109 sys_rmdir(struct proc *p, void *v, register_t *retval)
3110 {
3111 	struct sys_rmdir_args /* {
3112 		syscallarg(const char *) path;
3113 	} */ *uap = v;
3114 
3115 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3116 }
3117 
3118 /*
3119  * Read a block of directory entries in a file system independent format.
3120  */
3121 int
sys_getdents(struct proc * p,void * v,register_t * retval)3122 sys_getdents(struct proc *p, void *v, register_t *retval)
3123 {
3124 	struct sys_getdents_args /* {
3125 		syscallarg(int) fd;
3126 		syscallarg(void *) buf;
3127 		syscallarg(size_t) buflen;
3128 	} */ *uap = v;
3129 	struct vnode *vp;
3130 	struct file *fp;
3131 	struct uio auio;
3132 	struct iovec aiov;
3133 	size_t buflen;
3134 	int error, eofflag;
3135 
3136 	buflen = SCARG(uap, buflen);
3137 
3138 	if (buflen > INT_MAX)
3139 		return (EINVAL);
3140 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3141 		return (error);
3142 	if ((fp->f_flag & FREAD) == 0) {
3143 		error = EBADF;
3144 		goto bad;
3145 	}
3146 	vp = fp->f_data;
3147 	if (vp->v_type != VDIR) {
3148 		error = EINVAL;
3149 		goto bad;
3150 	}
3151 
3152 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3153 
3154 	if (fp->f_offset < 0) {
3155 		VOP_UNLOCK(vp);
3156 		error = EINVAL;
3157 		goto bad;
3158 	}
3159 
3160 	aiov.iov_base = SCARG(uap, buf);
3161 	aiov.iov_len = buflen;
3162 	auio.uio_iov = &aiov;
3163 	auio.uio_iovcnt = 1;
3164 	auio.uio_rw = UIO_READ;
3165 	auio.uio_segflg = UIO_USERSPACE;
3166 	auio.uio_procp = p;
3167 	auio.uio_resid = buflen;
3168 	auio.uio_offset = fp->f_offset;
3169 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3170 	mtx_enter(&fp->f_mtx);
3171 	fp->f_offset = auio.uio_offset;
3172 	mtx_leave(&fp->f_mtx);
3173 	VOP_UNLOCK(vp);
3174 	if (error)
3175 		goto bad;
3176 	*retval = buflen - auio.uio_resid;
3177 bad:
3178 	FRELE(fp, p);
3179 	return (error);
3180 }
3181 
3182 /*
3183  * Set the mode mask for creation of filesystem nodes.
3184  */
3185 int
sys_umask(struct proc * p,void * v,register_t * retval)3186 sys_umask(struct proc *p, void *v, register_t *retval)
3187 {
3188 	struct sys_umask_args /* {
3189 		syscallarg(mode_t) newmask;
3190 	} */ *uap = v;
3191 	struct filedesc *fdp = p->p_fd;
3192 
3193 	fdplock(fdp);
3194 	*retval = fdp->fd_cmask;
3195 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3196 	fdpunlock(fdp);
3197 	return (0);
3198 }
3199 
3200 /*
3201  * Void all references to file by ripping underlying filesystem
3202  * away from vnode.
3203  */
3204 int
sys_revoke(struct proc * p,void * v,register_t * retval)3205 sys_revoke(struct proc *p, void *v, register_t *retval)
3206 {
3207 	struct sys_revoke_args /* {
3208 		syscallarg(const char *) path;
3209 	} */ *uap = v;
3210 	struct vnode *vp;
3211 	struct vattr vattr;
3212 	int error;
3213 	struct nameidata nd;
3214 
3215 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3216 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3217 	nd.ni_unveil = UNVEIL_READ;
3218 	if ((error = namei(&nd)) != 0)
3219 		return (error);
3220 	vp = nd.ni_vp;
3221 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3222 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3223 		error = ENOTTY;
3224 		goto out;
3225 	}
3226 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3227 		goto out;
3228 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3229 	    (error = suser(p)))
3230 		goto out;
3231 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3232 		VOP_REVOKE(vp, REVOKEALL);
3233 out:
3234 	vrele(vp);
3235 	return (error);
3236 }
3237 
3238 /*
3239  * Convert a user file descriptor to a kernel file entry.
3240  *
3241  * On return *fpp is FREF:ed.
3242  */
3243 int
getvnode(struct proc * p,int fd,struct file ** fpp)3244 getvnode(struct proc *p, int fd, struct file **fpp)
3245 {
3246 	struct file *fp;
3247 	struct vnode *vp;
3248 
3249 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3250 		return (EBADF);
3251 
3252 	if (fp->f_type != DTYPE_VNODE) {
3253 		FRELE(fp, p);
3254 		return (EINVAL);
3255 	}
3256 
3257 	vp = fp->f_data;
3258 	if (vp->v_type == VBAD) {
3259 		FRELE(fp, p);
3260 		return (EBADF);
3261 	}
3262 
3263 	*fpp = fp;
3264 
3265 	return (0);
3266 }
3267 
3268 /*
3269  * Positional read system call.
3270  */
3271 int
sys_pread(struct proc * p,void * v,register_t * retval)3272 sys_pread(struct proc *p, void *v, register_t *retval)
3273 {
3274 	struct sys_pread_args /* {
3275 		syscallarg(int) fd;
3276 		syscallarg(void *) buf;
3277 		syscallarg(size_t) nbyte;
3278 		syscallarg(off_t) offset;
3279 	} */ *uap = v;
3280 	struct iovec iov;
3281 	struct uio auio;
3282 
3283 	iov.iov_base = SCARG(uap, buf);
3284 	iov.iov_len = SCARG(uap, nbyte);
3285 	if (iov.iov_len > SSIZE_MAX)
3286 		return (EINVAL);
3287 
3288 	auio.uio_iov = &iov;
3289 	auio.uio_iovcnt = 1;
3290 	auio.uio_resid = iov.iov_len;
3291 	auio.uio_offset = SCARG(uap, offset);
3292 
3293 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3294 }
3295 
3296 /*
3297  * Positional scatter read system call.
3298  */
3299 int
sys_preadv(struct proc * p,void * v,register_t * retval)3300 sys_preadv(struct proc *p, void *v, register_t *retval)
3301 {
3302 	struct sys_preadv_args /* {
3303 		syscallarg(int) fd;
3304 		syscallarg(const struct iovec *) iovp;
3305 		syscallarg(int) iovcnt;
3306 		syscallarg(off_t) offset;
3307 	} */ *uap = v;
3308 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3309 	int error, iovcnt = SCARG(uap, iovcnt);
3310 	struct uio auio;
3311 	size_t resid;
3312 
3313 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3314 	if (error)
3315 		goto done;
3316 
3317 	auio.uio_iov = iov;
3318 	auio.uio_iovcnt = iovcnt;
3319 	auio.uio_resid = resid;
3320 	auio.uio_offset = SCARG(uap, offset);
3321 
3322 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3323  done:
3324 	iovec_free(iov, iovcnt);
3325 	return (error);
3326 }
3327 
3328 /*
3329  * Positional write system call.
3330  */
3331 int
sys_pwrite(struct proc * p,void * v,register_t * retval)3332 sys_pwrite(struct proc *p, void *v, register_t *retval)
3333 {
3334 	struct sys_pwrite_args /* {
3335 		syscallarg(int) fd;
3336 		syscallarg(const void *) buf;
3337 		syscallarg(size_t) nbyte;
3338 		syscallarg(off_t) offset;
3339 	} */ *uap = v;
3340 	struct iovec iov;
3341 	struct uio auio;
3342 
3343 	iov.iov_base = (void *)SCARG(uap, buf);
3344 	iov.iov_len = SCARG(uap, nbyte);
3345 	if (iov.iov_len > SSIZE_MAX)
3346 		return (EINVAL);
3347 
3348 	auio.uio_iov = &iov;
3349 	auio.uio_iovcnt = 1;
3350 	auio.uio_resid = iov.iov_len;
3351 	auio.uio_offset = SCARG(uap, offset);
3352 
3353 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3354 }
3355 
3356 /*
3357  * Positional gather write system call.
3358  */
3359 int
sys_pwritev(struct proc * p,void * v,register_t * retval)3360 sys_pwritev(struct proc *p, void *v, register_t *retval)
3361 {
3362 	struct sys_pwritev_args /* {
3363 		syscallarg(int) fd;
3364 		syscallarg(const struct iovec *) iovp;
3365 		syscallarg(int) iovcnt;
3366 		syscallarg(off_t) offset;
3367 	} */ *uap = v;
3368 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3369 	int error, iovcnt = SCARG(uap, iovcnt);
3370 	struct uio auio;
3371 	size_t resid;
3372 
3373 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3374 	if (error)
3375 		goto done;
3376 
3377 	auio.uio_iov = iov;
3378 	auio.uio_iovcnt = iovcnt;
3379 	auio.uio_resid = resid;
3380 	auio.uio_offset = SCARG(uap, offset);
3381 
3382 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3383  done:
3384 	iovec_free(iov, iovcnt);
3385 	return (error);
3386 }
3387