xref: /openbsd/sys/kern/vfs_syscalls.c (revision 5f31b145)
1 /*	$OpenBSD: vfs_syscalls.c,v 1.365 2024/05/18 05:20:22 guenther Exp $	*/
2 /*	$NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_syscalls.c	8.28 (Berkeley) 12/10/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/filedesc.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <sys/lock.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51 #include <sys/proc.h>
52 #include <sys/pledge.h>
53 #include <sys/uio.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/ktrace.h>
57 #include <sys/unistd.h>
58 #include <sys/specdev.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 
62 #include <sys/syscallargs.h>
63 
/* Declared here; the definition lives outside this file. */
extern int suid_clear;

/* Lookup step shared by sys_chdir() and sys_chroot(). */
static int change_dir(struct nameidata *, struct proc *);

/* Called from sys_mount() once a new file system has been attached. */
void checkdirs(struct vnode *);

/* Copies a struct statfs out to user space; used by the *statfs calls. */
int copyout_statfs(struct statfs *, void *, struct proc *);

/*
 * Internal workers behind the path-based system calls.
 * NOTE(review): judging by the names and the leading (proc, fd, path)
 * parameters these back the *at() entry points; confirm against the
 * individual definitions.
 */
int doopenat(struct proc *, int, const char *, int, mode_t, register_t *);
int domknodat(struct proc *, int, const char *, mode_t, dev_t);
int dolinkat(struct proc *, int, const char *, int, const char *, int);
int dosymlinkat(struct proc *, const char *, int, const char *);
int dounlinkat(struct proc *, int, const char *, int);
int dofaccessat(struct proc *, int, const char *, int, int);
int dofstatat(struct proc *, int, const char *, struct stat *, int);
int dopathconfat(struct proc *, int, const char *, int, int, register_t *);
int doreadlinkat(struct proc *, int, const char *, char *, size_t,
    register_t *);
int dochflagsat(struct proc *, int, const char *, u_int, int);
int dovchflags(struct proc *, struct vnode *, u_int);
int dofchmodat(struct proc *, int, const char *, mode_t, int);
int dofchownat(struct proc *, int, const char *, uid_t, gid_t, int);
int dorenameat(struct proc *, int, const char *, int, const char *);
int domkdirat(struct proc *, int, const char *, mode_t);
int doutimensat(struct proc *, int, const char *, struct timespec [2], int);
int dovutimens(struct proc *, struct vnode *, struct timespec [2]);
int dofutimens(struct proc *, int, struct timespec [2]);
/* Unmounts one mount point; dounmount() calls it for each collected entry. */
int dounmount_leaf(struct mount *, int, struct proc *);
92 
93 /*
94  * Virtual File System System Calls
95  */
96 
97 /*
98  * Mount a file system.
99  */
100 int
sys_mount(struct proc * p,void * v,register_t * retval)101 sys_mount(struct proc *p, void *v, register_t *retval)
102 {
103 	struct sys_mount_args /* {
104 		syscallarg(const char *) type;
105 		syscallarg(const char *) path;
106 		syscallarg(int) flags;
107 		syscallarg(void *) data;
108 	} */ *uap = v;
109 	struct vnode *vp;
110 	struct mount *mp;
111 	int error, mntflag = 0;
112 	char fstypename[MFSNAMELEN];
113 	char fspath[MNAMELEN];
114 	struct nameidata nd;
115 	struct vfsconf *vfsp;
116 	int flags = SCARG(uap, flags);
117 	void *args = NULL;
118 
119 	if ((error = suser(p)))
120 		return (error);
121 
122 	/*
123 	 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
124 	 */
125 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
126 	if (error)
127 		return(error);
128 
129 	/*
130 	 * Get vnode to be covered
131 	 */
132 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
133 	if ((error = namei(&nd)) != 0)
134 		goto fail;
135 	vp = nd.ni_vp;
136 	if (flags & MNT_UPDATE) {
137 		if ((vp->v_flag & VROOT) == 0) {
138 			vput(vp);
139 			error = EINVAL;
140 			goto fail;
141 		}
142 		mp = vp->v_mount;
143 		vfsp = mp->mnt_vfc;
144 
145 		args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
146 		error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
147 		if (error) {
148 			vput(vp);
149 			goto fail;
150 		}
151 
152 		mntflag = mp->mnt_flag;
153 		/*
154 		 * We only allow the filesystem to be reloaded if it
155 		 * is currently mounted read-only.
156 		 */
157 		if ((flags & MNT_RELOAD) &&
158 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
159 			vput(vp);
160 			error = EOPNOTSUPP;	/* Needs translation */
161 			goto fail;
162 		}
163 
164 		if ((error = vfs_busy(mp, VB_READ|VB_NOWAIT)) != 0) {
165 			vput(vp);
166 			goto fail;
167 		}
168 		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_UPDATE);
169 		goto update;
170 	}
171 	/*
172 	 * Do not allow disabling of permission checks unless exec and access to
173 	 * device files is disabled too.
174 	 */
175 	if ((flags & MNT_NOPERM) &&
176 	    (flags & (MNT_NODEV | MNT_NOEXEC)) != (MNT_NODEV | MNT_NOEXEC)) {
177 		vput(vp);
178 		error = EPERM;
179 		goto fail;
180 	}
181 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, INFSLP)) != 0) {
182 		vput(vp);
183 		goto fail;
184 	}
185 	if (vp->v_type != VDIR) {
186 		vput(vp);
187 		goto fail;
188 	}
189 	error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
190 	if (error) {
191 		vput(vp);
192 		goto fail;
193 	}
194 	vfsp = vfs_byname(fstypename);
195 	if (vfsp == NULL) {
196 		vput(vp);
197 		error = EOPNOTSUPP;
198 		goto fail;
199 	}
200 
201 	args = malloc(vfsp->vfc_datasize, M_TEMP, M_WAITOK | M_ZERO);
202 	error = copyin(SCARG(uap, data), args, vfsp->vfc_datasize);
203 	if (error) {
204 		vput(vp);
205 		goto fail;
206 	}
207 
208 	if (vp->v_mountedhere != NULL) {
209 		vput(vp);
210 		error = EBUSY;
211 		goto fail;
212 	}
213 
214 	/*
215 	 * Allocate and initialize the file system.
216 	 */
217 	mp = vfs_mount_alloc(vp, vfsp);
218 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
219 
220 update:
221 	/* Ensure that the parent mountpoint does not get unmounted. */
222 	error = vfs_busy(vp->v_mount, VB_READ|VB_NOWAIT|VB_DUPOK);
223 	if (error) {
224 		if (mp->mnt_flag & MNT_UPDATE) {
225 			mp->mnt_flag = mntflag;
226 			vfs_unbusy(mp);
227 		} else {
228 			vfs_unbusy(mp);
229 			vfs_mount_free(mp);
230 		}
231 		vput(vp);
232 		goto fail;
233 	}
234 
235 	/*
236 	 * Set the mount level flags.
237 	 */
238 	if (flags & MNT_RDONLY)
239 		mp->mnt_flag |= MNT_RDONLY;
240 	else if (mp->mnt_flag & MNT_RDONLY)
241 		mp->mnt_flag |= MNT_WANTRDWR;
242 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED | MNT_NODEV |
243 	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | MNT_NOPERM | MNT_FORCE);
244 	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_WXALLOWED |
245 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | MNT_NOPERM |
246 	    MNT_FORCE);
247 	/*
248 	 * Mount the filesystem.
249 	 */
250 	error = VFS_MOUNT(mp, fspath, args, &nd, p);
251 	if (!error) {
252 		mp->mnt_stat.f_ctime = gettime();
253 	}
254 	if (mp->mnt_flag & MNT_UPDATE) {
255 		vfs_unbusy(vp->v_mount);
256 		vput(vp);
257 		if (mp->mnt_flag & MNT_WANTRDWR)
258 			mp->mnt_flag &= ~MNT_RDONLY;
259 		mp->mnt_flag &= ~MNT_OP_FLAGS;
260 		if (error)
261 			mp->mnt_flag = mntflag;
262 
263 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
264 			if (mp->mnt_syncer == NULL)
265 				error = vfs_allocate_syncvnode(mp);
266 		} else {
267 			if (mp->mnt_syncer != NULL)
268 				vgone(mp->mnt_syncer);
269 			mp->mnt_syncer = NULL;
270 		}
271 
272 		vfs_unbusy(mp);
273 		goto fail;
274 	}
275 
276 	mp->mnt_flag &= ~MNT_OP_FLAGS;
277 	vp->v_mountedhere = mp;
278 
279 	/*
280 	 * Put the new filesystem on the mount list after root.
281 	 */
282 	cache_purge(vp);
283 	if (!error) {
284 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
285 		checkdirs(vp);
286 		vfs_unbusy(vp->v_mount);
287 		VOP_UNLOCK(vp);
288 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
289 			error = vfs_allocate_syncvnode(mp);
290 		vfs_unbusy(mp);
291 		(void) VFS_STATFS(mp, &mp->mnt_stat, p);
292 		if ((error = VFS_START(mp, 0, p)) != 0)
293 			vrele(vp);
294 	} else {
295 		mp->mnt_vnodecovered->v_mountedhere = NULL;
296 		vfs_unbusy(mp);
297 		vfs_mount_free(mp);
298 		vfs_unbusy(vp->v_mount);
299 		vput(vp);
300 	}
301 fail:
302 	if (args)
303 		free(args, M_TEMP, vfsp->vfc_datasize);
304 	return (error);
305 }
306 
307 /*
308  * Scan all active processes to see if any of them have a current
309  * or root directory onto which the new filesystem has just been
310  * mounted. If so, replace them with the new mount point, keeping
311  * track of how many were replaced.  That's the number of references
312  * the old vnode had that we've replaced, so finish by vrele()'ing
313  * it that many times.  This puts off any possible sleeping until
314  * we've finished walking the allprocess list.
315  */
316 void
checkdirs(struct vnode * olddp)317 checkdirs(struct vnode *olddp)
318 {
319 	struct filedesc *fdp;
320 	struct vnode *newdp;
321 	struct process *pr;
322 	u_int  free_count = 0;
323 
324 	if (olddp->v_usecount == 1)
325 		return;
326 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
327 		panic("mount: lost mount");
328 	LIST_FOREACH(pr, &allprocess, ps_list) {
329 		fdp = pr->ps_fd;
330 		if (fdp->fd_cdir == olddp) {
331 			free_count++;
332 			vref(newdp);
333 			fdp->fd_cdir = newdp;
334 		}
335 		if (fdp->fd_rdir == olddp) {
336 			free_count++;
337 			vref(newdp);
338 			fdp->fd_rdir = newdp;
339 		}
340 	}
341 	if (rootvnode == olddp) {
342 		free_count++;
343 		vref(newdp);
344 		rootvnode = newdp;
345 	}
346 	while (free_count-- > 0)
347 		vrele(olddp);
348 	vput(newdp);
349 }
350 
351 /*
352  * Unmount a file system.
353  *
354  * Note: unmount takes a path to the vnode mounted on as argument,
355  * not special file (as before).
356  */
357 int
sys_unmount(struct proc * p,void * v,register_t * retval)358 sys_unmount(struct proc *p, void *v, register_t *retval)
359 {
360 	struct sys_unmount_args /* {
361 		syscallarg(const char *) path;
362 		syscallarg(int) flags;
363 	} */ *uap = v;
364 	struct vnode *vp;
365 	struct mount *mp;
366 	int error;
367 	struct nameidata nd;
368 
369 	if ((error = suser(p)) != 0)
370 		return (error);
371 
372 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
373 	    SCARG(uap, path), p);
374 	if ((error = namei(&nd)) != 0)
375 		return (error);
376 	vp = nd.ni_vp;
377 	mp = vp->v_mount;
378 
379 	/*
380 	 * Don't allow unmounting the root file system.
381 	 */
382 	if (mp->mnt_flag & MNT_ROOTFS) {
383 		vput(vp);
384 		return (EINVAL);
385 	}
386 
387 	/*
388 	 * Must be the root of the filesystem
389 	 */
390 	if ((vp->v_flag & VROOT) == 0) {
391 		vput(vp);
392 		return (EINVAL);
393 	}
394 	vput(vp);
395 
396 	if (vfs_busy(mp, VB_WRITE|VB_WAIT))
397 		return (EBUSY);
398 
399 	return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p));
400 }
401 
402 /*
403  * Do the actual file system unmount.
404  */
405 int
dounmount(struct mount * mp,int flags,struct proc * p)406 dounmount(struct mount *mp, int flags, struct proc *p)
407 {
408 	SLIST_HEAD(, mount) mplist;
409 	struct mount *nmp;
410 	int error;
411 
412 	SLIST_INIT(&mplist);
413 	SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
414 
415 	/*
416 	 * Collect nested mount points. This takes advantage of the mount list
417 	 * being ordered - nested mount points come after their parent.
418 	 */
419 	while ((mp = TAILQ_NEXT(mp, mnt_list)) != NULL) {
420 		SLIST_FOREACH(nmp, &mplist, mnt_dounmount) {
421 			if (mp->mnt_vnodecovered == NULLVP ||
422 			    mp->mnt_vnodecovered->v_mount != nmp)
423 				continue;
424 
425 			if ((flags & MNT_FORCE) == 0) {
426 				error = EBUSY;
427 				goto err;
428 			}
429 			error = vfs_busy(mp, VB_WRITE|VB_WAIT|VB_DUPOK);
430 			if (error) {
431 				if ((flags & MNT_DOOMED)) {
432 					/*
433 					 * If the mount point was busy due to
434 					 * being unmounted, it has been removed
435 					 * from the mount list already.
436 					 * Restart the iteration from the last
437 					 * collected busy entry.
438 					 */
439 					mp = SLIST_FIRST(&mplist);
440 					break;
441 				}
442 				goto err;
443 			}
444 			SLIST_INSERT_HEAD(&mplist, mp, mnt_dounmount);
445 			break;
446 		}
447 	}
448 
449 	/*
450 	 * Nested mount points cannot appear during this loop as mounting
451 	 * requires a read lock for the parent mount point.
452 	 */
453 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
454 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
455 		error = dounmount_leaf(mp, flags, p);
456 		if (error)
457 			goto err;
458 	}
459 	return (0);
460 
461 err:
462 	while ((mp = SLIST_FIRST(&mplist)) != NULL) {
463 		SLIST_REMOVE(&mplist, mp, mount, mnt_dounmount);
464 		vfs_unbusy(mp);
465 	}
466 	return (error);
467 }
468 
469 int
dounmount_leaf(struct mount * mp,int flags,struct proc * p)470 dounmount_leaf(struct mount *mp, int flags, struct proc *p)
471 {
472 	struct vnode *coveredvp;
473 	struct vnode *vp, *nvp;
474 	int error;
475 	int hadsyncer = 0;
476 
477 	mp->mnt_flag &=~ MNT_ASYNC;
478 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
479 	if (mp->mnt_syncer != NULL) {
480 		hadsyncer = 1;
481 		vgone(mp->mnt_syncer);
482 		mp->mnt_syncer = NULL;
483 	}
484 
485 	/*
486 	 * Before calling file system unmount, make sure
487 	 * all unveils to vnodes in here are dropped.
488 	 */
489 	TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
490 		unveil_removevnode(vp);
491 	}
492 
493 	if (((mp->mnt_flag & MNT_RDONLY) ||
494 	    (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
495 	    (flags & MNT_FORCE))
496 		error = VFS_UNMOUNT(mp, flags, p);
497 
498 	if (error && !(flags & MNT_DOOMED)) {
499 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && hadsyncer)
500 			(void) vfs_allocate_syncvnode(mp);
501 		vfs_unbusy(mp);
502 		return (error);
503 	}
504 
505 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
506 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
507 		coveredvp->v_mountedhere = NULL;
508 		vrele(coveredvp);
509 	}
510 
511 	if (!TAILQ_EMPTY(&mp->mnt_vnodelist))
512 		panic("unmount: dangling vnode");
513 
514 	vfs_unbusy(mp);
515 	vfs_mount_free(mp);
516 
517 	return (0);
518 }
519 
520 /*
521  * Sync each mounted filesystem.
522  */
523 int
sys_sync(struct proc * p,void * v,register_t * retval)524 sys_sync(struct proc *p, void *v, register_t *retval)
525 {
526 	struct mount *mp;
527 	int asyncflag;
528 
529 	TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
530 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
531 			continue;
532 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
533 			asyncflag = mp->mnt_flag & MNT_ASYNC;
534 			mp->mnt_flag &= ~MNT_ASYNC;
535 			uvm_vnp_sync(mp);
536 			VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p);
537 			if (asyncflag)
538 				mp->mnt_flag |= MNT_ASYNC;
539 		}
540 		vfs_unbusy(mp);
541 	}
542 
543 	return (0);
544 }
545 
546 /*
547  * Change filesystem quotas.
548  */
549 int
sys_quotactl(struct proc * p,void * v,register_t * retval)550 sys_quotactl(struct proc *p, void *v, register_t *retval)
551 {
552 	struct sys_quotactl_args /* {
553 		syscallarg(const char *) path;
554 		syscallarg(int) cmd;
555 		syscallarg(int) uid;
556 		syscallarg(char *) arg;
557 	} */ *uap = v;
558 	struct mount *mp;
559 	int error;
560 	struct nameidata nd;
561 
562 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
563 	if ((error = namei(&nd)) != 0)
564 		return (error);
565 	mp = nd.ni_vp->v_mount;
566 	vrele(nd.ni_vp);
567 	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
568 	    SCARG(uap, arg), p));
569 }
570 
571 int
copyout_statfs(struct statfs * sp,void * uaddr,struct proc * p)572 copyout_statfs(struct statfs *sp, void *uaddr, struct proc *p)
573 {
574 	size_t co_sz1 = offsetof(struct statfs, f_fsid);
575 	size_t co_off2 = co_sz1 + sizeof(fsid_t);
576 	size_t co_sz2 = sizeof(struct statfs) - co_off2;
577 	char *s, *d;
578 	int error;
579 
580 	/* Don't let non-root see filesystem id (for NFS security) */
581 	if (suser(p)) {
582 		fsid_t fsid;
583 
584 		s = (char *)sp;
585 		d = (char *)uaddr;
586 
587 		memset(&fsid, 0, sizeof(fsid));
588 
589 		if ((error = copyout(s, d, co_sz1)) != 0)
590 			return (error);
591 		if ((error = copyout(&fsid, d + co_sz1, sizeof(fsid))) != 0)
592 			return (error);
593 		return (copyout(s + co_off2, d + co_off2, co_sz2));
594 	}
595 
596 	return (copyout(sp, uaddr, sizeof(*sp)));
597 }
598 
599 /*
600  * Get filesystem statistics.
601  */
602 int
sys_statfs(struct proc * p,void * v,register_t * retval)603 sys_statfs(struct proc *p, void *v, register_t *retval)
604 {
605 	struct sys_statfs_args /* {
606 		syscallarg(const char *) path;
607 		syscallarg(struct statfs *) buf;
608 	} */ *uap = v;
609 	struct mount *mp;
610 	struct statfs *sp;
611 	int error;
612 	struct nameidata nd;
613 
614 	NDINIT(&nd, LOOKUP, FOLLOW | BYPASSUNVEIL, UIO_USERSPACE,
615 	    SCARG(uap, path), p);
616 	nd.ni_pledge = PLEDGE_RPATH;
617 	nd.ni_unveil = UNVEIL_READ;
618 	if ((error = namei(&nd)) != 0)
619 		return (error);
620 	mp = nd.ni_vp->v_mount;
621 	sp = &mp->mnt_stat;
622 	vrele(nd.ni_vp);
623 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
624 		return (error);
625 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
626 
627 	return (copyout_statfs(sp, SCARG(uap, buf), p));
628 }
629 
630 /*
631  * Get filesystem statistics.
632  */
633 int
sys_fstatfs(struct proc * p,void * v,register_t * retval)634 sys_fstatfs(struct proc *p, void *v, register_t *retval)
635 {
636 	struct sys_fstatfs_args /* {
637 		syscallarg(int) fd;
638 		syscallarg(struct statfs *) buf;
639 	} */ *uap = v;
640 	struct file *fp;
641 	struct mount *mp;
642 	struct statfs *sp;
643 	int error;
644 
645 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
646 		return (error);
647 	mp = ((struct vnode *)fp->f_data)->v_mount;
648 	if (!mp) {
649 		FRELE(fp, p);
650 		return (ENOENT);
651 	}
652 	sp = &mp->mnt_stat;
653 	error = VFS_STATFS(mp, sp, p);
654 	FRELE(fp, p);
655 	if (error)
656 		return (error);
657 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
658 
659 	return (copyout_statfs(sp, SCARG(uap, buf), p));
660 }
661 
662 /*
663  * Get statistics on all filesystems.
664  */
665 int
sys_getfsstat(struct proc * p,void * v,register_t * retval)666 sys_getfsstat(struct proc *p, void *v, register_t *retval)
667 {
668 	struct sys_getfsstat_args /* {
669 		syscallarg(struct statfs *) buf;
670 		syscallarg(size_t) bufsize;
671 		syscallarg(int) flags;
672 	} */ *uap = v;
673 	struct mount *mp;
674 	struct statfs *sp;
675 	struct statfs *sfsp;
676 	size_t count, maxcount;
677 	int error, flags = SCARG(uap, flags);
678 
679 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
680 	sfsp = SCARG(uap, buf);
681 	count = 0;
682 
683 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
684 		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
685 			continue;
686 		if (sfsp && count < maxcount) {
687 			sp = &mp->mnt_stat;
688 
689 			/* Refresh stats unless MNT_NOWAIT is specified */
690 			if (flags != MNT_NOWAIT &&
691 			    flags != MNT_LAZY &&
692 			    (flags == MNT_WAIT ||
693 			    flags == 0) &&
694 			    (error = VFS_STATFS(mp, sp, p))) {
695 				vfs_unbusy(mp);
696 				continue;
697 			}
698 
699 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
700 #if notyet
701 			if (mp->mnt_flag & MNT_SOFTDEP)
702 				sp->f_eflags = STATFS_SOFTUPD;
703 #endif
704 			error = (copyout_statfs(sp, sfsp, p));
705 			if (error) {
706 				vfs_unbusy(mp);
707 				return (error);
708 			}
709 			sfsp++;
710 		}
711 		count++;
712 		vfs_unbusy(mp);
713 	}
714 
715 	if (sfsp && count > maxcount)
716 		*retval = maxcount;
717 	else
718 		*retval = count;
719 
720 	return (0);
721 }
722 
723 /*
724  * Change current working directory to a given file descriptor.
725  */
726 int
sys_fchdir(struct proc * p,void * v,register_t * retval)727 sys_fchdir(struct proc *p, void *v, register_t *retval)
728 {
729 	struct sys_fchdir_args /* {
730 		syscallarg(int) fd;
731 	} */ *uap = v;
732 	struct filedesc *fdp = p->p_fd;
733 	struct vnode *vp, *tdp, *old_cdir;
734 	struct mount *mp;
735 	struct file *fp;
736 	int error;
737 
738 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
739 		return (EBADF);
740 	vp = fp->f_data;
741 	if (fp->f_type != DTYPE_VNODE || vp->v_type != VDIR) {
742 		FRELE(fp, p);
743 		return (ENOTDIR);
744 	}
745 	vref(vp);
746 	FRELE(fp, p);
747 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
748 	error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
749 
750 	while (!error && (mp = vp->v_mountedhere) != NULL) {
751 		if (vfs_busy(mp, VB_READ|VB_WAIT))
752 			continue;
753 		error = VFS_ROOT(mp, &tdp);
754 		vfs_unbusy(mp);
755 		if (error)
756 			break;
757 		vput(vp);
758 		vp = tdp;
759 	}
760 	if (error) {
761 		vput(vp);
762 		return (error);
763 	}
764 	VOP_UNLOCK(vp);
765 	old_cdir = fdp->fd_cdir;
766 	fdp->fd_cdir = vp;
767 	vrele(old_cdir);
768 	return (0);
769 }
770 
771 /*
772  * Change current working directory (``.'').
773  */
774 int
sys_chdir(struct proc * p,void * v,register_t * retval)775 sys_chdir(struct proc *p, void *v, register_t *retval)
776 {
777 	struct sys_chdir_args /* {
778 		syscallarg(const char *) path;
779 	} */ *uap = v;
780 	struct filedesc *fdp = p->p_fd;
781 	struct vnode *old_cdir;
782 	int error;
783 	struct nameidata nd;
784 
785 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
786 	    SCARG(uap, path), p);
787 	nd.ni_pledge = PLEDGE_RPATH;
788 	nd.ni_unveil = UNVEIL_READ;
789 	if ((error = change_dir(&nd, p)) != 0)
790 		return (error);
791 	old_cdir = fdp->fd_cdir;
792 	fdp->fd_cdir = nd.ni_vp;
793 	vrele(old_cdir);
794 	return (0);
795 }
796 
797 /*
798  * Change notion of root (``/'') directory.
799  */
800 int
sys_chroot(struct proc * p,void * v,register_t * retval)801 sys_chroot(struct proc *p, void *v, register_t *retval)
802 {
803 	struct sys_chroot_args /* {
804 		syscallarg(const char *) path;
805 	} */ *uap = v;
806 	struct filedesc *fdp = p->p_fd;
807 	struct vnode *old_cdir, *old_rdir;
808 	int error;
809 	struct nameidata nd;
810 
811 	if ((error = suser(p)) != 0)
812 		return (error);
813 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
814 	    SCARG(uap, path), p);
815 	if ((error = change_dir(&nd, p)) != 0)
816 		return (error);
817 	if (fdp->fd_rdir != NULL) {
818 		/*
819 		 * A chroot() done inside a changed root environment does
820 		 * an automatic chdir to avoid the out-of-tree experience.
821 		 */
822 		vref(nd.ni_vp);
823 		old_rdir = fdp->fd_rdir;
824 		old_cdir = fdp->fd_cdir;
825 		fdp->fd_rdir = fdp->fd_cdir = nd.ni_vp;
826 		vrele(old_rdir);
827 		vrele(old_cdir);
828 	} else
829 		fdp->fd_rdir = nd.ni_vp;
830 	atomic_setbits_int(&p->p_p->ps_flags, PS_CHROOT);
831 	return (0);
832 }
833 
834 /*
835  * Common routine for chroot and chdir.
836  */
837 static int
change_dir(struct nameidata * ndp,struct proc * p)838 change_dir(struct nameidata *ndp, struct proc *p)
839 {
840 	struct vnode *vp;
841 	int error;
842 
843 	if ((error = namei(ndp)) != 0)
844 		return (error);
845 	vp = ndp->ni_vp;
846 	if (vp->v_type != VDIR)
847 		error = ENOTDIR;
848 	else
849 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
850 	if (error)
851 		vput(vp);
852 	else
853 		VOP_UNLOCK(vp);
854 	return (error);
855 }
856 
857 int
sys___realpath(struct proc * p,void * v,register_t * retval)858 sys___realpath(struct proc *p, void *v, register_t *retval)
859 {
860 	struct sys___realpath_args /* {
861 		syscallarg(const char *) pathname;
862 		syscallarg(char *) resolved;
863 	} */ *uap = v;
864 	char *pathname;
865 	char *rpbuf;
866 	struct nameidata nd;
867 	size_t pathlen;
868 	int error = 0;
869 
870 	if (SCARG(uap, pathname) == NULL)
871 		return (EINVAL);
872 
873 	pathname = pool_get(&namei_pool, PR_WAITOK);
874 	rpbuf = pool_get(&namei_pool, PR_WAITOK);
875 
876 	if ((error = copyinstr(SCARG(uap, pathname), pathname, MAXPATHLEN,
877 	    &pathlen)))
878 		goto end;
879 
880 	if (pathlen == 1) { /* empty string "" */
881 		error = ENOENT;
882 		goto end;
883 	}
884 	if (pathlen < 2) {
885 		error = EINVAL;
886 		goto end;
887 	}
888 
889 	/* Get cwd for relative path if needed, prepend to rpbuf */
890 	rpbuf[0] = '\0';
891 	if (pathname[0] != '/') {
892 		int cwdlen = MAXPATHLEN * 4; /* for vfs_getcwd_common */
893 		char *cwdbuf, *bp;
894 
895 		cwdbuf = malloc(cwdlen, M_TEMP, M_WAITOK);
896 
897 		/* vfs_getcwd_common fills this in backwards */
898 		bp = &cwdbuf[cwdlen - 1];
899 		*bp = '\0';
900 
901 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
902 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
903 
904 		if (error) {
905 			free(cwdbuf, M_TEMP, cwdlen);
906 			goto end;
907 		}
908 
909 		if (strlcpy(rpbuf, bp, MAXPATHLEN) >= MAXPATHLEN) {
910 			free(cwdbuf, M_TEMP, cwdlen);
911 			error = ENAMETOOLONG;
912 			goto end;
913 		}
914 
915 		free(cwdbuf, M_TEMP, cwdlen);
916 	}
917 
918 	NDINIT(&nd, LOOKUP, FOLLOW | SAVENAME | REALPATH, UIO_SYSSPACE,
919 	    pathname, p);
920 
921 	nd.ni_cnd.cn_rpbuf = rpbuf;
922 	nd.ni_cnd.cn_rpi = strlen(rpbuf);
923 
924 	nd.ni_pledge = PLEDGE_RPATH;
925 	nd.ni_unveil = UNVEIL_READ;
926 	if ((error = namei(&nd)) != 0)
927 		goto end;
928 
929 	/* release reference from namei */
930 	if (nd.ni_vp)
931 		vrele(nd.ni_vp);
932 
933 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
934 	    MAXPATHLEN, NULL);
935 
936 #ifdef KTRACE
937 	if (KTRPOINT(p, KTR_NAMEI))
938 		ktrnamei(p, nd.ni_cnd.cn_rpbuf);
939 #endif
940 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
941 end:
942 	pool_put(&namei_pool, rpbuf);
943 	pool_put(&namei_pool, pathname);
944 	return (error);
945 }
946 
947 int
sys_unveil(struct proc * p,void * v,register_t * retval)948 sys_unveil(struct proc *p, void *v, register_t *retval)
949 {
950 	struct sys_unveil_args /* {
951 		syscallarg(const char *) path;
952 		syscallarg(const char *) permissions;
953 	} */ *uap = v;
954 	struct process *pr = p->p_p;
955 	char *pathname, *c;
956 	struct nameidata nd;
957 	size_t pathlen;
958 	char permissions[5];
959 	int error, allow;
960 
961 	if (SCARG(uap, path) == NULL && SCARG(uap, permissions) == NULL) {
962 		pr->ps_uvdone = 1;
963 		return (0);
964 	}
965 
966 	if (pr->ps_uvdone != 0)
967 		return EPERM;
968 
969 	error = copyinstr(SCARG(uap, permissions), permissions,
970 	    sizeof(permissions), NULL);
971 	if (error)
972 		return (error);
973 	pathname = pool_get(&namei_pool, PR_WAITOK);
974 	error = copyinstr(SCARG(uap, path), pathname, MAXPATHLEN, &pathlen);
975 	if (error)
976 		goto end;
977 
978 #ifdef KTRACE
979 	if (KTRPOINT(p, KTR_STRUCT))
980 		ktrstruct(p, "unveil", permissions, strlen(permissions));
981 #endif
982 	if (pathlen < 2) {
983 		error = EINVAL;
984 		goto end;
985 	}
986 
987 	/* find root "/" or "//" */
988 	for (c = pathname; *c != '\0'; c++) {
989 		if (*c != '/')
990 			break;
991 	}
992 	if (*c == '\0')
993 		/* root directory */
994 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
995 		    UIO_SYSSPACE, pathname, p);
996 	else
997 		NDINIT(&nd, CREATE, FOLLOW | LOCKLEAF | LOCKPARENT | SAVENAME,
998 		    UIO_SYSSPACE, pathname, p);
999 
1000 	nd.ni_pledge = PLEDGE_UNVEIL;
1001 	if ((error = namei(&nd)) != 0)
1002 		goto end;
1003 
1004 	/*
1005 	 * XXX Any access to the file or directory will allow us to
1006 	 * pledge path it
1007 	 */
1008 	allow = ((nd.ni_vp &&
1009 	    (VOP_ACCESS(nd.ni_vp, VREAD, p->p_ucred, p) == 0 ||
1010 	    VOP_ACCESS(nd.ni_vp, VWRITE, p->p_ucred, p) == 0 ||
1011 	    VOP_ACCESS(nd.ni_vp, VEXEC, p->p_ucred, p) == 0)) ||
1012 	    (nd.ni_dvp &&
1013 	    (VOP_ACCESS(nd.ni_dvp, VREAD, p->p_ucred, p) == 0 ||
1014 	    VOP_ACCESS(nd.ni_dvp, VWRITE, p->p_ucred, p) == 0 ||
1015 	    VOP_ACCESS(nd.ni_dvp, VEXEC, p->p_ucred, p) == 0)));
1016 
1017 	/* release lock from namei, but keep ref */
1018 	if (nd.ni_vp)
1019 		VOP_UNLOCK(nd.ni_vp);
1020 	if (nd.ni_dvp && nd.ni_dvp != nd.ni_vp)
1021 		VOP_UNLOCK(nd.ni_dvp);
1022 
1023 	if (allow)
1024 		error = unveil_add(p, &nd, permissions);
1025 	else
1026 		error = EPERM;
1027 
1028 	/* release vref from namei, but not vref from unveil_add */
1029 	if (nd.ni_vp)
1030 		vrele(nd.ni_vp);
1031 	if (nd.ni_dvp)
1032 		vrele(nd.ni_dvp);
1033 
1034 	pool_put(&namei_pool, nd.ni_cnd.cn_pnbuf);
1035 end:
1036 	pool_put(&namei_pool, pathname);
1037 
1038 	return (error);
1039 }
1040 
1041 /*
1042  * Check permissions, allocate an open file structure,
1043  * and call the device open routine if any.
1044  */
1045 int
sys_open(struct proc * p,void * v,register_t * retval)1046 sys_open(struct proc *p, void *v, register_t *retval)
1047 {
1048 	struct sys_open_args /* {
1049 		syscallarg(const char *) path;
1050 		syscallarg(int) flags;
1051 		syscallarg(mode_t) mode;
1052 	} */ *uap = v;
1053 
1054 	return (doopenat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, flags),
1055 	    SCARG(uap, mode), retval));
1056 }
1057 
1058 int
sys_openat(struct proc * p,void * v,register_t * retval)1059 sys_openat(struct proc *p, void *v, register_t *retval)
1060 {
1061 	struct sys_openat_args /* {
1062 		syscallarg(int) fd;
1063 		syscallarg(const char *) path;
1064 		syscallarg(int) flags;
1065 		syscallarg(mode_t) mode;
1066 	} */ *uap = v;
1067 
1068 	return (doopenat(p, SCARG(uap, fd), SCARG(uap, path),
1069 	    SCARG(uap, flags), SCARG(uap, mode), retval));
1070 }
1071 
/*
 * Common back end of sys_open() and sys_openat().
 *
 *	p	calling thread
 *	fd	directory descriptor given to NDINITAT() (AT_FDCWD for open)
 *	path	user-space pathname
 *	oflags	open flags exactly as supplied by the caller
 *	mode	creation mode; masked with fdp->fd_cmask below
 *	retval	receives the new descriptor index on success
 *
 * Returns 0 on success, otherwise an errno value.  Every failure after
 * falloc() releases the reserved descriptor slot with fdremove() and
 * drops the file with closef().
 */
int
doopenat(struct proc *p, int fd, const char *path, int oflags, mode_t mode,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int flags, cloexec, cmode;
	int type, indx, error, localtrunc = 0;
	struct flock lf;
	struct nameidata nd;
	uint64_t ni_pledge = 0;
	u_char ni_unveil = 0;

	/* A lock request at open time must first pass pledge_flock(). */
	if (oflags & (O_EXLOCK | O_SHLOCK)) {
		error = pledge_flock(p);
		if (error != 0)
			return (error);
	}

	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	/* Reserve the file and its descriptor index under the table lock. */
	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0) {
		fdpunlock(fdp);
		return (error);
	}
	fdpunlock(fdp);

	/*
	 * Collect the pledge and unveil bits that are attached to the
	 * nameidata below, based on the requested access and O_CREAT.
	 */
	flags = FFLAGS(oflags);
	if (flags & FREAD) {
		ni_pledge |= PLEDGE_RPATH;
		ni_unveil |= UNVEIL_READ;
	}
	if (flags & FWRITE) {
		ni_pledge |= PLEDGE_WPATH;
		ni_unveil |= UNVEIL_WRITE;
	}
	if (oflags & O_CREAT) {
		ni_pledge |= PLEDGE_CPATH;
		ni_unveil |= UNVEIL_CREATE;
	}

	/*
	 * Creation mode: apply fd_cmask, keep only permission bits and
	 * drop S_ISTXT; pledged processes are further limited to
	 * ACCESSPERMS.
	 */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	if ((p->p_p->ps_flags & PS_PLEDGE))
		cmode &= ACCESSPERMS;
	NDINITAT(&nd, 0, 0, UIO_USERSPACE, fd, path, p);
	nd.ni_pledge = ni_pledge;
	nd.ni_unveil = ni_unveil;
	p->p_dupfd = -1;			/* XXX check for fdopen */
	/*
	 * When a lock is requested as well, O_TRUNC is withheld from
	 * vn_open() and carried out further down, after the lock is held.
	 */
	if ((flags & O_TRUNC) && (flags & (O_EXLOCK | O_SHLOCK))) {
		localtrunc = 1;
		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
	}
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		fdplock(fdp);
		/*
		 * ENODEV with p_dupfd set is turned into a dupfdopen() of
		 * the reserved index; on success the file allocated above
		 * is no longer needed and is dropped.
		 */
		if (error == ENODEV &&
		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			dupfdopen(p, indx, flags)) == 0) {
			fdpunlock(fdp);
			closef(fp, p);
			*retval = indx;
			return (error);
		}
		/* The caller is handed EINTR instead of ERESTART. */
		if (error == ERESTART)
			error = EINTR;
		fdremove(fdp, indx);
		fdpunlock(fdp);
		closef(fp, p);
		return (error);
	}
	p->p_dupfd = 0;
	vp = nd.ni_vp;
	/* Turn the allocated file into a vnode-backed file. */
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0, from SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* F_WAIT is added unless the open is non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
		VOP_UNLOCK(vp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			fdplock(fdp);
			/* closef will vn_close the file for us. */
			fdremove(fdp, indx);
			fdpunlock(fdp);
			closef(fp, p);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
	}
	if (localtrunc) {
		/*
		 * Deferred O_TRUNC: run the checks here, then set the
		 * size to 0 through VOP_SETATTR().
		 */
		if ((fp->f_flag & FWRITE) == 0)
			error = EACCES;
		else if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
			error = EROFS;
		else if (vp->v_type == VDIR)
			error = EISDIR;
		else if ((error = vn_writechk(vp)) == 0) {
			VATTR_NULL(&vattr);
			vattr.va_size = 0;
			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
		}
		if (error) {
			VOP_UNLOCK(vp);
			fdplock(fdp);
			/* closef will close the file for us. */
			fdremove(fdp, indx);
			fdpunlock(fdp);
			closef(fp, p);
			return (error);
		}
	}
	/* Success: unlock the vnode and publish the file at indx. */
	VOP_UNLOCK(vp);
	*retval = indx;
	fdplock(fdp);
	fdinsert(fdp, indx, cloexec, fp);
	fdpunlock(fdp);
	FRELE(fp, p);
	return (error);
}
1205 
1206 /*
 * Open a newly created file (in /tmp) suitable for mmapping.
1208  */
1209 int
sys___tmpfd(struct proc * p,void * v,register_t * retval)1210 sys___tmpfd(struct proc *p, void *v, register_t *retval)
1211 {
1212 	struct sys___tmpfd_args /* {
1213 		syscallarg(int) flags;
1214 	} */ *uap = v;
1215 	struct filedesc *fdp = p->p_fd;
1216 	struct file *fp;
1217 	struct vnode *vp;
1218 	int oflags = SCARG(uap, flags);
1219 	int flags, cloexec, cmode;
1220 	int indx, error;
1221 	unsigned int i;
1222 	struct nameidata nd;
1223 	char path[64];
1224 	static const char *letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
1225 
1226 	/* most flags are hardwired */
1227 	oflags = O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW | (oflags & O_CLOEXEC);
1228 
1229 	cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
1230 
1231 	fdplock(fdp);
1232 	if ((error = falloc(p, &fp, &indx)) != 0) {
1233 		fdpunlock(fdp);
1234 		return (error);
1235 	}
1236 	fdpunlock(fdp);
1237 
1238 	flags = FFLAGS(oflags);
1239 
1240 	arc4random_buf(path, sizeof(path));
1241 	memcpy(path, "/tmp/", 5);
1242 	for (i = 5; i < sizeof(path) - 1; i++)
1243 		path[i] = letters[(unsigned char)path[i] & 63];
1244 	path[sizeof(path)-1] = 0;
1245 
1246 	cmode = 0600;
1247 	NDINITAT(&nd, 0, KERNELPATH, UIO_SYSSPACE, AT_FDCWD, path, p);
1248 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
1249 		if (error == ERESTART)
1250 			error = EINTR;
1251 		fdplock(fdp);
1252 		fdremove(fdp, indx);
1253 		fdpunlock(fdp);
1254 		closef(fp, p);
1255 		return (error);
1256 	}
1257 	vp = nd.ni_vp;
1258 	fp->f_flag = flags & FMASK;
1259 	fp->f_type = DTYPE_VNODE;
1260 	fp->f_ops = &vnops;
1261 	fp->f_data = vp;
1262 	VOP_UNLOCK(vp);
1263 	*retval = indx;
1264 	fdplock(fdp);
1265 	fdinsert(fdp, indx, cloexec, fp);
1266 	fdpunlock(fdp);
1267 	FRELE(fp, p);
1268 
1269 	/* unlink it */
1270 	/* XXX
1271 	 * there is a wee race here, although it is mostly inconsequential.
1272 	 * perhaps someday we can create a file like object without a name...
1273 	 */
1274 	NDINITAT(&nd, DELETE, KERNELPATH | LOCKPARENT | LOCKLEAF, UIO_SYSSPACE,
1275 	    AT_FDCWD, path, p);
1276 	if ((error = namei(&nd)) != 0) {
1277 		printf("can't unlink temp file! %d\n", error);
1278 		error = 0;
1279 	} else {
1280 		vp = nd.ni_vp;
1281 		uvm_vnp_uncache(vp);
1282 		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1283 		if (error) {
1284 			printf("error removing vop: %d\n", error);
1285 			error = 0;
1286 		}
1287 	}
1288 
1289 	return (error);
1290 }
1291 
1292 /*
1293  * Get file handle system call
1294  */
1295 int
sys_getfh(struct proc * p,void * v,register_t * retval)1296 sys_getfh(struct proc *p, void *v, register_t *retval)
1297 {
1298 	struct sys_getfh_args /* {
1299 		syscallarg(const char *) fname;
1300 		syscallarg(fhandle_t *) fhp;
1301 	} */ *uap = v;
1302 	struct vnode *vp;
1303 	fhandle_t fh;
1304 	int error;
1305 	struct nameidata nd;
1306 
1307 	/*
1308 	 * Must be super user
1309 	 */
1310 	error = suser(p);
1311 	if (error)
1312 		return (error);
1313 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1314 	    SCARG(uap, fname), p);
1315 	error = namei(&nd);
1316 	if (error)
1317 		return (error);
1318 	vp = nd.ni_vp;
1319 	memset(&fh, 0, sizeof(fh));
1320 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1321 	error = VFS_VPTOFH(vp, &fh.fh_fid);
1322 	vput(vp);
1323 	if (error)
1324 		return (error);
1325 	error = copyout(&fh, SCARG(uap, fhp), sizeof(fh));
1326 	return (error);
1327 }
1328 
1329 /*
1330  * Open a file given a file handle.
1331  *
1332  * Check permissions, allocate an open file structure,
1333  * and call the device open routine if any.
1334  */
int
sys_fhopen(struct proc *p, void *v, register_t *retval)
{
	struct sys_fhopen_args /* {
		syscallarg(const fhandle_t *) fhp;
		syscallarg(int) flags;
	} */ *uap = v;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp = NULL;
	struct mount *mp;
	struct ucred *cred = p->p_ucred;
	int flags, cloexec;
	int type, indx, error=0;
	struct flock lf;
	struct vattr va;
	fhandle_t fh;

	/*
	 * Must be super user
	 */
	if ((error = suser(p)))
		return (error);

	/*
	 * The open must ask for read and/or write access, and O_CREAT
	 * is rejected.
	 */
	flags = FFLAGS(SCARG(uap, flags));
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((flags & O_CREAT))
		return (EINVAL);

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	/*
	 * Reserve a file and descriptor index.  fp is cleared on failure
	 * so that the cleanup at bad: has nothing to undo.
	 */
	fdplock(fdp);
	if ((error = falloc(p, &fp, &indx)) != 0) {
		fdpunlock(fdp);
		fp = NULL;
		goto bad;
	}
	fdpunlock(fdp);

	/* Fetch the handle and translate it: fsid -> mount, fid -> vnode. */
	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
		goto bad;

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto bad;
	}

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)) != 0) {
		vp = NULL;	/* most likely unnecessary sanity for bad: */
		goto bad;
	}

	/* Now do an effective vn_open */

	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((flags & O_DIRECTORY) && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	if (flags & FREAD) {
		if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
			goto bad;
	}
	/* Writing or truncating: refuse directories, then check access. */
	if (flags & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		if ((error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0 ||
		    (error = vn_writechk(vp)) != 0)
			goto bad;
	}
	/* O_TRUNC is carried out by setting the size to 0. */
	if (flags & O_TRUNC) {
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
		goto bad;
	if (flags & FWRITE)
		vp->v_writecount++;

	/* done with modified vn_open, now finish what sys_open does. */

	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0, from SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* F_WAIT is added unless the open is non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
		VOP_UNLOCK(vp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			vp = NULL;	/* closef will vn_close the file */
			goto bad;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
	}
	/* Success: unlock the vnode and publish the file at indx. */
	VOP_UNLOCK(vp);
	*retval = indx;
	fdplock(fdp);
	fdinsert(fdp, indx, cloexec, fp);
	fdpunlock(fdp);
	FRELE(fp, p);
	return (0);

bad:
	/*
	 * Shared failure path: give back the descriptor slot, drop the
	 * file, and vput() the vnode unless vp was cleared above.
	 */
	if (fp) {
		fdplock(fdp);
		fdremove(fdp, indx);
		fdpunlock(fdp);
		closef(fp, p);
		if (vp != NULL)
			vput(vp);
	}
	return (error);
}
1467 
1468 int
sys_fhstat(struct proc * p,void * v,register_t * retval)1469 sys_fhstat(struct proc *p, void *v, register_t *retval)
1470 {
1471 	struct sys_fhstat_args /* {
1472 		syscallarg(const fhandle_t *) fhp;
1473 		syscallarg(struct stat *) sb;
1474 	} */ *uap = v;
1475 	struct stat sb;
1476 	int error;
1477 	fhandle_t fh;
1478 	struct mount *mp;
1479 	struct vnode *vp;
1480 
1481 	/*
1482 	 * Must be super user
1483 	 */
1484 	if ((error = suser(p)))
1485 		return (error);
1486 
1487 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1488 		return (error);
1489 
1490 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1491 		return (ESTALE);
1492 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1493 		return (error);
1494 	error = vn_stat(vp, &sb, p);
1495 	vput(vp);
1496 	if (error)
1497 		return (error);
1498 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1499 	return (error);
1500 }
1501 
1502 int
sys_fhstatfs(struct proc * p,void * v,register_t * retval)1503 sys_fhstatfs(struct proc *p, void *v, register_t *retval)
1504 {
1505 	struct sys_fhstatfs_args /* {
1506 		syscallarg(const fhandle_t *) fhp;
1507 		syscallarg(struct statfs *) buf;
1508 	} */ *uap = v;
1509 	struct statfs *sp;
1510 	fhandle_t fh;
1511 	struct mount *mp;
1512 	struct vnode *vp;
1513 	int error;
1514 
1515 	/*
1516 	 * Must be super user
1517 	 */
1518 	if ((error = suser(p)))
1519 		return (error);
1520 
1521 	if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
1522 		return (error);
1523 
1524 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
1525 		return (ESTALE);
1526 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
1527 		return (error);
1528 	mp = vp->v_mount;
1529 	sp = &mp->mnt_stat;
1530 	vput(vp);
1531 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
1532 		return (error);
1533 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1534 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
1535 }
1536 
1537 /*
1538  * Create a special file or named pipe.
1539  */
1540 int
sys_mknod(struct proc * p,void * v,register_t * retval)1541 sys_mknod(struct proc *p, void *v, register_t *retval)
1542 {
1543 	struct sys_mknod_args /* {
1544 		syscallarg(const char *) path;
1545 		syscallarg(mode_t) mode;
1546 		syscallarg(int) dev;
1547 	} */ *uap = v;
1548 
1549 	return (domknodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
1550 	    SCARG(uap, dev)));
1551 }
1552 
1553 int
sys_mknodat(struct proc * p,void * v,register_t * retval)1554 sys_mknodat(struct proc *p, void *v, register_t *retval)
1555 {
1556 	struct sys_mknodat_args /* {
1557 		syscallarg(int) fd;
1558 		syscallarg(const char *) path;
1559 		syscallarg(mode_t) mode;
1560 		syscallarg(dev_t) dev;
1561 	} */ *uap = v;
1562 
1563 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1564 	    SCARG(uap, mode), SCARG(uap, dev)));
1565 }
1566 
/*
 * Common back end of the mknod, mknodat, mkfifo and mkfifoat calls.
 *
 *	p	calling thread
 *	fd	directory descriptor given to NDINITAT()
 *	path	user-space pathname of the node to create
 *	mode	file type (S_IFMT bits) and permission bits
 *	dev	device number stored in va_rdev; 0 for a FIFO
 *
 * Returns 0 on success, otherwise an errno value.
 */
int
domknodat(struct proc *p, int fd, const char *path, mode_t mode, dev_t dev)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	if (dev == VNOVAL)
		return (EINVAL);
	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, path, p);
	nd.ni_pledge = PLEDGE_DPATH;
	nd.ni_unveil = UNVEIL_CREATE;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	/*
	 * Anything other than a FIFO with dev == 0 needs vnoperm() on the
	 * parent directory or super-user rights, and is refused when the
	 * process has fd_rdir set.
	 */
	if (!S_ISFIFO(mode) || dev != 0) {
		if (!vnoperm(nd.ni_dvp) && (error = suser(p)) != 0)
			goto out;
		if (p->p_fd->fd_rdir) {
			error = EINVAL;
			goto out;
		}
	}
	if (vp != NULL)
		error = EEXIST;
	else {
		/*
		 * Permission bits are filtered through fd_cmask, and
		 * limited to ACCESSPERMS for pledged processes.
		 */
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
		if ((p->p_p->ps_flags & PS_PLEDGE))
			vattr.va_mode &= ACCESSPERMS;
		vattr.va_rdev = dev;

		/* Translate the S_IFMT bits of mode into a vnode type. */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFIFO:
#ifndef FIFO
			error = EOPNOTSUPP;
			break;
#else
			/* A FIFO with a non-zero dev falls through to EINVAL. */
			if (dev == 0) {
				vattr.va_type = VFIFO;
				break;
			}
			/* FALLTHROUGH */
#endif /* FIFO */
		default:
			error = EINVAL;
			break;
		}
	}
out:
	/*
	 * Create the node when no error was recorded; otherwise abort the
	 * pending operation and release the vnodes from the lookup.  vattr
	 * is only read on the success path, where it has been filled in.
	 */
	if (!error) {
		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
		vput(nd.ni_dvp);
	} else {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		/* dvp == vp: the one vnode gets one vrele here and one below. */
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
	return (error);
}
1641 
1642 /*
1643  * Create a named pipe.
1644  */
1645 int
sys_mkfifo(struct proc * p,void * v,register_t * retval)1646 sys_mkfifo(struct proc *p, void *v, register_t *retval)
1647 {
1648 	struct sys_mkfifo_args /* {
1649 		syscallarg(const char *) path;
1650 		syscallarg(mode_t) mode;
1651 	} */ *uap = v;
1652 
1653 	return (domknodat(p, AT_FDCWD, SCARG(uap, path),
1654 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1655 }
1656 
1657 int
sys_mkfifoat(struct proc * p,void * v,register_t * retval)1658 sys_mkfifoat(struct proc *p, void *v, register_t *retval)
1659 {
1660 	struct sys_mkfifoat_args /* {
1661 		syscallarg(int) fd;
1662 		syscallarg(const char *) path;
1663 		syscallarg(mode_t) mode;
1664 	} */ *uap = v;
1665 
1666 	return (domknodat(p, SCARG(uap, fd), SCARG(uap, path),
1667 	    (SCARG(uap, mode) & ALLPERMS) | S_IFIFO, 0));
1668 }
1669 
1670 /*
1671  * Make a hard file link.
1672  */
1673 int
sys_link(struct proc * p,void * v,register_t * retval)1674 sys_link(struct proc *p, void *v, register_t *retval)
1675 {
1676 	struct sys_link_args /* {
1677 		syscallarg(const char *) path;
1678 		syscallarg(const char *) link;
1679 	} */ *uap = v;
1680 
1681 	return (dolinkat(p, AT_FDCWD, SCARG(uap, path), AT_FDCWD,
1682 	    SCARG(uap, link), AT_SYMLINK_FOLLOW));
1683 }
1684 
1685 int
sys_linkat(struct proc * p,void * v,register_t * retval)1686 sys_linkat(struct proc *p, void *v, register_t *retval)
1687 {
1688 	struct sys_linkat_args /* {
1689 		syscallarg(int) fd1;
1690 		syscallarg(const char *) path1;
1691 		syscallarg(int) fd2;
1692 		syscallarg(const char *) path2;
1693 		syscallarg(int) flag;
1694 	} */ *uap = v;
1695 
1696 	return (dolinkat(p, SCARG(uap, fd1), SCARG(uap, path1),
1697 	    SCARG(uap, fd2), SCARG(uap, path2), SCARG(uap, flag)));
1698 }
1699 
1700 int
dolinkat(struct proc * p,int fd1,const char * path1,int fd2,const char * path2,int flag)1701 dolinkat(struct proc *p, int fd1, const char *path1, int fd2,
1702     const char *path2, int flag)
1703 {
1704 	struct vnode *vp;
1705 	struct nameidata nd;
1706 	int error, follow;
1707 
1708 	if (flag & ~AT_SYMLINK_FOLLOW)
1709 		return (EINVAL);
1710 
1711 	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
1712 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd1, path1, p);
1713 	nd.ni_pledge = PLEDGE_RPATH;
1714 	nd.ni_unveil = UNVEIL_READ;
1715 	if ((error = namei(&nd)) != 0)
1716 		return (error);
1717 	vp = nd.ni_vp;
1718 
1719 	if (vp->v_type == VDIR) {
1720 		error = EPERM;
1721 		goto out;
1722 	}
1723 
1724 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd2, path2, p);
1725 	nd.ni_pledge = PLEDGE_CPATH;
1726 	nd.ni_unveil = UNVEIL_CREATE;
1727 	if ((error = namei(&nd)) != 0)
1728 		goto out;
1729 	if (nd.ni_vp) {
1730 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1731 		if (nd.ni_dvp == nd.ni_vp)
1732 			vrele(nd.ni_dvp);
1733 		else
1734 			vput(nd.ni_dvp);
1735 		vrele(nd.ni_vp);
1736 		error = EEXIST;
1737 		goto out;
1738 	}
1739 
1740 	/* No cross-mount links! */
1741 	if (nd.ni_dvp->v_mount != vp->v_mount) {
1742 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1743 		vput(nd.ni_dvp);
1744 		error = EXDEV;
1745 		goto out;
1746 	}
1747 
1748 	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1749 out:
1750 	vrele(vp);
1751 	return (error);
1752 }
1753 
1754 /*
1755  * Make a symbolic link.
1756  */
1757 int
sys_symlink(struct proc * p,void * v,register_t * retval)1758 sys_symlink(struct proc *p, void *v, register_t *retval)
1759 {
1760 	struct sys_symlink_args /* {
1761 		syscallarg(const char *) path;
1762 		syscallarg(const char *) link;
1763 	} */ *uap = v;
1764 
1765 	return (dosymlinkat(p, SCARG(uap, path), AT_FDCWD, SCARG(uap, link)));
1766 }
1767 
1768 int
sys_symlinkat(struct proc * p,void * v,register_t * retval)1769 sys_symlinkat(struct proc *p, void *v, register_t *retval)
1770 {
1771 	struct sys_symlinkat_args /* {
1772 		syscallarg(const char *) path;
1773 		syscallarg(int) fd;
1774 		syscallarg(const char *) link;
1775 	} */ *uap = v;
1776 
1777 	return (dosymlinkat(p, SCARG(uap, path), SCARG(uap, fd),
1778 	    SCARG(uap, link)));
1779 }
1780 
1781 int
dosymlinkat(struct proc * p,const char * upath,int fd,const char * link)1782 dosymlinkat(struct proc *p, const char *upath, int fd, const char *link)
1783 {
1784 	struct vattr vattr;
1785 	char *path;
1786 	int error;
1787 	struct nameidata nd;
1788 
1789 	path = pool_get(&namei_pool, PR_WAITOK);
1790 	error = copyinstr(upath, path, MAXPATHLEN, NULL);
1791 	if (error)
1792 		goto out;
1793 	NDINITAT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, fd, link, p);
1794 	nd.ni_pledge = PLEDGE_CPATH;
1795 	nd.ni_unveil = UNVEIL_CREATE;
1796 	if ((error = namei(&nd)) != 0)
1797 		goto out;
1798 	if (nd.ni_vp) {
1799 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1800 		if (nd.ni_dvp == nd.ni_vp)
1801 			vrele(nd.ni_dvp);
1802 		else
1803 			vput(nd.ni_dvp);
1804 		vrele(nd.ni_vp);
1805 		error = EEXIST;
1806 		goto out;
1807 	}
1808 	VATTR_NULL(&vattr);
1809 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1810 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1811 out:
1812 	pool_put(&namei_pool, path);
1813 	return (error);
1814 }
1815 
1816 /*
1817  * Delete a name from the filesystem.
1818  */
1819 int
sys_unlink(struct proc * p,void * v,register_t * retval)1820 sys_unlink(struct proc *p, void *v, register_t *retval)
1821 {
1822 	struct sys_unlink_args /* {
1823 		syscallarg(const char *) path;
1824 	} */ *uap = v;
1825 
1826 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), 0));
1827 }
1828 
1829 int
sys_unlinkat(struct proc * p,void * v,register_t * retval)1830 sys_unlinkat(struct proc *p, void *v, register_t *retval)
1831 {
1832 	struct sys_unlinkat_args /* {
1833 		syscallarg(int) fd;
1834 		syscallarg(const char *) path;
1835 		syscallarg(int) flag;
1836 	} */ *uap = v;
1837 
1838 	return (dounlinkat(p, SCARG(uap, fd), SCARG(uap, path),
1839 	    SCARG(uap, flag)));
1840 }
1841 
1842 int
dounlinkat(struct proc * p,int fd,const char * path,int flag)1843 dounlinkat(struct proc *p, int fd, const char *path, int flag)
1844 {
1845 	struct vnode *vp;
1846 	int error;
1847 	struct nameidata nd;
1848 
1849 	if (flag & ~AT_REMOVEDIR)
1850 		return (EINVAL);
1851 
1852 	NDINITAT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1853 	    fd, path, p);
1854 	nd.ni_pledge = PLEDGE_CPATH;
1855 	nd.ni_unveil = UNVEIL_CREATE;
1856 	if ((error = namei(&nd)) != 0)
1857 		return (error);
1858 	vp = nd.ni_vp;
1859 
1860 	if (flag & AT_REMOVEDIR) {
1861 		if (vp->v_type != VDIR) {
1862 			error = ENOTDIR;
1863 			goto out;
1864 		}
1865 		/*
1866 		 * No rmdir "." please.
1867 		 */
1868 		if (nd.ni_dvp == vp) {
1869 			error = EINVAL;
1870 			goto out;
1871 		}
1872 		/*
1873 		 * A mounted on directory cannot be deleted.
1874 		 */
1875 		if (vp->v_mountedhere != NULL) {
1876 			error = EBUSY;
1877 			goto out;
1878 		}
1879 	}
1880 
1881 	/*
1882 	 * The root of a mounted filesystem cannot be deleted.
1883 	 */
1884 	if (vp->v_flag & VROOT)
1885 		error = EBUSY;
1886 out:
1887 	if (!error) {
1888 		if (flag & AT_REMOVEDIR) {
1889 			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1890 		} else {
1891 			(void)uvm_vnp_uncache(vp);
1892 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1893 		}
1894 	} else {
1895 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1896 		if (nd.ni_dvp == vp)
1897 			vrele(nd.ni_dvp);
1898 		else
1899 			vput(nd.ni_dvp);
1900 		vput(vp);
1901 	}
1902 	return (error);
1903 }
1904 
1905 /*
1906  * Reposition read/write file offset.
1907  */
1908 int
sys_lseek(struct proc * p,void * v,register_t * retval)1909 sys_lseek(struct proc *p, void *v, register_t *retval)
1910 {
1911 	struct sys_lseek_args /* {
1912 		syscallarg(int) fd;
1913 		syscallarg(off_t) offset;
1914 		syscallarg(int) whence;
1915 	} */ *uap = v;
1916 	struct filedesc *fdp = p->p_fd;
1917 	struct file *fp;
1918 	off_t offset;
1919 	int error;
1920 
1921 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1922 		return (EBADF);
1923 	if (fp->f_ops->fo_seek == NULL) {
1924 		error = ESPIPE;
1925 		goto bad;
1926 	}
1927 	offset = SCARG(uap, offset);
1928 
1929 	error = (*fp->f_ops->fo_seek)(fp, &offset, SCARG(uap, whence), p);
1930 	if (error)
1931 		goto bad;
1932 
1933 	*(off_t *)retval = offset;
1934 	mtx_enter(&fp->f_mtx);
1935 	fp->f_seek++;
1936 	mtx_leave(&fp->f_mtx);
1937 	error = 0;
1938  bad:
1939 	FRELE(fp, p);
1940 	return (error);
1941 }
1942 
1943 /*
1944  * Check access permissions.
1945  */
1946 int
sys_access(struct proc * p,void * v,register_t * retval)1947 sys_access(struct proc *p, void *v, register_t *retval)
1948 {
1949 	struct sys_access_args /* {
1950 		syscallarg(const char *) path;
1951 		syscallarg(int) amode;
1952 	} */ *uap = v;
1953 
1954 	return (dofaccessat(p, AT_FDCWD, SCARG(uap, path),
1955 	    SCARG(uap, amode), 0));
1956 }
1957 
1958 int
sys_faccessat(struct proc * p,void * v,register_t * retval)1959 sys_faccessat(struct proc *p, void *v, register_t *retval)
1960 {
1961 	struct sys_faccessat_args /* {
1962 		syscallarg(int) fd;
1963 		syscallarg(const char *) path;
1964 		syscallarg(int) amode;
1965 		syscallarg(int) flag;
1966 	} */ *uap = v;
1967 
1968 	return (dofaccessat(p, SCARG(uap, fd), SCARG(uap, path),
1969 	    SCARG(uap, amode), SCARG(uap, flag)));
1970 }
1971 
1972 int
dofaccessat(struct proc * p,int fd,const char * path,int amode,int flag)1973 dofaccessat(struct proc *p, int fd, const char *path, int amode, int flag)
1974 {
1975 	struct vnode *vp;
1976 	struct ucred *newcred, *oldcred;
1977 	struct nameidata nd;
1978 	int error;
1979 
1980 	if (amode & ~(R_OK | W_OK | X_OK))
1981 		return (EINVAL);
1982 	if (flag & ~AT_EACCESS)
1983 		return (EINVAL);
1984 
1985 	newcred = NULL;
1986 	oldcred = p->p_ucred;
1987 
1988 	/*
1989 	 * If access as real ids was requested and they really differ,
1990 	 * give the thread new creds with them reset
1991 	 */
1992 	if ((flag & AT_EACCESS) == 0 &&
1993 	    (oldcred->cr_uid != oldcred->cr_ruid ||
1994 	    (oldcred->cr_gid != oldcred->cr_rgid))) {
1995 		p->p_ucred = newcred = crdup(oldcred);
1996 		newcred->cr_uid = newcred->cr_ruid;
1997 		newcred->cr_gid = newcred->cr_rgid;
1998 	}
1999 
2000 	NDINITAT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2001 	nd.ni_pledge = PLEDGE_RPATH;
2002 	nd.ni_unveil = UNVEIL_READ;
2003 	if ((error = namei(&nd)) != 0)
2004 		goto out;
2005 	vp = nd.ni_vp;
2006 
2007 	/* Flags == 0 means only check for existence. */
2008 	if (amode) {
2009 		int vflags = 0;
2010 
2011 		if (amode & R_OK)
2012 			vflags |= VREAD;
2013 		if (amode & W_OK)
2014 			vflags |= VWRITE;
2015 		if (amode & X_OK)
2016 			vflags |= VEXEC;
2017 
2018 		error = VOP_ACCESS(vp, vflags, p->p_ucred, p);
2019 		if (!error && (vflags & VWRITE))
2020 			error = vn_writechk(vp);
2021 	}
2022 	vput(vp);
2023 out:
2024 	if (newcred != NULL) {
2025 		p->p_ucred = oldcred;
2026 		crfree(newcred);
2027 	}
2028 	return (error);
2029 }
2030 
2031 /*
2032  * Get file status; this version follows links.
2033  */
2034 int
sys_stat(struct proc * p,void * v,register_t * retval)2035 sys_stat(struct proc *p, void *v, register_t *retval)
2036 {
2037 	struct sys_stat_args /* {
2038 		syscallarg(const char *) path;
2039 		syscallarg(struct stat *) ub;
2040 	} */ *uap = v;
2041 
2042 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub), 0));
2043 }
2044 
2045 int
sys_fstatat(struct proc * p,void * v,register_t * retval)2046 sys_fstatat(struct proc *p, void *v, register_t *retval)
2047 {
2048 	struct sys_fstatat_args /* {
2049 		syscallarg(int) fd;
2050 		syscallarg(const char *) path;
2051 		syscallarg(struct stat *) buf;
2052 		syscallarg(int) flag;
2053 	} */ *uap = v;
2054 
2055 	return (dofstatat(p, SCARG(uap, fd), SCARG(uap, path),
2056 	    SCARG(uap, buf), SCARG(uap, flag)));
2057 }
2058 
2059 int
dofstatat(struct proc * p,int fd,const char * path,struct stat * buf,int flag)2060 dofstatat(struct proc *p, int fd, const char *path, struct stat *buf, int flag)
2061 {
2062 	struct stat sb;
2063 	int error, follow;
2064 	struct nameidata nd;
2065 
2066 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2067 		return (EINVAL);
2068 
2069 
2070 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2071 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2072 	nd.ni_pledge = PLEDGE_RPATH;
2073 	nd.ni_unveil = UNVEIL_READ;
2074 	if ((error = namei(&nd)) != 0)
2075 		return (error);
2076 	error = vn_stat(nd.ni_vp, &sb, p);
2077 	vput(nd.ni_vp);
2078 	if (error)
2079 		return (error);
2080 	/* Don't let non-root see generation numbers (for NFS security) */
2081 	if (suser(p))
2082 		sb.st_gen = 0;
2083 	error = copyout(&sb, buf, sizeof(sb));
2084 #ifdef KTRACE
2085 	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
2086 		ktrstat(p, &sb);
2087 #endif
2088 	return (error);
2089 }
2090 
2091 /*
2092  * Get file status; this version does not follow links.
2093  */
2094 int
sys_lstat(struct proc * p,void * v,register_t * retval)2095 sys_lstat(struct proc *p, void *v, register_t *retval)
2096 {
2097 	struct sys_lstat_args /* {
2098 		syscallarg(const char *) path;
2099 		syscallarg(struct stat *) ub;
2100 	} */ *uap = v;
2101 
2102 	return (dofstatat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, ub),
2103 	    AT_SYMLINK_NOFOLLOW));
2104 }
2105 
2106 /*
2107  * Get configurable pathname variables.
2108  */
2109 int
sys_pathconf(struct proc * p,void * v,register_t * retval)2110 sys_pathconf(struct proc *p, void *v, register_t *retval)
2111 {
2112 	struct sys_pathconf_args /* {
2113 		syscallarg(const char *) path;
2114 		syscallarg(int) name;
2115 	} */ *uap = v;
2116 
2117 	return dopathconfat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, name),
2118 	    0, retval);
2119 }
2120 
2121 int
sys_pathconfat(struct proc * p,void * v,register_t * retval)2122 sys_pathconfat(struct proc *p, void *v, register_t *retval)
2123 {
2124 	struct sys_pathconfat_args /* {
2125 		syscallarg(int) fd;
2126 		syscallarg(const char *) path;
2127 		syscallarg(int) name;
2128 		syscallarg(int) flag;
2129 	} */ *uap = v;
2130 
2131 	return dopathconfat(p, SCARG(uap, fd), SCARG(uap, path),
2132 	    SCARG(uap, name), SCARG(uap, flag), retval);
2133 }
2134 
2135 int
dopathconfat(struct proc * p,int fd,const char * path,int name,int flag,register_t * retval)2136 dopathconfat(struct proc *p, int fd, const char *path, int name, int flag,
2137     register_t *retval)
2138 {
2139 	int follow, error;
2140 	struct nameidata nd;
2141 
2142 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2143 		return EINVAL;
2144 
2145 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2146 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2147 	nd.ni_pledge = PLEDGE_RPATH;
2148 	nd.ni_unveil = UNVEIL_READ;
2149 	if ((error = namei(&nd)) != 0)
2150 		return (error);
2151 	error = VOP_PATHCONF(nd.ni_vp, name, retval);
2152 	vput(nd.ni_vp);
2153 	return (error);
2154 }
2155 
2156 /*
2157  * Return target name of a symbolic link.
2158  */
2159 int
sys_readlink(struct proc * p,void * v,register_t * retval)2160 sys_readlink(struct proc *p, void *v, register_t *retval)
2161 {
2162 	struct sys_readlink_args /* {
2163 		syscallarg(const char *) path;
2164 		syscallarg(char *) buf;
2165 		syscallarg(size_t) count;
2166 	} */ *uap = v;
2167 
2168 	return (doreadlinkat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, buf),
2169 	    SCARG(uap, count), retval));
2170 }
2171 
2172 int
sys_readlinkat(struct proc * p,void * v,register_t * retval)2173 sys_readlinkat(struct proc *p, void *v, register_t *retval)
2174 {
2175 	struct sys_readlinkat_args /* {
2176 		syscallarg(int) fd;
2177 		syscallarg(const char *) path;
2178 		syscallarg(char *) buf;
2179 		syscallarg(size_t) count;
2180 	} */ *uap = v;
2181 
2182 	return (doreadlinkat(p, SCARG(uap, fd), SCARG(uap, path),
2183 	    SCARG(uap, buf), SCARG(uap, count), retval));
2184 }
2185 
2186 int
doreadlinkat(struct proc * p,int fd,const char * path,char * buf,size_t count,register_t * retval)2187 doreadlinkat(struct proc *p, int fd, const char *path, char *buf,
2188     size_t count, register_t *retval)
2189 {
2190 	struct vnode *vp;
2191 	struct iovec aiov;
2192 	struct uio auio;
2193 	int error;
2194 	struct nameidata nd;
2195 
2196 	NDINITAT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, fd, path, p);
2197 	nd.ni_pledge = PLEDGE_RPATH;
2198 	nd.ni_unveil = UNVEIL_READ;
2199 	if ((error = namei(&nd)) != 0)
2200 		return (error);
2201 	vp = nd.ni_vp;
2202 	if (vp->v_type != VLNK)
2203 		error = EINVAL;
2204 	else {
2205 		aiov.iov_base = buf;
2206 		aiov.iov_len = count;
2207 		auio.uio_iov = &aiov;
2208 		auio.uio_iovcnt = 1;
2209 		auio.uio_offset = 0;
2210 		auio.uio_rw = UIO_READ;
2211 		auio.uio_segflg = UIO_USERSPACE;
2212 		auio.uio_procp = p;
2213 		auio.uio_resid = count;
2214 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2215 		*retval = count - auio.uio_resid;
2216 	}
2217 	vput(vp);
2218 	return (error);
2219 }
2220 
2221 /*
2222  * Change flags of a file given a path name.
2223  */
2224 int
sys_chflags(struct proc * p,void * v,register_t * retval)2225 sys_chflags(struct proc *p, void *v, register_t *retval)
2226 {
2227 	struct sys_chflags_args /* {
2228 		syscallarg(const char *) path;
2229 		syscallarg(u_int) flags;
2230 	} */ *uap = v;
2231 
2232 	return (dochflagsat(p, AT_FDCWD, SCARG(uap, path),
2233 	    SCARG(uap, flags), 0));
2234 }
2235 
2236 int
sys_chflagsat(struct proc * p,void * v,register_t * retval)2237 sys_chflagsat(struct proc *p, void *v, register_t *retval)
2238 {
2239 	struct sys_chflagsat_args /* {
2240 		syscallarg(int) fd;
2241 		syscallarg(const char *) path;
2242 		syscallarg(u_int) flags;
2243 		syscallarg(int) atflags;
2244 	} */ *uap = v;
2245 
2246 	return (dochflagsat(p, SCARG(uap, fd), SCARG(uap, path),
2247 	    SCARG(uap, flags), SCARG(uap, atflags)));
2248 }
2249 
2250 int
dochflagsat(struct proc * p,int fd,const char * path,u_int flags,int atflags)2251 dochflagsat(struct proc *p, int fd, const char *path, u_int flags, int atflags)
2252 {
2253 	struct nameidata nd;
2254 	int error, follow;
2255 
2256 	if (atflags & ~AT_SYMLINK_NOFOLLOW)
2257 		return (EINVAL);
2258 
2259 	follow = (atflags & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2260 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2261 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2262 	nd.ni_unveil = UNVEIL_WRITE;
2263 	if ((error = namei(&nd)) != 0)
2264 		return (error);
2265 	return (dovchflags(p, nd.ni_vp, flags));
2266 }
2267 
2268 /*
2269  * Change flags of a file given a file descriptor.
2270  */
2271 int
sys_fchflags(struct proc * p,void * v,register_t * retval)2272 sys_fchflags(struct proc *p, void *v, register_t *retval)
2273 {
2274 	struct sys_fchflags_args /* {
2275 		syscallarg(int) fd;
2276 		syscallarg(u_int) flags;
2277 	} */ *uap = v;
2278 	struct file *fp;
2279 	struct vnode *vp;
2280 	int error;
2281 
2282 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2283 		return (error);
2284 	vp = fp->f_data;
2285 	vref(vp);
2286 	FRELE(fp, p);
2287 	return (dovchflags(p, vp, SCARG(uap, flags)));
2288 }
2289 
2290 int
dovchflags(struct proc * p,struct vnode * vp,u_int flags)2291 dovchflags(struct proc *p, struct vnode *vp, u_int flags)
2292 {
2293 	struct vattr vattr;
2294 	int error;
2295 
2296 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2297 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2298 		error = EROFS;
2299 	else if (flags == VNOVAL)
2300 		error = EINVAL;
2301 	else {
2302 		if (suser(p)) {
2303 			if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
2304 			    != 0)
2305 				goto out;
2306 			if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2307 				error = EINVAL;
2308 				goto out;
2309 			}
2310 		}
2311 		VATTR_NULL(&vattr);
2312 		vattr.va_flags = flags;
2313 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2314 	}
2315 out:
2316 	vput(vp);
2317 	return (error);
2318 }
2319 
2320 /*
2321  * Change mode of a file given path name.
2322  */
2323 int
sys_chmod(struct proc * p,void * v,register_t * retval)2324 sys_chmod(struct proc *p, void *v, register_t *retval)
2325 {
2326 	struct sys_chmod_args /* {
2327 		syscallarg(const char *) path;
2328 		syscallarg(mode_t) mode;
2329 	} */ *uap = v;
2330 
2331 	return (dofchmodat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 0));
2332 }
2333 
2334 int
sys_fchmodat(struct proc * p,void * v,register_t * retval)2335 sys_fchmodat(struct proc *p, void *v, register_t *retval)
2336 {
2337 	struct sys_fchmodat_args /* {
2338 		syscallarg(int) fd;
2339 		syscallarg(const char *) path;
2340 		syscallarg(mode_t) mode;
2341 		syscallarg(int) flag;
2342 	} */ *uap = v;
2343 
2344 	return (dofchmodat(p, SCARG(uap, fd), SCARG(uap, path),
2345 	    SCARG(uap, mode), SCARG(uap, flag)));
2346 }
2347 
2348 int
dofchmodat(struct proc * p,int fd,const char * path,mode_t mode,int flag)2349 dofchmodat(struct proc *p, int fd, const char *path, mode_t mode, int flag)
2350 {
2351 	struct vnode *vp;
2352 	struct vattr vattr;
2353 	int error, follow;
2354 	struct nameidata nd;
2355 
2356 	if (mode & ~(S_IFMT | ALLPERMS))
2357 		return (EINVAL);
2358 	if ((p->p_p->ps_flags & PS_PLEDGE))
2359 		mode &= ACCESSPERMS;
2360 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2361 		return (EINVAL);
2362 
2363 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2364 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2365 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2366 	nd.ni_unveil = UNVEIL_WRITE;
2367 	if ((error = namei(&nd)) != 0)
2368 		return (error);
2369 	vp = nd.ni_vp;
2370 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2371 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2372 		error = EROFS;
2373 	else {
2374 		VATTR_NULL(&vattr);
2375 		vattr.va_mode = mode & ALLPERMS;
2376 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2377 	}
2378 	vput(vp);
2379 	return (error);
2380 }
2381 
2382 /*
2383  * Change mode of a file given a file descriptor.
2384  */
2385 int
sys_fchmod(struct proc * p,void * v,register_t * retval)2386 sys_fchmod(struct proc *p, void *v, register_t *retval)
2387 {
2388 	struct sys_fchmod_args /* {
2389 		syscallarg(int) fd;
2390 		syscallarg(mode_t) mode;
2391 	} */ *uap = v;
2392 	struct vattr vattr;
2393 	struct vnode *vp;
2394 	struct file *fp;
2395 	mode_t mode = SCARG(uap, mode);
2396 	int error;
2397 
2398 	if (mode & ~(S_IFMT | ALLPERMS))
2399 		return (EINVAL);
2400 	if ((p->p_p->ps_flags & PS_PLEDGE))
2401 		mode &= ACCESSPERMS;
2402 
2403 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2404 		return (error);
2405 	vp = fp->f_data;
2406 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2407 	if (vp->v_mount && vp->v_mount->mnt_flag & MNT_RDONLY)
2408 		error = EROFS;
2409 	else {
2410 		VATTR_NULL(&vattr);
2411 		vattr.va_mode = mode & ALLPERMS;
2412 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2413 	}
2414 	VOP_UNLOCK(vp);
2415 	FRELE(fp, p);
2416 	return (error);
2417 }
2418 
2419 /*
2420  * Set ownership given a path name.
2421  */
2422 int
sys_chown(struct proc * p,void * v,register_t * retval)2423 sys_chown(struct proc *p, void *v, register_t *retval)
2424 {
2425 	struct sys_chown_args /* {
2426 		syscallarg(const char *) path;
2427 		syscallarg(uid_t) uid;
2428 		syscallarg(gid_t) gid;
2429 	} */ *uap = v;
2430 
2431 	return (dofchownat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, uid),
2432 	    SCARG(uap, gid), 0));
2433 }
2434 
2435 int
sys_fchownat(struct proc * p,void * v,register_t * retval)2436 sys_fchownat(struct proc *p, void *v, register_t *retval)
2437 {
2438 	struct sys_fchownat_args /* {
2439 		syscallarg(int) fd;
2440 		syscallarg(const char *) path;
2441 		syscallarg(uid_t) uid;
2442 		syscallarg(gid_t) gid;
2443 		syscallarg(int) flag;
2444 	} */ *uap = v;
2445 
2446 	return (dofchownat(p, SCARG(uap, fd), SCARG(uap, path),
2447 	    SCARG(uap, uid), SCARG(uap, gid), SCARG(uap, flag)));
2448 }
2449 
2450 int
dofchownat(struct proc * p,int fd,const char * path,uid_t uid,gid_t gid,int flag)2451 dofchownat(struct proc *p, int fd, const char *path, uid_t uid, gid_t gid,
2452     int flag)
2453 {
2454 	struct vnode *vp;
2455 	struct vattr vattr;
2456 	int error, follow;
2457 	struct nameidata nd;
2458 	mode_t mode;
2459 
2460 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2461 		return (EINVAL);
2462 
2463 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2464 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2465 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2466 	nd.ni_unveil = UNVEIL_WRITE;
2467 	if ((error = namei(&nd)) != 0)
2468 		return (error);
2469 	vp = nd.ni_vp;
2470 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2471 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2472 		error = EROFS;
2473 	else {
2474 		if ((error = pledge_chown(p, uid, gid)))
2475 			goto out;
2476 		if ((uid != -1 || gid != -1) &&
2477 		    !vnoperm(vp) &&
2478 		    (suser(p) || suid_clear)) {
2479 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2480 			if (error)
2481 				goto out;
2482 			mode = vattr.va_mode & ~(VSUID | VSGID);
2483 			if (mode == vattr.va_mode)
2484 				mode = VNOVAL;
2485 		} else
2486 			mode = VNOVAL;
2487 		VATTR_NULL(&vattr);
2488 		vattr.va_uid = uid;
2489 		vattr.va_gid = gid;
2490 		vattr.va_mode = mode;
2491 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2492 	}
2493 out:
2494 	vput(vp);
2495 	return (error);
2496 }
2497 
2498 /*
2499  * Set ownership given a path name, without following links.
2500  */
2501 int
sys_lchown(struct proc * p,void * v,register_t * retval)2502 sys_lchown(struct proc *p, void *v, register_t *retval)
2503 {
2504 	struct sys_lchown_args /* {
2505 		syscallarg(const char *) path;
2506 		syscallarg(uid_t) uid;
2507 		syscallarg(gid_t) gid;
2508 	} */ *uap = v;
2509 	struct vnode *vp;
2510 	struct vattr vattr;
2511 	int error;
2512 	struct nameidata nd;
2513 	mode_t mode;
2514 	uid_t uid = SCARG(uap, uid);
2515 	gid_t gid = SCARG(uap, gid);
2516 
2517 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2518 	nd.ni_pledge = PLEDGE_CHOWN | PLEDGE_RPATH;
2519 	nd.ni_unveil = UNVEIL_WRITE;
2520 	if ((error = namei(&nd)) != 0)
2521 		return (error);
2522 	vp = nd.ni_vp;
2523 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2524 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2525 		error = EROFS;
2526 	else {
2527 		if ((error = pledge_chown(p, uid, gid)))
2528 			goto out;
2529 		if ((uid != -1 || gid != -1) &&
2530 		    !vnoperm(vp) &&
2531 		    (suser(p) || suid_clear)) {
2532 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2533 			if (error)
2534 				goto out;
2535 			mode = vattr.va_mode & ~(VSUID | VSGID);
2536 			if (mode == vattr.va_mode)
2537 				mode = VNOVAL;
2538 		} else
2539 			mode = VNOVAL;
2540 		VATTR_NULL(&vattr);
2541 		vattr.va_uid = uid;
2542 		vattr.va_gid = gid;
2543 		vattr.va_mode = mode;
2544 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2545 	}
2546 out:
2547 	vput(vp);
2548 	return (error);
2549 }
2550 
2551 /*
2552  * Set ownership given a file descriptor.
2553  */
2554 int
sys_fchown(struct proc * p,void * v,register_t * retval)2555 sys_fchown(struct proc *p, void *v, register_t *retval)
2556 {
2557 	struct sys_fchown_args /* {
2558 		syscallarg(int) fd;
2559 		syscallarg(uid_t) uid;
2560 		syscallarg(gid_t) gid;
2561 	} */ *uap = v;
2562 	struct vnode *vp;
2563 	struct vattr vattr;
2564 	int error;
2565 	struct file *fp;
2566 	mode_t mode;
2567 	uid_t uid = SCARG(uap, uid);
2568 	gid_t gid = SCARG(uap, gid);
2569 
2570 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2571 		return (error);
2572 	vp = fp->f_data;
2573 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2574 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
2575 		error = EROFS;
2576 	else {
2577 		if ((error = pledge_chown(p, uid, gid)))
2578 			goto out;
2579 		if ((uid != -1 || gid != -1) &&
2580 		    !vnoperm(vp) &&
2581 		    (suser(p) || suid_clear)) {
2582 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
2583 			if (error)
2584 				goto out;
2585 			mode = vattr.va_mode & ~(VSUID | VSGID);
2586 			if (mode == vattr.va_mode)
2587 				mode = VNOVAL;
2588 		} else
2589 			mode = VNOVAL;
2590 		VATTR_NULL(&vattr);
2591 		vattr.va_uid = uid;
2592 		vattr.va_gid = gid;
2593 		vattr.va_mode = mode;
2594 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2595 	}
2596 out:
2597 	VOP_UNLOCK(vp);
2598 	FRELE(fp, p);
2599 	return (error);
2600 }
2601 
2602 /*
2603  * Set the access and modification times given a path name.
2604  */
2605 int
sys_utimes(struct proc * p,void * v,register_t * retval)2606 sys_utimes(struct proc *p, void *v, register_t *retval)
2607 {
2608 	struct sys_utimes_args /* {
2609 		syscallarg(const char *) path;
2610 		syscallarg(const struct timeval *) tptr;
2611 	} */ *uap = v;
2612 
2613 	struct timespec ts[2];
2614 	struct timeval tv[2];
2615 	const struct timeval *tvp;
2616 	int error;
2617 
2618 	tvp = SCARG(uap, tptr);
2619 	if (tvp != NULL) {
2620 		error = copyin(tvp, tv, sizeof(tv));
2621 		if (error)
2622 			return (error);
2623 #ifdef KTRACE
2624 		if (KTRPOINT(p, KTR_STRUCT))
2625 			ktrabstimeval(p, &tv);
2626 #endif
2627 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2628 			return (EINVAL);
2629 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2630 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2631 	} else
2632 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2633 
2634 	return (doutimensat(p, AT_FDCWD, SCARG(uap, path), ts, 0));
2635 }
2636 
2637 int
sys_utimensat(struct proc * p,void * v,register_t * retval)2638 sys_utimensat(struct proc *p, void *v, register_t *retval)
2639 {
2640 	struct sys_utimensat_args /* {
2641 		syscallarg(int) fd;
2642 		syscallarg(const char *) path;
2643 		syscallarg(const struct timespec *) times;
2644 		syscallarg(int) flag;
2645 	} */ *uap = v;
2646 
2647 	struct timespec ts[2];
2648 	const struct timespec *tsp;
2649 	int error, i;
2650 
2651 	tsp = SCARG(uap, times);
2652 	if (tsp != NULL) {
2653 		error = copyin(tsp, ts, sizeof(ts));
2654 		if (error)
2655 			return (error);
2656 		for (i = 0; i < nitems(ts); i++) {
2657 			if (ts[i].tv_nsec == UTIME_NOW)
2658 				continue;
2659 			if (ts[i].tv_nsec == UTIME_OMIT)
2660 				continue;
2661 #ifdef KTRACE
2662 			if (KTRPOINT(p, KTR_STRUCT))
2663 				ktrabstimespec(p, &ts[i]);
2664 #endif
2665 			if (!timespecisvalid(&ts[i]))
2666 				return (EINVAL);
2667 		}
2668 	} else
2669 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2670 
2671 	return (doutimensat(p, SCARG(uap, fd), SCARG(uap, path), ts,
2672 	    SCARG(uap, flag)));
2673 }
2674 
2675 int
doutimensat(struct proc * p,int fd,const char * path,struct timespec ts[2],int flag)2676 doutimensat(struct proc *p, int fd, const char *path,
2677     struct timespec ts[2], int flag)
2678 {
2679 	struct vnode *vp;
2680 	int error, follow;
2681 	struct nameidata nd;
2682 
2683 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2684 		return (EINVAL);
2685 
2686 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2687 	NDINITAT(&nd, LOOKUP, follow, UIO_USERSPACE, fd, path, p);
2688 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2689 	nd.ni_unveil = UNVEIL_WRITE;
2690 	if ((error = namei(&nd)) != 0)
2691 		return (error);
2692 	vp = nd.ni_vp;
2693 
2694 	return (dovutimens(p, vp, ts));
2695 }
2696 
2697 int
dovutimens(struct proc * p,struct vnode * vp,struct timespec ts[2])2698 dovutimens(struct proc *p, struct vnode *vp, struct timespec ts[2])
2699 {
2700 	struct vattr vattr;
2701 	struct timespec now;
2702 	int error;
2703 
2704 #ifdef KTRACE
2705 	/* if they're both UTIME_NOW, then don't report either */
2706 	if ((ts[0].tv_nsec != UTIME_NOW || ts[1].tv_nsec != UTIME_NOW) &&
2707 	    KTRPOINT(p, KTR_STRUCT)) {
2708 		ktrabstimespec(p, &ts[0]);
2709 		ktrabstimespec(p, &ts[1]);
2710 	}
2711 #endif
2712 
2713 	VATTR_NULL(&vattr);
2714 
2715 	/*  make sure ctime is updated even if neither mtime nor atime is */
2716 	vattr.va_vaflags = VA_UTIMES_CHANGE;
2717 
2718 	if (ts[0].tv_nsec == UTIME_NOW || ts[1].tv_nsec == UTIME_NOW) {
2719 		if (ts[0].tv_nsec == UTIME_NOW && ts[1].tv_nsec == UTIME_NOW)
2720 			vattr.va_vaflags |= VA_UTIMES_NULL;
2721 
2722 		getnanotime(&now);
2723 		if (ts[0].tv_nsec == UTIME_NOW)
2724 			ts[0] = now;
2725 		if (ts[1].tv_nsec == UTIME_NOW)
2726 			ts[1] = now;
2727 	}
2728 
2729 	if (ts[0].tv_nsec != UTIME_OMIT)
2730 		vattr.va_atime = ts[0];
2731 	if (ts[1].tv_nsec != UTIME_OMIT)
2732 		vattr.va_mtime = ts[1];
2733 
2734 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2735 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2736 		error = EROFS;
2737 	else
2738 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2739 	vput(vp);
2740 	return (error);
2741 }
2742 
2743 /*
2744  * Set the access and modification times given a file descriptor.
2745  */
2746 int
sys_futimes(struct proc * p,void * v,register_t * retval)2747 sys_futimes(struct proc *p, void *v, register_t *retval)
2748 {
2749 	struct sys_futimes_args /* {
2750 		syscallarg(int) fd;
2751 		syscallarg(const struct timeval *) tptr;
2752 	} */ *uap = v;
2753 	struct timeval tv[2];
2754 	struct timespec ts[2];
2755 	const struct timeval *tvp;
2756 	int error;
2757 
2758 	tvp = SCARG(uap, tptr);
2759 	if (tvp != NULL) {
2760 		error = copyin(tvp, tv, sizeof(tv));
2761 		if (error)
2762 			return (error);
2763 #ifdef KTRACE
2764 		if (KTRPOINT(p, KTR_STRUCT)) {
2765 			ktrabstimeval(p, &tv[0]);
2766 			ktrabstimeval(p, &tv[1]);
2767 		}
2768 #endif
2769 		if (!timerisvalid(&tv[0]) || !timerisvalid(&tv[1]))
2770 			return (EINVAL);
2771 		TIMEVAL_TO_TIMESPEC(&tv[0], &ts[0]);
2772 		TIMEVAL_TO_TIMESPEC(&tv[1], &ts[1]);
2773 	} else
2774 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2775 
2776 	return (dofutimens(p, SCARG(uap, fd), ts));
2777 }
2778 
2779 int
sys_futimens(struct proc * p,void * v,register_t * retval)2780 sys_futimens(struct proc *p, void *v, register_t *retval)
2781 {
2782 	struct sys_futimens_args /* {
2783 		syscallarg(int) fd;
2784 		syscallarg(const struct timespec *) times;
2785 	} */ *uap = v;
2786 	struct timespec ts[2];
2787 	const struct timespec *tsp;
2788 	int error, i;
2789 
2790 	tsp = SCARG(uap, times);
2791 	if (tsp != NULL) {
2792 		error = copyin(tsp, ts, sizeof(ts));
2793 		if (error)
2794 			return (error);
2795 		for (i = 0; i < nitems(ts); i++) {
2796 			if (ts[i].tv_nsec == UTIME_NOW)
2797 				continue;
2798 			if (ts[i].tv_nsec == UTIME_OMIT)
2799 				continue;
2800 #ifdef KTRACE
2801 			if (KTRPOINT(p, KTR_STRUCT))
2802 				ktrabstimespec(p, &ts[i]);
2803 #endif
2804 			if (!timespecisvalid(&ts[i]))
2805 				return (EINVAL);
2806 		}
2807 	} else
2808 		ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
2809 
2810 	return (dofutimens(p, SCARG(uap, fd), ts));
2811 }
2812 
2813 int
dofutimens(struct proc * p,int fd,struct timespec ts[2])2814 dofutimens(struct proc *p, int fd, struct timespec ts[2])
2815 {
2816 	struct file *fp;
2817 	struct vnode *vp;
2818 	int error;
2819 
2820 	if ((error = getvnode(p, fd, &fp)) != 0)
2821 		return (error);
2822 	vp = fp->f_data;
2823 	vref(vp);
2824 	FRELE(fp, p);
2825 
2826 	return (dovutimens(p, vp, ts));
2827 }
2828 
2829 /*
2830  * Truncate a file given a vnode.
2831  */
2832 int
dotruncate(struct proc * p,struct vnode * vp,off_t len)2833 dotruncate(struct proc *p, struct vnode *vp, off_t len)
2834 {
2835 	struct vattr vattr;
2836 	int error;
2837 
2838 	if (len < 0)
2839 		return EINVAL;
2840 	if (vp->v_type == VDIR)
2841 		return EISDIR;
2842 	if ((error = vn_writechk(vp)) != 0)
2843 		return error;
2844 	if (vp->v_type == VREG && len > lim_cur_proc(p, RLIMIT_FSIZE)) {
2845 		if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
2846 			return error;
2847 		if (len > vattr.va_size) {
2848 			/* if extending over the limit, send signal and fail */
2849 			psignal(p, SIGXFSZ);
2850 			return EFBIG;
2851 		}
2852 	}
2853 	VATTR_NULL(&vattr);
2854 	vattr.va_size = len;
2855 	return VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2856 }
2857 
2858 /*
2859  * Truncate a file given its path name.
2860  */
2861 int
sys_truncate(struct proc * p,void * v,register_t * retval)2862 sys_truncate(struct proc *p, void *v, register_t *retval)
2863 {
2864 	struct sys_truncate_args /* {
2865 		syscallarg(const char *) path;
2866 		syscallarg(off_t) length;
2867 	} */ *uap = v;
2868 	struct vnode *vp;
2869 	int error;
2870 	struct nameidata nd;
2871 
2872 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2873 	nd.ni_pledge = PLEDGE_FATTR | PLEDGE_RPATH;
2874 	nd.ni_unveil = UNVEIL_WRITE;
2875 	if ((error = namei(&nd)) != 0)
2876 		return (error);
2877 	vp = nd.ni_vp;
2878 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2879 	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0)
2880 		error = dotruncate(p, vp, SCARG(uap, length));
2881 	vput(vp);
2882 	return (error);
2883 }
2884 
2885 /*
2886  * Truncate a file given a file descriptor.
2887  */
2888 int
sys_ftruncate(struct proc * p,void * v,register_t * retval)2889 sys_ftruncate(struct proc *p, void *v, register_t *retval)
2890 {
2891 	struct sys_ftruncate_args /* {
2892 		syscallarg(int) fd;
2893 		syscallarg(off_t) length;
2894 	} */ *uap = v;
2895 	struct vnode *vp;
2896 	struct file *fp;
2897 	int error;
2898 
2899 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2900 		return (error);
2901 	if ((fp->f_flag & FWRITE) == 0) {
2902 		error = EINVAL;
2903 		goto bad;
2904 	}
2905 	vp = fp->f_data;
2906 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2907 	error = dotruncate(p, vp, SCARG(uap, length));
2908 	VOP_UNLOCK(vp);
2909 bad:
2910 	FRELE(fp, p);
2911 	return (error);
2912 }
2913 
2914 /*
2915  * Sync an open file.
2916  */
2917 int
sys_fsync(struct proc * p,void * v,register_t * retval)2918 sys_fsync(struct proc *p, void *v, register_t *retval)
2919 {
2920 	struct sys_fsync_args /* {
2921 		syscallarg(int) fd;
2922 	} */ *uap = v;
2923 	struct vnode *vp;
2924 	struct file *fp;
2925 	int error;
2926 
2927 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
2928 		return (error);
2929 	vp = fp->f_data;
2930 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2931 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2932 
2933 	VOP_UNLOCK(vp);
2934 	FRELE(fp, p);
2935 	return (error);
2936 }
2937 
2938 /*
2939  * Rename files.  Source and destination must either both be directories,
2940  * or both not be directories.  If target is a directory, it must be empty.
2941  */
2942 int
sys_rename(struct proc * p,void * v,register_t * retval)2943 sys_rename(struct proc *p, void *v, register_t *retval)
2944 {
2945 	struct sys_rename_args /* {
2946 		syscallarg(const char *) from;
2947 		syscallarg(const char *) to;
2948 	} */ *uap = v;
2949 
2950 	return (dorenameat(p, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
2951 	    SCARG(uap, to)));
2952 }
2953 
2954 int
sys_renameat(struct proc * p,void * v,register_t * retval)2955 sys_renameat(struct proc *p, void *v, register_t *retval)
2956 {
2957 	struct sys_renameat_args /* {
2958 		syscallarg(int) fromfd;
2959 		syscallarg(const char *) from;
2960 		syscallarg(int) tofd;
2961 		syscallarg(const char *) to;
2962 	} */ *uap = v;
2963 
2964 	return (dorenameat(p, SCARG(uap, fromfd), SCARG(uap, from),
2965 	    SCARG(uap, tofd), SCARG(uap, to)));
2966 }
2967 
2968 int
dorenameat(struct proc * p,int fromfd,const char * from,int tofd,const char * to)2969 dorenameat(struct proc *p, int fromfd, const char *from, int tofd,
2970     const char *to)
2971 {
2972 	struct vnode *tvp, *fvp, *tdvp;
2973 	struct nameidata fromnd, tond;
2974 	int error;
2975 	int flags;
2976 
2977 	NDINITAT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2978 	    fromfd, from, p);
2979 	fromnd.ni_pledge = PLEDGE_RPATH | PLEDGE_CPATH;
2980 	fromnd.ni_unveil = UNVEIL_READ | UNVEIL_CREATE;
2981 	if ((error = namei(&fromnd)) != 0)
2982 		return (error);
2983 	fvp = fromnd.ni_vp;
2984 
2985 	flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2986 	/*
2987 	 * rename("foo/", "bar/");  is  OK
2988 	 */
2989 	if (fvp->v_type == VDIR)
2990 		flags |= STRIPSLASHES;
2991 
2992 	NDINITAT(&tond, RENAME, flags, UIO_USERSPACE, tofd, to, p);
2993 	tond.ni_pledge = PLEDGE_CPATH;
2994 	tond.ni_unveil = UNVEIL_CREATE;
2995 	if ((error = namei(&tond)) != 0) {
2996 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2997 		vrele(fromnd.ni_dvp);
2998 		vrele(fvp);
2999 		goto out1;
3000 	}
3001 	tdvp = tond.ni_dvp;
3002 	tvp = tond.ni_vp;
3003 	if (tvp != NULL) {
3004 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3005 			error = ENOTDIR;
3006 			goto out;
3007 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3008 			error = EISDIR;
3009 			goto out;
3010 		}
3011 	}
3012 	if (fvp == tdvp)
3013 		error = EINVAL;
3014 	/*
3015 	 * If source is the same as the destination (that is the
3016 	 * same inode number)
3017 	 */
3018 	if (fvp == tvp)
3019 		error = -1;
3020 out:
3021 	if (!error) {
3022 		if (tvp) {
3023 			(void)uvm_vnp_uncache(tvp);
3024 		}
3025 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3026 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3027 	} else {
3028 		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3029 		if (tdvp == tvp)
3030 			vrele(tdvp);
3031 		else
3032 			vput(tdvp);
3033 		if (tvp)
3034 			vput(tvp);
3035 		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3036 		vrele(fromnd.ni_dvp);
3037 		vrele(fvp);
3038 	}
3039 	vrele(tond.ni_startdir);
3040 	pool_put(&namei_pool, tond.ni_cnd.cn_pnbuf);
3041 out1:
3042 	if (fromnd.ni_startdir)
3043 		vrele(fromnd.ni_startdir);
3044 	pool_put(&namei_pool, fromnd.ni_cnd.cn_pnbuf);
3045 	if (error == -1)
3046 		return (0);
3047 	return (error);
3048 }
3049 
3050 /*
3051  * Make a directory file.
3052  */
3053 int
sys_mkdir(struct proc * p,void * v,register_t * retval)3054 sys_mkdir(struct proc *p, void *v, register_t *retval)
3055 {
3056 	struct sys_mkdir_args /* {
3057 		syscallarg(const char *) path;
3058 		syscallarg(mode_t) mode;
3059 	} */ *uap = v;
3060 
3061 	return (domkdirat(p, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)));
3062 }
3063 
3064 int
sys_mkdirat(struct proc * p,void * v,register_t * retval)3065 sys_mkdirat(struct proc *p, void *v, register_t *retval)
3066 {
3067 	struct sys_mkdirat_args /* {
3068 		syscallarg(int) fd;
3069 		syscallarg(const char *) path;
3070 		syscallarg(mode_t) mode;
3071 	} */ *uap = v;
3072 
3073 	return (domkdirat(p, SCARG(uap, fd), SCARG(uap, path),
3074 	    SCARG(uap, mode)));
3075 }
3076 
3077 int
domkdirat(struct proc * p,int fd,const char * path,mode_t mode)3078 domkdirat(struct proc *p, int fd, const char *path, mode_t mode)
3079 {
3080 	struct vnode *vp;
3081 	struct vattr vattr;
3082 	int error;
3083 	struct nameidata nd;
3084 
3085 	NDINITAT(&nd, CREATE, LOCKPARENT | STRIPSLASHES, UIO_USERSPACE,
3086 	    fd, path, p);
3087 	nd.ni_pledge = PLEDGE_CPATH;
3088 	nd.ni_unveil = UNVEIL_CREATE;
3089 	if ((error = namei(&nd)) != 0)
3090 		return (error);
3091 	vp = nd.ni_vp;
3092 	if (vp != NULL) {
3093 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3094 		if (nd.ni_dvp == vp)
3095 			vrele(nd.ni_dvp);
3096 		else
3097 			vput(nd.ni_dvp);
3098 		vrele(vp);
3099 		return (EEXIST);
3100 	}
3101 	VATTR_NULL(&vattr);
3102 	vattr.va_type = VDIR;
3103 	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
3104 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3105 	if (!error)
3106 		vput(nd.ni_vp);
3107 	return (error);
3108 }
3109 
3110 /*
3111  * Remove a directory file.
3112  */
3113 int
sys_rmdir(struct proc * p,void * v,register_t * retval)3114 sys_rmdir(struct proc *p, void *v, register_t *retval)
3115 {
3116 	struct sys_rmdir_args /* {
3117 		syscallarg(const char *) path;
3118 	} */ *uap = v;
3119 
3120 	return (dounlinkat(p, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR));
3121 }
3122 
3123 /*
3124  * Read a block of directory entries in a file system independent format.
3125  */
3126 int
sys_getdents(struct proc * p,void * v,register_t * retval)3127 sys_getdents(struct proc *p, void *v, register_t *retval)
3128 {
3129 	struct sys_getdents_args /* {
3130 		syscallarg(int) fd;
3131 		syscallarg(void *) buf;
3132 		syscallarg(size_t) buflen;
3133 	} */ *uap = v;
3134 	struct vnode *vp;
3135 	struct file *fp;
3136 	struct uio auio;
3137 	struct iovec aiov;
3138 	size_t buflen;
3139 	int error, eofflag;
3140 
3141 	buflen = SCARG(uap, buflen);
3142 
3143 	if (buflen > INT_MAX)
3144 		return (EINVAL);
3145 	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
3146 		return (error);
3147 	if ((fp->f_flag & FREAD) == 0) {
3148 		error = EBADF;
3149 		goto bad;
3150 	}
3151 	vp = fp->f_data;
3152 	if (vp->v_type != VDIR) {
3153 		error = EINVAL;
3154 		goto bad;
3155 	}
3156 
3157 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3158 
3159 	if (fp->f_offset < 0) {
3160 		VOP_UNLOCK(vp);
3161 		error = EINVAL;
3162 		goto bad;
3163 	}
3164 
3165 	aiov.iov_base = SCARG(uap, buf);
3166 	aiov.iov_len = buflen;
3167 	auio.uio_iov = &aiov;
3168 	auio.uio_iovcnt = 1;
3169 	auio.uio_rw = UIO_READ;
3170 	auio.uio_segflg = UIO_USERSPACE;
3171 	auio.uio_procp = p;
3172 	auio.uio_resid = buflen;
3173 	auio.uio_offset = fp->f_offset;
3174 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
3175 	mtx_enter(&fp->f_mtx);
3176 	fp->f_offset = auio.uio_offset;
3177 	mtx_leave(&fp->f_mtx);
3178 	VOP_UNLOCK(vp);
3179 	if (error)
3180 		goto bad;
3181 	*retval = buflen - auio.uio_resid;
3182 bad:
3183 	FRELE(fp, p);
3184 	return (error);
3185 }
3186 
3187 /*
3188  * Set the mode mask for creation of filesystem nodes.
3189  */
3190 int
sys_umask(struct proc * p,void * v,register_t * retval)3191 sys_umask(struct proc *p, void *v, register_t *retval)
3192 {
3193 	struct sys_umask_args /* {
3194 		syscallarg(mode_t) newmask;
3195 	} */ *uap = v;
3196 	struct filedesc *fdp = p->p_fd;
3197 
3198 	fdplock(fdp);
3199 	*retval = fdp->fd_cmask;
3200 	fdp->fd_cmask = SCARG(uap, newmask) & ACCESSPERMS;
3201 	fdpunlock(fdp);
3202 	return (0);
3203 }
3204 
3205 /*
3206  * Void all references to file by ripping underlying filesystem
3207  * away from vnode.
3208  */
3209 int
sys_revoke(struct proc * p,void * v,register_t * retval)3210 sys_revoke(struct proc *p, void *v, register_t *retval)
3211 {
3212 	struct sys_revoke_args /* {
3213 		syscallarg(const char *) path;
3214 	} */ *uap = v;
3215 	struct vnode *vp;
3216 	struct vattr vattr;
3217 	int error;
3218 	struct nameidata nd;
3219 
3220 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3221 	nd.ni_pledge = PLEDGE_RPATH | PLEDGE_TTY;
3222 	nd.ni_unveil = UNVEIL_READ;
3223 	if ((error = namei(&nd)) != 0)
3224 		return (error);
3225 	vp = nd.ni_vp;
3226 	if (vp->v_type != VCHR || (u_int)major(vp->v_rdev) >= nchrdev ||
3227 	    cdevsw[major(vp->v_rdev)].d_type != D_TTY) {
3228 		error = ENOTTY;
3229 		goto out;
3230 	}
3231 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3232 		goto out;
3233 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3234 	    (error = suser(p)))
3235 		goto out;
3236 	if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED)))
3237 		VOP_REVOKE(vp, REVOKEALL);
3238 out:
3239 	vrele(vp);
3240 	return (error);
3241 }
3242 
3243 /*
3244  * Convert a user file descriptor to a kernel file entry.
3245  *
3246  * On return *fpp is FREF:ed.
3247  */
3248 int
getvnode(struct proc * p,int fd,struct file ** fpp)3249 getvnode(struct proc *p, int fd, struct file **fpp)
3250 {
3251 	struct file *fp;
3252 	struct vnode *vp;
3253 
3254 	if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
3255 		return (EBADF);
3256 
3257 	if (fp->f_type != DTYPE_VNODE) {
3258 		FRELE(fp, p);
3259 		return (EINVAL);
3260 	}
3261 
3262 	vp = fp->f_data;
3263 	if (vp->v_type == VBAD) {
3264 		FRELE(fp, p);
3265 		return (EBADF);
3266 	}
3267 
3268 	*fpp = fp;
3269 
3270 	return (0);
3271 }
3272 
3273 /*
3274  * Positional read system call.
3275  */
3276 int
sys_pread(struct proc * p,void * v,register_t * retval)3277 sys_pread(struct proc *p, void *v, register_t *retval)
3278 {
3279 	struct sys_pread_args /* {
3280 		syscallarg(int) fd;
3281 		syscallarg(void *) buf;
3282 		syscallarg(size_t) nbyte;
3283 		syscallarg(off_t) offset;
3284 	} */ *uap = v;
3285 	struct iovec iov;
3286 	struct uio auio;
3287 
3288 	iov.iov_base = SCARG(uap, buf);
3289 	iov.iov_len = SCARG(uap, nbyte);
3290 	if (iov.iov_len > SSIZE_MAX)
3291 		return (EINVAL);
3292 
3293 	auio.uio_iov = &iov;
3294 	auio.uio_iovcnt = 1;
3295 	auio.uio_resid = iov.iov_len;
3296 	auio.uio_offset = SCARG(uap, offset);
3297 
3298 	return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3299 }
3300 
3301 /*
3302  * Positional scatter read system call.
3303  */
3304 int
sys_preadv(struct proc * p,void * v,register_t * retval)3305 sys_preadv(struct proc *p, void *v, register_t *retval)
3306 {
3307 	struct sys_preadv_args /* {
3308 		syscallarg(int) fd;
3309 		syscallarg(const struct iovec *) iovp;
3310 		syscallarg(int) iovcnt;
3311 		syscallarg(off_t) offset;
3312 	} */ *uap = v;
3313 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3314 	int error, iovcnt = SCARG(uap, iovcnt);
3315 	struct uio auio;
3316 	size_t resid;
3317 
3318 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3319 	if (error)
3320 		goto done;
3321 
3322 	auio.uio_iov = iov;
3323 	auio.uio_iovcnt = iovcnt;
3324 	auio.uio_resid = resid;
3325 	auio.uio_offset = SCARG(uap, offset);
3326 
3327 	error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3328  done:
3329 	iovec_free(iov, iovcnt);
3330 	return (error);
3331 }
3332 
3333 /*
3334  * Positional write system call.
3335  */
3336 int
sys_pwrite(struct proc * p,void * v,register_t * retval)3337 sys_pwrite(struct proc *p, void *v, register_t *retval)
3338 {
3339 	struct sys_pwrite_args /* {
3340 		syscallarg(int) fd;
3341 		syscallarg(const void *) buf;
3342 		syscallarg(size_t) nbyte;
3343 		syscallarg(off_t) offset;
3344 	} */ *uap = v;
3345 	struct iovec iov;
3346 	struct uio auio;
3347 
3348 	iov.iov_base = (void *)SCARG(uap, buf);
3349 	iov.iov_len = SCARG(uap, nbyte);
3350 	if (iov.iov_len > SSIZE_MAX)
3351 		return (EINVAL);
3352 
3353 	auio.uio_iov = &iov;
3354 	auio.uio_iovcnt = 1;
3355 	auio.uio_resid = iov.iov_len;
3356 	auio.uio_offset = SCARG(uap, offset);
3357 
3358 	return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval));
3359 }
3360 
3361 /*
3362  * Positional gather write system call.
3363  */
3364 int
sys_pwritev(struct proc * p,void * v,register_t * retval)3365 sys_pwritev(struct proc *p, void *v, register_t *retval)
3366 {
3367 	struct sys_pwritev_args /* {
3368 		syscallarg(int) fd;
3369 		syscallarg(const struct iovec *) iovp;
3370 		syscallarg(int) iovcnt;
3371 		syscallarg(off_t) offset;
3372 	} */ *uap = v;
3373 	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
3374 	int error, iovcnt = SCARG(uap, iovcnt);
3375 	struct uio auio;
3376 	size_t resid;
3377 
3378 	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
3379 	if (error)
3380 		goto done;
3381 
3382 	auio.uio_iov = iov;
3383 	auio.uio_iovcnt = iovcnt;
3384 	auio.uio_resid = resid;
3385 	auio.uio_offset = SCARG(uap, offset);
3386 
3387 	error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval);
3388  done:
3389 	iovec_free(iov, iovcnt);
3390 	return (error);
3391 }
3392