1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/sysent.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mountctl.h>
46 #include <sys/sysmsg.h>
47 #include <sys/filedesc.h>
48 #include <sys/kernel.h>
49 #include <sys/fcntl.h>
50 #include <sys/file.h>
51 #include <sys/linker.h>
52 #include <sys/stat.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/caps.h>
57 #include <sys/jail.h>
58 #include <sys/namei.h>
59 #include <sys/nlookup.h>
60 #include <sys/dirent.h>
61 #include <sys/extattr.h>
62 #include <sys/spinlock.h>
63 #include <sys/kern_syscall.h>
64 #include <sys/objcache.h>
65 #include <sys/sysctl.h>
66
67 #include <sys/buf2.h>
68 #include <sys/file2.h>
69 #include <sys/spinlock2.h>
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74
75 #include <machine/limits.h>
76 #include <machine/stdarg.h>
77
/*
 * Retry bound used by the unmount wait loop in dounmount().  The loop
 * keeps going past this bound while the debug_unmount sysctl is set.
 */
#define UMOUNTF_RETRIES	50	/* 0.25 seconds per retry */

/*
 * Forward declarations for file-local helpers.
 */
static void mount_warning(struct mount *mp, const char *ctl, ...)
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int get_fscap(const char *);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, u_long);
static int setutimes (struct vnode *, struct vattr *,
		const struct timespec *, int);

/*
 * vfs.usermount: when zero, sys_mount() and sys_unmount() require the
 * filesystem-specific capability up front; when non-zero that up-front
 * check is skipped (ownership checks still apply further down).
 */
static int usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
	"Allow non-root users to mount filesystems");

/*
 * vfs.debug_unmount: when non-zero, dounmount() keeps retrying beyond
 * UMOUNTF_RETRIES and periodically logs the outstanding ref counts.
 */
static int debug_unmount = 0;	/* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
	"Stall failed unmounts in loop");

/* Rate limiter state; not referenced in the visible part of this file. */
static struct krate krate_rename = { 1 };
105
106 /*
107 * Virtual File System System Calls
108 */
109
/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Two paths share this function:
 *
 *  - MNT_UPDATE: the path must name the root of an existing mount; the
 *    existing struct mount is busied, its flags are adjusted and
 *    VFS_MOUNT() is re-issued.  On failure the saved mnt_flag and
 *    mnt_kern_flag are restored.
 *
 *  - New mount: a struct mount is allocated, VFS_MOUNT() is called, and
 *    on success the mount is hooked into the namecache and the mountlist.
 *    On failure the partially constructed mount is torn down and freed.
 *
 * Returns 0 on success or an errno value.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved flags for update */
	int hasmount;
	int priv = 0;
	int flags = uap->flags;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions for jails and the filesystems to mount.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct cap according to the file system type.
	 */
	priv = get_fscap(fstypename);

	if (usermount == 0 && (error = caps_priv_check_td(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (flags & MNT_EXPORTED) {
		error = caps_priv_check_td(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (caps_priv_check_td(td, priv))
		flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this namecache
	 * entry.  The mount found here is only probed and is dropped again
	 * immediately.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (flags & MNT_UPDATE) {
		/* An update must target the root vnode of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a "null" type update use the mount recorded by the
		 * lookup rather than the vnode's mount (see nullmp above).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Save the current flags so a failed update can be undone. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = caps_priv_check_td(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		/* Refuse to update when another mount is stacked on top. */
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = caps_priv_check_td(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The underlying filesystem may forbid mounts stacked on it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
		if (error) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	/* Something is already mounted on this namecache entry. */
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	/*
	 * NOTE(review): the vfs_busy() return value is not checked here;
	 * presumably it cannot fail on a freshly initialized mount - confirm.
	 */
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 *
	 * NOTE(review): MNT_FORCE is in the set mask below but not in the
	 * clear mask; the update path clears it after VFS_MOUNT(), the
	 * new-mount path does not appear to - confirm this is intended.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		/* A failed update restores the flags saved on entry. */
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		/* vp is unlocked while the namecache entry is locked. */
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() assignment below and is
		 * therefore never reported to the caller.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: undo the setup done above and free
		 * the mount structure.  mnt_ncmounton is only zeroed here;
		 * the nch reference itself is released via cache_drop(&nch).
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}
474
475 /*
476 * Scan all active processes to see if any of them have a current
477 * or root directory onto which the new filesystem has just been
478 * mounted. If so, replace them with the new mount point.
479 *
480 * Both old_nch and new_nch are ref'd on call but not locked.
481 * new_nch must be temporarily locked so it can be associated with the
482 * vnode representing the root of the mount point.
483 */
/*
 * Argument bundle handed from checkdirs() to checkdirs_callback() through
 * allproc_scan().
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* namecache handle being replaced */
	struct nchandle new_nch;	/* namecache handle to install */
	struct vnode *old_vp;		/* not assigned by checkdirs() */
	struct vnode *new_vp;		/* vnode installed with new_nch */
};

/* Per-process worker invoked by allproc_scan() from checkdirs(). */
static int checkdirs_callback(struct proc *p, void *data);
492
493 static void
checkdirs(struct nchandle * old_nch,struct nchandle * new_nch)494 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
495 {
496 struct checkdirs_info info;
497 struct vnode *olddp;
498 struct vnode *newdp;
499 struct mount *mp;
500
501 /*
502 * If the old mount point's vnode has a usecount of 1, it is not
503 * being held as a descriptor anywhere.
504 */
505 olddp = old_nch->ncp->nc_vp;
506 if (olddp == NULL || VREFCNT(olddp) == 1)
507 return;
508
509 /*
510 * Force the root vnode of the new mount point to be resolved
511 * so we can update any matching processes.
512 */
513 mp = new_nch->mount;
514 if (VFS_ROOT(mp, &newdp))
515 panic("mount: lost mount");
516 vn_unlock(newdp);
517 cache_lock(new_nch);
518 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
519 cache_setunresolved(new_nch);
520 cache_setvp(new_nch, newdp);
521 cache_unlock(new_nch);
522
523 /*
524 * Special handling of the root node
525 */
526 if (rootvnode == olddp) {
527 vref(newdp);
528 vfs_cache_setroot(newdp, cache_hold(new_nch));
529 }
530
531 /*
532 * Pass newdp separately so the callback does not have to access
533 * it via new_nch->ncp->nc_vp.
534 */
535 info.old_nch = *old_nch;
536 info.new_nch = *new_nch;
537 info.new_vp = newdp;
538 allproc_scan(checkdirs_callback, &info, 0);
539 vput(newdp);
540 }
541
542 /*
543 * NOTE: callback is not MP safe because the scanned process's filedesc
544 * structure can be ripped out from under us, amoung other things.
545 */
546 static int
checkdirs_callback(struct proc * p,void * data)547 checkdirs_callback(struct proc *p, void *data)
548 {
549 struct checkdirs_info *info = data;
550 struct filedesc *fdp;
551 struct nchandle ncdrop1;
552 struct nchandle ncdrop2;
553 struct vnode *vprele1;
554 struct vnode *vprele2;
555
556 if ((fdp = p->p_fd) != NULL) {
557 cache_zero(&ncdrop1);
558 cache_zero(&ncdrop2);
559 vprele1 = NULL;
560 vprele2 = NULL;
561
562 /*
563 * MPUNSAFE - XXX fdp can be pulled out from under a
564 * foreign process.
565 *
566 * A shared filedesc is ok, we don't have to copy it
567 * because we are making this change globally.
568 */
569 spin_lock(&fdp->fd_spin);
570 if (fdp->fd_ncdir.mount == info->old_nch.mount &&
571 fdp->fd_ncdir.ncp == info->old_nch.ncp) {
572 vprele1 = fdp->fd_cdir;
573 vref(info->new_vp);
574 fdp->fd_cdir = info->new_vp;
575 ncdrop1 = fdp->fd_ncdir;
576 cache_copy(&info->new_nch, &fdp->fd_ncdir);
577 }
578 if (fdp->fd_nrdir.mount == info->old_nch.mount &&
579 fdp->fd_nrdir.ncp == info->old_nch.ncp) {
580 vprele2 = fdp->fd_rdir;
581 vref(info->new_vp);
582 fdp->fd_rdir = info->new_vp;
583 ncdrop2 = fdp->fd_nrdir;
584 cache_copy(&info->new_nch, &fdp->fd_nrdir);
585 }
586 spin_unlock(&fdp->fd_spin);
587 if (ncdrop1.ncp)
588 cache_drop(&ncdrop1);
589 if (ncdrop2.ncp)
590 cache_drop(&ncdrop2);
591 if (vprele1)
592 vrele(vprele1);
593 if (vprele2)
594 vrele(vprele2);
595 }
596 return(0);
597 }
598
599 /*
600 * Unmount a file system.
601 *
602 * Note: unmount takes a path to the vnode mounted on as argument,
603 * not special file (as before).
604 *
605 * umount_args(char *path, int flags)
606 *
607 * MPALMOSTSAFE
608 */
609 int
sys_unmount(struct sysmsg * sysmsg,const struct unmount_args * uap)610 sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap)
611 {
612 struct thread *td = curthread;
613 struct proc *p __debugvar = td->td_proc;
614 struct mount *mp = NULL;
615 struct nlookupdata nd;
616 char fstypename[MFSNAMELEN];
617 int priv = 0;
618 int error;
619 struct ucred *cred;
620
621 cred = td->td_ucred;
622
623 KKASSERT(p);
624
625 /* We do not allow user umounts inside a jail for now */
626 if (usermount && jailed(cred)) {
627 error = EPERM;
628 goto done;
629 }
630
631 error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
632 NLC_FOLLOW | NLC_IGNBADDIR);
633 if (error == 0)
634 error = nlookup(&nd);
635 if (error)
636 goto out;
637
638 mp = nd.nl_nch.mount;
639
640 /* Figure out the fsname in order to select proper privs */
641 ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
642 priv = get_fscap(fstypename);
643
644 if (usermount == 0 && (error = caps_priv_check_td(td, priv))) {
645 nlookup_done(&nd);
646 goto done;
647 }
648
649 /*
650 * Only root, or the user that did the original mount is
651 * permitted to unmount this filesystem.
652 */
653 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
654 (error = caps_priv_check_td(td, priv)))
655 {
656 goto out;
657 }
658
659 /*
660 * Don't allow unmounting the root file system.
661 */
662 if (mp->mnt_flag & MNT_ROOTFS) {
663 error = EINVAL;
664 goto out;
665 }
666
667 /*
668 * Must be the root of the filesystem
669 */
670 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
671 error = EINVAL;
672 goto out;
673 }
674
675 /* Check if this mount belongs to this prison */
676 if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
677 mp->mnt_cred->cr_prison != cred->cr_prison)) {
678 kprintf("mountpoint %s does not belong to this jail\n",
679 uap->path);
680 error = EPERM;
681 goto out;
682 }
683
684 /*
685 * If no error try to issue the unmount. We lose our cache
686 * ref when we call nlookup_done so we must hold the mount point
687 * to prevent use-after-free races.
688 */
689 out:
690 if (error == 0) {
691 mount_hold(mp);
692 nlookup_done(&nd);
693 error = dounmount(mp, uap->flags, 0);
694 mount_drop(mp);
695 } else {
696 nlookup_done(&nd);
697 }
698 done:
699 return (error);
700 }
701
702 /*
703 * Do the actual file system unmount (interlocked against the mountlist
704 * token and mp->mnt_token).
705 */
706 static int
dounmount_interlock(struct mount * mp)707 dounmount_interlock(struct mount *mp)
708 {
709 if (mp->mnt_kern_flag & MNTK_UNMOUNT)
710 return (EBUSY);
711 mp->mnt_kern_flag |= MNTK_UNMOUNT;
712 return(0);
713 }
714
715 /*
716 * Returns non-zero if the specified process uses the specified
717 * mount point.
718 */
719 static int
process_uses_mount(struct proc * p,struct mount * mp)720 process_uses_mount(struct proc *p, struct mount *mp)
721 {
722 struct filedesc *fdp;
723 struct file *fp;
724 int found;
725 int n;
726
727 fdp = p->p_fd;
728 if (fdp == NULL)
729 return 0;
730 if (fdp->fd_ncdir.mount == mp ||
731 fdp->fd_nrdir.mount == mp ||
732 fdp->fd_njdir.mount == mp)
733 {
734 return 1;
735 }
736
737 found = 0;
738 spin_lock_shared(&fdp->fd_spin);
739 for (n = 0; n < fdp->fd_nfiles; ++n) {
740 fp = fdp->fd_files[n].fp;
741 if (fp && fp->f_nchandle.mount == mp) {
742 found = 1;
743 break;
744 }
745 }
746 spin_unlock_shared(&fdp->fd_spin);
747
748 return found;
749 }
750
/*
 * Cleanup processes that have references to the mount point
 * being force-unmounted.
 *
 * This structure is the argument passed to unmount_allproc_cb() through
 * allproc_scan().
 */
struct unmount_allproc_info {
	struct mount *mp;	/* mount being unmounted */
	int sig;		/* signal to post to users of mp, 0 for none */
};
759
760 static int
unmount_allproc_cb(struct proc * p,void * arg)761 unmount_allproc_cb(struct proc *p, void *arg)
762 {
763 struct unmount_allproc_info *info;
764 struct mount *mp;
765
766 info = arg;
767 mp = info->mp;
768
769 if (p->p_textnch.mount == mp)
770 cache_drop(&p->p_textnch);
771 if (info->sig && process_uses_mount(p, mp)) {
772 lwkt_gettoken(&p->p_token);
773 p->p_flags |= P_MUSTKILL;
774 lwkt_reltoken(&p->p_token);
775 ksignal(p, info->sig);
776 }
777
778 return 0;
779 }
780
/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 *
 * Arguments:
 *	mp	- mount to unmount
 *	flags	- unmount flags; MNT_FORCE is examined here and the flags
 *		  are passed on to VFS_UNMOUNT()
 *	halting	- non-zero enables the QUICKHALT shortcut for mounts
 *		  flagged MNTK_QUICKHALT
 *
 * Returns 0 on success or an errno value, in which case the syncer vnode
 * and the unmount/async flags are restored.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;		/* saved MNT_ASYNC bit, restored on failure */
	int lflags;
	int freeok = 1;		/* cleared when mp must not be dropped here */
	int hadsyncer = 0;	/* a syncer vnode was decommissioned */
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* Only forced unmounts take the lock without LK_TIMELOCK. */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* Could not get the lock: back out the unmount flags. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; (retry < UMOUNTF_RETRIES || debug_unmount); ++retry) {
		int dummy = 0;	/* private sleep address for tsleep() */

		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
				    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache(mp);

		/*
		 * Break out if we are good.  Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		      NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
				     TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 *
		 * In addition any process which has a current, root, or
		 * jail directory matching the mount, or which has an open
		 * descriptor matching the mount, will be killed.  We first
		 * try SIGINT, and if that doesn't work we issue SIGKILL.
		 */
		if (flags & MNT_FORCE) {
			struct unmount_allproc_info info;

			info.mp = mp;
			/* Signals are posted only on retries 3 and 7. */
			switch(retry) {
			case 3:
				info.sig = SIGINT;
				break;
			case 7:
				info.sig = SIGKILL;
				break;
			default:
				info.sig = 0;
				break;
			}
			allproc_scan(&unmount_allproc_cb, &info, 0);
		}

		/*
		 * Sleep and retry.
		 *
		 * NOTE(review): the lockmgr() results stored in error here
		 * are never examined; error is reset to 0 after the loop.
		 */
		error = lockmgr(&mp->mnt_lock, LK_RELEASE);
		tsleep(&dummy, 0, "mntbsy", hz / 4 + 1);
		error = lockmgr(&mp->mnt_lock, LK_EXCLUSIVE);
		if (debug_unmount && (retry & 15) == 15) {
			mount_warning(mp,
				      "(%p) debug - retry %d, "
				      "%d namecache refs, %d mount refs",
				      mp, retry,
				      (ncp ? ncp->nc_refs - 1 : 0),
				      mp->mnt_refs - 1);
		}
	}
	/* The retry budget was used up without reaching a quiescent state. */
	if (retry == UMOUNTF_RETRIES) {
		mount_warning(mp,
			      "forced umount of \"%s\" - "
			      "%d namecache refs, %d mount refs",
			      (mp->mnt_ncmountpt.ncp ?
			       mp->mnt_ncmountpt.ncp->nc_name : "?"),
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
	}

	/*
	 * Re-evaluate the reference state.  Leftover refs fail a normal
	 * unmount with EBUSY; a forced unmount proceeds but will not drop
	 * the mount structure itself (freeok = 0).
	 */
	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	      NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
			     TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
			      "(%p): %d namecache refs, %d mount refs "
			      "still present",
			      mp,
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;
			/*
			 * NOTE(review): mount_warning() already appends a
			 * newline, so this message ends with two.
			 */
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
				      "(%p) unmount: vfs refused to unmount, "
				      "error %d",
				      mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	/*
	 * Detach both namecache handles.  Each is copied out and zeroed
	 * in the mount before the copy is cleared and dropped.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache(mp);
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache(mp);
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		/* The token is already released; skip the release at out. */
		mp = NULL;
	} else {
		cache_clearmntcache(mp);
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}
1121
1122 static
1123 void
mount_warning(struct mount * mp,const char * ctl,...)1124 mount_warning(struct mount *mp, const char *ctl, ...)
1125 {
1126 char *ptr;
1127 char *buf;
1128 __va_list va;
1129
1130 __va_start(va, ctl);
1131 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
1132 &ptr, &buf, 0) == 0) {
1133 kprintf("unmount(%s): ", ptr);
1134 kvprintf(ctl, va);
1135 kprintf("\n");
1136 kfree(buf, M_TEMP);
1137 } else {
1138 kprintf("unmount(%p", mp);
1139 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
1140 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
1141 kprintf("): ");
1142 kvprintf(ctl, va);
1143 kprintf("\n");
1144 }
1145 __va_end(va);
1146 }
1147
1148 /*
1149 * Shim cache_fullpath() to handle the case where a process is chrooted into
1150 * a subdirectory of a mount. In this case if the root mount matches the
1151 * process root directory's mount we have to specify the process's root
1152 * directory instead of the mount point, because the mount point might
1153 * be above the root directory.
1154 */
1155 static
1156 int
mount_path(struct proc * p,struct mount * mp,char ** rb,char ** fb)1157 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
1158 {
1159 struct nchandle *nch;
1160
1161 if (p && p->p_fd->fd_nrdir.mount == mp)
1162 nch = &p->p_fd->fd_nrdir;
1163 else
1164 nch = &mp->mnt_ncmountpt;
1165 return(cache_fullpath(p, nch, NULL, rb, fb, 0));
1166 }
1167
1168 /*
1169 * Sync each mounted filesystem.
1170 */
1171
1172 #ifdef DEBUG
1173 static int syncprt = 0;
1174 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1175 #endif /* DEBUG */
1176
1177 static int sync_callback(struct mount *mp, void *data);
1178
1179 int
sys_sync(struct sysmsg * sysmsg,const struct sync_args * uap)1180 sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap)
1181 {
1182 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
1183 return (0);
1184 }
1185
1186 static
1187 int
sync_callback(struct mount * mp,void * data __unused)1188 sync_callback(struct mount *mp, void *data __unused)
1189 {
1190 int asyncflag;
1191
1192 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1193 lwkt_gettoken(&mp->mnt_token);
1194 asyncflag = mp->mnt_flag & MNT_ASYNC;
1195 mp->mnt_flag &= ~MNT_ASYNC;
1196 lwkt_reltoken(&mp->mnt_token);
1197 vfs_msync(mp, MNT_NOWAIT);
1198 VFS_SYNC(mp, MNT_NOWAIT);
1199 lwkt_gettoken(&mp->mnt_token);
1200 mp->mnt_flag |= asyncflag;
1201 lwkt_reltoken(&mp->mnt_token);
1202 }
1203 return(0);
1204 }
1205
1206 /* XXX PRISON: could be per prison flag */
1207 static int prison_quotas;
1208 #if 0
1209 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1210 #endif
1211
1212 /*
1213 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
1214 *
1215 * Change filesystem quotas.
1216 *
1217 * MPALMOSTSAFE
1218 */
1219 int
sys_quotactl(struct sysmsg * sysmsg,const struct quotactl_args * uap)1220 sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
1221 {
1222 struct nlookupdata nd;
1223 struct thread *td;
1224 struct mount *mp;
1225 int error;
1226
1227 td = curthread;
1228 if (td->td_ucred->cr_prison && !prison_quotas) {
1229 error = EPERM;
1230 goto done;
1231 }
1232
1233 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1234 if (error == 0)
1235 error = nlookup(&nd);
1236 if (error == 0) {
1237 mp = nd.nl_nch.mount;
1238 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
1239 uap->arg, nd.nl_cred);
1240 }
1241 nlookup_done(&nd);
1242 done:
1243 return (error);
1244 }
1245
1246 /*
1247 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
1248 * void *buf, int buflen)
1249 *
1250 * This function operates on a mount point and executes the specified
1251 * operation using the specified control data, and possibly returns data.
1252 *
1253 * The actual number of bytes stored in the result buffer is returned, 0
1254 * if none, otherwise an error is returned.
1255 *
1256 * MPALMOSTSAFE
1257 */
1258 int
sys_mountctl(struct sysmsg * sysmsg,const struct mountctl_args * uap)1259 sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
1260 {
1261 struct thread *td = curthread;
1262 struct file *fp;
1263 void *ctl = NULL;
1264 void *buf = NULL;
1265 char *path = NULL;
1266 int error;
1267
1268 /*
1269 * Sanity and permissions checks. We must be root.
1270 */
1271 if (td->td_ucred->cr_prison != NULL)
1272 return (EPERM);
1273 if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
1274 (error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) != 0)
1275 {
1276 return (error);
1277 }
1278
1279 /*
1280 * Argument length checks
1281 */
1282 if (uap->ctllen < 0 || uap->ctllen > 1024)
1283 return (EINVAL);
1284 if (uap->buflen < 0 || uap->buflen > 16 * 1024)
1285 return (EINVAL);
1286 if (uap->path == NULL)
1287 return (EINVAL);
1288
1289 /*
1290 * Allocate the necessary buffers and copyin data
1291 */
1292 path = objcache_get(namei_oc, M_WAITOK);
1293 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
1294 if (error)
1295 goto done;
1296
1297 if (uap->ctllen) {
1298 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
1299 error = copyin(uap->ctl, ctl, uap->ctllen);
1300 if (error)
1301 goto done;
1302 }
1303 if (uap->buflen)
1304 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);
1305
1306 /*
1307 * Validate the descriptor
1308 */
1309 if (uap->fd >= 0) {
1310 fp = holdfp(td, uap->fd, -1);
1311 if (fp == NULL) {
1312 error = EBADF;
1313 goto done;
1314 }
1315 } else {
1316 fp = NULL;
1317 }
1318
1319 /*
1320 * Execute the internal kernel function and clean up.
1321 */
1322 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
1323 buf, uap->buflen, &sysmsg->sysmsg_result);
1324 if (fp)
1325 dropfp(td, uap->fd, fp);
1326 if (error == 0 && sysmsg->sysmsg_result > 0)
1327 error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
1328 done:
1329 if (path)
1330 objcache_put(namei_oc, path);
1331 if (ctl)
1332 kfree(ctl, M_TEMP);
1333 if (buf)
1334 kfree(buf, M_TEMP);
1335 return (error);
1336 }
1337
1338 /*
1339 * Execute a mount control operation by resolving the path to a mount point
1340 * and calling vop_mountctl().
1341 *
1342 * Use the mount point from the nch instead of the vnode so nullfs mounts
1343 * can properly spike the VOP.
1344 */
1345 int
kern_mountctl(const char * path,int op,struct file * fp,const void * ctl,int ctllen,void * buf,int buflen,int * res)1346 kern_mountctl(const char *path, int op, struct file *fp,
1347 const void *ctl, int ctllen,
1348 void *buf, int buflen, int *res)
1349 {
1350 struct vnode *vp;
1351 struct nlookupdata nd;
1352 struct nchandle nch;
1353 struct mount *mp;
1354 int error;
1355
1356 *res = 0;
1357 vp = NULL;
1358 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
1359 if (error)
1360 return (error);
1361 error = nlookup(&nd);
1362 if (error) {
1363 nlookup_done(&nd);
1364 return (error);
1365 }
1366 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
1367 if (error) {
1368 nlookup_done(&nd);
1369 return (error);
1370 }
1371
1372 /*
1373 * Yes, all this is needed to use the nch.mount below, because
1374 * we must maintain a ref on the mount to avoid ripouts (e.g.
1375 * due to heavy mount/unmount use by synth or poudriere).
1376 */
1377 nch = nd.nl_nch;
1378 cache_zero(&nd.nl_nch);
1379 cache_unlock(&nch);
1380 nlookup_done(&nd);
1381 vn_unlock(vp);
1382
1383 mp = nch.mount;
1384
1385 /*
1386 * Must be the root of the filesystem
1387 */
1388 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
1389 cache_drop(&nch);
1390 vrele(vp);
1391 return (EINVAL);
1392 }
1393 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
1394 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
1395 path);
1396 cache_drop(&nch);
1397 vrele(vp);
1398 return (EINVAL);
1399 }
1400 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
1401 buf, buflen, res);
1402 vrele(vp);
1403 cache_drop(&nch);
1404
1405 return (error);
1406 }
1407
1408 int
kern_statfs(struct nlookupdata * nd,struct statfs * buf)1409 kern_statfs(struct nlookupdata *nd, struct statfs *buf)
1410 {
1411 struct thread *td = curthread;
1412 struct proc *p = td->td_proc;
1413 struct mount *mp;
1414 struct statfs *sp;
1415 char *fullpath, *freepath;
1416 int error;
1417
1418 if ((error = nlookup(nd)) != 0)
1419 return (error);
1420 mp = nd->nl_nch.mount;
1421 sp = &mp->mnt_stat;
1422
1423 /*
1424 * Ignore refresh error, user should have visibility.
1425 * This can happen if a NFS mount goes bad (e.g. server
1426 * revokes perms or goes down).
1427 */
1428 error = VFS_STATFS(mp, sp, nd->nl_cred);
1429 /* ignore error */
1430
1431 error = mount_path(p, mp, &fullpath, &freepath);
1432 if (error)
1433 return(error);
1434 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1435 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1436 kfree(freepath, M_TEMP);
1437
1438 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1439 bcopy(sp, buf, sizeof(*buf));
1440 /* Only root should have access to the fsid's. */
1441 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
1442 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1443 return (0);
1444 }
1445
1446 /*
1447 * statfs_args(char *path, struct statfs *buf)
1448 *
1449 * Get filesystem statistics.
1450 */
1451 int
sys_statfs(struct sysmsg * sysmsg,const struct statfs_args * uap)1452 sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap)
1453 {
1454 struct nlookupdata nd;
1455 struct statfs buf;
1456 int error;
1457
1458 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1459 if (error == 0)
1460 error = kern_statfs(&nd, &buf);
1461 nlookup_done(&nd);
1462 if (error == 0)
1463 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1464 return (error);
1465 }
1466
1467 int
kern_fstatfs(int fd,struct statfs * buf)1468 kern_fstatfs(int fd, struct statfs *buf)
1469 {
1470 struct thread *td = curthread;
1471 struct proc *p = td->td_proc;
1472 struct file *fp;
1473 struct mount *mp;
1474 struct statfs *sp;
1475 char *fullpath, *freepath;
1476 int error;
1477
1478 KKASSERT(p);
1479 if ((error = holdvnode(td, fd, &fp)) != 0)
1480 return (error);
1481
1482 /*
1483 * Try to use mount info from any overlays rather than the
1484 * mount info for the underlying vnode, otherwise we will
1485 * fail when operating on null-mounted paths inside a chroot.
1486 */
1487 if ((mp = fp->f_nchandle.mount) == NULL)
1488 mp = ((struct vnode *)fp->f_data)->v_mount;
1489 if (mp == NULL) {
1490 error = EBADF;
1491 goto done;
1492 }
1493 if (fp->f_cred == NULL) {
1494 error = EINVAL;
1495 goto done;
1496 }
1497
1498 /*
1499 * Ignore refresh error, user should have visibility.
1500 * This can happen if a NFS mount goes bad (e.g. server
1501 * revokes perms or goes down).
1502 */
1503 sp = &mp->mnt_stat;
1504 error = VFS_STATFS(mp, sp, fp->f_cred);
1505
1506 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
1507 goto done;
1508 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1509 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1510 kfree(freepath, M_TEMP);
1511
1512 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1513 bcopy(sp, buf, sizeof(*buf));
1514
1515 /* Only root should have access to the fsid's. */
1516 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
1517 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
1518 error = 0;
1519 done:
1520 fdrop(fp);
1521 return (error);
1522 }
1523
1524 /*
1525 * fstatfs_args(int fd, struct statfs *buf)
1526 *
1527 * Get filesystem statistics.
1528 */
1529 int
sys_fstatfs(struct sysmsg * sysmsg,const struct fstatfs_args * uap)1530 sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
1531 {
1532 struct statfs buf;
1533 int error;
1534
1535 error = kern_fstatfs(uap->fd, &buf);
1536
1537 if (error == 0)
1538 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1539 return (error);
1540 }
1541
1542 int
kern_statvfs(struct nlookupdata * nd,struct statvfs * buf)1543 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
1544 {
1545 struct mount *mp;
1546 struct statvfs *sp;
1547 int error;
1548
1549 if ((error = nlookup(nd)) != 0)
1550 return (error);
1551 mp = nd->nl_nch.mount;
1552 sp = &mp->mnt_vstat;
1553 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
1554 return (error);
1555
1556 sp->f_flag = 0;
1557 if (mp->mnt_flag & MNT_RDONLY)
1558 sp->f_flag |= ST_RDONLY;
1559 if (mp->mnt_flag & MNT_NOSUID)
1560 sp->f_flag |= ST_NOSUID;
1561 bcopy(sp, buf, sizeof(*buf));
1562 return (0);
1563 }
1564
1565 /*
1566 * statfs_args(char *path, struct statfs *buf)
1567 *
1568 * Get filesystem statistics.
1569 */
1570 int
sys_statvfs(struct sysmsg * sysmsg,const struct statvfs_args * uap)1571 sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
1572 {
1573 struct nlookupdata nd;
1574 struct statvfs buf;
1575 int error;
1576
1577 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1578 if (error == 0)
1579 error = kern_statvfs(&nd, &buf);
1580 nlookup_done(&nd);
1581 if (error == 0)
1582 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1583 return (error);
1584 }
1585
1586 int
kern_fstatvfs(int fd,struct statvfs * buf)1587 kern_fstatvfs(int fd, struct statvfs *buf)
1588 {
1589 struct thread *td = curthread;
1590 struct file *fp;
1591 struct mount *mp;
1592 struct statvfs *sp;
1593 int error;
1594
1595 if ((error = holdvnode(td, fd, &fp)) != 0)
1596 return (error);
1597 if ((mp = fp->f_nchandle.mount) == NULL)
1598 mp = ((struct vnode *)fp->f_data)->v_mount;
1599 if (mp == NULL) {
1600 error = EBADF;
1601 goto done;
1602 }
1603 if (fp->f_cred == NULL) {
1604 error = EINVAL;
1605 goto done;
1606 }
1607 sp = &mp->mnt_vstat;
1608 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
1609 goto done;
1610
1611 sp->f_flag = 0;
1612 if (mp->mnt_flag & MNT_RDONLY)
1613 sp->f_flag |= ST_RDONLY;
1614 if (mp->mnt_flag & MNT_NOSUID)
1615 sp->f_flag |= ST_NOSUID;
1616
1617 bcopy(sp, buf, sizeof(*buf));
1618 error = 0;
1619 done:
1620 fdrop(fp);
1621 return (error);
1622 }
1623
1624 /*
1625 * fstatfs_args(int fd, struct statfs *buf)
1626 *
1627 * Get filesystem statistics.
1628 */
1629 int
sys_fstatvfs(struct sysmsg * sysmsg,const struct fstatvfs_args * uap)1630 sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
1631 {
1632 struct statvfs buf;
1633 int error;
1634
1635 error = kern_fstatvfs(uap->fd, &buf);
1636
1637 if (error == 0)
1638 error = copyout(&buf, uap->buf, sizeof(*uap->buf));
1639 return (error);
1640 }
1641
1642 /*
1643 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
1644 *
1645 * Get statistics on all filesystems.
1646 */
1647
1648 struct getfsstat_info {
1649 struct statfs *sfsp;
1650 long count;
1651 long maxcount;
1652 int error;
1653 int flags;
1654 struct thread *td;
1655 };
1656
1657 static int getfsstat_callback(struct mount *, void *);
1658
1659 int
sys_getfsstat(struct sysmsg * sysmsg,const struct getfsstat_args * uap)1660 sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
1661 {
1662 struct thread *td = curthread;
1663 struct getfsstat_info info;
1664
1665 bzero(&info, sizeof(info));
1666
1667 info.maxcount = uap->bufsize / sizeof(struct statfs);
1668 info.sfsp = uap->buf;
1669 info.count = 0;
1670 info.flags = uap->flags;
1671 info.td = td;
1672
1673 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
1674 if (info.sfsp && info.count > info.maxcount)
1675 sysmsg->sysmsg_result = info.maxcount;
1676 else
1677 sysmsg->sysmsg_result = info.count;
1678 return (info.error);
1679 }
1680
1681 static int
getfsstat_callback(struct mount * mp,void * data)1682 getfsstat_callback(struct mount *mp, void *data)
1683 {
1684 struct getfsstat_info *info = data;
1685 struct statfs *sp;
1686 char *freepath;
1687 char *fullpath;
1688 int error;
1689
1690 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1691 return(0);
1692
1693 if (info->sfsp && info->count < info->maxcount) {
1694 sp = &mp->mnt_stat;
1695
1696 /*
1697 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1698 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1699 * overrides MNT_WAIT.
1700 *
1701 * Ignore refresh error, user should have visibility.
1702 * This can happen if a NFS mount goes bad (e.g. server
1703 * revokes perms or goes down).
1704 */
1705 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1706 (info->flags & MNT_WAIT)) &&
1707 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1708 /* ignore error */
1709 }
1710 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1711
1712 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1713 if (error) {
1714 info->error = error;
1715 return(-1);
1716 }
1717 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1718 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1719 kfree(freepath, M_TEMP);
1720
1721 error = copyout(sp, info->sfsp, sizeof(*sp));
1722 if (error) {
1723 info->error = error;
1724 return (-1);
1725 }
1726 ++info->sfsp;
1727 }
1728 info->count++;
1729 return(0);
1730 }
1731
1732 /*
1733 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
1734 long bufsize, int flags)
1735 *
1736 * Get statistics on all filesystems.
1737 */
1738
1739 struct getvfsstat_info {
1740 struct statfs *sfsp;
1741 struct statvfs *vsfsp;
1742 long count;
1743 long maxcount;
1744 int error;
1745 int flags;
1746 struct thread *td;
1747 };
1748
1749 static int getvfsstat_callback(struct mount *, void *);
1750
1751 int
sys_getvfsstat(struct sysmsg * sysmsg,const struct getvfsstat_args * uap)1752 sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap)
1753 {
1754 struct thread *td = curthread;
1755 struct getvfsstat_info info;
1756
1757 bzero(&info, sizeof(info));
1758
1759 info.maxcount = uap->vbufsize / sizeof(struct statvfs);
1760 info.sfsp = uap->buf;
1761 info.vsfsp = uap->vbuf;
1762 info.count = 0;
1763 info.flags = uap->flags;
1764 info.td = td;
1765
1766 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
1767 if (info.vsfsp && info.count > info.maxcount)
1768 sysmsg->sysmsg_result = info.maxcount;
1769 else
1770 sysmsg->sysmsg_result = info.count;
1771 return (info.error);
1772 }
1773
1774 static int
getvfsstat_callback(struct mount * mp,void * data)1775 getvfsstat_callback(struct mount *mp, void *data)
1776 {
1777 struct getvfsstat_info *info = data;
1778 struct statfs *sp;
1779 struct statvfs *vsp;
1780 char *freepath;
1781 char *fullpath;
1782 int error;
1783
1784 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
1785 return(0);
1786
1787 if (info->vsfsp && info->count < info->maxcount) {
1788 sp = &mp->mnt_stat;
1789 vsp = &mp->mnt_vstat;
1790
1791 /*
1792 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1793 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1794 * overrides MNT_WAIT.
1795 *
1796 * Ignore refresh error, user should have visibility.
1797 * This can happen if a NFS mount goes bad (e.g. server
1798 * revokes perms or goes down).
1799 */
1800 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1801 (info->flags & MNT_WAIT)) &&
1802 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
1803 /* ignore error */
1804 }
1805 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1806
1807 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1808 (info->flags & MNT_WAIT)) &&
1809 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
1810 /* ignore error */
1811 }
1812 vsp->f_flag = 0;
1813 if (mp->mnt_flag & MNT_RDONLY)
1814 vsp->f_flag |= ST_RDONLY;
1815 if (mp->mnt_flag & MNT_NOSUID)
1816 vsp->f_flag |= ST_NOSUID;
1817
1818 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
1819 if (error) {
1820 info->error = error;
1821 return(-1);
1822 }
1823 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
1824 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
1825 kfree(freepath, M_TEMP);
1826
1827 error = copyout(sp, info->sfsp, sizeof(*sp));
1828 if (error == 0)
1829 error = copyout(vsp, info->vsfsp, sizeof(*vsp));
1830 if (error) {
1831 info->error = error;
1832 return (-1);
1833 }
1834 ++info->sfsp;
1835 ++info->vsfsp;
1836 }
1837 info->count++;
1838 return(0);
1839 }
1840
1841
1842 /*
1843 * fchdir_args(int fd)
1844 *
1845 * Change current working directory to a given file descriptor.
1846 */
1847 int
sys_fchdir(struct sysmsg * sysmsg,const struct fchdir_args * uap)1848 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap)
1849 {
1850 struct thread *td = curthread;
1851 struct proc *p = td->td_proc;
1852 struct filedesc *fdp = p->p_fd;
1853 struct vnode *vp, *ovp;
1854 struct mount *mp;
1855 struct file *fp;
1856 struct nchandle nch, onch, tnch;
1857 int error;
1858
1859 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
1860 return (error);
1861 lwkt_gettoken(&p->p_token);
1862 vp = (struct vnode *)fp->f_data;
1863 vref(vp);
1864 vn_lock(vp, LK_SHARED | LK_RETRY);
1865 if (fp->f_nchandle.ncp == NULL)
1866 error = ENOTDIR;
1867 else
1868 error = checkvp_chdir(vp, td);
1869 if (error) {
1870 vput(vp);
1871 goto done;
1872 }
1873 cache_copy(&fp->f_nchandle, &nch);
1874
1875 /*
1876 * If the ncp has become a mount point, traverse through
1877 * the mount point.
1878 */
1879
1880 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
1881 (mp = cache_findmount(&nch)) != NULL
1882 ) {
1883 error = nlookup_mp(mp, &tnch);
1884 if (error == 0) {
1885 cache_unlock(&tnch); /* leave ref intact */
1886 vput(vp);
1887 vp = tnch.ncp->nc_vp;
1888 error = vget(vp, LK_SHARED);
1889 KKASSERT(error == 0);
1890 cache_drop(&nch);
1891 nch = tnch;
1892 }
1893 cache_dropmount(mp);
1894 }
1895 if (error == 0) {
1896 spin_lock(&fdp->fd_spin);
1897 ovp = fdp->fd_cdir;
1898 onch = fdp->fd_ncdir;
1899 fdp->fd_cdir = vp;
1900 fdp->fd_ncdir = nch;
1901 spin_unlock(&fdp->fd_spin);
1902 vn_unlock(vp); /* leave ref intact */
1903 cache_drop(&onch);
1904 vrele(ovp);
1905 } else {
1906 cache_drop(&nch);
1907 vput(vp);
1908 }
1909 fdrop(fp);
1910 done:
1911 lwkt_reltoken(&p->p_token);
1912 return (error);
1913 }
1914
1915 int
kern_chdir(struct nlookupdata * nd)1916 kern_chdir(struct nlookupdata *nd)
1917 {
1918 struct thread *td = curthread;
1919 struct proc *p = td->td_proc;
1920 struct filedesc *fdp = p->p_fd;
1921 struct vnode *vp, *ovp;
1922 struct nchandle onch;
1923 int error;
1924
1925 nd->nl_flags |= NLC_SHAREDLOCK;
1926 if ((error = nlookup(nd)) != 0)
1927 return (error);
1928 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
1929 return (ENOENT);
1930 if ((error = vget(vp, LK_SHARED)) != 0)
1931 return (error);
1932
1933 lwkt_gettoken(&p->p_token);
1934 error = checkvp_chdir(vp, td);
1935 vn_unlock(vp);
1936 if (error == 0) {
1937 spin_lock(&fdp->fd_spin);
1938 ovp = fdp->fd_cdir;
1939 onch = fdp->fd_ncdir;
1940 fdp->fd_ncdir = nd->nl_nch;
1941 fdp->fd_cdir = vp;
1942 spin_unlock(&fdp->fd_spin);
1943 cache_unlock(&nd->nl_nch); /* leave reference intact */
1944 cache_drop(&onch);
1945 vrele(ovp);
1946 cache_zero(&nd->nl_nch);
1947 } else {
1948 vrele(vp);
1949 }
1950 lwkt_reltoken(&p->p_token);
1951 return (error);
1952 }
1953
1954 /*
1955 * chdir_args(char *path)
1956 *
1957 * Change current working directory (``.'').
1958 */
1959 int
sys_chdir(struct sysmsg * sysmsg,const struct chdir_args * uap)1960 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap)
1961 {
1962 struct nlookupdata nd;
1963 int error;
1964
1965 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
1966 if (error == 0)
1967 error = kern_chdir(&nd);
1968 nlookup_done(&nd);
1969 return (error);
1970 }
1971
1972 /*
1973 * Helper function for raised chroot(2) security function: Refuse if
1974 * any filedescriptors are open directories.
1975 */
1976 static int
chroot_refuse_vdir_fds(thread_t td,struct filedesc * fdp)1977 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
1978 {
1979 struct vnode *vp;
1980 struct file *fp;
1981 int error;
1982 int fd;
1983
1984 for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1985 if ((error = holdvnode(td, fd, &fp)) != 0)
1986 continue;
1987 vp = (struct vnode *)fp->f_data;
1988 if (vp->v_type != VDIR) {
1989 fdrop(fp);
1990 continue;
1991 }
1992 fdrop(fp);
1993 return(EPERM);
1994 }
1995 return (0);
1996 }
1997
1998 /*
1999 * This sysctl determines if we will allow a process to chroot(2) if it
2000 * has a directory open:
2001 * 0: disallowed for all processes.
2002 * 1: allowed for processes that were not already chroot(2)'ed.
2003 * 2: allowed for all processes.
2004 */
2005
2006 static int chroot_allow_open_directories = 1;
2007
2008 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
2009 &chroot_allow_open_directories, 0, "");
2010
2011 /*
2012 * chroot to the specified namecache entry. We obtain the vp from the
2013 * namecache data. The passed ncp must be locked and referenced and will
2014 * remain locked and referenced on return.
2015 */
2016 int
kern_chroot(struct nchandle * nch)2017 kern_chroot(struct nchandle *nch)
2018 {
2019 struct thread *td = curthread;
2020 struct proc *p = td->td_proc;
2021 struct filedesc *fdp = p->p_fd;
2022 struct vnode *vp;
2023 int error;
2024
2025 /*
2026 * Only privileged user can chroot
2027 */
2028 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
2029 if (error)
2030 return (error);
2031
2032 /*
2033 * Disallow open directory descriptors (fchdir() breakouts).
2034 */
2035 if (chroot_allow_open_directories == 0 ||
2036 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
2037 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
2038 return (error);
2039 }
2040 if ((vp = nch->ncp->nc_vp) == NULL)
2041 return (ENOENT);
2042
2043 if ((error = vget(vp, LK_SHARED)) != 0)
2044 return (error);
2045
2046 /*
2047 * Check the validity of vp as a directory to change to and
2048 * associate it with rdir/jdir.
2049 */
2050 error = checkvp_chdir(vp, td);
2051 vn_unlock(vp); /* leave reference intact */
2052 if (error == 0) {
2053 lwkt_gettoken(&p->p_token);
2054 vrele(fdp->fd_rdir);
2055 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
2056 cache_drop(&fdp->fd_nrdir);
2057 cache_copy(nch, &fdp->fd_nrdir);
2058 if (fdp->fd_jdir == NULL) {
2059 fdp->fd_jdir = vp;
2060 vref(fdp->fd_jdir);
2061 cache_copy(nch, &fdp->fd_njdir);
2062 }
2063 if ((p->p_flags & P_DIDCHROOT) == 0) {
2064 p->p_flags |= P_DIDCHROOT;
2065 if (p->p_depth <= 65535 - 32)
2066 p->p_depth += 32;
2067 }
2068 lwkt_reltoken(&p->p_token);
2069 } else {
2070 vrele(vp);
2071 }
2072 return (error);
2073 }
2074
2075 /*
2076 * chroot_args(char *path)
2077 *
2078 * Change notion of root (``/'') directory.
2079 */
2080 int
sys_chroot(struct sysmsg * sysmsg,const struct chroot_args * uap)2081 sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap)
2082 {
2083 struct thread *td __debugvar = curthread;
2084 struct nlookupdata nd;
2085 int error;
2086
2087 KKASSERT(td->td_proc);
2088 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2089 if (error == 0) {
2090 nd.nl_flags |= NLC_EXEC;
2091 error = nlookup(&nd);
2092 if (error == 0)
2093 error = kern_chroot(&nd.nl_nch);
2094 }
2095 nlookup_done(&nd);
2096 return(error);
2097 }
2098
2099 int
sys_chroot_kernel(struct sysmsg * sysmsg,const struct chroot_kernel_args * uap)2100 sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap)
2101 {
2102 struct thread *td = curthread;
2103 struct nlookupdata nd;
2104 struct nchandle *nch;
2105 struct vnode *vp;
2106 int error;
2107
2108 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2109 if (error)
2110 goto error_nond;
2111
2112 error = nlookup(&nd);
2113 if (error)
2114 goto error_out;
2115
2116 nch = &nd.nl_nch;
2117
2118 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
2119 if (error)
2120 goto error_out;
2121
2122 if ((vp = nch->ncp->nc_vp) == NULL) {
2123 error = ENOENT;
2124 goto error_out;
2125 }
2126
2127 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
2128 goto error_out;
2129
2130 vfs_cache_setroot(vp, cache_hold(nch));
2131
2132 error_out:
2133 nlookup_done(&nd);
2134 error_nond:
2135 return(error);
2136 }
2137
2138 /*
2139 * Common routine for chroot and chdir. Given a locked, referenced vnode,
2140 * determine whether it is legal to chdir to the vnode. The vnode's state
2141 * is not changed by this call.
2142 */
2143 static int
checkvp_chdir(struct vnode * vp,struct thread * td)2144 checkvp_chdir(struct vnode *vp, struct thread *td)
2145 {
2146 int error;
2147
2148 if (vp->v_type != VDIR)
2149 error = ENOTDIR;
2150 else
2151 error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
2152 return (error);
2153 }
2154
2155 int
kern_open(struct nlookupdata * nd,int oflags,int mode,int * res)2156 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
2157 {
2158 struct thread *td = curthread;
2159 struct proc *p = td->td_proc;
2160 struct lwp *lp = td->td_lwp;
2161 struct filedesc *fdp = p->p_fd;
2162 int cmode, flags;
2163 struct file *nfp;
2164 struct file *fp;
2165 int type, indx, error = 0;
2166 struct flock lf;
2167
2168 if ((oflags & O_ACCMODE) == O_ACCMODE)
2169 return (EINVAL);
2170 flags = FFLAGS(oflags);
2171 error = falloc(lp, &nfp, NULL);
2172 if (error)
2173 return (error);
2174 fp = nfp;
2175 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2176
2177 /*
2178 * Call vn_open() to do the lookup and assign the vnode to the
2179 * file pointer. vn_open() does not change the ref count on fp
2180 * and the vnode, on success, will be inherited by the file pointer
2181 * and unlocked.
2182 *
2183 * Request a shared lock on the vnode if possible.
2184 *
2185 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
2186 * lock for O_RDWR opens on executables in order to avoid a VTEXT
2187 * detection race. The NLC_EXCLLOCK_IFEXEC handles this case.
2188 *
2189 * NOTE: We need a flag to separate terminal vnode locking from
2190 * parent locking. O_CREAT needs parent locking, but O_TRUNC
2191 * and O_RDWR only need to lock the terminal vnode exclusively.
2192 */
2193 nd->nl_flags |= NLC_LOCKVP;
2194 if ((flags & (O_CREAT|O_TRUNC)) == 0) {
2195 nd->nl_flags |= NLC_SHAREDLOCK;
2196 if (flags & O_RDWR)
2197 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC;
2198 }
2199
2200 /*
2201 * Issue the vn_open, passing in the referenced fp. the vn_open()
2202 * is allowed to replace fp by fdrop()ing it and returning its own
2203 * referenced fp.
2204 */
2205 nfp = fp;
2206 error = vn_open(nd, &nfp, flags, cmode);
2207 fp = nfp;
2208 nlookup_done(nd);
2209
2210 /*
2211 * Deal with any error condition
2212 */
2213 if (error) {
2214 fdrop(fp); /* our ref */
2215 if (error == ERESTART)
2216 error = EINTR;
2217 return (error);
2218 }
2219
2220 /*
2221 * Reserve a file descriptor.
2222 */
2223 if ((error = fdalloc(p, 0, &indx)) != 0) {
2224 fdrop(fp);
2225 return (error);
2226 }
2227
2228 /*
2229 * Handle advisory lock flags. This is only supported with vnodes.
2230 * For things like /dev/fd/N we might not actually get a vnode.
2231 */
2232 if ((flags & (O_EXLOCK | O_SHLOCK)) && fp->f_type == DTYPE_VNODE) {
2233 struct vnode *vp;
2234
2235 vp = (struct vnode *)fp->f_data;
2236 vref(vp);
2237
2238 lf.l_whence = SEEK_SET;
2239 lf.l_start = 0;
2240 lf.l_len = 0;
2241 if (flags & O_EXLOCK)
2242 lf.l_type = F_WRLCK;
2243 else
2244 lf.l_type = F_RDLCK;
2245 if (flags & FNONBLOCK)
2246 type = 0;
2247 else
2248 type = F_WAIT;
2249
2250 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
2251 if (error) {
2252 /*
2253 * lock request failed. Clean up the reserved
2254 * descriptor.
2255 */
2256 vrele(vp);
2257 fsetfd(fdp, NULL, indx);
2258 fdrop(fp);
2259 return (error);
2260 }
2261 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
2262 vrele(vp);
2263 }
2264
2265 /*
2266 * release our private reference, leaving the one associated with the
2267 * descriptor table intact.
2268 */
2269 if (oflags & O_CLOEXEC)
2270 fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
2271 fsetfd(fdp, fp, indx);
2272 fdrop(fp);
2273 *res = indx;
2274
2275 return (error);
2276 }
2277
2278 /*
2279 * open_args(char *path, int flags, int mode)
2280 *
2281 * Check permissions, allocate an open file structure,
2282 * and call the device open routine if any.
2283 */
2284 int
sys_open(struct sysmsg * sysmsg,const struct open_args * uap)2285 sys_open(struct sysmsg *sysmsg, const struct open_args *uap)
2286 {
2287 struct nlookupdata nd;
2288 int error;
2289
2290 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2291 if (error == 0) {
2292 error = kern_open(&nd, uap->flags,
2293 uap->mode, &sysmsg->sysmsg_result);
2294 }
2295 nlookup_done(&nd);
2296 return (error);
2297 }
2298
2299 /*
2300 * openat_args(int fd, char *path, int flags, int mode)
2301 */
2302 int
sys_openat(struct sysmsg * sysmsg,const struct openat_args * uap)2303 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap)
2304 {
2305 struct nlookupdata nd;
2306 int error;
2307 struct file *fp;
2308
2309 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2310 if (error == 0) {
2311 error = kern_open(&nd, uap->flags, uap->mode,
2312 &sysmsg->sysmsg_result);
2313 }
2314 nlookup_done_at(&nd, fp);
2315 return (error);
2316 }
2317
2318 int
kern_mknod(struct nlookupdata * nd,int mode,int rmajor,int rminor)2319 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
2320 {
2321 struct thread *td = curthread;
2322 struct proc *p = td->td_proc;
2323 struct vnode *vp;
2324 struct vattr vattr;
2325 int error;
2326 int whiteout = 0;
2327
2328 KKASSERT(p);
2329
2330 VATTR_NULL(&vattr);
2331 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2332 vattr.va_rmajor = rmajor;
2333 vattr.va_rminor = rminor;
2334
2335 switch (mode & S_IFMT) {
2336 case S_IFMT: /* used by badsect to flag bad sectors */
2337 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_BAD);
2338 vattr.va_type = VBAD;
2339 break;
2340 case S_IFCHR:
2341 error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
2342 vattr.va_type = VCHR;
2343 break;
2344 case S_IFBLK:
2345 error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
2346 vattr.va_type = VBLK;
2347 break;
2348 case S_IFWHT:
2349 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_WHT);
2350 whiteout = 1;
2351 break;
2352 case S_IFDIR: /* special directories support for HAMMER */
2353 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_DIR);
2354 vattr.va_type = VDIR;
2355 break;
2356 case S_IFIFO:
2357 return (kern_mkfifo(nd, mode));
2358 break;
2359 default:
2360 error = EINVAL;
2361 break;
2362 }
2363
2364 if (error)
2365 return (error);
2366
2367 bwillinode(1);
2368 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2369 if ((error = nlookup(nd)) != 0)
2370 return (error);
2371 if (nd->nl_nch.ncp->nc_vp)
2372 return (EEXIST);
2373 if (nd->nl_dvp == NULL)
2374 return (EINVAL);
2375 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2376 return (error);
2377
2378 if (whiteout) {
2379 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
2380 nd->nl_cred, NAMEI_CREATE);
2381 } else {
2382 vp = NULL;
2383 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
2384 &vp, nd->nl_cred, &vattr);
2385 if (error == 0)
2386 vput(vp);
2387 }
2388 return (error);
2389 }
2390
2391 /*
2392 * mknod_args(char *path, int mode, int dev)
2393 *
2394 * Create a special file.
2395 */
2396 int
sys_mknod(struct sysmsg * sysmsg,const struct mknod_args * uap)2397 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap)
2398 {
2399 struct nlookupdata nd;
2400 int error;
2401
2402 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2403 if (error == 0) {
2404 error = kern_mknod(&nd, uap->mode,
2405 umajor(uap->dev), uminor(uap->dev));
2406 }
2407 nlookup_done(&nd);
2408 return (error);
2409 }
2410
2411 /*
2412 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
2413 *
2414 * Create a special file. The path is relative to the directory associated
2415 * with fd.
2416 */
2417 int
sys_mknodat(struct sysmsg * sysmsg,const struct mknodat_args * uap)2418 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap)
2419 {
2420 struct nlookupdata nd;
2421 struct file *fp;
2422 int error;
2423
2424 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2425 if (error == 0) {
2426 error = kern_mknod(&nd, uap->mode,
2427 umajor(uap->dev), uminor(uap->dev));
2428 }
2429 nlookup_done_at(&nd, fp);
2430 return (error);
2431 }
2432
2433 int
kern_mkfifo(struct nlookupdata * nd,int mode)2434 kern_mkfifo(struct nlookupdata *nd, int mode)
2435 {
2436 struct thread *td = curthread;
2437 struct proc *p = td->td_proc;
2438 struct vattr vattr;
2439 struct vnode *vp;
2440 int error;
2441
2442 bwillinode(1);
2443
2444 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2445 if ((error = nlookup(nd)) != 0)
2446 return (error);
2447 if (nd->nl_nch.ncp->nc_vp)
2448 return (EEXIST);
2449 if (nd->nl_dvp == NULL)
2450 return (EINVAL);
2451 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2452 return (error);
2453
2454 VATTR_NULL(&vattr);
2455 vattr.va_type = VFIFO;
2456 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
2457 vp = NULL;
2458 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
2459 if (error == 0)
2460 vput(vp);
2461 return (error);
2462 }
2463
2464 /*
2465 * mkfifo_args(char *path, int mode)
2466 *
2467 * Create a named pipe.
2468 */
2469 int
sys_mkfifo(struct sysmsg * sysmsg,const struct mkfifo_args * uap)2470 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap)
2471 {
2472 struct nlookupdata nd;
2473 int error;
2474
2475 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2476 if (error == 0)
2477 error = kern_mkfifo(&nd, uap->mode);
2478 nlookup_done(&nd);
2479 return (error);
2480 }
2481
2482 /*
2483 * mkfifoat_args(int fd, char *path, mode_t mode)
2484 *
2485 * Create a named pipe. The path is relative to the directory associated
2486 * with fd.
2487 */
2488 int
sys_mkfifoat(struct sysmsg * sysmsg,const struct mkfifoat_args * uap)2489 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap)
2490 {
2491 struct nlookupdata nd;
2492 struct file *fp;
2493 int error;
2494
2495 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2496 if (error == 0)
2497 error = kern_mkfifo(&nd, uap->mode);
2498 nlookup_done_at(&nd, fp);
2499 return (error);
2500 }
2501
/*
 * Policy knob read by can_hardlink().  When non-zero, a caller for which
 * caps_priv_check(cred, SYSCAP_NOVFS_LINK) fails may only hard link files
 * whose owner uid equals its own.  Defaults to 0 (check disabled) and is
 * registered read/write via SYSCTL_INT under the _security node.
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
/*
 * Policy knob read by can_hardlink().  When non-zero, such a caller may
 * only hard link files it owns or whose group passes groupmember() for
 * its credentials.  Defaults to 0 (check disabled) and is registered
 * read/write via SYSCTL_INT under the _security node.
 */
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
2512
2513 static int
can_hardlink(struct vnode * vp,struct thread * td,struct ucred * cred)2514 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
2515 {
2516 struct vattr va;
2517 int error;
2518
2519 /*
2520 * Shortcut if disabled
2521 */
2522 if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
2523 return (0);
2524
2525 /*
2526 * Privileged user can always hardlink
2527 */
2528 if (caps_priv_check(cred, SYSCAP_NOVFS_LINK) == 0)
2529 return (0);
2530
2531 /*
2532 * Otherwise only if the originating file is owned by the
2533 * same user or group. Note that any group is allowed if
2534 * the file is owned by the caller.
2535 */
2536 error = VOP_GETATTR(vp, &va);
2537 if (error != 0)
2538 return (error);
2539
2540 if (hardlink_check_uid) {
2541 if (cred->cr_uid != va.va_uid)
2542 return (EPERM);
2543 }
2544
2545 if (hardlink_check_gid) {
2546 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
2547 return (EPERM);
2548 }
2549
2550 return (0);
2551 }
2552
/*
 * Create a hard link, named by linknd, to the existing file named by nd.
 *
 * nd     - lookup for the source file; NLC_WRITE | NLC_OWN | NLC_HLINK are
 *          added to its flags before it is resolved here.
 * linknd - lookup for the new name; NLC_CREATE | NLC_REFDVP are added to
 *          its flags before it is resolved here.
 *
 * Returns 0 or an errno value: EPERM when the source is a directory,
 * EEXIST when the new name already resolves to a vnode, EINVAL when the
 * target lookup yields no directory vnode, otherwise whatever nlookup(),
 * ncp_writechk(), vget(), vn_lock(), can_hardlink() or VOP_NLINK() report.
 *
 * Neither lookup is torn down here; sys_link() and sys_linkat() call the
 * matching nlookup_done*() on both after this function returns.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	/* Record that the source ncp is no longer locked before unlocking. */
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	/*
	 * From here on vp is unlocked but still held from the vget() above,
	 * so every error return below drops it with vrele().
	 */
	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if (linknd->nl_dvp == NULL) {
		vrele(vp);
		return (EINVAL);
	}
	VFS_MODIFYING(vp->v_mount);
	/* Re-lock the source vnode now that the target has been resolved. */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP, subject to the hardlink policy
	 * enforced by can_hardlink().
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
		    vp, linknd->nl_cred);
	}
	/* vp is locked again here, so vput() rather than vrele(). */
	vput(vp);
	return (error);
}
2622
2623 /*
2624 * link_args(char *path, char *link)
2625 *
2626 * Make a hard file link.
2627 */
2628 int
sys_link(struct sysmsg * sysmsg,const struct link_args * uap)2629 sys_link(struct sysmsg *sysmsg, const struct link_args *uap)
2630 {
2631 struct nlookupdata nd, linknd;
2632 int error;
2633
2634 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2635 if (error == 0) {
2636 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
2637 if (error == 0)
2638 error = kern_link(&nd, &linknd);
2639 nlookup_done(&linknd);
2640 }
2641 nlookup_done(&nd);
2642 return (error);
2643 }
2644
2645 /*
2646 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
2647 *
2648 * Make a hard file link. The path1 argument is relative to the directory
2649 * associated with fd1, and similarly the path2 argument is relative to
2650 * the directory associated with fd2.
2651 */
2652 int
sys_linkat(struct sysmsg * sysmsg,const struct linkat_args * uap)2653 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap)
2654 {
2655 struct nlookupdata nd, linknd;
2656 struct file *fp1, *fp2;
2657 int error;
2658
2659 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
2660 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
2661 if (error == 0) {
2662 error = nlookup_init_at(&linknd, &fp2, uap->fd2,
2663 uap->path2, UIO_USERSPACE, 0);
2664 if (error == 0)
2665 error = kern_link(&nd, &linknd);
2666 nlookup_done_at(&linknd, fp2);
2667 }
2668 nlookup_done_at(&nd, fp1);
2669 return (error);
2670 }
2671
2672 int
kern_symlink(struct nlookupdata * nd,char * path,int mode)2673 kern_symlink(struct nlookupdata *nd, char *path, int mode)
2674 {
2675 struct vattr vattr;
2676 struct vnode *vp;
2677 struct vnode *dvp;
2678 int error;
2679
2680 bwillinode(1);
2681 nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
2682 if ((error = nlookup(nd)) != 0)
2683 return (error);
2684 if (nd->nl_nch.ncp->nc_vp)
2685 return (EEXIST);
2686 if (nd->nl_dvp == NULL)
2687 return (EINVAL);
2688 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2689 return (error);
2690 dvp = nd->nl_dvp;
2691 VATTR_NULL(&vattr);
2692 vattr.va_mode = mode;
2693 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
2694 if (error == 0)
2695 vput(vp);
2696 return (error);
2697 }
2698
2699 /*
2700 * symlink(char *path, char *link)
2701 *
2702 * Make a symbolic link.
2703 */
2704 int
sys_symlink(struct sysmsg * sysmsg,const struct symlink_args * uap)2705 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap)
2706 {
2707 struct thread *td = curthread;
2708 struct nlookupdata nd;
2709 char *path;
2710 int error;
2711 int mode;
2712
2713 path = objcache_get(namei_oc, M_WAITOK);
2714 error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
2715 if (error == 0) {
2716 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
2717 if (error == 0) {
2718 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2719 error = kern_symlink(&nd, path, mode);
2720 }
2721 nlookup_done(&nd);
2722 }
2723 objcache_put(namei_oc, path);
2724 return (error);
2725 }
2726
2727 /*
2728 * symlinkat_args(char *path1, int fd, char *path2)
2729 *
2730 * Make a symbolic link. The path2 argument is relative to the directory
2731 * associated with fd.
2732 */
2733 int
sys_symlinkat(struct sysmsg * sysmsg,const struct symlinkat_args * uap)2734 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap)
2735 {
2736 struct thread *td = curthread;
2737 struct nlookupdata nd;
2738 struct file *fp;
2739 char *path1;
2740 int error;
2741 int mode;
2742
2743 path1 = objcache_get(namei_oc, M_WAITOK);
2744 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
2745 if (error == 0) {
2746 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
2747 UIO_USERSPACE, 0);
2748 if (error == 0) {
2749 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
2750 error = kern_symlink(&nd, path1, mode);
2751 }
2752 nlookup_done_at(&nd, fp);
2753 }
2754 objcache_put(namei_oc, path1);
2755 return (error);
2756 }
2757
2758 /*
2759 * undelete_args(char *path)
2760 *
2761 * Delete a whiteout from the filesystem.
2762 */
2763 int
sys_undelete(struct sysmsg * sysmsg,const struct undelete_args * uap)2764 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap)
2765 {
2766 struct nlookupdata nd;
2767 int error;
2768
2769 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2770 bwillinode(1);
2771 nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
2772 if (error == 0)
2773 error = nlookup(&nd);
2774 if (error == 0 && nd.nl_dvp == NULL)
2775 error = EINVAL;
2776 if (error == 0)
2777 error = ncp_writechk(&nd.nl_nch);
2778 if (error == 0) {
2779 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
2780 NAMEI_DELETE);
2781 }
2782 nlookup_done(&nd);
2783 return (error);
2784 }
2785
2786 int
kern_unlink(struct nlookupdata * nd)2787 kern_unlink(struct nlookupdata *nd)
2788 {
2789 int error;
2790
2791 bwillinode(1);
2792 nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
2793 if ((error = nlookup(nd)) != 0)
2794 return (error);
2795 if (nd->nl_dvp == NULL)
2796 return EINVAL;
2797 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
2798 return (error);
2799 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
2800 return (error);
2801 }
2802
2803 /*
2804 * unlink_args(char *path)
2805 *
2806 * Delete a name from the filesystem.
2807 */
2808 int
sys_unlink(struct sysmsg * sysmsg,const struct unlink_args * uap)2809 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap)
2810 {
2811 struct nlookupdata nd;
2812 int error;
2813
2814 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
2815 if (error == 0)
2816 error = kern_unlink(&nd);
2817 nlookup_done(&nd);
2818 return (error);
2819 }
2820
2821
2822 /*
2823 * unlinkat_args(int fd, char *path, int flags)
2824 *
2825 * Delete the file or directory entry pointed to by fd/path.
2826 */
2827 int
sys_unlinkat(struct sysmsg * sysmsg,const struct unlinkat_args * uap)2828 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap)
2829 {
2830 struct nlookupdata nd;
2831 struct file *fp;
2832 int error;
2833
2834 if (uap->flags & ~AT_REMOVEDIR)
2835 return (EINVAL);
2836
2837 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
2838 if (error == 0) {
2839 if (uap->flags & AT_REMOVEDIR)
2840 error = kern_rmdir(&nd);
2841 else
2842 error = kern_unlink(&nd);
2843 }
2844 nlookup_done_at(&nd, fp);
2845 return (error);
2846 }
2847
2848 int
kern_lseek(int fd,off_t offset,int whence,off_t * res)2849 kern_lseek(int fd, off_t offset, int whence, off_t *res)
2850 {
2851 struct thread *td = curthread;
2852 struct file *fp;
2853 int error;
2854
2855 fp = holdfp(td, fd, -1);
2856 if (fp == NULL)
2857 return (EBADF);
2858
2859 error = fo_seek(fp, offset, whence, res);
2860 dropfp(td, fd, fp);
2861
2862 return (error);
2863 }
2864
2865 /*
2866 * lseek_args(int fd, int pad, off_t offset, int whence)
2867 *
2868 * Reposition read/write file offset.
2869 */
2870 int
sys_lseek(struct sysmsg * sysmsg,const struct lseek_args * uap)2871 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap)
2872 {
2873 int error;
2874
2875 error = kern_lseek(uap->fd, uap->offset, uap->whence,
2876 &sysmsg->sysmsg_offset);
2877
2878 return (error);
2879 }
2880
2881 /*
2882 * Check if current process can access given file. amode is a bitmask of *_OK
2883 * access bits. flags is a bitmask of AT_* flags.
2884 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	/* AT_EACCESS is the only AT_* flag accepted here. */
	if (flags & ~AT_EACCESS)
		return (EINVAL);
	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* The ncp write check is only run when write access is queried. */
	if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
	if (error)
		return (error);

	/* amode == 0 means only check for existence. */
	if (amode) {
		/* Translate the *_OK request bits into V* access bits. */
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		/*
		 * When VWRITE is requested vn_writechk() must pass first;
		 * its error is returned without calling VOP_ACCESS_FLAGS().
		 */
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp)) == 0) {
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
		}

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry with the ncp held exclusively.  This is a hack
		 * at the moment.
		 */
		if (error == ESTALE) {
			u_int dummy_gen;

			vput(vp);
			cache_unlock(&nd->nl_nch);
			cache_lock(&nd->nl_nch);
			dummy_gen = nd->nl_nch.ncp->nc_generation;
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, &dummy_gen,
			    nd->nl_cred);
			if (error == 0) {
				/* vp was released above; fetch it again. */
				vp = NULL;
				goto retry;
			}
			/* vp has already been released on this path. */
			return(error);
		}
	}
	vput(vp);
	return (error);
}
2942
2943 /*
2944 * access_args(char *path, int flags)
2945 *
2946 * Check access permissions.
2947 */
2948 int
sys_access(struct sysmsg * sysmsg,const struct access_args * uap)2949 sys_access(struct sysmsg *sysmsg, const struct access_args *uap)
2950 {
2951 struct nlookupdata nd;
2952 int error;
2953
2954 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2955 if (error == 0)
2956 error = kern_access(&nd, uap->flags, 0);
2957 nlookup_done(&nd);
2958 return (error);
2959 }
2960
2961
2962 /*
2963 * eaccess_args(char *path, int flags)
2964 *
2965 * Check access permissions.
2966 */
2967 int
sys_eaccess(struct sysmsg * sysmsg,const struct eaccess_args * uap)2968 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap)
2969 {
2970 struct nlookupdata nd;
2971 int error;
2972
2973 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
2974 if (error == 0)
2975 error = kern_access(&nd, uap->flags, AT_EACCESS);
2976 nlookup_done(&nd);
2977 return (error);
2978 }
2979
2980
2981 /*
2982 * faccessat_args(int fd, char *path, int amode, int flags)
2983 *
2984 * Check access permissions.
2985 */
2986 int
sys_faccessat(struct sysmsg * sysmsg,const struct faccessat_args * uap)2987 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap)
2988 {
2989 struct nlookupdata nd;
2990 struct file *fp;
2991 int error;
2992
2993 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
2994 NLC_FOLLOW);
2995 if (error == 0)
2996 error = kern_access(&nd, uap->amode, uap->flags);
2997 nlookup_done_at(&nd, fp);
2998 return (error);
2999 }
3000
/*
 * Resolve nd and fill *st with the status of the vnode it names, using
 * vn_stat() with the lookup's credentials.
 *
 * Returns ENOENT when the resolved entry has no vnode, otherwise the
 * error from nlookup(), cache_vref(), vn_stat() or, on the ESTALE retry
 * path, cache_resolve().  nlookup_done() is not called here.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	/*
	 * The active variant takes only a reference on the vnode
	 * (cache_vref), paired with vrele() below; the disabled variant
	 * used vget()/vput() instead.
	 */
#if 1
	error = cache_vref(&nd->nl_nch, NULL, &vp);
#else
	error = vget(vp, LK_SHARED);
#endif
	if (error)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the
	 * entry with the ncp held exclusively.  This is a hack
	 * at the moment.
	 */
	if (error == ESTALE) {
		u_int dummy_gen;
#if 1
		vrele(vp);
#else
		vput(vp);
#endif
		cache_unlock(&nd->nl_nch);
		cache_lock(&nd->nl_nch);
		dummy_gen = nd->nl_nch.ncp->nc_generation;
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, &dummy_gen, nd->nl_cred);
		/* Retry from the top once the entry has been re-resolved. */
		if (error == 0)
			goto again;
	} else {
#if 1
		vrele(vp);
#else
		vput(vp);
#endif
	}
	return (error);
}
3051
3052 /*
3053 * stat_args(char *path, struct stat *ub)
3054 *
3055 * Get file status; this version follows links.
3056 */
3057 int
sys_stat(struct sysmsg * sysmsg,const struct stat_args * uap)3058 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap)
3059 {
3060 struct nlookupdata nd;
3061 struct stat st;
3062 int error;
3063
3064 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3065 if (error == 0) {
3066 error = kern_stat(&nd, &st);
3067 if (error == 0)
3068 error = copyout(&st, uap->ub, sizeof(*uap->ub));
3069 }
3070 nlookup_done(&nd);
3071 return (error);
3072 }
3073
3074 /*
3075 * lstat_args(char *path, struct stat *ub)
3076 *
3077 * Get file status; this version does not follow links.
3078 */
3079 int
sys_lstat(struct sysmsg * sysmsg,const struct lstat_args * uap)3080 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap)
3081 {
3082 struct nlookupdata nd;
3083 struct stat st;
3084 int error;
3085
3086 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3087 if (error == 0) {
3088 error = kern_stat(&nd, &st);
3089 if (error == 0)
3090 error = copyout(&st, uap->ub, sizeof(*uap->ub));
3091 }
3092 nlookup_done(&nd);
3093 return (error);
3094 }
3095
3096 /*
3097 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
3098 *
3099 * Get status of file pointed to by fd/path.
3100 */
3101 int
sys_fstatat(struct sysmsg * sysmsg,const struct fstatat_args * uap)3102 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap)
3103 {
3104 struct nlookupdata nd;
3105 struct stat st;
3106 int error;
3107 int flags;
3108 struct file *fp;
3109
3110 if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3111 return (EINVAL);
3112
3113 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3114
3115 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3116 UIO_USERSPACE, flags);
3117 if (error == 0) {
3118 error = kern_stat(&nd, &st);
3119 if (error == 0)
3120 error = copyout(&st, uap->sb, sizeof(*uap->sb));
3121 }
3122 nlookup_done_at(&nd, fp);
3123 return (error);
3124 }
3125
3126 static int
kern_pathconf(char * path,int name,int flags,register_t * sysmsg_regp)3127 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
3128 {
3129 struct nlookupdata nd;
3130 struct vnode *vp;
3131 int error;
3132
3133 vp = NULL;
3134 error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
3135 if (error == 0)
3136 error = nlookup(&nd);
3137 if (error == 0)
3138 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
3139 nlookup_done(&nd);
3140 if (error == 0) {
3141 error = VOP_PATHCONF(vp, name, sysmsg_regp);
3142 vput(vp);
3143 }
3144 return (error);
3145 }
3146
3147 /*
 * pathconf_args(char *path, int name)
3149 *
3150 * Get configurable pathname variables.
3151 */
3152 int
sys_pathconf(struct sysmsg * sysmsg,const struct pathconf_args * uap)3153 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap)
3154 {
3155 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
3156 &sysmsg->sysmsg_reg));
3157 }
3158
3159 /*
 * lpathconf_args(char *path, int name)
3161 *
3162 * Get configurable pathname variables, but don't follow symlinks.
3163 */
3164 int
sys_lpathconf(struct sysmsg * sysmsg,const struct lpathconf_args * uap)3165 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap)
3166 {
3167 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg));
3168 }
3169
3170 /*
3171 * XXX: daver
 * kern_readlink isn't properly split yet.  There is a copyin buried
 * in VOP_READLINK().
3174 */
3175 int
kern_readlink(struct nlookupdata * nd,char * buf,int count,int * res)3176 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res)
3177 {
3178 struct thread *td = curthread;
3179 struct vnode *vp;
3180 struct iovec aiov;
3181 struct uio auio;
3182 int error;
3183
3184 nd->nl_flags |= NLC_SHAREDLOCK;
3185 if ((error = nlookup(nd)) != 0)
3186 return (error);
3187 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
3188 if (error)
3189 return (error);
3190 if (vp->v_type != VLNK) {
3191 error = EINVAL;
3192 } else {
3193 aiov.iov_base = buf;
3194 aiov.iov_len = count;
3195 auio.uio_iov = &aiov;
3196 auio.uio_iovcnt = 1;
3197 auio.uio_offset = 0;
3198 auio.uio_rw = UIO_READ;
3199 auio.uio_segflg = UIO_USERSPACE;
3200 auio.uio_td = td;
3201 auio.uio_resid = count;
3202 error = VOP_READLINK(vp, &auio, td->td_ucred);
3203 }
3204 vput(vp);
3205 *res = count - auio.uio_resid;
3206 return (error);
3207 }
3208
3209 /*
3210 * readlink_args(char *path, char *buf, int count)
3211 *
3212 * Return target name of a symbolic link.
3213 */
3214 int
sys_readlink(struct sysmsg * sysmsg,const struct readlink_args * uap)3215 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap)
3216 {
3217 struct nlookupdata nd;
3218 int error;
3219
3220 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3221 if (error == 0) {
3222 error = kern_readlink(&nd, uap->buf, uap->count,
3223 &sysmsg->sysmsg_result);
3224 }
3225 nlookup_done(&nd);
3226 return (error);
3227 }
3228
3229 /*
3230 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
3231 *
3232 * Return target name of a symbolic link. The path is relative to the
3233 * directory associated with fd.
3234 */
3235 int
sys_readlinkat(struct sysmsg * sysmsg,const struct readlinkat_args * uap)3236 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap)
3237 {
3238 struct nlookupdata nd;
3239 struct file *fp;
3240 int error;
3241
3242 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
3243 if (error == 0) {
3244 error = kern_readlink(&nd, uap->buf, uap->bufsize,
3245 &sysmsg->sysmsg_result);
3246 }
3247 nlookup_done_at(&nd, fp);
3248 return (error);
3249 }
3250
3251 static int
setfflags(struct vnode * vp,u_long flags)3252 setfflags(struct vnode *vp, u_long flags)
3253 {
3254 struct thread *td = curthread;
3255 int error;
3256 struct vattr vattr;
3257
3258 /*
3259 * Prevent non-root users from setting flags on devices. When
3260 * a device is reused, users can retain ownership of the device
3261 * if they are allowed to set flags and programs assume that
3262 * chown can't fail when done as root.
3263 */
3264 if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
3265 ((error =
3266 caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHFLAGS_DEV)) != 0))
3267 {
3268 return (error);
3269 }
3270
3271 /*
3272 * note: vget is required for any operation that might mod the vnode
3273 * so VINACTIVE is properly cleared.
3274 */
3275 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3276 VATTR_NULL(&vattr);
3277 vattr.va_flags = flags;
3278 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3279 vput(vp);
3280 }
3281 return (error);
3282 }
3283
3284 /*
3285 * chflags(const char *path, u_long flags)
3286 *
3287 * Change flags of a file given a path name.
3288 */
3289 int
sys_chflags(struct sysmsg * sysmsg,const struct chflags_args * uap)3290 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap)
3291 {
3292 struct nlookupdata nd;
3293 struct vnode *vp;
3294 int error;
3295
3296 vp = NULL;
3297 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3298 if (error == 0)
3299 error = nlookup(&nd);
3300 if (error == 0)
3301 error = ncp_writechk(&nd.nl_nch);
3302 if (error == 0)
3303 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3304 nlookup_done(&nd);
3305 if (error == 0) {
3306 error = setfflags(vp, uap->flags);
3307 vrele(vp);
3308 }
3309 return (error);
3310 }
3311
3312 /*
3313 * lchflags(const char *path, u_long flags)
3314 *
3315 * Change flags of a file given a path name, but don't follow symlinks.
3316 */
3317 int
sys_lchflags(struct sysmsg * sysmsg,const struct lchflags_args * uap)3318 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap)
3319 {
3320 struct nlookupdata nd;
3321 struct vnode *vp;
3322 int error;
3323
3324 vp = NULL;
3325 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3326 if (error == 0)
3327 error = nlookup(&nd);
3328 if (error == 0)
3329 error = ncp_writechk(&nd.nl_nch);
3330 if (error == 0)
3331 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3332 nlookup_done(&nd);
3333 if (error == 0) {
3334 error = setfflags(vp, uap->flags);
3335 vrele(vp);
3336 }
3337 return (error);
3338 }
3339
3340 /*
 * fchflags_args(int fd, u_long flags)
3342 *
3343 * Change flags of a file given a file descriptor.
3344 */
3345 int
sys_fchflags(struct sysmsg * sysmsg,const struct fchflags_args * uap)3346 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap)
3347 {
3348 struct thread *td = curthread;
3349 struct file *fp;
3350 int error;
3351
3352 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3353 return (error);
3354 if (fp->f_nchandle.ncp)
3355 error = ncp_writechk(&fp->f_nchandle);
3356 if (error == 0)
3357 error = setfflags((struct vnode *) fp->f_data, uap->flags);
3358 fdrop(fp);
3359 return (error);
3360 }
3361
3362 /*
 * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
 *
 * Change flags of a file given a path name relative to a file descriptor.
3365 */
3366 int
sys_chflagsat(struct sysmsg * sysmsg,const struct chflagsat_args * uap)3367 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap)
3368 {
3369 struct nlookupdata nd;
3370 struct vnode *vp;
3371 struct file *fp;
3372 int error;
3373 int lookupflags;
3374
3375 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
3376 return (EINVAL);
3377
3378 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3379
3380 vp = NULL;
3381 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
3382 if (error == 0)
3383 error = nlookup(&nd);
3384 if (error == 0)
3385 error = ncp_writechk(&nd.nl_nch);
3386 if (error == 0)
3387 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
3388 nlookup_done_at(&nd, fp);
3389 if (error == 0) {
3390 error = setfflags(vp, uap->flags);
3391 vrele(vp);
3392 }
3393 return (error);
3394 }
3395
3396
3397 static int
setfmode(struct vnode * vp,int mode)3398 setfmode(struct vnode *vp, int mode)
3399 {
3400 struct thread *td = curthread;
3401 int error;
3402 struct vattr vattr;
3403
3404 /*
3405 * note: vget is required for any operation that might mod the vnode
3406 * so VINACTIVE is properly cleared.
3407 */
3408 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3409 VATTR_NULL(&vattr);
3410 vattr.va_mode = mode & ALLPERMS;
3411 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3412 cache_inval_wxok(vp);
3413 vput(vp);
3414 }
3415 return error;
3416 }
3417
3418 int
kern_chmod(struct nlookupdata * nd,int mode)3419 kern_chmod(struct nlookupdata *nd, int mode)
3420 {
3421 struct vnode *vp;
3422 int error;
3423
3424 if ((error = nlookup(nd)) != 0)
3425 return (error);
3426 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3427 return (error);
3428 if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3429 error = setfmode(vp, mode);
3430 vrele(vp);
3431 return (error);
3432 }
3433
3434 /*
3435 * chmod_args(char *path, int mode)
3436 *
3437 * Change mode of a file given path name.
3438 */
3439 int
sys_chmod(struct sysmsg * sysmsg,const struct chmod_args * uap)3440 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap)
3441 {
3442 struct nlookupdata nd;
3443 int error;
3444
3445 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3446 if (error == 0)
3447 error = kern_chmod(&nd, uap->mode);
3448 nlookup_done(&nd);
3449 return (error);
3450 }
3451
3452 /*
3453 * lchmod_args(char *path, int mode)
3454 *
3455 * Change mode of a file given path name (don't follow links.)
3456 */
3457 int
sys_lchmod(struct sysmsg * sysmsg,const struct lchmod_args * uap)3458 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap)
3459 {
3460 struct nlookupdata nd;
3461 int error;
3462
3463 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3464 if (error == 0)
3465 error = kern_chmod(&nd, uap->mode);
3466 nlookup_done(&nd);
3467 return (error);
3468 }
3469
3470 /*
3471 * fchmod_args(int fd, int mode)
3472 *
3473 * Change mode of a file given a file descriptor.
3474 */
3475 int
sys_fchmod(struct sysmsg * sysmsg,const struct fchmod_args * uap)3476 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap)
3477 {
3478 struct thread *td = curthread;
3479 struct file *fp;
3480 int error;
3481
3482 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3483 return (error);
3484 if (fp->f_nchandle.ncp)
3485 error = ncp_writechk(&fp->f_nchandle);
3486 if (error == 0)
3487 error = setfmode((struct vnode *)fp->f_data, uap->mode);
3488 fdrop(fp);
3489 return (error);
3490 }
3491
3492 /*
3493 * fchmodat_args(char *path, int mode)
3494 *
3495 * Change mode of a file pointed to by fd/path.
3496 */
3497 int
sys_fchmodat(struct sysmsg * sysmsg,const struct fchmodat_args * uap)3498 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap)
3499 {
3500 struct nlookupdata nd;
3501 struct file *fp;
3502 int error;
3503 int flags;
3504
3505 if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3506 return (EINVAL);
3507 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3508
3509 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3510 UIO_USERSPACE, flags);
3511 if (error == 0)
3512 error = kern_chmod(&nd, uap->mode);
3513 nlookup_done_at(&nd, fp);
3514 return (error);
3515 }
3516
3517 static int
setfown(struct mount * mp,struct vnode * vp,uid_t uid,gid_t gid)3518 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid)
3519 {
3520 struct thread *td = curthread;
3521 int error;
3522 struct vattr vattr;
3523 uid_t o_uid;
3524 gid_t o_gid;
3525 uint64_t size;
3526
3527 /*
3528 * note: vget is required for any operation that might mod the vnode
3529 * so VINACTIVE is properly cleared.
3530 */
3531 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
3532 if ((error = VOP_GETATTR(vp, &vattr)) != 0)
3533 return error;
3534 o_uid = vattr.va_uid;
3535 o_gid = vattr.va_gid;
3536 size = vattr.va_size;
3537
3538 VATTR_NULL(&vattr);
3539 vattr.va_uid = uid;
3540 vattr.va_gid = gid;
3541 error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3542 vput(vp);
3543 }
3544
3545 if (error == 0) {
3546 if (uid == -1)
3547 uid = o_uid;
3548 if (gid == -1)
3549 gid = o_gid;
3550 VFS_ACCOUNT(mp, o_uid, o_gid, -size);
3551 VFS_ACCOUNT(mp, uid, gid, size);
3552 }
3553
3554 return error;
3555 }
3556
3557 int
kern_chown(struct nlookupdata * nd,int uid,int gid)3558 kern_chown(struct nlookupdata *nd, int uid, int gid)
3559 {
3560 struct vnode *vp;
3561 int error;
3562
3563 if ((error = nlookup(nd)) != 0)
3564 return (error);
3565 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3566 return (error);
3567 if ((error = ncp_writechk(&nd->nl_nch)) == 0)
3568 error = setfown(nd->nl_nch.mount, vp, uid, gid);
3569 vrele(vp);
3570 return (error);
3571 }
3572
3573 /*
3574 * chown(char *path, int uid, int gid)
3575 *
3576 * Set ownership given a path name.
3577 */
3578 int
sys_chown(struct sysmsg * sysmsg,const struct chown_args * uap)3579 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap)
3580 {
3581 struct nlookupdata nd;
3582 int error;
3583
3584 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3585 if (error == 0)
3586 error = kern_chown(&nd, uap->uid, uap->gid);
3587 nlookup_done(&nd);
3588 return (error);
3589 }
3590
3591 /*
3592 * lchown_args(char *path, int uid, int gid)
3593 *
3594 * Set ownership given a path name, do not cross symlinks.
3595 */
3596 int
sys_lchown(struct sysmsg * sysmsg,const struct lchown_args * uap)3597 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap)
3598 {
3599 struct nlookupdata nd;
3600 int error;
3601
3602 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3603 if (error == 0)
3604 error = kern_chown(&nd, uap->uid, uap->gid);
3605 nlookup_done(&nd);
3606 return (error);
3607 }
3608
3609 /*
3610 * fchown_args(int fd, int uid, int gid)
3611 *
3612 * Set ownership given a file descriptor.
3613 */
3614 int
sys_fchown(struct sysmsg * sysmsg,const struct fchown_args * uap)3615 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap)
3616 {
3617 struct thread *td = curthread;
3618 struct proc *p = td->td_proc;
3619 struct file *fp;
3620 int error;
3621
3622 if ((error = holdvnode(td, uap->fd, &fp)) != 0)
3623 return (error);
3624 if (fp->f_nchandle.ncp)
3625 error = ncp_writechk(&fp->f_nchandle);
3626 if (error == 0)
3627 error = setfown(p->p_fd->fd_ncdir.mount,
3628 (struct vnode *)fp->f_data, uap->uid, uap->gid);
3629 fdrop(fp);
3630 return (error);
3631 }
3632
3633 /*
3634 * fchownat(int fd, char *path, int uid, int gid, int flags)
3635 *
3636 * Set ownership of file pointed to by fd/path.
3637 */
3638 int
sys_fchownat(struct sysmsg * sysmsg,const struct fchownat_args * uap)3639 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap)
3640 {
3641 struct nlookupdata nd;
3642 struct file *fp;
3643 int error;
3644 int flags;
3645
3646 if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
3647 return (EINVAL);
3648 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3649
3650 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3651 UIO_USERSPACE, flags);
3652 if (error == 0)
3653 error = kern_chown(&nd, uap->uid, uap->gid);
3654 nlookup_done_at(&nd, fp);
3655 return (error);
3656 }
3657
3658
3659 static int
getutimes(struct timeval * tvp,struct timespec * tsp)3660 getutimes(struct timeval *tvp, struct timespec *tsp)
3661 {
3662 struct timeval tv[2];
3663 int error;
3664
3665 if (tvp == NULL) {
3666 microtime(&tv[0]);
3667 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3668 tsp[1] = tsp[0];
3669 } else {
3670 if ((error = itimerfix(tvp)) != 0)
3671 return (error);
3672 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3673 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3674 }
3675 return 0;
3676 }
3677
3678 static int
getutimens(const struct timespec * ts,struct timespec * newts,int * nullflag)3679 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag)
3680 {
3681 struct timespec tsnow;
3682 int error;
3683
3684 *nullflag = 0;
3685 nanotime(&tsnow);
3686 if (ts == NULL) {
3687 newts[0] = tsnow;
3688 newts[1] = tsnow;
3689 *nullflag = 1;
3690 return (0);
3691 }
3692
3693 newts[0] = ts[0];
3694 newts[1] = ts[1];
3695 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) {
3696 newts[0].tv_sec = VNOVAL;
3697 newts[1].tv_sec = VNOVAL;
3698 return (0);
3699 }
3700 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW)
3701 *nullflag = 1;
3702
3703 if (newts[0].tv_nsec == UTIME_OMIT)
3704 newts[0].tv_sec = VNOVAL;
3705 else if (newts[0].tv_nsec == UTIME_NOW)
3706 newts[0] = tsnow;
3707 else if ((error = itimespecfix(&newts[0])) != 0)
3708 return (error);
3709
3710 if (newts[1].tv_nsec == UTIME_OMIT)
3711 newts[1].tv_sec = VNOVAL;
3712 else if (newts[1].tv_nsec == UTIME_NOW)
3713 newts[1] = tsnow;
3714 else if ((error = itimespecfix(&newts[1])) != 0)
3715 return (error);
3716
3717 return (0);
3718 }
3719
3720 static int
setutimes(struct vnode * vp,struct vattr * vattr,const struct timespec * ts,int nullflag)3721 setutimes(struct vnode *vp, struct vattr *vattr,
3722 const struct timespec *ts, int nullflag)
3723 {
3724 struct thread *td = curthread;
3725 int error;
3726
3727 VATTR_NULL(vattr);
3728 vattr->va_atime = ts[0];
3729 vattr->va_mtime = ts[1];
3730 if (nullflag)
3731 vattr->va_vaflags |= VA_UTIMES_NULL;
3732 error = VOP_SETATTR(vp, vattr, td->td_ucred);
3733
3734 return error;
3735 }
3736
3737 int
kern_utimes(struct nlookupdata * nd,struct timeval * tptr)3738 kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
3739 {
3740 struct timespec ts[2];
3741 int error;
3742
3743 if (tptr) {
3744 if ((error = getutimes(tptr, ts)) != 0)
3745 return (error);
3746 }
3747 error = kern_utimensat(nd, tptr ? ts : NULL, 0);
3748 return (error);
3749 }
3750
3751 /*
3752 * utimes_args(char *path, struct timeval *tptr)
3753 *
3754 * Set the access and modification times of a file.
3755 */
3756 int
sys_utimes(struct sysmsg * sysmsg,const struct utimes_args * uap)3757 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap)
3758 {
3759 struct timeval tv[2];
3760 struct nlookupdata nd;
3761 int error;
3762
3763 if (uap->tptr) {
3764 error = copyin(uap->tptr, tv, sizeof(tv));
3765 if (error)
3766 return (error);
3767 }
3768 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
3769 if (error == 0)
3770 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3771 nlookup_done(&nd);
3772 return (error);
3773 }
3774
3775 /*
3776 * lutimes_args(char *path, struct timeval *tptr)
3777 *
3778 * Set the access and modification times of a file.
3779 */
3780 int
sys_lutimes(struct sysmsg * sysmsg,const struct lutimes_args * uap)3781 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap)
3782 {
3783 struct timeval tv[2];
3784 struct nlookupdata nd;
3785 int error;
3786
3787 if (uap->tptr) {
3788 error = copyin(uap->tptr, tv, sizeof(tv));
3789 if (error)
3790 return (error);
3791 }
3792 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
3793 if (error == 0)
3794 error = kern_utimes(&nd, uap->tptr ? tv : NULL);
3795 nlookup_done(&nd);
3796 return (error);
3797 }
3798
3799 /*
3800 * Set utimes on a file descriptor. The creds used to open the
3801 * file are used to determine whether the operation is allowed
3802 * or not.
3803 */
3804 int
kern_futimens(int fd,struct timespec * ts)3805 kern_futimens(int fd, struct timespec *ts)
3806 {
3807 struct thread *td = curthread;
3808 struct timespec newts[2];
3809 struct file *fp;
3810 struct vnode *vp;
3811 struct vattr vattr;
3812 struct vattr_lite lva;
3813 int nullflag;
3814 int error;
3815
3816 error = getutimens(ts, newts, &nullflag);
3817 if (error)
3818 return (error);
3819 if ((error = holdvnode(td, fd, &fp)) != 0)
3820 return (error);
3821 if (fp->f_nchandle.ncp)
3822 error = ncp_writechk(&fp->f_nchandle);
3823 if (error == 0) {
3824 vp = fp->f_data;
3825 error = vget(vp, LK_EXCLUSIVE);
3826 if (error == 0) {
3827 error = VOP_GETATTR_FP(vp, &vattr, fp);
3828 if (error == 0) {
3829 lva.va_type = vattr.va_type;
3830 lva.va_nlink = vattr.va_nlink;
3831 lva.va_mode = vattr.va_mode;
3832 lva.va_uid = vattr.va_uid;
3833 lva.va_gid = vattr.va_gid;
3834 lva.va_size = vattr.va_size;
3835 lva.va_flags = vattr.va_flags;
3836
3837 error = naccess_lva(&lva, NLC_OWN | NLC_WRITE,
3838 fp->f_cred);
3839 }
3840 if (error == 0) {
3841 error = setutimes(vp, &vattr, newts, nullflag);
3842 }
3843 vput(vp);
3844 }
3845 }
3846 fdrop(fp);
3847 return (error);
3848 }
3849
3850 /*
3851 * futimens_args(int fd, struct timespec *ts)
3852 *
3853 * Set the access and modification times of a file.
3854 */
3855 int
sys_futimens(struct sysmsg * sysmsg,const struct futimens_args * uap)3856 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap)
3857 {
3858 struct timespec ts[2];
3859 int error;
3860
3861 if (uap->ts) {
3862 error = copyin(uap->ts, ts, sizeof(ts));
3863 if (error)
3864 return (error);
3865 }
3866 error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
3867 return (error);
3868 }
3869
3870 int
kern_futimes(int fd,struct timeval * tptr)3871 kern_futimes(int fd, struct timeval *tptr)
3872 {
3873 struct timespec ts[2];
3874 int error;
3875
3876 if (tptr) {
3877 if ((error = getutimes(tptr, ts)) != 0)
3878 return (error);
3879 }
3880 error = kern_futimens(fd, tptr ? ts : NULL);
3881 return (error);
3882 }
3883
3884 /*
3885 * futimes_args(int fd, struct timeval *tptr)
3886 *
3887 * Set the access and modification times of a file.
3888 */
3889 int
sys_futimes(struct sysmsg * sysmsg,const struct futimes_args * uap)3890 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap)
3891 {
3892 struct timeval tv[2];
3893 int error;
3894
3895 if (uap->tptr) {
3896 error = copyin(uap->tptr, tv, sizeof(tv));
3897 if (error)
3898 return (error);
3899 }
3900 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
3901 return (error);
3902 }
3903
3904 /*
3905 * futimesat_args(int fd, const char *path, struct timeval *tptr)
3906 *
3907 * Set the access and modification times of a file.
3908 */
3909 int
sys_futimesat(struct sysmsg * sysmsg,const struct futimesat_args * uap)3910 sys_futimesat(struct sysmsg *sysmsg, const struct futimesat_args *uap)
3911 {
3912 struct timespec ts[2];
3913 struct nlookupdata nd;
3914 struct file *fp;
3915 int error;
3916
3917 if (uap->tptr) {
3918 struct timeval tv[2];
3919
3920 if ((error = copyin(uap->tptr, tv, sizeof(tv))) != 0)
3921 return error;
3922 if ((error = getutimes(tv, ts)) != 0)
3923 return error;
3924 }
3925
3926 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3927 UIO_USERSPACE, 0);
3928 if (error == 0)
3929 error = kern_utimensat(&nd, uap->tptr ? ts : NULL, 0);
3930 nlookup_done_at(&nd, fp);
3931
3932 return (error);
3933 }
3934
3935 int
kern_utimensat(struct nlookupdata * nd,const struct timespec * ts,int flags)3936 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
3937 {
3938 struct timespec newts[2];
3939 struct vnode *vp;
3940 struct vattr vattr;
3941 int nullflag;
3942 int error;
3943
3944 if (flags & ~AT_SYMLINK_NOFOLLOW)
3945 return (EINVAL);
3946
3947 error = getutimens(ts, newts, &nullflag);
3948 if (error)
3949 return (error);
3950
3951 nd->nl_flags |= NLC_OWN | NLC_WRITE;
3952 if ((error = nlookup(nd)) != 0)
3953 return (error);
3954 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
3955 return (error);
3956 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
3957 return (error);
3958 if ((error = vn_writechk(vp)) == 0) {
3959 error = vget(vp, LK_EXCLUSIVE);
3960 if (error == 0) {
3961 error = setutimes(vp, &vattr, newts, nullflag);
3962 vput(vp);
3963 }
3964 }
3965 vrele(vp);
3966 return (error);
3967 }
3968
3969 /*
3970 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
3971 *
3972 * Set file access and modification times of a file.
3973 */
3974 int
sys_utimensat(struct sysmsg * sysmsg,const struct utimensat_args * uap)3975 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap)
3976 {
3977 struct timespec ts[2];
3978 struct nlookupdata nd;
3979 struct file *fp;
3980 int error;
3981 int flags;
3982
3983 if (uap->ts) {
3984 error = copyin(uap->ts, ts, sizeof(ts));
3985 if (error)
3986 return (error);
3987 }
3988
3989 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
3990 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
3991 UIO_USERSPACE, flags);
3992 if (error == 0)
3993 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
3994 nlookup_done_at(&nd, fp);
3995 return (error);
3996 }
3997
3998 int
kern_truncate(struct nlookupdata * nd,off_t length)3999 kern_truncate(struct nlookupdata *nd, off_t length)
4000 {
4001 struct vnode *vp;
4002 struct vattr vattr;
4003 int error;
4004 uid_t uid = 0;
4005 gid_t gid = 0;
4006 uint64_t old_size = 0;
4007
4008 if (length < 0)
4009 return(EINVAL);
4010 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
4011 if ((error = nlookup(nd)) != 0)
4012 return (error);
4013 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4014 return (error);
4015 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
4016 return (error);
4017 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
4018 if (error) {
4019 vrele(vp);
4020 return (error);
4021 }
4022 if (vp->v_type == VDIR) {
4023 error = EISDIR;
4024 goto done;
4025 }
4026 if (vfs_quota_enabled) {
4027 error = VOP_GETATTR(vp, &vattr);
4028 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
4029 uid = vattr.va_uid;
4030 gid = vattr.va_gid;
4031 old_size = vattr.va_size;
4032 }
4033
4034 if ((error = vn_writechk(vp)) == 0) {
4035 VATTR_NULL(&vattr);
4036 vattr.va_size = length;
4037 error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
4038 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
4039 }
4040 done:
4041 vput(vp);
4042 return (error);
4043 }
4044
4045 /*
4046 * truncate(char *path, int pad, off_t length)
4047 *
4048 * Truncate a file given its path name.
4049 */
4050 int
sys_truncate(struct sysmsg * sysmsg,const struct truncate_args * uap)4051 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap)
4052 {
4053 struct nlookupdata nd;
4054 int error;
4055
4056 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4057 if (error == 0)
4058 error = kern_truncate(&nd, uap->length);
4059 nlookup_done(&nd);
4060 return error;
4061 }
4062
4063 int
kern_ftruncate(int fd,off_t length)4064 kern_ftruncate(int fd, off_t length)
4065 {
4066 struct thread *td = curthread;
4067 struct vattr vattr;
4068 struct vnode *vp;
4069 struct file *fp;
4070 int error;
4071 uid_t uid = 0;
4072 gid_t gid = 0;
4073 uint64_t old_size = 0;
4074 struct mount *mp;
4075
4076 if (length < 0)
4077 return(EINVAL);
4078 if ((error = holdvnode(td, fd, &fp)) != 0)
4079 return (error);
4080 if (fp->f_nchandle.ncp) {
4081 error = ncp_writechk(&fp->f_nchandle);
4082 if (error)
4083 goto done;
4084 }
4085 if ((fp->f_flag & FWRITE) == 0) {
4086 error = EINVAL;
4087 goto done;
4088 }
4089 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */
4090 error = EINVAL;
4091 goto done;
4092 }
4093 vp = (struct vnode *)fp->f_data;
4094 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4095 if (vp->v_type == VDIR) {
4096 error = EISDIR;
4097 vn_unlock(vp);
4098 goto done;
4099 }
4100
4101 if (vfs_quota_enabled) {
4102 error = VOP_GETATTR_FP(vp, &vattr, fp);
4103 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
4104 uid = vattr.va_uid;
4105 gid = vattr.va_gid;
4106 old_size = vattr.va_size;
4107 }
4108
4109 if ((error = vn_writechk(vp)) == 0) {
4110 VATTR_NULL(&vattr);
4111 vattr.va_size = length;
4112 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
4113 mp = vq_vptomp(vp);
4114 VFS_ACCOUNT(mp, uid, gid, length - old_size);
4115 }
4116 vn_unlock(vp);
4117 done:
4118 fdrop(fp);
4119 return (error);
4120 }
4121
4122 /*
4123 * ftruncate_args(int fd, int pad, off_t length)
4124 *
4125 * Truncate a file given a file descriptor.
4126 */
4127 int
sys_ftruncate(struct sysmsg * sysmsg,const struct ftruncate_args * uap)4128 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap)
4129 {
4130 int error;
4131
4132 error = kern_ftruncate(uap->fd, uap->length);
4133
4134 return (error);
4135 }
4136
4137 int
kern_fsync(int fd,bool fullsync)4138 kern_fsync(int fd, bool fullsync)
4139 {
4140 struct thread *td = curthread;
4141 struct vnode *vp;
4142 struct file *fp;
4143 vm_object_t obj;
4144 int error;
4145
4146 if ((error = holdvnode(td, fd, &fp)) != 0)
4147 return (error);
4148 vp = (struct vnode *)fp->f_data;
4149 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4150 if ((obj = vp->v_object) != NULL) {
4151 if (vp->v_mount == NULL ||
4152 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
4153 vm_object_page_clean(obj, 0, 0, 0);
4154 }
4155 }
4156 error = fullsync ?
4157 VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp) :
4158 VOP_FDATASYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
4159 if (error == 0 && vp->v_mount)
4160 error = buf_fsync(vp);
4161 vn_unlock(vp);
4162 fdrop(fp);
4163
4164 return (error);
4165 }
4166
4167 /*
4168 * fsync(int fd)
4169 *
4170 * Sync an open file.
4171 */
4172 int
sys_fsync(struct sysmsg * sysmsg,const struct fsync_args * uap)4173 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap)
4174 {
4175 return (kern_fsync(uap->fd, true));
4176 }
4177
4178 /*
4179 * fdatasync(int fd)
4180 *
4181 * Data-sync an open file.
4182 */
4183 int
sys_fdatasync(struct sysmsg * sysmsg,const struct fdatasync_args * uap)4184 sys_fdatasync(struct sysmsg *sysmsg, const struct fdatasync_args *uap)
4185 {
4186 return (kern_fsync(uap->fd, false));
4187 }
4188
4189 /*
4190 * rename op.
4191 *
4192 * NOTE: error == 0 and nl_dvp is NULL indicates a mount point, operation
4193 * disallowed. e.g. /var/cache where /var/cache is a null-mount, for
4194 * example.
4195 */
4196 int
kern_rename(struct nlookupdata * fromnd,struct nlookupdata * tond)4197 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
4198 {
4199 struct nchandle fnchd;
4200 struct nchandle tnchd;
4201 struct namecache *ncp;
4202 struct vnode *fdvp;
4203 struct vnode *tdvp;
4204 struct mount *mp;
4205 struct mount *userenlk;
4206 int error;
4207 u_int fncp_gen;
4208 u_int tncp_gen;
4209
4210 bwillinode(1);
4211 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
4212 if ((error = nlookup(fromnd)) != 0)
4213 return (error);
4214
4215 /*
4216 * Attempt to rename a mount point (from or to)
4217 */
4218 if (error == 0 && fromnd->nl_dvp == NULL)
4219 return (EINVAL);
4220
4221 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
4222 return (ENOENT);
4223 fnchd.mount = fromnd->nl_nch.mount;
4224 cache_hold(&fnchd);
4225
4226 /*
4227 * unlock the source nch so we can lookup the target nch without
4228 * deadlocking. The target may or may not exist so we do not check
4229 * for a target vp like kern_mkdir() and other creation functions do.
4230 *
4231 * The source and target directories are ref'd and rechecked after
4232 * everything is relocked to determine if the source or target file
4233 * has been renamed.
4234 */
4235 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
4236 fromnd->nl_flags &= ~NLC_NCPISLOCKED;
4237 fncp_gen = fromnd->nl_nch.ncp->nc_generation;
4238
4239 if (fromnd->nl_nch.ncp->nc_vp &&
4240 fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
4241 userenlk = fnchd.mount;
4242 cache_unlock(&fromnd->nl_nch);
4243 lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE);
4244 } else {
4245 userenlk = NULL;
4246 cache_unlock(&fromnd->nl_nch);
4247 }
4248
4249 /*
4250 * Lookup target
4251 */
4252 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
4253 if ((error = nlookup(tond)) != 0) {
4254 cache_drop(&fnchd);
4255 goto done;
4256 }
4257 tncp_gen = tond->nl_nch.ncp->nc_generation;
4258
4259 /*
4260 * Attempt to rename a mount point (from or to)
4261 */
4262 if (error == 0 && tond->nl_dvp == NULL) {
4263 cache_drop(&fnchd);
4264 error = ENOENT;
4265 goto done;
4266 }
4267
4268 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
4269 cache_drop(&fnchd);
4270 error = ENOENT;
4271 goto done;
4272 }
4273 tnchd.mount = tond->nl_nch.mount;
4274 cache_hold(&tnchd);
4275
4276 /*
4277 * If the source and target are the same there is nothing to do
4278 */
4279 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
4280 cache_drop(&fnchd);
4281 cache_drop(&tnchd);
4282 error = 0;
4283 goto done;
4284 }
4285
4286 /*
4287 * Mount points cannot be renamed or overwritten
4288 */
4289 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
4290 NCF_ISMOUNTPT
4291 ) {
4292 cache_drop(&fnchd);
4293 cache_drop(&tnchd);
4294 error = EINVAL;
4295 goto done;
4296 }
4297
4298 /*
4299 * Lock all four namecache entries. tond is already locked.
4300 */
4301 cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch,
4302 &tnchd, &tond->nl_nch,
4303 fromnd->nl_cred, tond->nl_cred);
4304 fromnd->nl_flags |= NLC_NCPISLOCKED;
4305
4306 /*
4307 * If the namecache generation changed for either fromnd or tond,
4308 * we must retry.
4309 */
4310 if (((fromnd->nl_nch.ncp->nc_generation - fncp_gen) & ~1) ||
4311 ((tond->nl_nch.ncp->nc_generation - tncp_gen) & ~1))
4312 {
4313 krateprintf(&krate_rename,
4314 "kern_rename: retry due to race on: "
4315 "\"%s\" -> \"%s\" (%d,%d)\n",
4316 fromnd->nl_nch.ncp->nc_name,
4317 tond->nl_nch.ncp->nc_name,
4318 fromnd->nl_nch.ncp->nc_generation - fncp_gen,
4319 tond->nl_nch.ncp->nc_generation - tncp_gen);
4320 error = EAGAIN;
4321 goto finish;
4322 }
4323
4324 /*
4325 * If either fromnd or tond are marked destroyed a ripout occured
4326 * out from under us and we must retry.
4327 */
4328 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
4329 fromnd->nl_nch.ncp->nc_vp == NULL ||
4330 (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) {
4331 krateprintf(&krate_rename,
4332 "kern_rename: retry due to ripout on: "
4333 "\"%s\" -> \"%s\"\n",
4334 fromnd->nl_nch.ncp->nc_name,
4335 tond->nl_nch.ncp->nc_name);
4336 error = EAGAIN;
4337 goto finish;
4338 }
4339
4340 /*
4341 * Make sure the parent directories linkages are the same. We have
4342 * already checked that fromnd and tond are not mount points so this
4343 * should not loop forever on a cross-mount.
4344 */
4345 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
4346 tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
4347 error = EAGAIN;
4348 goto finish;
4349 }
4350
4351 /*
4352 * Both the source and target must be within the same filesystem and
4353 * in the same filesystem as their parent directories within the
4354 * namecache topology.
4355 *
4356 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
4357 */
4358 mp = fnchd.mount;
4359 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
4360 mp != tond->nl_nch.mount) {
4361 error = EXDEV;
4362 goto finish;
4363 }
4364
4365 /*
4366 * Make sure the mount point is writable
4367 */
4368 if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
4369 goto finish;
4370 }
4371
4372 /*
4373 * If the target exists and either the source or target is a directory,
4374 * then both must be directories.
4375 *
4376 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
4377 * have become NULL.
4378 */
4379 if (tond->nl_nch.ncp->nc_vp) {
4380 if (fromnd->nl_nch.ncp->nc_vp == NULL) {
4381 error = ENOENT;
4382 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
4383 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
4384 error = ENOTDIR;
4385 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
4386 error = EISDIR;
4387 }
4388 }
4389
4390 /*
4391 * You cannot rename a source into itself or a subdirectory of itself.
4392 * We check this by travsersing the target directory upwards looking
4393 * for a match against the source.
4394 *
4395 * Only required when renaming a directory, in which case userenlk is
4396 * non-NULL.
4397 */
4398 if (__predict_false(userenlk && error == 0)) {
4399 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
4400 if (fromnd->nl_nch.ncp == ncp) {
4401 error = EINVAL;
4402 break;
4403 }
4404 }
4405 }
4406
4407 /*
4408 * Even though the namespaces are different, they may still represent
4409 * hardlinks to the same file. The filesystem might have a hard time
4410 * with this so we issue a NREMOVE of the source instead of a NRENAME
4411 * when we detect the situation.
4412 */
4413 if (error == 0) {
4414 fdvp = fromnd->nl_dvp;
4415 tdvp = tond->nl_dvp;
4416 if (fdvp == NULL || tdvp == NULL) {
4417 error = EPERM;
4418 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
4419 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
4420 fromnd->nl_cred);
4421 } else {
4422 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
4423 fdvp, tdvp, tond->nl_cred);
4424 }
4425 }
4426 finish:
4427 cache_put(&tnchd);
4428 cache_put(&fnchd);
4429 done:
4430 if (userenlk)
4431 lockmgr(&userenlk->mnt_renlock, LK_RELEASE);
4432 return (error);
4433 }
4434
4435 /*
4436 * rename_args(char *from, char *to)
4437 *
4438 * Rename files. Source and destination must either both be directories,
4439 * or both not be directories. If target is a directory, it must be empty.
4440 */
4441 int
sys_rename(struct sysmsg * sysmsg,const struct rename_args * uap)4442 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap)
4443 {
4444 struct nlookupdata fromnd, tond;
4445 int error;
4446
4447 do {
4448 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
4449 if (error == 0) {
4450 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
4451 if (error == 0)
4452 error = kern_rename(&fromnd, &tond);
4453 nlookup_done(&tond);
4454 }
4455 nlookup_done(&fromnd);
4456 } while (error == EAGAIN);
4457 return (error);
4458 }
4459
4460 /*
4461 * renameat_args(int oldfd, char *old, int newfd, char *new)
4462 *
4463 * Rename files using paths relative to the directories associated with
4464 * oldfd and newfd. Source and destination must either both be directories,
4465 * or both not be directories. If target is a directory, it must be empty.
4466 */
4467 int
sys_renameat(struct sysmsg * sysmsg,const struct renameat_args * uap)4468 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap)
4469 {
4470 struct nlookupdata oldnd, newnd;
4471 struct file *oldfp, *newfp;
4472 int error;
4473
4474 do {
4475 error = nlookup_init_at(&oldnd, &oldfp,
4476 uap->oldfd, uap->old,
4477 UIO_USERSPACE, 0);
4478 if (error == 0) {
4479 error = nlookup_init_at(&newnd, &newfp,
4480 uap->newfd, uap->new,
4481 UIO_USERSPACE, 0);
4482 if (error == 0)
4483 error = kern_rename(&oldnd, &newnd);
4484 nlookup_done_at(&newnd, newfp);
4485 }
4486 nlookup_done_at(&oldnd, oldfp);
4487 } while (error == EAGAIN);
4488 return (error);
4489 }
4490
4491 int
kern_mkdir(struct nlookupdata * nd,int mode)4492 kern_mkdir(struct nlookupdata *nd, int mode)
4493 {
4494 struct thread *td = curthread;
4495 struct proc *p = td->td_proc;
4496 struct vnode *vp;
4497 struct vattr vattr;
4498 int error;
4499
4500 bwillinode(1);
4501 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
4502 if ((error = nlookup(nd)) != 0)
4503 return (error);
4504
4505 if (nd->nl_nch.ncp->nc_vp)
4506 return (EEXIST);
4507 if (nd->nl_dvp == NULL)
4508 return (EINVAL);
4509 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4510 return (error);
4511 VATTR_NULL(&vattr);
4512 vattr.va_type = VDIR;
4513 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
4514
4515 vp = NULL;
4516 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
4517 if (error == 0)
4518 vput(vp);
4519 return (error);
4520 }
4521
4522 /*
4523 * mkdir_args(char *path, int mode)
4524 *
4525 * Make a directory file.
4526 */
4527 int
sys_mkdir(struct sysmsg * sysmsg,const struct mkdir_args * uap)4528 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap)
4529 {
4530 struct nlookupdata nd;
4531 int error;
4532
4533 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
4534 if (error == 0)
4535 error = kern_mkdir(&nd, uap->mode);
4536 nlookup_done(&nd);
4537 return (error);
4538 }
4539
4540 /*
4541 * mkdirat_args(int fd, char *path, mode_t mode)
4542 *
4543 * Make a directory file. The path is relative to the directory associated
4544 * with fd.
4545 */
4546 int
sys_mkdirat(struct sysmsg * sysmsg,const struct mkdirat_args * uap)4547 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap)
4548 {
4549 struct nlookupdata nd;
4550 struct file *fp;
4551 int error;
4552
4553 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
4554 if (error == 0)
4555 error = kern_mkdir(&nd, uap->mode);
4556 nlookup_done_at(&nd, fp);
4557 return (error);
4558 }
4559
4560 int
kern_rmdir(struct nlookupdata * nd)4561 kern_rmdir(struct nlookupdata *nd)
4562 {
4563 int error;
4564
4565 bwillinode(1);
4566 nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
4567 if ((error = nlookup(nd)) != 0)
4568 return (error);
4569
4570 /*
4571 * Do not allow directories representing mount points to be
4572 * deleted, even if empty. Check write perms on mount point
4573 * in case the vnode is aliased (aka nullfs).
4574 */
4575 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
4576 return (EBUSY);
4577 if (nd->nl_dvp == NULL)
4578 return (EINVAL);
4579 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
4580 return (error);
4581 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
4582 return (error);
4583 }
4584
4585 /*
4586 * rmdir_args(char *path)
4587 *
4588 * Remove a directory file.
4589 */
4590 int
sys_rmdir(struct sysmsg * sysmsg,const struct rmdir_args * uap)4591 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap)
4592 {
4593 struct nlookupdata nd;
4594 int error;
4595
4596 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
4597 if (error == 0)
4598 error = kern_rmdir(&nd);
4599 nlookup_done(&nd);
4600 return (error);
4601 }
4602
4603 int
kern_getdirentries(int fd,char * buf,u_int count,long * basep,int * res,enum uio_seg direction)4604 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
4605 enum uio_seg direction)
4606 {
4607 struct thread *td = curthread;
4608 struct vnode *vp;
4609 struct file *fp;
4610 struct uio auio;
4611 struct iovec aiov;
4612 off_t loff;
4613 int error, eofflag;
4614
4615 if ((error = holdvnode(td, fd, &fp)) != 0)
4616 return (error);
4617 if ((fp->f_flag & FREAD) == 0) {
4618 error = EBADF;
4619 goto done;
4620 }
4621 vp = (struct vnode *)fp->f_data;
4622 if (vp->v_type != VDIR) {
4623 error = EINVAL;
4624 goto done;
4625 }
4626 aiov.iov_base = buf;
4627 aiov.iov_len = count;
4628 auio.uio_iov = &aiov;
4629 auio.uio_iovcnt = 1;
4630 auio.uio_rw = UIO_READ;
4631 auio.uio_segflg = direction;
4632 auio.uio_td = td;
4633 auio.uio_resid = count;
4634 loff = auio.uio_offset = fp->f_offset;
4635 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
4636 fp->f_offset = auio.uio_offset;
4637 if (error)
4638 goto done;
4639
4640 /*
4641 * WARNING! *basep may not be wide enough to accomodate the
4642 * seek offset. XXX should we hack this to return the upper 32 bits
4643 * for offsets greater then 4G?
4644 */
4645 if (basep) {
4646 *basep = (long)loff;
4647 }
4648 *res = count - auio.uio_resid;
4649 done:
4650 fdrop(fp);
4651 return (error);
4652 }
4653
4654 /*
4655 * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
4656 *
4657 * Read a block of directory entries in a file system independent format.
4658 */
4659 int
sys_getdirentries(struct sysmsg * sysmsg,const struct getdirentries_args * uap)4660 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap)
4661 {
4662 long base;
4663 int error;
4664
4665 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
4666 &sysmsg->sysmsg_result, UIO_USERSPACE);
4667
4668 if (error == 0 && uap->basep)
4669 error = copyout(&base, uap->basep, sizeof(*uap->basep));
4670 return (error);
4671 }
4672
4673 /*
4674 * getdents_args(int fd, char *buf, size_t count)
4675 */
4676 int
sys_getdents(struct sysmsg * sysmsg,const struct getdents_args * uap)4677 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap)
4678 {
4679 int error;
4680
4681 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
4682 &sysmsg->sysmsg_result, UIO_USERSPACE);
4683
4684 return (error);
4685 }
4686
4687 /*
4688 * Set the mode mask for creation of filesystem nodes.
4689 *
4690 * umask(int newmask)
4691 */
4692 int
sys_umask(struct sysmsg * sysmsg,const struct umask_args * uap)4693 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap)
4694 {
4695 struct thread *td = curthread;
4696 struct proc *p = td->td_proc;
4697 struct filedesc *fdp;
4698
4699 fdp = p->p_fd;
4700 sysmsg->sysmsg_result = fdp->fd_cmask;
4701 fdp->fd_cmask = uap->newmask & ALLPERMS;
4702 return (0);
4703 }
4704
4705 /*
4706 * revoke(char *path)
4707 *
4708 * Void all references to file by ripping underlying filesystem
4709 * away from vnode.
4710 */
4711 int
sys_revoke(struct sysmsg * sysmsg,const struct revoke_args * uap)4712 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap)
4713 {
4714 struct nlookupdata nd;
4715 struct vattr vattr;
4716 struct vnode *vp;
4717 struct ucred *cred;
4718 int error;
4719
4720 vp = NULL;
4721 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
4722 if (error == 0)
4723 error = nlookup(&nd);
4724 if (error == 0)
4725 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
4726 cred = crhold(nd.nl_cred);
4727 nlookup_done(&nd);
4728 if (error == 0) {
4729 if (error == 0)
4730 error = VOP_GETATTR(vp, &vattr);
4731 if (error == 0 && cred->cr_uid != vattr.va_uid)
4732 error = caps_priv_check(cred, SYSCAP_NOVFS_REVOKE);
4733 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
4734 if (vcount(vp) > 0)
4735 error = vrevoke(vp, cred);
4736 } else if (error == 0) {
4737 error = vrevoke(vp, cred);
4738 }
4739 vrele(vp);
4740 }
4741 if (cred)
4742 crfree(cred);
4743 return (error);
4744 }
4745
4746 /*
4747 * getfh_args(char *fname, fhandle_t *fhp)
4748 *
4749 * Get (NFS) file handle
4750 *
4751 * NOTE: We use the fsid of the covering mount, even if it is a nullfs
4752 * mount. This allows nullfs mounts to be explicitly exported.
4753 *
4754 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
4755 *
4756 * nullfs mounts of subdirectories are not safe. That is, it will
4757 * work, but you do not really have protection against access to
4758 * the related parent directories.
4759 */
4760 int
sys_getfh(struct sysmsg * sysmsg,const struct getfh_args * uap)4761 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap)
4762 {
4763 struct nlookupdata nd;
4764 fhandle_t fh;
4765 struct vnode *vp;
4766 struct mount *mp;
4767 int error;
4768
4769 /*
4770 * Must be super user
4771 */
4772 if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0)
4773 return (error);
4774
4775 vp = NULL;
4776 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
4777 if (error == 0)
4778 error = nlookup(&nd);
4779 if (error == 0)
4780 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
4781 mp = nd.nl_nch.mount;
4782 nlookup_done(&nd);
4783 if (error == 0) {
4784 bzero(&fh, sizeof(fh));
4785 fh.fh_fsid = mp->mnt_stat.f_fsid;
4786 error = VFS_VPTOFH(vp, &fh.fh_fid);
4787 vput(vp);
4788 if (error == 0)
4789 error = copyout(&fh, uap->fhp, sizeof(fh));
4790 }
4791 return (error);
4792 }
4793
4794 /*
4795 * fhopen_args(const struct fhandle *u_fhp, int flags)
4796 *
4797 * syscall for the rpc.lockd to use to translate a NFS file handle into
4798 * an open descriptor.
4799 *
4800 * WARNING: Do not remove the caps_priv_check() call or this becomes
4801 * one giant security hole.
4802 */
4803 int
sys_fhopen(struct sysmsg * sysmsg,const struct fhopen_args * uap)4804 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap)
4805 {
4806 struct thread *td = curthread;
4807 struct filedesc *fdp = td->td_proc->p_fd;
4808 struct mount *mp;
4809 struct vnode *vp;
4810 struct fhandle fhp;
4811 struct vattr vat;
4812 struct vattr *vap = &vat;
4813 struct flock lf;
4814 int fmode, mode, error = 0, type;
4815 struct file *nfp;
4816 struct file *fp;
4817 int indx;
4818
4819 /*
4820 * Must be super user
4821 */
4822 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
4823 if (error)
4824 return (error);
4825
4826 fmode = FFLAGS(uap->flags);
4827
4828 /*
4829 * Why not allow a non-read/write open for our lockd?
4830 */
4831 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4832 return (EINVAL);
4833 error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4834 if (error)
4835 return(error);
4836
4837 /*
4838 * Find the mount point
4839 */
4840 mp = vfs_getvfs(&fhp.fh_fsid);
4841 if (mp == NULL) {
4842 error = ESTALE;
4843 goto done2;
4844 }
4845 /* now give me my vnode, it gets returned to me locked */
4846 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
4847 if (error)
4848 goto done;
4849 /*
4850 * from now on we have to make sure not
4851 * to forget about the vnode
4852 * any error that causes an abort must vput(vp)
4853 * just set error = err and 'goto bad;'.
4854 */
4855
4856 /*
4857 * from vn_open
4858 */
4859 if (vp->v_type == VLNK) {
4860 error = EMLINK;
4861 goto bad;
4862 }
4863 if (vp->v_type == VSOCK) {
4864 error = EOPNOTSUPP;
4865 goto bad;
4866 }
4867 mode = 0;
4868 if (fmode & (FWRITE | O_TRUNC)) {
4869 if (vp->v_type == VDIR) {
4870 error = EISDIR;
4871 goto bad;
4872 }
4873 error = vn_writechk(vp);
4874 if (error)
4875 goto bad;
4876 mode |= VWRITE;
4877 }
4878 if (fmode & FREAD)
4879 mode |= VREAD;
4880 if (mode) {
4881 error = VOP_ACCESS(vp, mode, td->td_ucred);
4882 if (error)
4883 goto bad;
4884 }
4885 if (fmode & O_TRUNC) {
4886 vn_unlock(vp); /* XXX */
4887 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
4888 VATTR_NULL(vap);
4889 vap->va_size = 0;
4890 error = VOP_SETATTR(vp, vap, td->td_ucred);
4891 if (error)
4892 goto bad;
4893 }
4894
4895 /*
4896 * VOP_OPEN needs the file pointer so it can potentially override
4897 * it.
4898 *
4899 * WARNING! no f_nchandle will be associated when fhopen()ing a
4900 * directory. XXX
4901 */
4902 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
4903 goto bad;
4904 error = VOP_OPEN(vp, fmode, td->td_ucred, &nfp);
4905 fp = nfp;
4906
4907 if (error) {
4908 /*
4909 * setting f_ops this way prevents VOP_CLOSE from being
4910 * called or fdrop() releasing the vp from v_data. Since
4911 * the VOP_OPEN failed we don't want to VOP_CLOSE.
4912 */
4913 fp->f_ops = &badfileops;
4914 fp->f_data = NULL;
4915 goto bad_drop;
4916 }
4917
4918 /*
4919 * The fp is given its own reference, we still have our ref and lock.
4920 *
4921 * Assert that all regular files must be created with a VM object.
4922 */
4923 if (vp->v_type == VREG && vp->v_object == NULL) {
4924 kprintf("fhopen: regular file did not "
4925 "have VM object: %p\n",
4926 vp);
4927 goto bad_drop;
4928 }
4929
4930 /*
4931 * The open was successful. Handle any locking requirements.
4932 */
4933 if (fmode & (O_EXLOCK | O_SHLOCK)) {
4934 lf.l_whence = SEEK_SET;
4935 lf.l_start = 0;
4936 lf.l_len = 0;
4937 if (fmode & O_EXLOCK)
4938 lf.l_type = F_WRLCK;
4939 else
4940 lf.l_type = F_RDLCK;
4941 if (fmode & FNONBLOCK)
4942 type = 0;
4943 else
4944 type = F_WAIT;
4945 vn_unlock(vp);
4946 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
4947 &lf, type)) != 0) {
4948 /*
4949 * release our private reference.
4950 */
4951 fsetfd(fdp, NULL, indx);
4952 fdrop(fp);
4953 vrele(vp);
4954 goto done;
4955 }
4956 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4957 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
4958 }
4959
4960 /*
4961 * Clean up. Associate the file pointer with the previously
4962 * reserved descriptor and return it.
4963 */
4964 vput(vp);
4965 if (uap->flags & O_CLOEXEC)
4966 fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
4967 fsetfd(fdp, fp, indx);
4968 fdrop(fp);
4969 sysmsg->sysmsg_result = indx;
4970 mount_drop(mp);
4971
4972 return (error);
4973
4974 bad_drop:
4975 fsetfd(fdp, NULL, indx);
4976 fdrop(fp);
4977 bad:
4978 vput(vp);
4979 done:
4980 mount_drop(mp);
4981 done2:
4982 return (error);
4983 }
4984
4985 /*
4986 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
4987 */
4988 int
sys_fhstat(struct sysmsg * sysmsg,const struct fhstat_args * uap)4989 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap)
4990 {
4991 struct thread *td = curthread;
4992 struct stat sb;
4993 fhandle_t fh;
4994 struct mount *mp;
4995 struct vnode *vp;
4996 int error;
4997
4998 /*
4999 * Must be super user
5000 */
5001 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
5002 if (error)
5003 return (error);
5004
5005 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
5006 if (error)
5007 return (error);
5008
5009 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
5010 error = ESTALE;
5011 if (error == 0) {
5012 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
5013 error = vn_stat(vp, &sb, td->td_ucred);
5014 vput(vp);
5015 }
5016 }
5017 if (error == 0)
5018 error = copyout(&sb, uap->sb, sizeof(sb));
5019 if (mp)
5020 mount_drop(mp);
5021
5022 return (error);
5023 }
5024
5025 /*
5026 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
5027 */
5028 int
sys_fhstatfs(struct sysmsg * sysmsg,const struct fhstatfs_args * uap)5029 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap)
5030 {
5031 struct thread *td = curthread;
5032 struct proc *p = td->td_proc;
5033 struct statfs *sp;
5034 struct mount *mp;
5035 struct vnode *vp;
5036 struct statfs sb;
5037 char *fullpath, *freepath;
5038 fhandle_t fh;
5039 int error;
5040
5041 /*
5042 * Must be super user
5043 */
5044 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
5045 if (error)
5046 return (error);
5047
5048 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
5049 return (error);
5050
5051 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
5052 error = ESTALE;
5053 goto done;
5054 }
5055 if (p != NULL && !chroot_visible_mnt(mp, p)) {
5056 error = ESTALE;
5057 goto done;
5058 }
5059
5060 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
5061 goto done;
5062 mp = vp->v_mount;
5063 sp = &mp->mnt_stat;
5064 vput(vp);
5065 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
5066 goto done;
5067
5068 error = mount_path(p, mp, &fullpath, &freepath);
5069 if (error)
5070 goto done;
5071 bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
5072 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
5073 kfree(freepath, M_TEMP);
5074
5075 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
5076 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) {
5077 bcopy(sp, &sb, sizeof(sb));
5078 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
5079 sp = &sb;
5080 }
5081 error = copyout(sp, uap->buf, sizeof(*sp));
5082 done:
5083 if (mp)
5084 mount_drop(mp);
5085
5086 return (error);
5087 }
5088
5089 /*
5090 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
5091 */
5092 int
sys_fhstatvfs(struct sysmsg * sysmsg,const struct fhstatvfs_args * uap)5093 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap)
5094 {
5095 struct thread *td = curthread;
5096 struct proc *p = td->td_proc;
5097 struct statvfs *sp;
5098 struct mount *mp;
5099 struct vnode *vp;
5100 fhandle_t fh;
5101 int error;
5102
5103 /*
5104 * Must be super user
5105 */
5106 if ((error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)))
5107 return (error);
5108
5109 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
5110 return (error);
5111
5112 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
5113 error = ESTALE;
5114 goto done;
5115 }
5116 if (p != NULL && !chroot_visible_mnt(mp, p)) {
5117 error = ESTALE;
5118 goto done;
5119 }
5120
5121 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
5122 goto done;
5123 mp = vp->v_mount;
5124 sp = &mp->mnt_vstat;
5125 vput(vp);
5126 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
5127 goto done;
5128
5129 sp->f_flag = 0;
5130 if (mp->mnt_flag & MNT_RDONLY)
5131 sp->f_flag |= ST_RDONLY;
5132 if (mp->mnt_flag & MNT_NOSUID)
5133 sp->f_flag |= ST_NOSUID;
5134 error = copyout(sp, uap->buf, sizeof(*sp));
5135 done:
5136 if (mp)
5137 mount_drop(mp);
5138 return (error);
5139 }
5140
5141
5142 /*
5143 * Syscall to push extended attribute configuration information into the
5144 * VFS. Accepts a path, which it converts to a mountpoint, as well as
5145 * a command (int cmd), and attribute name and misc data. For now, the
5146 * attribute name is left in userspace for consumption by the VFS_op.
5147 * It will probably be changed to be copied into sysspace by the
5148 * syscall in the future, once issues with various consumers of the
5149 * attribute code have raised their hands.
5150 *
5151 * Currently this is used only by UFS Extended Attributes.
5152 */
5153 int
sys_extattrctl(struct sysmsg * sysmsg,const struct extattrctl_args * uap)5154 sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap)
5155 {
5156 struct nlookupdata nd;
5157 struct vnode *vp;
5158 char attrname[EXTATTR_MAXNAMELEN];
5159 int error;
5160 size_t size;
5161
5162 attrname[0] = 0;
5163 vp = NULL;
5164 error = 0;
5165
5166 if (error == 0 && uap->filename) {
5167 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
5168 NLC_FOLLOW);
5169 if (error == 0)
5170 error = nlookup(&nd);
5171 if (error == 0)
5172 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
5173 nlookup_done(&nd);
5174 }
5175
5176 if (error == 0 && uap->attrname) {
5177 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
5178 &size);
5179 }
5180
5181 if (error == 0) {
5182 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5183 if (error == 0)
5184 error = nlookup(&nd);
5185 if (error == 0)
5186 error = ncp_writechk(&nd.nl_nch);
5187 if (error == 0) {
5188 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
5189 uap->attrnamespace,
5190 uap->attrname, nd.nl_cred);
5191 }
5192 nlookup_done(&nd);
5193 }
5194
5195 return (error);
5196 }
5197
5198 /*
5199 * Syscall to get a named extended attribute on a file or directory.
5200 */
5201 int
sys_extattr_set_file(struct sysmsg * sysmsg,const struct extattr_set_file_args * uap)5202 sys_extattr_set_file(struct sysmsg *sysmsg,
5203 const struct extattr_set_file_args *uap)
5204 {
5205 char attrname[EXTATTR_MAXNAMELEN];
5206 struct nlookupdata nd;
5207 struct vnode *vp;
5208 struct uio auio;
5209 struct iovec aiov;
5210 int error;
5211
5212 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5213 if (error)
5214 return (error);
5215
5216 vp = NULL;
5217
5218 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5219 if (error == 0)
5220 error = nlookup(&nd);
5221 if (error == 0)
5222 error = ncp_writechk(&nd.nl_nch);
5223 if (error == 0)
5224 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
5225 if (error) {
5226 nlookup_done(&nd);
5227 return (error);
5228 }
5229
5230 bzero(&auio, sizeof(auio));
5231 aiov.iov_base = uap->data;
5232 aiov.iov_len = uap->nbytes;
5233 auio.uio_iov = &aiov;
5234 auio.uio_iovcnt = 1;
5235 auio.uio_offset = 0;
5236 auio.uio_resid = uap->nbytes;
5237 auio.uio_rw = UIO_WRITE;
5238 auio.uio_td = curthread;
5239
5240 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
5241 &auio, nd.nl_cred);
5242
5243 vput(vp);
5244 nlookup_done(&nd);
5245 return (error);
5246 }
5247
5248 /*
5249 * Syscall to get a named extended attribute on a file or directory.
5250 */
5251 int
sys_extattr_get_file(struct sysmsg * sysmsg,const struct extattr_get_file_args * uap)5252 sys_extattr_get_file(struct sysmsg *sysmsg,
5253 const struct extattr_get_file_args *uap)
5254 {
5255 char attrname[EXTATTR_MAXNAMELEN];
5256 struct nlookupdata nd;
5257 struct uio auio;
5258 struct iovec aiov;
5259 struct vnode *vp;
5260 int error;
5261
5262 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5263 if (error)
5264 return (error);
5265
5266 vp = NULL;
5267
5268 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5269 if (error == 0)
5270 error = nlookup(&nd);
5271 if (error == 0)
5272 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
5273 if (error) {
5274 nlookup_done(&nd);
5275 return (error);
5276 }
5277
5278 bzero(&auio, sizeof(auio));
5279 aiov.iov_base = uap->data;
5280 aiov.iov_len = uap->nbytes;
5281 auio.uio_iov = &aiov;
5282 auio.uio_iovcnt = 1;
5283 auio.uio_offset = 0;
5284 auio.uio_resid = uap->nbytes;
5285 auio.uio_rw = UIO_READ;
5286 auio.uio_td = curthread;
5287
5288 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
5289 &auio, nd.nl_cred);
5290 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid;
5291
5292 vput(vp);
5293 nlookup_done(&nd);
5294 return(error);
5295 }
5296
5297 /*
5298 * Syscall to delete a named extended attribute from a file or directory.
5299 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
5300 */
5301 int
sys_extattr_delete_file(struct sysmsg * sysmsg,const struct extattr_delete_file_args * uap)5302 sys_extattr_delete_file(struct sysmsg *sysmsg,
5303 const struct extattr_delete_file_args *uap)
5304 {
5305 char attrname[EXTATTR_MAXNAMELEN];
5306 struct nlookupdata nd;
5307 struct vnode *vp;
5308 int error;
5309
5310 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
5311 if (error)
5312 return(error);
5313
5314 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5315 if (error == 0)
5316 error = nlookup(&nd);
5317 if (error == 0)
5318 error = ncp_writechk(&nd.nl_nch);
5319 if (error == 0) {
5320 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
5321 if (error == 0) {
5322 error = VOP_SETEXTATTR(vp, uap->attrnamespace,
5323 attrname, NULL, nd.nl_cred);
5324 vput(vp);
5325 }
5326 }
5327 nlookup_done(&nd);
5328 return(error);
5329 }
5330
5331 /*
5332 * Determine if the mount is visible to the process.
5333 */
5334 static int
chroot_visible_mnt(struct mount * mp,struct proc * p)5335 chroot_visible_mnt(struct mount *mp, struct proc *p)
5336 {
5337 struct nchandle nch;
5338
5339 /*
5340 * Traverse from the mount point upwards. If we hit the process
5341 * root then the mount point is visible to the process.
5342 */
5343 nch = mp->mnt_ncmountpt;
5344 while (nch.ncp) {
5345 if (nch.mount == p->p_fd->fd_nrdir.mount &&
5346 nch.ncp == p->p_fd->fd_nrdir.ncp) {
5347 return(1);
5348 }
5349 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
5350 nch = nch.mount->mnt_ncmounton;
5351 } else {
5352 nch.ncp = nch.ncp->nc_parent;
5353 }
5354 }
5355
5356 /*
5357 * If the mount point is not visible to the process, but the
5358 * process root is in a subdirectory of the mount, return
5359 * TRUE anyway.
5360 */
5361 if (p->p_fd->fd_nrdir.mount == mp)
5362 return(1);
5363
5364 return(0);
5365 }
5366
5367 /*
5368 * Return the appropriate system capability restriction.
5369 */
5370 static int
get_fscap(const char * fsname)5371 get_fscap(const char *fsname)
5372 {
5373
5374 if (strncmp("null", fsname, 5) == 0) {
5375 return SYSCAP_NOMOUNT_NULLFS;
5376 } else if (strncmp(fsname, "devfs", 6) == 0) {
5377 return SYSCAP_NOMOUNT_DEVFS;
5378 } else if (strncmp(fsname, "procfs", 7) == 0) {
5379 return SYSCAP_NOMOUNT_PROCFS;
5380 } else if (strncmp(fsname, "tmpfs", 6) == 0) {
5381 return SYSCAP_NOMOUNT_TMPFS;
5382 } else if (strncmp(fsname, "fusefs", 7) == 0) {
5383 return SYSCAP_NOMOUNT_FUSE;
5384 }
5385 return SYSCAP_RESTRICTEDROOT;
5386 }
5387
5388 int
sys___realpath(struct sysmsg * sysmsg,const struct __realpath_args * uap)5389 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap)
5390 {
5391 struct nlookupdata nd;
5392 char *rbuf;
5393 char *fbuf;
5394 ssize_t rlen;
5395 int error;
5396
5397 /*
5398 * Invalid length if less than 0. 0 is allowed
5399 */
5400 if ((ssize_t)uap->len < 0)
5401 return EINVAL;
5402
5403 rbuf = NULL;
5404 fbuf = NULL;
5405 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
5406 if (error)
5407 goto done;
5408
5409 nd.nl_flags |= NLC_SHAREDLOCK;
5410 error = nlookup(&nd);
5411 if (error)
5412 goto done;
5413
5414 if (nd.nl_nch.ncp->nc_vp == NULL) {
5415 error = ENOENT;
5416 goto done;
5417 }
5418
5419 /*
5420 * Shortcut test for existence.
5421 */
5422 if (uap->len == 0) {
5423 error = ENAMETOOLONG;
5424 goto done;
5425 }
5426
5427 /*
5428 * Obtain the path relative to the process root. The nch must not
5429 * be locked for the cache_fullpath() call.
5430 */
5431 if (nd.nl_flags & NLC_NCPISLOCKED) {
5432 nd.nl_flags &= ~NLC_NCPISLOCKED;
5433 cache_unlock(&nd.nl_nch);
5434 }
5435 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0);
5436 if (error)
5437 goto done;
5438
5439 rlen = (ssize_t)strlen(rbuf);
5440 if (rlen >= uap->len) {
5441 error = ENAMETOOLONG;
5442 goto done;
5443 }
5444 error = copyout(rbuf, uap->buf, rlen + 1);
5445 if (error == 0)
5446 sysmsg->sysmsg_szresult = rlen;
5447 done:
5448 nlookup_done(&nd);
5449 if (fbuf)
5450 kfree(fbuf, M_TEMP);
5451
5452 return error;
5453 }
5454
5455 int
sys_posix_fallocate(struct sysmsg * sysmsg,const struct posix_fallocate_args * uap)5456 sys_posix_fallocate(struct sysmsg *sysmsg, const struct posix_fallocate_args *uap)
5457 {
5458 return (kern_posix_fallocate(uap->fd, uap->offset, uap->len));
5459 }
5460
5461 int
kern_posix_fallocate(int fd,off_t offset,off_t len)5462 kern_posix_fallocate(int fd, off_t offset, off_t len)
5463 {
5464 struct thread *td = curthread;
5465 struct vnode *vp;
5466 struct file *fp;
5467 int error;
5468
5469 if (offset < 0 || len <= 0)
5470 return (EINVAL);
5471 /* Check for wrap. */
5472 if (offset > OFF_MAX - len)
5473 return (EFBIG);
5474
5475 fp = holdfp(td, fd, -1);
5476 if (fp == NULL)
5477 return (EBADF);
5478
5479 switch (fp->f_type) {
5480 case DTYPE_VNODE:
5481 break;
5482 case DTYPE_PIPE:
5483 case DTYPE_FIFO:
5484 error = ESPIPE;
5485 goto out;
5486 default:
5487 error = ENODEV;
5488 goto out;
5489 }
5490
5491 if ((fp->f_flag & FWRITE) == 0) {
5492 error = EBADF;
5493 goto out;
5494 }
5495
5496 vp = fp->f_data;
5497 if (vp->v_type != VREG) {
5498 error = ENODEV;
5499 goto out;
5500 }
5501
5502 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
5503 error = VOP_ALLOCATE(vp, offset, len);
5504 vn_unlock(vp);
5505 out:
5506 dropfp(td, fd, fp);
5507 return (error);
5508 }
5509