/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/buf.h> 45 #include <sys/conf.h> 46 #include <sys/sysent.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mountctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/jail.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/dirent.h> 65 #include <sys/extattr.h> 66 #include <sys/spinlock.h> 67 #include <sys/kern_syscall.h> 68 #include <sys/objcache.h> 69 #include <sys/sysctl.h> 70 71 #include <sys/buf2.h> 72 #include <sys/file2.h> 73 #include <sys/spinlock2.h> 74 #include <sys/mplock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...) 
86 __printflike(2, 3); 87 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 88 static int checkvp_chdir (struct vnode *vn, struct thread *td); 89 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 90 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 91 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 92 static int getutimes (const struct timeval *, struct timespec *); 93 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 94 static int setfmode (struct vnode *, int); 95 static int setfflags (struct vnode *, int); 96 static int setutimes (struct vnode *, struct vattr *, 97 const struct timespec *, int); 98 static int usermount = 0; /* if 1, non-root can mount fs. */ 99 100 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 101 102 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 103 "Allow non-root users to mount filesystems"); 104 105 /* 106 * Virtual File System System Calls 107 */ 108 109 /* 110 * Mount a file system. 111 * 112 * mount_args(char *type, char *path, int flags, caddr_t data) 113 * 114 * MPALMOSTSAFE 115 */ 116 int 117 sys_mount(struct mount_args *uap) 118 { 119 struct thread *td = curthread; 120 struct vnode *vp; 121 struct nchandle nch; 122 struct mount *mp, *nullmp; 123 struct vfsconf *vfsp; 124 int error, flag = 0, flag2 = 0; 125 int hasmount; 126 struct vattr va; 127 struct nlookupdata nd; 128 char fstypename[MFSNAMELEN]; 129 struct ucred *cred; 130 131 get_mplock(); 132 cred = td->td_ucred; 133 if (jailed(cred)) { 134 error = EPERM; 135 goto done; 136 } 137 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 138 goto done; 139 140 /* 141 * Do not allow NFS export by non-root users. 
142 */ 143 if (uap->flags & MNT_EXPORTED) { 144 error = priv_check(td, PRIV_ROOT); 145 if (error) 146 goto done; 147 } 148 /* 149 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 150 */ 151 if (priv_check(td, PRIV_ROOT)) 152 uap->flags |= MNT_NOSUID | MNT_NODEV; 153 154 /* 155 * Lookup the requested path and extract the nch and vnode. 156 */ 157 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 158 if (error == 0) { 159 if ((error = nlookup(&nd)) == 0) { 160 if (nd.nl_nch.ncp->nc_vp == NULL) 161 error = ENOENT; 162 } 163 } 164 if (error) { 165 nlookup_done(&nd); 166 goto done; 167 } 168 169 /* 170 * If the target filesystem is resolved via a nullfs mount, then 171 * nd.nl_nch.mount will be pointing to the nullfs mount structure 172 * instead of the target file system. We need it in case we are 173 * doing an update. 174 */ 175 nullmp = nd.nl_nch.mount; 176 177 /* 178 * Extract the locked+refd ncp and cleanup the nd structure 179 */ 180 nch = nd.nl_nch; 181 cache_zero(&nd.nl_nch); 182 nlookup_done(&nd); 183 184 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 185 (mp = cache_findmount(&nch)) != NULL) { 186 cache_dropmount(mp); 187 hasmount = 1; 188 } else { 189 hasmount = 0; 190 } 191 192 193 /* 194 * now we have the locked ref'd nch and unreferenced vnode. 195 */ 196 vp = nch.ncp->nc_vp; 197 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 198 cache_put(&nch); 199 goto done; 200 } 201 cache_unlock(&nch); 202 203 /* 204 * Extract the file system type. We need to know this early, to take 205 * appropriate actions if we are dealing with a nullfs. 
206 */ 207 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 208 cache_drop(&nch); 209 vput(vp); 210 goto done; 211 } 212 213 /* 214 * Now we have an unlocked ref'd nch and a locked ref'd vp 215 */ 216 if (uap->flags & MNT_UPDATE) { 217 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 218 cache_drop(&nch); 219 vput(vp); 220 error = EINVAL; 221 goto done; 222 } 223 224 if (strncmp(fstypename, "null", 5) == 0) { 225 KKASSERT(nullmp); 226 mp = nullmp; 227 } else { 228 mp = vp->v_mount; 229 } 230 231 flag = mp->mnt_flag; 232 flag2 = mp->mnt_kern_flag; 233 /* 234 * We only allow the filesystem to be reloaded if it 235 * is currently mounted read-only. 236 */ 237 if ((uap->flags & MNT_RELOAD) && 238 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 239 cache_drop(&nch); 240 vput(vp); 241 error = EOPNOTSUPP; /* Needs translation */ 242 goto done; 243 } 244 /* 245 * Only root, or the user that did the original mount is 246 * permitted to update it. 247 */ 248 if (mp->mnt_stat.f_owner != cred->cr_uid && 249 (error = priv_check(td, PRIV_ROOT))) { 250 cache_drop(&nch); 251 vput(vp); 252 goto done; 253 } 254 if (vfs_busy(mp, LK_NOWAIT)) { 255 cache_drop(&nch); 256 vput(vp); 257 error = EBUSY; 258 goto done; 259 } 260 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 261 cache_drop(&nch); 262 vfs_unbusy(mp); 263 vput(vp); 264 error = EBUSY; 265 goto done; 266 } 267 vsetflags(vp, VMOUNT); 268 mp->mnt_flag |= 269 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 270 vn_unlock(vp); 271 goto update; 272 } 273 /* 274 * If the user is not root, ensure that they own the directory 275 * onto which we are attempting to mount. 
276 */ 277 if ((error = VOP_GETATTR(vp, &va)) || 278 (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) { 279 cache_drop(&nch); 280 vput(vp); 281 goto done; 282 } 283 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 284 cache_drop(&nch); 285 vput(vp); 286 goto done; 287 } 288 if (vp->v_type != VDIR) { 289 cache_drop(&nch); 290 vput(vp); 291 error = ENOTDIR; 292 goto done; 293 } 294 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 295 cache_drop(&nch); 296 vput(vp); 297 error = EPERM; 298 goto done; 299 } 300 vfsp = vfsconf_find_by_name(fstypename); 301 if (vfsp == NULL) { 302 linker_file_t lf; 303 304 /* Only load modules for root (very important!) */ 305 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 306 cache_drop(&nch); 307 vput(vp); 308 goto done; 309 } 310 error = linker_load_file(fstypename, &lf); 311 if (error || lf == NULL) { 312 cache_drop(&nch); 313 vput(vp); 314 if (lf == NULL) 315 error = ENODEV; 316 goto done; 317 } 318 lf->userrefs++; 319 /* lookup again, see if the VFS was loaded */ 320 vfsp = vfsconf_find_by_name(fstypename); 321 if (vfsp == NULL) { 322 lf->userrefs--; 323 linker_file_unload(lf); 324 cache_drop(&nch); 325 vput(vp); 326 error = ENODEV; 327 goto done; 328 } 329 } 330 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 331 cache_drop(&nch); 332 vput(vp); 333 error = EBUSY; 334 goto done; 335 } 336 vsetflags(vp, VMOUNT); 337 338 /* 339 * Allocate and initialize the filesystem. 340 */ 341 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 342 mount_init(mp); 343 vfs_busy(mp, LK_NOWAIT); 344 mp->mnt_op = vfsp->vfc_vfsops; 345 mp->mnt_vfc = vfsp; 346 vfsp->vfc_refcount++; 347 mp->mnt_stat.f_type = vfsp->vfc_typenum; 348 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 349 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 350 mp->mnt_stat.f_owner = cred->cr_uid; 351 vn_unlock(vp); 352 update: 353 /* 354 * Set the mount level flags. 
355 */ 356 if (uap->flags & MNT_RDONLY) 357 mp->mnt_flag |= MNT_RDONLY; 358 else if (mp->mnt_flag & MNT_RDONLY) 359 mp->mnt_kern_flag |= MNTK_WANTRDWR; 360 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 361 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 362 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 363 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 364 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 365 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 366 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 367 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 368 /* 369 * Mount the filesystem. 370 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 371 * get. 372 */ 373 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 374 if (mp->mnt_flag & MNT_UPDATE) { 375 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 376 mp->mnt_flag &= ~MNT_RDONLY; 377 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 378 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 379 if (error) { 380 mp->mnt_flag = flag; 381 mp->mnt_kern_flag = flag2; 382 } 383 vfs_unbusy(mp); 384 vclrflags(vp, VMOUNT); 385 vrele(vp); 386 cache_drop(&nch); 387 goto done; 388 } 389 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 390 /* 391 * Put the new filesystem on the mount list after root. The mount 392 * point gets its own mnt_ncmountpt (unless the VFS already set one 393 * up) which represents the root of the mount. The lookup code 394 * detects the mount point going forward and checks the root of 395 * the mount going backwards. 396 * 397 * It is not necessary to invalidate or purge the vnode underneath 398 * because elements under the mount will be given their own glue 399 * namecache record. 
400 */ 401 if (!error) { 402 if (mp->mnt_ncmountpt.ncp == NULL) { 403 /* 404 * allocate, then unlock, but leave the ref intact 405 */ 406 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 407 cache_unlock(&mp->mnt_ncmountpt); 408 } 409 mp->mnt_ncmounton = nch; /* inherits ref */ 410 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 411 412 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 413 vclrflags(vp, VMOUNT); 414 mountlist_insert(mp, MNTINS_LAST); 415 vn_unlock(vp); 416 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 417 error = vfs_allocate_syncvnode(mp); 418 vfs_unbusy(mp); 419 error = VFS_START(mp, 0); 420 vrele(vp); 421 } else { 422 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 423 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 424 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 425 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 426 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 427 vclrflags(vp, VMOUNT); 428 mp->mnt_vfc->vfc_refcount--; 429 vfs_unbusy(mp); 430 kfree(mp, M_MOUNT); 431 cache_drop(&nch); 432 vput(vp); 433 } 434 done: 435 rel_mplock(); 436 return (error); 437 } 438 439 /* 440 * Scan all active processes to see if any of them have a current 441 * or root directory onto which the new filesystem has just been 442 * mounted. If so, replace them with the new mount point. 443 * 444 * The passed ncp is ref'd and locked (from the mount code) and 445 * must be associated with the vnode representing the root of the 446 * mount point. 
447 */ 448 struct checkdirs_info { 449 struct nchandle old_nch; 450 struct nchandle new_nch; 451 struct vnode *old_vp; 452 struct vnode *new_vp; 453 }; 454 455 static int checkdirs_callback(struct proc *p, void *data); 456 457 static void 458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 459 { 460 struct checkdirs_info info; 461 struct vnode *olddp; 462 struct vnode *newdp; 463 struct mount *mp; 464 465 /* 466 * If the old mount point's vnode has a usecount of 1, it is not 467 * being held as a descriptor anywhere. 468 */ 469 olddp = old_nch->ncp->nc_vp; 470 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 471 return; 472 473 /* 474 * Force the root vnode of the new mount point to be resolved 475 * so we can update any matching processes. 476 */ 477 mp = new_nch->mount; 478 if (VFS_ROOT(mp, &newdp)) 479 panic("mount: lost mount"); 480 cache_setunresolved(new_nch); 481 cache_setvp(new_nch, newdp); 482 483 /* 484 * Special handling of the root node 485 */ 486 if (rootvnode == olddp) { 487 vref(newdp); 488 vfs_cache_setroot(newdp, cache_hold(new_nch)); 489 } 490 491 /* 492 * Pass newdp separately so the callback does not have to access 493 * it via new_nch->ncp->nc_vp. 494 */ 495 info.old_nch = *old_nch; 496 info.new_nch = *new_nch; 497 info.new_vp = newdp; 498 allproc_scan(checkdirs_callback, &info); 499 vput(newdp); 500 } 501 502 /* 503 * NOTE: callback is not MP safe because the scanned process's filedesc 504 * structure can be ripped out from under us, amoung other things. 505 */ 506 static int 507 checkdirs_callback(struct proc *p, void *data) 508 { 509 struct checkdirs_info *info = data; 510 struct filedesc *fdp; 511 struct nchandle ncdrop1; 512 struct nchandle ncdrop2; 513 struct vnode *vprele1; 514 struct vnode *vprele2; 515 516 if ((fdp = p->p_fd) != NULL) { 517 cache_zero(&ncdrop1); 518 cache_zero(&ncdrop2); 519 vprele1 = NULL; 520 vprele2 = NULL; 521 522 /* 523 * MPUNSAFE - XXX fdp can be pulled out from under a 524 * foreign process. 
525 * 526 * A shared filedesc is ok, we don't have to copy it 527 * because we are making this change globally. 528 */ 529 spin_lock(&fdp->fd_spin); 530 if (fdp->fd_ncdir.mount == info->old_nch.mount && 531 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 532 vprele1 = fdp->fd_cdir; 533 vref(info->new_vp); 534 fdp->fd_cdir = info->new_vp; 535 ncdrop1 = fdp->fd_ncdir; 536 cache_copy(&info->new_nch, &fdp->fd_ncdir); 537 } 538 if (fdp->fd_nrdir.mount == info->old_nch.mount && 539 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 540 vprele2 = fdp->fd_rdir; 541 vref(info->new_vp); 542 fdp->fd_rdir = info->new_vp; 543 ncdrop2 = fdp->fd_nrdir; 544 cache_copy(&info->new_nch, &fdp->fd_nrdir); 545 } 546 spin_unlock(&fdp->fd_spin); 547 if (ncdrop1.ncp) 548 cache_drop(&ncdrop1); 549 if (ncdrop2.ncp) 550 cache_drop(&ncdrop2); 551 if (vprele1) 552 vrele(vprele1); 553 if (vprele2) 554 vrele(vprele2); 555 } 556 return(0); 557 } 558 559 /* 560 * Unmount a file system. 561 * 562 * Note: unmount takes a path to the vnode mounted on as argument, 563 * not special file (as before). 564 * 565 * umount_args(char *path, int flags) 566 * 567 * MPALMOSTSAFE 568 */ 569 int 570 sys_unmount(struct unmount_args *uap) 571 { 572 struct thread *td = curthread; 573 struct proc *p __debugvar = td->td_proc; 574 struct mount *mp = NULL; 575 struct nlookupdata nd; 576 int error; 577 578 KKASSERT(p); 579 get_mplock(); 580 if (td->td_ucred->cr_prison != NULL) { 581 error = EPERM; 582 goto done; 583 } 584 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 585 goto done; 586 587 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 588 if (error == 0) 589 error = nlookup(&nd); 590 if (error) 591 goto out; 592 593 mp = nd.nl_nch.mount; 594 595 /* 596 * Only root, or the user that did the original mount is 597 * permitted to unmount this filesystem. 
598 */ 599 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 600 (error = priv_check(td, PRIV_ROOT))) 601 goto out; 602 603 /* 604 * Don't allow unmounting the root file system. 605 */ 606 if (mp->mnt_flag & MNT_ROOTFS) { 607 error = EINVAL; 608 goto out; 609 } 610 611 /* 612 * Must be the root of the filesystem 613 */ 614 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 615 error = EINVAL; 616 goto out; 617 } 618 619 out: 620 nlookup_done(&nd); 621 if (error == 0) 622 error = dounmount(mp, uap->flags); 623 done: 624 rel_mplock(); 625 return (error); 626 } 627 628 /* 629 * Do the actual file system unmount. 630 */ 631 static int 632 dounmount_interlock(struct mount *mp) 633 { 634 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 635 return (EBUSY); 636 mp->mnt_kern_flag |= MNTK_UNMOUNT; 637 return(0); 638 } 639 640 static int 641 unmount_allproc_cb(struct proc *p, void *arg) 642 { 643 struct mount *mp; 644 645 if (p->p_textnch.ncp == NULL) 646 return 0; 647 648 mp = (struct mount *)arg; 649 if (p->p_textnch.mount == mp) 650 cache_drop(&p->p_textnch); 651 652 return 0; 653 } 654 655 int 656 dounmount(struct mount *mp, int flags) 657 { 658 struct namecache *ncp; 659 struct nchandle nch; 660 struct vnode *vp; 661 int error; 662 int async_flag; 663 int lflags; 664 int freeok = 1; 665 666 /* 667 * Exclusive access for unmounting purposes 668 */ 669 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 670 return (error); 671 672 /* 673 * Allow filesystems to detect that a forced unmount is in progress. 674 */ 675 if (flags & MNT_FORCE) 676 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 677 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT); 678 error = lockmgr(&mp->mnt_lock, lflags); 679 if (error) { 680 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 681 if (mp->mnt_kern_flag & MNTK_MWAIT) 682 wakeup(mp); 683 return (error); 684 } 685 686 if (mp->mnt_flag & MNT_EXPUBLIC) 687 vfs_setpublicfs(NULL, NULL, NULL); 688 689 vfs_msync(mp, MNT_WAIT); 690 async_flag = mp->mnt_flag & MNT_ASYNC; 691 mp->mnt_flag &=~ MNT_ASYNC; 692 693 /* 694 * If this filesystem isn't aliasing other filesystems, 695 * try to invalidate any remaining namecache entries and 696 * check the count afterwords. 697 */ 698 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 699 cache_lock(&mp->mnt_ncmountpt); 700 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 701 cache_unlock(&mp->mnt_ncmountpt); 702 703 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 704 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 705 allproc_scan(&unmount_allproc_cb, mp); 706 } 707 708 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 709 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 710 711 if ((flags & MNT_FORCE) == 0) { 712 error = EBUSY; 713 mount_warning(mp, "Cannot unmount: " 714 "%d namecache " 715 "references still " 716 "present", 717 ncp->nc_refs - 1); 718 } else { 719 mount_warning(mp, "Forced unmount: " 720 "%d namecache " 721 "references still " 722 "present", 723 ncp->nc_refs - 1); 724 freeok = 0; 725 } 726 } 727 } 728 729 /* 730 * nchandle records ref the mount structure. Expect a count of 1 731 * (our mount->mnt_ncmountpt). 732 */ 733 if (mp->mnt_refs != 1) { 734 if ((flags & MNT_FORCE) == 0) { 735 mount_warning(mp, "Cannot unmount: " 736 "%d process references still " 737 "present", mp->mnt_refs); 738 error = EBUSY; 739 } else { 740 mount_warning(mp, "Forced unmount: " 741 "%d process references still " 742 "present", mp->mnt_refs); 743 freeok = 0; 744 } 745 } 746 747 /* 748 * Decomission our special mnt_syncer vnode. This also stops 749 * the vnlru code. 
If we are unable to unmount we recommission 750 * the vnode. 751 */ 752 if (error == 0) { 753 if ((vp = mp->mnt_syncer) != NULL) { 754 mp->mnt_syncer = NULL; 755 vrele(vp); 756 } 757 if (((mp->mnt_flag & MNT_RDONLY) || 758 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 759 (flags & MNT_FORCE)) { 760 error = VFS_UNMOUNT(mp, flags); 761 } 762 } 763 if (error) { 764 if (mp->mnt_syncer == NULL) 765 vfs_allocate_syncvnode(mp); 766 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 767 mp->mnt_flag |= async_flag; 768 lockmgr(&mp->mnt_lock, LK_RELEASE); 769 if (mp->mnt_kern_flag & MNTK_MWAIT) 770 wakeup(mp); 771 return (error); 772 } 773 /* 774 * Clean up any journals still associated with the mount after 775 * filesystem activity has ceased. 776 */ 777 journal_remove_all_journals(mp, 778 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 779 780 mountlist_remove(mp); 781 782 /* 783 * Remove any installed vnode ops here so the individual VFSs don't 784 * have to. 785 */ 786 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 787 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 788 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 789 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 790 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 791 792 if (mp->mnt_ncmountpt.ncp != NULL) { 793 nch = mp->mnt_ncmountpt; 794 cache_zero(&mp->mnt_ncmountpt); 795 cache_clrmountpt(&nch); 796 cache_drop(&nch); 797 } 798 if (mp->mnt_ncmounton.ncp != NULL) { 799 nch = mp->mnt_ncmounton; 800 cache_zero(&mp->mnt_ncmounton); 801 cache_clrmountpt(&nch); 802 cache_drop(&nch); 803 } 804 805 mp->mnt_vfc->vfc_refcount--; 806 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 807 panic("unmount: dangling vnode"); 808 lockmgr(&mp->mnt_lock, LK_RELEASE); 809 if (mp->mnt_kern_flag & MNTK_MWAIT) 810 wakeup(mp); 811 if (freeok) 812 kfree(mp, M_MOUNT); 813 return (0); 814 } 815 816 static 817 void 818 mount_warning(struct mount *mp, const char *ctl, ...) 
819 { 820 char *ptr; 821 char *buf; 822 __va_list va; 823 824 __va_start(va, ctl); 825 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) { 826 kprintf("unmount(%s): ", ptr); 827 kvprintf(ctl, va); 828 kprintf("\n"); 829 kfree(buf, M_TEMP); 830 } else { 831 kprintf("unmount(%p", mp); 832 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 833 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 834 kprintf("): "); 835 kvprintf(ctl, va); 836 kprintf("\n"); 837 } 838 __va_end(va); 839 } 840 841 /* 842 * Shim cache_fullpath() to handle the case where a process is chrooted into 843 * a subdirectory of a mount. In this case if the root mount matches the 844 * process root directory's mount we have to specify the process's root 845 * directory instead of the mount point, because the mount point might 846 * be above the root directory. 847 */ 848 static 849 int 850 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 851 { 852 struct nchandle *nch; 853 854 if (p && p->p_fd->fd_nrdir.mount == mp) 855 nch = &p->p_fd->fd_nrdir; 856 else 857 nch = &mp->mnt_ncmountpt; 858 return(cache_fullpath(p, nch, rb, fb, 0)); 859 } 860 861 /* 862 * Sync each mounted filesystem. 863 */ 864 865 #ifdef DEBUG 866 static int syncprt = 0; 867 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 868 #endif /* DEBUG */ 869 870 static int sync_callback(struct mount *mp, void *data); 871 872 int 873 sys_sync(struct sync_args *uap) 874 { 875 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 876 #ifdef DEBUG 877 /* 878 * print out buffer pool stat information on each sync() call. 
879 */ 880 if (syncprt) 881 vfs_bufstats(); 882 #endif /* DEBUG */ 883 return (0); 884 } 885 886 static 887 int 888 sync_callback(struct mount *mp, void *data __unused) 889 { 890 int asyncflag; 891 892 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 893 asyncflag = mp->mnt_flag & MNT_ASYNC; 894 mp->mnt_flag &= ~MNT_ASYNC; 895 vfs_msync(mp, MNT_NOWAIT); 896 VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); 897 mp->mnt_flag |= asyncflag; 898 } 899 return(0); 900 } 901 902 /* XXX PRISON: could be per prison flag */ 903 static int prison_quotas; 904 #if 0 905 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 906 #endif 907 908 /* 909 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 910 * 911 * Change filesystem quotas. 912 * 913 * MPALMOSTSAFE 914 */ 915 int 916 sys_quotactl(struct quotactl_args *uap) 917 { 918 struct nlookupdata nd; 919 struct thread *td; 920 struct mount *mp; 921 int error; 922 923 get_mplock(); 924 td = curthread; 925 if (td->td_ucred->cr_prison && !prison_quotas) { 926 error = EPERM; 927 goto done; 928 } 929 930 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 931 if (error == 0) 932 error = nlookup(&nd); 933 if (error == 0) { 934 mp = nd.nl_nch.mount; 935 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 936 uap->arg, nd.nl_cred); 937 } 938 nlookup_done(&nd); 939 done: 940 rel_mplock(); 941 return (error); 942 } 943 944 /* 945 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 946 * void *buf, int buflen) 947 * 948 * This function operates on a mount point and executes the specified 949 * operation using the specified control data, and possibly returns data. 950 * 951 * The actual number of bytes stored in the result buffer is returned, 0 952 * if none, otherwise an error is returned. 
953 * 954 * MPALMOSTSAFE 955 */ 956 int 957 sys_mountctl(struct mountctl_args *uap) 958 { 959 struct thread *td = curthread; 960 struct proc *p = td->td_proc; 961 struct file *fp; 962 void *ctl = NULL; 963 void *buf = NULL; 964 char *path = NULL; 965 int error; 966 967 /* 968 * Sanity and permissions checks. We must be root. 969 */ 970 KKASSERT(p); 971 if (td->td_ucred->cr_prison != NULL) 972 return (EPERM); 973 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 974 (error = priv_check(td, PRIV_ROOT)) != 0) 975 return (error); 976 977 /* 978 * Argument length checks 979 */ 980 if (uap->ctllen < 0 || uap->ctllen > 1024) 981 return (EINVAL); 982 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 983 return (EINVAL); 984 if (uap->path == NULL) 985 return (EINVAL); 986 987 /* 988 * Allocate the necessary buffers and copyin data 989 */ 990 path = objcache_get(namei_oc, M_WAITOK); 991 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 992 if (error) 993 goto done; 994 995 if (uap->ctllen) { 996 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 997 error = copyin(uap->ctl, ctl, uap->ctllen); 998 if (error) 999 goto done; 1000 } 1001 if (uap->buflen) 1002 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1003 1004 /* 1005 * Validate the descriptor 1006 */ 1007 if (uap->fd >= 0) { 1008 fp = holdfp(p->p_fd, uap->fd, -1); 1009 if (fp == NULL) { 1010 error = EBADF; 1011 goto done; 1012 } 1013 } else { 1014 fp = NULL; 1015 } 1016 1017 /* 1018 * Execute the internal kernel function and clean up. 
1019 */ 1020 get_mplock(); 1021 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1022 rel_mplock(); 1023 if (fp) 1024 fdrop(fp); 1025 if (error == 0 && uap->sysmsg_result > 0) 1026 error = copyout(buf, uap->buf, uap->sysmsg_result); 1027 done: 1028 if (path) 1029 objcache_put(namei_oc, path); 1030 if (ctl) 1031 kfree(ctl, M_TEMP); 1032 if (buf) 1033 kfree(buf, M_TEMP); 1034 return (error); 1035 } 1036 1037 /* 1038 * Execute a mount control operation by resolving the path to a mount point 1039 * and calling vop_mountctl(). 1040 * 1041 * Use the mount point from the nch instead of the vnode so nullfs mounts 1042 * can properly spike the VOP. 1043 */ 1044 int 1045 kern_mountctl(const char *path, int op, struct file *fp, 1046 const void *ctl, int ctllen, 1047 void *buf, int buflen, int *res) 1048 { 1049 struct vnode *vp; 1050 struct mount *mp; 1051 struct nlookupdata nd; 1052 int error; 1053 1054 *res = 0; 1055 vp = NULL; 1056 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1057 if (error == 0) 1058 error = nlookup(&nd); 1059 if (error == 0) 1060 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1061 mp = nd.nl_nch.mount; 1062 nlookup_done(&nd); 1063 if (error) 1064 return (error); 1065 vn_unlock(vp); 1066 1067 /* 1068 * Must be the root of the filesystem 1069 */ 1070 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1071 vrele(vp); 1072 return (EINVAL); 1073 } 1074 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1075 buf, buflen, res); 1076 vrele(vp); 1077 return (error); 1078 } 1079 1080 int 1081 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1082 { 1083 struct thread *td = curthread; 1084 struct proc *p = td->td_proc; 1085 struct mount *mp; 1086 struct statfs *sp; 1087 char *fullpath, *freepath; 1088 int error; 1089 1090 if ((error = nlookup(nd)) != 0) 1091 return (error); 1092 mp = nd->nl_nch.mount; 1093 sp = &mp->mnt_stat; 1094 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1095 return (error); 1096 1097 error = mount_path(p, mp, &fullpath, &freepath); 1098 if (error) 1099 return(error); 1100 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1101 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1102 kfree(freepath, M_TEMP); 1103 1104 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1105 bcopy(sp, buf, sizeof(*buf)); 1106 /* Only root should have access to the fsid's. */ 1107 if (priv_check(td, PRIV_ROOT)) 1108 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1109 return (0); 1110 } 1111 1112 /* 1113 * statfs_args(char *path, struct statfs *buf) 1114 * 1115 * Get filesystem statistics. 1116 */ 1117 int 1118 sys_statfs(struct statfs_args *uap) 1119 { 1120 struct nlookupdata nd; 1121 struct statfs buf; 1122 int error; 1123 1124 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1125 if (error == 0) 1126 error = kern_statfs(&nd, &buf); 1127 nlookup_done(&nd); 1128 if (error == 0) 1129 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1130 return (error); 1131 } 1132 1133 int 1134 kern_fstatfs(int fd, struct statfs *buf) 1135 { 1136 struct thread *td = curthread; 1137 struct proc *p = td->td_proc; 1138 struct file *fp; 1139 struct mount *mp; 1140 struct statfs *sp; 1141 char *fullpath, *freepath; 1142 int error; 1143 1144 KKASSERT(p); 1145 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1146 return (error); 1147 1148 /* 1149 * Try to use mount info from any overlays rather than the 1150 * mount info for the underlying vnode, otherwise we will 1151 * fail when operating on null-mounted paths inside a chroot. 
1152 */ 1153 if ((mp = fp->f_nchandle.mount) == NULL) 1154 mp = ((struct vnode *)fp->f_data)->v_mount; 1155 if (mp == NULL) { 1156 error = EBADF; 1157 goto done; 1158 } 1159 if (fp->f_cred == NULL) { 1160 error = EINVAL; 1161 goto done; 1162 } 1163 sp = &mp->mnt_stat; 1164 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1165 goto done; 1166 1167 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1168 goto done; 1169 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1170 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1171 kfree(freepath, M_TEMP); 1172 1173 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1174 bcopy(sp, buf, sizeof(*buf)); 1175 1176 /* Only root should have access to the fsid's. */ 1177 if (priv_check(td, PRIV_ROOT)) 1178 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1179 error = 0; 1180 done: 1181 fdrop(fp); 1182 return (error); 1183 } 1184 1185 /* 1186 * fstatfs_args(int fd, struct statfs *buf) 1187 * 1188 * Get filesystem statistics. 1189 */ 1190 int 1191 sys_fstatfs(struct fstatfs_args *uap) 1192 { 1193 struct statfs buf; 1194 int error; 1195 1196 error = kern_fstatfs(uap->fd, &buf); 1197 1198 if (error == 0) 1199 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1200 return (error); 1201 } 1202 1203 int 1204 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1205 { 1206 struct mount *mp; 1207 struct statvfs *sp; 1208 int error; 1209 1210 if ((error = nlookup(nd)) != 0) 1211 return (error); 1212 mp = nd->nl_nch.mount; 1213 sp = &mp->mnt_vstat; 1214 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1215 return (error); 1216 1217 sp->f_flag = 0; 1218 if (mp->mnt_flag & MNT_RDONLY) 1219 sp->f_flag |= ST_RDONLY; 1220 if (mp->mnt_flag & MNT_NOSUID) 1221 sp->f_flag |= ST_NOSUID; 1222 bcopy(sp, buf, sizeof(*buf)); 1223 return (0); 1224 } 1225 1226 /* 1227 * statfs_args(char *path, struct statfs *buf) 1228 * 1229 * Get filesystem statistics. 
1230 */ 1231 int 1232 sys_statvfs(struct statvfs_args *uap) 1233 { 1234 struct nlookupdata nd; 1235 struct statvfs buf; 1236 int error; 1237 1238 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1239 if (error == 0) 1240 error = kern_statvfs(&nd, &buf); 1241 nlookup_done(&nd); 1242 if (error == 0) 1243 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1244 return (error); 1245 } 1246 1247 int 1248 kern_fstatvfs(int fd, struct statvfs *buf) 1249 { 1250 struct thread *td = curthread; 1251 struct proc *p = td->td_proc; 1252 struct file *fp; 1253 struct mount *mp; 1254 struct statvfs *sp; 1255 int error; 1256 1257 KKASSERT(p); 1258 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1259 return (error); 1260 if ((mp = fp->f_nchandle.mount) == NULL) 1261 mp = ((struct vnode *)fp->f_data)->v_mount; 1262 if (mp == NULL) { 1263 error = EBADF; 1264 goto done; 1265 } 1266 if (fp->f_cred == NULL) { 1267 error = EINVAL; 1268 goto done; 1269 } 1270 sp = &mp->mnt_vstat; 1271 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1272 goto done; 1273 1274 sp->f_flag = 0; 1275 if (mp->mnt_flag & MNT_RDONLY) 1276 sp->f_flag |= ST_RDONLY; 1277 if (mp->mnt_flag & MNT_NOSUID) 1278 sp->f_flag |= ST_NOSUID; 1279 1280 bcopy(sp, buf, sizeof(*buf)); 1281 error = 0; 1282 done: 1283 fdrop(fp); 1284 return (error); 1285 } 1286 1287 /* 1288 * fstatfs_args(int fd, struct statfs *buf) 1289 * 1290 * Get filesystem statistics. 1291 */ 1292 int 1293 sys_fstatvfs(struct fstatvfs_args *uap) 1294 { 1295 struct statvfs buf; 1296 int error; 1297 1298 error = kern_fstatvfs(uap->fd, &buf); 1299 1300 if (error == 0) 1301 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1302 return (error); 1303 } 1304 1305 /* 1306 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1307 * 1308 * Get statistics on all filesystems. 
1309 */ 1310 1311 struct getfsstat_info { 1312 struct statfs *sfsp; 1313 long count; 1314 long maxcount; 1315 int error; 1316 int flags; 1317 struct thread *td; 1318 }; 1319 1320 static int getfsstat_callback(struct mount *, void *); 1321 1322 int 1323 sys_getfsstat(struct getfsstat_args *uap) 1324 { 1325 struct thread *td = curthread; 1326 struct getfsstat_info info; 1327 1328 bzero(&info, sizeof(info)); 1329 1330 info.maxcount = uap->bufsize / sizeof(struct statfs); 1331 info.sfsp = uap->buf; 1332 info.count = 0; 1333 info.flags = uap->flags; 1334 info.td = td; 1335 1336 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1337 if (info.sfsp && info.count > info.maxcount) 1338 uap->sysmsg_result = info.maxcount; 1339 else 1340 uap->sysmsg_result = info.count; 1341 return (info.error); 1342 } 1343 1344 static int 1345 getfsstat_callback(struct mount *mp, void *data) 1346 { 1347 struct getfsstat_info *info = data; 1348 struct statfs *sp; 1349 char *freepath; 1350 char *fullpath; 1351 int error; 1352 1353 if (info->sfsp && info->count < info->maxcount) { 1354 if (info->td->td_proc && 1355 !chroot_visible_mnt(mp, info->td->td_proc)) { 1356 return(0); 1357 } 1358 sp = &mp->mnt_stat; 1359 1360 /* 1361 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1362 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1363 * overrides MNT_WAIT. 
1364 */ 1365 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1366 (info->flags & MNT_WAIT)) && 1367 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1368 return(0); 1369 } 1370 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1371 1372 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1373 if (error) { 1374 info->error = error; 1375 return(-1); 1376 } 1377 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1378 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1379 kfree(freepath, M_TEMP); 1380 1381 error = copyout(sp, info->sfsp, sizeof(*sp)); 1382 if (error) { 1383 info->error = error; 1384 return (-1); 1385 } 1386 ++info->sfsp; 1387 } 1388 info->count++; 1389 return(0); 1390 } 1391 1392 /* 1393 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1394 long bufsize, int flags) 1395 * 1396 * Get statistics on all filesystems. 1397 */ 1398 1399 struct getvfsstat_info { 1400 struct statfs *sfsp; 1401 struct statvfs *vsfsp; 1402 long count; 1403 long maxcount; 1404 int error; 1405 int flags; 1406 struct thread *td; 1407 }; 1408 1409 static int getvfsstat_callback(struct mount *, void *); 1410 1411 int 1412 sys_getvfsstat(struct getvfsstat_args *uap) 1413 { 1414 struct thread *td = curthread; 1415 struct getvfsstat_info info; 1416 1417 bzero(&info, sizeof(info)); 1418 1419 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1420 info.sfsp = uap->buf; 1421 info.vsfsp = uap->vbuf; 1422 info.count = 0; 1423 info.flags = uap->flags; 1424 info.td = td; 1425 1426 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1427 if (info.vsfsp && info.count > info.maxcount) 1428 uap->sysmsg_result = info.maxcount; 1429 else 1430 uap->sysmsg_result = info.count; 1431 return (info.error); 1432 } 1433 1434 static int 1435 getvfsstat_callback(struct mount *mp, void *data) 1436 { 1437 struct getvfsstat_info *info = data; 1438 struct statfs *sp; 1439 struct statvfs *vsp; 1440 char *freepath; 1441 char *fullpath; 1442 int error; 1443 1444 if 
(info->vsfsp && info->count < info->maxcount) { 1445 if (info->td->td_proc && 1446 !chroot_visible_mnt(mp, info->td->td_proc)) { 1447 return(0); 1448 } 1449 sp = &mp->mnt_stat; 1450 vsp = &mp->mnt_vstat; 1451 1452 /* 1453 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1454 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1455 * overrides MNT_WAIT. 1456 */ 1457 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1458 (info->flags & MNT_WAIT)) && 1459 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1460 return(0); 1461 } 1462 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1463 1464 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1465 (info->flags & MNT_WAIT)) && 1466 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1467 return(0); 1468 } 1469 vsp->f_flag = 0; 1470 if (mp->mnt_flag & MNT_RDONLY) 1471 vsp->f_flag |= ST_RDONLY; 1472 if (mp->mnt_flag & MNT_NOSUID) 1473 vsp->f_flag |= ST_NOSUID; 1474 1475 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1476 if (error) { 1477 info->error = error; 1478 return(-1); 1479 } 1480 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1481 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1482 kfree(freepath, M_TEMP); 1483 1484 error = copyout(sp, info->sfsp, sizeof(*sp)); 1485 if (error == 0) 1486 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1487 if (error) { 1488 info->error = error; 1489 return (-1); 1490 } 1491 ++info->sfsp; 1492 ++info->vsfsp; 1493 } 1494 info->count++; 1495 return(0); 1496 } 1497 1498 1499 /* 1500 * fchdir_args(int fd) 1501 * 1502 * Change current working directory to a given file descriptor. 
1503 */ 1504 int 1505 sys_fchdir(struct fchdir_args *uap) 1506 { 1507 struct thread *td = curthread; 1508 struct proc *p = td->td_proc; 1509 struct filedesc *fdp = p->p_fd; 1510 struct vnode *vp, *ovp; 1511 struct mount *mp; 1512 struct file *fp; 1513 struct nchandle nch, onch, tnch; 1514 int error; 1515 1516 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1517 return (error); 1518 lwkt_gettoken(&p->p_token); 1519 vp = (struct vnode *)fp->f_data; 1520 vref(vp); 1521 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1522 if (fp->f_nchandle.ncp == NULL) 1523 error = ENOTDIR; 1524 else 1525 error = checkvp_chdir(vp, td); 1526 if (error) { 1527 vput(vp); 1528 goto done; 1529 } 1530 cache_copy(&fp->f_nchandle, &nch); 1531 1532 /* 1533 * If the ncp has become a mount point, traverse through 1534 * the mount point. 1535 */ 1536 1537 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1538 (mp = cache_findmount(&nch)) != NULL 1539 ) { 1540 error = nlookup_mp(mp, &tnch); 1541 if (error == 0) { 1542 cache_unlock(&tnch); /* leave ref intact */ 1543 vput(vp); 1544 vp = tnch.ncp->nc_vp; 1545 error = vget(vp, LK_SHARED); 1546 KKASSERT(error == 0); 1547 cache_drop(&nch); 1548 nch = tnch; 1549 } 1550 cache_dropmount(mp); 1551 } 1552 if (error == 0) { 1553 ovp = fdp->fd_cdir; 1554 onch = fdp->fd_ncdir; 1555 vn_unlock(vp); /* leave ref intact */ 1556 fdp->fd_cdir = vp; 1557 fdp->fd_ncdir = nch; 1558 cache_drop(&onch); 1559 vrele(ovp); 1560 } else { 1561 cache_drop(&nch); 1562 vput(vp); 1563 } 1564 fdrop(fp); 1565 done: 1566 lwkt_reltoken(&p->p_token); 1567 return (error); 1568 } 1569 1570 int 1571 kern_chdir(struct nlookupdata *nd) 1572 { 1573 struct thread *td = curthread; 1574 struct proc *p = td->td_proc; 1575 struct filedesc *fdp = p->p_fd; 1576 struct vnode *vp, *ovp; 1577 struct nchandle onch; 1578 int error; 1579 1580 if ((error = nlookup(nd)) != 0) 1581 return (error); 1582 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1583 return (ENOENT); 1584 if ((error = vget(vp, LK_SHARED)) != 0) 
1585 return (error); 1586 1587 lwkt_gettoken(&p->p_token); 1588 error = checkvp_chdir(vp, td); 1589 vn_unlock(vp); 1590 if (error == 0) { 1591 ovp = fdp->fd_cdir; 1592 onch = fdp->fd_ncdir; 1593 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1594 fdp->fd_ncdir = nd->nl_nch; 1595 fdp->fd_cdir = vp; 1596 cache_drop(&onch); 1597 vrele(ovp); 1598 cache_zero(&nd->nl_nch); 1599 } else { 1600 vrele(vp); 1601 } 1602 lwkt_reltoken(&p->p_token); 1603 return (error); 1604 } 1605 1606 /* 1607 * chdir_args(char *path) 1608 * 1609 * Change current working directory (``.''). 1610 */ 1611 int 1612 sys_chdir(struct chdir_args *uap) 1613 { 1614 struct nlookupdata nd; 1615 int error; 1616 1617 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1618 if (error == 0) 1619 error = kern_chdir(&nd); 1620 nlookup_done(&nd); 1621 return (error); 1622 } 1623 1624 /* 1625 * Helper function for raised chroot(2) security function: Refuse if 1626 * any filedescriptors are open directories. 1627 */ 1628 static int 1629 chroot_refuse_vdir_fds(struct filedesc *fdp) 1630 { 1631 struct vnode *vp; 1632 struct file *fp; 1633 int error; 1634 int fd; 1635 1636 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1637 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1638 continue; 1639 vp = (struct vnode *)fp->f_data; 1640 if (vp->v_type != VDIR) { 1641 fdrop(fp); 1642 continue; 1643 } 1644 fdrop(fp); 1645 return(EPERM); 1646 } 1647 return (0); 1648 } 1649 1650 /* 1651 * This sysctl determines if we will allow a process to chroot(2) if it 1652 * has a directory open: 1653 * 0: disallowed for all processes. 1654 * 1: allowed for processes that were not already chroot(2)'ed. 1655 * 2: allowed for all processes. 1656 */ 1657 1658 static int chroot_allow_open_directories = 1; 1659 1660 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1661 &chroot_allow_open_directories, 0, ""); 1662 1663 /* 1664 * chroot to the specified namecache entry. 
We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 *
 * Returns 0 on success, ENOENT when the entry has no vnode, or the
 * error from the privilege check, the open-directory check, vget() or
 * checkvp_chdir().
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int must_check_fds;
	int rc;

	/*
	 * Only privileged user can chroot
	 */
	rc = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (rc != 0)
		return (rc);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 * Mode 0 always checks; mode 1 checks only when the process
	 * root is no longer rootvnode.
	 */
	must_check_fds = chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 &&
	     fdp->fd_rdir != rootvnode);
	if (must_check_fds) {
		rc = chroot_refuse_vdir_fds(fdp);
		if (rc != 0)
			return (rc);
	}

	vp = nch->ncp->nc_vp;
	if (vp == NULL)
		return (ENOENT);

	rc = vget(vp, LK_SHARED);
	if (rc != 0)
		return (rc);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	rc = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (rc != 0) {
		vrele(vp);
		return (rc);
	}

	vrele(fdp->fd_rdir);
	fdp->fd_rdir = vp;		/* reference inherited by fd_rdir */
	cache_drop(&fdp->fd_nrdir);
	cache_copy(nch, &fdp->fd_nrdir);

	/* The first chroot also establishes the jail directory. */
	if (fdp->fd_jdir == NULL) {
		fdp->fd_jdir = vp;
		vref(fdp->fd_jdir);
		cache_copy(nch, &fdp->fd_njdir);
	}
	return (rc);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
1724 */ 1725 int 1726 sys_chroot(struct chroot_args *uap) 1727 { 1728 struct thread *td __debugvar = curthread; 1729 struct nlookupdata nd; 1730 int error; 1731 1732 KKASSERT(td->td_proc); 1733 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1734 if (error == 0) { 1735 nd.nl_flags |= NLC_EXEC; 1736 error = nlookup(&nd); 1737 if (error == 0) 1738 error = kern_chroot(&nd.nl_nch); 1739 } 1740 nlookup_done(&nd); 1741 return(error); 1742 } 1743 1744 int 1745 sys_chroot_kernel(struct chroot_kernel_args *uap) 1746 { 1747 struct thread *td = curthread; 1748 struct nlookupdata nd; 1749 struct nchandle *nch; 1750 struct vnode *vp; 1751 int error; 1752 1753 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1754 if (error) 1755 goto error_nond; 1756 1757 error = nlookup(&nd); 1758 if (error) 1759 goto error_out; 1760 1761 nch = &nd.nl_nch; 1762 1763 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1764 if (error) 1765 goto error_out; 1766 1767 if ((vp = nch->ncp->nc_vp) == NULL) { 1768 error = ENOENT; 1769 goto error_out; 1770 } 1771 1772 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1773 goto error_out; 1774 1775 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1776 get_mplock(); 1777 vfs_cache_setroot(vp, cache_hold(nch)); 1778 rel_mplock(); 1779 1780 error_out: 1781 nlookup_done(&nd); 1782 error_nond: 1783 return(error); 1784 } 1785 1786 /* 1787 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1788 * determine whether it is legal to chdir to the vnode. The vnode's state 1789 * is not changed by this call. 
1790 */ 1791 int 1792 checkvp_chdir(struct vnode *vp, struct thread *td) 1793 { 1794 int error; 1795 1796 if (vp->v_type != VDIR) 1797 error = ENOTDIR; 1798 else 1799 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1800 return (error); 1801 } 1802 1803 int 1804 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1805 { 1806 struct thread *td = curthread; 1807 struct proc *p = td->td_proc; 1808 struct lwp *lp = td->td_lwp; 1809 struct filedesc *fdp = p->p_fd; 1810 int cmode, flags; 1811 struct file *nfp; 1812 struct file *fp; 1813 struct vnode *vp; 1814 int type, indx, error; 1815 struct flock lf; 1816 1817 if ((oflags & O_ACCMODE) == O_ACCMODE) 1818 return (EINVAL); 1819 flags = FFLAGS(oflags); 1820 error = falloc(lp, &nfp, NULL); 1821 if (error) 1822 return (error); 1823 fp = nfp; 1824 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1825 1826 /* 1827 * XXX p_dupfd is a real mess. It allows a device to return a 1828 * file descriptor to be duplicated rather then doing the open 1829 * itself. 1830 */ 1831 lp->lwp_dupfd = -1; 1832 1833 /* 1834 * Call vn_open() to do the lookup and assign the vnode to the 1835 * file pointer. vn_open() does not change the ref count on fp 1836 * and the vnode, on success, will be inherited by the file pointer 1837 * and unlocked. 1838 */ 1839 nd->nl_flags |= NLC_LOCKVP; 1840 error = vn_open(nd, fp, flags, cmode); 1841 nlookup_done(nd); 1842 if (error) { 1843 /* 1844 * handle special fdopen() case. bleh. dupfdopen() is 1845 * responsible for dropping the old contents of ofiles[indx] 1846 * if it succeeds. 1847 * 1848 * Note that fsetfd() will add a ref to fp which represents 1849 * the fd_files[] assignment. We must still drop our 1850 * reference. 
1851 */ 1852 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1853 if (fdalloc(p, 0, &indx) == 0) { 1854 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1855 if (error == 0) { 1856 *res = indx; 1857 fdrop(fp); /* our ref */ 1858 return (0); 1859 } 1860 fsetfd(fdp, NULL, indx); 1861 } 1862 } 1863 fdrop(fp); /* our ref */ 1864 if (error == ERESTART) 1865 error = EINTR; 1866 return (error); 1867 } 1868 1869 /* 1870 * ref the vnode for ourselves so it can't be ripped out from under 1871 * is. XXX need an ND flag to request that the vnode be returned 1872 * anyway. 1873 * 1874 * Reserve a file descriptor but do not assign it until the open 1875 * succeeds. 1876 */ 1877 vp = (struct vnode *)fp->f_data; 1878 vref(vp); 1879 if ((error = fdalloc(p, 0, &indx)) != 0) { 1880 fdrop(fp); 1881 vrele(vp); 1882 return (error); 1883 } 1884 1885 /* 1886 * If no error occurs the vp will have been assigned to the file 1887 * pointer. 1888 */ 1889 lp->lwp_dupfd = 0; 1890 1891 if (flags & (O_EXLOCK | O_SHLOCK)) { 1892 lf.l_whence = SEEK_SET; 1893 lf.l_start = 0; 1894 lf.l_len = 0; 1895 if (flags & O_EXLOCK) 1896 lf.l_type = F_WRLCK; 1897 else 1898 lf.l_type = F_RDLCK; 1899 if (flags & FNONBLOCK) 1900 type = 0; 1901 else 1902 type = F_WAIT; 1903 1904 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1905 /* 1906 * lock request failed. Clean up the reserved 1907 * descriptor. 1908 */ 1909 vrele(vp); 1910 fsetfd(fdp, NULL, indx); 1911 fdrop(fp); 1912 return (error); 1913 } 1914 fp->f_flag |= FHASLOCK; 1915 } 1916 #if 0 1917 /* 1918 * Assert that all regular file vnodes were created with a object. 1919 */ 1920 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1921 ("open: regular file has no backing object after vn_open")); 1922 #endif 1923 1924 vrele(vp); 1925 1926 /* 1927 * release our private reference, leaving the one associated with the 1928 * descriptor table intact. 
1929 */ 1930 fsetfd(fdp, fp, indx); 1931 fdrop(fp); 1932 *res = indx; 1933 return (0); 1934 } 1935 1936 /* 1937 * open_args(char *path, int flags, int mode) 1938 * 1939 * Check permissions, allocate an open file structure, 1940 * and call the device open routine if any. 1941 */ 1942 int 1943 sys_open(struct open_args *uap) 1944 { 1945 struct nlookupdata nd; 1946 int error; 1947 1948 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1949 if (error == 0) { 1950 error = kern_open(&nd, uap->flags, 1951 uap->mode, &uap->sysmsg_result); 1952 } 1953 nlookup_done(&nd); 1954 return (error); 1955 } 1956 1957 /* 1958 * openat_args(int fd, char *path, int flags, int mode) 1959 */ 1960 int 1961 sys_openat(struct openat_args *uap) 1962 { 1963 struct nlookupdata nd; 1964 int error; 1965 struct file *fp; 1966 1967 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 1968 if (error == 0) { 1969 error = kern_open(&nd, uap->flags, uap->mode, 1970 &uap->sysmsg_result); 1971 } 1972 nlookup_done_at(&nd, fp); 1973 return (error); 1974 } 1975 1976 int 1977 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1978 { 1979 struct thread *td = curthread; 1980 struct proc *p = td->td_proc; 1981 struct vnode *vp; 1982 struct vattr vattr; 1983 int error; 1984 int whiteout = 0; 1985 1986 KKASSERT(p); 1987 1988 VATTR_NULL(&vattr); 1989 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1990 vattr.va_rmajor = rmajor; 1991 vattr.va_rminor = rminor; 1992 1993 switch (mode & S_IFMT) { 1994 case S_IFMT: /* used by badsect to flag bad sectors */ 1995 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 1996 vattr.va_type = VBAD; 1997 break; 1998 case S_IFCHR: 1999 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2000 vattr.va_type = VCHR; 2001 break; 2002 case S_IFBLK: 2003 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2004 vattr.va_type = VBLK; 2005 break; 2006 case S_IFWHT: 2007 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2008 whiteout 
= 1; 2009 break; 2010 case S_IFDIR: /* special directories support for HAMMER */ 2011 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2012 vattr.va_type = VDIR; 2013 break; 2014 default: 2015 error = EINVAL; 2016 break; 2017 } 2018 2019 if (error) 2020 return (error); 2021 2022 bwillinode(1); 2023 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2024 if ((error = nlookup(nd)) != 0) 2025 return (error); 2026 if (nd->nl_nch.ncp->nc_vp) 2027 return (EEXIST); 2028 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2029 return (error); 2030 2031 if (whiteout) { 2032 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2033 nd->nl_cred, NAMEI_CREATE); 2034 } else { 2035 vp = NULL; 2036 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2037 &vp, nd->nl_cred, &vattr); 2038 if (error == 0) 2039 vput(vp); 2040 } 2041 return (error); 2042 } 2043 2044 /* 2045 * mknod_args(char *path, int mode, int dev) 2046 * 2047 * Create a special file. 2048 */ 2049 int 2050 sys_mknod(struct mknod_args *uap) 2051 { 2052 struct nlookupdata nd; 2053 int error; 2054 2055 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2056 if (error == 0) { 2057 error = kern_mknod(&nd, uap->mode, 2058 umajor(uap->dev), uminor(uap->dev)); 2059 } 2060 nlookup_done(&nd); 2061 return (error); 2062 } 2063 2064 /* 2065 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2066 * 2067 * Create a special file. The path is relative to the directory associated 2068 * with fd. 
2069 */ 2070 int 2071 sys_mknodat(struct mknodat_args *uap) 2072 { 2073 struct nlookupdata nd; 2074 struct file *fp; 2075 int error; 2076 2077 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2078 if (error == 0) { 2079 error = kern_mknod(&nd, uap->mode, 2080 umajor(uap->dev), uminor(uap->dev)); 2081 } 2082 nlookup_done_at(&nd, fp); 2083 return (error); 2084 } 2085 2086 int 2087 kern_mkfifo(struct nlookupdata *nd, int mode) 2088 { 2089 struct thread *td = curthread; 2090 struct proc *p = td->td_proc; 2091 struct vattr vattr; 2092 struct vnode *vp; 2093 int error; 2094 2095 bwillinode(1); 2096 2097 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2098 if ((error = nlookup(nd)) != 0) 2099 return (error); 2100 if (nd->nl_nch.ncp->nc_vp) 2101 return (EEXIST); 2102 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2103 return (error); 2104 2105 VATTR_NULL(&vattr); 2106 vattr.va_type = VFIFO; 2107 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2108 vp = NULL; 2109 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2110 if (error == 0) 2111 vput(vp); 2112 return (error); 2113 } 2114 2115 /* 2116 * mkfifo_args(char *path, int mode) 2117 * 2118 * Create a named pipe. 2119 */ 2120 int 2121 sys_mkfifo(struct mkfifo_args *uap) 2122 { 2123 struct nlookupdata nd; 2124 int error; 2125 2126 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2127 if (error == 0) 2128 error = kern_mkfifo(&nd, uap->mode); 2129 nlookup_done(&nd); 2130 return (error); 2131 } 2132 2133 /* 2134 * mkfifoat_args(int fd, char *path, mode_t mode) 2135 * 2136 * Create a named pipe. The path is relative to the directory associated 2137 * with fd. 
2138 */ 2139 int 2140 sys_mkfifoat(struct mkfifoat_args *uap) 2141 { 2142 struct nlookupdata nd; 2143 struct file *fp; 2144 int error; 2145 2146 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2147 if (error == 0) 2148 error = kern_mkfifo(&nd, uap->mode); 2149 nlookup_done_at(&nd, fp); 2150 return (error); 2151 } 2152 2153 static int hardlink_check_uid = 0; 2154 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2155 &hardlink_check_uid, 0, 2156 "Unprivileged processes cannot create hard links to files owned by other " 2157 "users"); 2158 static int hardlink_check_gid = 0; 2159 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2160 &hardlink_check_gid, 0, 2161 "Unprivileged processes cannot create hard links to files owned by other " 2162 "groups"); 2163 2164 static int 2165 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2166 { 2167 struct vattr va; 2168 int error; 2169 2170 /* 2171 * Shortcut if disabled 2172 */ 2173 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2174 return (0); 2175 2176 /* 2177 * Privileged user can always hardlink 2178 */ 2179 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2180 return (0); 2181 2182 /* 2183 * Otherwise only if the originating file is owned by the 2184 * same user or group. Note that any group is allowed if 2185 * the file is owned by the caller. 2186 */ 2187 error = VOP_GETATTR(vp, &va); 2188 if (error != 0) 2189 return (error); 2190 2191 if (hardlink_check_uid) { 2192 if (cred->cr_uid != va.va_uid) 2193 return (EPERM); 2194 } 2195 2196 if (hardlink_check_gid) { 2197 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2198 return (EPERM); 2199 } 2200 2201 return (0); 2202 } 2203 2204 int 2205 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2206 { 2207 struct thread *td = curthread; 2208 struct vnode *vp; 2209 int error; 2210 2211 /* 2212 * Lookup the source and obtained a locked vnode. 
2213 * 2214 * You may only hardlink a file which you have write permission 2215 * on or which you own. 2216 * 2217 * XXX relookup on vget failure / race ? 2218 */ 2219 bwillinode(1); 2220 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2221 if ((error = nlookup(nd)) != 0) 2222 return (error); 2223 vp = nd->nl_nch.ncp->nc_vp; 2224 KKASSERT(vp != NULL); 2225 if (vp->v_type == VDIR) 2226 return (EPERM); /* POSIX */ 2227 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2228 return (error); 2229 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2230 return (error); 2231 2232 /* 2233 * Unlock the source so we can lookup the target without deadlocking 2234 * (XXX vp is locked already, possible other deadlock?). The target 2235 * must not exist. 2236 */ 2237 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2238 nd->nl_flags &= ~NLC_NCPISLOCKED; 2239 cache_unlock(&nd->nl_nch); 2240 vn_unlock(vp); 2241 2242 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2243 if ((error = nlookup(linknd)) != 0) { 2244 vrele(vp); 2245 return (error); 2246 } 2247 if (linknd->nl_nch.ncp->nc_vp) { 2248 vrele(vp); 2249 return (EEXIST); 2250 } 2251 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2252 vrele(vp); 2253 return (error); 2254 } 2255 2256 /* 2257 * Finally run the new API VOP. 2258 */ 2259 error = can_hardlink(vp, td, td->td_ucred); 2260 if (error == 0) { 2261 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2262 vp, linknd->nl_cred); 2263 } 2264 vput(vp); 2265 return (error); 2266 } 2267 2268 /* 2269 * link_args(char *path, char *link) 2270 * 2271 * Make a hard file link. 
2272 */ 2273 int 2274 sys_link(struct link_args *uap) 2275 { 2276 struct nlookupdata nd, linknd; 2277 int error; 2278 2279 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2280 if (error == 0) { 2281 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2282 if (error == 0) 2283 error = kern_link(&nd, &linknd); 2284 nlookup_done(&linknd); 2285 } 2286 nlookup_done(&nd); 2287 return (error); 2288 } 2289 2290 int 2291 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2292 { 2293 struct vattr vattr; 2294 struct vnode *vp; 2295 struct vnode *dvp; 2296 int error; 2297 2298 bwillinode(1); 2299 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2300 if ((error = nlookup(nd)) != 0) 2301 return (error); 2302 if (nd->nl_nch.ncp->nc_vp) 2303 return (EEXIST); 2304 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2305 return (error); 2306 dvp = nd->nl_dvp; 2307 VATTR_NULL(&vattr); 2308 vattr.va_mode = mode; 2309 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2310 if (error == 0) 2311 vput(vp); 2312 return (error); 2313 } 2314 2315 /* 2316 * symlink(char *path, char *link) 2317 * 2318 * Make a symbolic link. 2319 */ 2320 int 2321 sys_symlink(struct symlink_args *uap) 2322 { 2323 struct thread *td = curthread; 2324 struct nlookupdata nd; 2325 char *path; 2326 int error; 2327 int mode; 2328 2329 path = objcache_get(namei_oc, M_WAITOK); 2330 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2331 if (error == 0) { 2332 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2333 if (error == 0) { 2334 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2335 error = kern_symlink(&nd, path, mode); 2336 } 2337 nlookup_done(&nd); 2338 } 2339 objcache_put(namei_oc, path); 2340 return (error); 2341 } 2342 2343 /* 2344 * symlinkat_args(char *path1, int fd, char *path2) 2345 * 2346 * Make a symbolic link. The path2 argument is relative to the directory 2347 * associated with fd. 
2348 */ 2349 int 2350 sys_symlinkat(struct symlinkat_args *uap) 2351 { 2352 struct thread *td = curthread; 2353 struct nlookupdata nd; 2354 struct file *fp; 2355 char *path1; 2356 int error; 2357 int mode; 2358 2359 path1 = objcache_get(namei_oc, M_WAITOK); 2360 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2361 if (error == 0) { 2362 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2363 UIO_USERSPACE, 0); 2364 if (error == 0) { 2365 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2366 error = kern_symlink(&nd, path1, mode); 2367 } 2368 nlookup_done_at(&nd, fp); 2369 } 2370 objcache_put(namei_oc, path1); 2371 return (error); 2372 } 2373 2374 /* 2375 * undelete_args(char *path) 2376 * 2377 * Delete a whiteout from the filesystem. 2378 */ 2379 int 2380 sys_undelete(struct undelete_args *uap) 2381 { 2382 struct nlookupdata nd; 2383 int error; 2384 2385 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2386 bwillinode(1); 2387 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2388 if (error == 0) 2389 error = nlookup(&nd); 2390 if (error == 0) 2391 error = ncp_writechk(&nd.nl_nch); 2392 if (error == 0) { 2393 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2394 NAMEI_DELETE); 2395 } 2396 nlookup_done(&nd); 2397 return (error); 2398 } 2399 2400 int 2401 kern_unlink(struct nlookupdata *nd) 2402 { 2403 int error; 2404 2405 bwillinode(1); 2406 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2407 if ((error = nlookup(nd)) != 0) 2408 return (error); 2409 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2410 return (error); 2411 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2412 return (error); 2413 } 2414 2415 /* 2416 * unlink_args(char *path) 2417 * 2418 * Delete a name from the filesystem. 
2419 */ 2420 int 2421 sys_unlink(struct unlink_args *uap) 2422 { 2423 struct nlookupdata nd; 2424 int error; 2425 2426 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2427 if (error == 0) 2428 error = kern_unlink(&nd); 2429 nlookup_done(&nd); 2430 return (error); 2431 } 2432 2433 2434 /* 2435 * unlinkat_args(int fd, char *path, int flags) 2436 * 2437 * Delete the file or directory entry pointed to by fd/path. 2438 */ 2439 int 2440 sys_unlinkat(struct unlinkat_args *uap) 2441 { 2442 struct nlookupdata nd; 2443 struct file *fp; 2444 int error; 2445 2446 if (uap->flags & ~AT_REMOVEDIR) 2447 return (EINVAL); 2448 2449 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2450 if (error == 0) { 2451 if (uap->flags & AT_REMOVEDIR) 2452 error = kern_rmdir(&nd); 2453 else 2454 error = kern_unlink(&nd); 2455 } 2456 nlookup_done_at(&nd, fp); 2457 return (error); 2458 } 2459 2460 int 2461 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2462 { 2463 struct thread *td = curthread; 2464 struct proc *p = td->td_proc; 2465 struct file *fp; 2466 struct vnode *vp; 2467 struct vattr vattr; 2468 off_t new_offset; 2469 int error; 2470 2471 fp = holdfp(p->p_fd, fd, -1); 2472 if (fp == NULL) 2473 return (EBADF); 2474 if (fp->f_type != DTYPE_VNODE) { 2475 error = ESPIPE; 2476 goto done; 2477 } 2478 vp = (struct vnode *)fp->f_data; 2479 2480 switch (whence) { 2481 case L_INCR: 2482 spin_lock(&fp->f_spin); 2483 new_offset = fp->f_offset + offset; 2484 error = 0; 2485 break; 2486 case L_XTND: 2487 error = VOP_GETATTR(vp, &vattr); 2488 spin_lock(&fp->f_spin); 2489 new_offset = offset + vattr.va_size; 2490 break; 2491 case L_SET: 2492 new_offset = offset; 2493 error = 0; 2494 spin_lock(&fp->f_spin); 2495 break; 2496 default: 2497 new_offset = 0; 2498 error = EINVAL; 2499 spin_lock(&fp->f_spin); 2500 break; 2501 } 2502 2503 /* 2504 * Validate the seek position. Negative offsets are not allowed 2505 * for regular files or directories. 
2506 * 2507 * Normally we would also not want to allow negative offsets for 2508 * character and block-special devices. However kvm addresses 2509 * on 64 bit architectures might appear to be negative and must 2510 * be allowed. 2511 */ 2512 if (error == 0) { 2513 if (new_offset < 0 && 2514 (vp->v_type == VREG || vp->v_type == VDIR)) { 2515 error = EINVAL; 2516 } else { 2517 fp->f_offset = new_offset; 2518 } 2519 } 2520 *res = fp->f_offset; 2521 spin_unlock(&fp->f_spin); 2522 done: 2523 fdrop(fp); 2524 return (error); 2525 } 2526 2527 /* 2528 * lseek_args(int fd, int pad, off_t offset, int whence) 2529 * 2530 * Reposition read/write file offset. 2531 */ 2532 int 2533 sys_lseek(struct lseek_args *uap) 2534 { 2535 int error; 2536 2537 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2538 &uap->sysmsg_offset); 2539 2540 return (error); 2541 } 2542 2543 /* 2544 * Check if current process can access given file. amode is a bitmask of *_OK 2545 * access bits. flags is a bitmask of AT_* flags. 2546 */ 2547 int 2548 kern_access(struct nlookupdata *nd, int amode, int flags) 2549 { 2550 struct vnode *vp; 2551 int error, mode; 2552 2553 if (flags & ~AT_EACCESS) 2554 return (EINVAL); 2555 if ((error = nlookup(nd)) != 0) 2556 return (error); 2557 retry: 2558 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2559 if (error) 2560 return (error); 2561 2562 /* Flags == 0 means only check for existence. */ 2563 if (amode) { 2564 mode = 0; 2565 if (amode & R_OK) 2566 mode |= VREAD; 2567 if (amode & W_OK) 2568 mode |= VWRITE; 2569 if (amode & X_OK) 2570 mode |= VEXEC; 2571 if ((mode & VWRITE) == 0 || 2572 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2573 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2574 2575 /* 2576 * If the file handle is stale we have to re-resolve the 2577 * entry. This is a hack at the moment. 
2578 */ 2579 if (error == ESTALE) { 2580 vput(vp); 2581 cache_setunresolved(&nd->nl_nch); 2582 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2583 if (error == 0) { 2584 vp = NULL; 2585 goto retry; 2586 } 2587 return(error); 2588 } 2589 } 2590 vput(vp); 2591 return (error); 2592 } 2593 2594 /* 2595 * access_args(char *path, int flags) 2596 * 2597 * Check access permissions. 2598 */ 2599 int 2600 sys_access(struct access_args *uap) 2601 { 2602 struct nlookupdata nd; 2603 int error; 2604 2605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2606 if (error == 0) 2607 error = kern_access(&nd, uap->flags, 0); 2608 nlookup_done(&nd); 2609 return (error); 2610 } 2611 2612 2613 /* 2614 * faccessat_args(int fd, char *path, int amode, int flags) 2615 * 2616 * Check access permissions. 2617 */ 2618 int 2619 sys_faccessat(struct faccessat_args *uap) 2620 { 2621 struct nlookupdata nd; 2622 struct file *fp; 2623 int error; 2624 2625 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2626 NLC_FOLLOW); 2627 if (error == 0) 2628 error = kern_access(&nd, uap->amode, uap->flags); 2629 nlookup_done_at(&nd, fp); 2630 return (error); 2631 } 2632 2633 2634 int 2635 kern_stat(struct nlookupdata *nd, struct stat *st) 2636 { 2637 int error; 2638 struct vnode *vp; 2639 2640 if ((error = nlookup(nd)) != 0) 2641 return (error); 2642 again: 2643 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2644 return (ENOENT); 2645 2646 if ((error = vget(vp, LK_SHARED)) != 0) 2647 return (error); 2648 error = vn_stat(vp, st, nd->nl_cred); 2649 2650 /* 2651 * If the file handle is stale we have to re-resolve the entry. This 2652 * is a hack at the moment. 
2653 */ 2654 if (error == ESTALE) { 2655 vput(vp); 2656 cache_setunresolved(&nd->nl_nch); 2657 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2658 if (error == 0) 2659 goto again; 2660 } else { 2661 vput(vp); 2662 } 2663 return (error); 2664 } 2665 2666 /* 2667 * stat_args(char *path, struct stat *ub) 2668 * 2669 * Get file status; this version follows links. 2670 */ 2671 int 2672 sys_stat(struct stat_args *uap) 2673 { 2674 struct nlookupdata nd; 2675 struct stat st; 2676 int error; 2677 2678 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2679 if (error == 0) { 2680 error = kern_stat(&nd, &st); 2681 if (error == 0) 2682 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2683 } 2684 nlookup_done(&nd); 2685 return (error); 2686 } 2687 2688 /* 2689 * lstat_args(char *path, struct stat *ub) 2690 * 2691 * Get file status; this version does not follow links. 2692 */ 2693 int 2694 sys_lstat(struct lstat_args *uap) 2695 { 2696 struct nlookupdata nd; 2697 struct stat st; 2698 int error; 2699 2700 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2701 if (error == 0) { 2702 error = kern_stat(&nd, &st); 2703 if (error == 0) 2704 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2705 } 2706 nlookup_done(&nd); 2707 return (error); 2708 } 2709 2710 /* 2711 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2712 * 2713 * Get status of file pointed to by fd/path. 2714 */ 2715 int 2716 sys_fstatat(struct fstatat_args *uap) 2717 { 2718 struct nlookupdata nd; 2719 struct stat st; 2720 int error; 2721 int flags; 2722 struct file *fp; 2723 2724 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2725 return (EINVAL); 2726 2727 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 2728 2729 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2730 UIO_USERSPACE, flags); 2731 if (error == 0) { 2732 error = kern_stat(&nd, &st); 2733 if (error == 0) 2734 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2735 } 2736 nlookup_done_at(&nd, fp); 2737 return (error); 2738 } 2739 2740 /* 2741 * pathconf_Args(char *path, int name) 2742 * 2743 * Get configurable pathname variables. 2744 */ 2745 int 2746 sys_pathconf(struct pathconf_args *uap) 2747 { 2748 struct nlookupdata nd; 2749 struct vnode *vp; 2750 int error; 2751 2752 vp = NULL; 2753 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2754 if (error == 0) 2755 error = nlookup(&nd); 2756 if (error == 0) 2757 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2758 nlookup_done(&nd); 2759 if (error == 0) { 2760 error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); 2761 vput(vp); 2762 } 2763 return (error); 2764 } 2765 2766 /* 2767 * XXX: daver 2768 * kern_readlink isn't properly split yet. There is a copyin burried 2769 * in VOP_READLINK(). 
2770 */ 2771 int 2772 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2773 { 2774 struct thread *td = curthread; 2775 struct vnode *vp; 2776 struct iovec aiov; 2777 struct uio auio; 2778 int error; 2779 2780 if ((error = nlookup(nd)) != 0) 2781 return (error); 2782 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2783 if (error) 2784 return (error); 2785 if (vp->v_type != VLNK) { 2786 error = EINVAL; 2787 } else { 2788 aiov.iov_base = buf; 2789 aiov.iov_len = count; 2790 auio.uio_iov = &aiov; 2791 auio.uio_iovcnt = 1; 2792 auio.uio_offset = 0; 2793 auio.uio_rw = UIO_READ; 2794 auio.uio_segflg = UIO_USERSPACE; 2795 auio.uio_td = td; 2796 auio.uio_resid = count; 2797 error = VOP_READLINK(vp, &auio, td->td_ucred); 2798 } 2799 vput(vp); 2800 *res = count - auio.uio_resid; 2801 return (error); 2802 } 2803 2804 /* 2805 * readlink_args(char *path, char *buf, int count) 2806 * 2807 * Return target name of a symbolic link. 2808 */ 2809 int 2810 sys_readlink(struct readlink_args *uap) 2811 { 2812 struct nlookupdata nd; 2813 int error; 2814 2815 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2816 if (error == 0) { 2817 error = kern_readlink(&nd, uap->buf, uap->count, 2818 &uap->sysmsg_result); 2819 } 2820 nlookup_done(&nd); 2821 return (error); 2822 } 2823 2824 /* 2825 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2826 * 2827 * Return target name of a symbolic link. The path is relative to the 2828 * directory associated with fd. 
2829 */ 2830 int 2831 sys_readlinkat(struct readlinkat_args *uap) 2832 { 2833 struct nlookupdata nd; 2834 struct file *fp; 2835 int error; 2836 2837 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2838 if (error == 0) { 2839 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2840 &uap->sysmsg_result); 2841 } 2842 nlookup_done_at(&nd, fp); 2843 return (error); 2844 } 2845 2846 static int 2847 setfflags(struct vnode *vp, int flags) 2848 { 2849 struct thread *td = curthread; 2850 int error; 2851 struct vattr vattr; 2852 2853 /* 2854 * Prevent non-root users from setting flags on devices. When 2855 * a device is reused, users can retain ownership of the device 2856 * if they are allowed to set flags and programs assume that 2857 * chown can't fail when done as root. 2858 */ 2859 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2860 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2861 return (error); 2862 2863 /* 2864 * note: vget is required for any operation that might mod the vnode 2865 * so VINACTIVE is properly cleared. 2866 */ 2867 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2868 VATTR_NULL(&vattr); 2869 vattr.va_flags = flags; 2870 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2871 vput(vp); 2872 } 2873 return (error); 2874 } 2875 2876 /* 2877 * chflags(char *path, int flags) 2878 * 2879 * Change flags of a file given a path name. 
2880 */ 2881 int 2882 sys_chflags(struct chflags_args *uap) 2883 { 2884 struct nlookupdata nd; 2885 struct vnode *vp; 2886 int error; 2887 2888 vp = NULL; 2889 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2890 if (error == 0) 2891 error = nlookup(&nd); 2892 if (error == 0) 2893 error = ncp_writechk(&nd.nl_nch); 2894 if (error == 0) 2895 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2896 nlookup_done(&nd); 2897 if (error == 0) { 2898 error = setfflags(vp, uap->flags); 2899 vrele(vp); 2900 } 2901 return (error); 2902 } 2903 2904 /* 2905 * lchflags(char *path, int flags) 2906 * 2907 * Change flags of a file given a path name, but don't follow symlinks. 2908 */ 2909 int 2910 sys_lchflags(struct lchflags_args *uap) 2911 { 2912 struct nlookupdata nd; 2913 struct vnode *vp; 2914 int error; 2915 2916 vp = NULL; 2917 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2918 if (error == 0) 2919 error = nlookup(&nd); 2920 if (error == 0) 2921 error = ncp_writechk(&nd.nl_nch); 2922 if (error == 0) 2923 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2924 nlookup_done(&nd); 2925 if (error == 0) { 2926 error = setfflags(vp, uap->flags); 2927 vrele(vp); 2928 } 2929 return (error); 2930 } 2931 2932 /* 2933 * fchflags_args(int fd, int flags) 2934 * 2935 * Change flags of a file given a file descriptor. 
2936 */ 2937 int 2938 sys_fchflags(struct fchflags_args *uap) 2939 { 2940 struct thread *td = curthread; 2941 struct proc *p = td->td_proc; 2942 struct file *fp; 2943 int error; 2944 2945 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2946 return (error); 2947 if (fp->f_nchandle.ncp) 2948 error = ncp_writechk(&fp->f_nchandle); 2949 if (error == 0) 2950 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2951 fdrop(fp); 2952 return (error); 2953 } 2954 2955 static int 2956 setfmode(struct vnode *vp, int mode) 2957 { 2958 struct thread *td = curthread; 2959 int error; 2960 struct vattr vattr; 2961 2962 /* 2963 * note: vget is required for any operation that might mod the vnode 2964 * so VINACTIVE is properly cleared. 2965 */ 2966 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2967 VATTR_NULL(&vattr); 2968 vattr.va_mode = mode & ALLPERMS; 2969 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2970 vput(vp); 2971 } 2972 return error; 2973 } 2974 2975 int 2976 kern_chmod(struct nlookupdata *nd, int mode) 2977 { 2978 struct vnode *vp; 2979 int error; 2980 2981 if ((error = nlookup(nd)) != 0) 2982 return (error); 2983 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2984 return (error); 2985 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2986 error = setfmode(vp, mode); 2987 vrele(vp); 2988 return (error); 2989 } 2990 2991 /* 2992 * chmod_args(char *path, int mode) 2993 * 2994 * Change mode of a file given path name. 2995 */ 2996 int 2997 sys_chmod(struct chmod_args *uap) 2998 { 2999 struct nlookupdata nd; 3000 int error; 3001 3002 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3003 if (error == 0) 3004 error = kern_chmod(&nd, uap->mode); 3005 nlookup_done(&nd); 3006 return (error); 3007 } 3008 3009 /* 3010 * lchmod_args(char *path, int mode) 3011 * 3012 * Change mode of a file given path name (don't follow links.) 
3013 */ 3014 int 3015 sys_lchmod(struct lchmod_args *uap) 3016 { 3017 struct nlookupdata nd; 3018 int error; 3019 3020 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3021 if (error == 0) 3022 error = kern_chmod(&nd, uap->mode); 3023 nlookup_done(&nd); 3024 return (error); 3025 } 3026 3027 /* 3028 * fchmod_args(int fd, int mode) 3029 * 3030 * Change mode of a file given a file descriptor. 3031 */ 3032 int 3033 sys_fchmod(struct fchmod_args *uap) 3034 { 3035 struct thread *td = curthread; 3036 struct proc *p = td->td_proc; 3037 struct file *fp; 3038 int error; 3039 3040 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3041 return (error); 3042 if (fp->f_nchandle.ncp) 3043 error = ncp_writechk(&fp->f_nchandle); 3044 if (error == 0) 3045 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3046 fdrop(fp); 3047 return (error); 3048 } 3049 3050 /* 3051 * fchmodat_args(char *path, int mode) 3052 * 3053 * Change mode of a file pointed to by fd/path. 3054 */ 3055 int 3056 sys_fchmodat(struct fchmodat_args *uap) 3057 { 3058 struct nlookupdata nd; 3059 struct file *fp; 3060 int error; 3061 int flags; 3062 3063 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3064 return (EINVAL); 3065 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3066 3067 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3068 UIO_USERSPACE, flags); 3069 if (error == 0) 3070 error = kern_chmod(&nd, uap->mode); 3071 nlookup_done_at(&nd, fp); 3072 return (error); 3073 } 3074 3075 static int 3076 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3077 { 3078 struct thread *td = curthread; 3079 int error; 3080 struct vattr vattr; 3081 uid_t o_uid; 3082 gid_t o_gid; 3083 uint64_t size; 3084 3085 /* 3086 * note: vget is required for any operation that might mod the vnode 3087 * so VINACTIVE is properly cleared. 
3088 */ 3089 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3090 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3091 return error; 3092 o_uid = vattr.va_uid; 3093 o_gid = vattr.va_gid; 3094 size = vattr.va_size; 3095 3096 VATTR_NULL(&vattr); 3097 vattr.va_uid = uid; 3098 vattr.va_gid = gid; 3099 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3100 vput(vp); 3101 } 3102 3103 if (error == 0) { 3104 if (uid == -1) 3105 uid = o_uid; 3106 if (gid == -1) 3107 gid = o_gid; 3108 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3109 VFS_ACCOUNT(mp, uid, gid, size); 3110 } 3111 3112 return error; 3113 } 3114 3115 int 3116 kern_chown(struct nlookupdata *nd, int uid, int gid) 3117 { 3118 struct vnode *vp; 3119 int error; 3120 3121 if ((error = nlookup(nd)) != 0) 3122 return (error); 3123 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3124 return (error); 3125 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3126 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3127 vrele(vp); 3128 return (error); 3129 } 3130 3131 /* 3132 * chown(char *path, int uid, int gid) 3133 * 3134 * Set ownership given a path name. 3135 */ 3136 int 3137 sys_chown(struct chown_args *uap) 3138 { 3139 struct nlookupdata nd; 3140 int error; 3141 3142 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3143 if (error == 0) 3144 error = kern_chown(&nd, uap->uid, uap->gid); 3145 nlookup_done(&nd); 3146 return (error); 3147 } 3148 3149 /* 3150 * lchown_args(char *path, int uid, int gid) 3151 * 3152 * Set ownership given a path name, do not cross symlinks. 3153 */ 3154 int 3155 sys_lchown(struct lchown_args *uap) 3156 { 3157 struct nlookupdata nd; 3158 int error; 3159 3160 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3161 if (error == 0) 3162 error = kern_chown(&nd, uap->uid, uap->gid); 3163 nlookup_done(&nd); 3164 return (error); 3165 } 3166 3167 /* 3168 * fchown_args(int fd, int uid, int gid) 3169 * 3170 * Set ownership given a file descriptor. 
3171 */ 3172 int 3173 sys_fchown(struct fchown_args *uap) 3174 { 3175 struct thread *td = curthread; 3176 struct proc *p = td->td_proc; 3177 struct file *fp; 3178 int error; 3179 3180 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3181 return (error); 3182 if (fp->f_nchandle.ncp) 3183 error = ncp_writechk(&fp->f_nchandle); 3184 if (error == 0) 3185 error = setfown(p->p_fd->fd_ncdir.mount, 3186 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3187 fdrop(fp); 3188 return (error); 3189 } 3190 3191 /* 3192 * fchownat(int fd, char *path, int uid, int gid, int flags) 3193 * 3194 * Set ownership of file pointed to by fd/path. 3195 */ 3196 int 3197 sys_fchownat(struct fchownat_args *uap) 3198 { 3199 struct nlookupdata nd; 3200 struct file *fp; 3201 int error; 3202 int flags; 3203 3204 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3205 return (EINVAL); 3206 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3207 3208 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3209 UIO_USERSPACE, flags); 3210 if (error == 0) 3211 error = kern_chown(&nd, uap->uid, uap->gid); 3212 nlookup_done_at(&nd, fp); 3213 return (error); 3214 } 3215 3216 3217 static int 3218 getutimes(const struct timeval *tvp, struct timespec *tsp) 3219 { 3220 struct timeval tv[2]; 3221 3222 if (tvp == NULL) { 3223 microtime(&tv[0]); 3224 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3225 tsp[1] = tsp[0]; 3226 } else { 3227 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3228 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3229 } 3230 return 0; 3231 } 3232 3233 static int 3234 setutimes(struct vnode *vp, struct vattr *vattr, 3235 const struct timespec *ts, int nullflag) 3236 { 3237 struct thread *td = curthread; 3238 int error; 3239 3240 VATTR_NULL(vattr); 3241 vattr->va_atime = ts[0]; 3242 vattr->va_mtime = ts[1]; 3243 if (nullflag) 3244 vattr->va_vaflags |= VA_UTIMES_NULL; 3245 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3246 3247 return error; 3248 } 3249 3250 int 3251 kern_utimes(struct nlookupdata *nd, struct timeval 
*tptr) 3252 { 3253 struct timespec ts[2]; 3254 struct vnode *vp; 3255 struct vattr vattr; 3256 int error; 3257 3258 if ((error = getutimes(tptr, ts)) != 0) 3259 return (error); 3260 3261 /* 3262 * NOTE: utimes() succeeds for the owner even if the file 3263 * is not user-writable. 3264 */ 3265 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3266 3267 if ((error = nlookup(nd)) != 0) 3268 return (error); 3269 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3270 return (error); 3271 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3272 return (error); 3273 3274 /* 3275 * note: vget is required for any operation that might mod the vnode 3276 * so VINACTIVE is properly cleared. 3277 */ 3278 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3279 error = vget(vp, LK_EXCLUSIVE); 3280 if (error == 0) { 3281 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3282 vput(vp); 3283 } 3284 } 3285 vrele(vp); 3286 return (error); 3287 } 3288 3289 /* 3290 * utimes_args(char *path, struct timeval *tptr) 3291 * 3292 * Set the access and modification times of a file. 3293 */ 3294 int 3295 sys_utimes(struct utimes_args *uap) 3296 { 3297 struct timeval tv[2]; 3298 struct nlookupdata nd; 3299 int error; 3300 3301 if (uap->tptr) { 3302 error = copyin(uap->tptr, tv, sizeof(tv)); 3303 if (error) 3304 return (error); 3305 } 3306 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3307 if (error == 0) 3308 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3309 nlookup_done(&nd); 3310 return (error); 3311 } 3312 3313 /* 3314 * lutimes_args(char *path, struct timeval *tptr) 3315 * 3316 * Set the access and modification times of a file. 
3317 */ 3318 int 3319 sys_lutimes(struct lutimes_args *uap) 3320 { 3321 struct timeval tv[2]; 3322 struct nlookupdata nd; 3323 int error; 3324 3325 if (uap->tptr) { 3326 error = copyin(uap->tptr, tv, sizeof(tv)); 3327 if (error) 3328 return (error); 3329 } 3330 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3331 if (error == 0) 3332 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3333 nlookup_done(&nd); 3334 return (error); 3335 } 3336 3337 /* 3338 * Set utimes on a file descriptor. The creds used to open the 3339 * file are used to determine whether the operation is allowed 3340 * or not. 3341 */ 3342 int 3343 kern_futimes(int fd, struct timeval *tptr) 3344 { 3345 struct thread *td = curthread; 3346 struct proc *p = td->td_proc; 3347 struct timespec ts[2]; 3348 struct file *fp; 3349 struct vnode *vp; 3350 struct vattr vattr; 3351 int error; 3352 3353 error = getutimes(tptr, ts); 3354 if (error) 3355 return (error); 3356 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3357 return (error); 3358 if (fp->f_nchandle.ncp) 3359 error = ncp_writechk(&fp->f_nchandle); 3360 if (error == 0) { 3361 vp = fp->f_data; 3362 error = vget(vp, LK_EXCLUSIVE); 3363 if (error == 0) { 3364 error = VOP_GETATTR(vp, &vattr); 3365 if (error == 0) { 3366 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3367 fp->f_cred); 3368 } 3369 if (error == 0) { 3370 error = setutimes(vp, &vattr, ts, 3371 (tptr == NULL)); 3372 } 3373 vput(vp); 3374 } 3375 } 3376 fdrop(fp); 3377 return (error); 3378 } 3379 3380 /* 3381 * futimes_args(int fd, struct timeval *tptr) 3382 * 3383 * Set the access and modification times of a file. 3384 */ 3385 int 3386 sys_futimes(struct futimes_args *uap) 3387 { 3388 struct timeval tv[2]; 3389 int error; 3390 3391 if (uap->tptr) { 3392 error = copyin(uap->tptr, tv, sizeof(tv)); 3393 if (error) 3394 return (error); 3395 } 3396 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3397 3398 return (error); 3399 } 3400 3401 int 3402 kern_truncate(struct nlookupdata *nd, off_t length) 3403 { 3404 struct vnode *vp; 3405 struct vattr vattr; 3406 int error; 3407 uid_t uid = 0; 3408 gid_t gid = 0; 3409 uint64_t old_size = 0; 3410 3411 if (length < 0) 3412 return(EINVAL); 3413 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3414 if ((error = nlookup(nd)) != 0) 3415 return (error); 3416 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3417 return (error); 3418 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3419 return (error); 3420 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3421 vrele(vp); 3422 return (error); 3423 } 3424 if (vp->v_type == VDIR) { 3425 error = EISDIR; 3426 goto done; 3427 } 3428 if (vfs_quota_enabled) { 3429 error = VOP_GETATTR(vp, &vattr); 3430 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3431 uid = vattr.va_uid; 3432 gid = vattr.va_gid; 3433 old_size = vattr.va_size; 3434 } 3435 3436 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3437 VATTR_NULL(&vattr); 3438 vattr.va_size = length; 3439 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3440 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3441 } 3442 done: 3443 vput(vp); 3444 return (error); 3445 } 3446 3447 /* 3448 * truncate(char *path, int pad, off_t length) 3449 * 3450 * Truncate a file given its path name. 
3451 */ 3452 int 3453 sys_truncate(struct truncate_args *uap) 3454 { 3455 struct nlookupdata nd; 3456 int error; 3457 3458 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3459 if (error == 0) 3460 error = kern_truncate(&nd, uap->length); 3461 nlookup_done(&nd); 3462 return error; 3463 } 3464 3465 int 3466 kern_ftruncate(int fd, off_t length) 3467 { 3468 struct thread *td = curthread; 3469 struct proc *p = td->td_proc; 3470 struct vattr vattr; 3471 struct vnode *vp; 3472 struct file *fp; 3473 int error; 3474 uid_t uid = 0; 3475 gid_t gid = 0; 3476 uint64_t old_size = 0; 3477 struct mount *mp; 3478 3479 if (length < 0) 3480 return(EINVAL); 3481 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3482 return (error); 3483 if (fp->f_nchandle.ncp) { 3484 error = ncp_writechk(&fp->f_nchandle); 3485 if (error) 3486 goto done; 3487 } 3488 if ((fp->f_flag & FWRITE) == 0) { 3489 error = EINVAL; 3490 goto done; 3491 } 3492 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3493 error = EINVAL; 3494 goto done; 3495 } 3496 vp = (struct vnode *)fp->f_data; 3497 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3498 if (vp->v_type == VDIR) { 3499 error = EISDIR; 3500 goto done; 3501 } 3502 3503 if (vfs_quota_enabled) { 3504 error = VOP_GETATTR(vp, &vattr); 3505 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3506 uid = vattr.va_uid; 3507 gid = vattr.va_gid; 3508 old_size = vattr.va_size; 3509 } 3510 3511 if ((error = vn_writechk(vp, NULL)) == 0) { 3512 VATTR_NULL(&vattr); 3513 vattr.va_size = length; 3514 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3515 mp = vq_vptomp(vp); 3516 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3517 } 3518 vn_unlock(vp); 3519 done: 3520 fdrop(fp); 3521 return (error); 3522 } 3523 3524 /* 3525 * ftruncate_args(int fd, int pad, off_t length) 3526 * 3527 * Truncate a file given a file descriptor. 
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.  With the vnode exclusively locked this cleans the
 * pages of its VM object (if it has one), issues VOP_FSYNC() with
 * MNT_WAIT, and, if that succeeded and the vnode still has a mount,
 * calls buf_fsync().
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((obj = vp->v_object) != NULL)
		vm_object_page_clean(obj, 0, 0, 0);
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Rename the entry described by fromnd to the name described by tond.
 *
 * Both lookups are performed here; the caller owns fromnd and tond and is
 * responsible for calling nlookup_done() on them.  The parent namecache
 * handles of source and target (fnchd, tnchd) are held for the duration
 * of the checks and dropped on every exit path once acquired.
 *
 * Errors generated directly by this function:
 *	ENOENT	- source or target has no parent, a parent linkage
 *		  changed while unlocked, or the source lost its vnode
 *	EINVAL	- source or target is a mount point, or the target lies
 *		  inside the source
 *	EXDEV	- source, target and their parents are not all on the
 *		  same mount
 *	ENOTDIR	- source is a directory but the existing target is not
 *	EISDIR	- existing target is a directory but the source is not
 *	EPERM	- either lookup did not yield a directory vnode (nl_dvp)
 * Other errors come from nlookup(), ncp_writechk(), VOP_NREMOVE() or
 * VOP_NRENAME().  Renaming an entry onto itself returns 0 without doing
 * anything.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * make sure the parent directories linkages are the same
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 *
	 * (error is 0 on entry to this block: the ncp_writechk() above
	 * succeeded.)
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	/* The parent handles are no longer needed past this point */
	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&fromnd, &tond);
		/* tond is torn down even when its own init failed */
		nlookup_done(&tond);
	}
	nlookup_done(&fromnd);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
3769 */ 3770 int 3771 sys_renameat(struct renameat_args *uap) 3772 { 3773 struct nlookupdata oldnd, newnd; 3774 struct file *oldfp, *newfp; 3775 int error; 3776 3777 error = nlookup_init_at(&oldnd, &oldfp, uap->oldfd, uap->old, 3778 UIO_USERSPACE, 0); 3779 if (error == 0) { 3780 error = nlookup_init_at(&newnd, &newfp, uap->newfd, uap->new, 3781 UIO_USERSPACE, 0); 3782 if (error == 0) 3783 error = kern_rename(&oldnd, &newnd); 3784 nlookup_done_at(&newnd, newfp); 3785 } 3786 nlookup_done_at(&oldnd, oldfp); 3787 return (error); 3788 } 3789 3790 int 3791 kern_mkdir(struct nlookupdata *nd, int mode) 3792 { 3793 struct thread *td = curthread; 3794 struct proc *p = td->td_proc; 3795 struct vnode *vp; 3796 struct vattr vattr; 3797 int error; 3798 3799 bwillinode(1); 3800 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3801 if ((error = nlookup(nd)) != 0) 3802 return (error); 3803 3804 if (nd->nl_nch.ncp->nc_vp) 3805 return (EEXIST); 3806 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3807 return (error); 3808 VATTR_NULL(&vattr); 3809 vattr.va_type = VDIR; 3810 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3811 3812 vp = NULL; 3813 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3814 if (error == 0) 3815 vput(vp); 3816 return (error); 3817 } 3818 3819 /* 3820 * mkdir_args(char *path, int mode) 3821 * 3822 * Make a directory file. 3823 */ 3824 int 3825 sys_mkdir(struct mkdir_args *uap) 3826 { 3827 struct nlookupdata nd; 3828 int error; 3829 3830 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3831 if (error == 0) 3832 error = kern_mkdir(&nd, uap->mode); 3833 nlookup_done(&nd); 3834 return (error); 3835 } 3836 3837 /* 3838 * mkdirat_args(int fd, char *path, mode_t mode) 3839 * 3840 * Make a directory file. The path is relative to the directory associated 3841 * with fd. 
3842 */ 3843 int 3844 sys_mkdirat(struct mkdirat_args *uap) 3845 { 3846 struct nlookupdata nd; 3847 struct file *fp; 3848 int error; 3849 3850 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3851 if (error == 0) 3852 error = kern_mkdir(&nd, uap->mode); 3853 nlookup_done_at(&nd, fp); 3854 return (error); 3855 } 3856 3857 int 3858 kern_rmdir(struct nlookupdata *nd) 3859 { 3860 int error; 3861 3862 bwillinode(1); 3863 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3864 if ((error = nlookup(nd)) != 0) 3865 return (error); 3866 3867 /* 3868 * Do not allow directories representing mount points to be 3869 * deleted, even if empty. Check write perms on mount point 3870 * in case the vnode is aliased (aka nullfs). 3871 */ 3872 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3873 return (EINVAL); 3874 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3875 return (error); 3876 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3877 return (error); 3878 } 3879 3880 /* 3881 * rmdir_args(char *path) 3882 * 3883 * Remove a directory file. 
3884 */ 3885 int 3886 sys_rmdir(struct rmdir_args *uap) 3887 { 3888 struct nlookupdata nd; 3889 int error; 3890 3891 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3892 if (error == 0) 3893 error = kern_rmdir(&nd); 3894 nlookup_done(&nd); 3895 return (error); 3896 } 3897 3898 int 3899 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3900 enum uio_seg direction) 3901 { 3902 struct thread *td = curthread; 3903 struct proc *p = td->td_proc; 3904 struct vnode *vp; 3905 struct file *fp; 3906 struct uio auio; 3907 struct iovec aiov; 3908 off_t loff; 3909 int error, eofflag; 3910 3911 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3912 return (error); 3913 if ((fp->f_flag & FREAD) == 0) { 3914 error = EBADF; 3915 goto done; 3916 } 3917 vp = (struct vnode *)fp->f_data; 3918 unionread: 3919 if (vp->v_type != VDIR) { 3920 error = EINVAL; 3921 goto done; 3922 } 3923 aiov.iov_base = buf; 3924 aiov.iov_len = count; 3925 auio.uio_iov = &aiov; 3926 auio.uio_iovcnt = 1; 3927 auio.uio_rw = UIO_READ; 3928 auio.uio_segflg = direction; 3929 auio.uio_td = td; 3930 auio.uio_resid = count; 3931 loff = auio.uio_offset = fp->f_offset; 3932 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3933 fp->f_offset = auio.uio_offset; 3934 if (error) 3935 goto done; 3936 if (count == auio.uio_resid) { 3937 if (union_dircheckp) { 3938 error = union_dircheckp(td, &vp, fp); 3939 if (error == -1) 3940 goto unionread; 3941 if (error) 3942 goto done; 3943 } 3944 #if 0 3945 if ((vp->v_flag & VROOT) && 3946 (vp->v_mount->mnt_flag & MNT_UNION)) { 3947 struct vnode *tvp = vp; 3948 vp = vp->v_mount->mnt_vnodecovered; 3949 vref(vp); 3950 fp->f_data = vp; 3951 fp->f_offset = 0; 3952 vrele(tvp); 3953 goto unionread; 3954 } 3955 #endif 3956 } 3957 3958 /* 3959 * WARNING! *basep may not be wide enough to accomodate the 3960 * seek offset. XXX should we hack this to return the upper 32 bits 3961 * for offsets greater then 4G? 
3962 */ 3963 if (basep) { 3964 *basep = (long)loff; 3965 } 3966 *res = count - auio.uio_resid; 3967 done: 3968 fdrop(fp); 3969 return (error); 3970 } 3971 3972 /* 3973 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3974 * 3975 * Read a block of directory entries in a file system independent format. 3976 */ 3977 int 3978 sys_getdirentries(struct getdirentries_args *uap) 3979 { 3980 long base; 3981 int error; 3982 3983 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3984 &uap->sysmsg_result, UIO_USERSPACE); 3985 3986 if (error == 0 && uap->basep) 3987 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3988 return (error); 3989 } 3990 3991 /* 3992 * getdents_args(int fd, char *buf, size_t count) 3993 */ 3994 int 3995 sys_getdents(struct getdents_args *uap) 3996 { 3997 int error; 3998 3999 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4000 &uap->sysmsg_result, UIO_USERSPACE); 4001 4002 return (error); 4003 } 4004 4005 /* 4006 * Set the mode mask for creation of filesystem nodes. 4007 * 4008 * umask(int newmask) 4009 */ 4010 int 4011 sys_umask(struct umask_args *uap) 4012 { 4013 struct thread *td = curthread; 4014 struct proc *p = td->td_proc; 4015 struct filedesc *fdp; 4016 4017 fdp = p->p_fd; 4018 uap->sysmsg_result = fdp->fd_cmask; 4019 fdp->fd_cmask = uap->newmask & ALLPERMS; 4020 return (0); 4021 } 4022 4023 /* 4024 * revoke(char *path) 4025 * 4026 * Void all references to file by ripping underlying filesystem 4027 * away from vnode. 
4028 */ 4029 int 4030 sys_revoke(struct revoke_args *uap) 4031 { 4032 struct nlookupdata nd; 4033 struct vattr vattr; 4034 struct vnode *vp; 4035 struct ucred *cred; 4036 int error; 4037 4038 vp = NULL; 4039 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4040 if (error == 0) 4041 error = nlookup(&nd); 4042 if (error == 0) 4043 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4044 cred = crhold(nd.nl_cred); 4045 nlookup_done(&nd); 4046 if (error == 0) { 4047 if (error == 0) 4048 error = VOP_GETATTR(vp, &vattr); 4049 if (error == 0 && cred->cr_uid != vattr.va_uid) 4050 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4051 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4052 if (vcount(vp) > 0) 4053 error = vrevoke(vp, cred); 4054 } else if (error == 0) { 4055 error = vrevoke(vp, cred); 4056 } 4057 vrele(vp); 4058 } 4059 if (cred) 4060 crfree(cred); 4061 return (error); 4062 } 4063 4064 /* 4065 * getfh_args(char *fname, fhandle_t *fhp) 4066 * 4067 * Get (NFS) file handle 4068 * 4069 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4070 * mount. This allows nullfs mounts to be explicitly exported. 4071 * 4072 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4073 * 4074 * nullfs mounts of subdirectories are not safe. That is, it will 4075 * work, but you do not really have protection against access to 4076 * the related parent directories. 
4077 */ 4078 int 4079 sys_getfh(struct getfh_args *uap) 4080 { 4081 struct thread *td = curthread; 4082 struct nlookupdata nd; 4083 fhandle_t fh; 4084 struct vnode *vp; 4085 struct mount *mp; 4086 int error; 4087 4088 /* 4089 * Must be super user 4090 */ 4091 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4092 return (error); 4093 4094 vp = NULL; 4095 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4096 if (error == 0) 4097 error = nlookup(&nd); 4098 if (error == 0) 4099 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4100 mp = nd.nl_nch.mount; 4101 nlookup_done(&nd); 4102 if (error == 0) { 4103 bzero(&fh, sizeof(fh)); 4104 fh.fh_fsid = mp->mnt_stat.f_fsid; 4105 error = VFS_VPTOFH(vp, &fh.fh_fid); 4106 vput(vp); 4107 if (error == 0) 4108 error = copyout(&fh, uap->fhp, sizeof(fh)); 4109 } 4110 return (error); 4111 } 4112 4113 /* 4114 * fhopen_args(const struct fhandle *u_fhp, int flags) 4115 * 4116 * syscall for the rpc.lockd to use to translate a NFS file handle into 4117 * an open descriptor. 4118 * 4119 * warning: do not remove the priv_check() call or this becomes one giant 4120 * security hole. 4121 */ 4122 int 4123 sys_fhopen(struct fhopen_args *uap) 4124 { 4125 struct thread *td = curthread; 4126 struct filedesc *fdp = td->td_proc->p_fd; 4127 struct mount *mp; 4128 struct vnode *vp; 4129 struct fhandle fhp; 4130 struct vattr vat; 4131 struct vattr *vap = &vat; 4132 struct flock lf; 4133 int fmode, mode, error, type; 4134 struct file *nfp; 4135 struct file *fp; 4136 int indx; 4137 4138 /* 4139 * Must be super user 4140 */ 4141 error = priv_check(td, PRIV_ROOT); 4142 if (error) 4143 return (error); 4144 4145 fmode = FFLAGS(uap->flags); 4146 4147 /* 4148 * Why not allow a non-read/write open for our lockd? 
4149 */ 4150 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4151 return (EINVAL); 4152 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4153 if (error) 4154 return(error); 4155 4156 /* 4157 * Find the mount point 4158 */ 4159 mp = vfs_getvfs(&fhp.fh_fsid); 4160 if (mp == NULL) { 4161 error = ESTALE; 4162 goto done; 4163 } 4164 /* now give me my vnode, it gets returned to me locked */ 4165 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4166 if (error) 4167 goto done; 4168 /* 4169 * from now on we have to make sure not 4170 * to forget about the vnode 4171 * any error that causes an abort must vput(vp) 4172 * just set error = err and 'goto bad;'. 4173 */ 4174 4175 /* 4176 * from vn_open 4177 */ 4178 if (vp->v_type == VLNK) { 4179 error = EMLINK; 4180 goto bad; 4181 } 4182 if (vp->v_type == VSOCK) { 4183 error = EOPNOTSUPP; 4184 goto bad; 4185 } 4186 mode = 0; 4187 if (fmode & (FWRITE | O_TRUNC)) { 4188 if (vp->v_type == VDIR) { 4189 error = EISDIR; 4190 goto bad; 4191 } 4192 error = vn_writechk(vp, NULL); 4193 if (error) 4194 goto bad; 4195 mode |= VWRITE; 4196 } 4197 if (fmode & FREAD) 4198 mode |= VREAD; 4199 if (mode) { 4200 error = VOP_ACCESS(vp, mode, td->td_ucred); 4201 if (error) 4202 goto bad; 4203 } 4204 if (fmode & O_TRUNC) { 4205 vn_unlock(vp); /* XXX */ 4206 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4207 VATTR_NULL(vap); 4208 vap->va_size = 0; 4209 error = VOP_SETATTR(vp, vap, td->td_ucred); 4210 if (error) 4211 goto bad; 4212 } 4213 4214 /* 4215 * VOP_OPEN needs the file pointer so it can potentially override 4216 * it. 4217 * 4218 * WARNING! no f_nchandle will be associated when fhopen()ing a 4219 * directory. XXX 4220 */ 4221 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4222 goto bad; 4223 fp = nfp; 4224 4225 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4226 if (error) { 4227 /* 4228 * setting f_ops this way prevents VOP_CLOSE from being 4229 * called or fdrop() releasing the vp from v_data. 
Since 4230 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4231 */ 4232 fp->f_ops = &badfileops; 4233 fp->f_data = NULL; 4234 goto bad_drop; 4235 } 4236 4237 /* 4238 * The fp is given its own reference, we still have our ref and lock. 4239 * 4240 * Assert that all regular files must be created with a VM object. 4241 */ 4242 if (vp->v_type == VREG && vp->v_object == NULL) { 4243 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4244 goto bad_drop; 4245 } 4246 4247 /* 4248 * The open was successful. Handle any locking requirements. 4249 */ 4250 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4251 lf.l_whence = SEEK_SET; 4252 lf.l_start = 0; 4253 lf.l_len = 0; 4254 if (fmode & O_EXLOCK) 4255 lf.l_type = F_WRLCK; 4256 else 4257 lf.l_type = F_RDLCK; 4258 if (fmode & FNONBLOCK) 4259 type = 0; 4260 else 4261 type = F_WAIT; 4262 vn_unlock(vp); 4263 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4264 /* 4265 * release our private reference. 4266 */ 4267 fsetfd(fdp, NULL, indx); 4268 fdrop(fp); 4269 vrele(vp); 4270 goto done; 4271 } 4272 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4273 fp->f_flag |= FHASLOCK; 4274 } 4275 4276 /* 4277 * Clean up. Associate the file pointer with the previously 4278 * reserved descriptor and return it. 
4279 */ 4280 vput(vp); 4281 fsetfd(fdp, fp, indx); 4282 fdrop(fp); 4283 uap->sysmsg_result = indx; 4284 return (0); 4285 4286 bad_drop: 4287 fsetfd(fdp, NULL, indx); 4288 fdrop(fp); 4289 bad: 4290 vput(vp); 4291 done: 4292 return (error); 4293 } 4294 4295 /* 4296 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4297 */ 4298 int 4299 sys_fhstat(struct fhstat_args *uap) 4300 { 4301 struct thread *td = curthread; 4302 struct stat sb; 4303 fhandle_t fh; 4304 struct mount *mp; 4305 struct vnode *vp; 4306 int error; 4307 4308 /* 4309 * Must be super user 4310 */ 4311 error = priv_check(td, PRIV_ROOT); 4312 if (error) 4313 return (error); 4314 4315 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4316 if (error) 4317 return (error); 4318 4319 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4320 error = ESTALE; 4321 if (error == 0) { 4322 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4323 error = vn_stat(vp, &sb, td->td_ucred); 4324 vput(vp); 4325 } 4326 } 4327 if (error == 0) 4328 error = copyout(&sb, uap->sb, sizeof(sb)); 4329 return (error); 4330 } 4331 4332 /* 4333 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4334 */ 4335 int 4336 sys_fhstatfs(struct fhstatfs_args *uap) 4337 { 4338 struct thread *td = curthread; 4339 struct proc *p = td->td_proc; 4340 struct statfs *sp; 4341 struct mount *mp; 4342 struct vnode *vp; 4343 struct statfs sb; 4344 char *fullpath, *freepath; 4345 fhandle_t fh; 4346 int error; 4347 4348 /* 4349 * Must be super user 4350 */ 4351 if ((error = priv_check(td, PRIV_ROOT))) 4352 return (error); 4353 4354 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4355 return (error); 4356 4357 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4358 error = ESTALE; 4359 goto done; 4360 } 4361 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4362 error = ESTALE; 4363 goto done; 4364 } 4365 4366 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4367 goto done; 4368 mp = vp->v_mount; 4369 sp = &mp->mnt_stat; 4370 
vput(vp); 4371 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4372 goto done; 4373 4374 error = mount_path(p, mp, &fullpath, &freepath); 4375 if (error) 4376 goto done; 4377 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4378 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4379 kfree(freepath, M_TEMP); 4380 4381 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4382 if (priv_check(td, PRIV_ROOT)) { 4383 bcopy(sp, &sb, sizeof(sb)); 4384 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4385 sp = &sb; 4386 } 4387 error = copyout(sp, uap->buf, sizeof(*sp)); 4388 done: 4389 return (error); 4390 } 4391 4392 /* 4393 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4394 */ 4395 int 4396 sys_fhstatvfs(struct fhstatvfs_args *uap) 4397 { 4398 struct thread *td = curthread; 4399 struct proc *p = td->td_proc; 4400 struct statvfs *sp; 4401 struct mount *mp; 4402 struct vnode *vp; 4403 fhandle_t fh; 4404 int error; 4405 4406 /* 4407 * Must be super user 4408 */ 4409 if ((error = priv_check(td, PRIV_ROOT))) 4410 return (error); 4411 4412 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4413 return (error); 4414 4415 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4416 error = ESTALE; 4417 goto done; 4418 } 4419 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4420 error = ESTALE; 4421 goto done; 4422 } 4423 4424 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4425 goto done; 4426 mp = vp->v_mount; 4427 sp = &mp->mnt_vstat; 4428 vput(vp); 4429 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4430 goto done; 4431 4432 sp->f_flag = 0; 4433 if (mp->mnt_flag & MNT_RDONLY) 4434 sp->f_flag |= ST_RDONLY; 4435 if (mp->mnt_flag & MNT_NOSUID) 4436 sp->f_flag |= ST_NOSUID; 4437 error = copyout(sp, uap->buf, sizeof(*sp)); 4438 done: 4439 return (error); 4440 } 4441 4442 4443 /* 4444 * Syscall to push extended attribute configuration information into the 4445 * VFS. 
Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	int invoked;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;
	invoked = 0;

	/* Optional backing file: resolve it to a referenced vnode. */
	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
		/* Only trust vp when the whole stage succeeded. */
		if (error != 0)
			vp = NULL;
	}

	/*
	 * The copied-in name is not used further; the user pointer is what
	 * is passed to the VFS op (see the comment above the function).
	 * The copy still rejects unreadable or over-long names early.
	 */
	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			invoked = 1;
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	/*
	 * If we never reached VFS_EXTATTRCTL() nobody else can release the
	 * reference taken by cache_vref(), so drop it here.
	 * NOTE(review): when the op IS invoked this code has never released
	 * vp; confirm whether VFS_EXTATTRCTL() consumes the reference or
	 * whether a vrele() is needed on that path as well.
	 */
	if (vp != NULL && !invoked)
		vrele(vp);

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
4501 */ 4502 int 4503 sys_extattr_set_file(struct extattr_set_file_args *uap) 4504 { 4505 char attrname[EXTATTR_MAXNAMELEN]; 4506 struct nlookupdata nd; 4507 struct vnode *vp; 4508 struct uio auio; 4509 struct iovec aiov; 4510 int error; 4511 4512 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4513 if (error) 4514 return (error); 4515 4516 vp = NULL; 4517 4518 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4519 if (error == 0) 4520 error = nlookup(&nd); 4521 if (error == 0) 4522 error = ncp_writechk(&nd.nl_nch); 4523 if (error == 0) 4524 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4525 if (error) { 4526 nlookup_done(&nd); 4527 return (error); 4528 } 4529 4530 bzero(&auio, sizeof(auio)); 4531 aiov.iov_base = uap->data; 4532 aiov.iov_len = uap->nbytes; 4533 auio.uio_iov = &aiov; 4534 auio.uio_iovcnt = 1; 4535 auio.uio_offset = 0; 4536 auio.uio_resid = uap->nbytes; 4537 auio.uio_rw = UIO_WRITE; 4538 auio.uio_td = curthread; 4539 4540 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4541 &auio, nd.nl_cred); 4542 4543 vput(vp); 4544 nlookup_done(&nd); 4545 return (error); 4546 } 4547 4548 /* 4549 * Syscall to get a named extended attribute on a file or directory. 
4550 */ 4551 int 4552 sys_extattr_get_file(struct extattr_get_file_args *uap) 4553 { 4554 char attrname[EXTATTR_MAXNAMELEN]; 4555 struct nlookupdata nd; 4556 struct uio auio; 4557 struct iovec aiov; 4558 struct vnode *vp; 4559 int error; 4560 4561 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4562 if (error) 4563 return (error); 4564 4565 vp = NULL; 4566 4567 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4568 if (error == 0) 4569 error = nlookup(&nd); 4570 if (error == 0) 4571 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4572 if (error) { 4573 nlookup_done(&nd); 4574 return (error); 4575 } 4576 4577 bzero(&auio, sizeof(auio)); 4578 aiov.iov_base = uap->data; 4579 aiov.iov_len = uap->nbytes; 4580 auio.uio_iov = &aiov; 4581 auio.uio_iovcnt = 1; 4582 auio.uio_offset = 0; 4583 auio.uio_resid = uap->nbytes; 4584 auio.uio_rw = UIO_READ; 4585 auio.uio_td = curthread; 4586 4587 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4588 &auio, nd.nl_cred); 4589 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4590 4591 vput(vp); 4592 nlookup_done(&nd); 4593 return(error); 4594 } 4595 4596 /* 4597 * Syscall to delete a named extended attribute from a file or directory. 4598 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4599 */ 4600 int 4601 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4602 { 4603 char attrname[EXTATTR_MAXNAMELEN]; 4604 struct nlookupdata nd; 4605 struct vnode *vp; 4606 int error; 4607 4608 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4609 if (error) 4610 return(error); 4611 4612 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4613 if (error == 0) 4614 error = nlookup(&nd); 4615 if (error == 0) 4616 error = ncp_writechk(&nd.nl_nch); 4617 if (error == 0) { 4618 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4619 if (error == 0) { 4620 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4621 attrname, NULL, nd.nl_cred); 4622 vput(vp); 4623 } 4624 } 4625 nlookup_done(&nd); 4626 return(error); 4627 } 4628 4629 /* 4630 * Determine if the mount is visible to the process. 4631 */ 4632 static int 4633 chroot_visible_mnt(struct mount *mp, struct proc *p) 4634 { 4635 struct nchandle nch; 4636 4637 /* 4638 * Traverse from the mount point upwards. If we hit the process 4639 * root then the mount point is visible to the process. 4640 */ 4641 nch = mp->mnt_ncmountpt; 4642 while (nch.ncp) { 4643 if (nch.mount == p->p_fd->fd_nrdir.mount && 4644 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4645 return(1); 4646 } 4647 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4648 nch = nch.mount->mnt_ncmounton; 4649 } else { 4650 nch.ncp = nch.ncp->nc_parent; 4651 } 4652 } 4653 4654 /* 4655 * If the mount point is not visible to the process, but the 4656 * process root is in a subdirectory of the mount, return 4657 * TRUE anyway. 4658 */ 4659 if (p->p_fd->fd_nrdir.mount == mp) 4660 return(1); 4661 4662 return(0); 4663 } 4664 4665