/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

static void mount_warning(struct mount *mp, const char *ctl, ...)
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}

	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type.  We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_ismounting(mp);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}
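
/*
 * Illustrative userland sketch (not part of this file): sys_mount() and
 * sys_unmount() are typically reached through the libc wrappers, e.g.
 *
 *	struct ufs_args args = { .fspec = "/dev/ad0s1a" };  (fs-specific args)
 *	mount("ufs", "/mnt", MNT_RDONLY, &args);
 *	unmount("/mnt", MNT_FORCE);
 *
 * The args structure is filesystem specific and passes through uap->data
 * to VFS_MOUNT() uninterpreted by this layer.
 */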

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;
	struct nchandle new_nch;
	struct vnode *old_vp;
	struct vnode *new_vp;
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not the special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	int error;

	KKASSERT(p);
	get_mplock();
	if (td->td_ucred->cr_prison != NULL) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	nlookup_done(&nd);
	if (error == 0)
		error = dounmount(mp, uap->flags);
done:
	rel_mplock();
	return (error);
}

/*
 * Do the actual file system unmount.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() does not).
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0) {
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;
	}
	error = 0;
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}
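
/*
 * Example of the shim above (illustrative only): for a process chrooted
 * into /jail on a filesystem mounted at /, resolving paths relative to
 * the process root (fd_nrdir) rather than the mount point keeps the
 * f_mntonname reported by statfs meaningful inside the chroot, where the
 * real mount point may not be reachable at all.
 */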

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	get_mplock();
	td = curthread;
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	rel_mplock();
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	get_mplock();
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &uap->sysmsg_result);
	rel_mplock();
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}
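
/*
 * Illustrative userland sketch (not part of this file): a mountctl(2)
 * call, following the argument layout documented above.  MOUNTCTL_MOUNTFLAGS
 * is the one op permitted without PRIV_ROOT; the result buffer contents
 * are op specific, so this is only an assumed shape:
 *
 *	int flags;
 *	mountctl("/usr", MOUNTCTL_MOUNTFLAGS, -1, NULL, 0,
 *		 &flags, sizeof(flags));
 */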

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);
	vn_unlock(vp);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	return (error);
}

int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}
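
/*
 * Illustrative userland sketch (not part of this file): the usual
 * getfsstat(2) idiom serviced by the code above sizes the buffer with a
 * NULL first call (the callback only counts in that case), then fetches
 * the entries:
 *
 *	int n = getfsstat(NULL, 0, MNT_NOWAIT);
 *	struct statfs *fs = malloc(n * sizeof(*fs));
 *	n = getfsstat(fs, n * sizeof(*fs), MNT_NOWAIT);
 */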

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		    long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;
	struct statvfs *vsfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}

/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
	get_mplock();
	vfs_cache_setroot(vp, cache_hold(nch));
	rel_mplock();

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}
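
/*
 * Illustrative userland sketch (not part of this file): the canonical
 * chroot(2) pattern serviced by kern_chroot() enters the new root and
 * immediately re-anchors the working directory so "." cannot point
 * outside the new root:
 *
 *	chroot("/var/jail");
 *	chdir("/");
 *
 * The kern.chroot_allow_open_directories sysctl above additionally
 * controls whether a process holding open directory fds may chroot.
 */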

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0)
		nd->nl_flags |= NLC_SHAREDLOCK;

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with an object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				  uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}
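
/*
 * Illustrative userland sketch (not part of this file): creating a
 * character device node through the syscalls below, using an assumed
 * major/minor pair; kern_mknod() splits the dev_t back apart with
 * umajor()/uminor():
 *
 *	mknod("/dev/mynode", S_IFCHR | 0600, makedev(4, 0));
 */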

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
Note that any group is allowed if 2238 * the file is owned by the caller. 2239 */ 2240 error = VOP_GETATTR(vp, &va); 2241 if (error != 0) 2242 return (error); 2243 2244 if (hardlink_check_uid) { 2245 if (cred->cr_uid != va.va_uid) 2246 return (EPERM); 2247 } 2248 2249 if (hardlink_check_gid) { 2250 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2251 return (EPERM); 2252 } 2253 2254 return (0); 2255 } 2256 2257 int 2258 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2259 { 2260 struct thread *td = curthread; 2261 struct vnode *vp; 2262 int error; 2263 2264 /* 2265 * Lookup the source and obtained a locked vnode. 2266 * 2267 * You may only hardlink a file which you have write permission 2268 * on or which you own. 2269 * 2270 * XXX relookup on vget failure / race ? 2271 */ 2272 bwillinode(1); 2273 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2274 if ((error = nlookup(nd)) != 0) 2275 return (error); 2276 vp = nd->nl_nch.ncp->nc_vp; 2277 KKASSERT(vp != NULL); 2278 if (vp->v_type == VDIR) 2279 return (EPERM); /* POSIX */ 2280 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2281 return (error); 2282 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2283 return (error); 2284 2285 /* 2286 * Unlock the source so we can lookup the target without deadlocking 2287 * (XXX vp is locked already, possible other deadlock?). The target 2288 * must not exist. 2289 */ 2290 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2291 nd->nl_flags &= ~NLC_NCPISLOCKED; 2292 cache_unlock(&nd->nl_nch); 2293 vn_unlock(vp); 2294 2295 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2296 if ((error = nlookup(linknd)) != 0) { 2297 vrele(vp); 2298 return (error); 2299 } 2300 if (linknd->nl_nch.ncp->nc_vp) { 2301 vrele(vp); 2302 return (EEXIST); 2303 } 2304 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2305 if (error) { 2306 vrele(vp); 2307 return (error); 2308 } 2309 2310 /* 2311 * Finally run the new API VOP. 2312 */ 2313 error = can_hardlink(vp, td, td->td_ucred); 2314 if (error == 0) { 2315 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2316 vp, linknd->nl_cred); 2317 } 2318 vput(vp); 2319 return (error); 2320 } 2321 2322 /* 2323 * link_args(char *path, char *link) 2324 * 2325 * Make a hard file link. 2326 */ 2327 int 2328 sys_link(struct link_args *uap) 2329 { 2330 struct nlookupdata nd, linknd; 2331 int error; 2332 2333 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2334 if (error == 0) { 2335 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2336 if (error == 0) 2337 error = kern_link(&nd, &linknd); 2338 nlookup_done(&linknd); 2339 } 2340 nlookup_done(&nd); 2341 return (error); 2342 } 2343 2344 /* 2345 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2346 * 2347 * Make a hard file link. The path1 argument is relative to the directory 2348 * associated with fd1, and similarly the path2 argument is relative to 2349 * the directory associated with fd2. 2350 */ 2351 int 2352 sys_linkat(struct linkat_args *uap) 2353 { 2354 struct nlookupdata nd, linknd; 2355 struct file *fp1, *fp2; 2356 int error; 2357 2358 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2359 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2360 if (error == 0) { 2361 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2362 uap->path2, UIO_USERSPACE, 0); 2363 if (error == 0) 2364 error = kern_link(&nd, &linknd); 2365 nlookup_done_at(&linknd, fp2); 2366 } 2367 nlookup_done_at(&nd, fp1); 2368 return (error); 2369 } 2370 2371 int 2372 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2373 { 2374 struct vattr vattr; 2375 struct vnode *vp; 2376 struct vnode *dvp; 2377 int error; 2378 2379 bwillinode(1); 2380 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2381 if ((error = nlookup(nd)) != 0) 2382 return (error); 2383 if (nd->nl_nch.ncp->nc_vp) 2384 return (EEXIST); 2385 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2386 return (error); 2387 dvp = nd->nl_dvp; 2388 VATTR_NULL(&vattr); 2389 vattr.va_mode = mode; 2390 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2391 if (error == 0) 2392 vput(vp); 2393 return (error); 2394 } 2395 2396 /* 2397 * symlink(char *path, char *link) 2398 * 2399 * Make a symbolic link. 2400 */ 2401 int 2402 sys_symlink(struct symlink_args *uap) 2403 { 2404 struct thread *td = curthread; 2405 struct nlookupdata nd; 2406 char *path; 2407 int error; 2408 int mode; 2409 2410 path = objcache_get(namei_oc, M_WAITOK); 2411 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2412 if (error == 0) { 2413 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2414 if (error == 0) { 2415 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2416 error = kern_symlink(&nd, path, mode); 2417 } 2418 nlookup_done(&nd); 2419 } 2420 objcache_put(namei_oc, path); 2421 return (error); 2422 } 2423 2424 /* 2425 * symlinkat_args(char *path1, int fd, char *path2) 2426 * 2427 * Make a symbolic link. The path2 argument is relative to the directory 2428 * associated with fd. 2429 */ 2430 int 2431 sys_symlinkat(struct symlinkat_args *uap) 2432 { 2433 struct thread *td = curthread; 2434 struct nlookupdata nd; 2435 struct file *fp; 2436 char *path1; 2437 int error; 2438 int mode; 2439 2440 path1 = objcache_get(namei_oc, M_WAITOK); 2441 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2442 if (error == 0) { 2443 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2444 UIO_USERSPACE, 0); 2445 if (error == 0) { 2446 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2447 error = kern_symlink(&nd, path1, mode); 2448 } 2449 nlookup_done_at(&nd, fp); 2450 } 2451 objcache_put(namei_oc, path1); 2452 return (error); 2453 } 2454 2455 /* 2456 * undelete_args(char *path) 2457 * 2458 * Delete a whiteout from the filesystem. 2459 */ 2460 int 2461 sys_undelete(struct undelete_args *uap) 2462 { 2463 struct nlookupdata nd; 2464 int error; 2465 2466 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2467 bwillinode(1); 2468 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2469 if (error == 0) 2470 error = nlookup(&nd); 2471 if (error == 0) 2472 error = ncp_writechk(&nd.nl_nch); 2473 if (error == 0) { 2474 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2475 NAMEI_DELETE); 2476 } 2477 nlookup_done(&nd); 2478 return (error); 2479 } 2480 2481 int 2482 kern_unlink(struct nlookupdata *nd) 2483 { 2484 int error; 2485 2486 bwillinode(1); 2487 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2488 if ((error = nlookup(nd)) != 0) 2489 return (error); 2490 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2491 return (error); 2492 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2493 return (error); 2494 } 2495 2496 /* 2497 * unlink_args(char *path) 2498 * 2499 * Delete a name from the filesystem. 
2500 */ 2501 int 2502 sys_unlink(struct unlink_args *uap) 2503 { 2504 struct nlookupdata nd; 2505 int error; 2506 2507 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2508 if (error == 0) 2509 error = kern_unlink(&nd); 2510 nlookup_done(&nd); 2511 return (error); 2512 } 2513 2514 2515 /* 2516 * unlinkat_args(int fd, char *path, int flags) 2517 * 2518 * Delete the file or directory entry pointed to by fd/path. 2519 */ 2520 int 2521 sys_unlinkat(struct unlinkat_args *uap) 2522 { 2523 struct nlookupdata nd; 2524 struct file *fp; 2525 int error; 2526 2527 if (uap->flags & ~AT_REMOVEDIR) 2528 return (EINVAL); 2529 2530 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2531 if (error == 0) { 2532 if (uap->flags & AT_REMOVEDIR) 2533 error = kern_rmdir(&nd); 2534 else 2535 error = kern_unlink(&nd); 2536 } 2537 nlookup_done_at(&nd, fp); 2538 return (error); 2539 } 2540 2541 int 2542 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2543 { 2544 struct thread *td = curthread; 2545 struct proc *p = td->td_proc; 2546 struct file *fp; 2547 struct vnode *vp; 2548 struct vattr vattr; 2549 off_t new_offset; 2550 int error; 2551 2552 fp = holdfp(p->p_fd, fd, -1); 2553 if (fp == NULL) 2554 return (EBADF); 2555 if (fp->f_type != DTYPE_VNODE) { 2556 error = ESPIPE; 2557 goto done; 2558 } 2559 vp = (struct vnode *)fp->f_data; 2560 2561 switch (whence) { 2562 case L_INCR: 2563 spin_lock(&fp->f_spin); 2564 new_offset = fp->f_offset + offset; 2565 error = 0; 2566 break; 2567 case L_XTND: 2568 error = VOP_GETATTR(vp, &vattr); 2569 spin_lock(&fp->f_spin); 2570 new_offset = offset + vattr.va_size; 2571 break; 2572 case L_SET: 2573 new_offset = offset; 2574 error = 0; 2575 spin_lock(&fp->f_spin); 2576 break; 2577 default: 2578 new_offset = 0; 2579 error = EINVAL; 2580 spin_lock(&fp->f_spin); 2581 break; 2582 } 2583 2584 /* 2585 * Validate the seek position. Negative offsets are not allowed 2586 * for regular files or directories. 2587 * 2588 * Normally we would also not want to allow negative offsets for 2589 * character and block-special devices. However kvm addresses 2590 * on 64 bit architectures might appear to be negative and must 2591 * be allowed. 2592 */ 2593 if (error == 0) { 2594 if (new_offset < 0 && 2595 (vp->v_type == VREG || vp->v_type == VDIR)) { 2596 error = EINVAL; 2597 } else { 2598 fp->f_offset = new_offset; 2599 } 2600 } 2601 *res = fp->f_offset; 2602 spin_unlock(&fp->f_spin); 2603 done: 2604 fdrop(fp); 2605 return (error); 2606 } 2607 2608 /* 2609 * lseek_args(int fd, int pad, off_t offset, int whence) 2610 * 2611 * Reposition read/write file offset. 2612 */ 2613 int 2614 sys_lseek(struct lseek_args *uap) 2615 { 2616 int error; 2617 2618 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2619 &uap->sysmsg_offset); 2620 2621 return (error); 2622 } 2623 2624 /* 2625 * Check if current process can access given file. amode is a bitmask of *_OK 2626 * access bits. flags is a bitmask of AT_* flags. 2627 */ 2628 int 2629 kern_access(struct nlookupdata *nd, int amode, int flags) 2630 { 2631 struct vnode *vp; 2632 int error, mode; 2633 2634 if (flags & ~AT_EACCESS) 2635 return (EINVAL); 2636 nd->nl_flags |= NLC_SHAREDLOCK; 2637 if ((error = nlookup(nd)) != 0) 2638 return (error); 2639 retry: 2640 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2641 if (error) 2642 return (error); 2643 2644 /* Flags == 0 means only check for existence. 
*/ 2645 if (amode) { 2646 mode = 0; 2647 if (amode & R_OK) 2648 mode |= VREAD; 2649 if (amode & W_OK) 2650 mode |= VWRITE; 2651 if (amode & X_OK) 2652 mode |= VEXEC; 2653 if ((mode & VWRITE) == 0 || 2654 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2655 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2656 2657 /* 2658 * If the file handle is stale we have to re-resolve the 2659 * entry with the ncp held exclusively. This is a hack 2660 * at the moment. 2661 */ 2662 if (error == ESTALE) { 2663 vput(vp); 2664 cache_unlock(&nd->nl_nch); 2665 cache_lock(&nd->nl_nch); 2666 cache_setunresolved(&nd->nl_nch); 2667 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2668 if (error == 0) { 2669 vp = NULL; 2670 goto retry; 2671 } 2672 return(error); 2673 } 2674 } 2675 vput(vp); 2676 return (error); 2677 } 2678 2679 /* 2680 * access_args(char *path, int flags) 2681 * 2682 * Check access permissions. 2683 */ 2684 int 2685 sys_access(struct access_args *uap) 2686 { 2687 struct nlookupdata nd; 2688 int error; 2689 2690 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2691 if (error == 0) 2692 error = kern_access(&nd, uap->flags, 0); 2693 nlookup_done(&nd); 2694 return (error); 2695 } 2696 2697 2698 /* 2699 * eaccess_args(char *path, int flags) 2700 * 2701 * Check access permissions. 2702 */ 2703 int 2704 sys_eaccess(struct eaccess_args *uap) 2705 { 2706 struct nlookupdata nd; 2707 int error; 2708 2709 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2710 if (error == 0) 2711 error = kern_access(&nd, uap->flags, AT_EACCESS); 2712 nlookup_done(&nd); 2713 return (error); 2714 } 2715 2716 2717 /* 2718 * faccessat_args(int fd, char *path, int amode, int flags) 2719 * 2720 * Check access permissions. 2721 */ 2722 int 2723 sys_faccessat(struct faccessat_args *uap) 2724 { 2725 struct nlookupdata nd; 2726 struct file *fp; 2727 int error; 2728 2729 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2730 NLC_FOLLOW); 2731 if (error == 0) 2732 error = kern_access(&nd, uap->amode, uap->flags); 2733 nlookup_done_at(&nd, fp); 2734 return (error); 2735 } 2736 2737 int 2738 kern_stat(struct nlookupdata *nd, struct stat *st) 2739 { 2740 int error; 2741 struct vnode *vp; 2742 2743 nd->nl_flags |= NLC_SHAREDLOCK; 2744 if ((error = nlookup(nd)) != 0) 2745 return (error); 2746 again: 2747 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2748 return (ENOENT); 2749 2750 if ((error = vget(vp, LK_SHARED)) != 0) 2751 return (error); 2752 error = vn_stat(vp, st, nd->nl_cred); 2753 2754 /* 2755 * If the file handle is stale we have to re-resolve the 2756 * entry with the ncp held exclusively. This is a hack 2757 * at the moment. 2758 */ 2759 if (error == ESTALE) { 2760 vput(vp); 2761 cache_unlock(&nd->nl_nch); 2762 cache_lock(&nd->nl_nch); 2763 cache_setunresolved(&nd->nl_nch); 2764 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2765 if (error == 0) 2766 goto again; 2767 } else { 2768 vput(vp); 2769 } 2770 return (error); 2771 } 2772 2773 /* 2774 * stat_args(char *path, struct stat *ub) 2775 * 2776 * Get file status; this version follows links. 
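 *
 * Illustrative userland use (hypothetical path):
 *
 *	struct stat st;
 *	if (stat("/etc/passwd", &st) == 0)
 *		printf("%lld bytes\n", (long long)st.st_size);
 *
 * lstat() below is identical except that the final symlink is not followed.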
2777 */ 2778 int 2779 sys_stat(struct stat_args *uap) 2780 { 2781 struct nlookupdata nd; 2782 struct stat st; 2783 int error; 2784 2785 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2786 if (error == 0) { 2787 error = kern_stat(&nd, &st); 2788 if (error == 0) 2789 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2790 } 2791 nlookup_done(&nd); 2792 return (error); 2793 } 2794 2795 /* 2796 * lstat_args(char *path, struct stat *ub) 2797 * 2798 * Get file status; this version does not follow links. 2799 */ 2800 int 2801 sys_lstat(struct lstat_args *uap) 2802 { 2803 struct nlookupdata nd; 2804 struct stat st; 2805 int error; 2806 2807 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2808 if (error == 0) { 2809 error = kern_stat(&nd, &st); 2810 if (error == 0) 2811 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2812 } 2813 nlookup_done(&nd); 2814 return (error); 2815 } 2816 2817 /* 2818 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2819 * 2820 * Get status of file pointed to by fd/path. 2821 */ 2822 int 2823 sys_fstatat(struct fstatat_args *uap) 2824 { 2825 struct nlookupdata nd; 2826 struct stat st; 2827 int error; 2828 int flags; 2829 struct file *fp; 2830 2831 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2832 return (EINVAL); 2833 2834 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2835 2836 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2837 UIO_USERSPACE, flags); 2838 if (error == 0) { 2839 error = kern_stat(&nd, &st); 2840 if (error == 0) 2841 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2842 } 2843 nlookup_done_at(&nd, fp); 2844 return (error); 2845 } 2846 2847 static int 2848 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2849 { 2850 struct nlookupdata nd; 2851 struct vnode *vp; 2852 int error; 2853 2854 vp = NULL; 2855 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2856 if (error == 0) 2857 error = nlookup(&nd); 2858 if (error == 0) 2859 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2860 nlookup_done(&nd); 2861 if (error == 0) { 2862 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2863 vput(vp); 2864 } 2865 return (error); 2866 } 2867 2868 /* 2869 * pathconf_args(char *path, int name) 2870 * 2871 * Get configurable pathname variables. 2872 */ 2873 int 2874 sys_pathconf(struct pathconf_args *uap) 2875 { 2876 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2877 &uap->sysmsg_reg)); 2878 } 2879 2880 /* 2881 * lpathconf_args(char *path, int name) 2882 * 2883 * Get configurable pathname variables, but don't follow symlinks. 2884 */ 2885 int 2886 sys_lpathconf(struct lpathconf_args *uap) 2887 { 2888 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2889 } 2890 2891 /* 2892 * XXX: daver 2893 * kern_readlink isn't properly split yet. There is a copyin buried 2894 * in VOP_READLINK().
2895 */ 2896 int 2897 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2898 { 2899 struct thread *td = curthread; 2900 struct vnode *vp; 2901 struct iovec aiov; 2902 struct uio auio; 2903 int error; 2904 2905 nd->nl_flags |= NLC_SHAREDLOCK; 2906 if ((error = nlookup(nd)) != 0) 2907 return (error); 2908 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2909 if (error) 2910 return (error); 2911 if (vp->v_type != VLNK) { 2912 error = EINVAL; 2913 } else { 2914 aiov.iov_base = buf; 2915 aiov.iov_len = count; 2916 auio.uio_iov = &aiov; 2917 auio.uio_iovcnt = 1; 2918 auio.uio_offset = 0; 2919 auio.uio_rw = UIO_READ; 2920 auio.uio_segflg = UIO_USERSPACE; 2921 auio.uio_td = td; 2922 auio.uio_resid = count; 2923 error = VOP_READLINK(vp, &auio, td->td_ucred); 2924 } 2925 vput(vp); 2926 *res = count - auio.uio_resid; 2927 return (error); 2928 } 2929 2930 /* 2931 * readlink_args(char *path, char *buf, int count) 2932 * 2933 * Return target name of a symbolic link. 2934 */ 2935 int 2936 sys_readlink(struct readlink_args *uap) 2937 { 2938 struct nlookupdata nd; 2939 int error; 2940 2941 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2942 if (error == 0) { 2943 error = kern_readlink(&nd, uap->buf, uap->count, 2944 &uap->sysmsg_result); 2945 } 2946 nlookup_done(&nd); 2947 return (error); 2948 } 2949 2950 /* 2951 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2952 * 2953 * Return target name of a symbolic link. The path is relative to the 2954 * directory associated with fd. 2955 */ 2956 int 2957 sys_readlinkat(struct readlinkat_args *uap) 2958 { 2959 struct nlookupdata nd; 2960 struct file *fp; 2961 int error; 2962 2963 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2964 if (error == 0) { 2965 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2966 &uap->sysmsg_result); 2967 } 2968 nlookup_done_at(&nd, fp); 2969 return (error); 2970 } 2971 2972 static int 2973 setfflags(struct vnode *vp, int flags) 2974 { 2975 struct thread *td = curthread; 2976 int error; 2977 struct vattr vattr; 2978 2979 /* 2980 * Prevent non-root users from setting flags on devices. When 2981 * a device is reused, users can retain ownership of the device 2982 * if they are allowed to set flags and programs assume that 2983 * chown can't fail when done as root. 2984 */ 2985 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2986 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2987 return (error); 2988 2989 /* 2990 * note: vget is required for any operation that might mod the vnode 2991 * so VINACTIVE is properly cleared. 2992 */ 2993 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2994 VATTR_NULL(&vattr); 2995 vattr.va_flags = flags; 2996 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2997 vput(vp); 2998 } 2999 return (error); 3000 } 3001 3002 /* 3003 * chflags(char *path, int flags) 3004 * 3005 * Change flags of a file given a path name. 
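 *
 * Illustrative userland use (hypothetical path):
 *
 *	chflags("/var/db/example", UF_NODUMP);
 *
 * Note that setfflags() above refuses flag changes on character and block
 * devices for users without PRIV_VFS_CHFLAGS_DEV.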
3006 */ 3007 int 3008 sys_chflags(struct chflags_args *uap) 3009 { 3010 struct nlookupdata nd; 3011 struct vnode *vp; 3012 int error; 3013 3014 vp = NULL; 3015 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3016 if (error == 0) 3017 error = nlookup(&nd); 3018 if (error == 0) 3019 error = ncp_writechk(&nd.nl_nch); 3020 if (error == 0) 3021 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3022 nlookup_done(&nd); 3023 if (error == 0) { 3024 error = setfflags(vp, uap->flags); 3025 vrele(vp); 3026 } 3027 return (error); 3028 } 3029 3030 /* 3031 * lchflags(char *path, int flags) 3032 * 3033 * Change flags of a file given a path name, but don't follow symlinks. 3034 */ 3035 int 3036 sys_lchflags(struct lchflags_args *uap) 3037 { 3038 struct nlookupdata nd; 3039 struct vnode *vp; 3040 int error; 3041 3042 vp = NULL; 3043 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3044 if (error == 0) 3045 error = nlookup(&nd); 3046 if (error == 0) 3047 error = ncp_writechk(&nd.nl_nch); 3048 if (error == 0) 3049 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3050 nlookup_done(&nd); 3051 if (error == 0) { 3052 error = setfflags(vp, uap->flags); 3053 vrele(vp); 3054 } 3055 return (error); 3056 } 3057 3058 /* 3059 * fchflags_args(int fd, int flags) 3060 * 3061 * Change flags of a file given a file descriptor. 3062 */ 3063 int 3064 sys_fchflags(struct fchflags_args *uap) 3065 { 3066 struct thread *td = curthread; 3067 struct proc *p = td->td_proc; 3068 struct file *fp; 3069 int error; 3070 3071 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3072 return (error); 3073 if (fp->f_nchandle.ncp) 3074 error = ncp_writechk(&fp->f_nchandle); 3075 if (error == 0) 3076 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3077 fdrop(fp); 3078 return (error); 3079 } 3080 3081 /* 3082 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3083 * change flags given a pathname relative to a filedescriptor 3084 */ 3085 int sys_chflagsat(struct chflagsat_args *uap) 3086 { 3087 struct nlookupdata nd; 3088 struct vnode *vp; 3089 struct file *fp; 3090 int error; 3091 int lookupflags; 3092 3093 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3094 return (EINVAL); 3095 3096 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3097 3098 vp = NULL; 3099 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3100 if (error == 0) 3101 error = nlookup(&nd); 3102 if (error == 0) 3103 error = ncp_writechk(&nd.nl_nch); 3104 if (error == 0) 3105 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3106 nlookup_done_at(&nd, fp); 3107 if (error == 0) { 3108 error = setfflags(vp, uap->flags); 3109 vrele(vp); 3110 } 3111 return (error); 3112 } 3113 3114 3115 static int 3116 setfmode(struct vnode *vp, int mode) 3117 { 3118 struct thread *td = curthread; 3119 int error; 3120 struct vattr vattr; 3121 3122 /* 3123 * note: vget is required for any operation that might mod the vnode 3124 * so VINACTIVE is properly cleared. 
3125 */ 3126 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3127 VATTR_NULL(&vattr); 3128 vattr.va_mode = mode & ALLPERMS; 3129 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3130 vput(vp); 3131 } 3132 return error; 3133 } 3134 3135 int 3136 kern_chmod(struct nlookupdata *nd, int mode) 3137 { 3138 struct vnode *vp; 3139 int error; 3140 3141 if ((error = nlookup(nd)) != 0) 3142 return (error); 3143 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3144 return (error); 3145 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3146 error = setfmode(vp, mode); 3147 vrele(vp); 3148 return (error); 3149 } 3150 3151 /* 3152 * chmod_args(char *path, int mode) 3153 * 3154 * Change mode of a file given path name. 3155 */ 3156 int 3157 sys_chmod(struct chmod_args *uap) 3158 { 3159 struct nlookupdata nd; 3160 int error; 3161 3162 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3163 if (error == 0) 3164 error = kern_chmod(&nd, uap->mode); 3165 nlookup_done(&nd); 3166 return (error); 3167 } 3168 3169 /* 3170 * lchmod_args(char *path, int mode) 3171 * 3172 * Change mode of a file given path name (don't follow links.) 3173 */ 3174 int 3175 sys_lchmod(struct lchmod_args *uap) 3176 { 3177 struct nlookupdata nd; 3178 int error; 3179 3180 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3181 if (error == 0) 3182 error = kern_chmod(&nd, uap->mode); 3183 nlookup_done(&nd); 3184 return (error); 3185 } 3186 3187 /* 3188 * fchmod_args(int fd, int mode) 3189 * 3190 * Change mode of a file given a file descriptor. 3191 */ 3192 int 3193 sys_fchmod(struct fchmod_args *uap) 3194 { 3195 struct thread *td = curthread; 3196 struct proc *p = td->td_proc; 3197 struct file *fp; 3198 int error; 3199 3200 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3201 return (error); 3202 if (fp->f_nchandle.ncp) 3203 error = ncp_writechk(&fp->f_nchandle); 3204 if (error == 0) 3205 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3206 fdrop(fp); 3207 return (error); 3208 } 3209 3210 /* 3211 * fchmodat_args(char *path, int mode) 3212 * 3213 * Change mode of a file pointed to by fd/path. 3214 */ 3215 int 3216 sys_fchmodat(struct fchmodat_args *uap) 3217 { 3218 struct nlookupdata nd; 3219 struct file *fp; 3220 int error; 3221 int flags; 3222 3223 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3224 return (EINVAL); 3225 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3226 3227 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3228 UIO_USERSPACE, flags); 3229 if (error == 0) 3230 error = kern_chmod(&nd, uap->mode); 3231 nlookup_done_at(&nd, fp); 3232 return (error); 3233 } 3234 3235 static int 3236 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3237 { 3238 struct thread *td = curthread; 3239 int error; 3240 struct vattr vattr; 3241 uid_t o_uid; 3242 gid_t o_gid; 3243 uint64_t size; 3244 3245 /* 3246 * note: vget is required for any operation that might mod the vnode 3247 * so VINACTIVE is properly cleared. 
3248 */ 3249 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3250 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3251 return error; 3252 o_uid = vattr.va_uid; 3253 o_gid = vattr.va_gid; 3254 size = vattr.va_size; 3255 3256 VATTR_NULL(&vattr); 3257 vattr.va_uid = uid; 3258 vattr.va_gid = gid; 3259 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3260 vput(vp); 3261 } 3262 3263 if (error == 0) { 3264 if (uid == -1) 3265 uid = o_uid; 3266 if (gid == -1) 3267 gid = o_gid; 3268 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3269 VFS_ACCOUNT(mp, uid, gid, size); 3270 } 3271 3272 return error; 3273 } 3274 3275 int 3276 kern_chown(struct nlookupdata *nd, int uid, int gid) 3277 { 3278 struct vnode *vp; 3279 int error; 3280 3281 if ((error = nlookup(nd)) != 0) 3282 return (error); 3283 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3284 return (error); 3285 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3286 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3287 vrele(vp); 3288 return (error); 3289 } 3290 3291 /* 3292 * chown(char *path, int uid, int gid) 3293 * 3294 * Set ownership given a path name. 3295 */ 3296 int 3297 sys_chown(struct chown_args *uap) 3298 { 3299 struct nlookupdata nd; 3300 int error; 3301 3302 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3303 if (error == 0) 3304 error = kern_chown(&nd, uap->uid, uap->gid); 3305 nlookup_done(&nd); 3306 return (error); 3307 } 3308 3309 /* 3310 * lchown_args(char *path, int uid, int gid) 3311 * 3312 * Set ownership given a path name, do not cross symlinks. 3313 */ 3314 int 3315 sys_lchown(struct lchown_args *uap) 3316 { 3317 struct nlookupdata nd; 3318 int error; 3319 3320 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3321 if (error == 0) 3322 error = kern_chown(&nd, uap->uid, uap->gid); 3323 nlookup_done(&nd); 3324 return (error); 3325 } 3326 3327 /* 3328 * fchown_args(int fd, int uid, int gid) 3329 * 3330 * Set ownership given a file descriptor. 3331 */ 3332 int 3333 sys_fchown(struct fchown_args *uap) 3334 { 3335 struct thread *td = curthread; 3336 struct proc *p = td->td_proc; 3337 struct file *fp; 3338 int error; 3339 3340 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3341 return (error); 3342 if (fp->f_nchandle.ncp) 3343 error = ncp_writechk(&fp->f_nchandle); 3344 if (error == 0) 3345 error = setfown(p->p_fd->fd_ncdir.mount, 3346 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3347 fdrop(fp); 3348 return (error); 3349 } 3350 3351 /* 3352 * fchownat(int fd, char *path, int uid, int gid, int flags) 3353 * 3354 * Set ownership of file pointed to by fd/path. 3355 */ 3356 int 3357 sys_fchownat(struct fchownat_args *uap) 3358 { 3359 struct nlookupdata nd; 3360 struct file *fp; 3361 int error; 3362 int flags; 3363 3364 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3365 return (EINVAL); 3366 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3367 3368 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3369 UIO_USERSPACE, flags); 3370 if (error == 0) 3371 error = kern_chown(&nd, uap->uid, uap->gid); 3372 nlookup_done_at(&nd, fp); 3373 return (error); 3374 } 3375 3376 3377 static int 3378 getutimes(struct timeval *tvp, struct timespec *tsp) 3379 { 3380 struct timeval tv[2]; 3381 int error; 3382 3383 if (tvp == NULL) { 3384 microtime(&tv[0]); 3385 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3386 tsp[1] = tsp[0]; 3387 } else { 3388 if ((error = itimerfix(tvp)) != 0) 3389 return (error); 3390 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3391 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3392 } 3393 return 0; 3394 } 3395 3396 static int 3397 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3398 { 3399 struct timespec tsnow; 3400 int error; 3401 3402 *nullflag = 0; 3403 nanotime(&tsnow); 3404 if (ts == NULL) { 3405 newts[0] = tsnow; 3406 newts[1] = tsnow; 3407 *nullflag = 1; 3408 return (0); 3409 } 3410 3411 newts[0] = ts[0]; 3412 newts[1] = ts[1]; 3413 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3414 return (0); 3415 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3416 *nullflag = 1; 3417 3418 if (newts[0].tv_nsec == UTIME_OMIT) 3419 newts[0].tv_sec = VNOVAL; 3420 else if (newts[0].tv_nsec == UTIME_NOW) 3421 newts[0] = tsnow; 3422 else if ((error = itimespecfix(&newts[0])) != 0) 3423 return (error); 3424 3425 if (newts[1].tv_nsec == UTIME_OMIT) 3426 newts[1].tv_sec = VNOVAL; 3427 else if (newts[1].tv_nsec == UTIME_NOW) 3428 newts[1] = tsnow; 3429 else if ((error = itimespecfix(&newts[1])) != 0) 3430 return (error); 3431 3432 return (0); 3433 } 3434 3435 static int 3436 setutimes(struct vnode *vp, struct vattr *vattr, 3437 const struct timespec *ts, int nullflag) 3438 { 3439 struct thread *td = curthread; 3440 int error; 3441 3442 VATTR_NULL(vattr); 3443 vattr->va_atime = ts[0]; 3444 vattr->va_mtime = ts[1]; 3445 if (nullflag) 3446 vattr->va_vaflags |= VA_UTIMES_NULL; 3447 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3448 3449 return error; 3450 } 3451 3452 int 3453 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3454 { 3455 struct timespec ts[2]; 3456 int error; 3457 3458 if (tptr) { 3459 if ((error = getutimes(tptr, ts)) != 0) 3460 return (error); 3461 } 3462 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3463 return (error); 3464 } 3465 3466 /* 3467 * utimes_args(char *path, struct timeval *tptr) 3468 * 3469 * Set the access and modification times of a file. 3470 */ 3471 int 3472 sys_utimes(struct utimes_args *uap) 3473 { 3474 struct timeval tv[2]; 3475 struct nlookupdata nd; 3476 int error; 3477 3478 if (uap->tptr) { 3479 error = copyin(uap->tptr, tv, sizeof(tv)); 3480 if (error) 3481 return (error); 3482 } 3483 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3484 if (error == 0) 3485 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3486 nlookup_done(&nd); 3487 return (error); 3488 } 3489 3490 /* 3491 * lutimes_args(char *path, struct timeval *tptr) 3492 * 3493 * Set the access and modification times of a file. 3494 */ 3495 int 3496 sys_lutimes(struct lutimes_args *uap) 3497 { 3498 struct timeval tv[2]; 3499 struct nlookupdata nd; 3500 int error; 3501 3502 if (uap->tptr) { 3503 error = copyin(uap->tptr, tv, sizeof(tv)); 3504 if (error) 3505 return (error); 3506 } 3507 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3508 if (error == 0) 3509 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3510 nlookup_done(&nd); 3511 return (error); 3512 } 3513 3514 /* 3515 * Set utimes on a file descriptor. The creds used to open the 3516 * file are used to determine whether the operation is allowed 3517 * or not. 3518 */ 3519 int 3520 kern_futimens(int fd, struct timespec *ts) 3521 { 3522 struct thread *td = curthread; 3523 struct proc *p = td->td_proc; 3524 struct timespec newts[2]; 3525 struct file *fp; 3526 struct vnode *vp; 3527 struct vattr vattr; 3528 int nullflag; 3529 int error; 3530 3531 error = getutimens(ts, newts, &nullflag); 3532 if (error) 3533 return (error); 3534 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3535 return (error); 3536 if (fp->f_nchandle.ncp) 3537 error = ncp_writechk(&fp->f_nchandle); 3538 if (error == 0) { 3539 vp = fp->f_data; 3540 error = vget(vp, LK_EXCLUSIVE); 3541 if (error == 0) { 3542 error = VOP_GETATTR(vp, &vattr); 3543 if (error == 0) { 3544 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3545 fp->f_cred); 3546 } 3547 if (error == 0) { 3548 error = setutimes(vp, &vattr, newts, nullflag); 3549 } 3550 vput(vp); 3551 } 3552 } 3553 fdrop(fp); 3554 return (error); 3555 } 3556 3557 /* 3558 * futimens_args(int fd, struct timespec *ts) 3559 * 3560 * Set the access and modification times of a file. 3561 */ 3562 int 3563 sys_futimens(struct futimens_args *uap) 3564 { 3565 struct timespec ts[2]; 3566 int error; 3567 3568 if (uap->ts) { 3569 error = copyin(uap->ts, ts, sizeof(ts)); 3570 if (error) 3571 return (error); 3572 } 3573 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3574 return (error); 3575 } 3576 3577 int 3578 kern_futimes(int fd, struct timeval *tptr) 3579 { 3580 struct timespec ts[2]; 3581 int error; 3582 3583 if (tptr) { 3584 if ((error = getutimes(tptr, ts)) != 0) 3585 return (error); 3586 } 3587 error = kern_futimens(fd, tptr ? ts : NULL); 3588 return (error); 3589 } 3590 3591 /* 3592 * futimes_args(int fd, struct timeval *tptr) 3593 * 3594 * Set the access and modification times of a file. 3595 */ 3596 int 3597 sys_futimes(struct futimes_args *uap) 3598 { 3599 struct timeval tv[2]; 3600 int error; 3601 3602 if (uap->tptr) { 3603 error = copyin(uap->tptr, tv, sizeof(tv)); 3604 if (error) 3605 return (error); 3606 } 3607 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3608 return (error); 3609 } 3610 3611 int 3612 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3613 { 3614 struct timespec newts[2]; 3615 struct vnode *vp; 3616 struct vattr vattr; 3617 int nullflag; 3618 int error; 3619 3620 if (flags & ~AT_SYMLINK_NOFOLLOW) 3621 return (EINVAL); 3622 3623 error = getutimens(ts, newts, &nullflag); 3624 if (error) 3625 return (error); 3626 3627 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3628 if ((error = nlookup(nd)) != 0) 3629 return (error); 3630 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3631 return (error); 3632 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3633 return (error); 3634 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3635 error = vget(vp, LK_EXCLUSIVE); 3636 if (error == 0) { 3637 error = setutimes(vp, &vattr, newts, nullflag); 3638 vput(vp); 3639 } 3640 } 3641 vrele(vp); 3642 return (error); 3643 } 3644 3645 /* 3646 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3647 * 3648 * Set file access and modification times of a file. 
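 *
 * Illustrative userland use (hypothetical file name), exercising the
 * UTIME_NOW / UTIME_OMIT handling performed by getutimens() above:
 *
 *	struct timespec ts[2];
 *	ts[0].tv_nsec = UTIME_OMIT;	(leave the access time alone)
 *	ts[1].tv_nsec = UTIME_NOW;	(set the modification time to "now")
 *	utimensat(AT_FDCWD, "somefile", ts, 0);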
3649 */ 3650 int 3651 sys_utimensat(struct utimensat_args *uap) 3652 { 3653 struct timespec ts[2]; 3654 struct nlookupdata nd; 3655 struct file *fp; 3656 int error; 3657 int flags; 3658 3659 if (uap->ts) { 3660 error = copyin(uap->ts, ts, sizeof(ts)); 3661 if (error) 3662 return (error); 3663 } 3664 3665 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3666 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3667 UIO_USERSPACE, flags); 3668 if (error == 0) 3669 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags); 3670 nlookup_done_at(&nd, fp); 3671 return (error); 3672 } 3673 3674 int 3675 kern_truncate(struct nlookupdata *nd, off_t length) 3676 { 3677 struct vnode *vp; 3678 struct vattr vattr; 3679 int error; 3680 uid_t uid = 0; 3681 gid_t gid = 0; 3682 uint64_t old_size = 0; 3683 3684 if (length < 0) 3685 return(EINVAL); 3686 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3687 if ((error = nlookup(nd)) != 0) 3688 return (error); 3689 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3690 return (error); 3691 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3692 return (error); 3693 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3694 if (error) { 3695 vrele(vp); 3696 return (error); 3697 } 3698 if (vp->v_type == VDIR) { 3699 error = EISDIR; 3700 goto done; 3701 } 3702 if (vfs_quota_enabled) { 3703 error = VOP_GETATTR(vp, &vattr); 3704 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3705 uid = vattr.va_uid; 3706 gid = vattr.va_gid; 3707 old_size = vattr.va_size; 3708 } 3709 3710 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3711 VATTR_NULL(&vattr); 3712 vattr.va_size = length; 3713 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3714 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3715 } 3716 done: 3717 vput(vp); 3718 return (error); 3719 } 3720 3721 /* 3722 * truncate(char *path, int pad, off_t length) 3723 * 3724 * Truncate a file given its path name. 
3725 */ 3726 int 3727 sys_truncate(struct truncate_args *uap) 3728 { 3729 struct nlookupdata nd; 3730 int error; 3731 3732 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3733 if (error == 0) 3734 error = kern_truncate(&nd, uap->length); 3735 nlookup_done(&nd); 3736 return error; 3737 } 3738 3739 int 3740 kern_ftruncate(int fd, off_t length) 3741 { 3742 struct thread *td = curthread; 3743 struct proc *p = td->td_proc; 3744 struct vattr vattr; 3745 struct vnode *vp; 3746 struct file *fp; 3747 int error; 3748 uid_t uid = 0; 3749 gid_t gid = 0; 3750 uint64_t old_size = 0; 3751 struct mount *mp; 3752 3753 if (length < 0) 3754 return(EINVAL); 3755 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3756 return (error); 3757 if (fp->f_nchandle.ncp) { 3758 error = ncp_writechk(&fp->f_nchandle); 3759 if (error) 3760 goto done; 3761 } 3762 if ((fp->f_flag & FWRITE) == 0) { 3763 error = EINVAL; 3764 goto done; 3765 } 3766 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3767 error = EINVAL; 3768 goto done; 3769 } 3770 vp = (struct vnode *)fp->f_data; 3771 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3772 if (vp->v_type == VDIR) { 3773 error = EISDIR; 3774 vn_unlock(vp); 3775 goto done; 3776 } 3777 3778 if (vfs_quota_enabled) { 3779 error = VOP_GETATTR(vp, &vattr); 3780 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3781 uid = vattr.va_uid; 3782 gid = vattr.va_gid; 3783 old_size = vattr.va_size; 3784 } 3785 3786 if ((error = vn_writechk(vp, NULL)) == 0) { 3787 VATTR_NULL(&vattr); 3788 vattr.va_size = length; 3789 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3790 mp = vq_vptomp(vp); 3791 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3792 } 3793 vn_unlock(vp); 3794 done: 3795 fdrop(fp); 3796 return (error); 3797 } 3798 3799 /* 3800 * ftruncate_args(int fd, int pad, off_t length) 3801 * 3802 * Truncate a file given a file descriptor. 3803 */ 3804 int 3805 sys_ftruncate(struct ftruncate_args *uap) 3806 { 3807 int error; 3808 3809 error = kern_ftruncate(uap->fd, uap->length); 3810 3811 return (error); 3812 } 3813 3814 /* 3815 * fsync(int fd) 3816 * 3817 * Sync an open file. 
3818 */ 3819 int 3820 sys_fsync(struct fsync_args *uap) 3821 { 3822 struct thread *td = curthread; 3823 struct proc *p = td->td_proc; 3824 struct vnode *vp; 3825 struct file *fp; 3826 vm_object_t obj; 3827 int error; 3828 3829 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3830 return (error); 3831 vp = (struct vnode *)fp->f_data; 3832 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3833 if ((obj = vp->v_object) != NULL) { 3834 if (vp->v_mount == NULL || 3835 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3836 vm_object_page_clean(obj, 0, 0, 0); 3837 } 3838 } 3839 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3840 if (error == 0 && vp->v_mount) 3841 error = buf_fsync(vp); 3842 vn_unlock(vp); 3843 fdrop(fp); 3844 3845 return (error); 3846 } 3847 3848 int 3849 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3850 { 3851 struct nchandle fnchd; 3852 struct nchandle tnchd; 3853 struct namecache *ncp; 3854 struct vnode *fdvp; 3855 struct vnode *tdvp; 3856 struct mount *mp; 3857 int error; 3858 u_int fncp_gen; 3859 u_int tncp_gen; 3860 3861 bwillinode(1); 3862 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3863 if ((error = nlookup(fromnd)) != 0) 3864 return (error); 3865 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3866 return (ENOENT); 3867 fnchd.mount = fromnd->nl_nch.mount; 3868 cache_hold(&fnchd); 3869 3870 /* 3871 * unlock the source nch so we can lookup the target nch without 3872 * deadlocking. The target may or may not exist so we do not check 3873 * for a target vp like kern_mkdir() and other creation functions do. 3874 * 3875 * The source and target directories are ref'd and rechecked after 3876 * everything is relocked to determine if the source or target file 3877 * has been renamed. 3878 */ 3879 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3880 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3881 3882 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 3883 3884 cache_unlock(&fromnd->nl_nch); 3885 3886 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3887 if ((error = nlookup(tond)) != 0) { 3888 cache_drop(&fnchd); 3889 return (error); 3890 } 3891 tncp_gen = tond->nl_nch.ncp->nc_generation; 3892 3893 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3894 cache_drop(&fnchd); 3895 return (ENOENT); 3896 } 3897 tnchd.mount = tond->nl_nch.mount; 3898 cache_hold(&tnchd); 3899 3900 /* 3901 * If the source and target are the same there is nothing to do 3902 */ 3903 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3904 cache_drop(&fnchd); 3905 cache_drop(&tnchd); 3906 return (0); 3907 } 3908 3909 /* 3910 * Mount points cannot be renamed or overwritten 3911 */ 3912 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3913 NCF_ISMOUNTPT 3914 ) { 3915 cache_drop(&fnchd); 3916 cache_drop(&tnchd); 3917 return (EINVAL); 3918 } 3919 3920 /* 3921 * Relock the source ncp. cache_relock() will deal with any 3922 * deadlocks against the already-locked tond and will also 3923 * make sure both are resolved. 3924 * 3925 * NOTE AFTER RELOCKING: The source or target ncp may have become 3926 * invalid while they were unlocked, nc_vp and nc_mount could 3927 * be NULL. 3928 */ 3929 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3930 &tond->nl_nch, tond->nl_cred); 3931 fromnd->nl_flags |= NLC_NCPISLOCKED; 3932 3933 /* 3934 * If the namecache generation changed for either fromnd or tond, 3935 * we must retry. 
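 * The EAGAIN returned below is handled by the sys_rename() and
 * sys_renameat() wrappers, which loop and restart the whole lookup until
 * the generation numbers are stable.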
3936 */ 3937 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 3938 tond->nl_nch.ncp->nc_generation != tncp_gen) { 3939 kprintf("kern_rename: retry due to gen on: " 3940 "\"%s\" -> \"%s\"\n", 3941 fromnd->nl_nch.ncp->nc_name, 3942 tond->nl_nch.ncp->nc_name); 3943 cache_drop(&fnchd); 3944 cache_drop(&tnchd); 3945 return (EAGAIN); 3946 } 3947 3948 /* 3949 * If either fromnd or tond is marked destroyed, a ripout occurred 3950 * out from under us and we must retry. 3951 */ 3952 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3953 fromnd->nl_nch.ncp->nc_vp == NULL || 3954 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3955 kprintf("kern_rename: retry due to ripout on: " 3956 "\"%s\" -> \"%s\"\n", 3957 fromnd->nl_nch.ncp->nc_name, 3958 tond->nl_nch.ncp->nc_name); 3959 cache_drop(&fnchd); 3960 cache_drop(&tnchd); 3961 return (EAGAIN); 3962 } 3963 3964 /* 3965 * Make sure the parent directories' linkages are the same. 3966 * XXX shouldn't be needed any more w/ generation check above. 3967 */ 3968 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3969 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3970 cache_drop(&fnchd); 3971 cache_drop(&tnchd); 3972 return (ENOENT); 3973 } 3974 3975 /* 3976 * Both the source and target must be within the same filesystem and 3977 * in the same filesystem as their parent directories within the 3978 * namecache topology. 3979 * 3980 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3981 */ 3982 mp = fnchd.mount; 3983 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3984 mp != tond->nl_nch.mount) { 3985 cache_drop(&fnchd); 3986 cache_drop(&tnchd); 3987 return (EXDEV); 3988 } 3989 3990 /* 3991 * Make sure the mount point is writable 3992 */ 3993 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3994 cache_drop(&fnchd); 3995 cache_drop(&tnchd); 3996 return (error); 3997 } 3998 3999 /* 4000 * If the target exists and either the source or target is a directory, 4001 * then both must be directories. 4002 * 4003 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might 4004 * have become NULL. 4005 */ 4006 if (tond->nl_nch.ncp->nc_vp) { 4007 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4008 error = ENOENT; 4009 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4010 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4011 error = ENOTDIR; 4012 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4013 error = EISDIR; 4014 } 4015 } 4016 4017 /* 4018 * You cannot rename a source into itself or a subdirectory of itself. 4019 * We check this by traversing the target directory upwards looking 4020 * for a match against the source. 4021 * 4022 * XXX MPSAFE 4023 */ 4024 if (error == 0) { 4025 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4026 if (fromnd->nl_nch.ncp == ncp) { 4027 error = EINVAL; 4028 break; 4029 } 4030 } 4031 } 4032 4033 cache_drop(&fnchd); 4034 cache_drop(&tnchd); 4035 4036 /* 4037 * Even though the namespaces are different, they may still represent 4038 * hardlinks to the same file. The filesystem might have a hard time 4039 * with this so we issue a NREMOVE of the source instead of a NRENAME 4040 * when we detect the situation.
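 * For example, rename("a", "b") where "a" and "b" are already hardlinks to
 * the same inode: the shared vnode is detected below and the source name is
 * simply removed.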
4041 */ 4042 if (error == 0) { 4043 fdvp = fromnd->nl_dvp; 4044 tdvp = tond->nl_dvp; 4045 if (fdvp == NULL || tdvp == NULL) { 4046 error = EPERM; 4047 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4048 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4049 fromnd->nl_cred); 4050 } else { 4051 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4052 fdvp, tdvp, tond->nl_cred); 4053 } 4054 } 4055 return (error); 4056 } 4057 4058 /* 4059 * rename_args(char *from, char *to) 4060 * 4061 * Rename files. Source and destination must either both be directories, 4062 * or both not be directories. If target is a directory, it must be empty. 4063 */ 4064 int 4065 sys_rename(struct rename_args *uap) 4066 { 4067 struct nlookupdata fromnd, tond; 4068 int error; 4069 4070 do { 4071 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4072 if (error == 0) { 4073 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4074 if (error == 0) 4075 error = kern_rename(&fromnd, &tond); 4076 nlookup_done(&tond); 4077 } 4078 nlookup_done(&fromnd); 4079 } while (error == EAGAIN); 4080 return (error); 4081 } 4082 4083 /* 4084 * renameat_args(int oldfd, char *old, int newfd, char *new) 4085 * 4086 * Rename files using paths relative to the directories associated with 4087 * oldfd and newfd. Source and destination must either both be directories, 4088 * or both not be directories. If target is a directory, it must be empty. 4089 */ 4090 int 4091 sys_renameat(struct renameat_args *uap) 4092 { 4093 struct nlookupdata oldnd, newnd; 4094 struct file *oldfp, *newfp; 4095 int error; 4096 4097 do { 4098 error = nlookup_init_at(&oldnd, &oldfp, 4099 uap->oldfd, uap->old, 4100 UIO_USERSPACE, 0); 4101 if (error == 0) { 4102 error = nlookup_init_at(&newnd, &newfp, 4103 uap->newfd, uap->new, 4104 UIO_USERSPACE, 0); 4105 if (error == 0) 4106 error = kern_rename(&oldnd, &newnd); 4107 nlookup_done_at(&newnd, newfp); 4108 } 4109 nlookup_done_at(&oldnd, oldfp); 4110 } while (error == EAGAIN); 4111 return (error); 4112 } 4113 4114 int 4115 kern_mkdir(struct nlookupdata *nd, int mode) 4116 { 4117 struct thread *td = curthread; 4118 struct proc *p = td->td_proc; 4119 struct vnode *vp; 4120 struct vattr vattr; 4121 int error; 4122 4123 bwillinode(1); 4124 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4125 if ((error = nlookup(nd)) != 0) 4126 return (error); 4127 4128 if (nd->nl_nch.ncp->nc_vp) 4129 return (EEXIST); 4130 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4131 return (error); 4132 VATTR_NULL(&vattr); 4133 vattr.va_type = VDIR; 4134 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4135 4136 vp = NULL; 4137 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4138 if (error == 0) 4139 vput(vp); 4140 return (error); 4141 } 4142 4143 /* 4144 * mkdir_args(char *path, int mode) 4145 * 4146 * Make a directory file. 4147 */ 4148 int 4149 sys_mkdir(struct mkdir_args *uap) 4150 { 4151 struct nlookupdata nd; 4152 int error; 4153 4154 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4155 if (error == 0) 4156 error = kern_mkdir(&nd, uap->mode); 4157 nlookup_done(&nd); 4158 return (error); 4159 } 4160 4161 /* 4162 * mkdirat_args(int fd, char *path, mode_t mode) 4163 * 4164 * Make a directory file. The path is relative to the directory associated 4165 * with fd. 
4166 */ 4167 int 4168 sys_mkdirat(struct mkdirat_args *uap) 4169 { 4170 struct nlookupdata nd; 4171 struct file *fp; 4172 int error; 4173 4174 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4175 if (error == 0) 4176 error = kern_mkdir(&nd, uap->mode); 4177 nlookup_done_at(&nd, fp); 4178 return (error); 4179 } 4180 4181 int 4182 kern_rmdir(struct nlookupdata *nd) 4183 { 4184 int error; 4185 4186 bwillinode(1); 4187 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4188 if ((error = nlookup(nd)) != 0) 4189 return (error); 4190 4191 /* 4192 * Do not allow directories representing mount points to be 4193 * deleted, even if empty. Check write perms on mount point 4194 * in case the vnode is aliased (aka nullfs). 4195 */ 4196 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4197 return (EBUSY); 4198 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4199 return (error); 4200 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4201 return (error); 4202 } 4203 4204 /* 4205 * rmdir_args(char *path) 4206 * 4207 * Remove a directory file. 4208 */ 4209 int 4210 sys_rmdir(struct rmdir_args *uap) 4211 { 4212 struct nlookupdata nd; 4213 int error; 4214 4215 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4216 if (error == 0) 4217 error = kern_rmdir(&nd); 4218 nlookup_done(&nd); 4219 return (error); 4220 } 4221 4222 int 4223 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4224 enum uio_seg direction) 4225 { 4226 struct thread *td = curthread; 4227 struct proc *p = td->td_proc; 4228 struct vnode *vp; 4229 struct file *fp; 4230 struct uio auio; 4231 struct iovec aiov; 4232 off_t loff; 4233 int error, eofflag; 4234 4235 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4236 return (error); 4237 if ((fp->f_flag & FREAD) == 0) { 4238 error = EBADF; 4239 goto done; 4240 } 4241 vp = (struct vnode *)fp->f_data; 4242 if (vp->v_type != VDIR) { 4243 error = EINVAL; 4244 goto done; 4245 } 4246 aiov.iov_base = buf; 4247 aiov.iov_len = count; 4248 auio.uio_iov = &aiov; 4249 auio.uio_iovcnt = 1; 4250 auio.uio_rw = UIO_READ; 4251 auio.uio_segflg = direction; 4252 auio.uio_td = td; 4253 auio.uio_resid = count; 4254 loff = auio.uio_offset = fp->f_offset; 4255 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4256 fp->f_offset = auio.uio_offset; 4257 if (error) 4258 goto done; 4259 4260 /* 4261 * WARNING! *basep may not be wide enough to accommodate the 4262 * seek offset. XXX should we hack this to return the upper 32 bits 4263 * for offsets greater than 4G? 4264 */ 4265 if (basep) { 4266 *basep = (long)loff; 4267 } 4268 *res = count - auio.uio_resid; 4269 done: 4270 fdrop(fp); 4271 return (error); 4272 } 4273 4274 /* 4275 * getdirentries_args(int fd, char *buf, u_int count, long *basep) 4276 * 4277 * Read a block of directory entries in a file system independent format.
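 *
 * Illustrative userland use (hypothetical buffer size):
 *
 *	char buf[4096];
 *	long base;
 *	int n = getdirentries(fd, buf, sizeof(buf), &base);
 *
 * The returned buffer is then walked one struct dirent at a time using
 * d_reclen.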
4278 */ 4279 int 4280 sys_getdirentries(struct getdirentries_args *uap) 4281 { 4282 long base; 4283 int error; 4284 4285 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4286 &uap->sysmsg_result, UIO_USERSPACE); 4287 4288 if (error == 0 && uap->basep) 4289 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4290 return (error); 4291 } 4292 4293 /* 4294 * getdents_args(int fd, char *buf, size_t count) 4295 */ 4296 int 4297 sys_getdents(struct getdents_args *uap) 4298 { 4299 int error; 4300 4301 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4302 &uap->sysmsg_result, UIO_USERSPACE); 4303 4304 return (error); 4305 } 4306 4307 /* 4308 * Set the mode mask for creation of filesystem nodes. 4309 * 4310 * umask(int newmask) 4311 */ 4312 int 4313 sys_umask(struct umask_args *uap) 4314 { 4315 struct thread *td = curthread; 4316 struct proc *p = td->td_proc; 4317 struct filedesc *fdp; 4318 4319 fdp = p->p_fd; 4320 uap->sysmsg_result = fdp->fd_cmask; 4321 fdp->fd_cmask = uap->newmask & ALLPERMS; 4322 return (0); 4323 } 4324 4325 /* 4326 * revoke(char *path) 4327 * 4328 * Void all references to file by ripping underlying filesystem 4329 * away from vnode. 4330 */ 4331 int 4332 sys_revoke(struct revoke_args *uap) 4333 { 4334 struct nlookupdata nd; 4335 struct vattr vattr; 4336 struct vnode *vp; 4337 struct ucred *cred; 4338 int error; 4339 4340 vp = NULL; 4341 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4342 if (error == 0) 4343 error = nlookup(&nd); 4344 if (error == 0) 4345 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4346 cred = crhold(nd.nl_cred); 4347 nlookup_done(&nd); 4348 if (error == 0) { 4349 if (error == 0) 4350 error = VOP_GETATTR(vp, &vattr); 4351 if (error == 0 && cred->cr_uid != vattr.va_uid) 4352 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4353 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4354 if (vcount(vp) > 0) 4355 error = vrevoke(vp, cred); 4356 } else if (error == 0) { 4357 error = vrevoke(vp, cred); 4358 } 4359 vrele(vp); 4360 } 4361 if (cred) 4362 crfree(cred); 4363 return (error); 4364 } 4365 4366 /* 4367 * getfh_args(char *fname, fhandle_t *fhp) 4368 * 4369 * Get (NFS) file handle 4370 * 4371 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4372 * mount. This allows nullfs mounts to be explicitly exported. 4373 * 4374 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4375 * 4376 * nullfs mounts of subdirectories are not safe. That is, it will 4377 * work, but you do not really have protection against access to 4378 * the related parent directories. 
4379 */ 4380 int 4381 sys_getfh(struct getfh_args *uap) 4382 { 4383 struct thread *td = curthread; 4384 struct nlookupdata nd; 4385 fhandle_t fh; 4386 struct vnode *vp; 4387 struct mount *mp; 4388 int error; 4389 4390 /* 4391 * Must be super user 4392 */ 4393 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4394 return (error); 4395 4396 vp = NULL; 4397 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4398 if (error == 0) 4399 error = nlookup(&nd); 4400 if (error == 0) 4401 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4402 mp = nd.nl_nch.mount; 4403 nlookup_done(&nd); 4404 if (error == 0) { 4405 bzero(&fh, sizeof(fh)); 4406 fh.fh_fsid = mp->mnt_stat.f_fsid; 4407 error = VFS_VPTOFH(vp, &fh.fh_fid); 4408 vput(vp); 4409 if (error == 0) 4410 error = copyout(&fh, uap->fhp, sizeof(fh)); 4411 } 4412 return (error); 4413 } 4414 4415 /* 4416 * fhopen_args(const struct fhandle *u_fhp, int flags) 4417 * 4418 * syscall for the rpc.lockd to use to translate a NFS file handle into 4419 * an open descriptor. 4420 * 4421 * warning: do not remove the priv_check() call or this becomes one giant 4422 * security hole. 4423 */ 4424 int 4425 sys_fhopen(struct fhopen_args *uap) 4426 { 4427 struct thread *td = curthread; 4428 struct filedesc *fdp = td->td_proc->p_fd; 4429 struct mount *mp; 4430 struct vnode *vp; 4431 struct fhandle fhp; 4432 struct vattr vat; 4433 struct vattr *vap = &vat; 4434 struct flock lf; 4435 int fmode, mode, error = 0, type; 4436 struct file *nfp; 4437 struct file *fp; 4438 int indx; 4439 4440 /* 4441 * Must be super user 4442 */ 4443 error = priv_check(td, PRIV_ROOT); 4444 if (error) 4445 return (error); 4446 4447 fmode = FFLAGS(uap->flags); 4448 4449 /* 4450 * Why not allow a non-read/write open for our lockd? 4451 */ 4452 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4453 return (EINVAL); 4454 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4455 if (error) 4456 return(error); 4457 4458 /* 4459 * Find the mount point 4460 */ 4461 mp = vfs_getvfs(&fhp.fh_fsid); 4462 if (mp == NULL) { 4463 error = ESTALE; 4464 goto done; 4465 } 4466 /* now give me my vnode, it gets returned to me locked */ 4467 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4468 if (error) 4469 goto done; 4470 /* 4471 * from now on we have to make sure not 4472 * to forget about the vnode 4473 * any error that causes an abort must vput(vp) 4474 * just set error = err and 'goto bad;'. 4475 */ 4476 4477 /* 4478 * from vn_open 4479 */ 4480 if (vp->v_type == VLNK) { 4481 error = EMLINK; 4482 goto bad; 4483 } 4484 if (vp->v_type == VSOCK) { 4485 error = EOPNOTSUPP; 4486 goto bad; 4487 } 4488 mode = 0; 4489 if (fmode & (FWRITE | O_TRUNC)) { 4490 if (vp->v_type == VDIR) { 4491 error = EISDIR; 4492 goto bad; 4493 } 4494 error = vn_writechk(vp, NULL); 4495 if (error) 4496 goto bad; 4497 mode |= VWRITE; 4498 } 4499 if (fmode & FREAD) 4500 mode |= VREAD; 4501 if (mode) { 4502 error = VOP_ACCESS(vp, mode, td->td_ucred); 4503 if (error) 4504 goto bad; 4505 } 4506 if (fmode & O_TRUNC) { 4507 vn_unlock(vp); /* XXX */ 4508 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4509 VATTR_NULL(vap); 4510 vap->va_size = 0; 4511 error = VOP_SETATTR(vp, vap, td->td_ucred); 4512 if (error) 4513 goto bad; 4514 } 4515 4516 /* 4517 * VOP_OPEN needs the file pointer so it can potentially override 4518 * it. 4519 * 4520 * WARNING! no f_nchandle will be associated when fhopen()ing a 4521 * directory. 

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 *	    directory.  XXX
	 */
	if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
		goto bad;
	fp = nfp;

	error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.  Since
		 * the VOP_OPEN failed we don't want to VOP_CLOSE.
		 */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		goto bad_drop;
	}

	/*
	 * The fp is given its own reference, we still have our ref and lock.
	 *
	 * Assert that all regular files must be created with a VM object.
	 */
	if (vp->v_type == VREG && vp->v_object == NULL) {
		kprintf("fhopen: regular file did not have VM object: %p\n",
			vp);
		goto bad_drop;
	}

	/*
	 * The open was successful.  Handle any locking requirements.
	 */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (fmode & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;
		vn_unlock(vp);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
					 &lf, type)) != 0) {
			/*
			 * release our private reference.
			 */
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			vrele(vp);
			goto done;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_set_int(&fp->f_flag, FHASLOCK);	/* race ok */
	}

	/*
	 * Clean up.  Associate the file pointer with the previously
	 * reserved descriptor and return it.
	 */
	vput(vp);
	if (uap->flags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	return (error);

bad_drop:
	fsetfd(fdp, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
done:
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		error = ESTALE;
	if (error == 0) {
		if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
			error = vn_stat(vp, &sb, td->td_ucred);
			vput(vp);
		}
	}
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}
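
/*
 * Illustrative userspace sketch (not part of this file): how a
 * privileged daemon along the lines of rpc.lockd can convert a path to
 * a file handle with getfh(2) and later reopen or stat it by handle via
 * fhopen(2)/fhstat(2).  All three wrappers require root, matching the
 * priv_check() calls above; prototypes are assumed from <sys/mount.h>.
 */
#if 0
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <fcntl.h>

static int
reopen_by_handle(const char *path)
{
	fhandle_t fh;
	struct stat st;

	if (getfh(path, &fh) < 0)	/* handle can be stashed or passed on */
		return (-1);
	if (fhstat(&fh, &st) < 0)	/* stat by handle, no path needed */
		return (-1);
	/* fhopen() requires FREAD and/or FWRITE and rejects O_CREAT. */
	return (fhopen(&fh, O_RDWR));
}
#endif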

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	if (priv_check(td, PRIV_ROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	return (error);
}
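
/*
 * Illustrative userspace sketch (not part of this file): querying
 * filesystem statistics by file handle.  Both calls require root, as
 * enforced above; fhstatfs(2) rewrites f_mntonname from the mount path
 * and fhstatvfs(2) reports only the ST_RDONLY/ST_NOSUID flags.  The
 * wrappers and headers are assumed (<sys/mount.h>, <sys/statvfs.h>).
 */
#if 0
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <stdio.h>

static void
report_fs(const fhandle_t *fh)
{
	struct statfs sfs;
	struct statvfs svfs;

	if (fhstatfs(fh, &sfs) == 0)
		printf("%s on %s\n", sfs.f_mntfromname, sfs.f_mntonname);
	if (fhstatvfs(fh, &svfs) == 0 && (svfs.f_flag & ST_RDONLY))
		printf("mounted read-only\n");
}
#endif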

/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 * The attribute data is written through VOP_SETEXTATTR().
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}
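
/*
 * Illustrative userspace sketch (not part of this file): round-tripping
 * a named attribute in the user namespace with the extattr_set_file(2)
 * and extattr_get_file(2) wrappers whose kernel sides appear here.  The
 * prototypes and EXTATTR_NAMESPACE_USER are assumed from
 * <sys/extattr.h>; the get call returns the number of bytes copied out.
 */
#if 0
#include <sys/types.h>
#include <sys/extattr.h>
#include <string.h>

static int
tag_file(const char *path)
{
	const char *val = "blue";
	char buf[64];
	ssize_t n;

	if (extattr_set_file(path, EXTATTR_NAMESPACE_USER, "color",
	    val, strlen(val)) < 0)
		return (-1);
	n = extattr_get_file(path, EXTATTR_NAMESPACE_USER, "color",
	    buf, sizeof(buf));
	return (n < 0 ? -1 : 0);
}
#endif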

/*
 * Syscall to get a named extended attribute on a file or directory.
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	uap->sysmsg_result = uap->nbytes - auio.uio_resid;

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return (1);
		}
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return (1);

	return (0);
}
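
/*
 * Simplified model of the walk chroot_visible_mnt() performs, using
 * hypothetical stand-in types rather than the real nchandle/mount
 * structures: follow parent links upward and, on reaching the root of
 * the current filesystem, hop to the directory it is mounted on; the
 * mount is visible if the process root is crossed on the way.  The
 * final "process root lives inside this mount" fallback above is
 * omitted from the sketch.
 */
#if 0
struct dnode {				/* hypothetical directory node */
	struct dnode	*parent;	/* parent dir, NULL at a fs root */
	struct dnode	*mounted_on;	/* covering dir if this is a fs root */
};

static int
mount_visible(struct dnode *mntpt, struct dnode *procroot)
{
	struct dnode *n;

	for (n = mntpt; n != NULL;
	     n = (n->parent != NULL) ? n->parent : n->mounted_on) {
		if (n == procroot)
			return (1);
	}
	return (0);
}
#endif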