1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 #include <sys/mplock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
		    const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Handles both a fresh mount and an update of an existing mount
 * (MNT_UPDATE).  Returns 0 on success or an errno value.
 *
 * Permission rules enforced here:
 *  - jailed callers always get EPERM;
 *  - PRIV_ROOT is required unless the vfs.usermount sysctl is non-zero;
 *  - MNT_EXPORTED always requires PRIV_ROOT;
 *  - callers without PRIV_ROOT get MNT_NOSUID | MNT_NODEV forced on;
 *  - a new mount requires that the caller owns the covered directory
 *    (or has PRIV_ROOT); an update requires that the caller did the
 *    original mount (or has PRIV_ROOT).
 *
 * Errors generated directly by this function:
 *  ENOENT	path resolves to a namecache entry without a vnode
 *  EINVAL	MNT_UPDATE on a vnode that is not a filesystem root
 *  EOPNOTSUPP	MNT_RELOAD on a filesystem not mounted read-only
 *  EBUSY	mount could not be busied, or something is already mounted
 *		on the target
 *  ENOTDIR	target of a new mount is not a directory
 *  EPERM	jailed caller, or underlying mount has MNTK_NOSTKMNT set
 *  ENODEV	filesystem type unknown even after trying to load a module
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved flags for update rollback */
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this namecache
	 * entry.  The result is only acted upon later (EBUSY), after the
	 * cheaper validity checks have run.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a "null" filesystem type the update applies to the
		 * mount found by the lookup, not to the vnode's own mount.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Saved so a failed VFS_MOUNT() can be rolled back below. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		/*
		 * NOTE(review): lf is tested below even when
		 * linker_load_file() fails; this relies on that function
		 * always storing to *lf -- confirm.
		 */
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: drop the transient request flags and, if
		 * VFS_MOUNT() failed, restore the flags saved on entry.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_ismounting(mp);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below, so a failure to
		 * allocate the syncer vnode is never reported.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * New mount failed: undo everything set up since the
		 * kmalloc() above and release the nch ref and the vnode.
		 */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
447 */ 448 struct checkdirs_info { 449 struct nchandle old_nch; 450 struct nchandle new_nch; 451 struct vnode *old_vp; 452 struct vnode *new_vp; 453 }; 454 455 static int checkdirs_callback(struct proc *p, void *data); 456 457 static void 458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 459 { 460 struct checkdirs_info info; 461 struct vnode *olddp; 462 struct vnode *newdp; 463 struct mount *mp; 464 465 /* 466 * If the old mount point's vnode has a usecount of 1, it is not 467 * being held as a descriptor anywhere. 468 */ 469 olddp = old_nch->ncp->nc_vp; 470 if (olddp == NULL || VREFCNT(olddp) == 1) 471 return; 472 473 /* 474 * Force the root vnode of the new mount point to be resolved 475 * so we can update any matching processes. 476 */ 477 mp = new_nch->mount; 478 if (VFS_ROOT(mp, &newdp)) 479 panic("mount: lost mount"); 480 vn_unlock(newdp); 481 cache_lock(new_nch); 482 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 483 cache_setunresolved(new_nch); 484 cache_setvp(new_nch, newdp); 485 cache_unlock(new_nch); 486 487 /* 488 * Special handling of the root node 489 */ 490 if (rootvnode == olddp) { 491 vref(newdp); 492 vfs_cache_setroot(newdp, cache_hold(new_nch)); 493 } 494 495 /* 496 * Pass newdp separately so the callback does not have to access 497 * it via new_nch->ncp->nc_vp. 498 */ 499 info.old_nch = *old_nch; 500 info.new_nch = *new_nch; 501 info.new_vp = newdp; 502 allproc_scan(checkdirs_callback, &info); 503 vput(newdp); 504 } 505 506 /* 507 * NOTE: callback is not MP safe because the scanned process's filedesc 508 * structure can be ripped out from under us, amoung other things. 
509 */ 510 static int 511 checkdirs_callback(struct proc *p, void *data) 512 { 513 struct checkdirs_info *info = data; 514 struct filedesc *fdp; 515 struct nchandle ncdrop1; 516 struct nchandle ncdrop2; 517 struct vnode *vprele1; 518 struct vnode *vprele2; 519 520 if ((fdp = p->p_fd) != NULL) { 521 cache_zero(&ncdrop1); 522 cache_zero(&ncdrop2); 523 vprele1 = NULL; 524 vprele2 = NULL; 525 526 /* 527 * MPUNSAFE - XXX fdp can be pulled out from under a 528 * foreign process. 529 * 530 * A shared filedesc is ok, we don't have to copy it 531 * because we are making this change globally. 532 */ 533 spin_lock(&fdp->fd_spin); 534 if (fdp->fd_ncdir.mount == info->old_nch.mount && 535 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 536 vprele1 = fdp->fd_cdir; 537 vref(info->new_vp); 538 fdp->fd_cdir = info->new_vp; 539 ncdrop1 = fdp->fd_ncdir; 540 cache_copy(&info->new_nch, &fdp->fd_ncdir); 541 } 542 if (fdp->fd_nrdir.mount == info->old_nch.mount && 543 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 544 vprele2 = fdp->fd_rdir; 545 vref(info->new_vp); 546 fdp->fd_rdir = info->new_vp; 547 ncdrop2 = fdp->fd_nrdir; 548 cache_copy(&info->new_nch, &fdp->fd_nrdir); 549 } 550 spin_unlock(&fdp->fd_spin); 551 if (ncdrop1.ncp) 552 cache_drop(&ncdrop1); 553 if (ncdrop2.ncp) 554 cache_drop(&ncdrop2); 555 if (vprele1) 556 vrele(vprele1); 557 if (vprele2) 558 vrele(vprele2); 559 } 560 return(0); 561 } 562 563 /* 564 * Unmount a file system. 565 * 566 * Note: unmount takes a path to the vnode mounted on as argument, 567 * not special file (as before). 
568 * 569 * umount_args(char *path, int flags) 570 * 571 * MPALMOSTSAFE 572 */ 573 int 574 sys_unmount(struct unmount_args *uap) 575 { 576 struct thread *td = curthread; 577 struct proc *p __debugvar = td->td_proc; 578 struct mount *mp = NULL; 579 struct nlookupdata nd; 580 int error; 581 582 KKASSERT(p); 583 get_mplock(); 584 if (td->td_ucred->cr_prison != NULL) { 585 error = EPERM; 586 goto done; 587 } 588 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 589 goto done; 590 591 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 592 if (error == 0) 593 error = nlookup(&nd); 594 if (error) 595 goto out; 596 597 mp = nd.nl_nch.mount; 598 599 /* 600 * Only root, or the user that did the original mount is 601 * permitted to unmount this filesystem. 602 */ 603 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 604 (error = priv_check(td, PRIV_ROOT))) 605 goto out; 606 607 /* 608 * Don't allow unmounting the root file system. 609 */ 610 if (mp->mnt_flag & MNT_ROOTFS) { 611 error = EINVAL; 612 goto out; 613 } 614 615 /* 616 * Must be the root of the filesystem 617 */ 618 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 619 error = EINVAL; 620 goto out; 621 } 622 623 out: 624 nlookup_done(&nd); 625 if (error == 0) 626 error = dounmount(mp, uap->flags); 627 done: 628 rel_mplock(); 629 return (error); 630 } 631 632 /* 633 * Do the actual file system unmount. 
*/
/*
 * Interlock callback passed to mountlist_interlock() by dounmount().
 * Marks the mount as being unmounted; returns EBUSY if it already
 * carries MNTK_UNMOUNT.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: drop a process's p_textnch handle when it
 * refers to the mount being unmounted (passed in arg).  Always returns 0.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Unmount the filesystem described by mp.
 *
 * flags is tested for MNT_FORCE here and is also passed through to
 * VFS_UNMOUNT().  Without MNT_FORCE, leftover namecache or mount
 * references make the call fail with EBUSY and the mount is restored
 * to service.  With MNT_FORCE the unmount proceeds, but the mount
 * structure is then left allocated (freeok == 0) instead of being freed.
 *
 * Returns 0 on success or an errno value.  On success with freeok set
 * the mount structure has been freed and must not be touched by the
 * caller.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* Only a non-forced unmount passes LK_TIMELOCK to lockmgr(). */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* MNT_ASYNC is cleared for the duration and restored on failure. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * First pass: if references remain, have every process
		 * drop a p_textnch handle that points into this mount.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		/* Second pass: re-check after the scan. */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 *
	 * NOTE: this runs even when error was already set to EBUSY above;
	 * the error path further down re-allocates the syncer vnode.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if ((error == 0) ||
			    (error == EOPNOTSUPP) || /* No sync */
			    (flags & MNT_FORCE)) {
				error = VFS_UNMOUNT(mp, flags);
			}
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/*
	 * Detach both namecache handles from the mount.  Each handle is
	 * copied out and zeroed in the mount structure before it is
	 * cleared and dropped.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;	/* token already released; skip it at out: */
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print an "unmount(...): " diagnostic for mp on the console, followed
 * by the printf-style message built from ctl and its arguments.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
878 { 879 char *ptr; 880 char *buf; 881 __va_list va; 882 883 __va_start(va, ctl); 884 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 885 &ptr, &buf, 0) == 0) { 886 kprintf("unmount(%s): ", ptr); 887 kvprintf(ctl, va); 888 kprintf("\n"); 889 kfree(buf, M_TEMP); 890 } else { 891 kprintf("unmount(%p", mp); 892 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 893 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 894 kprintf("): "); 895 kvprintf(ctl, va); 896 kprintf("\n"); 897 } 898 __va_end(va); 899 } 900 901 /* 902 * Shim cache_fullpath() to handle the case where a process is chrooted into 903 * a subdirectory of a mount. In this case if the root mount matches the 904 * process root directory's mount we have to specify the process's root 905 * directory instead of the mount point, because the mount point might 906 * be above the root directory. 907 */ 908 static 909 int 910 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 911 { 912 struct nchandle *nch; 913 914 if (p && p->p_fd->fd_nrdir.mount == mp) 915 nch = &p->p_fd->fd_nrdir; 916 else 917 nch = &mp->mnt_ncmountpt; 918 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 919 } 920 921 /* 922 * Sync each mounted filesystem. 
923 */ 924 925 #ifdef DEBUG 926 static int syncprt = 0; 927 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 928 #endif /* DEBUG */ 929 930 static int sync_callback(struct mount *mp, void *data); 931 932 int 933 sys_sync(struct sync_args *uap) 934 { 935 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 936 return (0); 937 } 938 939 static 940 int 941 sync_callback(struct mount *mp, void *data __unused) 942 { 943 int asyncflag; 944 945 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 946 asyncflag = mp->mnt_flag & MNT_ASYNC; 947 mp->mnt_flag &= ~MNT_ASYNC; 948 vfs_msync(mp, MNT_NOWAIT); 949 VFS_SYNC(mp, MNT_NOWAIT); 950 mp->mnt_flag |= asyncflag; 951 } 952 return(0); 953 } 954 955 /* XXX PRISON: could be per prison flag */ 956 static int prison_quotas; 957 #if 0 958 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 959 #endif 960 961 /* 962 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 963 * 964 * Change filesystem quotas. 965 * 966 * MPALMOSTSAFE 967 */ 968 int 969 sys_quotactl(struct quotactl_args *uap) 970 { 971 struct nlookupdata nd; 972 struct thread *td; 973 struct mount *mp; 974 int error; 975 976 get_mplock(); 977 td = curthread; 978 if (td->td_ucred->cr_prison && !prison_quotas) { 979 error = EPERM; 980 goto done; 981 } 982 983 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 984 if (error == 0) 985 error = nlookup(&nd); 986 if (error == 0) { 987 mp = nd.nl_nch.mount; 988 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 989 uap->arg, nd.nl_cred); 990 } 991 nlookup_done(&nd); 992 done: 993 rel_mplock(); 994 return (error); 995 } 996 997 /* 998 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 999 * void *buf, int buflen) 1000 * 1001 * This function operates on a mount point and executes the specified 1002 * operation using the specified control data, and possibly returns data. 
1003 * 1004 * The actual number of bytes stored in the result buffer is returned, 0 1005 * if none, otherwise an error is returned. 1006 * 1007 * MPALMOSTSAFE 1008 */ 1009 int 1010 sys_mountctl(struct mountctl_args *uap) 1011 { 1012 struct thread *td = curthread; 1013 struct proc *p = td->td_proc; 1014 struct file *fp; 1015 void *ctl = NULL; 1016 void *buf = NULL; 1017 char *path = NULL; 1018 int error; 1019 1020 /* 1021 * Sanity and permissions checks. We must be root. 1022 */ 1023 KKASSERT(p); 1024 if (td->td_ucred->cr_prison != NULL) 1025 return (EPERM); 1026 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1027 (error = priv_check(td, PRIV_ROOT)) != 0) 1028 return (error); 1029 1030 /* 1031 * Argument length checks 1032 */ 1033 if (uap->ctllen < 0 || uap->ctllen > 1024) 1034 return (EINVAL); 1035 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1036 return (EINVAL); 1037 if (uap->path == NULL) 1038 return (EINVAL); 1039 1040 /* 1041 * Allocate the necessary buffers and copyin data 1042 */ 1043 path = objcache_get(namei_oc, M_WAITOK); 1044 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1045 if (error) 1046 goto done; 1047 1048 if (uap->ctllen) { 1049 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1050 error = copyin(uap->ctl, ctl, uap->ctllen); 1051 if (error) 1052 goto done; 1053 } 1054 if (uap->buflen) 1055 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1056 1057 /* 1058 * Validate the descriptor 1059 */ 1060 if (uap->fd >= 0) { 1061 fp = holdfp(p->p_fd, uap->fd, -1); 1062 if (fp == NULL) { 1063 error = EBADF; 1064 goto done; 1065 } 1066 } else { 1067 fp = NULL; 1068 } 1069 1070 /* 1071 * Execute the internal kernel function and clean up. 
1072 */ 1073 get_mplock(); 1074 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1075 rel_mplock(); 1076 if (fp) 1077 fdrop(fp); 1078 if (error == 0 && uap->sysmsg_result > 0) 1079 error = copyout(buf, uap->buf, uap->sysmsg_result); 1080 done: 1081 if (path) 1082 objcache_put(namei_oc, path); 1083 if (ctl) 1084 kfree(ctl, M_TEMP); 1085 if (buf) 1086 kfree(buf, M_TEMP); 1087 return (error); 1088 } 1089 1090 /* 1091 * Execute a mount control operation by resolving the path to a mount point 1092 * and calling vop_mountctl(). 1093 * 1094 * Use the mount point from the nch instead of the vnode so nullfs mounts 1095 * can properly spike the VOP. 1096 */ 1097 int 1098 kern_mountctl(const char *path, int op, struct file *fp, 1099 const void *ctl, int ctllen, 1100 void *buf, int buflen, int *res) 1101 { 1102 struct vnode *vp; 1103 struct mount *mp; 1104 struct nlookupdata nd; 1105 int error; 1106 1107 *res = 0; 1108 vp = NULL; 1109 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1110 if (error == 0) 1111 error = nlookup(&nd); 1112 if (error == 0) 1113 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1114 mp = nd.nl_nch.mount; 1115 nlookup_done(&nd); 1116 if (error) 1117 return (error); 1118 vn_unlock(vp); 1119 1120 /* 1121 * Must be the root of the filesystem 1122 */ 1123 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1124 vrele(vp); 1125 return (EINVAL); 1126 } 1127 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1128 buf, buflen, res); 1129 vrele(vp); 1130 return (error); 1131 } 1132 1133 int 1134 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1135 { 1136 struct thread *td = curthread; 1137 struct proc *p = td->td_proc; 1138 struct mount *mp; 1139 struct statfs *sp; 1140 char *fullpath, *freepath; 1141 int error; 1142 1143 if ((error = nlookup(nd)) != 0) 1144 return (error); 1145 mp = nd->nl_nch.mount; 1146 sp = &mp->mnt_stat; 1147 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1148 return (error); 1149 1150 error = mount_path(p, mp, &fullpath, &freepath); 1151 if (error) 1152 return(error); 1153 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1154 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1155 kfree(freepath, M_TEMP); 1156 1157 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1158 bcopy(sp, buf, sizeof(*buf)); 1159 /* Only root should have access to the fsid's. */ 1160 if (priv_check(td, PRIV_ROOT)) 1161 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1162 return (0); 1163 } 1164 1165 /* 1166 * statfs_args(char *path, struct statfs *buf) 1167 * 1168 * Get filesystem statistics. 1169 */ 1170 int 1171 sys_statfs(struct statfs_args *uap) 1172 { 1173 struct nlookupdata nd; 1174 struct statfs buf; 1175 int error; 1176 1177 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1178 if (error == 0) 1179 error = kern_statfs(&nd, &buf); 1180 nlookup_done(&nd); 1181 if (error == 0) 1182 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1183 return (error); 1184 } 1185 1186 int 1187 kern_fstatfs(int fd, struct statfs *buf) 1188 { 1189 struct thread *td = curthread; 1190 struct proc *p = td->td_proc; 1191 struct file *fp; 1192 struct mount *mp; 1193 struct statfs *sp; 1194 char *fullpath, *freepath; 1195 int error; 1196 1197 KKASSERT(p); 1198 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1199 return (error); 1200 1201 /* 1202 * Try to use mount info from any overlays rather than the 1203 * mount info for the underlying vnode, otherwise we will 1204 * fail when operating on null-mounted paths inside a chroot. 
1205 */ 1206 if ((mp = fp->f_nchandle.mount) == NULL) 1207 mp = ((struct vnode *)fp->f_data)->v_mount; 1208 if (mp == NULL) { 1209 error = EBADF; 1210 goto done; 1211 } 1212 if (fp->f_cred == NULL) { 1213 error = EINVAL; 1214 goto done; 1215 } 1216 sp = &mp->mnt_stat; 1217 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1218 goto done; 1219 1220 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1221 goto done; 1222 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1223 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1224 kfree(freepath, M_TEMP); 1225 1226 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1227 bcopy(sp, buf, sizeof(*buf)); 1228 1229 /* Only root should have access to the fsid's. */ 1230 if (priv_check(td, PRIV_ROOT)) 1231 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1232 error = 0; 1233 done: 1234 fdrop(fp); 1235 return (error); 1236 } 1237 1238 /* 1239 * fstatfs_args(int fd, struct statfs *buf) 1240 * 1241 * Get filesystem statistics. 1242 */ 1243 int 1244 sys_fstatfs(struct fstatfs_args *uap) 1245 { 1246 struct statfs buf; 1247 int error; 1248 1249 error = kern_fstatfs(uap->fd, &buf); 1250 1251 if (error == 0) 1252 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1253 return (error); 1254 } 1255 1256 int 1257 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1258 { 1259 struct mount *mp; 1260 struct statvfs *sp; 1261 int error; 1262 1263 if ((error = nlookup(nd)) != 0) 1264 return (error); 1265 mp = nd->nl_nch.mount; 1266 sp = &mp->mnt_vstat; 1267 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1268 return (error); 1269 1270 sp->f_flag = 0; 1271 if (mp->mnt_flag & MNT_RDONLY) 1272 sp->f_flag |= ST_RDONLY; 1273 if (mp->mnt_flag & MNT_NOSUID) 1274 sp->f_flag |= ST_NOSUID; 1275 bcopy(sp, buf, sizeof(*buf)); 1276 return (0); 1277 } 1278 1279 /* 1280 * statfs_args(char *path, struct statfs *buf) 1281 * 1282 * Get filesystem statistics. 
1283 */ 1284 int 1285 sys_statvfs(struct statvfs_args *uap) 1286 { 1287 struct nlookupdata nd; 1288 struct statvfs buf; 1289 int error; 1290 1291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1292 if (error == 0) 1293 error = kern_statvfs(&nd, &buf); 1294 nlookup_done(&nd); 1295 if (error == 0) 1296 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1297 return (error); 1298 } 1299 1300 int 1301 kern_fstatvfs(int fd, struct statvfs *buf) 1302 { 1303 struct thread *td = curthread; 1304 struct proc *p = td->td_proc; 1305 struct file *fp; 1306 struct mount *mp; 1307 struct statvfs *sp; 1308 int error; 1309 1310 KKASSERT(p); 1311 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1312 return (error); 1313 if ((mp = fp->f_nchandle.mount) == NULL) 1314 mp = ((struct vnode *)fp->f_data)->v_mount; 1315 if (mp == NULL) { 1316 error = EBADF; 1317 goto done; 1318 } 1319 if (fp->f_cred == NULL) { 1320 error = EINVAL; 1321 goto done; 1322 } 1323 sp = &mp->mnt_vstat; 1324 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1325 goto done; 1326 1327 sp->f_flag = 0; 1328 if (mp->mnt_flag & MNT_RDONLY) 1329 sp->f_flag |= ST_RDONLY; 1330 if (mp->mnt_flag & MNT_NOSUID) 1331 sp->f_flag |= ST_NOSUID; 1332 1333 bcopy(sp, buf, sizeof(*buf)); 1334 error = 0; 1335 done: 1336 fdrop(fp); 1337 return (error); 1338 } 1339 1340 /* 1341 * fstatfs_args(int fd, struct statfs *buf) 1342 * 1343 * Get filesystem statistics. 1344 */ 1345 int 1346 sys_fstatvfs(struct fstatvfs_args *uap) 1347 { 1348 struct statvfs buf; 1349 int error; 1350 1351 error = kern_fstatvfs(uap->fd, &buf); 1352 1353 if (error == 0) 1354 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1355 return (error); 1356 } 1357 1358 /* 1359 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1360 * 1361 * Get statistics on all filesystems. 
1362 */ 1363 1364 struct getfsstat_info { 1365 struct statfs *sfsp; 1366 long count; 1367 long maxcount; 1368 int error; 1369 int flags; 1370 struct thread *td; 1371 }; 1372 1373 static int getfsstat_callback(struct mount *, void *); 1374 1375 int 1376 sys_getfsstat(struct getfsstat_args *uap) 1377 { 1378 struct thread *td = curthread; 1379 struct getfsstat_info info; 1380 1381 bzero(&info, sizeof(info)); 1382 1383 info.maxcount = uap->bufsize / sizeof(struct statfs); 1384 info.sfsp = uap->buf; 1385 info.count = 0; 1386 info.flags = uap->flags; 1387 info.td = td; 1388 1389 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1390 if (info.sfsp && info.count > info.maxcount) 1391 uap->sysmsg_result = info.maxcount; 1392 else 1393 uap->sysmsg_result = info.count; 1394 return (info.error); 1395 } 1396 1397 static int 1398 getfsstat_callback(struct mount *mp, void *data) 1399 { 1400 struct getfsstat_info *info = data; 1401 struct statfs *sp; 1402 char *freepath; 1403 char *fullpath; 1404 int error; 1405 1406 if (info->sfsp && info->count < info->maxcount) { 1407 if (info->td->td_proc && 1408 !chroot_visible_mnt(mp, info->td->td_proc)) { 1409 return(0); 1410 } 1411 sp = &mp->mnt_stat; 1412 1413 /* 1414 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1415 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1416 * overrides MNT_WAIT. 
1417 */ 1418 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1419 (info->flags & MNT_WAIT)) && 1420 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1421 return(0); 1422 } 1423 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1424 1425 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1426 if (error) { 1427 info->error = error; 1428 return(-1); 1429 } 1430 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1431 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1432 kfree(freepath, M_TEMP); 1433 1434 error = copyout(sp, info->sfsp, sizeof(*sp)); 1435 if (error) { 1436 info->error = error; 1437 return (-1); 1438 } 1439 ++info->sfsp; 1440 } 1441 info->count++; 1442 return(0); 1443 } 1444 1445 /* 1446 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1447 long bufsize, int flags) 1448 * 1449 * Get statistics on all filesystems. 1450 */ 1451 1452 struct getvfsstat_info { 1453 struct statfs *sfsp; 1454 struct statvfs *vsfsp; 1455 long count; 1456 long maxcount; 1457 int error; 1458 int flags; 1459 struct thread *td; 1460 }; 1461 1462 static int getvfsstat_callback(struct mount *, void *); 1463 1464 int 1465 sys_getvfsstat(struct getvfsstat_args *uap) 1466 { 1467 struct thread *td = curthread; 1468 struct getvfsstat_info info; 1469 1470 bzero(&info, sizeof(info)); 1471 1472 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1473 info.sfsp = uap->buf; 1474 info.vsfsp = uap->vbuf; 1475 info.count = 0; 1476 info.flags = uap->flags; 1477 info.td = td; 1478 1479 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1480 if (info.vsfsp && info.count > info.maxcount) 1481 uap->sysmsg_result = info.maxcount; 1482 else 1483 uap->sysmsg_result = info.count; 1484 return (info.error); 1485 } 1486 1487 static int 1488 getvfsstat_callback(struct mount *mp, void *data) 1489 { 1490 struct getvfsstat_info *info = data; 1491 struct statfs *sp; 1492 struct statvfs *vsp; 1493 char *freepath; 1494 char *fullpath; 1495 int error; 1496 1497 if 
(info->vsfsp && info->count < info->maxcount) { 1498 if (info->td->td_proc && 1499 !chroot_visible_mnt(mp, info->td->td_proc)) { 1500 return(0); 1501 } 1502 sp = &mp->mnt_stat; 1503 vsp = &mp->mnt_vstat; 1504 1505 /* 1506 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1507 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1508 * overrides MNT_WAIT. 1509 */ 1510 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1511 (info->flags & MNT_WAIT)) && 1512 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1513 return(0); 1514 } 1515 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1516 1517 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1518 (info->flags & MNT_WAIT)) && 1519 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1520 return(0); 1521 } 1522 vsp->f_flag = 0; 1523 if (mp->mnt_flag & MNT_RDONLY) 1524 vsp->f_flag |= ST_RDONLY; 1525 if (mp->mnt_flag & MNT_NOSUID) 1526 vsp->f_flag |= ST_NOSUID; 1527 1528 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1529 if (error) { 1530 info->error = error; 1531 return(-1); 1532 } 1533 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1534 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1535 kfree(freepath, M_TEMP); 1536 1537 error = copyout(sp, info->sfsp, sizeof(*sp)); 1538 if (error == 0) 1539 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1540 if (error) { 1541 info->error = error; 1542 return (-1); 1543 } 1544 ++info->sfsp; 1545 ++info->vsfsp; 1546 } 1547 info->count++; 1548 return(0); 1549 } 1550 1551 1552 /* 1553 * fchdir_args(int fd) 1554 * 1555 * Change current working directory to a given file descriptor. 
1556 */ 1557 int 1558 sys_fchdir(struct fchdir_args *uap) 1559 { 1560 struct thread *td = curthread; 1561 struct proc *p = td->td_proc; 1562 struct filedesc *fdp = p->p_fd; 1563 struct vnode *vp, *ovp; 1564 struct mount *mp; 1565 struct file *fp; 1566 struct nchandle nch, onch, tnch; 1567 int error; 1568 1569 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1570 return (error); 1571 lwkt_gettoken(&p->p_token); 1572 vp = (struct vnode *)fp->f_data; 1573 vref(vp); 1574 vn_lock(vp, LK_SHARED | LK_RETRY); 1575 if (fp->f_nchandle.ncp == NULL) 1576 error = ENOTDIR; 1577 else 1578 error = checkvp_chdir(vp, td); 1579 if (error) { 1580 vput(vp); 1581 goto done; 1582 } 1583 cache_copy(&fp->f_nchandle, &nch); 1584 1585 /* 1586 * If the ncp has become a mount point, traverse through 1587 * the mount point. 1588 */ 1589 1590 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1591 (mp = cache_findmount(&nch)) != NULL 1592 ) { 1593 error = nlookup_mp(mp, &tnch); 1594 if (error == 0) { 1595 cache_unlock(&tnch); /* leave ref intact */ 1596 vput(vp); 1597 vp = tnch.ncp->nc_vp; 1598 error = vget(vp, LK_SHARED); 1599 KKASSERT(error == 0); 1600 cache_drop(&nch); 1601 nch = tnch; 1602 } 1603 cache_dropmount(mp); 1604 } 1605 if (error == 0) { 1606 ovp = fdp->fd_cdir; 1607 onch = fdp->fd_ncdir; 1608 vn_unlock(vp); /* leave ref intact */ 1609 fdp->fd_cdir = vp; 1610 fdp->fd_ncdir = nch; 1611 cache_drop(&onch); 1612 vrele(ovp); 1613 } else { 1614 cache_drop(&nch); 1615 vput(vp); 1616 } 1617 fdrop(fp); 1618 done: 1619 lwkt_reltoken(&p->p_token); 1620 return (error); 1621 } 1622 1623 int 1624 kern_chdir(struct nlookupdata *nd) 1625 { 1626 struct thread *td = curthread; 1627 struct proc *p = td->td_proc; 1628 struct filedesc *fdp = p->p_fd; 1629 struct vnode *vp, *ovp; 1630 struct nchandle onch; 1631 int error; 1632 1633 nd->nl_flags |= NLC_SHAREDLOCK; 1634 if ((error = nlookup(nd)) != 0) 1635 return (error); 1636 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1637 return (ENOENT); 1638 if 
((error = vget(vp, LK_SHARED)) != 0) 1639 return (error); 1640 1641 lwkt_gettoken(&p->p_token); 1642 error = checkvp_chdir(vp, td); 1643 vn_unlock(vp); 1644 if (error == 0) { 1645 ovp = fdp->fd_cdir; 1646 onch = fdp->fd_ncdir; 1647 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1648 fdp->fd_ncdir = nd->nl_nch; 1649 fdp->fd_cdir = vp; 1650 cache_drop(&onch); 1651 vrele(ovp); 1652 cache_zero(&nd->nl_nch); 1653 } else { 1654 vrele(vp); 1655 } 1656 lwkt_reltoken(&p->p_token); 1657 return (error); 1658 } 1659 1660 /* 1661 * chdir_args(char *path) 1662 * 1663 * Change current working directory (``.''). 1664 */ 1665 int 1666 sys_chdir(struct chdir_args *uap) 1667 { 1668 struct nlookupdata nd; 1669 int error; 1670 1671 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1672 if (error == 0) 1673 error = kern_chdir(&nd); 1674 nlookup_done(&nd); 1675 return (error); 1676 } 1677 1678 /* 1679 * Helper function for raised chroot(2) security function: Refuse if 1680 * any filedescriptors are open directories. 1681 */ 1682 static int 1683 chroot_refuse_vdir_fds(struct filedesc *fdp) 1684 { 1685 struct vnode *vp; 1686 struct file *fp; 1687 int error; 1688 int fd; 1689 1690 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1691 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1692 continue; 1693 vp = (struct vnode *)fp->f_data; 1694 if (vp->v_type != VDIR) { 1695 fdrop(fp); 1696 continue; 1697 } 1698 fdrop(fp); 1699 return(EPERM); 1700 } 1701 return (0); 1702 } 1703 1704 /* 1705 * This sysctl determines if we will allow a process to chroot(2) if it 1706 * has a directory open: 1707 * 0: disallowed for all processes. 1708 * 1: allowed for processes that were not already chroot(2)'ed. 1709 * 2: allowed for all processes. 1710 */ 1711 1712 static int chroot_allow_open_directories = 1; 1713 1714 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1715 &chroot_allow_open_directories, 0, ""); 1716 1717 /* 1718 * chroot to the specified namecache entry. 
We obtain the vp from the 1719 * namecache data. The passed ncp must be locked and referenced and will 1720 * remain locked and referenced on return. 1721 */ 1722 int 1723 kern_chroot(struct nchandle *nch) 1724 { 1725 struct thread *td = curthread; 1726 struct proc *p = td->td_proc; 1727 struct filedesc *fdp = p->p_fd; 1728 struct vnode *vp; 1729 int error; 1730 1731 /* 1732 * Only privileged user can chroot 1733 */ 1734 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1735 if (error) 1736 return (error); 1737 1738 /* 1739 * Disallow open directory descriptors (fchdir() breakouts). 1740 */ 1741 if (chroot_allow_open_directories == 0 || 1742 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1743 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1744 return (error); 1745 } 1746 if ((vp = nch->ncp->nc_vp) == NULL) 1747 return (ENOENT); 1748 1749 if ((error = vget(vp, LK_SHARED)) != 0) 1750 return (error); 1751 1752 /* 1753 * Check the validity of vp as a directory to change to and 1754 * associate it with rdir/jdir. 1755 */ 1756 error = checkvp_chdir(vp, td); 1757 vn_unlock(vp); /* leave reference intact */ 1758 if (error == 0) { 1759 vrele(fdp->fd_rdir); 1760 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1761 cache_drop(&fdp->fd_nrdir); 1762 cache_copy(nch, &fdp->fd_nrdir); 1763 if (fdp->fd_jdir == NULL) { 1764 fdp->fd_jdir = vp; 1765 vref(fdp->fd_jdir); 1766 cache_copy(nch, &fdp->fd_njdir); 1767 } 1768 } else { 1769 vrele(vp); 1770 } 1771 return (error); 1772 } 1773 1774 /* 1775 * chroot_args(char *path) 1776 * 1777 * Change notion of root (``/'') directory. 
1778 */ 1779 int 1780 sys_chroot(struct chroot_args *uap) 1781 { 1782 struct thread *td __debugvar = curthread; 1783 struct nlookupdata nd; 1784 int error; 1785 1786 KKASSERT(td->td_proc); 1787 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1788 if (error == 0) { 1789 nd.nl_flags |= NLC_EXEC; 1790 error = nlookup(&nd); 1791 if (error == 0) 1792 error = kern_chroot(&nd.nl_nch); 1793 } 1794 nlookup_done(&nd); 1795 return(error); 1796 } 1797 1798 int 1799 sys_chroot_kernel(struct chroot_kernel_args *uap) 1800 { 1801 struct thread *td = curthread; 1802 struct nlookupdata nd; 1803 struct nchandle *nch; 1804 struct vnode *vp; 1805 int error; 1806 1807 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1808 if (error) 1809 goto error_nond; 1810 1811 error = nlookup(&nd); 1812 if (error) 1813 goto error_out; 1814 1815 nch = &nd.nl_nch; 1816 1817 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1818 if (error) 1819 goto error_out; 1820 1821 if ((vp = nch->ncp->nc_vp) == NULL) { 1822 error = ENOENT; 1823 goto error_out; 1824 } 1825 1826 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1827 goto error_out; 1828 1829 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1830 get_mplock(); 1831 vfs_cache_setroot(vp, cache_hold(nch)); 1832 rel_mplock(); 1833 1834 error_out: 1835 nlookup_done(&nd); 1836 error_nond: 1837 return(error); 1838 } 1839 1840 /* 1841 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1842 * determine whether it is legal to chdir to the vnode. The vnode's state 1843 * is not changed by this call. 
1844 */ 1845 static int 1846 checkvp_chdir(struct vnode *vp, struct thread *td) 1847 { 1848 int error; 1849 1850 if (vp->v_type != VDIR) 1851 error = ENOTDIR; 1852 else 1853 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1854 return (error); 1855 } 1856 1857 int 1858 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1859 { 1860 struct thread *td = curthread; 1861 struct proc *p = td->td_proc; 1862 struct lwp *lp = td->td_lwp; 1863 struct filedesc *fdp = p->p_fd; 1864 int cmode, flags; 1865 struct file *nfp; 1866 struct file *fp; 1867 struct vnode *vp; 1868 int type, indx, error = 0; 1869 struct flock lf; 1870 1871 if ((oflags & O_ACCMODE) == O_ACCMODE) 1872 return (EINVAL); 1873 flags = FFLAGS(oflags); 1874 error = falloc(lp, &nfp, NULL); 1875 if (error) 1876 return (error); 1877 fp = nfp; 1878 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1879 1880 /* 1881 * XXX p_dupfd is a real mess. It allows a device to return a 1882 * file descriptor to be duplicated rather then doing the open 1883 * itself. 1884 */ 1885 lp->lwp_dupfd = -1; 1886 1887 /* 1888 * Call vn_open() to do the lookup and assign the vnode to the 1889 * file pointer. vn_open() does not change the ref count on fp 1890 * and the vnode, on success, will be inherited by the file pointer 1891 * and unlocked. 1892 * 1893 * Request a shared lock on the vnode if possible. 1894 */ 1895 nd->nl_flags |= NLC_LOCKVP; 1896 if ((flags & (O_CREAT|O_TRUNC)) == 0) 1897 nd->nl_flags |= NLC_SHAREDLOCK; 1898 1899 error = vn_open(nd, fp, flags, cmode); 1900 nlookup_done(nd); 1901 1902 if (error) { 1903 /* 1904 * handle special fdopen() case. bleh. dupfdopen() is 1905 * responsible for dropping the old contents of ofiles[indx] 1906 * if it succeeds. 1907 * 1908 * Note that fsetfd() will add a ref to fp which represents 1909 * the fd_files[] assignment. We must still drop our 1910 * reference. 
1911 */ 1912 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1913 if (fdalloc(p, 0, &indx) == 0) { 1914 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1915 if (error == 0) { 1916 *res = indx; 1917 fdrop(fp); /* our ref */ 1918 return (0); 1919 } 1920 fsetfd(fdp, NULL, indx); 1921 } 1922 } 1923 fdrop(fp); /* our ref */ 1924 if (error == ERESTART) 1925 error = EINTR; 1926 return (error); 1927 } 1928 1929 /* 1930 * ref the vnode for ourselves so it can't be ripped out from under 1931 * is. XXX need an ND flag to request that the vnode be returned 1932 * anyway. 1933 * 1934 * Reserve a file descriptor but do not assign it until the open 1935 * succeeds. 1936 */ 1937 vp = (struct vnode *)fp->f_data; 1938 vref(vp); 1939 if ((error = fdalloc(p, 0, &indx)) != 0) { 1940 fdrop(fp); 1941 vrele(vp); 1942 return (error); 1943 } 1944 1945 /* 1946 * If no error occurs the vp will have been assigned to the file 1947 * pointer. 1948 */ 1949 lp->lwp_dupfd = 0; 1950 1951 if (flags & (O_EXLOCK | O_SHLOCK)) { 1952 lf.l_whence = SEEK_SET; 1953 lf.l_start = 0; 1954 lf.l_len = 0; 1955 if (flags & O_EXLOCK) 1956 lf.l_type = F_WRLCK; 1957 else 1958 lf.l_type = F_RDLCK; 1959 if (flags & FNONBLOCK) 1960 type = 0; 1961 else 1962 type = F_WAIT; 1963 1964 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1965 /* 1966 * lock request failed. Clean up the reserved 1967 * descriptor. 1968 */ 1969 vrele(vp); 1970 fsetfd(fdp, NULL, indx); 1971 fdrop(fp); 1972 return (error); 1973 } 1974 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 1975 } 1976 #if 0 1977 /* 1978 * Assert that all regular file vnodes were created with a object. 1979 */ 1980 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1981 ("open: regular file has no backing object after vn_open")); 1982 #endif 1983 1984 vrele(vp); 1985 1986 /* 1987 * release our private reference, leaving the one associated with the 1988 * descriptor table intact. 
1989 */ 1990 if (oflags & O_CLOEXEC) 1991 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 1992 fsetfd(fdp, fp, indx); 1993 fdrop(fp); 1994 *res = indx; 1995 return (error); 1996 } 1997 1998 /* 1999 * open_args(char *path, int flags, int mode) 2000 * 2001 * Check permissions, allocate an open file structure, 2002 * and call the device open routine if any. 2003 */ 2004 int 2005 sys_open(struct open_args *uap) 2006 { 2007 struct nlookupdata nd; 2008 int error; 2009 2010 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2011 if (error == 0) { 2012 error = kern_open(&nd, uap->flags, 2013 uap->mode, &uap->sysmsg_result); 2014 } 2015 nlookup_done(&nd); 2016 return (error); 2017 } 2018 2019 /* 2020 * openat_args(int fd, char *path, int flags, int mode) 2021 */ 2022 int 2023 sys_openat(struct openat_args *uap) 2024 { 2025 struct nlookupdata nd; 2026 int error; 2027 struct file *fp; 2028 2029 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2030 if (error == 0) { 2031 error = kern_open(&nd, uap->flags, uap->mode, 2032 &uap->sysmsg_result); 2033 } 2034 nlookup_done_at(&nd, fp); 2035 return (error); 2036 } 2037 2038 int 2039 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2040 { 2041 struct thread *td = curthread; 2042 struct proc *p = td->td_proc; 2043 struct vnode *vp; 2044 struct vattr vattr; 2045 int error; 2046 int whiteout = 0; 2047 2048 KKASSERT(p); 2049 2050 VATTR_NULL(&vattr); 2051 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2052 vattr.va_rmajor = rmajor; 2053 vattr.va_rminor = rminor; 2054 2055 switch (mode & S_IFMT) { 2056 case S_IFMT: /* used by badsect to flag bad sectors */ 2057 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2058 vattr.va_type = VBAD; 2059 break; 2060 case S_IFCHR: 2061 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2062 vattr.va_type = VCHR; 2063 break; 2064 case S_IFBLK: 2065 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2066 vattr.va_type = VBLK; 2067 break; 2068 case 
S_IFWHT: 2069 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2070 whiteout = 1; 2071 break; 2072 case S_IFDIR: /* special directories support for HAMMER */ 2073 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2074 vattr.va_type = VDIR; 2075 break; 2076 default: 2077 error = EINVAL; 2078 break; 2079 } 2080 2081 if (error) 2082 return (error); 2083 2084 bwillinode(1); 2085 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2086 if ((error = nlookup(nd)) != 0) 2087 return (error); 2088 if (nd->nl_nch.ncp->nc_vp) 2089 return (EEXIST); 2090 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2091 return (error); 2092 2093 if (whiteout) { 2094 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2095 nd->nl_cred, NAMEI_CREATE); 2096 } else { 2097 vp = NULL; 2098 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2099 &vp, nd->nl_cred, &vattr); 2100 if (error == 0) 2101 vput(vp); 2102 } 2103 return (error); 2104 } 2105 2106 /* 2107 * mknod_args(char *path, int mode, int dev) 2108 * 2109 * Create a special file. 2110 */ 2111 int 2112 sys_mknod(struct mknod_args *uap) 2113 { 2114 struct nlookupdata nd; 2115 int error; 2116 2117 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2118 if (error == 0) { 2119 error = kern_mknod(&nd, uap->mode, 2120 umajor(uap->dev), uminor(uap->dev)); 2121 } 2122 nlookup_done(&nd); 2123 return (error); 2124 } 2125 2126 /* 2127 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2128 * 2129 * Create a special file. The path is relative to the directory associated 2130 * with fd. 
2131 */ 2132 int 2133 sys_mknodat(struct mknodat_args *uap) 2134 { 2135 struct nlookupdata nd; 2136 struct file *fp; 2137 int error; 2138 2139 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2140 if (error == 0) { 2141 error = kern_mknod(&nd, uap->mode, 2142 umajor(uap->dev), uminor(uap->dev)); 2143 } 2144 nlookup_done_at(&nd, fp); 2145 return (error); 2146 } 2147 2148 int 2149 kern_mkfifo(struct nlookupdata *nd, int mode) 2150 { 2151 struct thread *td = curthread; 2152 struct proc *p = td->td_proc; 2153 struct vattr vattr; 2154 struct vnode *vp; 2155 int error; 2156 2157 bwillinode(1); 2158 2159 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2160 if ((error = nlookup(nd)) != 0) 2161 return (error); 2162 if (nd->nl_nch.ncp->nc_vp) 2163 return (EEXIST); 2164 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2165 return (error); 2166 2167 VATTR_NULL(&vattr); 2168 vattr.va_type = VFIFO; 2169 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2170 vp = NULL; 2171 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2172 if (error == 0) 2173 vput(vp); 2174 return (error); 2175 } 2176 2177 /* 2178 * mkfifo_args(char *path, int mode) 2179 * 2180 * Create a named pipe. 2181 */ 2182 int 2183 sys_mkfifo(struct mkfifo_args *uap) 2184 { 2185 struct nlookupdata nd; 2186 int error; 2187 2188 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2189 if (error == 0) 2190 error = kern_mkfifo(&nd, uap->mode); 2191 nlookup_done(&nd); 2192 return (error); 2193 } 2194 2195 /* 2196 * mkfifoat_args(int fd, char *path, mode_t mode) 2197 * 2198 * Create a named pipe. The path is relative to the directory associated 2199 * with fd. 
2200 */ 2201 int 2202 sys_mkfifoat(struct mkfifoat_args *uap) 2203 { 2204 struct nlookupdata nd; 2205 struct file *fp; 2206 int error; 2207 2208 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2209 if (error == 0) 2210 error = kern_mkfifo(&nd, uap->mode); 2211 nlookup_done_at(&nd, fp); 2212 return (error); 2213 } 2214 2215 static int hardlink_check_uid = 0; 2216 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2217 &hardlink_check_uid, 0, 2218 "Unprivileged processes cannot create hard links to files owned by other " 2219 "users"); 2220 static int hardlink_check_gid = 0; 2221 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2222 &hardlink_check_gid, 0, 2223 "Unprivileged processes cannot create hard links to files owned by other " 2224 "groups"); 2225 2226 static int 2227 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2228 { 2229 struct vattr va; 2230 int error; 2231 2232 /* 2233 * Shortcut if disabled 2234 */ 2235 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2236 return (0); 2237 2238 /* 2239 * Privileged user can always hardlink 2240 */ 2241 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2242 return (0); 2243 2244 /* 2245 * Otherwise only if the originating file is owned by the 2246 * same user or group. Note that any group is allowed if 2247 * the file is owned by the caller. 2248 */ 2249 error = VOP_GETATTR(vp, &va); 2250 if (error != 0) 2251 return (error); 2252 2253 if (hardlink_check_uid) { 2254 if (cred->cr_uid != va.va_uid) 2255 return (EPERM); 2256 } 2257 2258 if (hardlink_check_gid) { 2259 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2260 return (EPERM); 2261 } 2262 2263 return (0); 2264 } 2265 2266 int 2267 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2268 { 2269 struct thread *td = curthread; 2270 struct vnode *vp; 2271 int error; 2272 2273 /* 2274 * Lookup the source and obtained a locked vnode. 
2275 * 2276 * You may only hardlink a file which you have write permission 2277 * on or which you own. 2278 * 2279 * XXX relookup on vget failure / race ? 2280 */ 2281 bwillinode(1); 2282 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2283 if ((error = nlookup(nd)) != 0) 2284 return (error); 2285 vp = nd->nl_nch.ncp->nc_vp; 2286 KKASSERT(vp != NULL); 2287 if (vp->v_type == VDIR) 2288 return (EPERM); /* POSIX */ 2289 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2290 return (error); 2291 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2292 return (error); 2293 2294 /* 2295 * Unlock the source so we can lookup the target without deadlocking 2296 * (XXX vp is locked already, possible other deadlock?). The target 2297 * must not exist. 2298 */ 2299 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2300 nd->nl_flags &= ~NLC_NCPISLOCKED; 2301 cache_unlock(&nd->nl_nch); 2302 vn_unlock(vp); 2303 2304 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2305 if ((error = nlookup(linknd)) != 0) { 2306 vrele(vp); 2307 return (error); 2308 } 2309 if (linknd->nl_nch.ncp->nc_vp) { 2310 vrele(vp); 2311 return (EEXIST); 2312 } 2313 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2314 if (error) { 2315 vrele(vp); 2316 return (error); 2317 } 2318 2319 /* 2320 * Finally run the new API VOP. 2321 */ 2322 error = can_hardlink(vp, td, td->td_ucred); 2323 if (error == 0) { 2324 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2325 vp, linknd->nl_cred); 2326 } 2327 vput(vp); 2328 return (error); 2329 } 2330 2331 /* 2332 * link_args(char *path, char *link) 2333 * 2334 * Make a hard file link. 
2335 */ 2336 int 2337 sys_link(struct link_args *uap) 2338 { 2339 struct nlookupdata nd, linknd; 2340 int error; 2341 2342 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2343 if (error == 0) { 2344 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2345 if (error == 0) 2346 error = kern_link(&nd, &linknd); 2347 nlookup_done(&linknd); 2348 } 2349 nlookup_done(&nd); 2350 return (error); 2351 } 2352 2353 /* 2354 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2355 * 2356 * Make a hard file link. The path1 argument is relative to the directory 2357 * associated with fd1, and similarly the path2 argument is relative to 2358 * the directory associated with fd2. 2359 */ 2360 int 2361 sys_linkat(struct linkat_args *uap) 2362 { 2363 struct nlookupdata nd, linknd; 2364 struct file *fp1, *fp2; 2365 int error; 2366 2367 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2368 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2369 if (error == 0) { 2370 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2371 uap->path2, UIO_USERSPACE, 0); 2372 if (error == 0) 2373 error = kern_link(&nd, &linknd); 2374 nlookup_done_at(&linknd, fp2); 2375 } 2376 nlookup_done_at(&nd, fp1); 2377 return (error); 2378 } 2379 2380 int 2381 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2382 { 2383 struct vattr vattr; 2384 struct vnode *vp; 2385 struct vnode *dvp; 2386 int error; 2387 2388 bwillinode(1); 2389 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2390 if ((error = nlookup(nd)) != 0) 2391 return (error); 2392 if (nd->nl_nch.ncp->nc_vp) 2393 return (EEXIST); 2394 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2395 return (error); 2396 dvp = nd->nl_dvp; 2397 VATTR_NULL(&vattr); 2398 vattr.va_mode = mode; 2399 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2400 if (error == 0) 2401 vput(vp); 2402 return (error); 2403 } 2404 2405 /* 2406 * symlink(char *path, char *link) 2407 * 2408 * Make a symbolic 
link. 2409 */ 2410 int 2411 sys_symlink(struct symlink_args *uap) 2412 { 2413 struct thread *td = curthread; 2414 struct nlookupdata nd; 2415 char *path; 2416 int error; 2417 int mode; 2418 2419 path = objcache_get(namei_oc, M_WAITOK); 2420 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2421 if (error == 0) { 2422 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2423 if (error == 0) { 2424 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2425 error = kern_symlink(&nd, path, mode); 2426 } 2427 nlookup_done(&nd); 2428 } 2429 objcache_put(namei_oc, path); 2430 return (error); 2431 } 2432 2433 /* 2434 * symlinkat_args(char *path1, int fd, char *path2) 2435 * 2436 * Make a symbolic link. The path2 argument is relative to the directory 2437 * associated with fd. 2438 */ 2439 int 2440 sys_symlinkat(struct symlinkat_args *uap) 2441 { 2442 struct thread *td = curthread; 2443 struct nlookupdata nd; 2444 struct file *fp; 2445 char *path1; 2446 int error; 2447 int mode; 2448 2449 path1 = objcache_get(namei_oc, M_WAITOK); 2450 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2451 if (error == 0) { 2452 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2453 UIO_USERSPACE, 0); 2454 if (error == 0) { 2455 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2456 error = kern_symlink(&nd, path1, mode); 2457 } 2458 nlookup_done_at(&nd, fp); 2459 } 2460 objcache_put(namei_oc, path1); 2461 return (error); 2462 } 2463 2464 /* 2465 * undelete_args(char *path) 2466 * 2467 * Delete a whiteout from the filesystem. 
2468 */ 2469 int 2470 sys_undelete(struct undelete_args *uap) 2471 { 2472 struct nlookupdata nd; 2473 int error; 2474 2475 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2476 bwillinode(1); 2477 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2478 if (error == 0) 2479 error = nlookup(&nd); 2480 if (error == 0) 2481 error = ncp_writechk(&nd.nl_nch); 2482 if (error == 0) { 2483 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2484 NAMEI_DELETE); 2485 } 2486 nlookup_done(&nd); 2487 return (error); 2488 } 2489 2490 int 2491 kern_unlink(struct nlookupdata *nd) 2492 { 2493 int error; 2494 2495 bwillinode(1); 2496 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2497 if ((error = nlookup(nd)) != 0) 2498 return (error); 2499 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2500 return (error); 2501 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2502 return (error); 2503 } 2504 2505 /* 2506 * unlink_args(char *path) 2507 * 2508 * Delete a name from the filesystem. 2509 */ 2510 int 2511 sys_unlink(struct unlink_args *uap) 2512 { 2513 struct nlookupdata nd; 2514 int error; 2515 2516 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2517 if (error == 0) 2518 error = kern_unlink(&nd); 2519 nlookup_done(&nd); 2520 return (error); 2521 } 2522 2523 2524 /* 2525 * unlinkat_args(int fd, char *path, int flags) 2526 * 2527 * Delete the file or directory entry pointed to by fd/path. 
2528 */ 2529 int 2530 sys_unlinkat(struct unlinkat_args *uap) 2531 { 2532 struct nlookupdata nd; 2533 struct file *fp; 2534 int error; 2535 2536 if (uap->flags & ~AT_REMOVEDIR) 2537 return (EINVAL); 2538 2539 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2540 if (error == 0) { 2541 if (uap->flags & AT_REMOVEDIR) 2542 error = kern_rmdir(&nd); 2543 else 2544 error = kern_unlink(&nd); 2545 } 2546 nlookup_done_at(&nd, fp); 2547 return (error); 2548 } 2549 2550 int 2551 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2552 { 2553 struct thread *td = curthread; 2554 struct proc *p = td->td_proc; 2555 struct file *fp; 2556 struct vnode *vp; 2557 struct vattr vattr; 2558 off_t new_offset; 2559 int error; 2560 2561 fp = holdfp(p->p_fd, fd, -1); 2562 if (fp == NULL) 2563 return (EBADF); 2564 if (fp->f_type != DTYPE_VNODE) { 2565 error = ESPIPE; 2566 goto done; 2567 } 2568 vp = (struct vnode *)fp->f_data; 2569 2570 switch (whence) { 2571 case L_INCR: 2572 spin_lock(&fp->f_spin); 2573 new_offset = fp->f_offset + offset; 2574 error = 0; 2575 break; 2576 case L_XTND: 2577 error = VOP_GETATTR(vp, &vattr); 2578 spin_lock(&fp->f_spin); 2579 new_offset = offset + vattr.va_size; 2580 break; 2581 case L_SET: 2582 new_offset = offset; 2583 error = 0; 2584 spin_lock(&fp->f_spin); 2585 break; 2586 default: 2587 new_offset = 0; 2588 error = EINVAL; 2589 spin_lock(&fp->f_spin); 2590 break; 2591 } 2592 2593 /* 2594 * Validate the seek position. Negative offsets are not allowed 2595 * for regular files or directories. 2596 * 2597 * Normally we would also not want to allow negative offsets for 2598 * character and block-special devices. However kvm addresses 2599 * on 64 bit architectures might appear to be negative and must 2600 * be allowed. 
2601 */ 2602 if (error == 0) { 2603 if (new_offset < 0 && 2604 (vp->v_type == VREG || vp->v_type == VDIR)) { 2605 error = EINVAL; 2606 } else { 2607 fp->f_offset = new_offset; 2608 } 2609 } 2610 *res = fp->f_offset; 2611 spin_unlock(&fp->f_spin); 2612 done: 2613 fdrop(fp); 2614 return (error); 2615 } 2616 2617 /* 2618 * lseek_args(int fd, int pad, off_t offset, int whence) 2619 * 2620 * Reposition read/write file offset. 2621 */ 2622 int 2623 sys_lseek(struct lseek_args *uap) 2624 { 2625 int error; 2626 2627 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2628 &uap->sysmsg_offset); 2629 2630 return (error); 2631 } 2632 2633 /* 2634 * Check if current process can access given file. amode is a bitmask of *_OK 2635 * access bits. flags is a bitmask of AT_* flags. 2636 */ 2637 int 2638 kern_access(struct nlookupdata *nd, int amode, int flags) 2639 { 2640 struct vnode *vp; 2641 int error, mode; 2642 2643 if (flags & ~AT_EACCESS) 2644 return (EINVAL); 2645 nd->nl_flags |= NLC_SHAREDLOCK; 2646 if ((error = nlookup(nd)) != 0) 2647 return (error); 2648 retry: 2649 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2650 if (error) 2651 return (error); 2652 2653 /* Flags == 0 means only check for existence. */ 2654 if (amode) { 2655 mode = 0; 2656 if (amode & R_OK) 2657 mode |= VREAD; 2658 if (amode & W_OK) 2659 mode |= VWRITE; 2660 if (amode & X_OK) 2661 mode |= VEXEC; 2662 if ((mode & VWRITE) == 0 || 2663 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2664 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2665 2666 /* 2667 * If the file handle is stale we have to re-resolve the 2668 * entry with the ncp held exclusively. This is a hack 2669 * at the moment. 
2670 */ 2671 if (error == ESTALE) { 2672 vput(vp); 2673 cache_unlock(&nd->nl_nch); 2674 cache_lock(&nd->nl_nch); 2675 cache_setunresolved(&nd->nl_nch); 2676 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2677 if (error == 0) { 2678 vp = NULL; 2679 goto retry; 2680 } 2681 return(error); 2682 } 2683 } 2684 vput(vp); 2685 return (error); 2686 } 2687 2688 /* 2689 * access_args(char *path, int flags) 2690 * 2691 * Check access permissions. 2692 */ 2693 int 2694 sys_access(struct access_args *uap) 2695 { 2696 struct nlookupdata nd; 2697 int error; 2698 2699 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2700 if (error == 0) 2701 error = kern_access(&nd, uap->flags, 0); 2702 nlookup_done(&nd); 2703 return (error); 2704 } 2705 2706 2707 /* 2708 * eaccess_args(char *path, int flags) 2709 * 2710 * Check access permissions. 2711 */ 2712 int 2713 sys_eaccess(struct eaccess_args *uap) 2714 { 2715 struct nlookupdata nd; 2716 int error; 2717 2718 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2719 if (error == 0) 2720 error = kern_access(&nd, uap->flags, AT_EACCESS); 2721 nlookup_done(&nd); 2722 return (error); 2723 } 2724 2725 2726 /* 2727 * faccessat_args(int fd, char *path, int amode, int flags) 2728 * 2729 * Check access permissions. 
2730 */ 2731 int 2732 sys_faccessat(struct faccessat_args *uap) 2733 { 2734 struct nlookupdata nd; 2735 struct file *fp; 2736 int error; 2737 2738 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2739 NLC_FOLLOW); 2740 if (error == 0) 2741 error = kern_access(&nd, uap->amode, uap->flags); 2742 nlookup_done_at(&nd, fp); 2743 return (error); 2744 } 2745 2746 int 2747 kern_stat(struct nlookupdata *nd, struct stat *st) 2748 { 2749 int error; 2750 struct vnode *vp; 2751 2752 nd->nl_flags |= NLC_SHAREDLOCK; 2753 if ((error = nlookup(nd)) != 0) 2754 return (error); 2755 again: 2756 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2757 return (ENOENT); 2758 2759 if ((error = vget(vp, LK_SHARED)) != 0) 2760 return (error); 2761 error = vn_stat(vp, st, nd->nl_cred); 2762 2763 /* 2764 * If the file handle is stale we have to re-resolve the 2765 * entry with the ncp held exclusively. This is a hack 2766 * at the moment. 2767 */ 2768 if (error == ESTALE) { 2769 vput(vp); 2770 cache_unlock(&nd->nl_nch); 2771 cache_lock(&nd->nl_nch); 2772 cache_setunresolved(&nd->nl_nch); 2773 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2774 if (error == 0) 2775 goto again; 2776 } else { 2777 vput(vp); 2778 } 2779 return (error); 2780 } 2781 2782 /* 2783 * stat_args(char *path, struct stat *ub) 2784 * 2785 * Get file status; this version follows links. 2786 */ 2787 int 2788 sys_stat(struct stat_args *uap) 2789 { 2790 struct nlookupdata nd; 2791 struct stat st; 2792 int error; 2793 2794 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2795 if (error == 0) { 2796 error = kern_stat(&nd, &st); 2797 if (error == 0) 2798 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2799 } 2800 nlookup_done(&nd); 2801 return (error); 2802 } 2803 2804 /* 2805 * lstat_args(char *path, struct stat *ub) 2806 * 2807 * Get file status; this version does not follow links. 
2808 */ 2809 int 2810 sys_lstat(struct lstat_args *uap) 2811 { 2812 struct nlookupdata nd; 2813 struct stat st; 2814 int error; 2815 2816 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2817 if (error == 0) { 2818 error = kern_stat(&nd, &st); 2819 if (error == 0) 2820 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2821 } 2822 nlookup_done(&nd); 2823 return (error); 2824 } 2825 2826 /* 2827 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2828 * 2829 * Get status of file pointed to by fd/path. 2830 */ 2831 int 2832 sys_fstatat(struct fstatat_args *uap) 2833 { 2834 struct nlookupdata nd; 2835 struct stat st; 2836 int error; 2837 int flags; 2838 struct file *fp; 2839 2840 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2841 return (EINVAL); 2842 2843 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2844 2845 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2846 UIO_USERSPACE, flags); 2847 if (error == 0) { 2848 error = kern_stat(&nd, &st); 2849 if (error == 0) 2850 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2851 } 2852 nlookup_done_at(&nd, fp); 2853 return (error); 2854 } 2855 2856 static int 2857 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2858 { 2859 struct nlookupdata nd; 2860 struct vnode *vp; 2861 int error; 2862 2863 vp = NULL; 2864 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2865 if (error == 0) 2866 error = nlookup(&nd); 2867 if (error == 0) 2868 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2869 nlookup_done(&nd); 2870 if (error == 0) { 2871 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2872 vput(vp); 2873 } 2874 return (error); 2875 } 2876 2877 /* 2878 * pathconf_Args(char *path, int name) 2879 * 2880 * Get configurable pathname variables. 
2881 */ 2882 int 2883 sys_pathconf(struct pathconf_args *uap) 2884 { 2885 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2886 &uap->sysmsg_reg)); 2887 } 2888 2889 /* 2890 * lpathconf_Args(char *path, int name) 2891 * 2892 * Get configurable pathname variables, but don't follow symlinks. 2893 */ 2894 int 2895 sys_lpathconf(struct lpathconf_args *uap) 2896 { 2897 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2898 } 2899 2900 /* 2901 * XXX: daver 2902 * kern_readlink isn't properly split yet. There is a copyin burried 2903 * in VOP_READLINK(). 2904 */ 2905 int 2906 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2907 { 2908 struct thread *td = curthread; 2909 struct vnode *vp; 2910 struct iovec aiov; 2911 struct uio auio; 2912 int error; 2913 2914 nd->nl_flags |= NLC_SHAREDLOCK; 2915 if ((error = nlookup(nd)) != 0) 2916 return (error); 2917 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2918 if (error) 2919 return (error); 2920 if (vp->v_type != VLNK) { 2921 error = EINVAL; 2922 } else { 2923 aiov.iov_base = buf; 2924 aiov.iov_len = count; 2925 auio.uio_iov = &aiov; 2926 auio.uio_iovcnt = 1; 2927 auio.uio_offset = 0; 2928 auio.uio_rw = UIO_READ; 2929 auio.uio_segflg = UIO_USERSPACE; 2930 auio.uio_td = td; 2931 auio.uio_resid = count; 2932 error = VOP_READLINK(vp, &auio, td->td_ucred); 2933 } 2934 vput(vp); 2935 *res = count - auio.uio_resid; 2936 return (error); 2937 } 2938 2939 /* 2940 * readlink_args(char *path, char *buf, int count) 2941 * 2942 * Return target name of a symbolic link. 
2943 */ 2944 int 2945 sys_readlink(struct readlink_args *uap) 2946 { 2947 struct nlookupdata nd; 2948 int error; 2949 2950 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2951 if (error == 0) { 2952 error = kern_readlink(&nd, uap->buf, uap->count, 2953 &uap->sysmsg_result); 2954 } 2955 nlookup_done(&nd); 2956 return (error); 2957 } 2958 2959 /* 2960 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2961 * 2962 * Return target name of a symbolic link. The path is relative to the 2963 * directory associated with fd. 2964 */ 2965 int 2966 sys_readlinkat(struct readlinkat_args *uap) 2967 { 2968 struct nlookupdata nd; 2969 struct file *fp; 2970 int error; 2971 2972 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2973 if (error == 0) { 2974 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2975 &uap->sysmsg_result); 2976 } 2977 nlookup_done_at(&nd, fp); 2978 return (error); 2979 } 2980 2981 static int 2982 setfflags(struct vnode *vp, int flags) 2983 { 2984 struct thread *td = curthread; 2985 int error; 2986 struct vattr vattr; 2987 2988 /* 2989 * Prevent non-root users from setting flags on devices. When 2990 * a device is reused, users can retain ownership of the device 2991 * if they are allowed to set flags and programs assume that 2992 * chown can't fail when done as root. 2993 */ 2994 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2995 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2996 return (error); 2997 2998 /* 2999 * note: vget is required for any operation that might mod the vnode 3000 * so VINACTIVE is properly cleared. 3001 */ 3002 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3003 VATTR_NULL(&vattr); 3004 vattr.va_flags = flags; 3005 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3006 vput(vp); 3007 } 3008 return (error); 3009 } 3010 3011 /* 3012 * chflags(char *path, int flags) 3013 * 3014 * Change flags of a file given a path name. 
3015 */ 3016 int 3017 sys_chflags(struct chflags_args *uap) 3018 { 3019 struct nlookupdata nd; 3020 struct vnode *vp; 3021 int error; 3022 3023 vp = NULL; 3024 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3025 if (error == 0) 3026 error = nlookup(&nd); 3027 if (error == 0) 3028 error = ncp_writechk(&nd.nl_nch); 3029 if (error == 0) 3030 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3031 nlookup_done(&nd); 3032 if (error == 0) { 3033 error = setfflags(vp, uap->flags); 3034 vrele(vp); 3035 } 3036 return (error); 3037 } 3038 3039 /* 3040 * lchflags(char *path, int flags) 3041 * 3042 * Change flags of a file given a path name, but don't follow symlinks. 3043 */ 3044 int 3045 sys_lchflags(struct lchflags_args *uap) 3046 { 3047 struct nlookupdata nd; 3048 struct vnode *vp; 3049 int error; 3050 3051 vp = NULL; 3052 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3053 if (error == 0) 3054 error = nlookup(&nd); 3055 if (error == 0) 3056 error = ncp_writechk(&nd.nl_nch); 3057 if (error == 0) 3058 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3059 nlookup_done(&nd); 3060 if (error == 0) { 3061 error = setfflags(vp, uap->flags); 3062 vrele(vp); 3063 } 3064 return (error); 3065 } 3066 3067 /* 3068 * fchflags_args(int fd, int flags) 3069 * 3070 * Change flags of a file given a file descriptor. 
3071 */ 3072 int 3073 sys_fchflags(struct fchflags_args *uap) 3074 { 3075 struct thread *td = curthread; 3076 struct proc *p = td->td_proc; 3077 struct file *fp; 3078 int error; 3079 3080 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3081 return (error); 3082 if (fp->f_nchandle.ncp) 3083 error = ncp_writechk(&fp->f_nchandle); 3084 if (error == 0) 3085 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3086 fdrop(fp); 3087 return (error); 3088 } 3089 3090 /* 3091 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3092 * change flags given a pathname relative to a filedescriptor 3093 */ 3094 int sys_chflagsat(struct chflagsat_args *uap) 3095 { 3096 struct nlookupdata nd; 3097 struct vnode *vp; 3098 struct file *fp; 3099 int error; 3100 int lookupflags; 3101 3102 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3103 return (EINVAL); 3104 3105 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3106 3107 vp = NULL; 3108 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3109 if (error == 0) 3110 error = nlookup(&nd); 3111 if (error == 0) 3112 error = ncp_writechk(&nd.nl_nch); 3113 if (error == 0) 3114 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3115 nlookup_done_at(&nd, fp); 3116 if (error == 0) { 3117 error = setfflags(vp, uap->flags); 3118 vrele(vp); 3119 } 3120 return (error); 3121 } 3122 3123 3124 static int 3125 setfmode(struct vnode *vp, int mode) 3126 { 3127 struct thread *td = curthread; 3128 int error; 3129 struct vattr vattr; 3130 3131 /* 3132 * note: vget is required for any operation that might mod the vnode 3133 * so VINACTIVE is properly cleared. 
3134 */ 3135 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3136 VATTR_NULL(&vattr); 3137 vattr.va_mode = mode & ALLPERMS; 3138 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3139 vput(vp); 3140 } 3141 return error; 3142 } 3143 3144 int 3145 kern_chmod(struct nlookupdata *nd, int mode) 3146 { 3147 struct vnode *vp; 3148 int error; 3149 3150 if ((error = nlookup(nd)) != 0) 3151 return (error); 3152 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3153 return (error); 3154 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3155 error = setfmode(vp, mode); 3156 vrele(vp); 3157 return (error); 3158 } 3159 3160 /* 3161 * chmod_args(char *path, int mode) 3162 * 3163 * Change mode of a file given path name. 3164 */ 3165 int 3166 sys_chmod(struct chmod_args *uap) 3167 { 3168 struct nlookupdata nd; 3169 int error; 3170 3171 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3172 if (error == 0) 3173 error = kern_chmod(&nd, uap->mode); 3174 nlookup_done(&nd); 3175 return (error); 3176 } 3177 3178 /* 3179 * lchmod_args(char *path, int mode) 3180 * 3181 * Change mode of a file given path name (don't follow links.) 3182 */ 3183 int 3184 sys_lchmod(struct lchmod_args *uap) 3185 { 3186 struct nlookupdata nd; 3187 int error; 3188 3189 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3190 if (error == 0) 3191 error = kern_chmod(&nd, uap->mode); 3192 nlookup_done(&nd); 3193 return (error); 3194 } 3195 3196 /* 3197 * fchmod_args(int fd, int mode) 3198 * 3199 * Change mode of a file given a file descriptor. 
3200 */ 3201 int 3202 sys_fchmod(struct fchmod_args *uap) 3203 { 3204 struct thread *td = curthread; 3205 struct proc *p = td->td_proc; 3206 struct file *fp; 3207 int error; 3208 3209 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3210 return (error); 3211 if (fp->f_nchandle.ncp) 3212 error = ncp_writechk(&fp->f_nchandle); 3213 if (error == 0) 3214 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3215 fdrop(fp); 3216 return (error); 3217 } 3218 3219 /* 3220 * fchmodat_args(char *path, int mode) 3221 * 3222 * Change mode of a file pointed to by fd/path. 3223 */ 3224 int 3225 sys_fchmodat(struct fchmodat_args *uap) 3226 { 3227 struct nlookupdata nd; 3228 struct file *fp; 3229 int error; 3230 int flags; 3231 3232 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3233 return (EINVAL); 3234 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3235 3236 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3237 UIO_USERSPACE, flags); 3238 if (error == 0) 3239 error = kern_chmod(&nd, uap->mode); 3240 nlookup_done_at(&nd, fp); 3241 return (error); 3242 } 3243 3244 static int 3245 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3246 { 3247 struct thread *td = curthread; 3248 int error; 3249 struct vattr vattr; 3250 uid_t o_uid; 3251 gid_t o_gid; 3252 uint64_t size; 3253 3254 /* 3255 * note: vget is required for any operation that might mod the vnode 3256 * so VINACTIVE is properly cleared. 
3257 */ 3258 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3259 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3260 return error; 3261 o_uid = vattr.va_uid; 3262 o_gid = vattr.va_gid; 3263 size = vattr.va_size; 3264 3265 VATTR_NULL(&vattr); 3266 vattr.va_uid = uid; 3267 vattr.va_gid = gid; 3268 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3269 vput(vp); 3270 } 3271 3272 if (error == 0) { 3273 if (uid == -1) 3274 uid = o_uid; 3275 if (gid == -1) 3276 gid = o_gid; 3277 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3278 VFS_ACCOUNT(mp, uid, gid, size); 3279 } 3280 3281 return error; 3282 } 3283 3284 int 3285 kern_chown(struct nlookupdata *nd, int uid, int gid) 3286 { 3287 struct vnode *vp; 3288 int error; 3289 3290 if ((error = nlookup(nd)) != 0) 3291 return (error); 3292 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3293 return (error); 3294 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3295 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3296 vrele(vp); 3297 return (error); 3298 } 3299 3300 /* 3301 * chown(char *path, int uid, int gid) 3302 * 3303 * Set ownership given a path name. 3304 */ 3305 int 3306 sys_chown(struct chown_args *uap) 3307 { 3308 struct nlookupdata nd; 3309 int error; 3310 3311 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3312 if (error == 0) 3313 error = kern_chown(&nd, uap->uid, uap->gid); 3314 nlookup_done(&nd); 3315 return (error); 3316 } 3317 3318 /* 3319 * lchown_args(char *path, int uid, int gid) 3320 * 3321 * Set ownership given a path name, do not cross symlinks. 3322 */ 3323 int 3324 sys_lchown(struct lchown_args *uap) 3325 { 3326 struct nlookupdata nd; 3327 int error; 3328 3329 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3330 if (error == 0) 3331 error = kern_chown(&nd, uap->uid, uap->gid); 3332 nlookup_done(&nd); 3333 return (error); 3334 } 3335 3336 /* 3337 * fchown_args(int fd, int uid, int gid) 3338 * 3339 * Set ownership given a file descriptor. 
3340 */ 3341 int 3342 sys_fchown(struct fchown_args *uap) 3343 { 3344 struct thread *td = curthread; 3345 struct proc *p = td->td_proc; 3346 struct file *fp; 3347 int error; 3348 3349 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3350 return (error); 3351 if (fp->f_nchandle.ncp) 3352 error = ncp_writechk(&fp->f_nchandle); 3353 if (error == 0) 3354 error = setfown(p->p_fd->fd_ncdir.mount, 3355 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3356 fdrop(fp); 3357 return (error); 3358 } 3359 3360 /* 3361 * fchownat(int fd, char *path, int uid, int gid, int flags) 3362 * 3363 * Set ownership of file pointed to by fd/path. 3364 */ 3365 int 3366 sys_fchownat(struct fchownat_args *uap) 3367 { 3368 struct nlookupdata nd; 3369 struct file *fp; 3370 int error; 3371 int flags; 3372 3373 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3374 return (EINVAL); 3375 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3376 3377 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3378 UIO_USERSPACE, flags); 3379 if (error == 0) 3380 error = kern_chown(&nd, uap->uid, uap->gid); 3381 nlookup_done_at(&nd, fp); 3382 return (error); 3383 } 3384 3385 3386 static int 3387 getutimes(struct timeval *tvp, struct timespec *tsp) 3388 { 3389 struct timeval tv[2]; 3390 int error; 3391 3392 if (tvp == NULL) { 3393 microtime(&tv[0]); 3394 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3395 tsp[1] = tsp[0]; 3396 } else { 3397 if ((error = itimerfix(tvp)) != 0) 3398 return (error); 3399 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3400 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3401 } 3402 return 0; 3403 } 3404 3405 static int 3406 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3407 { 3408 struct timespec tsnow; 3409 int error; 3410 3411 *nullflag = 0; 3412 nanotime(&tsnow); 3413 if (ts == NULL) { 3414 newts[0] = tsnow; 3415 newts[1] = tsnow; 3416 *nullflag = 1; 3417 return (0); 3418 } 3419 3420 newts[0] = ts[0]; 3421 newts[1] = ts[1]; 3422 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3423 return (0); 3424 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3425 *nullflag = 1; 3426 3427 if (newts[0].tv_nsec == UTIME_OMIT) 3428 newts[0].tv_sec = VNOVAL; 3429 else if (newts[0].tv_nsec == UTIME_NOW) 3430 newts[0] = tsnow; 3431 else if ((error = itimespecfix(&newts[0])) != 0) 3432 return (error); 3433 3434 if (newts[1].tv_nsec == UTIME_OMIT) 3435 newts[1].tv_sec = VNOVAL; 3436 else if (newts[1].tv_nsec == UTIME_NOW) 3437 newts[1] = tsnow; 3438 else if ((error = itimespecfix(&newts[1])) != 0) 3439 return (error); 3440 3441 return (0); 3442 } 3443 3444 static int 3445 setutimes(struct vnode *vp, struct vattr *vattr, 3446 const struct timespec *ts, int nullflag) 3447 { 3448 struct thread *td = curthread; 3449 int error; 3450 3451 VATTR_NULL(vattr); 3452 vattr->va_atime = ts[0]; 3453 vattr->va_mtime = ts[1]; 3454 if (nullflag) 3455 vattr->va_vaflags |= VA_UTIMES_NULL; 3456 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3457 3458 return error; 3459 } 3460 3461 int 3462 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3463 { 3464 struct timespec ts[2]; 3465 int error; 3466 3467 if (tptr) { 3468 if ((error = getutimes(tptr, ts)) != 0) 3469 return (error); 3470 } 3471 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3472 return (error); 3473 } 3474 3475 /* 3476 * utimes_args(char *path, struct timeval *tptr) 3477 * 3478 * Set the access and modification times of a file. 3479 */ 3480 int 3481 sys_utimes(struct utimes_args *uap) 3482 { 3483 struct timeval tv[2]; 3484 struct nlookupdata nd; 3485 int error; 3486 3487 if (uap->tptr) { 3488 error = copyin(uap->tptr, tv, sizeof(tv)); 3489 if (error) 3490 return (error); 3491 } 3492 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3493 if (error == 0) 3494 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3495 nlookup_done(&nd); 3496 return (error); 3497 } 3498 3499 /* 3500 * lutimes_args(char *path, struct timeval *tptr) 3501 * 3502 * Set the access and modification times of a file. 3503 */ 3504 int 3505 sys_lutimes(struct lutimes_args *uap) 3506 { 3507 struct timeval tv[2]; 3508 struct nlookupdata nd; 3509 int error; 3510 3511 if (uap->tptr) { 3512 error = copyin(uap->tptr, tv, sizeof(tv)); 3513 if (error) 3514 return (error); 3515 } 3516 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3517 if (error == 0) 3518 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3519 nlookup_done(&nd); 3520 return (error); 3521 } 3522 3523 /* 3524 * Set utimes on a file descriptor. The creds used to open the 3525 * file are used to determine whether the operation is allowed 3526 * or not. 3527 */ 3528 int 3529 kern_futimens(int fd, struct timespec *ts) 3530 { 3531 struct thread *td = curthread; 3532 struct proc *p = td->td_proc; 3533 struct timespec newts[2]; 3534 struct file *fp; 3535 struct vnode *vp; 3536 struct vattr vattr; 3537 int nullflag; 3538 int error; 3539 3540 error = getutimens(ts, newts, &nullflag); 3541 if (error) 3542 return (error); 3543 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3544 return (error); 3545 if (fp->f_nchandle.ncp) 3546 error = ncp_writechk(&fp->f_nchandle); 3547 if (error == 0) { 3548 vp = fp->f_data; 3549 error = vget(vp, LK_EXCLUSIVE); 3550 if (error == 0) { 3551 error = VOP_GETATTR(vp, &vattr); 3552 if (error == 0) { 3553 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3554 fp->f_cred); 3555 } 3556 if (error == 0) { 3557 error = setutimes(vp, &vattr, newts, nullflag); 3558 } 3559 vput(vp); 3560 } 3561 } 3562 fdrop(fp); 3563 return (error); 3564 } 3565 3566 /* 3567 * futimens_args(int fd, struct timespec *ts) 3568 * 3569 * Set the access and modification times of a file. 
3570 */ 3571 int 3572 sys_futimens(struct futimens_args *uap) 3573 { 3574 struct timespec ts[2]; 3575 int error; 3576 3577 if (uap->ts) { 3578 error = copyin(uap->ts, ts, sizeof(ts)); 3579 if (error) 3580 return (error); 3581 } 3582 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3583 return (error); 3584 } 3585 3586 int 3587 kern_futimes(int fd, struct timeval *tptr) 3588 { 3589 struct timespec ts[2]; 3590 int error; 3591 3592 if (tptr) { 3593 if ((error = getutimes(tptr, ts)) != 0) 3594 return (error); 3595 } 3596 error = kern_futimens(fd, tptr ? ts : NULL); 3597 return (error); 3598 } 3599 3600 /* 3601 * futimes_args(int fd, struct timeval *tptr) 3602 * 3603 * Set the access and modification times of a file. 3604 */ 3605 int 3606 sys_futimes(struct futimes_args *uap) 3607 { 3608 struct timeval tv[2]; 3609 int error; 3610 3611 if (uap->tptr) { 3612 error = copyin(uap->tptr, tv, sizeof(tv)); 3613 if (error) 3614 return (error); 3615 } 3616 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3617 return (error); 3618 } 3619 3620 int 3621 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3622 { 3623 struct timespec newts[2]; 3624 struct vnode *vp; 3625 struct vattr vattr; 3626 int nullflag; 3627 int error; 3628 3629 if (flags & ~AT_SYMLINK_NOFOLLOW) 3630 return (EINVAL); 3631 3632 error = getutimens(ts, newts, &nullflag); 3633 if (error) 3634 return (error); 3635 3636 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3637 if ((error = nlookup(nd)) != 0) 3638 return (error); 3639 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3640 return (error); 3641 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3642 return (error); 3643 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3644 error = vget(vp, LK_EXCLUSIVE); 3645 if (error == 0) { 3646 error = setutimes(vp, &vattr, newts, nullflag); 3647 vput(vp); 3648 } 3649 } 3650 vrele(vp); 3651 return (error); 3652 } 3653 3654 /* 3655 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3656 * 3657 * Set file access and modification times of a file. 3658 */ 3659 int 3660 sys_utimensat(struct utimensat_args *uap) 3661 { 3662 struct timespec ts[2]; 3663 struct nlookupdata nd; 3664 struct file *fp; 3665 int error; 3666 int flags; 3667 3668 if (uap->ts) { 3669 error = copyin(uap->ts, ts, sizeof(ts)); 3670 if (error) 3671 return (error); 3672 } 3673 3674 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3675 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3676 UIO_USERSPACE, flags); 3677 if (error == 0) 3678 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3679 nlookup_done_at(&nd, fp); 3680 return (error); 3681 } 3682 3683 int 3684 kern_truncate(struct nlookupdata *nd, off_t length) 3685 { 3686 struct vnode *vp; 3687 struct vattr vattr; 3688 int error; 3689 uid_t uid = 0; 3690 gid_t gid = 0; 3691 uint64_t old_size = 0; 3692 3693 if (length < 0) 3694 return(EINVAL); 3695 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3696 if ((error = nlookup(nd)) != 0) 3697 return (error); 3698 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3699 return (error); 3700 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3701 return (error); 3702 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3703 if (error) { 3704 vrele(vp); 3705 return (error); 3706 } 3707 if (vp->v_type == VDIR) { 3708 error = EISDIR; 3709 goto done; 3710 } 3711 if (vfs_quota_enabled) { 3712 error = VOP_GETATTR(vp, &vattr); 3713 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3714 uid = vattr.va_uid; 3715 gid = vattr.va_gid; 3716 old_size = vattr.va_size; 3717 } 3718 3719 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3720 VATTR_NULL(&vattr); 3721 vattr.va_size = length; 3722 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3723 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3724 } 3725 done: 3726 vput(vp); 3727 return (error); 3728 } 3729 3730 /* 3731 * truncate(char *path, int pad, off_t length) 3732 * 3733 * Truncate a file given its path name. 
3734 */ 3735 int 3736 sys_truncate(struct truncate_args *uap) 3737 { 3738 struct nlookupdata nd; 3739 int error; 3740 3741 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3742 if (error == 0) 3743 error = kern_truncate(&nd, uap->length); 3744 nlookup_done(&nd); 3745 return error; 3746 } 3747 3748 int 3749 kern_ftruncate(int fd, off_t length) 3750 { 3751 struct thread *td = curthread; 3752 struct proc *p = td->td_proc; 3753 struct vattr vattr; 3754 struct vnode *vp; 3755 struct file *fp; 3756 int error; 3757 uid_t uid = 0; 3758 gid_t gid = 0; 3759 uint64_t old_size = 0; 3760 struct mount *mp; 3761 3762 if (length < 0) 3763 return(EINVAL); 3764 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3765 return (error); 3766 if (fp->f_nchandle.ncp) { 3767 error = ncp_writechk(&fp->f_nchandle); 3768 if (error) 3769 goto done; 3770 } 3771 if ((fp->f_flag & FWRITE) == 0) { 3772 error = EINVAL; 3773 goto done; 3774 } 3775 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3776 error = EINVAL; 3777 goto done; 3778 } 3779 vp = (struct vnode *)fp->f_data; 3780 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3781 if (vp->v_type == VDIR) { 3782 error = EISDIR; 3783 vn_unlock(vp); 3784 goto done; 3785 } 3786 3787 if (vfs_quota_enabled) { 3788 error = VOP_GETATTR(vp, &vattr); 3789 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3790 uid = vattr.va_uid; 3791 gid = vattr.va_gid; 3792 old_size = vattr.va_size; 3793 } 3794 3795 if ((error = vn_writechk(vp, NULL)) == 0) { 3796 VATTR_NULL(&vattr); 3797 vattr.va_size = length; 3798 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3799 mp = vq_vptomp(vp); 3800 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3801 } 3802 vn_unlock(vp); 3803 done: 3804 fdrop(fp); 3805 return (error); 3806 } 3807 3808 /* 3809 * ftruncate_args(int fd, int pad, off_t length) 3810 * 3811 * Truncate a file given a file descriptor. 
*/
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 *
 * With the vnode exclusively locked, the pages of its VM object are
 * cleaned (unless the mount has MNTK_NOMSYNC set), VOP_FSYNC is issued
 * with MNT_WAIT, and, if that succeeded and the vnode still has a mount,
 * buf_fsync() is called.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((obj = vp->v_object) != NULL) {
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Rename the entry resolved by 'fromnd' to the name described by 'tond'.
 * Both lookup structures must be initialized by the caller, which also
 * remains responsible for cleaning them up.
 *
 * Returns 0 on success (including when source and target are the same
 * namecache entry), EAGAIN when the namecache changed underneath us and
 * the whole operation must be restarted with fresh lookups, or another
 * errno (ENOENT, EINVAL, EXDEV, ENOTDIR, EISDIR, EPERM, or an error from
 * the lookups, the write check or the VOP).
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * Unlock the source nch so we can look up the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;

	/* Sample the generation before unlocking, for the recheck below. */
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do.
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond is marked destroyed, a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories' linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable.
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 *
	 * 'error' is 0 on entry here (every failure above returned), so it
	 * is only changed when one of the checks below trips.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	/* The parent directory holds are no longer needed past this point. */
	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
4072 */ 4073 int 4074 sys_rename(struct rename_args *uap) 4075 { 4076 struct nlookupdata fromnd, tond; 4077 int error; 4078 4079 do { 4080 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4081 if (error == 0) { 4082 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4083 if (error == 0) 4084 error = kern_rename(&fromnd, &tond); 4085 nlookup_done(&tond); 4086 } 4087 nlookup_done(&fromnd); 4088 } while (error == EAGAIN); 4089 return (error); 4090 } 4091 4092 /* 4093 * renameat_args(int oldfd, char *old, int newfd, char *new) 4094 * 4095 * Rename files using paths relative to the directories associated with 4096 * oldfd and newfd. Source and destination must either both be directories, 4097 * or both not be directories. If target is a directory, it must be empty. 4098 */ 4099 int 4100 sys_renameat(struct renameat_args *uap) 4101 { 4102 struct nlookupdata oldnd, newnd; 4103 struct file *oldfp, *newfp; 4104 int error; 4105 4106 do { 4107 error = nlookup_init_at(&oldnd, &oldfp, 4108 uap->oldfd, uap->old, 4109 UIO_USERSPACE, 0); 4110 if (error == 0) { 4111 error = nlookup_init_at(&newnd, &newfp, 4112 uap->newfd, uap->new, 4113 UIO_USERSPACE, 0); 4114 if (error == 0) 4115 error = kern_rename(&oldnd, &newnd); 4116 nlookup_done_at(&newnd, newfp); 4117 } 4118 nlookup_done_at(&oldnd, oldfp); 4119 } while (error == EAGAIN); 4120 return (error); 4121 } 4122 4123 int 4124 kern_mkdir(struct nlookupdata *nd, int mode) 4125 { 4126 struct thread *td = curthread; 4127 struct proc *p = td->td_proc; 4128 struct vnode *vp; 4129 struct vattr vattr; 4130 int error; 4131 4132 bwillinode(1); 4133 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4134 if ((error = nlookup(nd)) != 0) 4135 return (error); 4136 4137 if (nd->nl_nch.ncp->nc_vp) 4138 return (EEXIST); 4139 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4140 return (error); 4141 VATTR_NULL(&vattr); 4142 vattr.va_type = VDIR; 4143 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4144 4145 vp = NULL; 
4146 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4147 if (error == 0) 4148 vput(vp); 4149 return (error); 4150 } 4151 4152 /* 4153 * mkdir_args(char *path, int mode) 4154 * 4155 * Make a directory file. 4156 */ 4157 int 4158 sys_mkdir(struct mkdir_args *uap) 4159 { 4160 struct nlookupdata nd; 4161 int error; 4162 4163 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4164 if (error == 0) 4165 error = kern_mkdir(&nd, uap->mode); 4166 nlookup_done(&nd); 4167 return (error); 4168 } 4169 4170 /* 4171 * mkdirat_args(int fd, char *path, mode_t mode) 4172 * 4173 * Make a directory file. The path is relative to the directory associated 4174 * with fd. 4175 */ 4176 int 4177 sys_mkdirat(struct mkdirat_args *uap) 4178 { 4179 struct nlookupdata nd; 4180 struct file *fp; 4181 int error; 4182 4183 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4184 if (error == 0) 4185 error = kern_mkdir(&nd, uap->mode); 4186 nlookup_done_at(&nd, fp); 4187 return (error); 4188 } 4189 4190 int 4191 kern_rmdir(struct nlookupdata *nd) 4192 { 4193 int error; 4194 4195 bwillinode(1); 4196 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4197 if ((error = nlookup(nd)) != 0) 4198 return (error); 4199 4200 /* 4201 * Do not allow directories representing mount points to be 4202 * deleted, even if empty. Check write perms on mount point 4203 * in case the vnode is aliased (aka nullfs). 4204 */ 4205 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4206 return (EBUSY); 4207 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4208 return (error); 4209 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4210 return (error); 4211 } 4212 4213 /* 4214 * rmdir_args(char *path) 4215 * 4216 * Remove a directory file. 
4217 */ 4218 int 4219 sys_rmdir(struct rmdir_args *uap) 4220 { 4221 struct nlookupdata nd; 4222 int error; 4223 4224 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4225 if (error == 0) 4226 error = kern_rmdir(&nd); 4227 nlookup_done(&nd); 4228 return (error); 4229 } 4230 4231 int 4232 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4233 enum uio_seg direction) 4234 { 4235 struct thread *td = curthread; 4236 struct proc *p = td->td_proc; 4237 struct vnode *vp; 4238 struct file *fp; 4239 struct uio auio; 4240 struct iovec aiov; 4241 off_t loff; 4242 int error, eofflag; 4243 4244 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4245 return (error); 4246 if ((fp->f_flag & FREAD) == 0) { 4247 error = EBADF; 4248 goto done; 4249 } 4250 vp = (struct vnode *)fp->f_data; 4251 if (vp->v_type != VDIR) { 4252 error = EINVAL; 4253 goto done; 4254 } 4255 aiov.iov_base = buf; 4256 aiov.iov_len = count; 4257 auio.uio_iov = &aiov; 4258 auio.uio_iovcnt = 1; 4259 auio.uio_rw = UIO_READ; 4260 auio.uio_segflg = direction; 4261 auio.uio_td = td; 4262 auio.uio_resid = count; 4263 loff = auio.uio_offset = fp->f_offset; 4264 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4265 fp->f_offset = auio.uio_offset; 4266 if (error) 4267 goto done; 4268 4269 /* 4270 * WARNING! *basep may not be wide enough to accomodate the 4271 * seek offset. XXX should we hack this to return the upper 32 bits 4272 * for offsets greater then 4G? 4273 */ 4274 if (basep) { 4275 *basep = (long)loff; 4276 } 4277 *res = count - auio.uio_resid; 4278 done: 4279 fdrop(fp); 4280 return (error); 4281 } 4282 4283 /* 4284 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4285 * 4286 * Read a block of directory entries in a file system independent format. 
4287 */ 4288 int 4289 sys_getdirentries(struct getdirentries_args *uap) 4290 { 4291 long base; 4292 int error; 4293 4294 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4295 &uap->sysmsg_result, UIO_USERSPACE); 4296 4297 if (error == 0 && uap->basep) 4298 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4299 return (error); 4300 } 4301 4302 /* 4303 * getdents_args(int fd, char *buf, size_t count) 4304 */ 4305 int 4306 sys_getdents(struct getdents_args *uap) 4307 { 4308 int error; 4309 4310 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4311 &uap->sysmsg_result, UIO_USERSPACE); 4312 4313 return (error); 4314 } 4315 4316 /* 4317 * Set the mode mask for creation of filesystem nodes. 4318 * 4319 * umask(int newmask) 4320 */ 4321 int 4322 sys_umask(struct umask_args *uap) 4323 { 4324 struct thread *td = curthread; 4325 struct proc *p = td->td_proc; 4326 struct filedesc *fdp; 4327 4328 fdp = p->p_fd; 4329 uap->sysmsg_result = fdp->fd_cmask; 4330 fdp->fd_cmask = uap->newmask & ALLPERMS; 4331 return (0); 4332 } 4333 4334 /* 4335 * revoke(char *path) 4336 * 4337 * Void all references to file by ripping underlying filesystem 4338 * away from vnode. 
4339 */ 4340 int 4341 sys_revoke(struct revoke_args *uap) 4342 { 4343 struct nlookupdata nd; 4344 struct vattr vattr; 4345 struct vnode *vp; 4346 struct ucred *cred; 4347 int error; 4348 4349 vp = NULL; 4350 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4351 if (error == 0) 4352 error = nlookup(&nd); 4353 if (error == 0) 4354 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4355 cred = crhold(nd.nl_cred); 4356 nlookup_done(&nd); 4357 if (error == 0) { 4358 if (error == 0) 4359 error = VOP_GETATTR(vp, &vattr); 4360 if (error == 0 && cred->cr_uid != vattr.va_uid) 4361 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4362 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4363 if (vcount(vp) > 0) 4364 error = vrevoke(vp, cred); 4365 } else if (error == 0) { 4366 error = vrevoke(vp, cred); 4367 } 4368 vrele(vp); 4369 } 4370 if (cred) 4371 crfree(cred); 4372 return (error); 4373 } 4374 4375 /* 4376 * getfh_args(char *fname, fhandle_t *fhp) 4377 * 4378 * Get (NFS) file handle 4379 * 4380 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4381 * mount. This allows nullfs mounts to be explicitly exported. 4382 * 4383 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4384 * 4385 * nullfs mounts of subdirectories are not safe. That is, it will 4386 * work, but you do not really have protection against access to 4387 * the related parent directories. 
4388 */ 4389 int 4390 sys_getfh(struct getfh_args *uap) 4391 { 4392 struct thread *td = curthread; 4393 struct nlookupdata nd; 4394 fhandle_t fh; 4395 struct vnode *vp; 4396 struct mount *mp; 4397 int error; 4398 4399 /* 4400 * Must be super user 4401 */ 4402 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4403 return (error); 4404 4405 vp = NULL; 4406 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4407 if (error == 0) 4408 error = nlookup(&nd); 4409 if (error == 0) 4410 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4411 mp = nd.nl_nch.mount; 4412 nlookup_done(&nd); 4413 if (error == 0) { 4414 bzero(&fh, sizeof(fh)); 4415 fh.fh_fsid = mp->mnt_stat.f_fsid; 4416 error = VFS_VPTOFH(vp, &fh.fh_fid); 4417 vput(vp); 4418 if (error == 0) 4419 error = copyout(&fh, uap->fhp, sizeof(fh)); 4420 } 4421 return (error); 4422 } 4423 4424 /* 4425 * fhopen_args(const struct fhandle *u_fhp, int flags) 4426 * 4427 * syscall for the rpc.lockd to use to translate a NFS file handle into 4428 * an open descriptor. 4429 * 4430 * warning: do not remove the priv_check() call or this becomes one giant 4431 * security hole. 4432 */ 4433 int 4434 sys_fhopen(struct fhopen_args *uap) 4435 { 4436 struct thread *td = curthread; 4437 struct filedesc *fdp = td->td_proc->p_fd; 4438 struct mount *mp; 4439 struct vnode *vp; 4440 struct fhandle fhp; 4441 struct vattr vat; 4442 struct vattr *vap = &vat; 4443 struct flock lf; 4444 int fmode, mode, error = 0, type; 4445 struct file *nfp; 4446 struct file *fp; 4447 int indx; 4448 4449 /* 4450 * Must be super user 4451 */ 4452 error = priv_check(td, PRIV_ROOT); 4453 if (error) 4454 return (error); 4455 4456 fmode = FFLAGS(uap->flags); 4457 4458 /* 4459 * Why not allow a non-read/write open for our lockd? 
4460 */ 4461 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4462 return (EINVAL); 4463 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4464 if (error) 4465 return(error); 4466 4467 /* 4468 * Find the mount point 4469 */ 4470 mp = vfs_getvfs(&fhp.fh_fsid); 4471 if (mp == NULL) { 4472 error = ESTALE; 4473 goto done; 4474 } 4475 /* now give me my vnode, it gets returned to me locked */ 4476 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4477 if (error) 4478 goto done; 4479 /* 4480 * from now on we have to make sure not 4481 * to forget about the vnode 4482 * any error that causes an abort must vput(vp) 4483 * just set error = err and 'goto bad;'. 4484 */ 4485 4486 /* 4487 * from vn_open 4488 */ 4489 if (vp->v_type == VLNK) { 4490 error = EMLINK; 4491 goto bad; 4492 } 4493 if (vp->v_type == VSOCK) { 4494 error = EOPNOTSUPP; 4495 goto bad; 4496 } 4497 mode = 0; 4498 if (fmode & (FWRITE | O_TRUNC)) { 4499 if (vp->v_type == VDIR) { 4500 error = EISDIR; 4501 goto bad; 4502 } 4503 error = vn_writechk(vp, NULL); 4504 if (error) 4505 goto bad; 4506 mode |= VWRITE; 4507 } 4508 if (fmode & FREAD) 4509 mode |= VREAD; 4510 if (mode) { 4511 error = VOP_ACCESS(vp, mode, td->td_ucred); 4512 if (error) 4513 goto bad; 4514 } 4515 if (fmode & O_TRUNC) { 4516 vn_unlock(vp); /* XXX */ 4517 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4518 VATTR_NULL(vap); 4519 vap->va_size = 0; 4520 error = VOP_SETATTR(vp, vap, td->td_ucred); 4521 if (error) 4522 goto bad; 4523 } 4524 4525 /* 4526 * VOP_OPEN needs the file pointer so it can potentially override 4527 * it. 4528 * 4529 * WARNING! no f_nchandle will be associated when fhopen()ing a 4530 * directory. XXX 4531 */ 4532 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4533 goto bad; 4534 fp = nfp; 4535 4536 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4537 if (error) { 4538 /* 4539 * setting f_ops this way prevents VOP_CLOSE from being 4540 * called or fdrop() releasing the vp from v_data. 
Since 4541 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4542 */ 4543 fp->f_ops = &badfileops; 4544 fp->f_data = NULL; 4545 goto bad_drop; 4546 } 4547 4548 /* 4549 * The fp is given its own reference, we still have our ref and lock. 4550 * 4551 * Assert that all regular files must be created with a VM object. 4552 */ 4553 if (vp->v_type == VREG && vp->v_object == NULL) { 4554 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4555 goto bad_drop; 4556 } 4557 4558 /* 4559 * The open was successful. Handle any locking requirements. 4560 */ 4561 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4562 lf.l_whence = SEEK_SET; 4563 lf.l_start = 0; 4564 lf.l_len = 0; 4565 if (fmode & O_EXLOCK) 4566 lf.l_type = F_WRLCK; 4567 else 4568 lf.l_type = F_RDLCK; 4569 if (fmode & FNONBLOCK) 4570 type = 0; 4571 else 4572 type = F_WAIT; 4573 vn_unlock(vp); 4574 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4575 /* 4576 * release our private reference. 4577 */ 4578 fsetfd(fdp, NULL, indx); 4579 fdrop(fp); 4580 vrele(vp); 4581 goto done; 4582 } 4583 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4584 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4585 } 4586 4587 /* 4588 * Clean up. Associate the file pointer with the previously 4589 * reserved descriptor and return it. 
4590 */ 4591 vput(vp); 4592 if (uap->flags & O_CLOEXEC) 4593 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4594 fsetfd(fdp, fp, indx); 4595 fdrop(fp); 4596 uap->sysmsg_result = indx; 4597 return (error); 4598 4599 bad_drop: 4600 fsetfd(fdp, NULL, indx); 4601 fdrop(fp); 4602 bad: 4603 vput(vp); 4604 done: 4605 return (error); 4606 } 4607 4608 /* 4609 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4610 */ 4611 int 4612 sys_fhstat(struct fhstat_args *uap) 4613 { 4614 struct thread *td = curthread; 4615 struct stat sb; 4616 fhandle_t fh; 4617 struct mount *mp; 4618 struct vnode *vp; 4619 int error; 4620 4621 /* 4622 * Must be super user 4623 */ 4624 error = priv_check(td, PRIV_ROOT); 4625 if (error) 4626 return (error); 4627 4628 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4629 if (error) 4630 return (error); 4631 4632 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4633 error = ESTALE; 4634 if (error == 0) { 4635 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4636 error = vn_stat(vp, &sb, td->td_ucred); 4637 vput(vp); 4638 } 4639 } 4640 if (error == 0) 4641 error = copyout(&sb, uap->sb, sizeof(sb)); 4642 return (error); 4643 } 4644 4645 /* 4646 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4647 */ 4648 int 4649 sys_fhstatfs(struct fhstatfs_args *uap) 4650 { 4651 struct thread *td = curthread; 4652 struct proc *p = td->td_proc; 4653 struct statfs *sp; 4654 struct mount *mp; 4655 struct vnode *vp; 4656 struct statfs sb; 4657 char *fullpath, *freepath; 4658 fhandle_t fh; 4659 int error; 4660 4661 /* 4662 * Must be super user 4663 */ 4664 if ((error = priv_check(td, PRIV_ROOT))) 4665 return (error); 4666 4667 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4668 return (error); 4669 4670 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4671 error = ESTALE; 4672 goto done; 4673 } 4674 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4675 error = ESTALE; 4676 goto done; 4677 } 4678 4679 if ((error = VFS_FHTOVP(mp, NULL, 
&fh.fh_fid, &vp)) != 0) 4680 goto done; 4681 mp = vp->v_mount; 4682 sp = &mp->mnt_stat; 4683 vput(vp); 4684 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4685 goto done; 4686 4687 error = mount_path(p, mp, &fullpath, &freepath); 4688 if (error) 4689 goto done; 4690 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4691 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4692 kfree(freepath, M_TEMP); 4693 4694 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4695 if (priv_check(td, PRIV_ROOT)) { 4696 bcopy(sp, &sb, sizeof(sb)); 4697 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4698 sp = &sb; 4699 } 4700 error = copyout(sp, uap->buf, sizeof(*sp)); 4701 done: 4702 return (error); 4703 } 4704 4705 /* 4706 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4707 */ 4708 int 4709 sys_fhstatvfs(struct fhstatvfs_args *uap) 4710 { 4711 struct thread *td = curthread; 4712 struct proc *p = td->td_proc; 4713 struct statvfs *sp; 4714 struct mount *mp; 4715 struct vnode *vp; 4716 fhandle_t fh; 4717 int error; 4718 4719 /* 4720 * Must be super user 4721 */ 4722 if ((error = priv_check(td, PRIV_ROOT))) 4723 return (error); 4724 4725 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4726 return (error); 4727 4728 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4729 error = ESTALE; 4730 goto done; 4731 } 4732 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4733 error = ESTALE; 4734 goto done; 4735 } 4736 4737 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4738 goto done; 4739 mp = vp->v_mount; 4740 sp = &mp->mnt_vstat; 4741 vput(vp); 4742 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4743 goto done; 4744 4745 sp->f_flag = 0; 4746 if (mp->mnt_flag & MNT_RDONLY) 4747 sp->f_flag |= ST_RDONLY; 4748 if (mp->mnt_flag & MNT_NOSUID) 4749 sp->f_flag |= ST_NOSUID; 4750 error = copyout(sp, uap->buf, sizeof(*sp)); 4751 done: 4752 return (error); 4753 } 4754 4755 4756 /* 4757 * Syscall to push extended attribute configuration information into the 4758 * 
VFS. Accepts a path, which it converts to a mountpoint, as well as 4759 * a command (int cmd), and attribute name and misc data. For now, the 4760 * attribute name is left in userspace for consumption by the VFS_op. 4761 * It will probably be changed to be copied into sysspace by the 4762 * syscall in the future, once issues with various consumers of the 4763 * attribute code have raised their hands. 4764 * 4765 * Currently this is used only by UFS Extended Attributes. 4766 */ 4767 int 4768 sys_extattrctl(struct extattrctl_args *uap) 4769 { 4770 struct nlookupdata nd; 4771 struct vnode *vp; 4772 char attrname[EXTATTR_MAXNAMELEN]; 4773 int error; 4774 size_t size; 4775 4776 attrname[0] = 0; 4777 vp = NULL; 4778 error = 0; 4779 4780 if (error == 0 && uap->filename) { 4781 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4782 NLC_FOLLOW); 4783 if (error == 0) 4784 error = nlookup(&nd); 4785 if (error == 0) 4786 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4787 nlookup_done(&nd); 4788 } 4789 4790 if (error == 0 && uap->attrname) { 4791 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4792 &size); 4793 } 4794 4795 if (error == 0) { 4796 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4797 if (error == 0) 4798 error = nlookup(&nd); 4799 if (error == 0) 4800 error = ncp_writechk(&nd.nl_nch); 4801 if (error == 0) { 4802 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4803 uap->attrnamespace, 4804 uap->attrname, nd.nl_cred); 4805 } 4806 nlookup_done(&nd); 4807 } 4808 4809 return (error); 4810 } 4811 4812 /* 4813 * Syscall to get a named extended attribute on a file or directory. 
4814 */ 4815 int 4816 sys_extattr_set_file(struct extattr_set_file_args *uap) 4817 { 4818 char attrname[EXTATTR_MAXNAMELEN]; 4819 struct nlookupdata nd; 4820 struct vnode *vp; 4821 struct uio auio; 4822 struct iovec aiov; 4823 int error; 4824 4825 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4826 if (error) 4827 return (error); 4828 4829 vp = NULL; 4830 4831 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4832 if (error == 0) 4833 error = nlookup(&nd); 4834 if (error == 0) 4835 error = ncp_writechk(&nd.nl_nch); 4836 if (error == 0) 4837 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4838 if (error) { 4839 nlookup_done(&nd); 4840 return (error); 4841 } 4842 4843 bzero(&auio, sizeof(auio)); 4844 aiov.iov_base = uap->data; 4845 aiov.iov_len = uap->nbytes; 4846 auio.uio_iov = &aiov; 4847 auio.uio_iovcnt = 1; 4848 auio.uio_offset = 0; 4849 auio.uio_resid = uap->nbytes; 4850 auio.uio_rw = UIO_WRITE; 4851 auio.uio_td = curthread; 4852 4853 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4854 &auio, nd.nl_cred); 4855 4856 vput(vp); 4857 nlookup_done(&nd); 4858 return (error); 4859 } 4860 4861 /* 4862 * Syscall to get a named extended attribute on a file or directory. 
4863 */ 4864 int 4865 sys_extattr_get_file(struct extattr_get_file_args *uap) 4866 { 4867 char attrname[EXTATTR_MAXNAMELEN]; 4868 struct nlookupdata nd; 4869 struct uio auio; 4870 struct iovec aiov; 4871 struct vnode *vp; 4872 int error; 4873 4874 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4875 if (error) 4876 return (error); 4877 4878 vp = NULL; 4879 4880 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4881 if (error == 0) 4882 error = nlookup(&nd); 4883 if (error == 0) 4884 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 4885 if (error) { 4886 nlookup_done(&nd); 4887 return (error); 4888 } 4889 4890 bzero(&auio, sizeof(auio)); 4891 aiov.iov_base = uap->data; 4892 aiov.iov_len = uap->nbytes; 4893 auio.uio_iov = &aiov; 4894 auio.uio_iovcnt = 1; 4895 auio.uio_offset = 0; 4896 auio.uio_resid = uap->nbytes; 4897 auio.uio_rw = UIO_READ; 4898 auio.uio_td = curthread; 4899 4900 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4901 &auio, nd.nl_cred); 4902 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4903 4904 vput(vp); 4905 nlookup_done(&nd); 4906 return(error); 4907 } 4908 4909 /* 4910 * Syscall to delete a named extended attribute from a file or directory. 4911 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4912 */ 4913 int 4914 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4915 { 4916 char attrname[EXTATTR_MAXNAMELEN]; 4917 struct nlookupdata nd; 4918 struct vnode *vp; 4919 int error; 4920 4921 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4922 if (error) 4923 return(error); 4924 4925 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4926 if (error == 0) 4927 error = nlookup(&nd); 4928 if (error == 0) 4929 error = ncp_writechk(&nd.nl_nch); 4930 if (error == 0) { 4931 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4932 if (error == 0) { 4933 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4934 attrname, NULL, nd.nl_cred); 4935 vput(vp); 4936 } 4937 } 4938 nlookup_done(&nd); 4939 return(error); 4940 } 4941 4942 /* 4943 * Determine if the mount is visible to the process. 4944 */ 4945 static int 4946 chroot_visible_mnt(struct mount *mp, struct proc *p) 4947 { 4948 struct nchandle nch; 4949 4950 /* 4951 * Traverse from the mount point upwards. If we hit the process 4952 * root then the mount point is visible to the process. 4953 */ 4954 nch = mp->mnt_ncmountpt; 4955 while (nch.ncp) { 4956 if (nch.mount == p->p_fd->fd_nrdir.mount && 4957 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4958 return(1); 4959 } 4960 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4961 nch = nch.mount->mnt_ncmounton; 4962 } else { 4963 nch.ncp = nch.ncp->nc_parent; 4964 } 4965 } 4966 4967 /* 4968 * If the mount point is not visible to the process, but the 4969 * process root is in a subdirectory of the mount, return 4970 * TRUE anyway. 4971 */ 4972 if (p->p_fd->fd_nrdir.mount == mp) 4973 return(1); 4974 4975 return(0); 4976 } 4977 4978