1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 #include <sys/mplock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 #include <vfs/union/union.h> 80 81 static void mount_warning(struct mount *mp, const char *ctl, ...) 
82 __printflike(2, 3); 83 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 84 static int checkvp_chdir (struct vnode *vn, struct thread *td); 85 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 86 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 87 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 88 static int getutimes (struct timeval *, struct timespec *); 89 static int getutimens (const struct timespec *, struct timespec *, int *); 90 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 91 static int setfmode (struct vnode *, int); 92 static int setfflags (struct vnode *, int); 93 static int setutimes (struct vnode *, struct vattr *, 94 const struct timespec *, int); 95 static int usermount = 0; /* if 1, non-root can mount fs. */ 96 97 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 98 99 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 100 "Allow non-root users to mount filesystems"); 101 102 /* 103 * Virtual File System System Calls 104 */ 105 106 /* 107 * Mount a file system. 108 * 109 * mount_args(char *type, char *path, int flags, caddr_t data) 110 * 111 * MPALMOSTSAFE 112 */ 113 int 114 sys_mount(struct mount_args *uap) 115 { 116 struct thread *td = curthread; 117 struct vnode *vp; 118 struct nchandle nch; 119 struct mount *mp, *nullmp; 120 struct vfsconf *vfsp; 121 int error, flag = 0, flag2 = 0; 122 int hasmount; 123 struct vattr va; 124 struct nlookupdata nd; 125 char fstypename[MFSNAMELEN]; 126 struct ucred *cred; 127 128 cred = td->td_ucred; 129 if (jailed(cred)) { 130 error = EPERM; 131 goto done; 132 } 133 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 134 goto done; 135 136 /* 137 * Do not allow NFS export by non-root users. 
138 */ 139 if (uap->flags & MNT_EXPORTED) { 140 error = priv_check(td, PRIV_ROOT); 141 if (error) 142 goto done; 143 } 144 /* 145 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 146 */ 147 if (priv_check(td, PRIV_ROOT)) 148 uap->flags |= MNT_NOSUID | MNT_NODEV; 149 150 /* 151 * Lookup the requested path and extract the nch and vnode. 152 */ 153 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 154 if (error == 0) { 155 if ((error = nlookup(&nd)) == 0) { 156 if (nd.nl_nch.ncp->nc_vp == NULL) 157 error = ENOENT; 158 } 159 } 160 if (error) { 161 nlookup_done(&nd); 162 goto done; 163 } 164 165 /* 166 * If the target filesystem is resolved via a nullfs mount, then 167 * nd.nl_nch.mount will be pointing to the nullfs mount structure 168 * instead of the target file system. We need it in case we are 169 * doing an update. 170 */ 171 nullmp = nd.nl_nch.mount; 172 173 /* 174 * Extract the locked+refd ncp and cleanup the nd structure 175 */ 176 nch = nd.nl_nch; 177 cache_zero(&nd.nl_nch); 178 nlookup_done(&nd); 179 180 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 181 (mp = cache_findmount(&nch)) != NULL) { 182 cache_dropmount(mp); 183 hasmount = 1; 184 } else { 185 hasmount = 0; 186 } 187 188 189 /* 190 * now we have the locked ref'd nch and unreferenced vnode. 191 */ 192 vp = nch.ncp->nc_vp; 193 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 194 cache_put(&nch); 195 goto done; 196 } 197 cache_unlock(&nch); 198 199 /* 200 * Extract the file system type. We need to know this early, to take 201 * appropriate actions if we are dealing with a nullfs. 
202 */ 203 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 204 cache_drop(&nch); 205 vput(vp); 206 goto done; 207 } 208 209 /* 210 * Now we have an unlocked ref'd nch and a locked ref'd vp 211 */ 212 if (uap->flags & MNT_UPDATE) { 213 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 214 cache_drop(&nch); 215 vput(vp); 216 error = EINVAL; 217 goto done; 218 } 219 220 if (strncmp(fstypename, "null", 5) == 0) { 221 KKASSERT(nullmp); 222 mp = nullmp; 223 } else { 224 mp = vp->v_mount; 225 } 226 227 flag = mp->mnt_flag; 228 flag2 = mp->mnt_kern_flag; 229 /* 230 * We only allow the filesystem to be reloaded if it 231 * is currently mounted read-only. 232 */ 233 if ((uap->flags & MNT_RELOAD) && 234 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 235 cache_drop(&nch); 236 vput(vp); 237 error = EOPNOTSUPP; /* Needs translation */ 238 goto done; 239 } 240 /* 241 * Only root, or the user that did the original mount is 242 * permitted to update it. 243 */ 244 if (mp->mnt_stat.f_owner != cred->cr_uid && 245 (error = priv_check(td, PRIV_ROOT))) { 246 cache_drop(&nch); 247 vput(vp); 248 goto done; 249 } 250 if (vfs_busy(mp, LK_NOWAIT)) { 251 cache_drop(&nch); 252 vput(vp); 253 error = EBUSY; 254 goto done; 255 } 256 if (hasmount) { 257 cache_drop(&nch); 258 vfs_unbusy(mp); 259 vput(vp); 260 error = EBUSY; 261 goto done; 262 } 263 mp->mnt_flag |= 264 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 265 lwkt_gettoken(&mp->mnt_token); 266 vn_unlock(vp); 267 goto update; 268 } 269 270 /* 271 * If the user is not root, ensure that they own the directory 272 * onto which we are attempting to mount. 
273 */ 274 if ((error = VOP_GETATTR(vp, &va)) || 275 (va.va_uid != cred->cr_uid && 276 (error = priv_check(td, PRIV_ROOT)))) { 277 cache_drop(&nch); 278 vput(vp); 279 goto done; 280 } 281 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 282 cache_drop(&nch); 283 vput(vp); 284 goto done; 285 } 286 if (vp->v_type != VDIR) { 287 cache_drop(&nch); 288 vput(vp); 289 error = ENOTDIR; 290 goto done; 291 } 292 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 293 cache_drop(&nch); 294 vput(vp); 295 error = EPERM; 296 goto done; 297 } 298 vfsp = vfsconf_find_by_name(fstypename); 299 if (vfsp == NULL) { 300 linker_file_t lf; 301 302 /* Only load modules for root (very important!) */ 303 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 304 cache_drop(&nch); 305 vput(vp); 306 goto done; 307 } 308 error = linker_load_file(fstypename, &lf); 309 if (error || lf == NULL) { 310 cache_drop(&nch); 311 vput(vp); 312 if (lf == NULL) 313 error = ENODEV; 314 goto done; 315 } 316 lf->userrefs++; 317 /* lookup again, see if the VFS was loaded */ 318 vfsp = vfsconf_find_by_name(fstypename); 319 if (vfsp == NULL) { 320 lf->userrefs--; 321 linker_file_unload(lf); 322 cache_drop(&nch); 323 vput(vp); 324 error = ENODEV; 325 goto done; 326 } 327 } 328 if (hasmount) { 329 cache_drop(&nch); 330 vput(vp); 331 error = EBUSY; 332 goto done; 333 } 334 335 /* 336 * Allocate and initialize the filesystem. 337 */ 338 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 339 mount_init(mp); 340 vfs_busy(mp, LK_NOWAIT); 341 mp->mnt_op = vfsp->vfc_vfsops; 342 mp->mnt_vfc = vfsp; 343 vfsp->vfc_refcount++; 344 mp->mnt_stat.f_type = vfsp->vfc_typenum; 345 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 346 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 347 mp->mnt_stat.f_owner = cred->cr_uid; 348 lwkt_gettoken(&mp->mnt_token); 349 vn_unlock(vp); 350 update: 351 /* 352 * (per-mount token acquired at this point) 353 * 354 * Set the mount level flags. 
355 */ 356 if (uap->flags & MNT_RDONLY) 357 mp->mnt_flag |= MNT_RDONLY; 358 else if (mp->mnt_flag & MNT_RDONLY) 359 mp->mnt_kern_flag |= MNTK_WANTRDWR; 360 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 361 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 362 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 363 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 364 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 365 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 366 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 367 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 368 /* 369 * Mount the filesystem. 370 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 371 * get. 372 */ 373 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 374 if (mp->mnt_flag & MNT_UPDATE) { 375 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 376 mp->mnt_flag &= ~MNT_RDONLY; 377 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 378 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 379 if (error) { 380 mp->mnt_flag = flag; 381 mp->mnt_kern_flag = flag2; 382 } 383 lwkt_reltoken(&mp->mnt_token); 384 vfs_unbusy(mp); 385 vrele(vp); 386 cache_drop(&nch); 387 goto done; 388 } 389 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 390 391 /* 392 * Put the new filesystem on the mount list after root. The mount 393 * point gets its own mnt_ncmountpt (unless the VFS already set one 394 * up) which represents the root of the mount. The lookup code 395 * detects the mount point going forward and checks the root of 396 * the mount going backwards. 397 * 398 * It is not necessary to invalidate or purge the vnode underneath 399 * because elements under the mount will be given their own glue 400 * namecache record. 
401 */ 402 if (!error) { 403 if (mp->mnt_ncmountpt.ncp == NULL) { 404 /* 405 * allocate, then unlock, but leave the ref intact 406 */ 407 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 408 cache_unlock(&mp->mnt_ncmountpt); 409 } 410 mp->mnt_ncmounton = nch; /* inherits ref */ 411 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 412 cache_ismounting(mp); 413 414 mountlist_insert(mp, MNTINS_LAST); 415 vn_unlock(vp); 416 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 417 error = vfs_allocate_syncvnode(mp); 418 lwkt_reltoken(&mp->mnt_token); 419 vfs_unbusy(mp); 420 error = VFS_START(mp, 0); 421 vrele(vp); 422 } else { 423 vn_syncer_thr_stop(mp); 424 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 425 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 426 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 427 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 428 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 429 mp->mnt_vfc->vfc_refcount--; 430 lwkt_reltoken(&mp->mnt_token); 431 vfs_unbusy(mp); 432 kfree(mp, M_MOUNT); 433 cache_drop(&nch); 434 vput(vp); 435 } 436 done: 437 return (error); 438 } 439 440 /* 441 * Scan all active processes to see if any of them have a current 442 * or root directory onto which the new filesystem has just been 443 * mounted. If so, replace them with the new mount point. 444 * 445 * Both old_nch and new_nch are ref'd on call but not locked. 446 * new_nch must be temporarily locked so it can be associated with the 447 * vnode representing the root of the mount point. 
448 */ 449 struct checkdirs_info { 450 struct nchandle old_nch; 451 struct nchandle new_nch; 452 struct vnode *old_vp; 453 struct vnode *new_vp; 454 }; 455 456 static int checkdirs_callback(struct proc *p, void *data); 457 458 static void 459 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 460 { 461 struct checkdirs_info info; 462 struct vnode *olddp; 463 struct vnode *newdp; 464 struct mount *mp; 465 466 /* 467 * If the old mount point's vnode has a usecount of 1, it is not 468 * being held as a descriptor anywhere. 469 */ 470 olddp = old_nch->ncp->nc_vp; 471 if (olddp == NULL || VREFCNT(olddp) == 1) 472 return; 473 474 /* 475 * Force the root vnode of the new mount point to be resolved 476 * so we can update any matching processes. 477 */ 478 mp = new_nch->mount; 479 if (VFS_ROOT(mp, &newdp)) 480 panic("mount: lost mount"); 481 vn_unlock(newdp); 482 cache_lock(new_nch); 483 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 484 cache_setunresolved(new_nch); 485 cache_setvp(new_nch, newdp); 486 cache_unlock(new_nch); 487 488 /* 489 * Special handling of the root node 490 */ 491 if (rootvnode == olddp) { 492 vref(newdp); 493 vfs_cache_setroot(newdp, cache_hold(new_nch)); 494 } 495 496 /* 497 * Pass newdp separately so the callback does not have to access 498 * it via new_nch->ncp->nc_vp. 499 */ 500 info.old_nch = *old_nch; 501 info.new_nch = *new_nch; 502 info.new_vp = newdp; 503 allproc_scan(checkdirs_callback, &info); 504 vput(newdp); 505 } 506 507 /* 508 * NOTE: callback is not MP safe because the scanned process's filedesc 509 * structure can be ripped out from under us, amoung other things. 
510 */ 511 static int 512 checkdirs_callback(struct proc *p, void *data) 513 { 514 struct checkdirs_info *info = data; 515 struct filedesc *fdp; 516 struct nchandle ncdrop1; 517 struct nchandle ncdrop2; 518 struct vnode *vprele1; 519 struct vnode *vprele2; 520 521 if ((fdp = p->p_fd) != NULL) { 522 cache_zero(&ncdrop1); 523 cache_zero(&ncdrop2); 524 vprele1 = NULL; 525 vprele2 = NULL; 526 527 /* 528 * MPUNSAFE - XXX fdp can be pulled out from under a 529 * foreign process. 530 * 531 * A shared filedesc is ok, we don't have to copy it 532 * because we are making this change globally. 533 */ 534 spin_lock(&fdp->fd_spin); 535 if (fdp->fd_ncdir.mount == info->old_nch.mount && 536 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 537 vprele1 = fdp->fd_cdir; 538 vref(info->new_vp); 539 fdp->fd_cdir = info->new_vp; 540 ncdrop1 = fdp->fd_ncdir; 541 cache_copy(&info->new_nch, &fdp->fd_ncdir); 542 } 543 if (fdp->fd_nrdir.mount == info->old_nch.mount && 544 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 545 vprele2 = fdp->fd_rdir; 546 vref(info->new_vp); 547 fdp->fd_rdir = info->new_vp; 548 ncdrop2 = fdp->fd_nrdir; 549 cache_copy(&info->new_nch, &fdp->fd_nrdir); 550 } 551 spin_unlock(&fdp->fd_spin); 552 if (ncdrop1.ncp) 553 cache_drop(&ncdrop1); 554 if (ncdrop2.ncp) 555 cache_drop(&ncdrop2); 556 if (vprele1) 557 vrele(vprele1); 558 if (vprele2) 559 vrele(vprele2); 560 } 561 return(0); 562 } 563 564 /* 565 * Unmount a file system. 566 * 567 * Note: unmount takes a path to the vnode mounted on as argument, 568 * not special file (as before). 
569 * 570 * umount_args(char *path, int flags) 571 * 572 * MPALMOSTSAFE 573 */ 574 int 575 sys_unmount(struct unmount_args *uap) 576 { 577 struct thread *td = curthread; 578 struct proc *p __debugvar = td->td_proc; 579 struct mount *mp = NULL; 580 struct nlookupdata nd; 581 int error; 582 583 KKASSERT(p); 584 get_mplock(); 585 if (td->td_ucred->cr_prison != NULL) { 586 error = EPERM; 587 goto done; 588 } 589 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 590 goto done; 591 592 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 593 if (error == 0) 594 error = nlookup(&nd); 595 if (error) 596 goto out; 597 598 mp = nd.nl_nch.mount; 599 600 /* 601 * Only root, or the user that did the original mount is 602 * permitted to unmount this filesystem. 603 */ 604 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 605 (error = priv_check(td, PRIV_ROOT))) 606 goto out; 607 608 /* 609 * Don't allow unmounting the root file system. 610 */ 611 if (mp->mnt_flag & MNT_ROOTFS) { 612 error = EINVAL; 613 goto out; 614 } 615 616 /* 617 * Must be the root of the filesystem 618 */ 619 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 620 error = EINVAL; 621 goto out; 622 } 623 624 out: 625 nlookup_done(&nd); 626 if (error == 0) 627 error = dounmount(mp, uap->flags); 628 done: 629 rel_mplock(); 630 return (error); 631 } 632 633 /* 634 * Do the actual file system unmount. 
635 */ 636 static int 637 dounmount_interlock(struct mount *mp) 638 { 639 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 640 return (EBUSY); 641 mp->mnt_kern_flag |= MNTK_UNMOUNT; 642 return(0); 643 } 644 645 static int 646 unmount_allproc_cb(struct proc *p, void *arg) 647 { 648 struct mount *mp; 649 650 if (p->p_textnch.ncp == NULL) 651 return 0; 652 653 mp = (struct mount *)arg; 654 if (p->p_textnch.mount == mp) 655 cache_drop(&p->p_textnch); 656 657 return 0; 658 } 659 660 int 661 dounmount(struct mount *mp, int flags) 662 { 663 struct namecache *ncp; 664 struct nchandle nch; 665 struct vnode *vp; 666 int error; 667 int async_flag; 668 int lflags; 669 int freeok = 1; 670 int retry; 671 672 lwkt_gettoken(&mp->mnt_token); 673 /* 674 * Exclusive access for unmounting purposes 675 */ 676 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 677 goto out; 678 679 /* 680 * Allow filesystems to detect that a forced unmount is in progress. 681 */ 682 if (flags & MNT_FORCE) 683 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 684 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 685 error = lockmgr(&mp->mnt_lock, lflags); 686 if (error) { 687 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 688 if (mp->mnt_kern_flag & MNTK_MWAIT) { 689 mp->mnt_kern_flag &= ~MNTK_MWAIT; 690 wakeup(mp); 691 } 692 goto out; 693 } 694 695 if (mp->mnt_flag & MNT_EXPUBLIC) 696 vfs_setpublicfs(NULL, NULL, NULL); 697 698 vfs_msync(mp, MNT_WAIT); 699 async_flag = mp->mnt_flag & MNT_ASYNC; 700 mp->mnt_flag &=~ MNT_ASYNC; 701 702 /* 703 * If this filesystem isn't aliasing other filesystems, 704 * try to invalidate any remaining namecache entries and 705 * check the count afterwords. 
706 */ 707 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 708 cache_lock(&mp->mnt_ncmountpt); 709 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 710 cache_unlock(&mp->mnt_ncmountpt); 711 712 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 713 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 714 allproc_scan(&unmount_allproc_cb, mp); 715 } 716 717 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 718 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 719 720 if ((flags & MNT_FORCE) == 0) { 721 error = EBUSY; 722 mount_warning(mp, "Cannot unmount: " 723 "%d namecache " 724 "references still " 725 "present", 726 ncp->nc_refs - 1); 727 } else { 728 mount_warning(mp, "Forced unmount: " 729 "%d namecache " 730 "references still " 731 "present", 732 ncp->nc_refs - 1); 733 freeok = 0; 734 } 735 } 736 } 737 738 /* 739 * Decomission our special mnt_syncer vnode. This also stops 740 * the vnlru code. If we are unable to unmount we recommission 741 * the vnode. 742 * 743 * Then sync the filesystem. 744 */ 745 if ((vp = mp->mnt_syncer) != NULL) { 746 mp->mnt_syncer = NULL; 747 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 748 vrele(vp); 749 } 750 if ((mp->mnt_flag & MNT_RDONLY) == 0) 751 VFS_SYNC(mp, MNT_WAIT); 752 753 /* 754 * nchandle records ref the mount structure. Expect a count of 1 755 * (our mount->mnt_ncmountpt). 756 * 757 * Scans can get temporary refs on a mountpoint (thought really 758 * heavy duty stuff like cache_findmount() do not). 
759 */ 760 for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) { 761 cache_unmounting(mp); 762 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 763 } 764 if (mp->mnt_refs != 1) { 765 if ((flags & MNT_FORCE) == 0) { 766 mount_warning(mp, "Cannot unmount: " 767 "%d mount refs still present", 768 mp->mnt_refs); 769 error = EBUSY; 770 } else { 771 mount_warning(mp, "Forced unmount: " 772 "%d mount refs still present", 773 mp->mnt_refs); 774 freeok = 0; 775 } 776 } 777 778 /* 779 * So far so good, sync the filesystem once more and 780 * call the VFS unmount code if the sync succeeds. 781 */ 782 if (error == 0) { 783 if (((mp->mnt_flag & MNT_RDONLY) || 784 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 785 (flags & MNT_FORCE)) { 786 error = VFS_UNMOUNT(mp, flags); 787 } 788 } 789 790 /* 791 * If an error occurred we can still recover, restoring the 792 * syncer vnode and misc flags. 793 */ 794 if (error) { 795 if (mp->mnt_syncer == NULL) 796 vfs_allocate_syncvnode(mp); 797 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 798 mp->mnt_flag |= async_flag; 799 lockmgr(&mp->mnt_lock, LK_RELEASE); 800 if (mp->mnt_kern_flag & MNTK_MWAIT) { 801 mp->mnt_kern_flag &= ~MNTK_MWAIT; 802 wakeup(mp); 803 } 804 goto out; 805 } 806 /* 807 * Clean up any journals still associated with the mount after 808 * filesystem activity has ceased. 809 */ 810 journal_remove_all_journals(mp, 811 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 812 813 mountlist_remove(mp); 814 815 /* 816 * Remove any installed vnode ops here so the individual VFSs don't 817 * have to. 
818 */ 819 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 820 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 821 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 822 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 823 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 824 825 if (mp->mnt_ncmountpt.ncp != NULL) { 826 nch = mp->mnt_ncmountpt; 827 cache_zero(&mp->mnt_ncmountpt); 828 cache_clrmountpt(&nch); 829 cache_drop(&nch); 830 } 831 if (mp->mnt_ncmounton.ncp != NULL) { 832 cache_unmounting(mp); 833 nch = mp->mnt_ncmounton; 834 cache_zero(&mp->mnt_ncmounton); 835 cache_clrmountpt(&nch); 836 cache_drop(&nch); 837 } 838 839 mp->mnt_vfc->vfc_refcount--; 840 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 841 panic("unmount: dangling vnode"); 842 lockmgr(&mp->mnt_lock, LK_RELEASE); 843 if (mp->mnt_kern_flag & MNTK_MWAIT) { 844 mp->mnt_kern_flag &= ~MNTK_MWAIT; 845 wakeup(mp); 846 } 847 848 /* 849 * If we reach here and freeok != 0 we must free the mount. 850 * If refs > 1 cycle and wait, just in case someone tried 851 * to busy the mount after we decided to do the unmount. 852 */ 853 if (freeok) { 854 while (mp->mnt_refs > 1) { 855 cache_unmounting(mp); 856 wakeup(mp); 857 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 858 } 859 lwkt_reltoken(&mp->mnt_token); 860 kfree(mp, M_MOUNT); 861 mp = NULL; 862 } 863 error = 0; 864 out: 865 if (mp) 866 lwkt_reltoken(&mp->mnt_token); 867 return (error); 868 } 869 870 static 871 void 872 mount_warning(struct mount *mp, const char *ctl, ...) 
873 { 874 char *ptr; 875 char *buf; 876 __va_list va; 877 878 __va_start(va, ctl); 879 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 880 &ptr, &buf, 0) == 0) { 881 kprintf("unmount(%s): ", ptr); 882 kvprintf(ctl, va); 883 kprintf("\n"); 884 kfree(buf, M_TEMP); 885 } else { 886 kprintf("unmount(%p", mp); 887 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 888 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 889 kprintf("): "); 890 kvprintf(ctl, va); 891 kprintf("\n"); 892 } 893 __va_end(va); 894 } 895 896 /* 897 * Shim cache_fullpath() to handle the case where a process is chrooted into 898 * a subdirectory of a mount. In this case if the root mount matches the 899 * process root directory's mount we have to specify the process's root 900 * directory instead of the mount point, because the mount point might 901 * be above the root directory. 902 */ 903 static 904 int 905 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 906 { 907 struct nchandle *nch; 908 909 if (p && p->p_fd->fd_nrdir.mount == mp) 910 nch = &p->p_fd->fd_nrdir; 911 else 912 nch = &mp->mnt_ncmountpt; 913 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 914 } 915 916 /* 917 * Sync each mounted filesystem. 
918 */ 919 920 #ifdef DEBUG 921 static int syncprt = 0; 922 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 923 #endif /* DEBUG */ 924 925 static int sync_callback(struct mount *mp, void *data); 926 927 int 928 sys_sync(struct sync_args *uap) 929 { 930 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 931 return (0); 932 } 933 934 static 935 int 936 sync_callback(struct mount *mp, void *data __unused) 937 { 938 int asyncflag; 939 940 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 941 asyncflag = mp->mnt_flag & MNT_ASYNC; 942 mp->mnt_flag &= ~MNT_ASYNC; 943 vfs_msync(mp, MNT_NOWAIT); 944 VFS_SYNC(mp, MNT_NOWAIT); 945 mp->mnt_flag |= asyncflag; 946 } 947 return(0); 948 } 949 950 /* XXX PRISON: could be per prison flag */ 951 static int prison_quotas; 952 #if 0 953 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 954 #endif 955 956 /* 957 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 958 * 959 * Change filesystem quotas. 960 * 961 * MPALMOSTSAFE 962 */ 963 int 964 sys_quotactl(struct quotactl_args *uap) 965 { 966 struct nlookupdata nd; 967 struct thread *td; 968 struct mount *mp; 969 int error; 970 971 get_mplock(); 972 td = curthread; 973 if (td->td_ucred->cr_prison && !prison_quotas) { 974 error = EPERM; 975 goto done; 976 } 977 978 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 979 if (error == 0) 980 error = nlookup(&nd); 981 if (error == 0) { 982 mp = nd.nl_nch.mount; 983 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 984 uap->arg, nd.nl_cred); 985 } 986 nlookup_done(&nd); 987 done: 988 rel_mplock(); 989 return (error); 990 } 991 992 /* 993 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 994 * void *buf, int buflen) 995 * 996 * This function operates on a mount point and executes the specified 997 * operation using the specified control data, and possibly returns data. 
998 * 999 * The actual number of bytes stored in the result buffer is returned, 0 1000 * if none, otherwise an error is returned. 1001 * 1002 * MPALMOSTSAFE 1003 */ 1004 int 1005 sys_mountctl(struct mountctl_args *uap) 1006 { 1007 struct thread *td = curthread; 1008 struct proc *p = td->td_proc; 1009 struct file *fp; 1010 void *ctl = NULL; 1011 void *buf = NULL; 1012 char *path = NULL; 1013 int error; 1014 1015 /* 1016 * Sanity and permissions checks. We must be root. 1017 */ 1018 KKASSERT(p); 1019 if (td->td_ucred->cr_prison != NULL) 1020 return (EPERM); 1021 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1022 (error = priv_check(td, PRIV_ROOT)) != 0) 1023 return (error); 1024 1025 /* 1026 * Argument length checks 1027 */ 1028 if (uap->ctllen < 0 || uap->ctllen > 1024) 1029 return (EINVAL); 1030 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1031 return (EINVAL); 1032 if (uap->path == NULL) 1033 return (EINVAL); 1034 1035 /* 1036 * Allocate the necessary buffers and copyin data 1037 */ 1038 path = objcache_get(namei_oc, M_WAITOK); 1039 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1040 if (error) 1041 goto done; 1042 1043 if (uap->ctllen) { 1044 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1045 error = copyin(uap->ctl, ctl, uap->ctllen); 1046 if (error) 1047 goto done; 1048 } 1049 if (uap->buflen) 1050 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1051 1052 /* 1053 * Validate the descriptor 1054 */ 1055 if (uap->fd >= 0) { 1056 fp = holdfp(p->p_fd, uap->fd, -1); 1057 if (fp == NULL) { 1058 error = EBADF; 1059 goto done; 1060 } 1061 } else { 1062 fp = NULL; 1063 } 1064 1065 /* 1066 * Execute the internal kernel function and clean up. 
1067 */ 1068 get_mplock(); 1069 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1070 rel_mplock(); 1071 if (fp) 1072 fdrop(fp); 1073 if (error == 0 && uap->sysmsg_result > 0) 1074 error = copyout(buf, uap->buf, uap->sysmsg_result); 1075 done: 1076 if (path) 1077 objcache_put(namei_oc, path); 1078 if (ctl) 1079 kfree(ctl, M_TEMP); 1080 if (buf) 1081 kfree(buf, M_TEMP); 1082 return (error); 1083 } 1084 1085 /* 1086 * Execute a mount control operation by resolving the path to a mount point 1087 * and calling vop_mountctl(). 1088 * 1089 * Use the mount point from the nch instead of the vnode so nullfs mounts 1090 * can properly spike the VOP. 1091 */ 1092 int 1093 kern_mountctl(const char *path, int op, struct file *fp, 1094 const void *ctl, int ctllen, 1095 void *buf, int buflen, int *res) 1096 { 1097 struct vnode *vp; 1098 struct mount *mp; 1099 struct nlookupdata nd; 1100 int error; 1101 1102 *res = 0; 1103 vp = NULL; 1104 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1105 if (error == 0) 1106 error = nlookup(&nd); 1107 if (error == 0) 1108 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1109 mp = nd.nl_nch.mount; 1110 nlookup_done(&nd); 1111 if (error) 1112 return (error); 1113 vn_unlock(vp); 1114 1115 /* 1116 * Must be the root of the filesystem 1117 */ 1118 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1119 vrele(vp); 1120 return (EINVAL); 1121 } 1122 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1123 buf, buflen, res); 1124 vrele(vp); 1125 return (error); 1126 } 1127 1128 int 1129 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1130 { 1131 struct thread *td = curthread; 1132 struct proc *p = td->td_proc; 1133 struct mount *mp; 1134 struct statfs *sp; 1135 char *fullpath, *freepath; 1136 int error; 1137 1138 if ((error = nlookup(nd)) != 0) 1139 return (error); 1140 mp = nd->nl_nch.mount; 1141 sp = &mp->mnt_stat; 1142 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1143 return (error); 1144 1145 error = mount_path(p, mp, &fullpath, &freepath); 1146 if (error) 1147 return(error); 1148 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1149 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1150 kfree(freepath, M_TEMP); 1151 1152 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1153 bcopy(sp, buf, sizeof(*buf)); 1154 /* Only root should have access to the fsid's. */ 1155 if (priv_check(td, PRIV_ROOT)) 1156 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1157 return (0); 1158 } 1159 1160 /* 1161 * statfs_args(char *path, struct statfs *buf) 1162 * 1163 * Get filesystem statistics. 1164 */ 1165 int 1166 sys_statfs(struct statfs_args *uap) 1167 { 1168 struct nlookupdata nd; 1169 struct statfs buf; 1170 int error; 1171 1172 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1173 if (error == 0) 1174 error = kern_statfs(&nd, &buf); 1175 nlookup_done(&nd); 1176 if (error == 0) 1177 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1178 return (error); 1179 } 1180 1181 int 1182 kern_fstatfs(int fd, struct statfs *buf) 1183 { 1184 struct thread *td = curthread; 1185 struct proc *p = td->td_proc; 1186 struct file *fp; 1187 struct mount *mp; 1188 struct statfs *sp; 1189 char *fullpath, *freepath; 1190 int error; 1191 1192 KKASSERT(p); 1193 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1194 return (error); 1195 1196 /* 1197 * Try to use mount info from any overlays rather than the 1198 * mount info for the underlying vnode, otherwise we will 1199 * fail when operating on null-mounted paths inside a chroot. 
1200 */ 1201 if ((mp = fp->f_nchandle.mount) == NULL) 1202 mp = ((struct vnode *)fp->f_data)->v_mount; 1203 if (mp == NULL) { 1204 error = EBADF; 1205 goto done; 1206 } 1207 if (fp->f_cred == NULL) { 1208 error = EINVAL; 1209 goto done; 1210 } 1211 sp = &mp->mnt_stat; 1212 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1213 goto done; 1214 1215 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1216 goto done; 1217 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1218 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1219 kfree(freepath, M_TEMP); 1220 1221 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1222 bcopy(sp, buf, sizeof(*buf)); 1223 1224 /* Only root should have access to the fsid's. */ 1225 if (priv_check(td, PRIV_ROOT)) 1226 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1227 error = 0; 1228 done: 1229 fdrop(fp); 1230 return (error); 1231 } 1232 1233 /* 1234 * fstatfs_args(int fd, struct statfs *buf) 1235 * 1236 * Get filesystem statistics. 1237 */ 1238 int 1239 sys_fstatfs(struct fstatfs_args *uap) 1240 { 1241 struct statfs buf; 1242 int error; 1243 1244 error = kern_fstatfs(uap->fd, &buf); 1245 1246 if (error == 0) 1247 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1248 return (error); 1249 } 1250 1251 int 1252 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1253 { 1254 struct mount *mp; 1255 struct statvfs *sp; 1256 int error; 1257 1258 if ((error = nlookup(nd)) != 0) 1259 return (error); 1260 mp = nd->nl_nch.mount; 1261 sp = &mp->mnt_vstat; 1262 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1263 return (error); 1264 1265 sp->f_flag = 0; 1266 if (mp->mnt_flag & MNT_RDONLY) 1267 sp->f_flag |= ST_RDONLY; 1268 if (mp->mnt_flag & MNT_NOSUID) 1269 sp->f_flag |= ST_NOSUID; 1270 bcopy(sp, buf, sizeof(*buf)); 1271 return (0); 1272 } 1273 1274 /* 1275 * statfs_args(char *path, struct statfs *buf) 1276 * 1277 * Get filesystem statistics. 
1278 */ 1279 int 1280 sys_statvfs(struct statvfs_args *uap) 1281 { 1282 struct nlookupdata nd; 1283 struct statvfs buf; 1284 int error; 1285 1286 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1287 if (error == 0) 1288 error = kern_statvfs(&nd, &buf); 1289 nlookup_done(&nd); 1290 if (error == 0) 1291 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1292 return (error); 1293 } 1294 1295 int 1296 kern_fstatvfs(int fd, struct statvfs *buf) 1297 { 1298 struct thread *td = curthread; 1299 struct proc *p = td->td_proc; 1300 struct file *fp; 1301 struct mount *mp; 1302 struct statvfs *sp; 1303 int error; 1304 1305 KKASSERT(p); 1306 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1307 return (error); 1308 if ((mp = fp->f_nchandle.mount) == NULL) 1309 mp = ((struct vnode *)fp->f_data)->v_mount; 1310 if (mp == NULL) { 1311 error = EBADF; 1312 goto done; 1313 } 1314 if (fp->f_cred == NULL) { 1315 error = EINVAL; 1316 goto done; 1317 } 1318 sp = &mp->mnt_vstat; 1319 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1320 goto done; 1321 1322 sp->f_flag = 0; 1323 if (mp->mnt_flag & MNT_RDONLY) 1324 sp->f_flag |= ST_RDONLY; 1325 if (mp->mnt_flag & MNT_NOSUID) 1326 sp->f_flag |= ST_NOSUID; 1327 1328 bcopy(sp, buf, sizeof(*buf)); 1329 error = 0; 1330 done: 1331 fdrop(fp); 1332 return (error); 1333 } 1334 1335 /* 1336 * fstatfs_args(int fd, struct statfs *buf) 1337 * 1338 * Get filesystem statistics. 1339 */ 1340 int 1341 sys_fstatvfs(struct fstatvfs_args *uap) 1342 { 1343 struct statvfs buf; 1344 int error; 1345 1346 error = kern_fstatvfs(uap->fd, &buf); 1347 1348 if (error == 0) 1349 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1350 return (error); 1351 } 1352 1353 /* 1354 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1355 * 1356 * Get statistics on all filesystems. 
1357 */ 1358 1359 struct getfsstat_info { 1360 struct statfs *sfsp; 1361 long count; 1362 long maxcount; 1363 int error; 1364 int flags; 1365 struct thread *td; 1366 }; 1367 1368 static int getfsstat_callback(struct mount *, void *); 1369 1370 int 1371 sys_getfsstat(struct getfsstat_args *uap) 1372 { 1373 struct thread *td = curthread; 1374 struct getfsstat_info info; 1375 1376 bzero(&info, sizeof(info)); 1377 1378 info.maxcount = uap->bufsize / sizeof(struct statfs); 1379 info.sfsp = uap->buf; 1380 info.count = 0; 1381 info.flags = uap->flags; 1382 info.td = td; 1383 1384 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1385 if (info.sfsp && info.count > info.maxcount) 1386 uap->sysmsg_result = info.maxcount; 1387 else 1388 uap->sysmsg_result = info.count; 1389 return (info.error); 1390 } 1391 1392 static int 1393 getfsstat_callback(struct mount *mp, void *data) 1394 { 1395 struct getfsstat_info *info = data; 1396 struct statfs *sp; 1397 char *freepath; 1398 char *fullpath; 1399 int error; 1400 1401 if (info->sfsp && info->count < info->maxcount) { 1402 if (info->td->td_proc && 1403 !chroot_visible_mnt(mp, info->td->td_proc)) { 1404 return(0); 1405 } 1406 sp = &mp->mnt_stat; 1407 1408 /* 1409 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1410 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1411 * overrides MNT_WAIT. 
1412 */ 1413 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1414 (info->flags & MNT_WAIT)) && 1415 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1416 return(0); 1417 } 1418 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1419 1420 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1421 if (error) { 1422 info->error = error; 1423 return(-1); 1424 } 1425 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1426 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1427 kfree(freepath, M_TEMP); 1428 1429 error = copyout(sp, info->sfsp, sizeof(*sp)); 1430 if (error) { 1431 info->error = error; 1432 return (-1); 1433 } 1434 ++info->sfsp; 1435 } 1436 info->count++; 1437 return(0); 1438 } 1439 1440 /* 1441 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1442 long bufsize, int flags) 1443 * 1444 * Get statistics on all filesystems. 1445 */ 1446 1447 struct getvfsstat_info { 1448 struct statfs *sfsp; 1449 struct statvfs *vsfsp; 1450 long count; 1451 long maxcount; 1452 int error; 1453 int flags; 1454 struct thread *td; 1455 }; 1456 1457 static int getvfsstat_callback(struct mount *, void *); 1458 1459 int 1460 sys_getvfsstat(struct getvfsstat_args *uap) 1461 { 1462 struct thread *td = curthread; 1463 struct getvfsstat_info info; 1464 1465 bzero(&info, sizeof(info)); 1466 1467 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1468 info.sfsp = uap->buf; 1469 info.vsfsp = uap->vbuf; 1470 info.count = 0; 1471 info.flags = uap->flags; 1472 info.td = td; 1473 1474 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1475 if (info.vsfsp && info.count > info.maxcount) 1476 uap->sysmsg_result = info.maxcount; 1477 else 1478 uap->sysmsg_result = info.count; 1479 return (info.error); 1480 } 1481 1482 static int 1483 getvfsstat_callback(struct mount *mp, void *data) 1484 { 1485 struct getvfsstat_info *info = data; 1486 struct statfs *sp; 1487 struct statvfs *vsp; 1488 char *freepath; 1489 char *fullpath; 1490 int error; 1491 1492 if 
(info->vsfsp && info->count < info->maxcount) { 1493 if (info->td->td_proc && 1494 !chroot_visible_mnt(mp, info->td->td_proc)) { 1495 return(0); 1496 } 1497 sp = &mp->mnt_stat; 1498 vsp = &mp->mnt_vstat; 1499 1500 /* 1501 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1502 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1503 * overrides MNT_WAIT. 1504 */ 1505 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1506 (info->flags & MNT_WAIT)) && 1507 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1508 return(0); 1509 } 1510 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1511 1512 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1513 (info->flags & MNT_WAIT)) && 1514 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1515 return(0); 1516 } 1517 vsp->f_flag = 0; 1518 if (mp->mnt_flag & MNT_RDONLY) 1519 vsp->f_flag |= ST_RDONLY; 1520 if (mp->mnt_flag & MNT_NOSUID) 1521 vsp->f_flag |= ST_NOSUID; 1522 1523 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1524 if (error) { 1525 info->error = error; 1526 return(-1); 1527 } 1528 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1529 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1530 kfree(freepath, M_TEMP); 1531 1532 error = copyout(sp, info->sfsp, sizeof(*sp)); 1533 if (error == 0) 1534 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1535 if (error) { 1536 info->error = error; 1537 return (-1); 1538 } 1539 ++info->sfsp; 1540 ++info->vsfsp; 1541 } 1542 info->count++; 1543 return(0); 1544 } 1545 1546 1547 /* 1548 * fchdir_args(int fd) 1549 * 1550 * Change current working directory to a given file descriptor. 
1551 */ 1552 int 1553 sys_fchdir(struct fchdir_args *uap) 1554 { 1555 struct thread *td = curthread; 1556 struct proc *p = td->td_proc; 1557 struct filedesc *fdp = p->p_fd; 1558 struct vnode *vp, *ovp; 1559 struct mount *mp; 1560 struct file *fp; 1561 struct nchandle nch, onch, tnch; 1562 int error; 1563 1564 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1565 return (error); 1566 lwkt_gettoken(&p->p_token); 1567 vp = (struct vnode *)fp->f_data; 1568 vref(vp); 1569 vn_lock(vp, LK_SHARED | LK_RETRY); 1570 if (fp->f_nchandle.ncp == NULL) 1571 error = ENOTDIR; 1572 else 1573 error = checkvp_chdir(vp, td); 1574 if (error) { 1575 vput(vp); 1576 goto done; 1577 } 1578 cache_copy(&fp->f_nchandle, &nch); 1579 1580 /* 1581 * If the ncp has become a mount point, traverse through 1582 * the mount point. 1583 */ 1584 1585 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1586 (mp = cache_findmount(&nch)) != NULL 1587 ) { 1588 error = nlookup_mp(mp, &tnch); 1589 if (error == 0) { 1590 cache_unlock(&tnch); /* leave ref intact */ 1591 vput(vp); 1592 vp = tnch.ncp->nc_vp; 1593 error = vget(vp, LK_SHARED); 1594 KKASSERT(error == 0); 1595 cache_drop(&nch); 1596 nch = tnch; 1597 } 1598 cache_dropmount(mp); 1599 } 1600 if (error == 0) { 1601 ovp = fdp->fd_cdir; 1602 onch = fdp->fd_ncdir; 1603 vn_unlock(vp); /* leave ref intact */ 1604 fdp->fd_cdir = vp; 1605 fdp->fd_ncdir = nch; 1606 cache_drop(&onch); 1607 vrele(ovp); 1608 } else { 1609 cache_drop(&nch); 1610 vput(vp); 1611 } 1612 fdrop(fp); 1613 done: 1614 lwkt_reltoken(&p->p_token); 1615 return (error); 1616 } 1617 1618 int 1619 kern_chdir(struct nlookupdata *nd) 1620 { 1621 struct thread *td = curthread; 1622 struct proc *p = td->td_proc; 1623 struct filedesc *fdp = p->p_fd; 1624 struct vnode *vp, *ovp; 1625 struct nchandle onch; 1626 int error; 1627 1628 nd->nl_flags |= NLC_SHAREDLOCK; 1629 if ((error = nlookup(nd)) != 0) 1630 return (error); 1631 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1632 return (ENOENT); 1633 if 
((error = vget(vp, LK_SHARED)) != 0) 1634 return (error); 1635 1636 lwkt_gettoken(&p->p_token); 1637 error = checkvp_chdir(vp, td); 1638 vn_unlock(vp); 1639 if (error == 0) { 1640 ovp = fdp->fd_cdir; 1641 onch = fdp->fd_ncdir; 1642 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1643 fdp->fd_ncdir = nd->nl_nch; 1644 fdp->fd_cdir = vp; 1645 cache_drop(&onch); 1646 vrele(ovp); 1647 cache_zero(&nd->nl_nch); 1648 } else { 1649 vrele(vp); 1650 } 1651 lwkt_reltoken(&p->p_token); 1652 return (error); 1653 } 1654 1655 /* 1656 * chdir_args(char *path) 1657 * 1658 * Change current working directory (``.''). 1659 */ 1660 int 1661 sys_chdir(struct chdir_args *uap) 1662 { 1663 struct nlookupdata nd; 1664 int error; 1665 1666 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1667 if (error == 0) 1668 error = kern_chdir(&nd); 1669 nlookup_done(&nd); 1670 return (error); 1671 } 1672 1673 /* 1674 * Helper function for raised chroot(2) security function: Refuse if 1675 * any filedescriptors are open directories. 1676 */ 1677 static int 1678 chroot_refuse_vdir_fds(struct filedesc *fdp) 1679 { 1680 struct vnode *vp; 1681 struct file *fp; 1682 int error; 1683 int fd; 1684 1685 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1686 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1687 continue; 1688 vp = (struct vnode *)fp->f_data; 1689 if (vp->v_type != VDIR) { 1690 fdrop(fp); 1691 continue; 1692 } 1693 fdrop(fp); 1694 return(EPERM); 1695 } 1696 return (0); 1697 } 1698 1699 /* 1700 * This sysctl determines if we will allow a process to chroot(2) if it 1701 * has a directory open: 1702 * 0: disallowed for all processes. 1703 * 1: allowed for processes that were not already chroot(2)'ed. 1704 * 2: allowed for all processes. 1705 */ 1706 1707 static int chroot_allow_open_directories = 1; 1708 1709 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1710 &chroot_allow_open_directories, 0, ""); 1711 1712 /* 1713 * chroot to the specified namecache entry. 
We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 *
 * Fails with the priv_check_cred() error when the caller lacks
 * PRIV_VFS_CHROOT, with EPERM when open directory descriptors are
 * present and not permitted by chroot_allow_open_directories, with
 * ENOENT when the entry has no vnode, or with whatever vget() or
 * checkvp_chdir() reports.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *newroot;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 &&
	     fdp->fd_rdir != rootvnode)) {
		error = chroot_refuse_vdir_fds(fdp);
		if (error)
			return (error);
	}

	newroot = nch->ncp->nc_vp;
	if (newroot == NULL)
		return (ENOENT);

	error = vget(newroot, LK_SHARED);
	if (error)
		return (error);

	/*
	 * Check the validity of the vnode as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(newroot, td);
	vn_unlock(newroot);			/* leave reference intact */
	if (error) {
		vrele(newroot);
		return (error);
	}

	vrele(fdp->fd_rdir);
	fdp->fd_rdir = newroot;	/* reference inherited by fd_rdir */
	cache_drop(&fdp->fd_nrdir);
	cache_copy(nch, &fdp->fd_nrdir);

	/* the first chroot also establishes the jail directory */
	if (fdp->fd_jdir == NULL) {
		fdp->fd_jdir = newroot;
		vref(fdp->fd_jdir);
		cache_copy(nch, &fdp->fd_njdir);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
1773 */ 1774 int 1775 sys_chroot(struct chroot_args *uap) 1776 { 1777 struct thread *td __debugvar = curthread; 1778 struct nlookupdata nd; 1779 int error; 1780 1781 KKASSERT(td->td_proc); 1782 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1783 if (error == 0) { 1784 nd.nl_flags |= NLC_EXEC; 1785 error = nlookup(&nd); 1786 if (error == 0) 1787 error = kern_chroot(&nd.nl_nch); 1788 } 1789 nlookup_done(&nd); 1790 return(error); 1791 } 1792 1793 int 1794 sys_chroot_kernel(struct chroot_kernel_args *uap) 1795 { 1796 struct thread *td = curthread; 1797 struct nlookupdata nd; 1798 struct nchandle *nch; 1799 struct vnode *vp; 1800 int error; 1801 1802 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1803 if (error) 1804 goto error_nond; 1805 1806 error = nlookup(&nd); 1807 if (error) 1808 goto error_out; 1809 1810 nch = &nd.nl_nch; 1811 1812 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1813 if (error) 1814 goto error_out; 1815 1816 if ((vp = nch->ncp->nc_vp) == NULL) { 1817 error = ENOENT; 1818 goto error_out; 1819 } 1820 1821 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1822 goto error_out; 1823 1824 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1825 get_mplock(); 1826 vfs_cache_setroot(vp, cache_hold(nch)); 1827 rel_mplock(); 1828 1829 error_out: 1830 nlookup_done(&nd); 1831 error_nond: 1832 return(error); 1833 } 1834 1835 /* 1836 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1837 * determine whether it is legal to chdir to the vnode. The vnode's state 1838 * is not changed by this call. 
1839 */ 1840 static int 1841 checkvp_chdir(struct vnode *vp, struct thread *td) 1842 { 1843 int error; 1844 1845 if (vp->v_type != VDIR) 1846 error = ENOTDIR; 1847 else 1848 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1849 return (error); 1850 } 1851 1852 int 1853 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1854 { 1855 struct thread *td = curthread; 1856 struct proc *p = td->td_proc; 1857 struct lwp *lp = td->td_lwp; 1858 struct filedesc *fdp = p->p_fd; 1859 int cmode, flags; 1860 struct file *nfp; 1861 struct file *fp; 1862 struct vnode *vp; 1863 int type, indx, error = 0; 1864 struct flock lf; 1865 1866 if ((oflags & O_ACCMODE) == O_ACCMODE) 1867 return (EINVAL); 1868 flags = FFLAGS(oflags); 1869 error = falloc(lp, &nfp, NULL); 1870 if (error) 1871 return (error); 1872 fp = nfp; 1873 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1874 1875 /* 1876 * XXX p_dupfd is a real mess. It allows a device to return a 1877 * file descriptor to be duplicated rather then doing the open 1878 * itself. 1879 */ 1880 lp->lwp_dupfd = -1; 1881 1882 /* 1883 * Call vn_open() to do the lookup and assign the vnode to the 1884 * file pointer. vn_open() does not change the ref count on fp 1885 * and the vnode, on success, will be inherited by the file pointer 1886 * and unlocked. 1887 * 1888 * Request a shared lock on the vnode if possible. 1889 */ 1890 nd->nl_flags |= NLC_LOCKVP; 1891 if ((flags & (O_CREAT|O_TRUNC)) == 0) 1892 nd->nl_flags |= NLC_SHAREDLOCK; 1893 1894 error = vn_open(nd, fp, flags, cmode); 1895 nlookup_done(nd); 1896 1897 if (error) { 1898 /* 1899 * handle special fdopen() case. bleh. dupfdopen() is 1900 * responsible for dropping the old contents of ofiles[indx] 1901 * if it succeeds. 1902 * 1903 * Note that fsetfd() will add a ref to fp which represents 1904 * the fd_files[] assignment. We must still drop our 1905 * reference. 
1906 */ 1907 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1908 if (fdalloc(p, 0, &indx) == 0) { 1909 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1910 if (error == 0) { 1911 *res = indx; 1912 fdrop(fp); /* our ref */ 1913 return (0); 1914 } 1915 fsetfd(fdp, NULL, indx); 1916 } 1917 } 1918 fdrop(fp); /* our ref */ 1919 if (error == ERESTART) 1920 error = EINTR; 1921 return (error); 1922 } 1923 1924 /* 1925 * ref the vnode for ourselves so it can't be ripped out from under 1926 * is. XXX need an ND flag to request that the vnode be returned 1927 * anyway. 1928 * 1929 * Reserve a file descriptor but do not assign it until the open 1930 * succeeds. 1931 */ 1932 vp = (struct vnode *)fp->f_data; 1933 vref(vp); 1934 if ((error = fdalloc(p, 0, &indx)) != 0) { 1935 fdrop(fp); 1936 vrele(vp); 1937 return (error); 1938 } 1939 1940 /* 1941 * If no error occurs the vp will have been assigned to the file 1942 * pointer. 1943 */ 1944 lp->lwp_dupfd = 0; 1945 1946 if (flags & (O_EXLOCK | O_SHLOCK)) { 1947 lf.l_whence = SEEK_SET; 1948 lf.l_start = 0; 1949 lf.l_len = 0; 1950 if (flags & O_EXLOCK) 1951 lf.l_type = F_WRLCK; 1952 else 1953 lf.l_type = F_RDLCK; 1954 if (flags & FNONBLOCK) 1955 type = 0; 1956 else 1957 type = F_WAIT; 1958 1959 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1960 /* 1961 * lock request failed. Clean up the reserved 1962 * descriptor. 1963 */ 1964 vrele(vp); 1965 fsetfd(fdp, NULL, indx); 1966 fdrop(fp); 1967 return (error); 1968 } 1969 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 1970 } 1971 #if 0 1972 /* 1973 * Assert that all regular file vnodes were created with a object. 1974 */ 1975 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1976 ("open: regular file has no backing object after vn_open")); 1977 #endif 1978 1979 vrele(vp); 1980 1981 /* 1982 * release our private reference, leaving the one associated with the 1983 * descriptor table intact. 
1984 */ 1985 if (oflags & O_CLOEXEC) 1986 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 1987 fsetfd(fdp, fp, indx); 1988 fdrop(fp); 1989 *res = indx; 1990 return (error); 1991 } 1992 1993 /* 1994 * open_args(char *path, int flags, int mode) 1995 * 1996 * Check permissions, allocate an open file structure, 1997 * and call the device open routine if any. 1998 */ 1999 int 2000 sys_open(struct open_args *uap) 2001 { 2002 struct nlookupdata nd; 2003 int error; 2004 2005 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2006 if (error == 0) { 2007 error = kern_open(&nd, uap->flags, 2008 uap->mode, &uap->sysmsg_result); 2009 } 2010 nlookup_done(&nd); 2011 return (error); 2012 } 2013 2014 /* 2015 * openat_args(int fd, char *path, int flags, int mode) 2016 */ 2017 int 2018 sys_openat(struct openat_args *uap) 2019 { 2020 struct nlookupdata nd; 2021 int error; 2022 struct file *fp; 2023 2024 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2025 if (error == 0) { 2026 error = kern_open(&nd, uap->flags, uap->mode, 2027 &uap->sysmsg_result); 2028 } 2029 nlookup_done_at(&nd, fp); 2030 return (error); 2031 } 2032 2033 int 2034 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2035 { 2036 struct thread *td = curthread; 2037 struct proc *p = td->td_proc; 2038 struct vnode *vp; 2039 struct vattr vattr; 2040 int error; 2041 int whiteout = 0; 2042 2043 KKASSERT(p); 2044 2045 VATTR_NULL(&vattr); 2046 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2047 vattr.va_rmajor = rmajor; 2048 vattr.va_rminor = rminor; 2049 2050 switch (mode & S_IFMT) { 2051 case S_IFMT: /* used by badsect to flag bad sectors */ 2052 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2053 vattr.va_type = VBAD; 2054 break; 2055 case S_IFCHR: 2056 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2057 vattr.va_type = VCHR; 2058 break; 2059 case S_IFBLK: 2060 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2061 vattr.va_type = VBLK; 2062 break; 2063 case 
S_IFWHT: 2064 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2065 whiteout = 1; 2066 break; 2067 case S_IFDIR: /* special directories support for HAMMER */ 2068 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2069 vattr.va_type = VDIR; 2070 break; 2071 default: 2072 error = EINVAL; 2073 break; 2074 } 2075 2076 if (error) 2077 return (error); 2078 2079 bwillinode(1); 2080 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2081 if ((error = nlookup(nd)) != 0) 2082 return (error); 2083 if (nd->nl_nch.ncp->nc_vp) 2084 return (EEXIST); 2085 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2086 return (error); 2087 2088 if (whiteout) { 2089 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2090 nd->nl_cred, NAMEI_CREATE); 2091 } else { 2092 vp = NULL; 2093 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2094 &vp, nd->nl_cred, &vattr); 2095 if (error == 0) 2096 vput(vp); 2097 } 2098 return (error); 2099 } 2100 2101 /* 2102 * mknod_args(char *path, int mode, int dev) 2103 * 2104 * Create a special file. 2105 */ 2106 int 2107 sys_mknod(struct mknod_args *uap) 2108 { 2109 struct nlookupdata nd; 2110 int error; 2111 2112 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2113 if (error == 0) { 2114 error = kern_mknod(&nd, uap->mode, 2115 umajor(uap->dev), uminor(uap->dev)); 2116 } 2117 nlookup_done(&nd); 2118 return (error); 2119 } 2120 2121 /* 2122 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2123 * 2124 * Create a special file. The path is relative to the directory associated 2125 * with fd. 
2126 */ 2127 int 2128 sys_mknodat(struct mknodat_args *uap) 2129 { 2130 struct nlookupdata nd; 2131 struct file *fp; 2132 int error; 2133 2134 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2135 if (error == 0) { 2136 error = kern_mknod(&nd, uap->mode, 2137 umajor(uap->dev), uminor(uap->dev)); 2138 } 2139 nlookup_done_at(&nd, fp); 2140 return (error); 2141 } 2142 2143 int 2144 kern_mkfifo(struct nlookupdata *nd, int mode) 2145 { 2146 struct thread *td = curthread; 2147 struct proc *p = td->td_proc; 2148 struct vattr vattr; 2149 struct vnode *vp; 2150 int error; 2151 2152 bwillinode(1); 2153 2154 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2155 if ((error = nlookup(nd)) != 0) 2156 return (error); 2157 if (nd->nl_nch.ncp->nc_vp) 2158 return (EEXIST); 2159 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2160 return (error); 2161 2162 VATTR_NULL(&vattr); 2163 vattr.va_type = VFIFO; 2164 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2165 vp = NULL; 2166 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2167 if (error == 0) 2168 vput(vp); 2169 return (error); 2170 } 2171 2172 /* 2173 * mkfifo_args(char *path, int mode) 2174 * 2175 * Create a named pipe. 2176 */ 2177 int 2178 sys_mkfifo(struct mkfifo_args *uap) 2179 { 2180 struct nlookupdata nd; 2181 int error; 2182 2183 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2184 if (error == 0) 2185 error = kern_mkfifo(&nd, uap->mode); 2186 nlookup_done(&nd); 2187 return (error); 2188 } 2189 2190 /* 2191 * mkfifoat_args(int fd, char *path, mode_t mode) 2192 * 2193 * Create a named pipe. The path is relative to the directory associated 2194 * with fd. 
2195 */ 2196 int 2197 sys_mkfifoat(struct mkfifoat_args *uap) 2198 { 2199 struct nlookupdata nd; 2200 struct file *fp; 2201 int error; 2202 2203 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2204 if (error == 0) 2205 error = kern_mkfifo(&nd, uap->mode); 2206 nlookup_done_at(&nd, fp); 2207 return (error); 2208 } 2209 2210 static int hardlink_check_uid = 0; 2211 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2212 &hardlink_check_uid, 0, 2213 "Unprivileged processes cannot create hard links to files owned by other " 2214 "users"); 2215 static int hardlink_check_gid = 0; 2216 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2217 &hardlink_check_gid, 0, 2218 "Unprivileged processes cannot create hard links to files owned by other " 2219 "groups"); 2220 2221 static int 2222 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2223 { 2224 struct vattr va; 2225 int error; 2226 2227 /* 2228 * Shortcut if disabled 2229 */ 2230 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2231 return (0); 2232 2233 /* 2234 * Privileged user can always hardlink 2235 */ 2236 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2237 return (0); 2238 2239 /* 2240 * Otherwise only if the originating file is owned by the 2241 * same user or group. Note that any group is allowed if 2242 * the file is owned by the caller. 2243 */ 2244 error = VOP_GETATTR(vp, &va); 2245 if (error != 0) 2246 return (error); 2247 2248 if (hardlink_check_uid) { 2249 if (cred->cr_uid != va.va_uid) 2250 return (EPERM); 2251 } 2252 2253 if (hardlink_check_gid) { 2254 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2255 return (EPERM); 2256 } 2257 2258 return (0); 2259 } 2260 2261 int 2262 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2263 { 2264 struct thread *td = curthread; 2265 struct vnode *vp; 2266 int error; 2267 2268 /* 2269 * Lookup the source and obtained a locked vnode. 
2270 * 2271 * You may only hardlink a file which you have write permission 2272 * on or which you own. 2273 * 2274 * XXX relookup on vget failure / race ? 2275 */ 2276 bwillinode(1); 2277 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2278 if ((error = nlookup(nd)) != 0) 2279 return (error); 2280 vp = nd->nl_nch.ncp->nc_vp; 2281 KKASSERT(vp != NULL); 2282 if (vp->v_type == VDIR) 2283 return (EPERM); /* POSIX */ 2284 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2285 return (error); 2286 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2287 return (error); 2288 2289 /* 2290 * Unlock the source so we can lookup the target without deadlocking 2291 * (XXX vp is locked already, possible other deadlock?). The target 2292 * must not exist. 2293 */ 2294 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2295 nd->nl_flags &= ~NLC_NCPISLOCKED; 2296 cache_unlock(&nd->nl_nch); 2297 vn_unlock(vp); 2298 2299 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2300 if ((error = nlookup(linknd)) != 0) { 2301 vrele(vp); 2302 return (error); 2303 } 2304 if (linknd->nl_nch.ncp->nc_vp) { 2305 vrele(vp); 2306 return (EEXIST); 2307 } 2308 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2309 if (error) { 2310 vrele(vp); 2311 return (error); 2312 } 2313 2314 /* 2315 * Finally run the new API VOP. 2316 */ 2317 error = can_hardlink(vp, td, td->td_ucred); 2318 if (error == 0) { 2319 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2320 vp, linknd->nl_cred); 2321 } 2322 vput(vp); 2323 return (error); 2324 } 2325 2326 /* 2327 * link_args(char *path, char *link) 2328 * 2329 * Make a hard file link. 
2330 */ 2331 int 2332 sys_link(struct link_args *uap) 2333 { 2334 struct nlookupdata nd, linknd; 2335 int error; 2336 2337 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2338 if (error == 0) { 2339 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2340 if (error == 0) 2341 error = kern_link(&nd, &linknd); 2342 nlookup_done(&linknd); 2343 } 2344 nlookup_done(&nd); 2345 return (error); 2346 } 2347 2348 /* 2349 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2350 * 2351 * Make a hard file link. The path1 argument is relative to the directory 2352 * associated with fd1, and similarly the path2 argument is relative to 2353 * the directory associated with fd2. 2354 */ 2355 int 2356 sys_linkat(struct linkat_args *uap) 2357 { 2358 struct nlookupdata nd, linknd; 2359 struct file *fp1, *fp2; 2360 int error; 2361 2362 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2363 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2364 if (error == 0) { 2365 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2366 uap->path2, UIO_USERSPACE, 0); 2367 if (error == 0) 2368 error = kern_link(&nd, &linknd); 2369 nlookup_done_at(&linknd, fp2); 2370 } 2371 nlookup_done_at(&nd, fp1); 2372 return (error); 2373 } 2374 2375 int 2376 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2377 { 2378 struct vattr vattr; 2379 struct vnode *vp; 2380 struct vnode *dvp; 2381 int error; 2382 2383 bwillinode(1); 2384 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2385 if ((error = nlookup(nd)) != 0) 2386 return (error); 2387 if (nd->nl_nch.ncp->nc_vp) 2388 return (EEXIST); 2389 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2390 return (error); 2391 dvp = nd->nl_dvp; 2392 VATTR_NULL(&vattr); 2393 vattr.va_mode = mode; 2394 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2395 if (error == 0) 2396 vput(vp); 2397 return (error); 2398 } 2399 2400 /* 2401 * symlink(char *path, char *link) 2402 * 2403 * Make a symbolic 
link. 2404 */ 2405 int 2406 sys_symlink(struct symlink_args *uap) 2407 { 2408 struct thread *td = curthread; 2409 struct nlookupdata nd; 2410 char *path; 2411 int error; 2412 int mode; 2413 2414 path = objcache_get(namei_oc, M_WAITOK); 2415 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2416 if (error == 0) { 2417 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2418 if (error == 0) { 2419 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2420 error = kern_symlink(&nd, path, mode); 2421 } 2422 nlookup_done(&nd); 2423 } 2424 objcache_put(namei_oc, path); 2425 return (error); 2426 } 2427 2428 /* 2429 * symlinkat_args(char *path1, int fd, char *path2) 2430 * 2431 * Make a symbolic link. The path2 argument is relative to the directory 2432 * associated with fd. 2433 */ 2434 int 2435 sys_symlinkat(struct symlinkat_args *uap) 2436 { 2437 struct thread *td = curthread; 2438 struct nlookupdata nd; 2439 struct file *fp; 2440 char *path1; 2441 int error; 2442 int mode; 2443 2444 path1 = objcache_get(namei_oc, M_WAITOK); 2445 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2446 if (error == 0) { 2447 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2448 UIO_USERSPACE, 0); 2449 if (error == 0) { 2450 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2451 error = kern_symlink(&nd, path1, mode); 2452 } 2453 nlookup_done_at(&nd, fp); 2454 } 2455 objcache_put(namei_oc, path1); 2456 return (error); 2457 } 2458 2459 /* 2460 * undelete_args(char *path) 2461 * 2462 * Delete a whiteout from the filesystem. 
2463 */ 2464 int 2465 sys_undelete(struct undelete_args *uap) 2466 { 2467 struct nlookupdata nd; 2468 int error; 2469 2470 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2471 bwillinode(1); 2472 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2473 if (error == 0) 2474 error = nlookup(&nd); 2475 if (error == 0) 2476 error = ncp_writechk(&nd.nl_nch); 2477 if (error == 0) { 2478 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2479 NAMEI_DELETE); 2480 } 2481 nlookup_done(&nd); 2482 return (error); 2483 } 2484 2485 int 2486 kern_unlink(struct nlookupdata *nd) 2487 { 2488 int error; 2489 2490 bwillinode(1); 2491 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2492 if ((error = nlookup(nd)) != 0) 2493 return (error); 2494 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2495 return (error); 2496 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2497 return (error); 2498 } 2499 2500 /* 2501 * unlink_args(char *path) 2502 * 2503 * Delete a name from the filesystem. 2504 */ 2505 int 2506 sys_unlink(struct unlink_args *uap) 2507 { 2508 struct nlookupdata nd; 2509 int error; 2510 2511 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2512 if (error == 0) 2513 error = kern_unlink(&nd); 2514 nlookup_done(&nd); 2515 return (error); 2516 } 2517 2518 2519 /* 2520 * unlinkat_args(int fd, char *path, int flags) 2521 * 2522 * Delete the file or directory entry pointed to by fd/path. 
2523 */ 2524 int 2525 sys_unlinkat(struct unlinkat_args *uap) 2526 { 2527 struct nlookupdata nd; 2528 struct file *fp; 2529 int error; 2530 2531 if (uap->flags & ~AT_REMOVEDIR) 2532 return (EINVAL); 2533 2534 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2535 if (error == 0) { 2536 if (uap->flags & AT_REMOVEDIR) 2537 error = kern_rmdir(&nd); 2538 else 2539 error = kern_unlink(&nd); 2540 } 2541 nlookup_done_at(&nd, fp); 2542 return (error); 2543 } 2544 2545 int 2546 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2547 { 2548 struct thread *td = curthread; 2549 struct proc *p = td->td_proc; 2550 struct file *fp; 2551 struct vnode *vp; 2552 struct vattr vattr; 2553 off_t new_offset; 2554 int error; 2555 2556 fp = holdfp(p->p_fd, fd, -1); 2557 if (fp == NULL) 2558 return (EBADF); 2559 if (fp->f_type != DTYPE_VNODE) { 2560 error = ESPIPE; 2561 goto done; 2562 } 2563 vp = (struct vnode *)fp->f_data; 2564 2565 switch (whence) { 2566 case L_INCR: 2567 spin_lock(&fp->f_spin); 2568 new_offset = fp->f_offset + offset; 2569 error = 0; 2570 break; 2571 case L_XTND: 2572 error = VOP_GETATTR(vp, &vattr); 2573 spin_lock(&fp->f_spin); 2574 new_offset = offset + vattr.va_size; 2575 break; 2576 case L_SET: 2577 new_offset = offset; 2578 error = 0; 2579 spin_lock(&fp->f_spin); 2580 break; 2581 default: 2582 new_offset = 0; 2583 error = EINVAL; 2584 spin_lock(&fp->f_spin); 2585 break; 2586 } 2587 2588 /* 2589 * Validate the seek position. Negative offsets are not allowed 2590 * for regular files or directories. 2591 * 2592 * Normally we would also not want to allow negative offsets for 2593 * character and block-special devices. However kvm addresses 2594 * on 64 bit architectures might appear to be negative and must 2595 * be allowed. 
2596 */ 2597 if (error == 0) { 2598 if (new_offset < 0 && 2599 (vp->v_type == VREG || vp->v_type == VDIR)) { 2600 error = EINVAL; 2601 } else { 2602 fp->f_offset = new_offset; 2603 } 2604 } 2605 *res = fp->f_offset; 2606 spin_unlock(&fp->f_spin); 2607 done: 2608 fdrop(fp); 2609 return (error); 2610 } 2611 2612 /* 2613 * lseek_args(int fd, int pad, off_t offset, int whence) 2614 * 2615 * Reposition read/write file offset. 2616 */ 2617 int 2618 sys_lseek(struct lseek_args *uap) 2619 { 2620 int error; 2621 2622 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2623 &uap->sysmsg_offset); 2624 2625 return (error); 2626 } 2627 2628 /* 2629 * Check if current process can access given file. amode is a bitmask of *_OK 2630 * access bits. flags is a bitmask of AT_* flags. 2631 */ 2632 int 2633 kern_access(struct nlookupdata *nd, int amode, int flags) 2634 { 2635 struct vnode *vp; 2636 int error, mode; 2637 2638 if (flags & ~AT_EACCESS) 2639 return (EINVAL); 2640 nd->nl_flags |= NLC_SHAREDLOCK; 2641 if ((error = nlookup(nd)) != 0) 2642 return (error); 2643 retry: 2644 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2645 if (error) 2646 return (error); 2647 2648 /* Flags == 0 means only check for existence. */ 2649 if (amode) { 2650 mode = 0; 2651 if (amode & R_OK) 2652 mode |= VREAD; 2653 if (amode & W_OK) 2654 mode |= VWRITE; 2655 if (amode & X_OK) 2656 mode |= VEXEC; 2657 if ((mode & VWRITE) == 0 || 2658 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2659 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2660 2661 /* 2662 * If the file handle is stale we have to re-resolve the 2663 * entry with the ncp held exclusively. This is a hack 2664 * at the moment. 
2665 */ 2666 if (error == ESTALE) { 2667 vput(vp); 2668 cache_unlock(&nd->nl_nch); 2669 cache_lock(&nd->nl_nch); 2670 cache_setunresolved(&nd->nl_nch); 2671 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2672 if (error == 0) { 2673 vp = NULL; 2674 goto retry; 2675 } 2676 return(error); 2677 } 2678 } 2679 vput(vp); 2680 return (error); 2681 } 2682 2683 /* 2684 * access_args(char *path, int flags) 2685 * 2686 * Check access permissions. 2687 */ 2688 int 2689 sys_access(struct access_args *uap) 2690 { 2691 struct nlookupdata nd; 2692 int error; 2693 2694 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2695 if (error == 0) 2696 error = kern_access(&nd, uap->flags, 0); 2697 nlookup_done(&nd); 2698 return (error); 2699 } 2700 2701 2702 /* 2703 * eaccess_args(char *path, int flags) 2704 * 2705 * Check access permissions. 2706 */ 2707 int 2708 sys_eaccess(struct eaccess_args *uap) 2709 { 2710 struct nlookupdata nd; 2711 int error; 2712 2713 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2714 if (error == 0) 2715 error = kern_access(&nd, uap->flags, AT_EACCESS); 2716 nlookup_done(&nd); 2717 return (error); 2718 } 2719 2720 2721 /* 2722 * faccessat_args(int fd, char *path, int amode, int flags) 2723 * 2724 * Check access permissions. 
2725 */ 2726 int 2727 sys_faccessat(struct faccessat_args *uap) 2728 { 2729 struct nlookupdata nd; 2730 struct file *fp; 2731 int error; 2732 2733 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2734 NLC_FOLLOW); 2735 if (error == 0) 2736 error = kern_access(&nd, uap->amode, uap->flags); 2737 nlookup_done_at(&nd, fp); 2738 return (error); 2739 } 2740 2741 int 2742 kern_stat(struct nlookupdata *nd, struct stat *st) 2743 { 2744 int error; 2745 struct vnode *vp; 2746 2747 nd->nl_flags |= NLC_SHAREDLOCK; 2748 if ((error = nlookup(nd)) != 0) 2749 return (error); 2750 again: 2751 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2752 return (ENOENT); 2753 2754 if ((error = vget(vp, LK_SHARED)) != 0) 2755 return (error); 2756 error = vn_stat(vp, st, nd->nl_cred); 2757 2758 /* 2759 * If the file handle is stale we have to re-resolve the 2760 * entry with the ncp held exclusively. This is a hack 2761 * at the moment. 2762 */ 2763 if (error == ESTALE) { 2764 vput(vp); 2765 cache_unlock(&nd->nl_nch); 2766 cache_lock(&nd->nl_nch); 2767 cache_setunresolved(&nd->nl_nch); 2768 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2769 if (error == 0) 2770 goto again; 2771 } else { 2772 vput(vp); 2773 } 2774 return (error); 2775 } 2776 2777 /* 2778 * stat_args(char *path, struct stat *ub) 2779 * 2780 * Get file status; this version follows links. 2781 */ 2782 int 2783 sys_stat(struct stat_args *uap) 2784 { 2785 struct nlookupdata nd; 2786 struct stat st; 2787 int error; 2788 2789 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2790 if (error == 0) { 2791 error = kern_stat(&nd, &st); 2792 if (error == 0) 2793 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2794 } 2795 nlookup_done(&nd); 2796 return (error); 2797 } 2798 2799 /* 2800 * lstat_args(char *path, struct stat *ub) 2801 * 2802 * Get file status; this version does not follow links. 
2803 */ 2804 int 2805 sys_lstat(struct lstat_args *uap) 2806 { 2807 struct nlookupdata nd; 2808 struct stat st; 2809 int error; 2810 2811 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2812 if (error == 0) { 2813 error = kern_stat(&nd, &st); 2814 if (error == 0) 2815 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2816 } 2817 nlookup_done(&nd); 2818 return (error); 2819 } 2820 2821 /* 2822 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2823 * 2824 * Get status of file pointed to by fd/path. 2825 */ 2826 int 2827 sys_fstatat(struct fstatat_args *uap) 2828 { 2829 struct nlookupdata nd; 2830 struct stat st; 2831 int error; 2832 int flags; 2833 struct file *fp; 2834 2835 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2836 return (EINVAL); 2837 2838 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2839 2840 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2841 UIO_USERSPACE, flags); 2842 if (error == 0) { 2843 error = kern_stat(&nd, &st); 2844 if (error == 0) 2845 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2846 } 2847 nlookup_done_at(&nd, fp); 2848 return (error); 2849 } 2850 2851 static int 2852 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2853 { 2854 struct nlookupdata nd; 2855 struct vnode *vp; 2856 int error; 2857 2858 vp = NULL; 2859 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2860 if (error == 0) 2861 error = nlookup(&nd); 2862 if (error == 0) 2863 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2864 nlookup_done(&nd); 2865 if (error == 0) { 2866 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2867 vput(vp); 2868 } 2869 return (error); 2870 } 2871 2872 /* 2873 * pathconf_Args(char *path, int name) 2874 * 2875 * Get configurable pathname variables. 
2876 */ 2877 int 2878 sys_pathconf(struct pathconf_args *uap) 2879 { 2880 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2881 &uap->sysmsg_reg)); 2882 } 2883 2884 /* 2885 * lpathconf_Args(char *path, int name) 2886 * 2887 * Get configurable pathname variables, but don't follow symlinks. 2888 */ 2889 int 2890 sys_lpathconf(struct lpathconf_args *uap) 2891 { 2892 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2893 } 2894 2895 /* 2896 * XXX: daver 2897 * kern_readlink isn't properly split yet. There is a copyin burried 2898 * in VOP_READLINK(). 2899 */ 2900 int 2901 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2902 { 2903 struct thread *td = curthread; 2904 struct vnode *vp; 2905 struct iovec aiov; 2906 struct uio auio; 2907 int error; 2908 2909 nd->nl_flags |= NLC_SHAREDLOCK; 2910 if ((error = nlookup(nd)) != 0) 2911 return (error); 2912 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2913 if (error) 2914 return (error); 2915 if (vp->v_type != VLNK) { 2916 error = EINVAL; 2917 } else { 2918 aiov.iov_base = buf; 2919 aiov.iov_len = count; 2920 auio.uio_iov = &aiov; 2921 auio.uio_iovcnt = 1; 2922 auio.uio_offset = 0; 2923 auio.uio_rw = UIO_READ; 2924 auio.uio_segflg = UIO_USERSPACE; 2925 auio.uio_td = td; 2926 auio.uio_resid = count; 2927 error = VOP_READLINK(vp, &auio, td->td_ucred); 2928 } 2929 vput(vp); 2930 *res = count - auio.uio_resid; 2931 return (error); 2932 } 2933 2934 /* 2935 * readlink_args(char *path, char *buf, int count) 2936 * 2937 * Return target name of a symbolic link. 
2938 */ 2939 int 2940 sys_readlink(struct readlink_args *uap) 2941 { 2942 struct nlookupdata nd; 2943 int error; 2944 2945 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2946 if (error == 0) { 2947 error = kern_readlink(&nd, uap->buf, uap->count, 2948 &uap->sysmsg_result); 2949 } 2950 nlookup_done(&nd); 2951 return (error); 2952 } 2953 2954 /* 2955 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2956 * 2957 * Return target name of a symbolic link. The path is relative to the 2958 * directory associated with fd. 2959 */ 2960 int 2961 sys_readlinkat(struct readlinkat_args *uap) 2962 { 2963 struct nlookupdata nd; 2964 struct file *fp; 2965 int error; 2966 2967 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2968 if (error == 0) { 2969 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2970 &uap->sysmsg_result); 2971 } 2972 nlookup_done_at(&nd, fp); 2973 return (error); 2974 } 2975 2976 static int 2977 setfflags(struct vnode *vp, int flags) 2978 { 2979 struct thread *td = curthread; 2980 int error; 2981 struct vattr vattr; 2982 2983 /* 2984 * Prevent non-root users from setting flags on devices. When 2985 * a device is reused, users can retain ownership of the device 2986 * if they are allowed to set flags and programs assume that 2987 * chown can't fail when done as root. 2988 */ 2989 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2990 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2991 return (error); 2992 2993 /* 2994 * note: vget is required for any operation that might mod the vnode 2995 * so VINACTIVE is properly cleared. 2996 */ 2997 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2998 VATTR_NULL(&vattr); 2999 vattr.va_flags = flags; 3000 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3001 vput(vp); 3002 } 3003 return (error); 3004 } 3005 3006 /* 3007 * chflags(char *path, int flags) 3008 * 3009 * Change flags of a file given a path name. 
3010 */ 3011 int 3012 sys_chflags(struct chflags_args *uap) 3013 { 3014 struct nlookupdata nd; 3015 struct vnode *vp; 3016 int error; 3017 3018 vp = NULL; 3019 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3020 if (error == 0) 3021 error = nlookup(&nd); 3022 if (error == 0) 3023 error = ncp_writechk(&nd.nl_nch); 3024 if (error == 0) 3025 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3026 nlookup_done(&nd); 3027 if (error == 0) { 3028 error = setfflags(vp, uap->flags); 3029 vrele(vp); 3030 } 3031 return (error); 3032 } 3033 3034 /* 3035 * lchflags(char *path, int flags) 3036 * 3037 * Change flags of a file given a path name, but don't follow symlinks. 3038 */ 3039 int 3040 sys_lchflags(struct lchflags_args *uap) 3041 { 3042 struct nlookupdata nd; 3043 struct vnode *vp; 3044 int error; 3045 3046 vp = NULL; 3047 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3048 if (error == 0) 3049 error = nlookup(&nd); 3050 if (error == 0) 3051 error = ncp_writechk(&nd.nl_nch); 3052 if (error == 0) 3053 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3054 nlookup_done(&nd); 3055 if (error == 0) { 3056 error = setfflags(vp, uap->flags); 3057 vrele(vp); 3058 } 3059 return (error); 3060 } 3061 3062 /* 3063 * fchflags_args(int fd, int flags) 3064 * 3065 * Change flags of a file given a file descriptor. 
3066 */ 3067 int 3068 sys_fchflags(struct fchflags_args *uap) 3069 { 3070 struct thread *td = curthread; 3071 struct proc *p = td->td_proc; 3072 struct file *fp; 3073 int error; 3074 3075 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3076 return (error); 3077 if (fp->f_nchandle.ncp) 3078 error = ncp_writechk(&fp->f_nchandle); 3079 if (error == 0) 3080 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3081 fdrop(fp); 3082 return (error); 3083 } 3084 3085 /* 3086 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3087 * change flags given a pathname relative to a filedescriptor 3088 */ 3089 int sys_chflagsat(struct chflagsat_args *uap) 3090 { 3091 struct nlookupdata nd; 3092 struct vnode *vp; 3093 struct file *fp; 3094 int error; 3095 int lookupflags; 3096 3097 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3098 return (EINVAL); 3099 3100 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3101 3102 vp = NULL; 3103 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3104 if (error == 0) 3105 error = nlookup(&nd); 3106 if (error == 0) 3107 error = ncp_writechk(&nd.nl_nch); 3108 if (error == 0) 3109 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3110 nlookup_done_at(&nd, fp); 3111 if (error == 0) { 3112 error = setfflags(vp, uap->flags); 3113 vrele(vp); 3114 } 3115 return (error); 3116 } 3117 3118 3119 static int 3120 setfmode(struct vnode *vp, int mode) 3121 { 3122 struct thread *td = curthread; 3123 int error; 3124 struct vattr vattr; 3125 3126 /* 3127 * note: vget is required for any operation that might mod the vnode 3128 * so VINACTIVE is properly cleared. 
3129 */ 3130 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3131 VATTR_NULL(&vattr); 3132 vattr.va_mode = mode & ALLPERMS; 3133 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3134 vput(vp); 3135 } 3136 return error; 3137 } 3138 3139 int 3140 kern_chmod(struct nlookupdata *nd, int mode) 3141 { 3142 struct vnode *vp; 3143 int error; 3144 3145 if ((error = nlookup(nd)) != 0) 3146 return (error); 3147 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3148 return (error); 3149 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3150 error = setfmode(vp, mode); 3151 vrele(vp); 3152 return (error); 3153 } 3154 3155 /* 3156 * chmod_args(char *path, int mode) 3157 * 3158 * Change mode of a file given path name. 3159 */ 3160 int 3161 sys_chmod(struct chmod_args *uap) 3162 { 3163 struct nlookupdata nd; 3164 int error; 3165 3166 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3167 if (error == 0) 3168 error = kern_chmod(&nd, uap->mode); 3169 nlookup_done(&nd); 3170 return (error); 3171 } 3172 3173 /* 3174 * lchmod_args(char *path, int mode) 3175 * 3176 * Change mode of a file given path name (don't follow links.) 3177 */ 3178 int 3179 sys_lchmod(struct lchmod_args *uap) 3180 { 3181 struct nlookupdata nd; 3182 int error; 3183 3184 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3185 if (error == 0) 3186 error = kern_chmod(&nd, uap->mode); 3187 nlookup_done(&nd); 3188 return (error); 3189 } 3190 3191 /* 3192 * fchmod_args(int fd, int mode) 3193 * 3194 * Change mode of a file given a file descriptor. 
3195 */ 3196 int 3197 sys_fchmod(struct fchmod_args *uap) 3198 { 3199 struct thread *td = curthread; 3200 struct proc *p = td->td_proc; 3201 struct file *fp; 3202 int error; 3203 3204 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3205 return (error); 3206 if (fp->f_nchandle.ncp) 3207 error = ncp_writechk(&fp->f_nchandle); 3208 if (error == 0) 3209 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3210 fdrop(fp); 3211 return (error); 3212 } 3213 3214 /* 3215 * fchmodat_args(char *path, int mode) 3216 * 3217 * Change mode of a file pointed to by fd/path. 3218 */ 3219 int 3220 sys_fchmodat(struct fchmodat_args *uap) 3221 { 3222 struct nlookupdata nd; 3223 struct file *fp; 3224 int error; 3225 int flags; 3226 3227 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3228 return (EINVAL); 3229 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3230 3231 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3232 UIO_USERSPACE, flags); 3233 if (error == 0) 3234 error = kern_chmod(&nd, uap->mode); 3235 nlookup_done_at(&nd, fp); 3236 return (error); 3237 } 3238 3239 static int 3240 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3241 { 3242 struct thread *td = curthread; 3243 int error; 3244 struct vattr vattr; 3245 uid_t o_uid; 3246 gid_t o_gid; 3247 uint64_t size; 3248 3249 /* 3250 * note: vget is required for any operation that might mod the vnode 3251 * so VINACTIVE is properly cleared. 
3252 */ 3253 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3254 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3255 return error; 3256 o_uid = vattr.va_uid; 3257 o_gid = vattr.va_gid; 3258 size = vattr.va_size; 3259 3260 VATTR_NULL(&vattr); 3261 vattr.va_uid = uid; 3262 vattr.va_gid = gid; 3263 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3264 vput(vp); 3265 } 3266 3267 if (error == 0) { 3268 if (uid == -1) 3269 uid = o_uid; 3270 if (gid == -1) 3271 gid = o_gid; 3272 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3273 VFS_ACCOUNT(mp, uid, gid, size); 3274 } 3275 3276 return error; 3277 } 3278 3279 int 3280 kern_chown(struct nlookupdata *nd, int uid, int gid) 3281 { 3282 struct vnode *vp; 3283 int error; 3284 3285 if ((error = nlookup(nd)) != 0) 3286 return (error); 3287 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3288 return (error); 3289 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3290 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3291 vrele(vp); 3292 return (error); 3293 } 3294 3295 /* 3296 * chown(char *path, int uid, int gid) 3297 * 3298 * Set ownership given a path name. 3299 */ 3300 int 3301 sys_chown(struct chown_args *uap) 3302 { 3303 struct nlookupdata nd; 3304 int error; 3305 3306 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3307 if (error == 0) 3308 error = kern_chown(&nd, uap->uid, uap->gid); 3309 nlookup_done(&nd); 3310 return (error); 3311 } 3312 3313 /* 3314 * lchown_args(char *path, int uid, int gid) 3315 * 3316 * Set ownership given a path name, do not cross symlinks. 3317 */ 3318 int 3319 sys_lchown(struct lchown_args *uap) 3320 { 3321 struct nlookupdata nd; 3322 int error; 3323 3324 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3325 if (error == 0) 3326 error = kern_chown(&nd, uap->uid, uap->gid); 3327 nlookup_done(&nd); 3328 return (error); 3329 } 3330 3331 /* 3332 * fchown_args(int fd, int uid, int gid) 3333 * 3334 * Set ownership given a file descriptor. 
3335 */ 3336 int 3337 sys_fchown(struct fchown_args *uap) 3338 { 3339 struct thread *td = curthread; 3340 struct proc *p = td->td_proc; 3341 struct file *fp; 3342 int error; 3343 3344 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3345 return (error); 3346 if (fp->f_nchandle.ncp) 3347 error = ncp_writechk(&fp->f_nchandle); 3348 if (error == 0) 3349 error = setfown(p->p_fd->fd_ncdir.mount, 3350 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3351 fdrop(fp); 3352 return (error); 3353 } 3354 3355 /* 3356 * fchownat(int fd, char *path, int uid, int gid, int flags) 3357 * 3358 * Set ownership of file pointed to by fd/path. 3359 */ 3360 int 3361 sys_fchownat(struct fchownat_args *uap) 3362 { 3363 struct nlookupdata nd; 3364 struct file *fp; 3365 int error; 3366 int flags; 3367 3368 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3369 return (EINVAL); 3370 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3371 3372 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3373 UIO_USERSPACE, flags); 3374 if (error == 0) 3375 error = kern_chown(&nd, uap->uid, uap->gid); 3376 nlookup_done_at(&nd, fp); 3377 return (error); 3378 } 3379 3380 3381 static int 3382 getutimes(struct timeval *tvp, struct timespec *tsp) 3383 { 3384 struct timeval tv[2]; 3385 int error; 3386 3387 if (tvp == NULL) { 3388 microtime(&tv[0]); 3389 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3390 tsp[1] = tsp[0]; 3391 } else { 3392 if ((error = itimerfix(tvp)) != 0) 3393 return (error); 3394 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3395 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3396 } 3397 return 0; 3398 } 3399 3400 static int 3401 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3402 { 3403 struct timespec tsnow; 3404 int error; 3405 3406 *nullflag = 0; 3407 nanotime(&tsnow); 3408 if (ts == NULL) { 3409 newts[0] = tsnow; 3410 newts[1] = tsnow; 3411 *nullflag = 1; 3412 return (0); 3413 } 3414 3415 newts[0] = ts[0]; 3416 newts[1] = ts[1]; 3417 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3418 return (0); 3419 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3420 *nullflag = 1; 3421 3422 if (newts[0].tv_nsec == UTIME_OMIT) 3423 newts[0].tv_sec = VNOVAL; 3424 else if (newts[0].tv_nsec == UTIME_NOW) 3425 newts[0] = tsnow; 3426 else if ((error = itimespecfix(&newts[0])) != 0) 3427 return (error); 3428 3429 if (newts[1].tv_nsec == UTIME_OMIT) 3430 newts[1].tv_sec = VNOVAL; 3431 else if (newts[1].tv_nsec == UTIME_NOW) 3432 newts[1] = tsnow; 3433 else if ((error = itimespecfix(&newts[1])) != 0) 3434 return (error); 3435 3436 return (0); 3437 } 3438 3439 static int 3440 setutimes(struct vnode *vp, struct vattr *vattr, 3441 const struct timespec *ts, int nullflag) 3442 { 3443 struct thread *td = curthread; 3444 int error; 3445 3446 VATTR_NULL(vattr); 3447 vattr->va_atime = ts[0]; 3448 vattr->va_mtime = ts[1]; 3449 if (nullflag) 3450 vattr->va_vaflags |= VA_UTIMES_NULL; 3451 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3452 3453 return error; 3454 } 3455 3456 int 3457 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3458 { 3459 struct timespec ts[2]; 3460 int error; 3461 3462 if (tptr) { 3463 if ((error = getutimes(tptr, ts)) != 0) 3464 return (error); 3465 } 3466 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3467 return (error); 3468 } 3469 3470 /* 3471 * utimes_args(char *path, struct timeval *tptr) 3472 * 3473 * Set the access and modification times of a file. 3474 */ 3475 int 3476 sys_utimes(struct utimes_args *uap) 3477 { 3478 struct timeval tv[2]; 3479 struct nlookupdata nd; 3480 int error; 3481 3482 if (uap->tptr) { 3483 error = copyin(uap->tptr, tv, sizeof(tv)); 3484 if (error) 3485 return (error); 3486 } 3487 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3488 if (error == 0) 3489 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3490 nlookup_done(&nd); 3491 return (error); 3492 } 3493 3494 /* 3495 * lutimes_args(char *path, struct timeval *tptr) 3496 * 3497 * Set the access and modification times of a file. 3498 */ 3499 int 3500 sys_lutimes(struct lutimes_args *uap) 3501 { 3502 struct timeval tv[2]; 3503 struct nlookupdata nd; 3504 int error; 3505 3506 if (uap->tptr) { 3507 error = copyin(uap->tptr, tv, sizeof(tv)); 3508 if (error) 3509 return (error); 3510 } 3511 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3512 if (error == 0) 3513 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3514 nlookup_done(&nd); 3515 return (error); 3516 } 3517 3518 /* 3519 * Set utimes on a file descriptor. The creds used to open the 3520 * file are used to determine whether the operation is allowed 3521 * or not. 3522 */ 3523 int 3524 kern_futimens(int fd, struct timespec *ts) 3525 { 3526 struct thread *td = curthread; 3527 struct proc *p = td->td_proc; 3528 struct timespec newts[2]; 3529 struct file *fp; 3530 struct vnode *vp; 3531 struct vattr vattr; 3532 int nullflag; 3533 int error; 3534 3535 error = getutimens(ts, newts, &nullflag); 3536 if (error) 3537 return (error); 3538 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3539 return (error); 3540 if (fp->f_nchandle.ncp) 3541 error = ncp_writechk(&fp->f_nchandle); 3542 if (error == 0) { 3543 vp = fp->f_data; 3544 error = vget(vp, LK_EXCLUSIVE); 3545 if (error == 0) { 3546 error = VOP_GETATTR(vp, &vattr); 3547 if (error == 0) { 3548 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3549 fp->f_cred); 3550 } 3551 if (error == 0) { 3552 error = setutimes(vp, &vattr, newts, nullflag); 3553 } 3554 vput(vp); 3555 } 3556 } 3557 fdrop(fp); 3558 return (error); 3559 } 3560 3561 /* 3562 * futimens_args(int fd, struct timespec *ts) 3563 * 3564 * Set the access and modification times of a file. 
3565 */ 3566 int 3567 sys_futimens(struct futimens_args *uap) 3568 { 3569 struct timespec ts[2]; 3570 int error; 3571 3572 if (uap->ts) { 3573 error = copyin(uap->ts, ts, sizeof(ts)); 3574 if (error) 3575 return (error); 3576 } 3577 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3578 return (error); 3579 } 3580 3581 int 3582 kern_futimes(int fd, struct timeval *tptr) 3583 { 3584 struct timespec ts[2]; 3585 int error; 3586 3587 if (tptr) { 3588 if ((error = getutimes(tptr, ts)) != 0) 3589 return (error); 3590 } 3591 error = kern_futimens(fd, tptr ? ts : NULL); 3592 return (error); 3593 } 3594 3595 /* 3596 * futimes_args(int fd, struct timeval *tptr) 3597 * 3598 * Set the access and modification times of a file. 3599 */ 3600 int 3601 sys_futimes(struct futimes_args *uap) 3602 { 3603 struct timeval tv[2]; 3604 int error; 3605 3606 if (uap->tptr) { 3607 error = copyin(uap->tptr, tv, sizeof(tv)); 3608 if (error) 3609 return (error); 3610 } 3611 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3612 return (error); 3613 } 3614 3615 int 3616 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3617 { 3618 struct timespec newts[2]; 3619 struct vnode *vp; 3620 struct vattr vattr; 3621 int nullflag; 3622 int error; 3623 3624 if (flags & ~AT_SYMLINK_NOFOLLOW) 3625 return (EINVAL); 3626 3627 error = getutimens(ts, newts, &nullflag); 3628 if (error) 3629 return (error); 3630 3631 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3632 if ((error = nlookup(nd)) != 0) 3633 return (error); 3634 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3635 return (error); 3636 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3637 return (error); 3638 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3639 error = vget(vp, LK_EXCLUSIVE); 3640 if (error == 0) { 3641 error = setutimes(vp, &vattr, newts, nullflag); 3642 vput(vp); 3643 } 3644 } 3645 vrele(vp); 3646 return (error); 3647 } 3648 3649 /* 3650 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3651 * 3652 * Set file access and modification times of a file. 3653 */ 3654 int 3655 sys_utimensat(struct utimensat_args *uap) 3656 { 3657 struct timespec ts[2]; 3658 struct nlookupdata nd; 3659 struct file *fp; 3660 int error; 3661 int flags; 3662 3663 if (uap->ts) { 3664 error = copyin(uap->ts, ts, sizeof(ts)); 3665 if (error) 3666 return (error); 3667 } 3668 3669 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3670 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3671 UIO_USERSPACE, flags); 3672 if (error == 0) 3673 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3674 nlookup_done_at(&nd, fp); 3675 return (error); 3676 } 3677 3678 int 3679 kern_truncate(struct nlookupdata *nd, off_t length) 3680 { 3681 struct vnode *vp; 3682 struct vattr vattr; 3683 int error; 3684 uid_t uid = 0; 3685 gid_t gid = 0; 3686 uint64_t old_size = 0; 3687 3688 if (length < 0) 3689 return(EINVAL); 3690 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3691 if ((error = nlookup(nd)) != 0) 3692 return (error); 3693 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3694 return (error); 3695 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3696 return (error); 3697 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3698 if (error) { 3699 vrele(vp); 3700 return (error); 3701 } 3702 if (vp->v_type == VDIR) { 3703 error = EISDIR; 3704 goto done; 3705 } 3706 if (vfs_quota_enabled) { 3707 error = VOP_GETATTR(vp, &vattr); 3708 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3709 uid = vattr.va_uid; 3710 gid = vattr.va_gid; 3711 old_size = vattr.va_size; 3712 } 3713 3714 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3715 VATTR_NULL(&vattr); 3716 vattr.va_size = length; 3717 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3718 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3719 } 3720 done: 3721 vput(vp); 3722 return (error); 3723 } 3724 3725 /* 3726 * truncate(char *path, int pad, off_t length) 3727 * 3728 * Truncate a file given its path name. 
3729 */ 3730 int 3731 sys_truncate(struct truncate_args *uap) 3732 { 3733 struct nlookupdata nd; 3734 int error; 3735 3736 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3737 if (error == 0) 3738 error = kern_truncate(&nd, uap->length); 3739 nlookup_done(&nd); 3740 return error; 3741 } 3742 3743 int 3744 kern_ftruncate(int fd, off_t length) 3745 { 3746 struct thread *td = curthread; 3747 struct proc *p = td->td_proc; 3748 struct vattr vattr; 3749 struct vnode *vp; 3750 struct file *fp; 3751 int error; 3752 uid_t uid = 0; 3753 gid_t gid = 0; 3754 uint64_t old_size = 0; 3755 struct mount *mp; 3756 3757 if (length < 0) 3758 return(EINVAL); 3759 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3760 return (error); 3761 if (fp->f_nchandle.ncp) { 3762 error = ncp_writechk(&fp->f_nchandle); 3763 if (error) 3764 goto done; 3765 } 3766 if ((fp->f_flag & FWRITE) == 0) { 3767 error = EINVAL; 3768 goto done; 3769 } 3770 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3771 error = EINVAL; 3772 goto done; 3773 } 3774 vp = (struct vnode *)fp->f_data; 3775 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3776 if (vp->v_type == VDIR) { 3777 error = EISDIR; 3778 vn_unlock(vp); 3779 goto done; 3780 } 3781 3782 if (vfs_quota_enabled) { 3783 error = VOP_GETATTR(vp, &vattr); 3784 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3785 uid = vattr.va_uid; 3786 gid = vattr.va_gid; 3787 old_size = vattr.va_size; 3788 } 3789 3790 if ((error = vn_writechk(vp, NULL)) == 0) { 3791 VATTR_NULL(&vattr); 3792 vattr.va_size = length; 3793 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3794 mp = vq_vptomp(vp); 3795 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3796 } 3797 vn_unlock(vp); 3798 done: 3799 fdrop(fp); 3800 return (error); 3801 } 3802 3803 /* 3804 * ftruncate_args(int fd, int pad, off_t length) 3805 * 3806 * Truncate a file given a file descriptor. 
3807 */ 3808 int 3809 sys_ftruncate(struct ftruncate_args *uap) 3810 { 3811 int error; 3812 3813 error = kern_ftruncate(uap->fd, uap->length); 3814 3815 return (error); 3816 } 3817 3818 /* 3819 * fsync(int fd) 3820 * 3821 * Sync an open file. 3822 */ 3823 int 3824 sys_fsync(struct fsync_args *uap) 3825 { 3826 struct thread *td = curthread; 3827 struct proc *p = td->td_proc; 3828 struct vnode *vp; 3829 struct file *fp; 3830 vm_object_t obj; 3831 int error; 3832 3833 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3834 return (error); 3835 vp = (struct vnode *)fp->f_data; 3836 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3837 if ((obj = vp->v_object) != NULL) { 3838 if (vp->v_mount == NULL || 3839 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3840 vm_object_page_clean(obj, 0, 0, 0); 3841 } 3842 } 3843 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3844 if (error == 0 && vp->v_mount) 3845 error = buf_fsync(vp); 3846 vn_unlock(vp); 3847 fdrop(fp); 3848 3849 return (error); 3850 } 3851 3852 int 3853 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3854 { 3855 struct nchandle fnchd; 3856 struct nchandle tnchd; 3857 struct namecache *ncp; 3858 struct vnode *fdvp; 3859 struct vnode *tdvp; 3860 struct mount *mp; 3861 int error; 3862 u_int fncp_gen; 3863 u_int tncp_gen; 3864 3865 bwillinode(1); 3866 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3867 if ((error = nlookup(fromnd)) != 0) 3868 return (error); 3869 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3870 return (ENOENT); 3871 fnchd.mount = fromnd->nl_nch.mount; 3872 cache_hold(&fnchd); 3873 3874 /* 3875 * unlock the source nch so we can lookup the target nch without 3876 * deadlocking. The target may or may not exist so we do not check 3877 * for a target vp like kern_mkdir() and other creation functions do. 3878 * 3879 * The source and target directories are ref'd and rechecked after 3880 * everything is relocked to determine if the source or target file 3881 * has been renamed. 
3882 */ 3883 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3884 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3885 3886 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 3887 3888 cache_unlock(&fromnd->nl_nch); 3889 3890 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3891 if ((error = nlookup(tond)) != 0) { 3892 cache_drop(&fnchd); 3893 return (error); 3894 } 3895 tncp_gen = tond->nl_nch.ncp->nc_generation; 3896 3897 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3898 cache_drop(&fnchd); 3899 return (ENOENT); 3900 } 3901 tnchd.mount = tond->nl_nch.mount; 3902 cache_hold(&tnchd); 3903 3904 /* 3905 * If the source and target are the same there is nothing to do 3906 */ 3907 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3908 cache_drop(&fnchd); 3909 cache_drop(&tnchd); 3910 return (0); 3911 } 3912 3913 /* 3914 * Mount points cannot be renamed or overwritten 3915 */ 3916 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3917 NCF_ISMOUNTPT 3918 ) { 3919 cache_drop(&fnchd); 3920 cache_drop(&tnchd); 3921 return (EINVAL); 3922 } 3923 3924 /* 3925 * Relock the source ncp. cache_relock() will deal with any 3926 * deadlocks against the already-locked tond and will also 3927 * make sure both are resolved. 3928 * 3929 * NOTE AFTER RELOCKING: The source or target ncp may have become 3930 * invalid while they were unlocked, nc_vp and nc_mount could 3931 * be NULL. 3932 */ 3933 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3934 &tond->nl_nch, tond->nl_cred); 3935 fromnd->nl_flags |= NLC_NCPISLOCKED; 3936 3937 /* 3938 * If the namecache generation changed for either fromnd or tond, 3939 * we must retry. 
3940 */ 3941 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 3942 tond->nl_nch.ncp->nc_generation != tncp_gen) { 3943 kprintf("kern_rename: retry due to gen on: " 3944 "\"%s\" -> \"%s\"\n", 3945 fromnd->nl_nch.ncp->nc_name, 3946 tond->nl_nch.ncp->nc_name); 3947 cache_drop(&fnchd); 3948 cache_drop(&tnchd); 3949 return (EAGAIN); 3950 } 3951 3952 /* 3953 * If either fromnd or tond are marked destroyed a ripout occured 3954 * out from under us and we must retry. 3955 */ 3956 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3957 fromnd->nl_nch.ncp->nc_vp == NULL || 3958 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3959 kprintf("kern_rename: retry due to ripout on: " 3960 "\"%s\" -> \"%s\"\n", 3961 fromnd->nl_nch.ncp->nc_name, 3962 tond->nl_nch.ncp->nc_name); 3963 cache_drop(&fnchd); 3964 cache_drop(&tnchd); 3965 return (EAGAIN); 3966 } 3967 3968 /* 3969 * Make sure the parent directories linkages are the same. 3970 * XXX shouldn't be needed any more w/ generation check above. 3971 */ 3972 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3973 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3974 cache_drop(&fnchd); 3975 cache_drop(&tnchd); 3976 return (ENOENT); 3977 } 3978 3979 /* 3980 * Both the source and target must be within the same filesystem and 3981 * in the same filesystem as their parent directories within the 3982 * namecache topology. 3983 * 3984 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3985 */ 3986 mp = fnchd.mount; 3987 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3988 mp != tond->nl_nch.mount) { 3989 cache_drop(&fnchd); 3990 cache_drop(&tnchd); 3991 return (EXDEV); 3992 } 3993 3994 /* 3995 * Make sure the mount point is writable 3996 */ 3997 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3998 cache_drop(&fnchd); 3999 cache_drop(&tnchd); 4000 return (error); 4001 } 4002 4003 /* 4004 * If the target exists and either the source or target is a directory, 4005 * then both must be directories. 
4006 * 4007 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4008 * have become NULL. 4009 */ 4010 if (tond->nl_nch.ncp->nc_vp) { 4011 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4012 error = ENOENT; 4013 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4014 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4015 error = ENOTDIR; 4016 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4017 error = EISDIR; 4018 } 4019 } 4020 4021 /* 4022 * You cannot rename a source into itself or a subdirectory of itself. 4023 * We check this by travsersing the target directory upwards looking 4024 * for a match against the source. 4025 * 4026 * XXX MPSAFE 4027 */ 4028 if (error == 0) { 4029 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4030 if (fromnd->nl_nch.ncp == ncp) { 4031 error = EINVAL; 4032 break; 4033 } 4034 } 4035 } 4036 4037 cache_drop(&fnchd); 4038 cache_drop(&tnchd); 4039 4040 /* 4041 * Even though the namespaces are different, they may still represent 4042 * hardlinks to the same file. The filesystem might have a hard time 4043 * with this so we issue a NREMOVE of the source instead of a NRENAME 4044 * when we detect the situation. 4045 */ 4046 if (error == 0) { 4047 fdvp = fromnd->nl_dvp; 4048 tdvp = tond->nl_dvp; 4049 if (fdvp == NULL || tdvp == NULL) { 4050 error = EPERM; 4051 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4052 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4053 fromnd->nl_cred); 4054 } else { 4055 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4056 fdvp, tdvp, tond->nl_cred); 4057 } 4058 } 4059 return (error); 4060 } 4061 4062 /* 4063 * rename_args(char *from, char *to) 4064 * 4065 * Rename files. Source and destination must either both be directories, 4066 * or both not be directories. If target is a directory, it must be empty. 
4067 */ 4068 int 4069 sys_rename(struct rename_args *uap) 4070 { 4071 struct nlookupdata fromnd, tond; 4072 int error; 4073 4074 do { 4075 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4076 if (error == 0) { 4077 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4078 if (error == 0) 4079 error = kern_rename(&fromnd, &tond); 4080 nlookup_done(&tond); 4081 } 4082 nlookup_done(&fromnd); 4083 } while (error == EAGAIN); 4084 return (error); 4085 } 4086 4087 /* 4088 * renameat_args(int oldfd, char *old, int newfd, char *new) 4089 * 4090 * Rename files using paths relative to the directories associated with 4091 * oldfd and newfd. Source and destination must either both be directories, 4092 * or both not be directories. If target is a directory, it must be empty. 4093 */ 4094 int 4095 sys_renameat(struct renameat_args *uap) 4096 { 4097 struct nlookupdata oldnd, newnd; 4098 struct file *oldfp, *newfp; 4099 int error; 4100 4101 do { 4102 error = nlookup_init_at(&oldnd, &oldfp, 4103 uap->oldfd, uap->old, 4104 UIO_USERSPACE, 0); 4105 if (error == 0) { 4106 error = nlookup_init_at(&newnd, &newfp, 4107 uap->newfd, uap->new, 4108 UIO_USERSPACE, 0); 4109 if (error == 0) 4110 error = kern_rename(&oldnd, &newnd); 4111 nlookup_done_at(&newnd, newfp); 4112 } 4113 nlookup_done_at(&oldnd, oldfp); 4114 } while (error == EAGAIN); 4115 return (error); 4116 } 4117 4118 int 4119 kern_mkdir(struct nlookupdata *nd, int mode) 4120 { 4121 struct thread *td = curthread; 4122 struct proc *p = td->td_proc; 4123 struct vnode *vp; 4124 struct vattr vattr; 4125 int error; 4126 4127 bwillinode(1); 4128 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4129 if ((error = nlookup(nd)) != 0) 4130 return (error); 4131 4132 if (nd->nl_nch.ncp->nc_vp) 4133 return (EEXIST); 4134 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4135 return (error); 4136 VATTR_NULL(&vattr); 4137 vattr.va_type = VDIR; 4138 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4139 4140 vp = NULL; 
4141 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4142 if (error == 0) 4143 vput(vp); 4144 return (error); 4145 } 4146 4147 /* 4148 * mkdir_args(char *path, int mode) 4149 * 4150 * Make a directory file. 4151 */ 4152 int 4153 sys_mkdir(struct mkdir_args *uap) 4154 { 4155 struct nlookupdata nd; 4156 int error; 4157 4158 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4159 if (error == 0) 4160 error = kern_mkdir(&nd, uap->mode); 4161 nlookup_done(&nd); 4162 return (error); 4163 } 4164 4165 /* 4166 * mkdirat_args(int fd, char *path, mode_t mode) 4167 * 4168 * Make a directory file. The path is relative to the directory associated 4169 * with fd. 4170 */ 4171 int 4172 sys_mkdirat(struct mkdirat_args *uap) 4173 { 4174 struct nlookupdata nd; 4175 struct file *fp; 4176 int error; 4177 4178 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4179 if (error == 0) 4180 error = kern_mkdir(&nd, uap->mode); 4181 nlookup_done_at(&nd, fp); 4182 return (error); 4183 } 4184 4185 int 4186 kern_rmdir(struct nlookupdata *nd) 4187 { 4188 int error; 4189 4190 bwillinode(1); 4191 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4192 if ((error = nlookup(nd)) != 0) 4193 return (error); 4194 4195 /* 4196 * Do not allow directories representing mount points to be 4197 * deleted, even if empty. Check write perms on mount point 4198 * in case the vnode is aliased (aka nullfs). 4199 */ 4200 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4201 return (EBUSY); 4202 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4203 return (error); 4204 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4205 return (error); 4206 } 4207 4208 /* 4209 * rmdir_args(char *path) 4210 * 4211 * Remove a directory file. 
4212 */ 4213 int 4214 sys_rmdir(struct rmdir_args *uap) 4215 { 4216 struct nlookupdata nd; 4217 int error; 4218 4219 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4220 if (error == 0) 4221 error = kern_rmdir(&nd); 4222 nlookup_done(&nd); 4223 return (error); 4224 } 4225 4226 int 4227 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4228 enum uio_seg direction) 4229 { 4230 struct thread *td = curthread; 4231 struct proc *p = td->td_proc; 4232 struct vnode *vp; 4233 struct file *fp; 4234 struct uio auio; 4235 struct iovec aiov; 4236 off_t loff; 4237 int error, eofflag; 4238 4239 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4240 return (error); 4241 if ((fp->f_flag & FREAD) == 0) { 4242 error = EBADF; 4243 goto done; 4244 } 4245 vp = (struct vnode *)fp->f_data; 4246 unionread: 4247 if (vp->v_type != VDIR) { 4248 error = EINVAL; 4249 goto done; 4250 } 4251 aiov.iov_base = buf; 4252 aiov.iov_len = count; 4253 auio.uio_iov = &aiov; 4254 auio.uio_iovcnt = 1; 4255 auio.uio_rw = UIO_READ; 4256 auio.uio_segflg = direction; 4257 auio.uio_td = td; 4258 auio.uio_resid = count; 4259 loff = auio.uio_offset = fp->f_offset; 4260 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4261 fp->f_offset = auio.uio_offset; 4262 if (error) 4263 goto done; 4264 if (count == auio.uio_resid) { 4265 if (union_dircheckp) { 4266 error = union_dircheckp(td, &vp, fp); 4267 if (error == -1) 4268 goto unionread; 4269 if (error) 4270 goto done; 4271 } 4272 #if 0 4273 if ((vp->v_flag & VROOT) && 4274 (vp->v_mount->mnt_flag & MNT_UNION)) { 4275 struct vnode *tvp = vp; 4276 vp = vp->v_mount->mnt_vnodecovered; 4277 vref(vp); 4278 fp->f_data = vp; 4279 fp->f_offset = 0; 4280 vrele(tvp); 4281 goto unionread; 4282 } 4283 #endif 4284 } 4285 4286 /* 4287 * WARNING! *basep may not be wide enough to accomodate the 4288 * seek offset. XXX should we hack this to return the upper 32 bits 4289 * for offsets greater then 4G? 
4290 */ 4291 if (basep) { 4292 *basep = (long)loff; 4293 } 4294 *res = count - auio.uio_resid; 4295 done: 4296 fdrop(fp); 4297 return (error); 4298 } 4299 4300 /* 4301 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4302 * 4303 * Read a block of directory entries in a file system independent format. 4304 */ 4305 int 4306 sys_getdirentries(struct getdirentries_args *uap) 4307 { 4308 long base; 4309 int error; 4310 4311 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4312 &uap->sysmsg_result, UIO_USERSPACE); 4313 4314 if (error == 0 && uap->basep) 4315 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4316 return (error); 4317 } 4318 4319 /* 4320 * getdents_args(int fd, char *buf, size_t count) 4321 */ 4322 int 4323 sys_getdents(struct getdents_args *uap) 4324 { 4325 int error; 4326 4327 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4328 &uap->sysmsg_result, UIO_USERSPACE); 4329 4330 return (error); 4331 } 4332 4333 /* 4334 * Set the mode mask for creation of filesystem nodes. 4335 * 4336 * umask(int newmask) 4337 */ 4338 int 4339 sys_umask(struct umask_args *uap) 4340 { 4341 struct thread *td = curthread; 4342 struct proc *p = td->td_proc; 4343 struct filedesc *fdp; 4344 4345 fdp = p->p_fd; 4346 uap->sysmsg_result = fdp->fd_cmask; 4347 fdp->fd_cmask = uap->newmask & ALLPERMS; 4348 return (0); 4349 } 4350 4351 /* 4352 * revoke(char *path) 4353 * 4354 * Void all references to file by ripping underlying filesystem 4355 * away from vnode. 
4356 */ 4357 int 4358 sys_revoke(struct revoke_args *uap) 4359 { 4360 struct nlookupdata nd; 4361 struct vattr vattr; 4362 struct vnode *vp; 4363 struct ucred *cred; 4364 int error; 4365 4366 vp = NULL; 4367 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4368 if (error == 0) 4369 error = nlookup(&nd); 4370 if (error == 0) 4371 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4372 cred = crhold(nd.nl_cred); 4373 nlookup_done(&nd); 4374 if (error == 0) { 4375 if (error == 0) 4376 error = VOP_GETATTR(vp, &vattr); 4377 if (error == 0 && cred->cr_uid != vattr.va_uid) 4378 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4379 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4380 if (vcount(vp) > 0) 4381 error = vrevoke(vp, cred); 4382 } else if (error == 0) { 4383 error = vrevoke(vp, cred); 4384 } 4385 vrele(vp); 4386 } 4387 if (cred) 4388 crfree(cred); 4389 return (error); 4390 } 4391 4392 /* 4393 * getfh_args(char *fname, fhandle_t *fhp) 4394 * 4395 * Get (NFS) file handle 4396 * 4397 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4398 * mount. This allows nullfs mounts to be explicitly exported. 4399 * 4400 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4401 * 4402 * nullfs mounts of subdirectories are not safe. That is, it will 4403 * work, but you do not really have protection against access to 4404 * the related parent directories. 
4405 */ 4406 int 4407 sys_getfh(struct getfh_args *uap) 4408 { 4409 struct thread *td = curthread; 4410 struct nlookupdata nd; 4411 fhandle_t fh; 4412 struct vnode *vp; 4413 struct mount *mp; 4414 int error; 4415 4416 /* 4417 * Must be super user 4418 */ 4419 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4420 return (error); 4421 4422 vp = NULL; 4423 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4424 if (error == 0) 4425 error = nlookup(&nd); 4426 if (error == 0) 4427 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4428 mp = nd.nl_nch.mount; 4429 nlookup_done(&nd); 4430 if (error == 0) { 4431 bzero(&fh, sizeof(fh)); 4432 fh.fh_fsid = mp->mnt_stat.f_fsid; 4433 error = VFS_VPTOFH(vp, &fh.fh_fid); 4434 vput(vp); 4435 if (error == 0) 4436 error = copyout(&fh, uap->fhp, sizeof(fh)); 4437 } 4438 return (error); 4439 } 4440 4441 /* 4442 * fhopen_args(const struct fhandle *u_fhp, int flags) 4443 * 4444 * syscall for the rpc.lockd to use to translate a NFS file handle into 4445 * an open descriptor. 4446 * 4447 * warning: do not remove the priv_check() call or this becomes one giant 4448 * security hole. 4449 */ 4450 int 4451 sys_fhopen(struct fhopen_args *uap) 4452 { 4453 struct thread *td = curthread; 4454 struct filedesc *fdp = td->td_proc->p_fd; 4455 struct mount *mp; 4456 struct vnode *vp; 4457 struct fhandle fhp; 4458 struct vattr vat; 4459 struct vattr *vap = &vat; 4460 struct flock lf; 4461 int fmode, mode, error = 0, type; 4462 struct file *nfp; 4463 struct file *fp; 4464 int indx; 4465 4466 /* 4467 * Must be super user 4468 */ 4469 error = priv_check(td, PRIV_ROOT); 4470 if (error) 4471 return (error); 4472 4473 fmode = FFLAGS(uap->flags); 4474 4475 /* 4476 * Why not allow a non-read/write open for our lockd? 
4477 */ 4478 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4479 return (EINVAL); 4480 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4481 if (error) 4482 return(error); 4483 4484 /* 4485 * Find the mount point 4486 */ 4487 mp = vfs_getvfs(&fhp.fh_fsid); 4488 if (mp == NULL) { 4489 error = ESTALE; 4490 goto done; 4491 } 4492 /* now give me my vnode, it gets returned to me locked */ 4493 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4494 if (error) 4495 goto done; 4496 /* 4497 * from now on we have to make sure not 4498 * to forget about the vnode 4499 * any error that causes an abort must vput(vp) 4500 * just set error = err and 'goto bad;'. 4501 */ 4502 4503 /* 4504 * from vn_open 4505 */ 4506 if (vp->v_type == VLNK) { 4507 error = EMLINK; 4508 goto bad; 4509 } 4510 if (vp->v_type == VSOCK) { 4511 error = EOPNOTSUPP; 4512 goto bad; 4513 } 4514 mode = 0; 4515 if (fmode & (FWRITE | O_TRUNC)) { 4516 if (vp->v_type == VDIR) { 4517 error = EISDIR; 4518 goto bad; 4519 } 4520 error = vn_writechk(vp, NULL); 4521 if (error) 4522 goto bad; 4523 mode |= VWRITE; 4524 } 4525 if (fmode & FREAD) 4526 mode |= VREAD; 4527 if (mode) { 4528 error = VOP_ACCESS(vp, mode, td->td_ucred); 4529 if (error) 4530 goto bad; 4531 } 4532 if (fmode & O_TRUNC) { 4533 vn_unlock(vp); /* XXX */ 4534 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4535 VATTR_NULL(vap); 4536 vap->va_size = 0; 4537 error = VOP_SETATTR(vp, vap, td->td_ucred); 4538 if (error) 4539 goto bad; 4540 } 4541 4542 /* 4543 * VOP_OPEN needs the file pointer so it can potentially override 4544 * it. 4545 * 4546 * WARNING! no f_nchandle will be associated when fhopen()ing a 4547 * directory. XXX 4548 */ 4549 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4550 goto bad; 4551 fp = nfp; 4552 4553 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4554 if (error) { 4555 /* 4556 * setting f_ops this way prevents VOP_CLOSE from being 4557 * called or fdrop() releasing the vp from v_data. 
Since 4558 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4559 */ 4560 fp->f_ops = &badfileops; 4561 fp->f_data = NULL; 4562 goto bad_drop; 4563 } 4564 4565 /* 4566 * The fp is given its own reference, we still have our ref and lock. 4567 * 4568 * Assert that all regular files must be created with a VM object. 4569 */ 4570 if (vp->v_type == VREG && vp->v_object == NULL) { 4571 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4572 goto bad_drop; 4573 } 4574 4575 /* 4576 * The open was successful. Handle any locking requirements. 4577 */ 4578 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4579 lf.l_whence = SEEK_SET; 4580 lf.l_start = 0; 4581 lf.l_len = 0; 4582 if (fmode & O_EXLOCK) 4583 lf.l_type = F_WRLCK; 4584 else 4585 lf.l_type = F_RDLCK; 4586 if (fmode & FNONBLOCK) 4587 type = 0; 4588 else 4589 type = F_WAIT; 4590 vn_unlock(vp); 4591 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4592 /* 4593 * release our private reference. 4594 */ 4595 fsetfd(fdp, NULL, indx); 4596 fdrop(fp); 4597 vrele(vp); 4598 goto done; 4599 } 4600 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4601 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4602 } 4603 4604 /* 4605 * Clean up. Associate the file pointer with the previously 4606 * reserved descriptor and return it. 
4607 */ 4608 vput(vp); 4609 if (uap->flags & O_CLOEXEC) 4610 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4611 fsetfd(fdp, fp, indx); 4612 fdrop(fp); 4613 uap->sysmsg_result = indx; 4614 return (error); 4615 4616 bad_drop: 4617 fsetfd(fdp, NULL, indx); 4618 fdrop(fp); 4619 bad: 4620 vput(vp); 4621 done: 4622 return (error); 4623 } 4624 4625 /* 4626 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4627 */ 4628 int 4629 sys_fhstat(struct fhstat_args *uap) 4630 { 4631 struct thread *td = curthread; 4632 struct stat sb; 4633 fhandle_t fh; 4634 struct mount *mp; 4635 struct vnode *vp; 4636 int error; 4637 4638 /* 4639 * Must be super user 4640 */ 4641 error = priv_check(td, PRIV_ROOT); 4642 if (error) 4643 return (error); 4644 4645 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4646 if (error) 4647 return (error); 4648 4649 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4650 error = ESTALE; 4651 if (error == 0) { 4652 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4653 error = vn_stat(vp, &sb, td->td_ucred); 4654 vput(vp); 4655 } 4656 } 4657 if (error == 0) 4658 error = copyout(&sb, uap->sb, sizeof(sb)); 4659 return (error); 4660 } 4661 4662 /* 4663 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4664 */ 4665 int 4666 sys_fhstatfs(struct fhstatfs_args *uap) 4667 { 4668 struct thread *td = curthread; 4669 struct proc *p = td->td_proc; 4670 struct statfs *sp; 4671 struct mount *mp; 4672 struct vnode *vp; 4673 struct statfs sb; 4674 char *fullpath, *freepath; 4675 fhandle_t fh; 4676 int error; 4677 4678 /* 4679 * Must be super user 4680 */ 4681 if ((error = priv_check(td, PRIV_ROOT))) 4682 return (error); 4683 4684 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4685 return (error); 4686 4687 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4688 error = ESTALE; 4689 goto done; 4690 } 4691 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4692 error = ESTALE; 4693 goto done; 4694 } 4695 4696 if ((error = VFS_FHTOVP(mp, NULL, 
&fh.fh_fid, &vp)) != 0) 4697 goto done; 4698 mp = vp->v_mount; 4699 sp = &mp->mnt_stat; 4700 vput(vp); 4701 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4702 goto done; 4703 4704 error = mount_path(p, mp, &fullpath, &freepath); 4705 if (error) 4706 goto done; 4707 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4708 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4709 kfree(freepath, M_TEMP); 4710 4711 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4712 if (priv_check(td, PRIV_ROOT)) { 4713 bcopy(sp, &sb, sizeof(sb)); 4714 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4715 sp = &sb; 4716 } 4717 error = copyout(sp, uap->buf, sizeof(*sp)); 4718 done: 4719 return (error); 4720 } 4721 4722 /* 4723 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4724 */ 4725 int 4726 sys_fhstatvfs(struct fhstatvfs_args *uap) 4727 { 4728 struct thread *td = curthread; 4729 struct proc *p = td->td_proc; 4730 struct statvfs *sp; 4731 struct mount *mp; 4732 struct vnode *vp; 4733 fhandle_t fh; 4734 int error; 4735 4736 /* 4737 * Must be super user 4738 */ 4739 if ((error = priv_check(td, PRIV_ROOT))) 4740 return (error); 4741 4742 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4743 return (error); 4744 4745 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4746 error = ESTALE; 4747 goto done; 4748 } 4749 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4750 error = ESTALE; 4751 goto done; 4752 } 4753 4754 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4755 goto done; 4756 mp = vp->v_mount; 4757 sp = &mp->mnt_vstat; 4758 vput(vp); 4759 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4760 goto done; 4761 4762 sp->f_flag = 0; 4763 if (mp->mnt_flag & MNT_RDONLY) 4764 sp->f_flag |= ST_RDONLY; 4765 if (mp->mnt_flag & MNT_NOSUID) 4766 sp->f_flag |= ST_NOSUID; 4767 error = copyout(sp, uap->buf, sizeof(*sp)); 4768 done: 4769 return (error); 4770 } 4771 4772 4773 /* 4774 * Syscall to push extended attribute configuration information into the 4775 * 
VFS. Accepts a path, which it converts to a mountpoint, as well as 4776 * a command (int cmd), and attribute name and misc data. For now, the 4777 * attribute name is left in userspace for consumption by the VFS_op. 4778 * It will probably be changed to be copied into sysspace by the 4779 * syscall in the future, once issues with various consumers of the 4780 * attribute code have raised their hands. 4781 * 4782 * Currently this is used only by UFS Extended Attributes. 4783 */ 4784 int 4785 sys_extattrctl(struct extattrctl_args *uap) 4786 { 4787 struct nlookupdata nd; 4788 struct vnode *vp; 4789 char attrname[EXTATTR_MAXNAMELEN]; 4790 int error; 4791 size_t size; 4792 4793 attrname[0] = 0; 4794 vp = NULL; 4795 error = 0; 4796 4797 if (error == 0 && uap->filename) { 4798 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4799 NLC_FOLLOW); 4800 if (error == 0) 4801 error = nlookup(&nd); 4802 if (error == 0) 4803 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4804 nlookup_done(&nd); 4805 } 4806 4807 if (error == 0 && uap->attrname) { 4808 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4809 &size); 4810 } 4811 4812 if (error == 0) { 4813 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4814 if (error == 0) 4815 error = nlookup(&nd); 4816 if (error == 0) 4817 error = ncp_writechk(&nd.nl_nch); 4818 if (error == 0) { 4819 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4820 uap->attrnamespace, 4821 uap->attrname, nd.nl_cred); 4822 } 4823 nlookup_done(&nd); 4824 } 4825 4826 return (error); 4827 } 4828 4829 /* 4830 * Syscall to get a named extended attribute on a file or directory. 
4831 */ 4832 int 4833 sys_extattr_set_file(struct extattr_set_file_args *uap) 4834 { 4835 char attrname[EXTATTR_MAXNAMELEN]; 4836 struct nlookupdata nd; 4837 struct vnode *vp; 4838 struct uio auio; 4839 struct iovec aiov; 4840 int error; 4841 4842 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4843 if (error) 4844 return (error); 4845 4846 vp = NULL; 4847 4848 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4849 if (error == 0) 4850 error = nlookup(&nd); 4851 if (error == 0) 4852 error = ncp_writechk(&nd.nl_nch); 4853 if (error == 0) 4854 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4855 if (error) { 4856 nlookup_done(&nd); 4857 return (error); 4858 } 4859 4860 bzero(&auio, sizeof(auio)); 4861 aiov.iov_base = uap->data; 4862 aiov.iov_len = uap->nbytes; 4863 auio.uio_iov = &aiov; 4864 auio.uio_iovcnt = 1; 4865 auio.uio_offset = 0; 4866 auio.uio_resid = uap->nbytes; 4867 auio.uio_rw = UIO_WRITE; 4868 auio.uio_td = curthread; 4869 4870 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4871 &auio, nd.nl_cred); 4872 4873 vput(vp); 4874 nlookup_done(&nd); 4875 return (error); 4876 } 4877 4878 /* 4879 * Syscall to get a named extended attribute on a file or directory. 
4880 */ 4881 int 4882 sys_extattr_get_file(struct extattr_get_file_args *uap) 4883 { 4884 char attrname[EXTATTR_MAXNAMELEN]; 4885 struct nlookupdata nd; 4886 struct uio auio; 4887 struct iovec aiov; 4888 struct vnode *vp; 4889 int error; 4890 4891 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4892 if (error) 4893 return (error); 4894 4895 vp = NULL; 4896 4897 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4898 if (error == 0) 4899 error = nlookup(&nd); 4900 if (error == 0) 4901 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 4902 if (error) { 4903 nlookup_done(&nd); 4904 return (error); 4905 } 4906 4907 bzero(&auio, sizeof(auio)); 4908 aiov.iov_base = uap->data; 4909 aiov.iov_len = uap->nbytes; 4910 auio.uio_iov = &aiov; 4911 auio.uio_iovcnt = 1; 4912 auio.uio_offset = 0; 4913 auio.uio_resid = uap->nbytes; 4914 auio.uio_rw = UIO_READ; 4915 auio.uio_td = curthread; 4916 4917 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4918 &auio, nd.nl_cred); 4919 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4920 4921 vput(vp); 4922 nlookup_done(&nd); 4923 return(error); 4924 } 4925 4926 /* 4927 * Syscall to delete a named extended attribute from a file or directory. 4928 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4929 */ 4930 int 4931 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4932 { 4933 char attrname[EXTATTR_MAXNAMELEN]; 4934 struct nlookupdata nd; 4935 struct vnode *vp; 4936 int error; 4937 4938 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4939 if (error) 4940 return(error); 4941 4942 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4943 if (error == 0) 4944 error = nlookup(&nd); 4945 if (error == 0) 4946 error = ncp_writechk(&nd.nl_nch); 4947 if (error == 0) { 4948 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4949 if (error == 0) { 4950 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4951 attrname, NULL, nd.nl_cred); 4952 vput(vp); 4953 } 4954 } 4955 nlookup_done(&nd); 4956 return(error); 4957 } 4958 4959 /* 4960 * Determine if the mount is visible to the process. 4961 */ 4962 static int 4963 chroot_visible_mnt(struct mount *mp, struct proc *p) 4964 { 4965 struct nchandle nch; 4966 4967 /* 4968 * Traverse from the mount point upwards. If we hit the process 4969 * root then the mount point is visible to the process. 4970 */ 4971 nch = mp->mnt_ncmountpt; 4972 while (nch.ncp) { 4973 if (nch.mount == p->p_fd->fd_nrdir.mount && 4974 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4975 return(1); 4976 } 4977 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4978 nch = nch.mount->mnt_ncmounton; 4979 } else { 4980 nch.ncp = nch.ncp->nc_parent; 4981 } 4982 } 4983 4984 /* 4985 * If the mount point is not visible to the process, but the 4986 * process root is in a subdirectory of the mount, return 4987 * TRUE anyway. 4988 */ 4989 if (p->p_fd->fd_nrdir.mount == mp) 4990 return(1); 4991 4992 return(0); 4993 } 4994 4995