1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 #include <sys/mplock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);
/*
 * Forward declarations for file-local helpers.
 */
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
    const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

/* Exported read/write as the vfs.usermount sysctl. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Two paths share the tail of this function (label "update"):
 *
 *  - MNT_UPDATE: the path must resolve to a vnode flagged VROOT or
 *    VPFSROOT.  The existing mount is busied and its mnt_flag and
 *    mnt_kern_flag are saved in flag/flag2 so they can be restored if
 *    VFS_MOUNT() fails.
 *
 *  - New mount: the caller must own the target directory or pass
 *    priv_check(PRIV_ROOT), the target must be a VDIR, the VFS is looked
 *    up by name (a module load is attempted for root if it is not
 *    found) and a fresh struct mount is allocated.
 *
 * Jailed credentials always get EPERM.  When the usermount sysctl is 0
 * the caller must pass priv_check(PRIV_ROOT).  Callers failing
 * priv_check(PRIV_ROOT) get MNT_NOSUID and MNT_NODEV forced on.
 *
 * Returns 0 on success or an error code.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved flags for update */
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 * A path that resolves to an entry without a vnode is ENOENT.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this nch.  The
	 * result is only acted upon later (EBUSY), after the other checks.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For type "null" update the mount found through the nch
		 * rather than the vnode's mount (see nullmp above).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Saved so a failed VFS_MOUNT() can be rolled back. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The underlying mount refuses to have anything stacked on it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		/*
		 * NOTE(review): lf is examined even when linker_load_file()
		 * returned an error; this assumes the callee always stores
		 * to lf -- confirm against linker_load_file().
		 */
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.  The return value of
	 * vfs_busy() is not checked here; mp was allocated just above.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.  A read-only mount that was not asked
	 * to stay read-only is marked MNTK_WANTRDWR; MNT_RDONLY itself is
	 * only cleared after VFS_MOUNT() in the update path below.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: drop the transient request flags and, if
		 * VFS_MOUNT() failed, restore the flags saved on entry.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root. The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount. The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		mp->mnt_ncmounton = nch;		/* inherits ref */
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below before it is examined.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed for a new mount: tear down what was
		 * set up above and free the mount structure.
		 */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
451 */ 452 struct checkdirs_info { 453 struct nchandle old_nch; 454 struct nchandle new_nch; 455 struct vnode *old_vp; 456 struct vnode *new_vp; 457 }; 458 459 static int checkdirs_callback(struct proc *p, void *data); 460 461 static void 462 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 463 { 464 struct checkdirs_info info; 465 struct vnode *olddp; 466 struct vnode *newdp; 467 struct mount *mp; 468 469 /* 470 * If the old mount point's vnode has a usecount of 1, it is not 471 * being held as a descriptor anywhere. 472 */ 473 olddp = old_nch->ncp->nc_vp; 474 if (olddp == NULL || VREFCNT(olddp) == 1) 475 return; 476 477 /* 478 * Force the root vnode of the new mount point to be resolved 479 * so we can update any matching processes. 480 */ 481 mp = new_nch->mount; 482 if (VFS_ROOT(mp, &newdp)) 483 panic("mount: lost mount"); 484 vn_unlock(newdp); 485 cache_lock(new_nch); 486 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 487 cache_setunresolved(new_nch); 488 cache_setvp(new_nch, newdp); 489 cache_unlock(new_nch); 490 491 /* 492 * Special handling of the root node 493 */ 494 if (rootvnode == olddp) { 495 vref(newdp); 496 vfs_cache_setroot(newdp, cache_hold(new_nch)); 497 } 498 499 /* 500 * Pass newdp separately so the callback does not have to access 501 * it via new_nch->ncp->nc_vp. 502 */ 503 info.old_nch = *old_nch; 504 info.new_nch = *new_nch; 505 info.new_vp = newdp; 506 allproc_scan(checkdirs_callback, &info); 507 vput(newdp); 508 } 509 510 /* 511 * NOTE: callback is not MP safe because the scanned process's filedesc 512 * structure can be ripped out from under us, amoung other things. 
513 */ 514 static int 515 checkdirs_callback(struct proc *p, void *data) 516 { 517 struct checkdirs_info *info = data; 518 struct filedesc *fdp; 519 struct nchandle ncdrop1; 520 struct nchandle ncdrop2; 521 struct vnode *vprele1; 522 struct vnode *vprele2; 523 524 if ((fdp = p->p_fd) != NULL) { 525 cache_zero(&ncdrop1); 526 cache_zero(&ncdrop2); 527 vprele1 = NULL; 528 vprele2 = NULL; 529 530 /* 531 * MPUNSAFE - XXX fdp can be pulled out from under a 532 * foreign process. 533 * 534 * A shared filedesc is ok, we don't have to copy it 535 * because we are making this change globally. 536 */ 537 spin_lock(&fdp->fd_spin); 538 if (fdp->fd_ncdir.mount == info->old_nch.mount && 539 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 540 vprele1 = fdp->fd_cdir; 541 vref(info->new_vp); 542 fdp->fd_cdir = info->new_vp; 543 ncdrop1 = fdp->fd_ncdir; 544 cache_copy(&info->new_nch, &fdp->fd_ncdir); 545 } 546 if (fdp->fd_nrdir.mount == info->old_nch.mount && 547 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 548 vprele2 = fdp->fd_rdir; 549 vref(info->new_vp); 550 fdp->fd_rdir = info->new_vp; 551 ncdrop2 = fdp->fd_nrdir; 552 cache_copy(&info->new_nch, &fdp->fd_nrdir); 553 } 554 spin_unlock(&fdp->fd_spin); 555 if (ncdrop1.ncp) 556 cache_drop(&ncdrop1); 557 if (ncdrop2.ncp) 558 cache_drop(&ncdrop2); 559 if (vprele1) 560 vrele(vprele1); 561 if (vprele2) 562 vrele(vprele2); 563 } 564 return(0); 565 } 566 567 /* 568 * Unmount a file system. 569 * 570 * Note: unmount takes a path to the vnode mounted on as argument, 571 * not special file (as before). 
572 * 573 * umount_args(char *path, int flags) 574 * 575 * MPALMOSTSAFE 576 */ 577 int 578 sys_unmount(struct unmount_args *uap) 579 { 580 struct thread *td = curthread; 581 struct proc *p __debugvar = td->td_proc; 582 struct mount *mp = NULL; 583 struct nlookupdata nd; 584 int error; 585 586 KKASSERT(p); 587 get_mplock(); 588 if (td->td_ucred->cr_prison != NULL) { 589 error = EPERM; 590 goto done; 591 } 592 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 593 goto done; 594 595 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 596 if (error == 0) 597 error = nlookup(&nd); 598 if (error) 599 goto out; 600 601 mp = nd.nl_nch.mount; 602 603 /* 604 * Only root, or the user that did the original mount is 605 * permitted to unmount this filesystem. 606 */ 607 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 608 (error = priv_check(td, PRIV_ROOT))) 609 goto out; 610 611 /* 612 * Don't allow unmounting the root file system. 613 */ 614 if (mp->mnt_flag & MNT_ROOTFS) { 615 error = EINVAL; 616 goto out; 617 } 618 619 /* 620 * Must be the root of the filesystem 621 */ 622 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 623 error = EINVAL; 624 goto out; 625 } 626 627 out: 628 nlookup_done(&nd); 629 if (error == 0) 630 error = dounmount(mp, uap->flags); 631 done: 632 rel_mplock(); 633 return (error); 634 } 635 636 /* 637 * Do the actual file system unmount. 
*/
/*
 * Interlock callback handed to mountlist_interlock() by dounmount().
 *
 * Returns EBUSY if MNTK_UNMOUNT is already set on the mount, otherwise
 * sets MNTK_UNMOUNT and returns 0.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback used by dounmount().  arg is the mount being
 * unmounted; a process whose p_textnch refers to that mount has the
 * handle dropped.  Always returns 0.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Unmount the filesystem described by mp.
 *
 * flags are MNT_* flags; MNT_FORCE lets the unmount proceed even when
 * namecache or mount references remain, in which case the mount
 * structure is not freed (freeok is cleared).
 *
 * Returns 0 on success.  On failure the MNTK_UNMOUNT/MNTK_UNMOUNTF
 * flags are cleared again, any MNTK_MWAIT waiter is woken and an error
 * is returned (EBUSY when references remain on a non-forced unmount, or
 * when another unmount already holds the interlock).
 *
 * The per-mount token is held for the duration of the call.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;		/* cleared when refs remain on a forced unmount */
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 * Non-forced unmounts add LK_TIMELOCK to the lock request.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	/* MNT_ASYNC is remembered so it can be restored on failure. */
	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * Still referenced or still has children: first try
		 * dropping process p_textnch handles on this mount, then
		 * re-evaluate.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() does not).
	 *
	 * Retry up to 10 times, sleeping between attempts.
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 * A forced unmount proceeds even if the sync fails.
	 */
	if (error == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if ((error == 0) ||
			    (error == EOPNOTSUPP) || /* No sync */
			    (flags & MNT_FORCE)) {
				error = VFS_UNMOUNT(mp, flags);
			}
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/*
	 * Detach and drop both namecache handles held by the mount.  Each
	 * handle is copied to a local and zeroed in the mount before being
	 * cleared and dropped.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 *
	 * mp is set to NULL after the free so the common exit below
	 * does not release the token a second time.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print an unmount diagnostic through kprintf()/kvprintf().
 *
 * The message is prefixed with "unmount(<path>): " where <path> comes
 * from cache_fullpath() on mp->mnt_ncmounton.  If that fails the prefix
 * is built from the mount pointer and, when available, the nc_name of
 * the mounted-on namecache entry.  ctl and the variable arguments are a
 * printf-style format and its operands.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 *
 * rb and fb are passed straight through to cache_fullpath(); callers in
 * this file kfree() *fb with M_TEMP after using *rb.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
927 */ 928 929 #ifdef DEBUG 930 static int syncprt = 0; 931 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 932 #endif /* DEBUG */ 933 934 static int sync_callback(struct mount *mp, void *data); 935 936 int 937 sys_sync(struct sync_args *uap) 938 { 939 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 940 return (0); 941 } 942 943 static 944 int 945 sync_callback(struct mount *mp, void *data __unused) 946 { 947 int asyncflag; 948 949 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 950 asyncflag = mp->mnt_flag & MNT_ASYNC; 951 mp->mnt_flag &= ~MNT_ASYNC; 952 vfs_msync(mp, MNT_NOWAIT); 953 VFS_SYNC(mp, MNT_NOWAIT); 954 mp->mnt_flag |= asyncflag; 955 } 956 return(0); 957 } 958 959 /* XXX PRISON: could be per prison flag */ 960 static int prison_quotas; 961 #if 0 962 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 963 #endif 964 965 /* 966 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 967 * 968 * Change filesystem quotas. 969 * 970 * MPALMOSTSAFE 971 */ 972 int 973 sys_quotactl(struct quotactl_args *uap) 974 { 975 struct nlookupdata nd; 976 struct thread *td; 977 struct mount *mp; 978 int error; 979 980 get_mplock(); 981 td = curthread; 982 if (td->td_ucred->cr_prison && !prison_quotas) { 983 error = EPERM; 984 goto done; 985 } 986 987 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 988 if (error == 0) 989 error = nlookup(&nd); 990 if (error == 0) { 991 mp = nd.nl_nch.mount; 992 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 993 uap->arg, nd.nl_cred); 994 } 995 nlookup_done(&nd); 996 done: 997 rel_mplock(); 998 return (error); 999 } 1000 1001 /* 1002 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1003 * void *buf, int buflen) 1004 * 1005 * This function operates on a mount point and executes the specified 1006 * operation using the specified control data, and possibly returns data. 
1007 * 1008 * The actual number of bytes stored in the result buffer is returned, 0 1009 * if none, otherwise an error is returned. 1010 * 1011 * MPALMOSTSAFE 1012 */ 1013 int 1014 sys_mountctl(struct mountctl_args *uap) 1015 { 1016 struct thread *td = curthread; 1017 struct proc *p = td->td_proc; 1018 struct file *fp; 1019 void *ctl = NULL; 1020 void *buf = NULL; 1021 char *path = NULL; 1022 int error; 1023 1024 /* 1025 * Sanity and permissions checks. We must be root. 1026 */ 1027 KKASSERT(p); 1028 if (td->td_ucred->cr_prison != NULL) 1029 return (EPERM); 1030 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1031 (error = priv_check(td, PRIV_ROOT)) != 0) 1032 return (error); 1033 1034 /* 1035 * Argument length checks 1036 */ 1037 if (uap->ctllen < 0 || uap->ctllen > 1024) 1038 return (EINVAL); 1039 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1040 return (EINVAL); 1041 if (uap->path == NULL) 1042 return (EINVAL); 1043 1044 /* 1045 * Allocate the necessary buffers and copyin data 1046 */ 1047 path = objcache_get(namei_oc, M_WAITOK); 1048 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1049 if (error) 1050 goto done; 1051 1052 if (uap->ctllen) { 1053 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1054 error = copyin(uap->ctl, ctl, uap->ctllen); 1055 if (error) 1056 goto done; 1057 } 1058 if (uap->buflen) 1059 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1060 1061 /* 1062 * Validate the descriptor 1063 */ 1064 if (uap->fd >= 0) { 1065 fp = holdfp(p->p_fd, uap->fd, -1); 1066 if (fp == NULL) { 1067 error = EBADF; 1068 goto done; 1069 } 1070 } else { 1071 fp = NULL; 1072 } 1073 1074 /* 1075 * Execute the internal kernel function and clean up. 
1076 */ 1077 get_mplock(); 1078 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1079 rel_mplock(); 1080 if (fp) 1081 fdrop(fp); 1082 if (error == 0 && uap->sysmsg_result > 0) 1083 error = copyout(buf, uap->buf, uap->sysmsg_result); 1084 done: 1085 if (path) 1086 objcache_put(namei_oc, path); 1087 if (ctl) 1088 kfree(ctl, M_TEMP); 1089 if (buf) 1090 kfree(buf, M_TEMP); 1091 return (error); 1092 } 1093 1094 /* 1095 * Execute a mount control operation by resolving the path to a mount point 1096 * and calling vop_mountctl(). 1097 * 1098 * Use the mount point from the nch instead of the vnode so nullfs mounts 1099 * can properly spike the VOP. 1100 */ 1101 int 1102 kern_mountctl(const char *path, int op, struct file *fp, 1103 const void *ctl, int ctllen, 1104 void *buf, int buflen, int *res) 1105 { 1106 struct vnode *vp; 1107 struct nlookupdata nd; 1108 struct nchandle nch; 1109 struct mount *mp; 1110 int error; 1111 1112 *res = 0; 1113 vp = NULL; 1114 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1115 if (error) 1116 return (error); 1117 error = nlookup(&nd); 1118 if (error) { 1119 nlookup_done(&nd); 1120 return (error); 1121 } 1122 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1123 if (error) { 1124 nlookup_done(&nd); 1125 return (error); 1126 } 1127 1128 /* 1129 * Yes, all this is needed to use the nch.mount below, because 1130 * we must maintain a ref on the mount to avoid ripouts (e.g. 1131 * due to heavy mount/unmount use by synth or poudriere). 
1132 */ 1133 nch = nd.nl_nch; 1134 cache_zero(&nd.nl_nch); 1135 cache_unlock(&nch); 1136 nlookup_done(&nd); 1137 vn_unlock(vp); 1138 1139 mp = nch.mount; 1140 1141 /* 1142 * Must be the root of the filesystem 1143 */ 1144 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1145 cache_drop(&nch); 1146 vrele(vp); 1147 return (EINVAL); 1148 } 1149 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1150 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1151 path); 1152 cache_drop(&nch); 1153 vrele(vp); 1154 return (EINVAL); 1155 } 1156 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1157 buf, buflen, res); 1158 vrele(vp); 1159 cache_drop(&nch); 1160 1161 return (error); 1162 } 1163 1164 int 1165 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1166 { 1167 struct thread *td = curthread; 1168 struct proc *p = td->td_proc; 1169 struct mount *mp; 1170 struct statfs *sp; 1171 char *fullpath, *freepath; 1172 int error; 1173 1174 if ((error = nlookup(nd)) != 0) 1175 return (error); 1176 mp = nd->nl_nch.mount; 1177 sp = &mp->mnt_stat; 1178 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1179 return (error); 1180 1181 error = mount_path(p, mp, &fullpath, &freepath); 1182 if (error) 1183 return(error); 1184 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1185 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1186 kfree(freepath, M_TEMP); 1187 1188 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1189 bcopy(sp, buf, sizeof(*buf)); 1190 /* Only root should have access to the fsid's. */ 1191 if (priv_check(td, PRIV_ROOT)) 1192 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1193 return (0); 1194 } 1195 1196 /* 1197 * statfs_args(char *path, struct statfs *buf) 1198 * 1199 * Get filesystem statistics. 
1200 */ 1201 int 1202 sys_statfs(struct statfs_args *uap) 1203 { 1204 struct nlookupdata nd; 1205 struct statfs buf; 1206 int error; 1207 1208 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1209 if (error == 0) 1210 error = kern_statfs(&nd, &buf); 1211 nlookup_done(&nd); 1212 if (error == 0) 1213 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1214 return (error); 1215 } 1216 1217 int 1218 kern_fstatfs(int fd, struct statfs *buf) 1219 { 1220 struct thread *td = curthread; 1221 struct proc *p = td->td_proc; 1222 struct file *fp; 1223 struct mount *mp; 1224 struct statfs *sp; 1225 char *fullpath, *freepath; 1226 int error; 1227 1228 KKASSERT(p); 1229 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1230 return (error); 1231 1232 /* 1233 * Try to use mount info from any overlays rather than the 1234 * mount info for the underlying vnode, otherwise we will 1235 * fail when operating on null-mounted paths inside a chroot. 1236 */ 1237 if ((mp = fp->f_nchandle.mount) == NULL) 1238 mp = ((struct vnode *)fp->f_data)->v_mount; 1239 if (mp == NULL) { 1240 error = EBADF; 1241 goto done; 1242 } 1243 if (fp->f_cred == NULL) { 1244 error = EINVAL; 1245 goto done; 1246 } 1247 sp = &mp->mnt_stat; 1248 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1249 goto done; 1250 1251 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1252 goto done; 1253 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1254 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1255 kfree(freepath, M_TEMP); 1256 1257 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1258 bcopy(sp, buf, sizeof(*buf)); 1259 1260 /* Only root should have access to the fsid's. */ 1261 if (priv_check(td, PRIV_ROOT)) 1262 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1263 error = 0; 1264 done: 1265 fdrop(fp); 1266 return (error); 1267 } 1268 1269 /* 1270 * fstatfs_args(int fd, struct statfs *buf) 1271 * 1272 * Get filesystem statistics. 
1273 */ 1274 int 1275 sys_fstatfs(struct fstatfs_args *uap) 1276 { 1277 struct statfs buf; 1278 int error; 1279 1280 error = kern_fstatfs(uap->fd, &buf); 1281 1282 if (error == 0) 1283 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1284 return (error); 1285 } 1286 1287 int 1288 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1289 { 1290 struct mount *mp; 1291 struct statvfs *sp; 1292 int error; 1293 1294 if ((error = nlookup(nd)) != 0) 1295 return (error); 1296 mp = nd->nl_nch.mount; 1297 sp = &mp->mnt_vstat; 1298 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1299 return (error); 1300 1301 sp->f_flag = 0; 1302 if (mp->mnt_flag & MNT_RDONLY) 1303 sp->f_flag |= ST_RDONLY; 1304 if (mp->mnt_flag & MNT_NOSUID) 1305 sp->f_flag |= ST_NOSUID; 1306 bcopy(sp, buf, sizeof(*buf)); 1307 return (0); 1308 } 1309 1310 /* 1311 * statfs_args(char *path, struct statfs *buf) 1312 * 1313 * Get filesystem statistics. 1314 */ 1315 int 1316 sys_statvfs(struct statvfs_args *uap) 1317 { 1318 struct nlookupdata nd; 1319 struct statvfs buf; 1320 int error; 1321 1322 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1323 if (error == 0) 1324 error = kern_statvfs(&nd, &buf); 1325 nlookup_done(&nd); 1326 if (error == 0) 1327 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1328 return (error); 1329 } 1330 1331 int 1332 kern_fstatvfs(int fd, struct statvfs *buf) 1333 { 1334 struct thread *td = curthread; 1335 struct proc *p = td->td_proc; 1336 struct file *fp; 1337 struct mount *mp; 1338 struct statvfs *sp; 1339 int error; 1340 1341 KKASSERT(p); 1342 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1343 return (error); 1344 if ((mp = fp->f_nchandle.mount) == NULL) 1345 mp = ((struct vnode *)fp->f_data)->v_mount; 1346 if (mp == NULL) { 1347 error = EBADF; 1348 goto done; 1349 } 1350 if (fp->f_cred == NULL) { 1351 error = EINVAL; 1352 goto done; 1353 } 1354 sp = &mp->mnt_vstat; 1355 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1356 goto done; 1357 1358 
sp->f_flag = 0; 1359 if (mp->mnt_flag & MNT_RDONLY) 1360 sp->f_flag |= ST_RDONLY; 1361 if (mp->mnt_flag & MNT_NOSUID) 1362 sp->f_flag |= ST_NOSUID; 1363 1364 bcopy(sp, buf, sizeof(*buf)); 1365 error = 0; 1366 done: 1367 fdrop(fp); 1368 return (error); 1369 } 1370 1371 /* 1372 * fstatfs_args(int fd, struct statfs *buf) 1373 * 1374 * Get filesystem statistics. 1375 */ 1376 int 1377 sys_fstatvfs(struct fstatvfs_args *uap) 1378 { 1379 struct statvfs buf; 1380 int error; 1381 1382 error = kern_fstatvfs(uap->fd, &buf); 1383 1384 if (error == 0) 1385 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1386 return (error); 1387 } 1388 1389 /* 1390 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1391 * 1392 * Get statistics on all filesystems. 1393 */ 1394 1395 struct getfsstat_info { 1396 struct statfs *sfsp; 1397 long count; 1398 long maxcount; 1399 int error; 1400 int flags; 1401 struct thread *td; 1402 }; 1403 1404 static int getfsstat_callback(struct mount *, void *); 1405 1406 int 1407 sys_getfsstat(struct getfsstat_args *uap) 1408 { 1409 struct thread *td = curthread; 1410 struct getfsstat_info info; 1411 1412 bzero(&info, sizeof(info)); 1413 1414 info.maxcount = uap->bufsize / sizeof(struct statfs); 1415 info.sfsp = uap->buf; 1416 info.count = 0; 1417 info.flags = uap->flags; 1418 info.td = td; 1419 1420 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1421 if (info.sfsp && info.count > info.maxcount) 1422 uap->sysmsg_result = info.maxcount; 1423 else 1424 uap->sysmsg_result = info.count; 1425 return (info.error); 1426 } 1427 1428 static int 1429 getfsstat_callback(struct mount *mp, void *data) 1430 { 1431 struct getfsstat_info *info = data; 1432 struct statfs *sp; 1433 char *freepath; 1434 char *fullpath; 1435 int error; 1436 1437 if (info->sfsp && info->count < info->maxcount) { 1438 if (info->td->td_proc && 1439 !chroot_visible_mnt(mp, info->td->td_proc)) { 1440 return(0); 1441 } 1442 sp = &mp->mnt_stat; 1443 1444 /* 1445 * If MNT_NOWAIT 
or MNT_LAZY is specified, do not 1446 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1447 * overrides MNT_WAIT. 1448 */ 1449 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1450 (info->flags & MNT_WAIT)) && 1451 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1452 return(0); 1453 } 1454 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1455 1456 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1457 if (error) { 1458 info->error = error; 1459 return(-1); 1460 } 1461 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1462 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1463 kfree(freepath, M_TEMP); 1464 1465 error = copyout(sp, info->sfsp, sizeof(*sp)); 1466 if (error) { 1467 info->error = error; 1468 return (-1); 1469 } 1470 ++info->sfsp; 1471 } 1472 info->count++; 1473 return(0); 1474 } 1475 1476 /* 1477 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1478 long bufsize, int flags) 1479 * 1480 * Get statistics on all filesystems. 1481 */ 1482 1483 struct getvfsstat_info { 1484 struct statfs *sfsp; 1485 struct statvfs *vsfsp; 1486 long count; 1487 long maxcount; 1488 int error; 1489 int flags; 1490 struct thread *td; 1491 }; 1492 1493 static int getvfsstat_callback(struct mount *, void *); 1494 1495 int 1496 sys_getvfsstat(struct getvfsstat_args *uap) 1497 { 1498 struct thread *td = curthread; 1499 struct getvfsstat_info info; 1500 1501 bzero(&info, sizeof(info)); 1502 1503 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1504 info.sfsp = uap->buf; 1505 info.vsfsp = uap->vbuf; 1506 info.count = 0; 1507 info.flags = uap->flags; 1508 info.td = td; 1509 1510 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1511 if (info.vsfsp && info.count > info.maxcount) 1512 uap->sysmsg_result = info.maxcount; 1513 else 1514 uap->sysmsg_result = info.count; 1515 return (info.error); 1516 } 1517 1518 static int 1519 getvfsstat_callback(struct mount *mp, void *data) 1520 { 1521 struct getvfsstat_info *info = data; 1522 
struct statfs *sp; 1523 struct statvfs *vsp; 1524 char *freepath; 1525 char *fullpath; 1526 int error; 1527 1528 if (info->vsfsp && info->count < info->maxcount) { 1529 if (info->td->td_proc && 1530 !chroot_visible_mnt(mp, info->td->td_proc)) { 1531 return(0); 1532 } 1533 sp = &mp->mnt_stat; 1534 vsp = &mp->mnt_vstat; 1535 1536 /* 1537 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1538 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1539 * overrides MNT_WAIT. 1540 */ 1541 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1542 (info->flags & MNT_WAIT)) && 1543 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1544 return(0); 1545 } 1546 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1547 1548 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1549 (info->flags & MNT_WAIT)) && 1550 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1551 return(0); 1552 } 1553 vsp->f_flag = 0; 1554 if (mp->mnt_flag & MNT_RDONLY) 1555 vsp->f_flag |= ST_RDONLY; 1556 if (mp->mnt_flag & MNT_NOSUID) 1557 vsp->f_flag |= ST_NOSUID; 1558 1559 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1560 if (error) { 1561 info->error = error; 1562 return(-1); 1563 } 1564 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1565 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1566 kfree(freepath, M_TEMP); 1567 1568 error = copyout(sp, info->sfsp, sizeof(*sp)); 1569 if (error == 0) 1570 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1571 if (error) { 1572 info->error = error; 1573 return (-1); 1574 } 1575 ++info->sfsp; 1576 ++info->vsfsp; 1577 } 1578 info->count++; 1579 return(0); 1580 } 1581 1582 1583 /* 1584 * fchdir_args(int fd) 1585 * 1586 * Change current working directory to a given file descriptor. 
1587 */ 1588 int 1589 sys_fchdir(struct fchdir_args *uap) 1590 { 1591 struct thread *td = curthread; 1592 struct proc *p = td->td_proc; 1593 struct filedesc *fdp = p->p_fd; 1594 struct vnode *vp, *ovp; 1595 struct mount *mp; 1596 struct file *fp; 1597 struct nchandle nch, onch, tnch; 1598 int error; 1599 1600 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1601 return (error); 1602 lwkt_gettoken(&p->p_token); 1603 vp = (struct vnode *)fp->f_data; 1604 vref(vp); 1605 vn_lock(vp, LK_SHARED | LK_RETRY); 1606 if (fp->f_nchandle.ncp == NULL) 1607 error = ENOTDIR; 1608 else 1609 error = checkvp_chdir(vp, td); 1610 if (error) { 1611 vput(vp); 1612 goto done; 1613 } 1614 cache_copy(&fp->f_nchandle, &nch); 1615 1616 /* 1617 * If the ncp has become a mount point, traverse through 1618 * the mount point. 1619 */ 1620 1621 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1622 (mp = cache_findmount(&nch)) != NULL 1623 ) { 1624 error = nlookup_mp(mp, &tnch); 1625 if (error == 0) { 1626 cache_unlock(&tnch); /* leave ref intact */ 1627 vput(vp); 1628 vp = tnch.ncp->nc_vp; 1629 error = vget(vp, LK_SHARED); 1630 KKASSERT(error == 0); 1631 cache_drop(&nch); 1632 nch = tnch; 1633 } 1634 cache_dropmount(mp); 1635 } 1636 if (error == 0) { 1637 ovp = fdp->fd_cdir; 1638 onch = fdp->fd_ncdir; 1639 vn_unlock(vp); /* leave ref intact */ 1640 fdp->fd_cdir = vp; 1641 fdp->fd_ncdir = nch; 1642 cache_drop(&onch); 1643 vrele(ovp); 1644 } else { 1645 cache_drop(&nch); 1646 vput(vp); 1647 } 1648 fdrop(fp); 1649 done: 1650 lwkt_reltoken(&p->p_token); 1651 return (error); 1652 } 1653 1654 int 1655 kern_chdir(struct nlookupdata *nd) 1656 { 1657 struct thread *td = curthread; 1658 struct proc *p = td->td_proc; 1659 struct filedesc *fdp = p->p_fd; 1660 struct vnode *vp, *ovp; 1661 struct nchandle onch; 1662 int error; 1663 1664 nd->nl_flags |= NLC_SHAREDLOCK; 1665 if ((error = nlookup(nd)) != 0) 1666 return (error); 1667 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1668 return (ENOENT); 1669 if 
((error = vget(vp, LK_SHARED)) != 0) 1670 return (error); 1671 1672 lwkt_gettoken(&p->p_token); 1673 error = checkvp_chdir(vp, td); 1674 vn_unlock(vp); 1675 if (error == 0) { 1676 ovp = fdp->fd_cdir; 1677 onch = fdp->fd_ncdir; 1678 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1679 fdp->fd_ncdir = nd->nl_nch; 1680 fdp->fd_cdir = vp; 1681 cache_drop(&onch); 1682 vrele(ovp); 1683 cache_zero(&nd->nl_nch); 1684 } else { 1685 vrele(vp); 1686 } 1687 lwkt_reltoken(&p->p_token); 1688 return (error); 1689 } 1690 1691 /* 1692 * chdir_args(char *path) 1693 * 1694 * Change current working directory (``.''). 1695 */ 1696 int 1697 sys_chdir(struct chdir_args *uap) 1698 { 1699 struct nlookupdata nd; 1700 int error; 1701 1702 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1703 if (error == 0) 1704 error = kern_chdir(&nd); 1705 nlookup_done(&nd); 1706 return (error); 1707 } 1708 1709 /* 1710 * Helper function for raised chroot(2) security function: Refuse if 1711 * any filedescriptors are open directories. 1712 */ 1713 static int 1714 chroot_refuse_vdir_fds(struct filedesc *fdp) 1715 { 1716 struct vnode *vp; 1717 struct file *fp; 1718 int error; 1719 int fd; 1720 1721 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1722 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1723 continue; 1724 vp = (struct vnode *)fp->f_data; 1725 if (vp->v_type != VDIR) { 1726 fdrop(fp); 1727 continue; 1728 } 1729 fdrop(fp); 1730 return(EPERM); 1731 } 1732 return (0); 1733 } 1734 1735 /* 1736 * This sysctl determines if we will allow a process to chroot(2) if it 1737 * has a directory open: 1738 * 0: disallowed for all processes. 1739 * 1: allowed for processes that were not already chroot(2)'ed. 1740 * 2: allowed for all processes. 1741 */ 1742 1743 static int chroot_allow_open_directories = 1; 1744 1745 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1746 &chroot_allow_open_directories, 0, ""); 1747 1748 /* 1749 * chroot to the specified namecache entry. 
We obtain the vp from the 1750 * namecache data. The passed ncp must be locked and referenced and will 1751 * remain locked and referenced on return. 1752 */ 1753 int 1754 kern_chroot(struct nchandle *nch) 1755 { 1756 struct thread *td = curthread; 1757 struct proc *p = td->td_proc; 1758 struct filedesc *fdp = p->p_fd; 1759 struct vnode *vp; 1760 int error; 1761 1762 /* 1763 * Only privileged user can chroot 1764 */ 1765 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1766 if (error) 1767 return (error); 1768 1769 /* 1770 * Disallow open directory descriptors (fchdir() breakouts). 1771 */ 1772 if (chroot_allow_open_directories == 0 || 1773 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1774 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1775 return (error); 1776 } 1777 if ((vp = nch->ncp->nc_vp) == NULL) 1778 return (ENOENT); 1779 1780 if ((error = vget(vp, LK_SHARED)) != 0) 1781 return (error); 1782 1783 /* 1784 * Check the validity of vp as a directory to change to and 1785 * associate it with rdir/jdir. 1786 */ 1787 error = checkvp_chdir(vp, td); 1788 vn_unlock(vp); /* leave reference intact */ 1789 if (error == 0) { 1790 vrele(fdp->fd_rdir); 1791 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1792 cache_drop(&fdp->fd_nrdir); 1793 cache_copy(nch, &fdp->fd_nrdir); 1794 if (fdp->fd_jdir == NULL) { 1795 fdp->fd_jdir = vp; 1796 vref(fdp->fd_jdir); 1797 cache_copy(nch, &fdp->fd_njdir); 1798 } 1799 } else { 1800 vrele(vp); 1801 } 1802 return (error); 1803 } 1804 1805 /* 1806 * chroot_args(char *path) 1807 * 1808 * Change notion of root (``/'') directory. 
1809 */ 1810 int 1811 sys_chroot(struct chroot_args *uap) 1812 { 1813 struct thread *td __debugvar = curthread; 1814 struct nlookupdata nd; 1815 int error; 1816 1817 KKASSERT(td->td_proc); 1818 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1819 if (error == 0) { 1820 nd.nl_flags |= NLC_EXEC; 1821 error = nlookup(&nd); 1822 if (error == 0) 1823 error = kern_chroot(&nd.nl_nch); 1824 } 1825 nlookup_done(&nd); 1826 return(error); 1827 } 1828 1829 int 1830 sys_chroot_kernel(struct chroot_kernel_args *uap) 1831 { 1832 struct thread *td = curthread; 1833 struct nlookupdata nd; 1834 struct nchandle *nch; 1835 struct vnode *vp; 1836 int error; 1837 1838 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1839 if (error) 1840 goto error_nond; 1841 1842 error = nlookup(&nd); 1843 if (error) 1844 goto error_out; 1845 1846 nch = &nd.nl_nch; 1847 1848 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1849 if (error) 1850 goto error_out; 1851 1852 if ((vp = nch->ncp->nc_vp) == NULL) { 1853 error = ENOENT; 1854 goto error_out; 1855 } 1856 1857 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1858 goto error_out; 1859 1860 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1861 get_mplock(); 1862 vfs_cache_setroot(vp, cache_hold(nch)); 1863 rel_mplock(); 1864 1865 error_out: 1866 nlookup_done(&nd); 1867 error_nond: 1868 return(error); 1869 } 1870 1871 /* 1872 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1873 * determine whether it is legal to chdir to the vnode. The vnode's state 1874 * is not changed by this call. 
1875 */ 1876 static int 1877 checkvp_chdir(struct vnode *vp, struct thread *td) 1878 { 1879 int error; 1880 1881 if (vp->v_type != VDIR) 1882 error = ENOTDIR; 1883 else 1884 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1885 return (error); 1886 } 1887 1888 int 1889 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1890 { 1891 struct thread *td = curthread; 1892 struct proc *p = td->td_proc; 1893 struct lwp *lp = td->td_lwp; 1894 struct filedesc *fdp = p->p_fd; 1895 int cmode, flags; 1896 struct file *nfp; 1897 struct file *fp; 1898 struct vnode *vp; 1899 int type, indx, error = 0; 1900 struct flock lf; 1901 1902 if ((oflags & O_ACCMODE) == O_ACCMODE) 1903 return (EINVAL); 1904 flags = FFLAGS(oflags); 1905 error = falloc(lp, &nfp, NULL); 1906 if (error) 1907 return (error); 1908 fp = nfp; 1909 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1910 1911 /* 1912 * XXX p_dupfd is a real mess. It allows a device to return a 1913 * file descriptor to be duplicated rather then doing the open 1914 * itself. 1915 */ 1916 lp->lwp_dupfd = -1; 1917 1918 /* 1919 * Call vn_open() to do the lookup and assign the vnode to the 1920 * file pointer. vn_open() does not change the ref count on fp 1921 * and the vnode, on success, will be inherited by the file pointer 1922 * and unlocked. 1923 * 1924 * Request a shared lock on the vnode if possible. 1925 */ 1926 nd->nl_flags |= NLC_LOCKVP; 1927 if ((flags & (O_CREAT|O_TRUNC)) == 0) 1928 nd->nl_flags |= NLC_SHAREDLOCK; 1929 1930 error = vn_open(nd, fp, flags, cmode); 1931 nlookup_done(nd); 1932 1933 if (error) { 1934 /* 1935 * handle special fdopen() case. bleh. dupfdopen() is 1936 * responsible for dropping the old contents of ofiles[indx] 1937 * if it succeeds. 1938 * 1939 * Note that fsetfd() will add a ref to fp which represents 1940 * the fd_files[] assignment. We must still drop our 1941 * reference. 
1942 */ 1943 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1944 if (fdalloc(p, 0, &indx) == 0) { 1945 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1946 if (error == 0) { 1947 *res = indx; 1948 fdrop(fp); /* our ref */ 1949 return (0); 1950 } 1951 fsetfd(fdp, NULL, indx); 1952 } 1953 } 1954 fdrop(fp); /* our ref */ 1955 if (error == ERESTART) 1956 error = EINTR; 1957 return (error); 1958 } 1959 1960 /* 1961 * ref the vnode for ourselves so it can't be ripped out from under 1962 * is. XXX need an ND flag to request that the vnode be returned 1963 * anyway. 1964 * 1965 * Reserve a file descriptor but do not assign it until the open 1966 * succeeds. 1967 */ 1968 vp = (struct vnode *)fp->f_data; 1969 vref(vp); 1970 if ((error = fdalloc(p, 0, &indx)) != 0) { 1971 fdrop(fp); 1972 vrele(vp); 1973 return (error); 1974 } 1975 1976 /* 1977 * If no error occurs the vp will have been assigned to the file 1978 * pointer. 1979 */ 1980 lp->lwp_dupfd = 0; 1981 1982 if (flags & (O_EXLOCK | O_SHLOCK)) { 1983 lf.l_whence = SEEK_SET; 1984 lf.l_start = 0; 1985 lf.l_len = 0; 1986 if (flags & O_EXLOCK) 1987 lf.l_type = F_WRLCK; 1988 else 1989 lf.l_type = F_RDLCK; 1990 if (flags & FNONBLOCK) 1991 type = 0; 1992 else 1993 type = F_WAIT; 1994 1995 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1996 /* 1997 * lock request failed. Clean up the reserved 1998 * descriptor. 1999 */ 2000 vrele(vp); 2001 fsetfd(fdp, NULL, indx); 2002 fdrop(fp); 2003 return (error); 2004 } 2005 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2006 } 2007 #if 0 2008 /* 2009 * Assert that all regular file vnodes were created with a object. 2010 */ 2011 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2012 ("open: regular file has no backing object after vn_open")); 2013 #endif 2014 2015 vrele(vp); 2016 2017 /* 2018 * release our private reference, leaving the one associated with the 2019 * descriptor table intact. 
2020 */ 2021 if (oflags & O_CLOEXEC) 2022 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2023 fsetfd(fdp, fp, indx); 2024 fdrop(fp); 2025 *res = indx; 2026 return (error); 2027 } 2028 2029 /* 2030 * open_args(char *path, int flags, int mode) 2031 * 2032 * Check permissions, allocate an open file structure, 2033 * and call the device open routine if any. 2034 */ 2035 int 2036 sys_open(struct open_args *uap) 2037 { 2038 struct nlookupdata nd; 2039 int error; 2040 2041 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2042 if (error == 0) { 2043 error = kern_open(&nd, uap->flags, 2044 uap->mode, &uap->sysmsg_result); 2045 } 2046 nlookup_done(&nd); 2047 return (error); 2048 } 2049 2050 /* 2051 * openat_args(int fd, char *path, int flags, int mode) 2052 */ 2053 int 2054 sys_openat(struct openat_args *uap) 2055 { 2056 struct nlookupdata nd; 2057 int error; 2058 struct file *fp; 2059 2060 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2061 if (error == 0) { 2062 error = kern_open(&nd, uap->flags, uap->mode, 2063 &uap->sysmsg_result); 2064 } 2065 nlookup_done_at(&nd, fp); 2066 return (error); 2067 } 2068 2069 int 2070 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2071 { 2072 struct thread *td = curthread; 2073 struct proc *p = td->td_proc; 2074 struct vnode *vp; 2075 struct vattr vattr; 2076 int error; 2077 int whiteout = 0; 2078 2079 KKASSERT(p); 2080 2081 VATTR_NULL(&vattr); 2082 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2083 vattr.va_rmajor = rmajor; 2084 vattr.va_rminor = rminor; 2085 2086 switch (mode & S_IFMT) { 2087 case S_IFMT: /* used by badsect to flag bad sectors */ 2088 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2089 vattr.va_type = VBAD; 2090 break; 2091 case S_IFCHR: 2092 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2093 vattr.va_type = VCHR; 2094 break; 2095 case S_IFBLK: 2096 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2097 vattr.va_type = VBLK; 2098 break; 2099 case 
S_IFWHT: 2100 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2101 whiteout = 1; 2102 break; 2103 case S_IFDIR: /* special directories support for HAMMER */ 2104 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2105 vattr.va_type = VDIR; 2106 break; 2107 default: 2108 error = EINVAL; 2109 break; 2110 } 2111 2112 if (error) 2113 return (error); 2114 2115 bwillinode(1); 2116 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2117 if ((error = nlookup(nd)) != 0) 2118 return (error); 2119 if (nd->nl_nch.ncp->nc_vp) 2120 return (EEXIST); 2121 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2122 return (error); 2123 2124 if (whiteout) { 2125 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2126 nd->nl_cred, NAMEI_CREATE); 2127 } else { 2128 vp = NULL; 2129 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2130 &vp, nd->nl_cred, &vattr); 2131 if (error == 0) 2132 vput(vp); 2133 } 2134 return (error); 2135 } 2136 2137 /* 2138 * mknod_args(char *path, int mode, int dev) 2139 * 2140 * Create a special file. 2141 */ 2142 int 2143 sys_mknod(struct mknod_args *uap) 2144 { 2145 struct nlookupdata nd; 2146 int error; 2147 2148 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2149 if (error == 0) { 2150 error = kern_mknod(&nd, uap->mode, 2151 umajor(uap->dev), uminor(uap->dev)); 2152 } 2153 nlookup_done(&nd); 2154 return (error); 2155 } 2156 2157 /* 2158 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2159 * 2160 * Create a special file. The path is relative to the directory associated 2161 * with fd. 
2162 */ 2163 int 2164 sys_mknodat(struct mknodat_args *uap) 2165 { 2166 struct nlookupdata nd; 2167 struct file *fp; 2168 int error; 2169 2170 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2171 if (error == 0) { 2172 error = kern_mknod(&nd, uap->mode, 2173 umajor(uap->dev), uminor(uap->dev)); 2174 } 2175 nlookup_done_at(&nd, fp); 2176 return (error); 2177 } 2178 2179 int 2180 kern_mkfifo(struct nlookupdata *nd, int mode) 2181 { 2182 struct thread *td = curthread; 2183 struct proc *p = td->td_proc; 2184 struct vattr vattr; 2185 struct vnode *vp; 2186 int error; 2187 2188 bwillinode(1); 2189 2190 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2191 if ((error = nlookup(nd)) != 0) 2192 return (error); 2193 if (nd->nl_nch.ncp->nc_vp) 2194 return (EEXIST); 2195 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2196 return (error); 2197 2198 VATTR_NULL(&vattr); 2199 vattr.va_type = VFIFO; 2200 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2201 vp = NULL; 2202 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2203 if (error == 0) 2204 vput(vp); 2205 return (error); 2206 } 2207 2208 /* 2209 * mkfifo_args(char *path, int mode) 2210 * 2211 * Create a named pipe. 2212 */ 2213 int 2214 sys_mkfifo(struct mkfifo_args *uap) 2215 { 2216 struct nlookupdata nd; 2217 int error; 2218 2219 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2220 if (error == 0) 2221 error = kern_mkfifo(&nd, uap->mode); 2222 nlookup_done(&nd); 2223 return (error); 2224 } 2225 2226 /* 2227 * mkfifoat_args(int fd, char *path, mode_t mode) 2228 * 2229 * Create a named pipe. The path is relative to the directory associated 2230 * with fd. 
2231 */ 2232 int 2233 sys_mkfifoat(struct mkfifoat_args *uap) 2234 { 2235 struct nlookupdata nd; 2236 struct file *fp; 2237 int error; 2238 2239 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2240 if (error == 0) 2241 error = kern_mkfifo(&nd, uap->mode); 2242 nlookup_done_at(&nd, fp); 2243 return (error); 2244 } 2245 2246 static int hardlink_check_uid = 0; 2247 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2248 &hardlink_check_uid, 0, 2249 "Unprivileged processes cannot create hard links to files owned by other " 2250 "users"); 2251 static int hardlink_check_gid = 0; 2252 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2253 &hardlink_check_gid, 0, 2254 "Unprivileged processes cannot create hard links to files owned by other " 2255 "groups"); 2256 2257 static int 2258 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2259 { 2260 struct vattr va; 2261 int error; 2262 2263 /* 2264 * Shortcut if disabled 2265 */ 2266 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2267 return (0); 2268 2269 /* 2270 * Privileged user can always hardlink 2271 */ 2272 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2273 return (0); 2274 2275 /* 2276 * Otherwise only if the originating file is owned by the 2277 * same user or group. Note that any group is allowed if 2278 * the file is owned by the caller. 2279 */ 2280 error = VOP_GETATTR(vp, &va); 2281 if (error != 0) 2282 return (error); 2283 2284 if (hardlink_check_uid) { 2285 if (cred->cr_uid != va.va_uid) 2286 return (EPERM); 2287 } 2288 2289 if (hardlink_check_gid) { 2290 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2291 return (EPERM); 2292 } 2293 2294 return (0); 2295 } 2296 2297 int 2298 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2299 { 2300 struct thread *td = curthread; 2301 struct vnode *vp; 2302 int error; 2303 2304 /* 2305 * Lookup the source and obtained a locked vnode. 
2306 * 2307 * You may only hardlink a file which you have write permission 2308 * on or which you own. 2309 * 2310 * XXX relookup on vget failure / race ? 2311 */ 2312 bwillinode(1); 2313 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2314 if ((error = nlookup(nd)) != 0) 2315 return (error); 2316 vp = nd->nl_nch.ncp->nc_vp; 2317 KKASSERT(vp != NULL); 2318 if (vp->v_type == VDIR) 2319 return (EPERM); /* POSIX */ 2320 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2321 return (error); 2322 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2323 return (error); 2324 2325 /* 2326 * Unlock the source so we can lookup the target without deadlocking 2327 * (XXX vp is locked already, possible other deadlock?). The target 2328 * must not exist. 2329 */ 2330 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2331 nd->nl_flags &= ~NLC_NCPISLOCKED; 2332 cache_unlock(&nd->nl_nch); 2333 vn_unlock(vp); 2334 2335 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2336 if ((error = nlookup(linknd)) != 0) { 2337 vrele(vp); 2338 return (error); 2339 } 2340 if (linknd->nl_nch.ncp->nc_vp) { 2341 vrele(vp); 2342 return (EEXIST); 2343 } 2344 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2345 if (error) { 2346 vrele(vp); 2347 return (error); 2348 } 2349 2350 /* 2351 * Finally run the new API VOP. 2352 */ 2353 error = can_hardlink(vp, td, td->td_ucred); 2354 if (error == 0) { 2355 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2356 vp, linknd->nl_cred); 2357 } 2358 vput(vp); 2359 return (error); 2360 } 2361 2362 /* 2363 * link_args(char *path, char *link) 2364 * 2365 * Make a hard file link. 
2366 */ 2367 int 2368 sys_link(struct link_args *uap) 2369 { 2370 struct nlookupdata nd, linknd; 2371 int error; 2372 2373 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2374 if (error == 0) { 2375 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2376 if (error == 0) 2377 error = kern_link(&nd, &linknd); 2378 nlookup_done(&linknd); 2379 } 2380 nlookup_done(&nd); 2381 return (error); 2382 } 2383 2384 /* 2385 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2386 * 2387 * Make a hard file link. The path1 argument is relative to the directory 2388 * associated with fd1, and similarly the path2 argument is relative to 2389 * the directory associated with fd2. 2390 */ 2391 int 2392 sys_linkat(struct linkat_args *uap) 2393 { 2394 struct nlookupdata nd, linknd; 2395 struct file *fp1, *fp2; 2396 int error; 2397 2398 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2399 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2400 if (error == 0) { 2401 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2402 uap->path2, UIO_USERSPACE, 0); 2403 if (error == 0) 2404 error = kern_link(&nd, &linknd); 2405 nlookup_done_at(&linknd, fp2); 2406 } 2407 nlookup_done_at(&nd, fp1); 2408 return (error); 2409 } 2410 2411 int 2412 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2413 { 2414 struct vattr vattr; 2415 struct vnode *vp; 2416 struct vnode *dvp; 2417 int error; 2418 2419 bwillinode(1); 2420 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2421 if ((error = nlookup(nd)) != 0) 2422 return (error); 2423 if (nd->nl_nch.ncp->nc_vp) 2424 return (EEXIST); 2425 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2426 return (error); 2427 dvp = nd->nl_dvp; 2428 VATTR_NULL(&vattr); 2429 vattr.va_mode = mode; 2430 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2431 if (error == 0) 2432 vput(vp); 2433 return (error); 2434 } 2435 2436 /* 2437 * symlink(char *path, char *link) 2438 * 2439 * Make a symbolic 
link. 2440 */ 2441 int 2442 sys_symlink(struct symlink_args *uap) 2443 { 2444 struct thread *td = curthread; 2445 struct nlookupdata nd; 2446 char *path; 2447 int error; 2448 int mode; 2449 2450 path = objcache_get(namei_oc, M_WAITOK); 2451 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2452 if (error == 0) { 2453 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2454 if (error == 0) { 2455 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2456 error = kern_symlink(&nd, path, mode); 2457 } 2458 nlookup_done(&nd); 2459 } 2460 objcache_put(namei_oc, path); 2461 return (error); 2462 } 2463 2464 /* 2465 * symlinkat_args(char *path1, int fd, char *path2) 2466 * 2467 * Make a symbolic link. The path2 argument is relative to the directory 2468 * associated with fd. 2469 */ 2470 int 2471 sys_symlinkat(struct symlinkat_args *uap) 2472 { 2473 struct thread *td = curthread; 2474 struct nlookupdata nd; 2475 struct file *fp; 2476 char *path1; 2477 int error; 2478 int mode; 2479 2480 path1 = objcache_get(namei_oc, M_WAITOK); 2481 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2482 if (error == 0) { 2483 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2484 UIO_USERSPACE, 0); 2485 if (error == 0) { 2486 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2487 error = kern_symlink(&nd, path1, mode); 2488 } 2489 nlookup_done_at(&nd, fp); 2490 } 2491 objcache_put(namei_oc, path1); 2492 return (error); 2493 } 2494 2495 /* 2496 * undelete_args(char *path) 2497 * 2498 * Delete a whiteout from the filesystem. 
2499 */ 2500 int 2501 sys_undelete(struct undelete_args *uap) 2502 { 2503 struct nlookupdata nd; 2504 int error; 2505 2506 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2507 bwillinode(1); 2508 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2509 if (error == 0) 2510 error = nlookup(&nd); 2511 if (error == 0) 2512 error = ncp_writechk(&nd.nl_nch); 2513 if (error == 0) { 2514 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2515 NAMEI_DELETE); 2516 } 2517 nlookup_done(&nd); 2518 return (error); 2519 } 2520 2521 int 2522 kern_unlink(struct nlookupdata *nd) 2523 { 2524 int error; 2525 2526 bwillinode(1); 2527 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2528 if ((error = nlookup(nd)) != 0) 2529 return (error); 2530 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2531 return (error); 2532 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2533 return (error); 2534 } 2535 2536 /* 2537 * unlink_args(char *path) 2538 * 2539 * Delete a name from the filesystem. 2540 */ 2541 int 2542 sys_unlink(struct unlink_args *uap) 2543 { 2544 struct nlookupdata nd; 2545 int error; 2546 2547 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2548 if (error == 0) 2549 error = kern_unlink(&nd); 2550 nlookup_done(&nd); 2551 return (error); 2552 } 2553 2554 2555 /* 2556 * unlinkat_args(int fd, char *path, int flags) 2557 * 2558 * Delete the file or directory entry pointed to by fd/path. 
2559 */ 2560 int 2561 sys_unlinkat(struct unlinkat_args *uap) 2562 { 2563 struct nlookupdata nd; 2564 struct file *fp; 2565 int error; 2566 2567 if (uap->flags & ~AT_REMOVEDIR) 2568 return (EINVAL); 2569 2570 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2571 if (error == 0) { 2572 if (uap->flags & AT_REMOVEDIR) 2573 error = kern_rmdir(&nd); 2574 else 2575 error = kern_unlink(&nd); 2576 } 2577 nlookup_done_at(&nd, fp); 2578 return (error); 2579 } 2580 2581 int 2582 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2583 { 2584 struct thread *td = curthread; 2585 struct proc *p = td->td_proc; 2586 struct file *fp; 2587 struct vnode *vp; 2588 struct vattr vattr; 2589 off_t new_offset; 2590 int error; 2591 2592 fp = holdfp(p->p_fd, fd, -1); 2593 if (fp == NULL) 2594 return (EBADF); 2595 if (fp->f_type != DTYPE_VNODE) { 2596 error = ESPIPE; 2597 goto done; 2598 } 2599 vp = (struct vnode *)fp->f_data; 2600 2601 switch (whence) { 2602 case L_INCR: 2603 spin_lock(&fp->f_spin); 2604 new_offset = fp->f_offset + offset; 2605 error = 0; 2606 break; 2607 case L_XTND: 2608 error = VOP_GETATTR(vp, &vattr); 2609 spin_lock(&fp->f_spin); 2610 new_offset = offset + vattr.va_size; 2611 break; 2612 case L_SET: 2613 new_offset = offset; 2614 error = 0; 2615 spin_lock(&fp->f_spin); 2616 break; 2617 default: 2618 new_offset = 0; 2619 error = EINVAL; 2620 spin_lock(&fp->f_spin); 2621 break; 2622 } 2623 2624 /* 2625 * Validate the seek position. Negative offsets are not allowed 2626 * for regular files or directories. 2627 * 2628 * Normally we would also not want to allow negative offsets for 2629 * character and block-special devices. However kvm addresses 2630 * on 64 bit architectures might appear to be negative and must 2631 * be allowed. 
2632 */ 2633 if (error == 0) { 2634 if (new_offset < 0 && 2635 (vp->v_type == VREG || vp->v_type == VDIR)) { 2636 error = EINVAL; 2637 } else { 2638 fp->f_offset = new_offset; 2639 } 2640 } 2641 *res = fp->f_offset; 2642 spin_unlock(&fp->f_spin); 2643 done: 2644 fdrop(fp); 2645 return (error); 2646 } 2647 2648 /* 2649 * lseek_args(int fd, int pad, off_t offset, int whence) 2650 * 2651 * Reposition read/write file offset. 2652 */ 2653 int 2654 sys_lseek(struct lseek_args *uap) 2655 { 2656 int error; 2657 2658 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2659 &uap->sysmsg_offset); 2660 2661 return (error); 2662 } 2663 2664 /* 2665 * Check if current process can access given file. amode is a bitmask of *_OK 2666 * access bits. flags is a bitmask of AT_* flags. 2667 */ 2668 int 2669 kern_access(struct nlookupdata *nd, int amode, int flags) 2670 { 2671 struct vnode *vp; 2672 int error, mode; 2673 2674 if (flags & ~AT_EACCESS) 2675 return (EINVAL); 2676 nd->nl_flags |= NLC_SHAREDLOCK; 2677 if ((error = nlookup(nd)) != 0) 2678 return (error); 2679 retry: 2680 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2681 if (error) 2682 return (error); 2683 2684 /* Flags == 0 means only check for existence. */ 2685 if (amode) { 2686 mode = 0; 2687 if (amode & R_OK) 2688 mode |= VREAD; 2689 if (amode & W_OK) 2690 mode |= VWRITE; 2691 if (amode & X_OK) 2692 mode |= VEXEC; 2693 if ((mode & VWRITE) == 0 || 2694 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2695 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2696 2697 /* 2698 * If the file handle is stale we have to re-resolve the 2699 * entry with the ncp held exclusively. This is a hack 2700 * at the moment. 
2701 */ 2702 if (error == ESTALE) { 2703 vput(vp); 2704 cache_unlock(&nd->nl_nch); 2705 cache_lock(&nd->nl_nch); 2706 cache_setunresolved(&nd->nl_nch); 2707 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2708 if (error == 0) { 2709 vp = NULL; 2710 goto retry; 2711 } 2712 return(error); 2713 } 2714 } 2715 vput(vp); 2716 return (error); 2717 } 2718 2719 /* 2720 * access_args(char *path, int flags) 2721 * 2722 * Check access permissions. 2723 */ 2724 int 2725 sys_access(struct access_args *uap) 2726 { 2727 struct nlookupdata nd; 2728 int error; 2729 2730 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2731 if (error == 0) 2732 error = kern_access(&nd, uap->flags, 0); 2733 nlookup_done(&nd); 2734 return (error); 2735 } 2736 2737 2738 /* 2739 * eaccess_args(char *path, int flags) 2740 * 2741 * Check access permissions. 2742 */ 2743 int 2744 sys_eaccess(struct eaccess_args *uap) 2745 { 2746 struct nlookupdata nd; 2747 int error; 2748 2749 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2750 if (error == 0) 2751 error = kern_access(&nd, uap->flags, AT_EACCESS); 2752 nlookup_done(&nd); 2753 return (error); 2754 } 2755 2756 2757 /* 2758 * faccessat_args(int fd, char *path, int amode, int flags) 2759 * 2760 * Check access permissions. 
*/
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	/* uap->flags is validated inside kern_access(). */
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Look up nd and fill *st via vn_stat().
 *
 * Returns ENOENT when the resolved namecache entry has no vnode, otherwise
 * the error from nlookup(), vget(), vn_stat() or (after an ESTALE) from
 * cache_resolve().  nd is not released here.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the
	 * entry with the ncp held exclusively.  This is a hack
	 * at the moment.
	 */
	if (error == ESTALE) {
		/* Drop the vnode before cycling the namecache lock. */
		vput(vp);
		cache_unlock(&nd->nl_nch);
		cache_lock(&nd->nl_nch);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		/* Only copy the result out when the stat itself succeeded. */
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
2839 */ 2840 int 2841 sys_lstat(struct lstat_args *uap) 2842 { 2843 struct nlookupdata nd; 2844 struct stat st; 2845 int error; 2846 2847 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2848 if (error == 0) { 2849 error = kern_stat(&nd, &st); 2850 if (error == 0) 2851 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2852 } 2853 nlookup_done(&nd); 2854 return (error); 2855 } 2856 2857 /* 2858 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2859 * 2860 * Get status of file pointed to by fd/path. 2861 */ 2862 int 2863 sys_fstatat(struct fstatat_args *uap) 2864 { 2865 struct nlookupdata nd; 2866 struct stat st; 2867 int error; 2868 int flags; 2869 struct file *fp; 2870 2871 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2872 return (EINVAL); 2873 2874 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2875 2876 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2877 UIO_USERSPACE, flags); 2878 if (error == 0) { 2879 error = kern_stat(&nd, &st); 2880 if (error == 0) 2881 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2882 } 2883 nlookup_done_at(&nd, fp); 2884 return (error); 2885 } 2886 2887 static int 2888 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2889 { 2890 struct nlookupdata nd; 2891 struct vnode *vp; 2892 int error; 2893 2894 vp = NULL; 2895 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2896 if (error == 0) 2897 error = nlookup(&nd); 2898 if (error == 0) 2899 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2900 nlookup_done(&nd); 2901 if (error == 0) { 2902 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2903 vput(vp); 2904 } 2905 return (error); 2906 } 2907 2908 /* 2909 * pathconf_Args(char *path, int name) 2910 * 2911 * Get configurable pathname variables. 
2912 */ 2913 int 2914 sys_pathconf(struct pathconf_args *uap) 2915 { 2916 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2917 &uap->sysmsg_reg)); 2918 } 2919 2920 /* 2921 * lpathconf_Args(char *path, int name) 2922 * 2923 * Get configurable pathname variables, but don't follow symlinks. 2924 */ 2925 int 2926 sys_lpathconf(struct lpathconf_args *uap) 2927 { 2928 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2929 } 2930 2931 /* 2932 * XXX: daver 2933 * kern_readlink isn't properly split yet. There is a copyin burried 2934 * in VOP_READLINK(). 2935 */ 2936 int 2937 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2938 { 2939 struct thread *td = curthread; 2940 struct vnode *vp; 2941 struct iovec aiov; 2942 struct uio auio; 2943 int error; 2944 2945 nd->nl_flags |= NLC_SHAREDLOCK; 2946 if ((error = nlookup(nd)) != 0) 2947 return (error); 2948 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2949 if (error) 2950 return (error); 2951 if (vp->v_type != VLNK) { 2952 error = EINVAL; 2953 } else { 2954 aiov.iov_base = buf; 2955 aiov.iov_len = count; 2956 auio.uio_iov = &aiov; 2957 auio.uio_iovcnt = 1; 2958 auio.uio_offset = 0; 2959 auio.uio_rw = UIO_READ; 2960 auio.uio_segflg = UIO_USERSPACE; 2961 auio.uio_td = td; 2962 auio.uio_resid = count; 2963 error = VOP_READLINK(vp, &auio, td->td_ucred); 2964 } 2965 vput(vp); 2966 *res = count - auio.uio_resid; 2967 return (error); 2968 } 2969 2970 /* 2971 * readlink_args(char *path, char *buf, int count) 2972 * 2973 * Return target name of a symbolic link. 
2974 */ 2975 int 2976 sys_readlink(struct readlink_args *uap) 2977 { 2978 struct nlookupdata nd; 2979 int error; 2980 2981 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2982 if (error == 0) { 2983 error = kern_readlink(&nd, uap->buf, uap->count, 2984 &uap->sysmsg_result); 2985 } 2986 nlookup_done(&nd); 2987 return (error); 2988 } 2989 2990 /* 2991 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2992 * 2993 * Return target name of a symbolic link. The path is relative to the 2994 * directory associated with fd. 2995 */ 2996 int 2997 sys_readlinkat(struct readlinkat_args *uap) 2998 { 2999 struct nlookupdata nd; 3000 struct file *fp; 3001 int error; 3002 3003 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3004 if (error == 0) { 3005 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3006 &uap->sysmsg_result); 3007 } 3008 nlookup_done_at(&nd, fp); 3009 return (error); 3010 } 3011 3012 static int 3013 setfflags(struct vnode *vp, int flags) 3014 { 3015 struct thread *td = curthread; 3016 int error; 3017 struct vattr vattr; 3018 3019 /* 3020 * Prevent non-root users from setting flags on devices. When 3021 * a device is reused, users can retain ownership of the device 3022 * if they are allowed to set flags and programs assume that 3023 * chown can't fail when done as root. 3024 */ 3025 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3026 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3027 return (error); 3028 3029 /* 3030 * note: vget is required for any operation that might mod the vnode 3031 * so VINACTIVE is properly cleared. 3032 */ 3033 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3034 VATTR_NULL(&vattr); 3035 vattr.va_flags = flags; 3036 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3037 vput(vp); 3038 } 3039 return (error); 3040 } 3041 3042 /* 3043 * chflags(char *path, int flags) 3044 * 3045 * Change flags of a file given a path name. 
3046 */ 3047 int 3048 sys_chflags(struct chflags_args *uap) 3049 { 3050 struct nlookupdata nd; 3051 struct vnode *vp; 3052 int error; 3053 3054 vp = NULL; 3055 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3056 if (error == 0) 3057 error = nlookup(&nd); 3058 if (error == 0) 3059 error = ncp_writechk(&nd.nl_nch); 3060 if (error == 0) 3061 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3062 nlookup_done(&nd); 3063 if (error == 0) { 3064 error = setfflags(vp, uap->flags); 3065 vrele(vp); 3066 } 3067 return (error); 3068 } 3069 3070 /* 3071 * lchflags(char *path, int flags) 3072 * 3073 * Change flags of a file given a path name, but don't follow symlinks. 3074 */ 3075 int 3076 sys_lchflags(struct lchflags_args *uap) 3077 { 3078 struct nlookupdata nd; 3079 struct vnode *vp; 3080 int error; 3081 3082 vp = NULL; 3083 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3084 if (error == 0) 3085 error = nlookup(&nd); 3086 if (error == 0) 3087 error = ncp_writechk(&nd.nl_nch); 3088 if (error == 0) 3089 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3090 nlookup_done(&nd); 3091 if (error == 0) { 3092 error = setfflags(vp, uap->flags); 3093 vrele(vp); 3094 } 3095 return (error); 3096 } 3097 3098 /* 3099 * fchflags_args(int fd, int flags) 3100 * 3101 * Change flags of a file given a file descriptor. 
3102 */ 3103 int 3104 sys_fchflags(struct fchflags_args *uap) 3105 { 3106 struct thread *td = curthread; 3107 struct proc *p = td->td_proc; 3108 struct file *fp; 3109 int error; 3110 3111 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3112 return (error); 3113 if (fp->f_nchandle.ncp) 3114 error = ncp_writechk(&fp->f_nchandle); 3115 if (error == 0) 3116 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3117 fdrop(fp); 3118 return (error); 3119 } 3120 3121 /* 3122 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3123 * change flags given a pathname relative to a filedescriptor 3124 */ 3125 int sys_chflagsat(struct chflagsat_args *uap) 3126 { 3127 struct nlookupdata nd; 3128 struct vnode *vp; 3129 struct file *fp; 3130 int error; 3131 int lookupflags; 3132 3133 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3134 return (EINVAL); 3135 3136 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3137 3138 vp = NULL; 3139 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3140 if (error == 0) 3141 error = nlookup(&nd); 3142 if (error == 0) 3143 error = ncp_writechk(&nd.nl_nch); 3144 if (error == 0) 3145 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3146 nlookup_done_at(&nd, fp); 3147 if (error == 0) { 3148 error = setfflags(vp, uap->flags); 3149 vrele(vp); 3150 } 3151 return (error); 3152 } 3153 3154 3155 static int 3156 setfmode(struct vnode *vp, int mode) 3157 { 3158 struct thread *td = curthread; 3159 int error; 3160 struct vattr vattr; 3161 3162 /* 3163 * note: vget is required for any operation that might mod the vnode 3164 * so VINACTIVE is properly cleared. 
3165 */ 3166 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3167 VATTR_NULL(&vattr); 3168 vattr.va_mode = mode & ALLPERMS; 3169 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3170 vput(vp); 3171 } 3172 return error; 3173 } 3174 3175 int 3176 kern_chmod(struct nlookupdata *nd, int mode) 3177 { 3178 struct vnode *vp; 3179 int error; 3180 3181 if ((error = nlookup(nd)) != 0) 3182 return (error); 3183 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3184 return (error); 3185 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3186 error = setfmode(vp, mode); 3187 vrele(vp); 3188 return (error); 3189 } 3190 3191 /* 3192 * chmod_args(char *path, int mode) 3193 * 3194 * Change mode of a file given path name. 3195 */ 3196 int 3197 sys_chmod(struct chmod_args *uap) 3198 { 3199 struct nlookupdata nd; 3200 int error; 3201 3202 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3203 if (error == 0) 3204 error = kern_chmod(&nd, uap->mode); 3205 nlookup_done(&nd); 3206 return (error); 3207 } 3208 3209 /* 3210 * lchmod_args(char *path, int mode) 3211 * 3212 * Change mode of a file given path name (don't follow links.) 3213 */ 3214 int 3215 sys_lchmod(struct lchmod_args *uap) 3216 { 3217 struct nlookupdata nd; 3218 int error; 3219 3220 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3221 if (error == 0) 3222 error = kern_chmod(&nd, uap->mode); 3223 nlookup_done(&nd); 3224 return (error); 3225 } 3226 3227 /* 3228 * fchmod_args(int fd, int mode) 3229 * 3230 * Change mode of a file given a file descriptor. 
3231 */ 3232 int 3233 sys_fchmod(struct fchmod_args *uap) 3234 { 3235 struct thread *td = curthread; 3236 struct proc *p = td->td_proc; 3237 struct file *fp; 3238 int error; 3239 3240 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3241 return (error); 3242 if (fp->f_nchandle.ncp) 3243 error = ncp_writechk(&fp->f_nchandle); 3244 if (error == 0) 3245 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3246 fdrop(fp); 3247 return (error); 3248 } 3249 3250 /* 3251 * fchmodat_args(char *path, int mode) 3252 * 3253 * Change mode of a file pointed to by fd/path. 3254 */ 3255 int 3256 sys_fchmodat(struct fchmodat_args *uap) 3257 { 3258 struct nlookupdata nd; 3259 struct file *fp; 3260 int error; 3261 int flags; 3262 3263 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3264 return (EINVAL); 3265 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3266 3267 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3268 UIO_USERSPACE, flags); 3269 if (error == 0) 3270 error = kern_chmod(&nd, uap->mode); 3271 nlookup_done_at(&nd, fp); 3272 return (error); 3273 } 3274 3275 static int 3276 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3277 { 3278 struct thread *td = curthread; 3279 int error; 3280 struct vattr vattr; 3281 uid_t o_uid; 3282 gid_t o_gid; 3283 uint64_t size; 3284 3285 /* 3286 * note: vget is required for any operation that might mod the vnode 3287 * so VINACTIVE is properly cleared. 
3288 */ 3289 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3290 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3291 return error; 3292 o_uid = vattr.va_uid; 3293 o_gid = vattr.va_gid; 3294 size = vattr.va_size; 3295 3296 VATTR_NULL(&vattr); 3297 vattr.va_uid = uid; 3298 vattr.va_gid = gid; 3299 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3300 vput(vp); 3301 } 3302 3303 if (error == 0) { 3304 if (uid == -1) 3305 uid = o_uid; 3306 if (gid == -1) 3307 gid = o_gid; 3308 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3309 VFS_ACCOUNT(mp, uid, gid, size); 3310 } 3311 3312 return error; 3313 } 3314 3315 int 3316 kern_chown(struct nlookupdata *nd, int uid, int gid) 3317 { 3318 struct vnode *vp; 3319 int error; 3320 3321 if ((error = nlookup(nd)) != 0) 3322 return (error); 3323 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3324 return (error); 3325 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3326 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3327 vrele(vp); 3328 return (error); 3329 } 3330 3331 /* 3332 * chown(char *path, int uid, int gid) 3333 * 3334 * Set ownership given a path name. 3335 */ 3336 int 3337 sys_chown(struct chown_args *uap) 3338 { 3339 struct nlookupdata nd; 3340 int error; 3341 3342 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3343 if (error == 0) 3344 error = kern_chown(&nd, uap->uid, uap->gid); 3345 nlookup_done(&nd); 3346 return (error); 3347 } 3348 3349 /* 3350 * lchown_args(char *path, int uid, int gid) 3351 * 3352 * Set ownership given a path name, do not cross symlinks. 3353 */ 3354 int 3355 sys_lchown(struct lchown_args *uap) 3356 { 3357 struct nlookupdata nd; 3358 int error; 3359 3360 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3361 if (error == 0) 3362 error = kern_chown(&nd, uap->uid, uap->gid); 3363 nlookup_done(&nd); 3364 return (error); 3365 } 3366 3367 /* 3368 * fchown_args(int fd, int uid, int gid) 3369 * 3370 * Set ownership given a file descriptor. 
3371 */ 3372 int 3373 sys_fchown(struct fchown_args *uap) 3374 { 3375 struct thread *td = curthread; 3376 struct proc *p = td->td_proc; 3377 struct file *fp; 3378 int error; 3379 3380 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3381 return (error); 3382 if (fp->f_nchandle.ncp) 3383 error = ncp_writechk(&fp->f_nchandle); 3384 if (error == 0) 3385 error = setfown(p->p_fd->fd_ncdir.mount, 3386 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3387 fdrop(fp); 3388 return (error); 3389 } 3390 3391 /* 3392 * fchownat(int fd, char *path, int uid, int gid, int flags) 3393 * 3394 * Set ownership of file pointed to by fd/path. 3395 */ 3396 int 3397 sys_fchownat(struct fchownat_args *uap) 3398 { 3399 struct nlookupdata nd; 3400 struct file *fp; 3401 int error; 3402 int flags; 3403 3404 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3405 return (EINVAL); 3406 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3407 3408 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3409 UIO_USERSPACE, flags); 3410 if (error == 0) 3411 error = kern_chown(&nd, uap->uid, uap->gid); 3412 nlookup_done_at(&nd, fp); 3413 return (error); 3414 } 3415 3416 3417 static int 3418 getutimes(struct timeval *tvp, struct timespec *tsp) 3419 { 3420 struct timeval tv[2]; 3421 int error; 3422 3423 if (tvp == NULL) { 3424 microtime(&tv[0]); 3425 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3426 tsp[1] = tsp[0]; 3427 } else { 3428 if ((error = itimerfix(tvp)) != 0) 3429 return (error); 3430 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3431 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3432 } 3433 return 0; 3434 } 3435 3436 static int 3437 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3438 { 3439 struct timespec tsnow; 3440 int error; 3441 3442 *nullflag = 0; 3443 nanotime(&tsnow); 3444 if (ts == NULL) { 3445 newts[0] = tsnow; 3446 newts[1] = tsnow; 3447 *nullflag = 1; 3448 return (0); 3449 } 3450 3451 newts[0] = ts[0]; 3452 newts[1] = ts[1]; 3453 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3454 return (0); 3455 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3456 *nullflag = 1; 3457 3458 if (newts[0].tv_nsec == UTIME_OMIT) 3459 newts[0].tv_sec = VNOVAL; 3460 else if (newts[0].tv_nsec == UTIME_NOW) 3461 newts[0] = tsnow; 3462 else if ((error = itimespecfix(&newts[0])) != 0) 3463 return (error); 3464 3465 if (newts[1].tv_nsec == UTIME_OMIT) 3466 newts[1].tv_sec = VNOVAL; 3467 else if (newts[1].tv_nsec == UTIME_NOW) 3468 newts[1] = tsnow; 3469 else if ((error = itimespecfix(&newts[1])) != 0) 3470 return (error); 3471 3472 return (0); 3473 } 3474 3475 static int 3476 setutimes(struct vnode *vp, struct vattr *vattr, 3477 const struct timespec *ts, int nullflag) 3478 { 3479 struct thread *td = curthread; 3480 int error; 3481 3482 VATTR_NULL(vattr); 3483 vattr->va_atime = ts[0]; 3484 vattr->va_mtime = ts[1]; 3485 if (nullflag) 3486 vattr->va_vaflags |= VA_UTIMES_NULL; 3487 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3488 3489 return error; 3490 } 3491 3492 int 3493 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3494 { 3495 struct timespec ts[2]; 3496 int error; 3497 3498 if (tptr) { 3499 if ((error = getutimes(tptr, ts)) != 0) 3500 return (error); 3501 } 3502 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3503 return (error); 3504 } 3505 3506 /* 3507 * utimes_args(char *path, struct timeval *tptr) 3508 * 3509 * Set the access and modification times of a file. 3510 */ 3511 int 3512 sys_utimes(struct utimes_args *uap) 3513 { 3514 struct timeval tv[2]; 3515 struct nlookupdata nd; 3516 int error; 3517 3518 if (uap->tptr) { 3519 error = copyin(uap->tptr, tv, sizeof(tv)); 3520 if (error) 3521 return (error); 3522 } 3523 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3524 if (error == 0) 3525 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3526 nlookup_done(&nd); 3527 return (error); 3528 } 3529 3530 /* 3531 * lutimes_args(char *path, struct timeval *tptr) 3532 * 3533 * Set the access and modification times of a file. 3534 */ 3535 int 3536 sys_lutimes(struct lutimes_args *uap) 3537 { 3538 struct timeval tv[2]; 3539 struct nlookupdata nd; 3540 int error; 3541 3542 if (uap->tptr) { 3543 error = copyin(uap->tptr, tv, sizeof(tv)); 3544 if (error) 3545 return (error); 3546 } 3547 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3548 if (error == 0) 3549 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3550 nlookup_done(&nd); 3551 return (error); 3552 } 3553 3554 /* 3555 * Set utimes on a file descriptor. The creds used to open the 3556 * file are used to determine whether the operation is allowed 3557 * or not. 3558 */ 3559 int 3560 kern_futimens(int fd, struct timespec *ts) 3561 { 3562 struct thread *td = curthread; 3563 struct proc *p = td->td_proc; 3564 struct timespec newts[2]; 3565 struct file *fp; 3566 struct vnode *vp; 3567 struct vattr vattr; 3568 int nullflag; 3569 int error; 3570 3571 error = getutimens(ts, newts, &nullflag); 3572 if (error) 3573 return (error); 3574 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3575 return (error); 3576 if (fp->f_nchandle.ncp) 3577 error = ncp_writechk(&fp->f_nchandle); 3578 if (error == 0) { 3579 vp = fp->f_data; 3580 error = vget(vp, LK_EXCLUSIVE); 3581 if (error == 0) { 3582 error = VOP_GETATTR(vp, &vattr); 3583 if (error == 0) { 3584 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3585 fp->f_cred); 3586 } 3587 if (error == 0) { 3588 error = setutimes(vp, &vattr, newts, nullflag); 3589 } 3590 vput(vp); 3591 } 3592 } 3593 fdrop(fp); 3594 return (error); 3595 } 3596 3597 /* 3598 * futimens_args(int fd, struct timespec *ts) 3599 * 3600 * Set the access and modification times of a file. 
3601 */ 3602 int 3603 sys_futimens(struct futimens_args *uap) 3604 { 3605 struct timespec ts[2]; 3606 int error; 3607 3608 if (uap->ts) { 3609 error = copyin(uap->ts, ts, sizeof(ts)); 3610 if (error) 3611 return (error); 3612 } 3613 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3614 return (error); 3615 } 3616 3617 int 3618 kern_futimes(int fd, struct timeval *tptr) 3619 { 3620 struct timespec ts[2]; 3621 int error; 3622 3623 if (tptr) { 3624 if ((error = getutimes(tptr, ts)) != 0) 3625 return (error); 3626 } 3627 error = kern_futimens(fd, tptr ? ts : NULL); 3628 return (error); 3629 } 3630 3631 /* 3632 * futimes_args(int fd, struct timeval *tptr) 3633 * 3634 * Set the access and modification times of a file. 3635 */ 3636 int 3637 sys_futimes(struct futimes_args *uap) 3638 { 3639 struct timeval tv[2]; 3640 int error; 3641 3642 if (uap->tptr) { 3643 error = copyin(uap->tptr, tv, sizeof(tv)); 3644 if (error) 3645 return (error); 3646 } 3647 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3648 return (error); 3649 } 3650 3651 int 3652 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3653 { 3654 struct timespec newts[2]; 3655 struct vnode *vp; 3656 struct vattr vattr; 3657 int nullflag; 3658 int error; 3659 3660 if (flags & ~AT_SYMLINK_NOFOLLOW) 3661 return (EINVAL); 3662 3663 error = getutimens(ts, newts, &nullflag); 3664 if (error) 3665 return (error); 3666 3667 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3668 if ((error = nlookup(nd)) != 0) 3669 return (error); 3670 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3671 return (error); 3672 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3673 return (error); 3674 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3675 error = vget(vp, LK_EXCLUSIVE); 3676 if (error == 0) { 3677 error = setutimes(vp, &vattr, newts, nullflag); 3678 vput(vp); 3679 } 3680 } 3681 vrele(vp); 3682 return (error); 3683 } 3684 3685 /* 3686 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3687 * 3688 * Set file access and modification times of a file. 3689 */ 3690 int 3691 sys_utimensat(struct utimensat_args *uap) 3692 { 3693 struct timespec ts[2]; 3694 struct nlookupdata nd; 3695 struct file *fp; 3696 int error; 3697 int flags; 3698 3699 if (uap->ts) { 3700 error = copyin(uap->ts, ts, sizeof(ts)); 3701 if (error) 3702 return (error); 3703 } 3704 3705 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3706 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3707 UIO_USERSPACE, flags); 3708 if (error == 0) 3709 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3710 nlookup_done_at(&nd, fp); 3711 return (error); 3712 } 3713 3714 int 3715 kern_truncate(struct nlookupdata *nd, off_t length) 3716 { 3717 struct vnode *vp; 3718 struct vattr vattr; 3719 int error; 3720 uid_t uid = 0; 3721 gid_t gid = 0; 3722 uint64_t old_size = 0; 3723 3724 if (length < 0) 3725 return(EINVAL); 3726 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3727 if ((error = nlookup(nd)) != 0) 3728 return (error); 3729 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3730 return (error); 3731 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3732 return (error); 3733 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3734 if (error) { 3735 vrele(vp); 3736 return (error); 3737 } 3738 if (vp->v_type == VDIR) { 3739 error = EISDIR; 3740 goto done; 3741 } 3742 if (vfs_quota_enabled) { 3743 error = VOP_GETATTR(vp, &vattr); 3744 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3745 uid = vattr.va_uid; 3746 gid = vattr.va_gid; 3747 old_size = vattr.va_size; 3748 } 3749 3750 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3751 VATTR_NULL(&vattr); 3752 vattr.va_size = length; 3753 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3754 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3755 } 3756 done: 3757 vput(vp); 3758 return (error); 3759 } 3760 3761 /* 3762 * truncate(char *path, int pad, off_t length) 3763 * 3764 * Truncate a file given its path name. 
3765 */ 3766 int 3767 sys_truncate(struct truncate_args *uap) 3768 { 3769 struct nlookupdata nd; 3770 int error; 3771 3772 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3773 if (error == 0) 3774 error = kern_truncate(&nd, uap->length); 3775 nlookup_done(&nd); 3776 return error; 3777 } 3778 3779 int 3780 kern_ftruncate(int fd, off_t length) 3781 { 3782 struct thread *td = curthread; 3783 struct proc *p = td->td_proc; 3784 struct vattr vattr; 3785 struct vnode *vp; 3786 struct file *fp; 3787 int error; 3788 uid_t uid = 0; 3789 gid_t gid = 0; 3790 uint64_t old_size = 0; 3791 struct mount *mp; 3792 3793 if (length < 0) 3794 return(EINVAL); 3795 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3796 return (error); 3797 if (fp->f_nchandle.ncp) { 3798 error = ncp_writechk(&fp->f_nchandle); 3799 if (error) 3800 goto done; 3801 } 3802 if ((fp->f_flag & FWRITE) == 0) { 3803 error = EINVAL; 3804 goto done; 3805 } 3806 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3807 error = EINVAL; 3808 goto done; 3809 } 3810 vp = (struct vnode *)fp->f_data; 3811 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3812 if (vp->v_type == VDIR) { 3813 error = EISDIR; 3814 vn_unlock(vp); 3815 goto done; 3816 } 3817 3818 if (vfs_quota_enabled) { 3819 error = VOP_GETATTR(vp, &vattr); 3820 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3821 uid = vattr.va_uid; 3822 gid = vattr.va_gid; 3823 old_size = vattr.va_size; 3824 } 3825 3826 if ((error = vn_writechk(vp, NULL)) == 0) { 3827 VATTR_NULL(&vattr); 3828 vattr.va_size = length; 3829 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3830 mp = vq_vptomp(vp); 3831 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3832 } 3833 vn_unlock(vp); 3834 done: 3835 fdrop(fp); 3836 return (error); 3837 } 3838 3839 /* 3840 * ftruncate_args(int fd, int pad, off_t length) 3841 * 3842 * Truncate a file given a file descriptor. 
3843 */ 3844 int 3845 sys_ftruncate(struct ftruncate_args *uap) 3846 { 3847 int error; 3848 3849 error = kern_ftruncate(uap->fd, uap->length); 3850 3851 return (error); 3852 } 3853 3854 /* 3855 * fsync(int fd) 3856 * 3857 * Sync an open file. 3858 */ 3859 int 3860 sys_fsync(struct fsync_args *uap) 3861 { 3862 struct thread *td = curthread; 3863 struct proc *p = td->td_proc; 3864 struct vnode *vp; 3865 struct file *fp; 3866 vm_object_t obj; 3867 int error; 3868 3869 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3870 return (error); 3871 vp = (struct vnode *)fp->f_data; 3872 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3873 if ((obj = vp->v_object) != NULL) { 3874 if (vp->v_mount == NULL || 3875 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3876 vm_object_page_clean(obj, 0, 0, 0); 3877 } 3878 } 3879 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3880 if (error == 0 && vp->v_mount) 3881 error = buf_fsync(vp); 3882 vn_unlock(vp); 3883 fdrop(fp); 3884 3885 return (error); 3886 } 3887 3888 int 3889 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3890 { 3891 struct nchandle fnchd; 3892 struct nchandle tnchd; 3893 struct namecache *ncp; 3894 struct vnode *fdvp; 3895 struct vnode *tdvp; 3896 struct mount *mp; 3897 int error; 3898 u_int fncp_gen; 3899 u_int tncp_gen; 3900 3901 bwillinode(1); 3902 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3903 if ((error = nlookup(fromnd)) != 0) 3904 return (error); 3905 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3906 return (ENOENT); 3907 fnchd.mount = fromnd->nl_nch.mount; 3908 cache_hold(&fnchd); 3909 3910 /* 3911 * unlock the source nch so we can lookup the target nch without 3912 * deadlocking. The target may or may not exist so we do not check 3913 * for a target vp like kern_mkdir() and other creation functions do. 3914 * 3915 * The source and target directories are ref'd and rechecked after 3916 * everything is relocked to determine if the source or target file 3917 * has been renamed. 
3918 */ 3919 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3920 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3921 3922 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 3923 3924 cache_unlock(&fromnd->nl_nch); 3925 3926 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3927 if ((error = nlookup(tond)) != 0) { 3928 cache_drop(&fnchd); 3929 return (error); 3930 } 3931 tncp_gen = tond->nl_nch.ncp->nc_generation; 3932 3933 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3934 cache_drop(&fnchd); 3935 return (ENOENT); 3936 } 3937 tnchd.mount = tond->nl_nch.mount; 3938 cache_hold(&tnchd); 3939 3940 /* 3941 * If the source and target are the same there is nothing to do 3942 */ 3943 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3944 cache_drop(&fnchd); 3945 cache_drop(&tnchd); 3946 return (0); 3947 } 3948 3949 /* 3950 * Mount points cannot be renamed or overwritten 3951 */ 3952 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3953 NCF_ISMOUNTPT 3954 ) { 3955 cache_drop(&fnchd); 3956 cache_drop(&tnchd); 3957 return (EINVAL); 3958 } 3959 3960 /* 3961 * Relock the source ncp. cache_relock() will deal with any 3962 * deadlocks against the already-locked tond and will also 3963 * make sure both are resolved. 3964 * 3965 * NOTE AFTER RELOCKING: The source or target ncp may have become 3966 * invalid while they were unlocked, nc_vp and nc_mount could 3967 * be NULL. 3968 */ 3969 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3970 &tond->nl_nch, tond->nl_cred); 3971 fromnd->nl_flags |= NLC_NCPISLOCKED; 3972 3973 /* 3974 * If the namecache generation changed for either fromnd or tond, 3975 * we must retry. 
3976 */ 3977 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 3978 tond->nl_nch.ncp->nc_generation != tncp_gen) { 3979 kprintf("kern_rename: retry due to gen on: " 3980 "\"%s\" -> \"%s\"\n", 3981 fromnd->nl_nch.ncp->nc_name, 3982 tond->nl_nch.ncp->nc_name); 3983 cache_drop(&fnchd); 3984 cache_drop(&tnchd); 3985 return (EAGAIN); 3986 } 3987 3988 /* 3989 * If either fromnd or tond are marked destroyed a ripout occured 3990 * out from under us and we must retry. 3991 */ 3992 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3993 fromnd->nl_nch.ncp->nc_vp == NULL || 3994 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3995 kprintf("kern_rename: retry due to ripout on: " 3996 "\"%s\" -> \"%s\"\n", 3997 fromnd->nl_nch.ncp->nc_name, 3998 tond->nl_nch.ncp->nc_name); 3999 cache_drop(&fnchd); 4000 cache_drop(&tnchd); 4001 return (EAGAIN); 4002 } 4003 4004 /* 4005 * Make sure the parent directories linkages are the same. 4006 * XXX shouldn't be needed any more w/ generation check above. 4007 */ 4008 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4009 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4010 cache_drop(&fnchd); 4011 cache_drop(&tnchd); 4012 return (ENOENT); 4013 } 4014 4015 /* 4016 * Both the source and target must be within the same filesystem and 4017 * in the same filesystem as their parent directories within the 4018 * namecache topology. 4019 * 4020 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4021 */ 4022 mp = fnchd.mount; 4023 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4024 mp != tond->nl_nch.mount) { 4025 cache_drop(&fnchd); 4026 cache_drop(&tnchd); 4027 return (EXDEV); 4028 } 4029 4030 /* 4031 * Make sure the mount point is writable 4032 */ 4033 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4034 cache_drop(&fnchd); 4035 cache_drop(&tnchd); 4036 return (error); 4037 } 4038 4039 /* 4040 * If the target exists and either the source or target is a directory, 4041 * then both must be directories. 
4042 * 4043 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4044 * have become NULL. 4045 */ 4046 if (tond->nl_nch.ncp->nc_vp) { 4047 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4048 error = ENOENT; 4049 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4050 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4051 error = ENOTDIR; 4052 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4053 error = EISDIR; 4054 } 4055 } 4056 4057 /* 4058 * You cannot rename a source into itself or a subdirectory of itself. 4059 * We check this by travsersing the target directory upwards looking 4060 * for a match against the source. 4061 * 4062 * XXX MPSAFE 4063 */ 4064 if (error == 0) { 4065 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4066 if (fromnd->nl_nch.ncp == ncp) { 4067 error = EINVAL; 4068 break; 4069 } 4070 } 4071 } 4072 4073 cache_drop(&fnchd); 4074 cache_drop(&tnchd); 4075 4076 /* 4077 * Even though the namespaces are different, they may still represent 4078 * hardlinks to the same file. The filesystem might have a hard time 4079 * with this so we issue a NREMOVE of the source instead of a NRENAME 4080 * when we detect the situation. 4081 */ 4082 if (error == 0) { 4083 fdvp = fromnd->nl_dvp; 4084 tdvp = tond->nl_dvp; 4085 if (fdvp == NULL || tdvp == NULL) { 4086 error = EPERM; 4087 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4088 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4089 fromnd->nl_cred); 4090 } else { 4091 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4092 fdvp, tdvp, tond->nl_cred); 4093 } 4094 } 4095 return (error); 4096 } 4097 4098 /* 4099 * rename_args(char *from, char *to) 4100 * 4101 * Rename files. Source and destination must either both be directories, 4102 * or both not be directories. If target is a directory, it must be empty. 
4103 */ 4104 int 4105 sys_rename(struct rename_args *uap) 4106 { 4107 struct nlookupdata fromnd, tond; 4108 int error; 4109 4110 do { 4111 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4112 if (error == 0) { 4113 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4114 if (error == 0) 4115 error = kern_rename(&fromnd, &tond); 4116 nlookup_done(&tond); 4117 } 4118 nlookup_done(&fromnd); 4119 } while (error == EAGAIN); 4120 return (error); 4121 } 4122 4123 /* 4124 * renameat_args(int oldfd, char *old, int newfd, char *new) 4125 * 4126 * Rename files using paths relative to the directories associated with 4127 * oldfd and newfd. Source and destination must either both be directories, 4128 * or both not be directories. If target is a directory, it must be empty. 4129 */ 4130 int 4131 sys_renameat(struct renameat_args *uap) 4132 { 4133 struct nlookupdata oldnd, newnd; 4134 struct file *oldfp, *newfp; 4135 int error; 4136 4137 do { 4138 error = nlookup_init_at(&oldnd, &oldfp, 4139 uap->oldfd, uap->old, 4140 UIO_USERSPACE, 0); 4141 if (error == 0) { 4142 error = nlookup_init_at(&newnd, &newfp, 4143 uap->newfd, uap->new, 4144 UIO_USERSPACE, 0); 4145 if (error == 0) 4146 error = kern_rename(&oldnd, &newnd); 4147 nlookup_done_at(&newnd, newfp); 4148 } 4149 nlookup_done_at(&oldnd, oldfp); 4150 } while (error == EAGAIN); 4151 return (error); 4152 } 4153 4154 int 4155 kern_mkdir(struct nlookupdata *nd, int mode) 4156 { 4157 struct thread *td = curthread; 4158 struct proc *p = td->td_proc; 4159 struct vnode *vp; 4160 struct vattr vattr; 4161 int error; 4162 4163 bwillinode(1); 4164 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4165 if ((error = nlookup(nd)) != 0) 4166 return (error); 4167 4168 if (nd->nl_nch.ncp->nc_vp) 4169 return (EEXIST); 4170 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4171 return (error); 4172 VATTR_NULL(&vattr); 4173 vattr.va_type = VDIR; 4174 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4175 4176 vp = NULL; 
4177 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4178 if (error == 0) 4179 vput(vp); 4180 return (error); 4181 } 4182 4183 /* 4184 * mkdir_args(char *path, int mode) 4185 * 4186 * Make a directory file. 4187 */ 4188 int 4189 sys_mkdir(struct mkdir_args *uap) 4190 { 4191 struct nlookupdata nd; 4192 int error; 4193 4194 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4195 if (error == 0) 4196 error = kern_mkdir(&nd, uap->mode); 4197 nlookup_done(&nd); 4198 return (error); 4199 } 4200 4201 /* 4202 * mkdirat_args(int fd, char *path, mode_t mode) 4203 * 4204 * Make a directory file. The path is relative to the directory associated 4205 * with fd. 4206 */ 4207 int 4208 sys_mkdirat(struct mkdirat_args *uap) 4209 { 4210 struct nlookupdata nd; 4211 struct file *fp; 4212 int error; 4213 4214 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4215 if (error == 0) 4216 error = kern_mkdir(&nd, uap->mode); 4217 nlookup_done_at(&nd, fp); 4218 return (error); 4219 } 4220 4221 int 4222 kern_rmdir(struct nlookupdata *nd) 4223 { 4224 int error; 4225 4226 bwillinode(1); 4227 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4228 if ((error = nlookup(nd)) != 0) 4229 return (error); 4230 4231 /* 4232 * Do not allow directories representing mount points to be 4233 * deleted, even if empty. Check write perms on mount point 4234 * in case the vnode is aliased (aka nullfs). 4235 */ 4236 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4237 return (EBUSY); 4238 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4239 return (error); 4240 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4241 return (error); 4242 } 4243 4244 /* 4245 * rmdir_args(char *path) 4246 * 4247 * Remove a directory file. 
4248 */ 4249 int 4250 sys_rmdir(struct rmdir_args *uap) 4251 { 4252 struct nlookupdata nd; 4253 int error; 4254 4255 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4256 if (error == 0) 4257 error = kern_rmdir(&nd); 4258 nlookup_done(&nd); 4259 return (error); 4260 } 4261 4262 int 4263 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4264 enum uio_seg direction) 4265 { 4266 struct thread *td = curthread; 4267 struct proc *p = td->td_proc; 4268 struct vnode *vp; 4269 struct file *fp; 4270 struct uio auio; 4271 struct iovec aiov; 4272 off_t loff; 4273 int error, eofflag; 4274 4275 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4276 return (error); 4277 if ((fp->f_flag & FREAD) == 0) { 4278 error = EBADF; 4279 goto done; 4280 } 4281 vp = (struct vnode *)fp->f_data; 4282 if (vp->v_type != VDIR) { 4283 error = EINVAL; 4284 goto done; 4285 } 4286 aiov.iov_base = buf; 4287 aiov.iov_len = count; 4288 auio.uio_iov = &aiov; 4289 auio.uio_iovcnt = 1; 4290 auio.uio_rw = UIO_READ; 4291 auio.uio_segflg = direction; 4292 auio.uio_td = td; 4293 auio.uio_resid = count; 4294 loff = auio.uio_offset = fp->f_offset; 4295 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4296 fp->f_offset = auio.uio_offset; 4297 if (error) 4298 goto done; 4299 4300 /* 4301 * WARNING! *basep may not be wide enough to accomodate the 4302 * seek offset. XXX should we hack this to return the upper 32 bits 4303 * for offsets greater then 4G? 4304 */ 4305 if (basep) { 4306 *basep = (long)loff; 4307 } 4308 *res = count - auio.uio_resid; 4309 done: 4310 fdrop(fp); 4311 return (error); 4312 } 4313 4314 /* 4315 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4316 * 4317 * Read a block of directory entries in a file system independent format. 
4318 */ 4319 int 4320 sys_getdirentries(struct getdirentries_args *uap) 4321 { 4322 long base; 4323 int error; 4324 4325 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4326 &uap->sysmsg_result, UIO_USERSPACE); 4327 4328 if (error == 0 && uap->basep) 4329 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4330 return (error); 4331 } 4332 4333 /* 4334 * getdents_args(int fd, char *buf, size_t count) 4335 */ 4336 int 4337 sys_getdents(struct getdents_args *uap) 4338 { 4339 int error; 4340 4341 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4342 &uap->sysmsg_result, UIO_USERSPACE); 4343 4344 return (error); 4345 } 4346 4347 /* 4348 * Set the mode mask for creation of filesystem nodes. 4349 * 4350 * umask(int newmask) 4351 */ 4352 int 4353 sys_umask(struct umask_args *uap) 4354 { 4355 struct thread *td = curthread; 4356 struct proc *p = td->td_proc; 4357 struct filedesc *fdp; 4358 4359 fdp = p->p_fd; 4360 uap->sysmsg_result = fdp->fd_cmask; 4361 fdp->fd_cmask = uap->newmask & ALLPERMS; 4362 return (0); 4363 } 4364 4365 /* 4366 * revoke(char *path) 4367 * 4368 * Void all references to file by ripping underlying filesystem 4369 * away from vnode. 
4370 */ 4371 int 4372 sys_revoke(struct revoke_args *uap) 4373 { 4374 struct nlookupdata nd; 4375 struct vattr vattr; 4376 struct vnode *vp; 4377 struct ucred *cred; 4378 int error; 4379 4380 vp = NULL; 4381 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4382 if (error == 0) 4383 error = nlookup(&nd); 4384 if (error == 0) 4385 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4386 cred = crhold(nd.nl_cred); 4387 nlookup_done(&nd); 4388 if (error == 0) { 4389 if (error == 0) 4390 error = VOP_GETATTR(vp, &vattr); 4391 if (error == 0 && cred->cr_uid != vattr.va_uid) 4392 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4393 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4394 if (vcount(vp) > 0) 4395 error = vrevoke(vp, cred); 4396 } else if (error == 0) { 4397 error = vrevoke(vp, cred); 4398 } 4399 vrele(vp); 4400 } 4401 if (cred) 4402 crfree(cred); 4403 return (error); 4404 } 4405 4406 /* 4407 * getfh_args(char *fname, fhandle_t *fhp) 4408 * 4409 * Get (NFS) file handle 4410 * 4411 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4412 * mount. This allows nullfs mounts to be explicitly exported. 4413 * 4414 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4415 * 4416 * nullfs mounts of subdirectories are not safe. That is, it will 4417 * work, but you do not really have protection against access to 4418 * the related parent directories. 
4419 */ 4420 int 4421 sys_getfh(struct getfh_args *uap) 4422 { 4423 struct thread *td = curthread; 4424 struct nlookupdata nd; 4425 fhandle_t fh; 4426 struct vnode *vp; 4427 struct mount *mp; 4428 int error; 4429 4430 /* 4431 * Must be super user 4432 */ 4433 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4434 return (error); 4435 4436 vp = NULL; 4437 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4438 if (error == 0) 4439 error = nlookup(&nd); 4440 if (error == 0) 4441 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4442 mp = nd.nl_nch.mount; 4443 nlookup_done(&nd); 4444 if (error == 0) { 4445 bzero(&fh, sizeof(fh)); 4446 fh.fh_fsid = mp->mnt_stat.f_fsid; 4447 error = VFS_VPTOFH(vp, &fh.fh_fid); 4448 vput(vp); 4449 if (error == 0) 4450 error = copyout(&fh, uap->fhp, sizeof(fh)); 4451 } 4452 return (error); 4453 } 4454 4455 /* 4456 * fhopen_args(const struct fhandle *u_fhp, int flags) 4457 * 4458 * syscall for the rpc.lockd to use to translate a NFS file handle into 4459 * an open descriptor. 4460 * 4461 * warning: do not remove the priv_check() call or this becomes one giant 4462 * security hole. 4463 */ 4464 int 4465 sys_fhopen(struct fhopen_args *uap) 4466 { 4467 struct thread *td = curthread; 4468 struct filedesc *fdp = td->td_proc->p_fd; 4469 struct mount *mp; 4470 struct vnode *vp; 4471 struct fhandle fhp; 4472 struct vattr vat; 4473 struct vattr *vap = &vat; 4474 struct flock lf; 4475 int fmode, mode, error = 0, type; 4476 struct file *nfp; 4477 struct file *fp; 4478 int indx; 4479 4480 /* 4481 * Must be super user 4482 */ 4483 error = priv_check(td, PRIV_ROOT); 4484 if (error) 4485 return (error); 4486 4487 fmode = FFLAGS(uap->flags); 4488 4489 /* 4490 * Why not allow a non-read/write open for our lockd? 
4491 */ 4492 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4493 return (EINVAL); 4494 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4495 if (error) 4496 return(error); 4497 4498 /* 4499 * Find the mount point 4500 */ 4501 mp = vfs_getvfs(&fhp.fh_fsid); 4502 if (mp == NULL) { 4503 error = ESTALE; 4504 goto done; 4505 } 4506 /* now give me my vnode, it gets returned to me locked */ 4507 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4508 if (error) 4509 goto done; 4510 /* 4511 * from now on we have to make sure not 4512 * to forget about the vnode 4513 * any error that causes an abort must vput(vp) 4514 * just set error = err and 'goto bad;'. 4515 */ 4516 4517 /* 4518 * from vn_open 4519 */ 4520 if (vp->v_type == VLNK) { 4521 error = EMLINK; 4522 goto bad; 4523 } 4524 if (vp->v_type == VSOCK) { 4525 error = EOPNOTSUPP; 4526 goto bad; 4527 } 4528 mode = 0; 4529 if (fmode & (FWRITE | O_TRUNC)) { 4530 if (vp->v_type == VDIR) { 4531 error = EISDIR; 4532 goto bad; 4533 } 4534 error = vn_writechk(vp, NULL); 4535 if (error) 4536 goto bad; 4537 mode |= VWRITE; 4538 } 4539 if (fmode & FREAD) 4540 mode |= VREAD; 4541 if (mode) { 4542 error = VOP_ACCESS(vp, mode, td->td_ucred); 4543 if (error) 4544 goto bad; 4545 } 4546 if (fmode & O_TRUNC) { 4547 vn_unlock(vp); /* XXX */ 4548 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4549 VATTR_NULL(vap); 4550 vap->va_size = 0; 4551 error = VOP_SETATTR(vp, vap, td->td_ucred); 4552 if (error) 4553 goto bad; 4554 } 4555 4556 /* 4557 * VOP_OPEN needs the file pointer so it can potentially override 4558 * it. 4559 * 4560 * WARNING! no f_nchandle will be associated when fhopen()ing a 4561 * directory. XXX 4562 */ 4563 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4564 goto bad; 4565 fp = nfp; 4566 4567 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4568 if (error) { 4569 /* 4570 * setting f_ops this way prevents VOP_CLOSE from being 4571 * called or fdrop() releasing the vp from v_data. 
Since 4572 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4573 */ 4574 fp->f_ops = &badfileops; 4575 fp->f_data = NULL; 4576 goto bad_drop; 4577 } 4578 4579 /* 4580 * The fp is given its own reference, we still have our ref and lock. 4581 * 4582 * Assert that all regular files must be created with a VM object. 4583 */ 4584 if (vp->v_type == VREG && vp->v_object == NULL) { 4585 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4586 goto bad_drop; 4587 } 4588 4589 /* 4590 * The open was successful. Handle any locking requirements. 4591 */ 4592 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4593 lf.l_whence = SEEK_SET; 4594 lf.l_start = 0; 4595 lf.l_len = 0; 4596 if (fmode & O_EXLOCK) 4597 lf.l_type = F_WRLCK; 4598 else 4599 lf.l_type = F_RDLCK; 4600 if (fmode & FNONBLOCK) 4601 type = 0; 4602 else 4603 type = F_WAIT; 4604 vn_unlock(vp); 4605 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4606 /* 4607 * release our private reference. 4608 */ 4609 fsetfd(fdp, NULL, indx); 4610 fdrop(fp); 4611 vrele(vp); 4612 goto done; 4613 } 4614 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4615 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4616 } 4617 4618 /* 4619 * Clean up. Associate the file pointer with the previously 4620 * reserved descriptor and return it. 
4621 */ 4622 vput(vp); 4623 if (uap->flags & O_CLOEXEC) 4624 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4625 fsetfd(fdp, fp, indx); 4626 fdrop(fp); 4627 uap->sysmsg_result = indx; 4628 return (error); 4629 4630 bad_drop: 4631 fsetfd(fdp, NULL, indx); 4632 fdrop(fp); 4633 bad: 4634 vput(vp); 4635 done: 4636 return (error); 4637 } 4638 4639 /* 4640 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4641 */ 4642 int 4643 sys_fhstat(struct fhstat_args *uap) 4644 { 4645 struct thread *td = curthread; 4646 struct stat sb; 4647 fhandle_t fh; 4648 struct mount *mp; 4649 struct vnode *vp; 4650 int error; 4651 4652 /* 4653 * Must be super user 4654 */ 4655 error = priv_check(td, PRIV_ROOT); 4656 if (error) 4657 return (error); 4658 4659 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4660 if (error) 4661 return (error); 4662 4663 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4664 error = ESTALE; 4665 if (error == 0) { 4666 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4667 error = vn_stat(vp, &sb, td->td_ucred); 4668 vput(vp); 4669 } 4670 } 4671 if (error == 0) 4672 error = copyout(&sb, uap->sb, sizeof(sb)); 4673 return (error); 4674 } 4675 4676 /* 4677 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4678 */ 4679 int 4680 sys_fhstatfs(struct fhstatfs_args *uap) 4681 { 4682 struct thread *td = curthread; 4683 struct proc *p = td->td_proc; 4684 struct statfs *sp; 4685 struct mount *mp; 4686 struct vnode *vp; 4687 struct statfs sb; 4688 char *fullpath, *freepath; 4689 fhandle_t fh; 4690 int error; 4691 4692 /* 4693 * Must be super user 4694 */ 4695 if ((error = priv_check(td, PRIV_ROOT))) 4696 return (error); 4697 4698 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4699 return (error); 4700 4701 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4702 error = ESTALE; 4703 goto done; 4704 } 4705 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4706 error = ESTALE; 4707 goto done; 4708 } 4709 4710 if ((error = VFS_FHTOVP(mp, NULL, 
&fh.fh_fid, &vp)) != 0) 4711 goto done; 4712 mp = vp->v_mount; 4713 sp = &mp->mnt_stat; 4714 vput(vp); 4715 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4716 goto done; 4717 4718 error = mount_path(p, mp, &fullpath, &freepath); 4719 if (error) 4720 goto done; 4721 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4722 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4723 kfree(freepath, M_TEMP); 4724 4725 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4726 if (priv_check(td, PRIV_ROOT)) { 4727 bcopy(sp, &sb, sizeof(sb)); 4728 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4729 sp = &sb; 4730 } 4731 error = copyout(sp, uap->buf, sizeof(*sp)); 4732 done: 4733 return (error); 4734 } 4735 4736 /* 4737 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4738 */ 4739 int 4740 sys_fhstatvfs(struct fhstatvfs_args *uap) 4741 { 4742 struct thread *td = curthread; 4743 struct proc *p = td->td_proc; 4744 struct statvfs *sp; 4745 struct mount *mp; 4746 struct vnode *vp; 4747 fhandle_t fh; 4748 int error; 4749 4750 /* 4751 * Must be super user 4752 */ 4753 if ((error = priv_check(td, PRIV_ROOT))) 4754 return (error); 4755 4756 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4757 return (error); 4758 4759 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4760 error = ESTALE; 4761 goto done; 4762 } 4763 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4764 error = ESTALE; 4765 goto done; 4766 } 4767 4768 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4769 goto done; 4770 mp = vp->v_mount; 4771 sp = &mp->mnt_vstat; 4772 vput(vp); 4773 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4774 goto done; 4775 4776 sp->f_flag = 0; 4777 if (mp->mnt_flag & MNT_RDONLY) 4778 sp->f_flag |= ST_RDONLY; 4779 if (mp->mnt_flag & MNT_NOSUID) 4780 sp->f_flag |= ST_NOSUID; 4781 error = copyout(sp, uap->buf, sizeof(*sp)); 4782 done: 4783 return (error); 4784 } 4785 4786 4787 /* 4788 * Syscall to push extended attribute configuration information into the 4789 * 
VFS. Accepts a path, which it converts to a mountpoint, as well as 4790 * a command (int cmd), and attribute name and misc data. For now, the 4791 * attribute name is left in userspace for consumption by the VFS_op. 4792 * It will probably be changed to be copied into sysspace by the 4793 * syscall in the future, once issues with various consumers of the 4794 * attribute code have raised their hands. 4795 * 4796 * Currently this is used only by UFS Extended Attributes. 4797 */ 4798 int 4799 sys_extattrctl(struct extattrctl_args *uap) 4800 { 4801 struct nlookupdata nd; 4802 struct vnode *vp; 4803 char attrname[EXTATTR_MAXNAMELEN]; 4804 int error; 4805 size_t size; 4806 4807 attrname[0] = 0; 4808 vp = NULL; 4809 error = 0; 4810 4811 if (error == 0 && uap->filename) { 4812 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4813 NLC_FOLLOW); 4814 if (error == 0) 4815 error = nlookup(&nd); 4816 if (error == 0) 4817 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4818 nlookup_done(&nd); 4819 } 4820 4821 if (error == 0 && uap->attrname) { 4822 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4823 &size); 4824 } 4825 4826 if (error == 0) { 4827 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4828 if (error == 0) 4829 error = nlookup(&nd); 4830 if (error == 0) 4831 error = ncp_writechk(&nd.nl_nch); 4832 if (error == 0) { 4833 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4834 uap->attrnamespace, 4835 uap->attrname, nd.nl_cred); 4836 } 4837 nlookup_done(&nd); 4838 } 4839 4840 return (error); 4841 } 4842 4843 /* 4844 * Syscall to get a named extended attribute on a file or directory. 
4845 */ 4846 int 4847 sys_extattr_set_file(struct extattr_set_file_args *uap) 4848 { 4849 char attrname[EXTATTR_MAXNAMELEN]; 4850 struct nlookupdata nd; 4851 struct vnode *vp; 4852 struct uio auio; 4853 struct iovec aiov; 4854 int error; 4855 4856 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4857 if (error) 4858 return (error); 4859 4860 vp = NULL; 4861 4862 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4863 if (error == 0) 4864 error = nlookup(&nd); 4865 if (error == 0) 4866 error = ncp_writechk(&nd.nl_nch); 4867 if (error == 0) 4868 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4869 if (error) { 4870 nlookup_done(&nd); 4871 return (error); 4872 } 4873 4874 bzero(&auio, sizeof(auio)); 4875 aiov.iov_base = uap->data; 4876 aiov.iov_len = uap->nbytes; 4877 auio.uio_iov = &aiov; 4878 auio.uio_iovcnt = 1; 4879 auio.uio_offset = 0; 4880 auio.uio_resid = uap->nbytes; 4881 auio.uio_rw = UIO_WRITE; 4882 auio.uio_td = curthread; 4883 4884 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4885 &auio, nd.nl_cred); 4886 4887 vput(vp); 4888 nlookup_done(&nd); 4889 return (error); 4890 } 4891 4892 /* 4893 * Syscall to get a named extended attribute on a file or directory. 
4894 */ 4895 int 4896 sys_extattr_get_file(struct extattr_get_file_args *uap) 4897 { 4898 char attrname[EXTATTR_MAXNAMELEN]; 4899 struct nlookupdata nd; 4900 struct uio auio; 4901 struct iovec aiov; 4902 struct vnode *vp; 4903 int error; 4904 4905 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4906 if (error) 4907 return (error); 4908 4909 vp = NULL; 4910 4911 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4912 if (error == 0) 4913 error = nlookup(&nd); 4914 if (error == 0) 4915 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 4916 if (error) { 4917 nlookup_done(&nd); 4918 return (error); 4919 } 4920 4921 bzero(&auio, sizeof(auio)); 4922 aiov.iov_base = uap->data; 4923 aiov.iov_len = uap->nbytes; 4924 auio.uio_iov = &aiov; 4925 auio.uio_iovcnt = 1; 4926 auio.uio_offset = 0; 4927 auio.uio_resid = uap->nbytes; 4928 auio.uio_rw = UIO_READ; 4929 auio.uio_td = curthread; 4930 4931 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4932 &auio, nd.nl_cred); 4933 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4934 4935 vput(vp); 4936 nlookup_done(&nd); 4937 return(error); 4938 } 4939 4940 /* 4941 * Syscall to delete a named extended attribute from a file or directory. 4942 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4943 */ 4944 int 4945 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4946 { 4947 char attrname[EXTATTR_MAXNAMELEN]; 4948 struct nlookupdata nd; 4949 struct vnode *vp; 4950 int error; 4951 4952 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4953 if (error) 4954 return(error); 4955 4956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4957 if (error == 0) 4958 error = nlookup(&nd); 4959 if (error == 0) 4960 error = ncp_writechk(&nd.nl_nch); 4961 if (error == 0) { 4962 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4963 if (error == 0) { 4964 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4965 attrname, NULL, nd.nl_cred); 4966 vput(vp); 4967 } 4968 } 4969 nlookup_done(&nd); 4970 return(error); 4971 } 4972 4973 /* 4974 * Determine if the mount is visible to the process. 4975 */ 4976 static int 4977 chroot_visible_mnt(struct mount *mp, struct proc *p) 4978 { 4979 struct nchandle nch; 4980 4981 /* 4982 * Traverse from the mount point upwards. If we hit the process 4983 * root then the mount point is visible to the process. 4984 */ 4985 nch = mp->mnt_ncmountpt; 4986 while (nch.ncp) { 4987 if (nch.mount == p->p_fd->fd_nrdir.mount && 4988 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4989 return(1); 4990 } 4991 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4992 nch = nch.mount->mnt_ncmounton; 4993 } else { 4994 nch.ncp = nch.ncp->nc_parent; 4995 } 4996 } 4997 4998 /* 4999 * If the mount point is not visible to the process, but the 5000 * process root is in a subdirectory of the mount, return 5001 * TRUE anyway. 5002 */ 5003 if (p->p_fd->fd_nrdir.mount == mp) 5004 return(1); 5005 5006 return(0); 5007 } 5008 5009