1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 #include <sys/mplock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 #include <vfs/union/union.h> 80 81 static void mount_warning(struct mount *mp, const char *ctl, ...) 
82 __printflike(2, 3); 83 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 84 static int checkvp_chdir (struct vnode *vn, struct thread *td); 85 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 86 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 87 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 88 static int getutimes (const struct timeval *, struct timespec *); 89 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 90 static int setfmode (struct vnode *, int); 91 static int setfflags (struct vnode *, int); 92 static int setutimes (struct vnode *, struct vattr *, 93 const struct timespec *, int); 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 96 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 97 98 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 99 "Allow non-root users to mount filesystems"); 100 101 /* 102 * Virtual File System System Calls 103 */ 104 105 /* 106 * Mount a file system. 107 * 108 * mount_args(char *type, char *path, int flags, caddr_t data) 109 * 110 * MPALMOSTSAFE 111 */ 112 int 113 sys_mount(struct mount_args *uap) 114 { 115 struct thread *td = curthread; 116 struct vnode *vp; 117 struct nchandle nch; 118 struct mount *mp, *nullmp; 119 struct vfsconf *vfsp; 120 int error, flag = 0, flag2 = 0; 121 int hasmount; 122 struct vattr va; 123 struct nlookupdata nd; 124 char fstypename[MFSNAMELEN]; 125 struct ucred *cred; 126 127 cred = td->td_ucred; 128 if (jailed(cred)) { 129 error = EPERM; 130 goto done; 131 } 132 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 133 goto done; 134 135 /* 136 * Do not allow NFS export by non-root users. 
137 */ 138 if (uap->flags & MNT_EXPORTED) { 139 error = priv_check(td, PRIV_ROOT); 140 if (error) 141 goto done; 142 } 143 /* 144 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 145 */ 146 if (priv_check(td, PRIV_ROOT)) 147 uap->flags |= MNT_NOSUID | MNT_NODEV; 148 149 /* 150 * Lookup the requested path and extract the nch and vnode. 151 */ 152 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 153 if (error == 0) { 154 if ((error = nlookup(&nd)) == 0) { 155 if (nd.nl_nch.ncp->nc_vp == NULL) 156 error = ENOENT; 157 } 158 } 159 if (error) { 160 nlookup_done(&nd); 161 goto done; 162 } 163 164 /* 165 * If the target filesystem is resolved via a nullfs mount, then 166 * nd.nl_nch.mount will be pointing to the nullfs mount structure 167 * instead of the target file system. We need it in case we are 168 * doing an update. 169 */ 170 nullmp = nd.nl_nch.mount; 171 172 /* 173 * Extract the locked+refd ncp and cleanup the nd structure 174 */ 175 nch = nd.nl_nch; 176 cache_zero(&nd.nl_nch); 177 nlookup_done(&nd); 178 179 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 180 (mp = cache_findmount(&nch)) != NULL) { 181 cache_dropmount(mp); 182 hasmount = 1; 183 } else { 184 hasmount = 0; 185 } 186 187 188 /* 189 * now we have the locked ref'd nch and unreferenced vnode. 190 */ 191 vp = nch.ncp->nc_vp; 192 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 193 cache_put(&nch); 194 goto done; 195 } 196 cache_unlock(&nch); 197 198 /* 199 * Extract the file system type. We need to know this early, to take 200 * appropriate actions if we are dealing with a nullfs. 
201 */ 202 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 203 cache_drop(&nch); 204 vput(vp); 205 goto done; 206 } 207 208 /* 209 * Now we have an unlocked ref'd nch and a locked ref'd vp 210 */ 211 if (uap->flags & MNT_UPDATE) { 212 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 213 cache_drop(&nch); 214 vput(vp); 215 error = EINVAL; 216 goto done; 217 } 218 219 if (strncmp(fstypename, "null", 5) == 0) { 220 KKASSERT(nullmp); 221 mp = nullmp; 222 } else { 223 mp = vp->v_mount; 224 } 225 226 flag = mp->mnt_flag; 227 flag2 = mp->mnt_kern_flag; 228 /* 229 * We only allow the filesystem to be reloaded if it 230 * is currently mounted read-only. 231 */ 232 if ((uap->flags & MNT_RELOAD) && 233 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 234 cache_drop(&nch); 235 vput(vp); 236 error = EOPNOTSUPP; /* Needs translation */ 237 goto done; 238 } 239 /* 240 * Only root, or the user that did the original mount is 241 * permitted to update it. 242 */ 243 if (mp->mnt_stat.f_owner != cred->cr_uid && 244 (error = priv_check(td, PRIV_ROOT))) { 245 cache_drop(&nch); 246 vput(vp); 247 goto done; 248 } 249 if (vfs_busy(mp, LK_NOWAIT)) { 250 cache_drop(&nch); 251 vput(vp); 252 error = EBUSY; 253 goto done; 254 } 255 if (hasmount) { 256 cache_drop(&nch); 257 vfs_unbusy(mp); 258 vput(vp); 259 error = EBUSY; 260 goto done; 261 } 262 mp->mnt_flag |= 263 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 264 lwkt_gettoken(&mp->mnt_token); 265 vn_unlock(vp); 266 goto update; 267 } 268 269 /* 270 * If the user is not root, ensure that they own the directory 271 * onto which we are attempting to mount. 
272 */ 273 if ((error = VOP_GETATTR(vp, &va)) || 274 (va.va_uid != cred->cr_uid && 275 (error = priv_check(td, PRIV_ROOT)))) { 276 cache_drop(&nch); 277 vput(vp); 278 goto done; 279 } 280 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 281 cache_drop(&nch); 282 vput(vp); 283 goto done; 284 } 285 if (vp->v_type != VDIR) { 286 cache_drop(&nch); 287 vput(vp); 288 error = ENOTDIR; 289 goto done; 290 } 291 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 292 cache_drop(&nch); 293 vput(vp); 294 error = EPERM; 295 goto done; 296 } 297 vfsp = vfsconf_find_by_name(fstypename); 298 if (vfsp == NULL) { 299 linker_file_t lf; 300 301 /* Only load modules for root (very important!) */ 302 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 303 cache_drop(&nch); 304 vput(vp); 305 goto done; 306 } 307 error = linker_load_file(fstypename, &lf); 308 if (error || lf == NULL) { 309 cache_drop(&nch); 310 vput(vp); 311 if (lf == NULL) 312 error = ENODEV; 313 goto done; 314 } 315 lf->userrefs++; 316 /* lookup again, see if the VFS was loaded */ 317 vfsp = vfsconf_find_by_name(fstypename); 318 if (vfsp == NULL) { 319 lf->userrefs--; 320 linker_file_unload(lf); 321 cache_drop(&nch); 322 vput(vp); 323 error = ENODEV; 324 goto done; 325 } 326 } 327 if (hasmount) { 328 cache_drop(&nch); 329 vput(vp); 330 error = EBUSY; 331 goto done; 332 } 333 334 /* 335 * Allocate and initialize the filesystem. 336 */ 337 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 338 mount_init(mp); 339 vfs_busy(mp, LK_NOWAIT); 340 mp->mnt_op = vfsp->vfc_vfsops; 341 mp->mnt_vfc = vfsp; 342 vfsp->vfc_refcount++; 343 mp->mnt_stat.f_type = vfsp->vfc_typenum; 344 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 345 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 346 mp->mnt_stat.f_owner = cred->cr_uid; 347 lwkt_gettoken(&mp->mnt_token); 348 vn_unlock(vp); 349 update: 350 /* 351 * (per-mount token acquired at this point) 352 * 353 * Set the mount level flags. 
354 */ 355 if (uap->flags & MNT_RDONLY) 356 mp->mnt_flag |= MNT_RDONLY; 357 else if (mp->mnt_flag & MNT_RDONLY) 358 mp->mnt_kern_flag |= MNTK_WANTRDWR; 359 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 360 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 361 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 362 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 363 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 364 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 365 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 366 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 367 /* 368 * Mount the filesystem. 369 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 370 * get. 371 */ 372 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 373 if (mp->mnt_flag & MNT_UPDATE) { 374 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 375 mp->mnt_flag &= ~MNT_RDONLY; 376 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 377 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 378 if (error) { 379 mp->mnt_flag = flag; 380 mp->mnt_kern_flag = flag2; 381 } 382 lwkt_reltoken(&mp->mnt_token); 383 vfs_unbusy(mp); 384 vrele(vp); 385 cache_drop(&nch); 386 goto done; 387 } 388 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 389 390 /* 391 * Put the new filesystem on the mount list after root. The mount 392 * point gets its own mnt_ncmountpt (unless the VFS already set one 393 * up) which represents the root of the mount. The lookup code 394 * detects the mount point going forward and checks the root of 395 * the mount going backwards. 396 * 397 * It is not necessary to invalidate or purge the vnode underneath 398 * because elements under the mount will be given their own glue 399 * namecache record. 
400 */ 401 if (!error) { 402 if (mp->mnt_ncmountpt.ncp == NULL) { 403 /* 404 * allocate, then unlock, but leave the ref intact 405 */ 406 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 407 cache_unlock(&mp->mnt_ncmountpt); 408 } 409 mp->mnt_ncmounton = nch; /* inherits ref */ 410 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 411 cache_ismounting(mp); 412 413 mountlist_insert(mp, MNTINS_LAST); 414 vn_unlock(vp); 415 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 416 error = vfs_allocate_syncvnode(mp); 417 lwkt_reltoken(&mp->mnt_token); 418 vfs_unbusy(mp); 419 error = VFS_START(mp, 0); 420 vrele(vp); 421 } else { 422 vn_syncer_thr_stop(mp); 423 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 424 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 425 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 426 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 427 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 428 mp->mnt_vfc->vfc_refcount--; 429 lwkt_reltoken(&mp->mnt_token); 430 vfs_unbusy(mp); 431 kfree(mp, M_MOUNT); 432 cache_drop(&nch); 433 vput(vp); 434 } 435 done: 436 return (error); 437 } 438 439 /* 440 * Scan all active processes to see if any of them have a current 441 * or root directory onto which the new filesystem has just been 442 * mounted. If so, replace them with the new mount point. 443 * 444 * Both old_nch and new_nch are ref'd on call but not locked. 445 * new_nch must be temporarily locked so it can be associated with the 446 * vnode representing the root of the mount point. 
447 */ 448 struct checkdirs_info { 449 struct nchandle old_nch; 450 struct nchandle new_nch; 451 struct vnode *old_vp; 452 struct vnode *new_vp; 453 }; 454 455 static int checkdirs_callback(struct proc *p, void *data); 456 457 static void 458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 459 { 460 struct checkdirs_info info; 461 struct vnode *olddp; 462 struct vnode *newdp; 463 struct mount *mp; 464 465 /* 466 * If the old mount point's vnode has a usecount of 1, it is not 467 * being held as a descriptor anywhere. 468 */ 469 olddp = old_nch->ncp->nc_vp; 470 if (olddp == NULL || VREFCNT(olddp) == 1) 471 return; 472 473 /* 474 * Force the root vnode of the new mount point to be resolved 475 * so we can update any matching processes. 476 */ 477 mp = new_nch->mount; 478 if (VFS_ROOT(mp, &newdp)) 479 panic("mount: lost mount"); 480 vn_unlock(newdp); 481 cache_lock(new_nch); 482 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 483 cache_setunresolved(new_nch); 484 cache_setvp(new_nch, newdp); 485 cache_unlock(new_nch); 486 487 /* 488 * Special handling of the root node 489 */ 490 if (rootvnode == olddp) { 491 vref(newdp); 492 vfs_cache_setroot(newdp, cache_hold(new_nch)); 493 } 494 495 /* 496 * Pass newdp separately so the callback does not have to access 497 * it via new_nch->ncp->nc_vp. 498 */ 499 info.old_nch = *old_nch; 500 info.new_nch = *new_nch; 501 info.new_vp = newdp; 502 allproc_scan(checkdirs_callback, &info); 503 vput(newdp); 504 } 505 506 /* 507 * NOTE: callback is not MP safe because the scanned process's filedesc 508 * structure can be ripped out from under us, amoung other things. 
509 */ 510 static int 511 checkdirs_callback(struct proc *p, void *data) 512 { 513 struct checkdirs_info *info = data; 514 struct filedesc *fdp; 515 struct nchandle ncdrop1; 516 struct nchandle ncdrop2; 517 struct vnode *vprele1; 518 struct vnode *vprele2; 519 520 if ((fdp = p->p_fd) != NULL) { 521 cache_zero(&ncdrop1); 522 cache_zero(&ncdrop2); 523 vprele1 = NULL; 524 vprele2 = NULL; 525 526 /* 527 * MPUNSAFE - XXX fdp can be pulled out from under a 528 * foreign process. 529 * 530 * A shared filedesc is ok, we don't have to copy it 531 * because we are making this change globally. 532 */ 533 spin_lock(&fdp->fd_spin); 534 if (fdp->fd_ncdir.mount == info->old_nch.mount && 535 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 536 vprele1 = fdp->fd_cdir; 537 vref(info->new_vp); 538 fdp->fd_cdir = info->new_vp; 539 ncdrop1 = fdp->fd_ncdir; 540 cache_copy(&info->new_nch, &fdp->fd_ncdir); 541 } 542 if (fdp->fd_nrdir.mount == info->old_nch.mount && 543 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 544 vprele2 = fdp->fd_rdir; 545 vref(info->new_vp); 546 fdp->fd_rdir = info->new_vp; 547 ncdrop2 = fdp->fd_nrdir; 548 cache_copy(&info->new_nch, &fdp->fd_nrdir); 549 } 550 spin_unlock(&fdp->fd_spin); 551 if (ncdrop1.ncp) 552 cache_drop(&ncdrop1); 553 if (ncdrop2.ncp) 554 cache_drop(&ncdrop2); 555 if (vprele1) 556 vrele(vprele1); 557 if (vprele2) 558 vrele(vprele2); 559 } 560 return(0); 561 } 562 563 /* 564 * Unmount a file system. 565 * 566 * Note: unmount takes a path to the vnode mounted on as argument, 567 * not special file (as before). 
568 * 569 * umount_args(char *path, int flags) 570 * 571 * MPALMOSTSAFE 572 */ 573 int 574 sys_unmount(struct unmount_args *uap) 575 { 576 struct thread *td = curthread; 577 struct proc *p __debugvar = td->td_proc; 578 struct mount *mp = NULL; 579 struct nlookupdata nd; 580 int error; 581 582 KKASSERT(p); 583 get_mplock(); 584 if (td->td_ucred->cr_prison != NULL) { 585 error = EPERM; 586 goto done; 587 } 588 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 589 goto done; 590 591 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 592 if (error == 0) 593 error = nlookup(&nd); 594 if (error) 595 goto out; 596 597 mp = nd.nl_nch.mount; 598 599 /* 600 * Only root, or the user that did the original mount is 601 * permitted to unmount this filesystem. 602 */ 603 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 604 (error = priv_check(td, PRIV_ROOT))) 605 goto out; 606 607 /* 608 * Don't allow unmounting the root file system. 609 */ 610 if (mp->mnt_flag & MNT_ROOTFS) { 611 error = EINVAL; 612 goto out; 613 } 614 615 /* 616 * Must be the root of the filesystem 617 */ 618 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 619 error = EINVAL; 620 goto out; 621 } 622 623 out: 624 nlookup_done(&nd); 625 if (error == 0) 626 error = dounmount(mp, uap->flags); 627 done: 628 rel_mplock(); 629 return (error); 630 } 631 632 /* 633 * Do the actual file system unmount. 
634 */ 635 static int 636 dounmount_interlock(struct mount *mp) 637 { 638 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 639 return (EBUSY); 640 mp->mnt_kern_flag |= MNTK_UNMOUNT; 641 return(0); 642 } 643 644 static int 645 unmount_allproc_cb(struct proc *p, void *arg) 646 { 647 struct mount *mp; 648 649 if (p->p_textnch.ncp == NULL) 650 return 0; 651 652 mp = (struct mount *)arg; 653 if (p->p_textnch.mount == mp) 654 cache_drop(&p->p_textnch); 655 656 return 0; 657 } 658 659 int 660 dounmount(struct mount *mp, int flags) 661 { 662 struct namecache *ncp; 663 struct nchandle nch; 664 struct vnode *vp; 665 int error; 666 int async_flag; 667 int lflags; 668 int freeok = 1; 669 int retry; 670 671 lwkt_gettoken(&mp->mnt_token); 672 /* 673 * Exclusive access for unmounting purposes 674 */ 675 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 676 goto out; 677 678 /* 679 * Allow filesystems to detect that a forced unmount is in progress. 680 */ 681 if (flags & MNT_FORCE) 682 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 683 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 684 error = lockmgr(&mp->mnt_lock, lflags); 685 if (error) { 686 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 687 if (mp->mnt_kern_flag & MNTK_MWAIT) { 688 mp->mnt_kern_flag &= ~MNTK_MWAIT; 689 wakeup(mp); 690 } 691 goto out; 692 } 693 694 if (mp->mnt_flag & MNT_EXPUBLIC) 695 vfs_setpublicfs(NULL, NULL, NULL); 696 697 vfs_msync(mp, MNT_WAIT); 698 async_flag = mp->mnt_flag & MNT_ASYNC; 699 mp->mnt_flag &=~ MNT_ASYNC; 700 701 /* 702 * If this filesystem isn't aliasing other filesystems, 703 * try to invalidate any remaining namecache entries and 704 * check the count afterwords. 
705 */ 706 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 707 cache_lock(&mp->mnt_ncmountpt); 708 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 709 cache_unlock(&mp->mnt_ncmountpt); 710 711 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 712 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 713 allproc_scan(&unmount_allproc_cb, mp); 714 } 715 716 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 717 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 718 719 if ((flags & MNT_FORCE) == 0) { 720 error = EBUSY; 721 mount_warning(mp, "Cannot unmount: " 722 "%d namecache " 723 "references still " 724 "present", 725 ncp->nc_refs - 1); 726 } else { 727 mount_warning(mp, "Forced unmount: " 728 "%d namecache " 729 "references still " 730 "present", 731 ncp->nc_refs - 1); 732 freeok = 0; 733 } 734 } 735 } 736 737 /* 738 * Decomission our special mnt_syncer vnode. This also stops 739 * the vnlru code. If we are unable to unmount we recommission 740 * the vnode. 741 * 742 * Then sync the filesystem. 743 */ 744 if ((vp = mp->mnt_syncer) != NULL) { 745 mp->mnt_syncer = NULL; 746 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 747 vrele(vp); 748 } 749 if ((mp->mnt_flag & MNT_RDONLY) == 0) 750 VFS_SYNC(mp, MNT_WAIT); 751 752 /* 753 * nchandle records ref the mount structure. Expect a count of 1 754 * (our mount->mnt_ncmountpt). 755 * 756 * Scans can get temporary refs on a mountpoint (thought really 757 * heavy duty stuff like cache_findmount() do not). 
758 */ 759 for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) { 760 cache_unmounting(mp); 761 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 762 } 763 if (mp->mnt_refs != 1) { 764 if ((flags & MNT_FORCE) == 0) { 765 mount_warning(mp, "Cannot unmount: " 766 "%d mount refs still present", 767 mp->mnt_refs); 768 error = EBUSY; 769 } else { 770 mount_warning(mp, "Forced unmount: " 771 "%d mount refs still present", 772 mp->mnt_refs); 773 freeok = 0; 774 } 775 } 776 777 /* 778 * So far so good, sync the filesystem once more and 779 * call the VFS unmount code if the sync succeeds. 780 */ 781 if (error == 0) { 782 if (((mp->mnt_flag & MNT_RDONLY) || 783 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 784 (flags & MNT_FORCE)) { 785 error = VFS_UNMOUNT(mp, flags); 786 } 787 } 788 789 /* 790 * If an error occurred we can still recover, restoring the 791 * syncer vnode and misc flags. 792 */ 793 if (error) { 794 if (mp->mnt_syncer == NULL) 795 vfs_allocate_syncvnode(mp); 796 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 797 mp->mnt_flag |= async_flag; 798 lockmgr(&mp->mnt_lock, LK_RELEASE); 799 if (mp->mnt_kern_flag & MNTK_MWAIT) { 800 mp->mnt_kern_flag &= ~MNTK_MWAIT; 801 wakeup(mp); 802 } 803 goto out; 804 } 805 /* 806 * Clean up any journals still associated with the mount after 807 * filesystem activity has ceased. 808 */ 809 journal_remove_all_journals(mp, 810 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 811 812 mountlist_remove(mp); 813 814 /* 815 * Remove any installed vnode ops here so the individual VFSs don't 816 * have to. 
817 */ 818 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 819 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 820 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 821 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 822 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 823 824 if (mp->mnt_ncmountpt.ncp != NULL) { 825 nch = mp->mnt_ncmountpt; 826 cache_zero(&mp->mnt_ncmountpt); 827 cache_clrmountpt(&nch); 828 cache_drop(&nch); 829 } 830 if (mp->mnt_ncmounton.ncp != NULL) { 831 cache_unmounting(mp); 832 nch = mp->mnt_ncmounton; 833 cache_zero(&mp->mnt_ncmounton); 834 cache_clrmountpt(&nch); 835 cache_drop(&nch); 836 } 837 838 mp->mnt_vfc->vfc_refcount--; 839 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 840 panic("unmount: dangling vnode"); 841 lockmgr(&mp->mnt_lock, LK_RELEASE); 842 if (mp->mnt_kern_flag & MNTK_MWAIT) { 843 mp->mnt_kern_flag &= ~MNTK_MWAIT; 844 wakeup(mp); 845 } 846 847 /* 848 * If we reach here and freeok != 0 we must free the mount. 849 * If refs > 1 cycle and wait, just in case someone tried 850 * to busy the mount after we decided to do the unmount. 851 */ 852 if (freeok) { 853 while (mp->mnt_refs > 1) { 854 cache_unmounting(mp); 855 wakeup(mp); 856 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 857 } 858 lwkt_reltoken(&mp->mnt_token); 859 kfree(mp, M_MOUNT); 860 mp = NULL; 861 } 862 error = 0; 863 out: 864 if (mp) 865 lwkt_reltoken(&mp->mnt_token); 866 return (error); 867 } 868 869 static 870 void 871 mount_warning(struct mount *mp, const char *ctl, ...) 
872 { 873 char *ptr; 874 char *buf; 875 __va_list va; 876 877 __va_start(va, ctl); 878 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 879 &ptr, &buf, 0) == 0) { 880 kprintf("unmount(%s): ", ptr); 881 kvprintf(ctl, va); 882 kprintf("\n"); 883 kfree(buf, M_TEMP); 884 } else { 885 kprintf("unmount(%p", mp); 886 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 887 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 888 kprintf("): "); 889 kvprintf(ctl, va); 890 kprintf("\n"); 891 } 892 __va_end(va); 893 } 894 895 /* 896 * Shim cache_fullpath() to handle the case where a process is chrooted into 897 * a subdirectory of a mount. In this case if the root mount matches the 898 * process root directory's mount we have to specify the process's root 899 * directory instead of the mount point, because the mount point might 900 * be above the root directory. 901 */ 902 static 903 int 904 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 905 { 906 struct nchandle *nch; 907 908 if (p && p->p_fd->fd_nrdir.mount == mp) 909 nch = &p->p_fd->fd_nrdir; 910 else 911 nch = &mp->mnt_ncmountpt; 912 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 913 } 914 915 /* 916 * Sync each mounted filesystem. 
917 */ 918 919 #ifdef DEBUG 920 static int syncprt = 0; 921 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 922 #endif /* DEBUG */ 923 924 static int sync_callback(struct mount *mp, void *data); 925 926 int 927 sys_sync(struct sync_args *uap) 928 { 929 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 930 return (0); 931 } 932 933 static 934 int 935 sync_callback(struct mount *mp, void *data __unused) 936 { 937 int asyncflag; 938 939 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 940 asyncflag = mp->mnt_flag & MNT_ASYNC; 941 mp->mnt_flag &= ~MNT_ASYNC; 942 vfs_msync(mp, MNT_NOWAIT); 943 VFS_SYNC(mp, MNT_NOWAIT); 944 mp->mnt_flag |= asyncflag; 945 } 946 return(0); 947 } 948 949 /* XXX PRISON: could be per prison flag */ 950 static int prison_quotas; 951 #if 0 952 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 953 #endif 954 955 /* 956 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 957 * 958 * Change filesystem quotas. 959 * 960 * MPALMOSTSAFE 961 */ 962 int 963 sys_quotactl(struct quotactl_args *uap) 964 { 965 struct nlookupdata nd; 966 struct thread *td; 967 struct mount *mp; 968 int error; 969 970 get_mplock(); 971 td = curthread; 972 if (td->td_ucred->cr_prison && !prison_quotas) { 973 error = EPERM; 974 goto done; 975 } 976 977 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 978 if (error == 0) 979 error = nlookup(&nd); 980 if (error == 0) { 981 mp = nd.nl_nch.mount; 982 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 983 uap->arg, nd.nl_cred); 984 } 985 nlookup_done(&nd); 986 done: 987 rel_mplock(); 988 return (error); 989 } 990 991 /* 992 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 993 * void *buf, int buflen) 994 * 995 * This function operates on a mount point and executes the specified 996 * operation using the specified control data, and possibly returns data. 
997 * 998 * The actual number of bytes stored in the result buffer is returned, 0 999 * if none, otherwise an error is returned. 1000 * 1001 * MPALMOSTSAFE 1002 */ 1003 int 1004 sys_mountctl(struct mountctl_args *uap) 1005 { 1006 struct thread *td = curthread; 1007 struct proc *p = td->td_proc; 1008 struct file *fp; 1009 void *ctl = NULL; 1010 void *buf = NULL; 1011 char *path = NULL; 1012 int error; 1013 1014 /* 1015 * Sanity and permissions checks. We must be root. 1016 */ 1017 KKASSERT(p); 1018 if (td->td_ucred->cr_prison != NULL) 1019 return (EPERM); 1020 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1021 (error = priv_check(td, PRIV_ROOT)) != 0) 1022 return (error); 1023 1024 /* 1025 * Argument length checks 1026 */ 1027 if (uap->ctllen < 0 || uap->ctllen > 1024) 1028 return (EINVAL); 1029 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1030 return (EINVAL); 1031 if (uap->path == NULL) 1032 return (EINVAL); 1033 1034 /* 1035 * Allocate the necessary buffers and copyin data 1036 */ 1037 path = objcache_get(namei_oc, M_WAITOK); 1038 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1039 if (error) 1040 goto done; 1041 1042 if (uap->ctllen) { 1043 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1044 error = copyin(uap->ctl, ctl, uap->ctllen); 1045 if (error) 1046 goto done; 1047 } 1048 if (uap->buflen) 1049 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1050 1051 /* 1052 * Validate the descriptor 1053 */ 1054 if (uap->fd >= 0) { 1055 fp = holdfp(p->p_fd, uap->fd, -1); 1056 if (fp == NULL) { 1057 error = EBADF; 1058 goto done; 1059 } 1060 } else { 1061 fp = NULL; 1062 } 1063 1064 /* 1065 * Execute the internal kernel function and clean up. 
1066 */ 1067 get_mplock(); 1068 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1069 rel_mplock(); 1070 if (fp) 1071 fdrop(fp); 1072 if (error == 0 && uap->sysmsg_result > 0) 1073 error = copyout(buf, uap->buf, uap->sysmsg_result); 1074 done: 1075 if (path) 1076 objcache_put(namei_oc, path); 1077 if (ctl) 1078 kfree(ctl, M_TEMP); 1079 if (buf) 1080 kfree(buf, M_TEMP); 1081 return (error); 1082 } 1083 1084 /* 1085 * Execute a mount control operation by resolving the path to a mount point 1086 * and calling vop_mountctl(). 1087 * 1088 * Use the mount point from the nch instead of the vnode so nullfs mounts 1089 * can properly spike the VOP. 1090 */ 1091 int 1092 kern_mountctl(const char *path, int op, struct file *fp, 1093 const void *ctl, int ctllen, 1094 void *buf, int buflen, int *res) 1095 { 1096 struct vnode *vp; 1097 struct mount *mp; 1098 struct nlookupdata nd; 1099 int error; 1100 1101 *res = 0; 1102 vp = NULL; 1103 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1104 if (error == 0) 1105 error = nlookup(&nd); 1106 if (error == 0) 1107 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1108 mp = nd.nl_nch.mount; 1109 nlookup_done(&nd); 1110 if (error) 1111 return (error); 1112 vn_unlock(vp); 1113 1114 /* 1115 * Must be the root of the filesystem 1116 */ 1117 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1118 vrele(vp); 1119 return (EINVAL); 1120 } 1121 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1122 buf, buflen, res); 1123 vrele(vp); 1124 return (error); 1125 } 1126 1127 int 1128 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1129 { 1130 struct thread *td = curthread; 1131 struct proc *p = td->td_proc; 1132 struct mount *mp; 1133 struct statfs *sp; 1134 char *fullpath, *freepath; 1135 int error; 1136 1137 if ((error = nlookup(nd)) != 0) 1138 return (error); 1139 mp = nd->nl_nch.mount; 1140 sp = &mp->mnt_stat; 1141 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1142 return (error); 1143 1144 error = mount_path(p, mp, &fullpath, &freepath); 1145 if (error) 1146 return(error); 1147 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1148 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1149 kfree(freepath, M_TEMP); 1150 1151 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1152 bcopy(sp, buf, sizeof(*buf)); 1153 /* Only root should have access to the fsid's. */ 1154 if (priv_check(td, PRIV_ROOT)) 1155 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1156 return (0); 1157 } 1158 1159 /* 1160 * statfs_args(char *path, struct statfs *buf) 1161 * 1162 * Get filesystem statistics. 1163 */ 1164 int 1165 sys_statfs(struct statfs_args *uap) 1166 { 1167 struct nlookupdata nd; 1168 struct statfs buf; 1169 int error; 1170 1171 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1172 if (error == 0) 1173 error = kern_statfs(&nd, &buf); 1174 nlookup_done(&nd); 1175 if (error == 0) 1176 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1177 return (error); 1178 } 1179 1180 int 1181 kern_fstatfs(int fd, struct statfs *buf) 1182 { 1183 struct thread *td = curthread; 1184 struct proc *p = td->td_proc; 1185 struct file *fp; 1186 struct mount *mp; 1187 struct statfs *sp; 1188 char *fullpath, *freepath; 1189 int error; 1190 1191 KKASSERT(p); 1192 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1193 return (error); 1194 1195 /* 1196 * Try to use mount info from any overlays rather than the 1197 * mount info for the underlying vnode, otherwise we will 1198 * fail when operating on null-mounted paths inside a chroot. 
1199 */ 1200 if ((mp = fp->f_nchandle.mount) == NULL) 1201 mp = ((struct vnode *)fp->f_data)->v_mount; 1202 if (mp == NULL) { 1203 error = EBADF; 1204 goto done; 1205 } 1206 if (fp->f_cred == NULL) { 1207 error = EINVAL; 1208 goto done; 1209 } 1210 sp = &mp->mnt_stat; 1211 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1212 goto done; 1213 1214 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1215 goto done; 1216 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1217 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1218 kfree(freepath, M_TEMP); 1219 1220 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1221 bcopy(sp, buf, sizeof(*buf)); 1222 1223 /* Only root should have access to the fsid's. */ 1224 if (priv_check(td, PRIV_ROOT)) 1225 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1226 error = 0; 1227 done: 1228 fdrop(fp); 1229 return (error); 1230 } 1231 1232 /* 1233 * fstatfs_args(int fd, struct statfs *buf) 1234 * 1235 * Get filesystem statistics. 1236 */ 1237 int 1238 sys_fstatfs(struct fstatfs_args *uap) 1239 { 1240 struct statfs buf; 1241 int error; 1242 1243 error = kern_fstatfs(uap->fd, &buf); 1244 1245 if (error == 0) 1246 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1247 return (error); 1248 } 1249 1250 int 1251 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1252 { 1253 struct mount *mp; 1254 struct statvfs *sp; 1255 int error; 1256 1257 if ((error = nlookup(nd)) != 0) 1258 return (error); 1259 mp = nd->nl_nch.mount; 1260 sp = &mp->mnt_vstat; 1261 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1262 return (error); 1263 1264 sp->f_flag = 0; 1265 if (mp->mnt_flag & MNT_RDONLY) 1266 sp->f_flag |= ST_RDONLY; 1267 if (mp->mnt_flag & MNT_NOSUID) 1268 sp->f_flag |= ST_NOSUID; 1269 bcopy(sp, buf, sizeof(*buf)); 1270 return (0); 1271 } 1272 1273 /* 1274 * statfs_args(char *path, struct statfs *buf) 1275 * 1276 * Get filesystem statistics. 
1277 */ 1278 int 1279 sys_statvfs(struct statvfs_args *uap) 1280 { 1281 struct nlookupdata nd; 1282 struct statvfs buf; 1283 int error; 1284 1285 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1286 if (error == 0) 1287 error = kern_statvfs(&nd, &buf); 1288 nlookup_done(&nd); 1289 if (error == 0) 1290 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1291 return (error); 1292 } 1293 1294 int 1295 kern_fstatvfs(int fd, struct statvfs *buf) 1296 { 1297 struct thread *td = curthread; 1298 struct proc *p = td->td_proc; 1299 struct file *fp; 1300 struct mount *mp; 1301 struct statvfs *sp; 1302 int error; 1303 1304 KKASSERT(p); 1305 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1306 return (error); 1307 if ((mp = fp->f_nchandle.mount) == NULL) 1308 mp = ((struct vnode *)fp->f_data)->v_mount; 1309 if (mp == NULL) { 1310 error = EBADF; 1311 goto done; 1312 } 1313 if (fp->f_cred == NULL) { 1314 error = EINVAL; 1315 goto done; 1316 } 1317 sp = &mp->mnt_vstat; 1318 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1319 goto done; 1320 1321 sp->f_flag = 0; 1322 if (mp->mnt_flag & MNT_RDONLY) 1323 sp->f_flag |= ST_RDONLY; 1324 if (mp->mnt_flag & MNT_NOSUID) 1325 sp->f_flag |= ST_NOSUID; 1326 1327 bcopy(sp, buf, sizeof(*buf)); 1328 error = 0; 1329 done: 1330 fdrop(fp); 1331 return (error); 1332 } 1333 1334 /* 1335 * fstatfs_args(int fd, struct statfs *buf) 1336 * 1337 * Get filesystem statistics. 1338 */ 1339 int 1340 sys_fstatvfs(struct fstatvfs_args *uap) 1341 { 1342 struct statvfs buf; 1343 int error; 1344 1345 error = kern_fstatvfs(uap->fd, &buf); 1346 1347 if (error == 0) 1348 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1349 return (error); 1350 } 1351 1352 /* 1353 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1354 * 1355 * Get statistics on all filesystems. 
1356 */ 1357 1358 struct getfsstat_info { 1359 struct statfs *sfsp; 1360 long count; 1361 long maxcount; 1362 int error; 1363 int flags; 1364 struct thread *td; 1365 }; 1366 1367 static int getfsstat_callback(struct mount *, void *); 1368 1369 int 1370 sys_getfsstat(struct getfsstat_args *uap) 1371 { 1372 struct thread *td = curthread; 1373 struct getfsstat_info info; 1374 1375 bzero(&info, sizeof(info)); 1376 1377 info.maxcount = uap->bufsize / sizeof(struct statfs); 1378 info.sfsp = uap->buf; 1379 info.count = 0; 1380 info.flags = uap->flags; 1381 info.td = td; 1382 1383 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1384 if (info.sfsp && info.count > info.maxcount) 1385 uap->sysmsg_result = info.maxcount; 1386 else 1387 uap->sysmsg_result = info.count; 1388 return (info.error); 1389 } 1390 1391 static int 1392 getfsstat_callback(struct mount *mp, void *data) 1393 { 1394 struct getfsstat_info *info = data; 1395 struct statfs *sp; 1396 char *freepath; 1397 char *fullpath; 1398 int error; 1399 1400 if (info->sfsp && info->count < info->maxcount) { 1401 if (info->td->td_proc && 1402 !chroot_visible_mnt(mp, info->td->td_proc)) { 1403 return(0); 1404 } 1405 sp = &mp->mnt_stat; 1406 1407 /* 1408 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1409 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1410 * overrides MNT_WAIT. 
1411 */ 1412 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1413 (info->flags & MNT_WAIT)) && 1414 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1415 return(0); 1416 } 1417 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1418 1419 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1420 if (error) { 1421 info->error = error; 1422 return(-1); 1423 } 1424 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1425 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1426 kfree(freepath, M_TEMP); 1427 1428 error = copyout(sp, info->sfsp, sizeof(*sp)); 1429 if (error) { 1430 info->error = error; 1431 return (-1); 1432 } 1433 ++info->sfsp; 1434 } 1435 info->count++; 1436 return(0); 1437 } 1438 1439 /* 1440 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1441 long bufsize, int flags) 1442 * 1443 * Get statistics on all filesystems. 1444 */ 1445 1446 struct getvfsstat_info { 1447 struct statfs *sfsp; 1448 struct statvfs *vsfsp; 1449 long count; 1450 long maxcount; 1451 int error; 1452 int flags; 1453 struct thread *td; 1454 }; 1455 1456 static int getvfsstat_callback(struct mount *, void *); 1457 1458 int 1459 sys_getvfsstat(struct getvfsstat_args *uap) 1460 { 1461 struct thread *td = curthread; 1462 struct getvfsstat_info info; 1463 1464 bzero(&info, sizeof(info)); 1465 1466 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1467 info.sfsp = uap->buf; 1468 info.vsfsp = uap->vbuf; 1469 info.count = 0; 1470 info.flags = uap->flags; 1471 info.td = td; 1472 1473 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1474 if (info.vsfsp && info.count > info.maxcount) 1475 uap->sysmsg_result = info.maxcount; 1476 else 1477 uap->sysmsg_result = info.count; 1478 return (info.error); 1479 } 1480 1481 static int 1482 getvfsstat_callback(struct mount *mp, void *data) 1483 { 1484 struct getvfsstat_info *info = data; 1485 struct statfs *sp; 1486 struct statvfs *vsp; 1487 char *freepath; 1488 char *fullpath; 1489 int error; 1490 1491 if 
(info->vsfsp && info->count < info->maxcount) { 1492 if (info->td->td_proc && 1493 !chroot_visible_mnt(mp, info->td->td_proc)) { 1494 return(0); 1495 } 1496 sp = &mp->mnt_stat; 1497 vsp = &mp->mnt_vstat; 1498 1499 /* 1500 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1501 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1502 * overrides MNT_WAIT. 1503 */ 1504 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1505 (info->flags & MNT_WAIT)) && 1506 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1507 return(0); 1508 } 1509 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1510 1511 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1512 (info->flags & MNT_WAIT)) && 1513 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1514 return(0); 1515 } 1516 vsp->f_flag = 0; 1517 if (mp->mnt_flag & MNT_RDONLY) 1518 vsp->f_flag |= ST_RDONLY; 1519 if (mp->mnt_flag & MNT_NOSUID) 1520 vsp->f_flag |= ST_NOSUID; 1521 1522 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1523 if (error) { 1524 info->error = error; 1525 return(-1); 1526 } 1527 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1528 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1529 kfree(freepath, M_TEMP); 1530 1531 error = copyout(sp, info->sfsp, sizeof(*sp)); 1532 if (error == 0) 1533 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1534 if (error) { 1535 info->error = error; 1536 return (-1); 1537 } 1538 ++info->sfsp; 1539 ++info->vsfsp; 1540 } 1541 info->count++; 1542 return(0); 1543 } 1544 1545 1546 /* 1547 * fchdir_args(int fd) 1548 * 1549 * Change current working directory to a given file descriptor. 
1550 */ 1551 int 1552 sys_fchdir(struct fchdir_args *uap) 1553 { 1554 struct thread *td = curthread; 1555 struct proc *p = td->td_proc; 1556 struct filedesc *fdp = p->p_fd; 1557 struct vnode *vp, *ovp; 1558 struct mount *mp; 1559 struct file *fp; 1560 struct nchandle nch, onch, tnch; 1561 int error; 1562 1563 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1564 return (error); 1565 lwkt_gettoken(&p->p_token); 1566 vp = (struct vnode *)fp->f_data; 1567 vref(vp); 1568 vn_lock(vp, LK_SHARED | LK_RETRY); 1569 if (fp->f_nchandle.ncp == NULL) 1570 error = ENOTDIR; 1571 else 1572 error = checkvp_chdir(vp, td); 1573 if (error) { 1574 vput(vp); 1575 goto done; 1576 } 1577 cache_copy(&fp->f_nchandle, &nch); 1578 1579 /* 1580 * If the ncp has become a mount point, traverse through 1581 * the mount point. 1582 */ 1583 1584 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1585 (mp = cache_findmount(&nch)) != NULL 1586 ) { 1587 error = nlookup_mp(mp, &tnch); 1588 if (error == 0) { 1589 cache_unlock(&tnch); /* leave ref intact */ 1590 vput(vp); 1591 vp = tnch.ncp->nc_vp; 1592 error = vget(vp, LK_SHARED); 1593 KKASSERT(error == 0); 1594 cache_drop(&nch); 1595 nch = tnch; 1596 } 1597 cache_dropmount(mp); 1598 } 1599 if (error == 0) { 1600 ovp = fdp->fd_cdir; 1601 onch = fdp->fd_ncdir; 1602 vn_unlock(vp); /* leave ref intact */ 1603 fdp->fd_cdir = vp; 1604 fdp->fd_ncdir = nch; 1605 cache_drop(&onch); 1606 vrele(ovp); 1607 } else { 1608 cache_drop(&nch); 1609 vput(vp); 1610 } 1611 fdrop(fp); 1612 done: 1613 lwkt_reltoken(&p->p_token); 1614 return (error); 1615 } 1616 1617 int 1618 kern_chdir(struct nlookupdata *nd) 1619 { 1620 struct thread *td = curthread; 1621 struct proc *p = td->td_proc; 1622 struct filedesc *fdp = p->p_fd; 1623 struct vnode *vp, *ovp; 1624 struct nchandle onch; 1625 int error; 1626 1627 nd->nl_flags |= NLC_SHAREDLOCK; 1628 if ((error = nlookup(nd)) != 0) 1629 return (error); 1630 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1631 return (ENOENT); 1632 if 
((error = vget(vp, LK_SHARED)) != 0) 1633 return (error); 1634 1635 lwkt_gettoken(&p->p_token); 1636 error = checkvp_chdir(vp, td); 1637 vn_unlock(vp); 1638 if (error == 0) { 1639 ovp = fdp->fd_cdir; 1640 onch = fdp->fd_ncdir; 1641 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1642 fdp->fd_ncdir = nd->nl_nch; 1643 fdp->fd_cdir = vp; 1644 cache_drop(&onch); 1645 vrele(ovp); 1646 cache_zero(&nd->nl_nch); 1647 } else { 1648 vrele(vp); 1649 } 1650 lwkt_reltoken(&p->p_token); 1651 return (error); 1652 } 1653 1654 /* 1655 * chdir_args(char *path) 1656 * 1657 * Change current working directory (``.''). 1658 */ 1659 int 1660 sys_chdir(struct chdir_args *uap) 1661 { 1662 struct nlookupdata nd; 1663 int error; 1664 1665 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1666 if (error == 0) 1667 error = kern_chdir(&nd); 1668 nlookup_done(&nd); 1669 return (error); 1670 } 1671 1672 /* 1673 * Helper function for raised chroot(2) security function: Refuse if 1674 * any filedescriptors are open directories. 1675 */ 1676 static int 1677 chroot_refuse_vdir_fds(struct filedesc *fdp) 1678 { 1679 struct vnode *vp; 1680 struct file *fp; 1681 int error; 1682 int fd; 1683 1684 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1685 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1686 continue; 1687 vp = (struct vnode *)fp->f_data; 1688 if (vp->v_type != VDIR) { 1689 fdrop(fp); 1690 continue; 1691 } 1692 fdrop(fp); 1693 return(EPERM); 1694 } 1695 return (0); 1696 } 1697 1698 /* 1699 * This sysctl determines if we will allow a process to chroot(2) if it 1700 * has a directory open: 1701 * 0: disallowed for all processes. 1702 * 1: allowed for processes that were not already chroot(2)'ed. 1703 * 2: allowed for all processes. 1704 */ 1705 1706 static int chroot_allow_open_directories = 1; 1707 1708 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1709 &chroot_allow_open_directories, 0, ""); 1710 1711 /* 1712 * chroot to the specified namecache entry. 
We obtain the vp from the 1713 * namecache data. The passed ncp must be locked and referenced and will 1714 * remain locked and referenced on return. 1715 */ 1716 int 1717 kern_chroot(struct nchandle *nch) 1718 { 1719 struct thread *td = curthread; 1720 struct proc *p = td->td_proc; 1721 struct filedesc *fdp = p->p_fd; 1722 struct vnode *vp; 1723 int error; 1724 1725 /* 1726 * Only privileged user can chroot 1727 */ 1728 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1729 if (error) 1730 return (error); 1731 1732 /* 1733 * Disallow open directory descriptors (fchdir() breakouts). 1734 */ 1735 if (chroot_allow_open_directories == 0 || 1736 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1737 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1738 return (error); 1739 } 1740 if ((vp = nch->ncp->nc_vp) == NULL) 1741 return (ENOENT); 1742 1743 if ((error = vget(vp, LK_SHARED)) != 0) 1744 return (error); 1745 1746 /* 1747 * Check the validity of vp as a directory to change to and 1748 * associate it with rdir/jdir. 1749 */ 1750 error = checkvp_chdir(vp, td); 1751 vn_unlock(vp); /* leave reference intact */ 1752 if (error == 0) { 1753 vrele(fdp->fd_rdir); 1754 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1755 cache_drop(&fdp->fd_nrdir); 1756 cache_copy(nch, &fdp->fd_nrdir); 1757 if (fdp->fd_jdir == NULL) { 1758 fdp->fd_jdir = vp; 1759 vref(fdp->fd_jdir); 1760 cache_copy(nch, &fdp->fd_njdir); 1761 } 1762 } else { 1763 vrele(vp); 1764 } 1765 return (error); 1766 } 1767 1768 /* 1769 * chroot_args(char *path) 1770 * 1771 * Change notion of root (``/'') directory. 
1772 */ 1773 int 1774 sys_chroot(struct chroot_args *uap) 1775 { 1776 struct thread *td __debugvar = curthread; 1777 struct nlookupdata nd; 1778 int error; 1779 1780 KKASSERT(td->td_proc); 1781 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1782 if (error == 0) { 1783 nd.nl_flags |= NLC_EXEC; 1784 error = nlookup(&nd); 1785 if (error == 0) 1786 error = kern_chroot(&nd.nl_nch); 1787 } 1788 nlookup_done(&nd); 1789 return(error); 1790 } 1791 1792 int 1793 sys_chroot_kernel(struct chroot_kernel_args *uap) 1794 { 1795 struct thread *td = curthread; 1796 struct nlookupdata nd; 1797 struct nchandle *nch; 1798 struct vnode *vp; 1799 int error; 1800 1801 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1802 if (error) 1803 goto error_nond; 1804 1805 error = nlookup(&nd); 1806 if (error) 1807 goto error_out; 1808 1809 nch = &nd.nl_nch; 1810 1811 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1812 if (error) 1813 goto error_out; 1814 1815 if ((vp = nch->ncp->nc_vp) == NULL) { 1816 error = ENOENT; 1817 goto error_out; 1818 } 1819 1820 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1821 goto error_out; 1822 1823 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1824 get_mplock(); 1825 vfs_cache_setroot(vp, cache_hold(nch)); 1826 rel_mplock(); 1827 1828 error_out: 1829 nlookup_done(&nd); 1830 error_nond: 1831 return(error); 1832 } 1833 1834 /* 1835 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1836 * determine whether it is legal to chdir to the vnode. The vnode's state 1837 * is not changed by this call. 
*
 * Returns ENOTDIR when the vnode is not a directory, otherwise the
 * result of the VEXEC access check made with the thread's credentials.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the file described by an initialized lookup and install it in the
 * calling process's descriptor table.
 *
 * oflags are the open(2) style flags, mode the creation mode (masked by
 * the process's fd_cmask, with S_ISTXT removed).  On success the new
 * descriptor index is stored in *res.  nlookup_done() is called on nd
 * here once vn_open() has returned.
 *
 * Returns EINVAL when both access mode bits are set in oflags, EINTR in
 * place of ERESTART from a failed open, or the error reported by
 * falloc(), vn_open(), fdalloc(), VOP_ADVLOCK() or fsetfdflags().
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0)
		nd->nl_flags |= NLC_SHAREDLOCK;

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				/* dup failed: give the reserved slot back */
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file advisory lock, write lock
	 * for O_EXLOCK and read lock otherwise.  The request waits unless
	 * FNONBLOCK is set.
	 */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	/* error is still 0 here unless setting close-on-exec fails */
	if (oflags & O_CLOEXEC)
		error = fsetfdflags(fdp, *res, UF_EXCLOSE);
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	/* called on failure too; kern_open() has also called it on nd */
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 *
 * Same as open, with the lookup initialized through nlookup_init_at()
 * from uap->fd.
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
					&uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a special node (or a whiteout) at the target of an initialized
 * lookup.
 *
 * The S_IFMT bits of mode select what is created and which privilege is
 * checked; any other type yields EINVAL.  The permission bits are masked
 * by the process's fd_cmask.  rmajor/rminor are stored in the new node's
 * attributes.  Returns EEXIST when the name already resolves to a vnode.
 * The caller remains responsible for nlookup_done().
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		/* whiteouts go through VOP_NWHITEOUT, no va_type needed */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		/* the new vnode is not needed here; release it */
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
2125 */ 2126 int 2127 sys_mknodat(struct mknodat_args *uap) 2128 { 2129 struct nlookupdata nd; 2130 struct file *fp; 2131 int error; 2132 2133 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2134 if (error == 0) { 2135 error = kern_mknod(&nd, uap->mode, 2136 umajor(uap->dev), uminor(uap->dev)); 2137 } 2138 nlookup_done_at(&nd, fp); 2139 return (error); 2140 } 2141 2142 int 2143 kern_mkfifo(struct nlookupdata *nd, int mode) 2144 { 2145 struct thread *td = curthread; 2146 struct proc *p = td->td_proc; 2147 struct vattr vattr; 2148 struct vnode *vp; 2149 int error; 2150 2151 bwillinode(1); 2152 2153 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2154 if ((error = nlookup(nd)) != 0) 2155 return (error); 2156 if (nd->nl_nch.ncp->nc_vp) 2157 return (EEXIST); 2158 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2159 return (error); 2160 2161 VATTR_NULL(&vattr); 2162 vattr.va_type = VFIFO; 2163 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2164 vp = NULL; 2165 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2166 if (error == 0) 2167 vput(vp); 2168 return (error); 2169 } 2170 2171 /* 2172 * mkfifo_args(char *path, int mode) 2173 * 2174 * Create a named pipe. 2175 */ 2176 int 2177 sys_mkfifo(struct mkfifo_args *uap) 2178 { 2179 struct nlookupdata nd; 2180 int error; 2181 2182 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2183 if (error == 0) 2184 error = kern_mkfifo(&nd, uap->mode); 2185 nlookup_done(&nd); 2186 return (error); 2187 } 2188 2189 /* 2190 * mkfifoat_args(int fd, char *path, mode_t mode) 2191 * 2192 * Create a named pipe. The path is relative to the directory associated 2193 * with fd. 
*/
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Tunables consulted by can_hardlink().  Both default to 0 (no
 * ownership based restriction).
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Apply the hardlink_check_uid / hardlink_check_gid policy to vp.
 *
 * Returns 0 when linking is allowed, EPERM when the policy refuses it, or
 * the error from VOP_GETATTR().  td is not referenced by the body.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link: nd describes the existing file, linknd the new
 * name.  Both lookups must have been initialized by the caller, who also
 * remains responsible for nlookup_done() on each.
 *
 * Returns EPERM when the source is a directory or can_hardlink() refuses,
 * EEXIST when the new name already resolves to a vnode, otherwise the
 * first error from the lookups, ncp_writechk(), the vnode locking calls
 * or VOP_NLINK().
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);			/* the vget() reference is kept */

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	/* relock the source now that the target lookup is complete */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
2329 */ 2330 int 2331 sys_link(struct link_args *uap) 2332 { 2333 struct nlookupdata nd, linknd; 2334 int error; 2335 2336 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2337 if (error == 0) { 2338 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2339 if (error == 0) 2340 error = kern_link(&nd, &linknd); 2341 nlookup_done(&linknd); 2342 } 2343 nlookup_done(&nd); 2344 return (error); 2345 } 2346 2347 /* 2348 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2349 * 2350 * Make a hard file link. The path1 argument is relative to the directory 2351 * associated with fd1, and similarly the path2 argument is relative to 2352 * the directory associated with fd2. 2353 */ 2354 int 2355 sys_linkat(struct linkat_args *uap) 2356 { 2357 struct nlookupdata nd, linknd; 2358 struct file *fp1, *fp2; 2359 int error; 2360 2361 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2362 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2363 if (error == 0) { 2364 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2365 uap->path2, UIO_USERSPACE, 0); 2366 if (error == 0) 2367 error = kern_link(&nd, &linknd); 2368 nlookup_done_at(&linknd, fp2); 2369 } 2370 nlookup_done_at(&nd, fp1); 2371 return (error); 2372 } 2373 2374 int 2375 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2376 { 2377 struct vattr vattr; 2378 struct vnode *vp; 2379 struct vnode *dvp; 2380 int error; 2381 2382 bwillinode(1); 2383 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2384 if ((error = nlookup(nd)) != 0) 2385 return (error); 2386 if (nd->nl_nch.ncp->nc_vp) 2387 return (EEXIST); 2388 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2389 return (error); 2390 dvp = nd->nl_dvp; 2391 VATTR_NULL(&vattr); 2392 vattr.va_mode = mode; 2393 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2394 if (error == 0) 2395 vput(vp); 2396 return (error); 2397 } 2398 2399 /* 2400 * symlink(char *path, char *link) 2401 * 2402 * Make a symbolic 
link. 2403 */ 2404 int 2405 sys_symlink(struct symlink_args *uap) 2406 { 2407 struct thread *td = curthread; 2408 struct nlookupdata nd; 2409 char *path; 2410 int error; 2411 int mode; 2412 2413 path = objcache_get(namei_oc, M_WAITOK); 2414 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2415 if (error == 0) { 2416 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2417 if (error == 0) { 2418 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2419 error = kern_symlink(&nd, path, mode); 2420 } 2421 nlookup_done(&nd); 2422 } 2423 objcache_put(namei_oc, path); 2424 return (error); 2425 } 2426 2427 /* 2428 * symlinkat_args(char *path1, int fd, char *path2) 2429 * 2430 * Make a symbolic link. The path2 argument is relative to the directory 2431 * associated with fd. 2432 */ 2433 int 2434 sys_symlinkat(struct symlinkat_args *uap) 2435 { 2436 struct thread *td = curthread; 2437 struct nlookupdata nd; 2438 struct file *fp; 2439 char *path1; 2440 int error; 2441 int mode; 2442 2443 path1 = objcache_get(namei_oc, M_WAITOK); 2444 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2445 if (error == 0) { 2446 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2447 UIO_USERSPACE, 0); 2448 if (error == 0) { 2449 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2450 error = kern_symlink(&nd, path1, mode); 2451 } 2452 nlookup_done_at(&nd, fp); 2453 } 2454 objcache_put(namei_oc, path1); 2455 return (error); 2456 } 2457 2458 /* 2459 * undelete_args(char *path) 2460 * 2461 * Delete a whiteout from the filesystem. 
2462 */ 2463 int 2464 sys_undelete(struct undelete_args *uap) 2465 { 2466 struct nlookupdata nd; 2467 int error; 2468 2469 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2470 bwillinode(1); 2471 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2472 if (error == 0) 2473 error = nlookup(&nd); 2474 if (error == 0) 2475 error = ncp_writechk(&nd.nl_nch); 2476 if (error == 0) { 2477 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2478 NAMEI_DELETE); 2479 } 2480 nlookup_done(&nd); 2481 return (error); 2482 } 2483 2484 int 2485 kern_unlink(struct nlookupdata *nd) 2486 { 2487 int error; 2488 2489 bwillinode(1); 2490 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2491 if ((error = nlookup(nd)) != 0) 2492 return (error); 2493 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2494 return (error); 2495 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2496 return (error); 2497 } 2498 2499 /* 2500 * unlink_args(char *path) 2501 * 2502 * Delete a name from the filesystem. 2503 */ 2504 int 2505 sys_unlink(struct unlink_args *uap) 2506 { 2507 struct nlookupdata nd; 2508 int error; 2509 2510 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2511 if (error == 0) 2512 error = kern_unlink(&nd); 2513 nlookup_done(&nd); 2514 return (error); 2515 } 2516 2517 2518 /* 2519 * unlinkat_args(int fd, char *path, int flags) 2520 * 2521 * Delete the file or directory entry pointed to by fd/path. 
2522 */ 2523 int 2524 sys_unlinkat(struct unlinkat_args *uap) 2525 { 2526 struct nlookupdata nd; 2527 struct file *fp; 2528 int error; 2529 2530 if (uap->flags & ~AT_REMOVEDIR) 2531 return (EINVAL); 2532 2533 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2534 if (error == 0) { 2535 if (uap->flags & AT_REMOVEDIR) 2536 error = kern_rmdir(&nd); 2537 else 2538 error = kern_unlink(&nd); 2539 } 2540 nlookup_done_at(&nd, fp); 2541 return (error); 2542 } 2543 2544 int 2545 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2546 { 2547 struct thread *td = curthread; 2548 struct proc *p = td->td_proc; 2549 struct file *fp; 2550 struct vnode *vp; 2551 struct vattr vattr; 2552 off_t new_offset; 2553 int error; 2554 2555 fp = holdfp(p->p_fd, fd, -1); 2556 if (fp == NULL) 2557 return (EBADF); 2558 if (fp->f_type != DTYPE_VNODE) { 2559 error = ESPIPE; 2560 goto done; 2561 } 2562 vp = (struct vnode *)fp->f_data; 2563 2564 switch (whence) { 2565 case L_INCR: 2566 spin_lock(&fp->f_spin); 2567 new_offset = fp->f_offset + offset; 2568 error = 0; 2569 break; 2570 case L_XTND: 2571 error = VOP_GETATTR(vp, &vattr); 2572 spin_lock(&fp->f_spin); 2573 new_offset = offset + vattr.va_size; 2574 break; 2575 case L_SET: 2576 new_offset = offset; 2577 error = 0; 2578 spin_lock(&fp->f_spin); 2579 break; 2580 default: 2581 new_offset = 0; 2582 error = EINVAL; 2583 spin_lock(&fp->f_spin); 2584 break; 2585 } 2586 2587 /* 2588 * Validate the seek position. Negative offsets are not allowed 2589 * for regular files or directories. 2590 * 2591 * Normally we would also not want to allow negative offsets for 2592 * character and block-special devices. However kvm addresses 2593 * on 64 bit architectures might appear to be negative and must 2594 * be allowed. 
2595 */ 2596 if (error == 0) { 2597 if (new_offset < 0 && 2598 (vp->v_type == VREG || vp->v_type == VDIR)) { 2599 error = EINVAL; 2600 } else { 2601 fp->f_offset = new_offset; 2602 } 2603 } 2604 *res = fp->f_offset; 2605 spin_unlock(&fp->f_spin); 2606 done: 2607 fdrop(fp); 2608 return (error); 2609 } 2610 2611 /* 2612 * lseek_args(int fd, int pad, off_t offset, int whence) 2613 * 2614 * Reposition read/write file offset. 2615 */ 2616 int 2617 sys_lseek(struct lseek_args *uap) 2618 { 2619 int error; 2620 2621 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2622 &uap->sysmsg_offset); 2623 2624 return (error); 2625 } 2626 2627 /* 2628 * Check if current process can access given file. amode is a bitmask of *_OK 2629 * access bits. flags is a bitmask of AT_* flags. 2630 */ 2631 int 2632 kern_access(struct nlookupdata *nd, int amode, int flags) 2633 { 2634 struct vnode *vp; 2635 int error, mode; 2636 2637 if (flags & ~AT_EACCESS) 2638 return (EINVAL); 2639 nd->nl_flags |= NLC_SHAREDLOCK; 2640 if ((error = nlookup(nd)) != 0) 2641 return (error); 2642 retry: 2643 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2644 if (error) 2645 return (error); 2646 2647 /* Flags == 0 means only check for existence. */ 2648 if (amode) { 2649 mode = 0; 2650 if (amode & R_OK) 2651 mode |= VREAD; 2652 if (amode & W_OK) 2653 mode |= VWRITE; 2654 if (amode & X_OK) 2655 mode |= VEXEC; 2656 if ((mode & VWRITE) == 0 || 2657 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2658 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2659 2660 /* 2661 * If the file handle is stale we have to re-resolve the 2662 * entry with the ncp held exclusively. This is a hack 2663 * at the moment. 
2664 */ 2665 if (error == ESTALE) { 2666 vput(vp); 2667 cache_unlock(&nd->nl_nch); 2668 cache_lock(&nd->nl_nch); 2669 cache_setunresolved(&nd->nl_nch); 2670 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2671 if (error == 0) { 2672 vp = NULL; 2673 goto retry; 2674 } 2675 return(error); 2676 } 2677 } 2678 vput(vp); 2679 return (error); 2680 } 2681 2682 /* 2683 * access_args(char *path, int flags) 2684 * 2685 * Check access permissions. 2686 */ 2687 int 2688 sys_access(struct access_args *uap) 2689 { 2690 struct nlookupdata nd; 2691 int error; 2692 2693 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2694 if (error == 0) 2695 error = kern_access(&nd, uap->flags, 0); 2696 nlookup_done(&nd); 2697 return (error); 2698 } 2699 2700 2701 /* 2702 * eaccess_args(char *path, int flags) 2703 * 2704 * Check access permissions. 2705 */ 2706 int 2707 sys_eaccess(struct eaccess_args *uap) 2708 { 2709 struct nlookupdata nd; 2710 int error; 2711 2712 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2713 if (error == 0) 2714 error = kern_access(&nd, uap->flags, AT_EACCESS); 2715 nlookup_done(&nd); 2716 return (error); 2717 } 2718 2719 2720 /* 2721 * faccessat_args(int fd, char *path, int amode, int flags) 2722 * 2723 * Check access permissions. 
2724 */ 2725 int 2726 sys_faccessat(struct faccessat_args *uap) 2727 { 2728 struct nlookupdata nd; 2729 struct file *fp; 2730 int error; 2731 2732 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2733 NLC_FOLLOW); 2734 if (error == 0) 2735 error = kern_access(&nd, uap->amode, uap->flags); 2736 nlookup_done_at(&nd, fp); 2737 return (error); 2738 } 2739 2740 int 2741 kern_stat(struct nlookupdata *nd, struct stat *st) 2742 { 2743 int error; 2744 struct vnode *vp; 2745 2746 nd->nl_flags |= NLC_SHAREDLOCK; 2747 if ((error = nlookup(nd)) != 0) 2748 return (error); 2749 again: 2750 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2751 return (ENOENT); 2752 2753 if ((error = vget(vp, LK_SHARED)) != 0) 2754 return (error); 2755 error = vn_stat(vp, st, nd->nl_cred); 2756 2757 /* 2758 * If the file handle is stale we have to re-resolve the 2759 * entry with the ncp held exclusively. This is a hack 2760 * at the moment. 2761 */ 2762 if (error == ESTALE) { 2763 vput(vp); 2764 cache_unlock(&nd->nl_nch); 2765 cache_lock(&nd->nl_nch); 2766 cache_setunresolved(&nd->nl_nch); 2767 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2768 if (error == 0) 2769 goto again; 2770 } else { 2771 vput(vp); 2772 } 2773 return (error); 2774 } 2775 2776 /* 2777 * stat_args(char *path, struct stat *ub) 2778 * 2779 * Get file status; this version follows links. 2780 */ 2781 int 2782 sys_stat(struct stat_args *uap) 2783 { 2784 struct nlookupdata nd; 2785 struct stat st; 2786 int error; 2787 2788 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2789 if (error == 0) { 2790 error = kern_stat(&nd, &st); 2791 if (error == 0) 2792 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2793 } 2794 nlookup_done(&nd); 2795 return (error); 2796 } 2797 2798 /* 2799 * lstat_args(char *path, struct stat *ub) 2800 * 2801 * Get file status; this version does not follow links. 
2802 */ 2803 int 2804 sys_lstat(struct lstat_args *uap) 2805 { 2806 struct nlookupdata nd; 2807 struct stat st; 2808 int error; 2809 2810 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2811 if (error == 0) { 2812 error = kern_stat(&nd, &st); 2813 if (error == 0) 2814 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2815 } 2816 nlookup_done(&nd); 2817 return (error); 2818 } 2819 2820 /* 2821 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2822 * 2823 * Get status of file pointed to by fd/path. 2824 */ 2825 int 2826 sys_fstatat(struct fstatat_args *uap) 2827 { 2828 struct nlookupdata nd; 2829 struct stat st; 2830 int error; 2831 int flags; 2832 struct file *fp; 2833 2834 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2835 return (EINVAL); 2836 2837 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2838 2839 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2840 UIO_USERSPACE, flags); 2841 if (error == 0) { 2842 error = kern_stat(&nd, &st); 2843 if (error == 0) 2844 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2845 } 2846 nlookup_done_at(&nd, fp); 2847 return (error); 2848 } 2849 2850 static int 2851 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2852 { 2853 struct nlookupdata nd; 2854 struct vnode *vp; 2855 int error; 2856 2857 vp = NULL; 2858 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2859 if (error == 0) 2860 error = nlookup(&nd); 2861 if (error == 0) 2862 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2863 nlookup_done(&nd); 2864 if (error == 0) { 2865 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2866 vput(vp); 2867 } 2868 return (error); 2869 } 2870 2871 /* 2872 * pathconf_Args(char *path, int name) 2873 * 2874 * Get configurable pathname variables. 
2875 */ 2876 int 2877 sys_pathconf(struct pathconf_args *uap) 2878 { 2879 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2880 &uap->sysmsg_reg)); 2881 } 2882 2883 /* 2884 * lpathconf_Args(char *path, int name) 2885 * 2886 * Get configurable pathname variables, but don't follow symlinks. 2887 */ 2888 int 2889 sys_lpathconf(struct lpathconf_args *uap) 2890 { 2891 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2892 } 2893 2894 /* 2895 * XXX: daver 2896 * kern_readlink isn't properly split yet. There is a copyin burried 2897 * in VOP_READLINK(). 2898 */ 2899 int 2900 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2901 { 2902 struct thread *td = curthread; 2903 struct vnode *vp; 2904 struct iovec aiov; 2905 struct uio auio; 2906 int error; 2907 2908 nd->nl_flags |= NLC_SHAREDLOCK; 2909 if ((error = nlookup(nd)) != 0) 2910 return (error); 2911 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2912 if (error) 2913 return (error); 2914 if (vp->v_type != VLNK) { 2915 error = EINVAL; 2916 } else { 2917 aiov.iov_base = buf; 2918 aiov.iov_len = count; 2919 auio.uio_iov = &aiov; 2920 auio.uio_iovcnt = 1; 2921 auio.uio_offset = 0; 2922 auio.uio_rw = UIO_READ; 2923 auio.uio_segflg = UIO_USERSPACE; 2924 auio.uio_td = td; 2925 auio.uio_resid = count; 2926 error = VOP_READLINK(vp, &auio, td->td_ucred); 2927 } 2928 vput(vp); 2929 *res = count - auio.uio_resid; 2930 return (error); 2931 } 2932 2933 /* 2934 * readlink_args(char *path, char *buf, int count) 2935 * 2936 * Return target name of a symbolic link. 
2937 */ 2938 int 2939 sys_readlink(struct readlink_args *uap) 2940 { 2941 struct nlookupdata nd; 2942 int error; 2943 2944 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2945 if (error == 0) { 2946 error = kern_readlink(&nd, uap->buf, uap->count, 2947 &uap->sysmsg_result); 2948 } 2949 nlookup_done(&nd); 2950 return (error); 2951 } 2952 2953 /* 2954 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2955 * 2956 * Return target name of a symbolic link. The path is relative to the 2957 * directory associated with fd. 2958 */ 2959 int 2960 sys_readlinkat(struct readlinkat_args *uap) 2961 { 2962 struct nlookupdata nd; 2963 struct file *fp; 2964 int error; 2965 2966 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2967 if (error == 0) { 2968 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2969 &uap->sysmsg_result); 2970 } 2971 nlookup_done_at(&nd, fp); 2972 return (error); 2973 } 2974 2975 static int 2976 setfflags(struct vnode *vp, int flags) 2977 { 2978 struct thread *td = curthread; 2979 int error; 2980 struct vattr vattr; 2981 2982 /* 2983 * Prevent non-root users from setting flags on devices. When 2984 * a device is reused, users can retain ownership of the device 2985 * if they are allowed to set flags and programs assume that 2986 * chown can't fail when done as root. 2987 */ 2988 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2989 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2990 return (error); 2991 2992 /* 2993 * note: vget is required for any operation that might mod the vnode 2994 * so VINACTIVE is properly cleared. 2995 */ 2996 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2997 VATTR_NULL(&vattr); 2998 vattr.va_flags = flags; 2999 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3000 vput(vp); 3001 } 3002 return (error); 3003 } 3004 3005 /* 3006 * chflags(char *path, int flags) 3007 * 3008 * Change flags of a file given a path name. 
3009 */ 3010 int 3011 sys_chflags(struct chflags_args *uap) 3012 { 3013 struct nlookupdata nd; 3014 struct vnode *vp; 3015 int error; 3016 3017 vp = NULL; 3018 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3019 if (error == 0) 3020 error = nlookup(&nd); 3021 if (error == 0) 3022 error = ncp_writechk(&nd.nl_nch); 3023 if (error == 0) 3024 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3025 nlookup_done(&nd); 3026 if (error == 0) { 3027 error = setfflags(vp, uap->flags); 3028 vrele(vp); 3029 } 3030 return (error); 3031 } 3032 3033 /* 3034 * lchflags(char *path, int flags) 3035 * 3036 * Change flags of a file given a path name, but don't follow symlinks. 3037 */ 3038 int 3039 sys_lchflags(struct lchflags_args *uap) 3040 { 3041 struct nlookupdata nd; 3042 struct vnode *vp; 3043 int error; 3044 3045 vp = NULL; 3046 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3047 if (error == 0) 3048 error = nlookup(&nd); 3049 if (error == 0) 3050 error = ncp_writechk(&nd.nl_nch); 3051 if (error == 0) 3052 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3053 nlookup_done(&nd); 3054 if (error == 0) { 3055 error = setfflags(vp, uap->flags); 3056 vrele(vp); 3057 } 3058 return (error); 3059 } 3060 3061 /* 3062 * fchflags_args(int fd, int flags) 3063 * 3064 * Change flags of a file given a file descriptor. 
3065 */ 3066 int 3067 sys_fchflags(struct fchflags_args *uap) 3068 { 3069 struct thread *td = curthread; 3070 struct proc *p = td->td_proc; 3071 struct file *fp; 3072 int error; 3073 3074 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3075 return (error); 3076 if (fp->f_nchandle.ncp) 3077 error = ncp_writechk(&fp->f_nchandle); 3078 if (error == 0) 3079 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3080 fdrop(fp); 3081 return (error); 3082 } 3083 3084 /* 3085 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3086 * change flags given a pathname relative to a filedescriptor 3087 */ 3088 int sys_chflagsat(struct chflagsat_args *uap) 3089 { 3090 struct nlookupdata nd; 3091 struct vnode *vp; 3092 struct file *fp; 3093 int error; 3094 int lookupflags; 3095 3096 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3097 return (EINVAL); 3098 3099 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3100 3101 vp = NULL; 3102 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3103 if (error == 0) 3104 error = nlookup(&nd); 3105 if (error == 0) 3106 error = ncp_writechk(&nd.nl_nch); 3107 if (error == 0) 3108 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3109 nlookup_done_at(&nd, fp); 3110 if (error == 0) { 3111 error = setfflags(vp, uap->flags); 3112 vrele(vp); 3113 } 3114 return (error); 3115 } 3116 3117 3118 static int 3119 setfmode(struct vnode *vp, int mode) 3120 { 3121 struct thread *td = curthread; 3122 int error; 3123 struct vattr vattr; 3124 3125 /* 3126 * note: vget is required for any operation that might mod the vnode 3127 * so VINACTIVE is properly cleared. 
3128 */ 3129 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3130 VATTR_NULL(&vattr); 3131 vattr.va_mode = mode & ALLPERMS; 3132 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3133 vput(vp); 3134 } 3135 return error; 3136 } 3137 3138 int 3139 kern_chmod(struct nlookupdata *nd, int mode) 3140 { 3141 struct vnode *vp; 3142 int error; 3143 3144 if ((error = nlookup(nd)) != 0) 3145 return (error); 3146 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3147 return (error); 3148 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3149 error = setfmode(vp, mode); 3150 vrele(vp); 3151 return (error); 3152 } 3153 3154 /* 3155 * chmod_args(char *path, int mode) 3156 * 3157 * Change mode of a file given path name. 3158 */ 3159 int 3160 sys_chmod(struct chmod_args *uap) 3161 { 3162 struct nlookupdata nd; 3163 int error; 3164 3165 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3166 if (error == 0) 3167 error = kern_chmod(&nd, uap->mode); 3168 nlookup_done(&nd); 3169 return (error); 3170 } 3171 3172 /* 3173 * lchmod_args(char *path, int mode) 3174 * 3175 * Change mode of a file given path name (don't follow links.) 3176 */ 3177 int 3178 sys_lchmod(struct lchmod_args *uap) 3179 { 3180 struct nlookupdata nd; 3181 int error; 3182 3183 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3184 if (error == 0) 3185 error = kern_chmod(&nd, uap->mode); 3186 nlookup_done(&nd); 3187 return (error); 3188 } 3189 3190 /* 3191 * fchmod_args(int fd, int mode) 3192 * 3193 * Change mode of a file given a file descriptor. 
3194 */ 3195 int 3196 sys_fchmod(struct fchmod_args *uap) 3197 { 3198 struct thread *td = curthread; 3199 struct proc *p = td->td_proc; 3200 struct file *fp; 3201 int error; 3202 3203 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3204 return (error); 3205 if (fp->f_nchandle.ncp) 3206 error = ncp_writechk(&fp->f_nchandle); 3207 if (error == 0) 3208 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3209 fdrop(fp); 3210 return (error); 3211 } 3212 3213 /* 3214 * fchmodat_args(char *path, int mode) 3215 * 3216 * Change mode of a file pointed to by fd/path. 3217 */ 3218 int 3219 sys_fchmodat(struct fchmodat_args *uap) 3220 { 3221 struct nlookupdata nd; 3222 struct file *fp; 3223 int error; 3224 int flags; 3225 3226 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3227 return (EINVAL); 3228 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3229 3230 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3231 UIO_USERSPACE, flags); 3232 if (error == 0) 3233 error = kern_chmod(&nd, uap->mode); 3234 nlookup_done_at(&nd, fp); 3235 return (error); 3236 } 3237 3238 static int 3239 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3240 { 3241 struct thread *td = curthread; 3242 int error; 3243 struct vattr vattr; 3244 uid_t o_uid; 3245 gid_t o_gid; 3246 uint64_t size; 3247 3248 /* 3249 * note: vget is required for any operation that might mod the vnode 3250 * so VINACTIVE is properly cleared. 
3251 */ 3252 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3253 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3254 return error; 3255 o_uid = vattr.va_uid; 3256 o_gid = vattr.va_gid; 3257 size = vattr.va_size; 3258 3259 VATTR_NULL(&vattr); 3260 vattr.va_uid = uid; 3261 vattr.va_gid = gid; 3262 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3263 vput(vp); 3264 } 3265 3266 if (error == 0) { 3267 if (uid == -1) 3268 uid = o_uid; 3269 if (gid == -1) 3270 gid = o_gid; 3271 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3272 VFS_ACCOUNT(mp, uid, gid, size); 3273 } 3274 3275 return error; 3276 } 3277 3278 int 3279 kern_chown(struct nlookupdata *nd, int uid, int gid) 3280 { 3281 struct vnode *vp; 3282 int error; 3283 3284 if ((error = nlookup(nd)) != 0) 3285 return (error); 3286 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3287 return (error); 3288 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3289 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3290 vrele(vp); 3291 return (error); 3292 } 3293 3294 /* 3295 * chown(char *path, int uid, int gid) 3296 * 3297 * Set ownership given a path name. 3298 */ 3299 int 3300 sys_chown(struct chown_args *uap) 3301 { 3302 struct nlookupdata nd; 3303 int error; 3304 3305 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3306 if (error == 0) 3307 error = kern_chown(&nd, uap->uid, uap->gid); 3308 nlookup_done(&nd); 3309 return (error); 3310 } 3311 3312 /* 3313 * lchown_args(char *path, int uid, int gid) 3314 * 3315 * Set ownership given a path name, do not cross symlinks. 3316 */ 3317 int 3318 sys_lchown(struct lchown_args *uap) 3319 { 3320 struct nlookupdata nd; 3321 int error; 3322 3323 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3324 if (error == 0) 3325 error = kern_chown(&nd, uap->uid, uap->gid); 3326 nlookup_done(&nd); 3327 return (error); 3328 } 3329 3330 /* 3331 * fchown_args(int fd, int uid, int gid) 3332 * 3333 * Set ownership given a file descriptor. 
3334 */ 3335 int 3336 sys_fchown(struct fchown_args *uap) 3337 { 3338 struct thread *td = curthread; 3339 struct proc *p = td->td_proc; 3340 struct file *fp; 3341 int error; 3342 3343 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3344 return (error); 3345 if (fp->f_nchandle.ncp) 3346 error = ncp_writechk(&fp->f_nchandle); 3347 if (error == 0) 3348 error = setfown(p->p_fd->fd_ncdir.mount, 3349 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3350 fdrop(fp); 3351 return (error); 3352 } 3353 3354 /* 3355 * fchownat(int fd, char *path, int uid, int gid, int flags) 3356 * 3357 * Set ownership of file pointed to by fd/path. 3358 */ 3359 int 3360 sys_fchownat(struct fchownat_args *uap) 3361 { 3362 struct nlookupdata nd; 3363 struct file *fp; 3364 int error; 3365 int flags; 3366 3367 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3368 return (EINVAL); 3369 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3370 3371 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3372 UIO_USERSPACE, flags); 3373 if (error == 0) 3374 error = kern_chown(&nd, uap->uid, uap->gid); 3375 nlookup_done_at(&nd, fp); 3376 return (error); 3377 } 3378 3379 3380 static int 3381 getutimes(const struct timeval *tvp, struct timespec *tsp) 3382 { 3383 struct timeval tv[2]; 3384 3385 if (tvp == NULL) { 3386 microtime(&tv[0]); 3387 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3388 tsp[1] = tsp[0]; 3389 } else { 3390 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3391 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3392 } 3393 return 0; 3394 } 3395 3396 static int 3397 setutimes(struct vnode *vp, struct vattr *vattr, 3398 const struct timespec *ts, int nullflag) 3399 { 3400 struct thread *td = curthread; 3401 int error; 3402 3403 VATTR_NULL(vattr); 3404 vattr->va_atime = ts[0]; 3405 vattr->va_mtime = ts[1]; 3406 if (nullflag) 3407 vattr->va_vaflags |= VA_UTIMES_NULL; 3408 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3409 3410 return error; 3411 } 3412 3413 int 3414 kern_utimes(struct nlookupdata *nd, struct timeval 
*tptr) 3415 { 3416 struct timespec ts[2]; 3417 struct vnode *vp; 3418 struct vattr vattr; 3419 int error; 3420 3421 if ((error = getutimes(tptr, ts)) != 0) 3422 return (error); 3423 3424 /* 3425 * NOTE: utimes() succeeds for the owner even if the file 3426 * is not user-writable. 3427 */ 3428 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3429 3430 if ((error = nlookup(nd)) != 0) 3431 return (error); 3432 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3433 return (error); 3434 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3435 return (error); 3436 3437 /* 3438 * note: vget is required for any operation that might mod the vnode 3439 * so VINACTIVE is properly cleared. 3440 */ 3441 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3442 error = vget(vp, LK_EXCLUSIVE); 3443 if (error == 0) { 3444 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3445 vput(vp); 3446 } 3447 } 3448 vrele(vp); 3449 return (error); 3450 } 3451 3452 /* 3453 * utimes_args(char *path, struct timeval *tptr) 3454 * 3455 * Set the access and modification times of a file. 3456 */ 3457 int 3458 sys_utimes(struct utimes_args *uap) 3459 { 3460 struct timeval tv[2]; 3461 struct nlookupdata nd; 3462 int error; 3463 3464 if (uap->tptr) { 3465 error = copyin(uap->tptr, tv, sizeof(tv)); 3466 if (error) 3467 return (error); 3468 } 3469 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3470 if (error == 0) 3471 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3472 nlookup_done(&nd); 3473 return (error); 3474 } 3475 3476 /* 3477 * lutimes_args(char *path, struct timeval *tptr) 3478 * 3479 * Set the access and modification times of a file. 
3480 */ 3481 int 3482 sys_lutimes(struct lutimes_args *uap) 3483 { 3484 struct timeval tv[2]; 3485 struct nlookupdata nd; 3486 int error; 3487 3488 if (uap->tptr) { 3489 error = copyin(uap->tptr, tv, sizeof(tv)); 3490 if (error) 3491 return (error); 3492 } 3493 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3494 if (error == 0) 3495 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3496 nlookup_done(&nd); 3497 return (error); 3498 } 3499 3500 /* 3501 * Set utimes on a file descriptor. The creds used to open the 3502 * file are used to determine whether the operation is allowed 3503 * or not. 3504 */ 3505 int 3506 kern_futimes(int fd, struct timeval *tptr) 3507 { 3508 struct thread *td = curthread; 3509 struct proc *p = td->td_proc; 3510 struct timespec ts[2]; 3511 struct file *fp; 3512 struct vnode *vp; 3513 struct vattr vattr; 3514 int error; 3515 3516 error = getutimes(tptr, ts); 3517 if (error) 3518 return (error); 3519 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3520 return (error); 3521 if (fp->f_nchandle.ncp) 3522 error = ncp_writechk(&fp->f_nchandle); 3523 if (error == 0) { 3524 vp = fp->f_data; 3525 error = vget(vp, LK_EXCLUSIVE); 3526 if (error == 0) { 3527 error = VOP_GETATTR(vp, &vattr); 3528 if (error == 0) { 3529 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3530 fp->f_cred); 3531 } 3532 if (error == 0) { 3533 error = setutimes(vp, &vattr, ts, 3534 (tptr == NULL)); 3535 } 3536 vput(vp); 3537 } 3538 } 3539 fdrop(fp); 3540 return (error); 3541 } 3542 3543 /* 3544 * futimes_args(int fd, struct timeval *tptr) 3545 * 3546 * Set the access and modification times of a file. 3547 */ 3548 int 3549 sys_futimes(struct futimes_args *uap) 3550 { 3551 struct timeval tv[2]; 3552 int error; 3553 3554 if (uap->tptr) { 3555 error = copyin(uap->tptr, tv, sizeof(tv)); 3556 if (error) 3557 return (error); 3558 } 3559 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3560 3561 return (error); 3562 } 3563 3564 int 3565 kern_truncate(struct nlookupdata *nd, off_t length) 3566 { 3567 struct vnode *vp; 3568 struct vattr vattr; 3569 int error; 3570 uid_t uid = 0; 3571 gid_t gid = 0; 3572 uint64_t old_size = 0; 3573 3574 if (length < 0) 3575 return(EINVAL); 3576 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3577 if ((error = nlookup(nd)) != 0) 3578 return (error); 3579 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3580 return (error); 3581 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3582 return (error); 3583 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3584 if (error) { 3585 vrele(vp); 3586 return (error); 3587 } 3588 if (vp->v_type == VDIR) { 3589 error = EISDIR; 3590 goto done; 3591 } 3592 if (vfs_quota_enabled) { 3593 error = VOP_GETATTR(vp, &vattr); 3594 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3595 uid = vattr.va_uid; 3596 gid = vattr.va_gid; 3597 old_size = vattr.va_size; 3598 } 3599 3600 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3601 VATTR_NULL(&vattr); 3602 vattr.va_size = length; 3603 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3604 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3605 } 3606 done: 3607 vput(vp); 3608 return (error); 3609 } 3610 3611 /* 3612 * truncate(char *path, int pad, off_t length) 3613 * 3614 * Truncate a file given its path name. 
3615 */ 3616 int 3617 sys_truncate(struct truncate_args *uap) 3618 { 3619 struct nlookupdata nd; 3620 int error; 3621 3622 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3623 if (error == 0) 3624 error = kern_truncate(&nd, uap->length); 3625 nlookup_done(&nd); 3626 return error; 3627 } 3628 3629 int 3630 kern_ftruncate(int fd, off_t length) 3631 { 3632 struct thread *td = curthread; 3633 struct proc *p = td->td_proc; 3634 struct vattr vattr; 3635 struct vnode *vp; 3636 struct file *fp; 3637 int error; 3638 uid_t uid = 0; 3639 gid_t gid = 0; 3640 uint64_t old_size = 0; 3641 struct mount *mp; 3642 3643 if (length < 0) 3644 return(EINVAL); 3645 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3646 return (error); 3647 if (fp->f_nchandle.ncp) { 3648 error = ncp_writechk(&fp->f_nchandle); 3649 if (error) 3650 goto done; 3651 } 3652 if ((fp->f_flag & FWRITE) == 0) { 3653 error = EINVAL; 3654 goto done; 3655 } 3656 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3657 error = EINVAL; 3658 goto done; 3659 } 3660 vp = (struct vnode *)fp->f_data; 3661 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3662 if (vp->v_type == VDIR) { 3663 error = EISDIR; 3664 vn_unlock(vp); 3665 goto done; 3666 } 3667 3668 if (vfs_quota_enabled) { 3669 error = VOP_GETATTR(vp, &vattr); 3670 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3671 uid = vattr.va_uid; 3672 gid = vattr.va_gid; 3673 old_size = vattr.va_size; 3674 } 3675 3676 if ((error = vn_writechk(vp, NULL)) == 0) { 3677 VATTR_NULL(&vattr); 3678 vattr.va_size = length; 3679 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3680 mp = vq_vptomp(vp); 3681 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3682 } 3683 vn_unlock(vp); 3684 done: 3685 fdrop(fp); 3686 return (error); 3687 } 3688 3689 /* 3690 * ftruncate_args(int fd, int pad, off_t length) 3691 * 3692 * Truncate a file given a file descriptor. 
3693 */ 3694 int 3695 sys_ftruncate(struct ftruncate_args *uap) 3696 { 3697 int error; 3698 3699 error = kern_ftruncate(uap->fd, uap->length); 3700 3701 return (error); 3702 } 3703 3704 /* 3705 * fsync(int fd) 3706 * 3707 * Sync an open file. 3708 */ 3709 int 3710 sys_fsync(struct fsync_args *uap) 3711 { 3712 struct thread *td = curthread; 3713 struct proc *p = td->td_proc; 3714 struct vnode *vp; 3715 struct file *fp; 3716 vm_object_t obj; 3717 int error; 3718 3719 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3720 return (error); 3721 vp = (struct vnode *)fp->f_data; 3722 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3723 if ((obj = vp->v_object) != NULL) { 3724 if (vp->v_mount == NULL || 3725 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3726 vm_object_page_clean(obj, 0, 0, 0); 3727 } 3728 } 3729 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3730 if (error == 0 && vp->v_mount) 3731 error = buf_fsync(vp); 3732 vn_unlock(vp); 3733 fdrop(fp); 3734 3735 return (error); 3736 } 3737 3738 int 3739 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3740 { 3741 struct nchandle fnchd; 3742 struct nchandle tnchd; 3743 struct namecache *ncp; 3744 struct vnode *fdvp; 3745 struct vnode *tdvp; 3746 struct mount *mp; 3747 int error; 3748 3749 bwillinode(1); 3750 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3751 if ((error = nlookup(fromnd)) != 0) 3752 return (error); 3753 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3754 return (ENOENT); 3755 fnchd.mount = fromnd->nl_nch.mount; 3756 cache_hold(&fnchd); 3757 3758 /* 3759 * unlock the source nch so we can lookup the target nch without 3760 * deadlocking. The target may or may not exist so we do not check 3761 * for a target vp like kern_mkdir() and other creation functions do. 3762 * 3763 * The source and target directories are ref'd and rechecked after 3764 * everything is relocked to determine if the source or target file 3765 * has been renamed. 
3766 */ 3767 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3768 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3769 cache_unlock(&fromnd->nl_nch); 3770 3771 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3772 if ((error = nlookup(tond)) != 0) { 3773 cache_drop(&fnchd); 3774 return (error); 3775 } 3776 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3777 cache_drop(&fnchd); 3778 return (ENOENT); 3779 } 3780 tnchd.mount = tond->nl_nch.mount; 3781 cache_hold(&tnchd); 3782 3783 /* 3784 * If the source and target are the same there is nothing to do 3785 */ 3786 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3787 cache_drop(&fnchd); 3788 cache_drop(&tnchd); 3789 return (0); 3790 } 3791 3792 /* 3793 * Mount points cannot be renamed or overwritten 3794 */ 3795 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3796 NCF_ISMOUNTPT 3797 ) { 3798 cache_drop(&fnchd); 3799 cache_drop(&tnchd); 3800 return (EINVAL); 3801 } 3802 3803 /* 3804 * Relock the source ncp. cache_relock() will deal with any 3805 * deadlocks against the already-locked tond and will also 3806 * make sure both are resolved. 3807 * 3808 * NOTE AFTER RELOCKING: The source or target ncp may have become 3809 * invalid while they were unlocked, nc_vp and nc_mount could 3810 * be NULL. 3811 */ 3812 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3813 &tond->nl_nch, tond->nl_cred); 3814 fromnd->nl_flags |= NLC_NCPISLOCKED; 3815 3816 /* 3817 * If either fromnd or tond are marked destroyed a ripout occured 3818 * out from under us and we must retry. 
3819 */ 3820 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3821 fromnd->nl_nch.ncp->nc_vp == NULL || 3822 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3823 kprintf("kern_rename: retry due to ripout on: " 3824 "\"%s\" -> \"%s\"\n", 3825 fromnd->nl_nch.ncp->nc_name, 3826 tond->nl_nch.ncp->nc_name); 3827 cache_drop(&fnchd); 3828 cache_drop(&tnchd); 3829 return (EAGAIN); 3830 } 3831 3832 /* 3833 * make sure the parent directories linkages are the same 3834 */ 3835 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3836 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3837 cache_drop(&fnchd); 3838 cache_drop(&tnchd); 3839 return (ENOENT); 3840 } 3841 3842 /* 3843 * Both the source and target must be within the same filesystem and 3844 * in the same filesystem as their parent directories within the 3845 * namecache topology. 3846 * 3847 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3848 */ 3849 mp = fnchd.mount; 3850 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3851 mp != tond->nl_nch.mount) { 3852 cache_drop(&fnchd); 3853 cache_drop(&tnchd); 3854 return (EXDEV); 3855 } 3856 3857 /* 3858 * Make sure the mount point is writable 3859 */ 3860 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3861 cache_drop(&fnchd); 3862 cache_drop(&tnchd); 3863 return (error); 3864 } 3865 3866 /* 3867 * If the target exists and either the source or target is a directory, 3868 * then both must be directories. 3869 * 3870 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3871 * have become NULL. 3872 */ 3873 if (tond->nl_nch.ncp->nc_vp) { 3874 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3875 error = ENOENT; 3876 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3877 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3878 error = ENOTDIR; 3879 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3880 error = EISDIR; 3881 } 3882 } 3883 3884 /* 3885 * You cannot rename a source into itself or a subdirectory of itself. 
3886 * We check this by travsersing the target directory upwards looking 3887 * for a match against the source. 3888 * 3889 * XXX MPSAFE 3890 */ 3891 if (error == 0) { 3892 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3893 if (fromnd->nl_nch.ncp == ncp) { 3894 error = EINVAL; 3895 break; 3896 } 3897 } 3898 } 3899 3900 cache_drop(&fnchd); 3901 cache_drop(&tnchd); 3902 3903 /* 3904 * Even though the namespaces are different, they may still represent 3905 * hardlinks to the same file. The filesystem might have a hard time 3906 * with this so we issue a NREMOVE of the source instead of a NRENAME 3907 * when we detect the situation. 3908 */ 3909 if (error == 0) { 3910 fdvp = fromnd->nl_dvp; 3911 tdvp = tond->nl_dvp; 3912 if (fdvp == NULL || tdvp == NULL) { 3913 error = EPERM; 3914 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3915 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3916 fromnd->nl_cred); 3917 } else { 3918 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3919 fdvp, tdvp, tond->nl_cred); 3920 } 3921 } 3922 return (error); 3923 } 3924 3925 /* 3926 * rename_args(char *from, char *to) 3927 * 3928 * Rename files. Source and destination must either both be directories, 3929 * or both not be directories. If target is a directory, it must be empty. 3930 */ 3931 int 3932 sys_rename(struct rename_args *uap) 3933 { 3934 struct nlookupdata fromnd, tond; 3935 int error; 3936 3937 do { 3938 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3939 if (error == 0) { 3940 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3941 if (error == 0) 3942 error = kern_rename(&fromnd, &tond); 3943 nlookup_done(&tond); 3944 } 3945 nlookup_done(&fromnd); 3946 } while (error == EAGAIN); 3947 return (error); 3948 } 3949 3950 /* 3951 * renameat_args(int oldfd, char *old, int newfd, char *new) 3952 * 3953 * Rename files using paths relative to the directories associated with 3954 * oldfd and newfd. 
Source and destination must either both be directories, 3955 * or both not be directories. If target is a directory, it must be empty. 3956 */ 3957 int 3958 sys_renameat(struct renameat_args *uap) 3959 { 3960 struct nlookupdata oldnd, newnd; 3961 struct file *oldfp, *newfp; 3962 int error; 3963 3964 do { 3965 error = nlookup_init_at(&oldnd, &oldfp, 3966 uap->oldfd, uap->old, 3967 UIO_USERSPACE, 0); 3968 if (error == 0) { 3969 error = nlookup_init_at(&newnd, &newfp, 3970 uap->newfd, uap->new, 3971 UIO_USERSPACE, 0); 3972 if (error == 0) 3973 error = kern_rename(&oldnd, &newnd); 3974 nlookup_done_at(&newnd, newfp); 3975 } 3976 nlookup_done_at(&oldnd, oldfp); 3977 } while (error == EAGAIN); 3978 return (error); 3979 } 3980 3981 int 3982 kern_mkdir(struct nlookupdata *nd, int mode) 3983 { 3984 struct thread *td = curthread; 3985 struct proc *p = td->td_proc; 3986 struct vnode *vp; 3987 struct vattr vattr; 3988 int error; 3989 3990 bwillinode(1); 3991 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3992 if ((error = nlookup(nd)) != 0) 3993 return (error); 3994 3995 if (nd->nl_nch.ncp->nc_vp) 3996 return (EEXIST); 3997 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3998 return (error); 3999 VATTR_NULL(&vattr); 4000 vattr.va_type = VDIR; 4001 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4002 4003 vp = NULL; 4004 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4005 if (error == 0) 4006 vput(vp); 4007 return (error); 4008 } 4009 4010 /* 4011 * mkdir_args(char *path, int mode) 4012 * 4013 * Make a directory file. 4014 */ 4015 int 4016 sys_mkdir(struct mkdir_args *uap) 4017 { 4018 struct nlookupdata nd; 4019 int error; 4020 4021 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4022 if (error == 0) 4023 error = kern_mkdir(&nd, uap->mode); 4024 nlookup_done(&nd); 4025 return (error); 4026 } 4027 4028 /* 4029 * mkdirat_args(int fd, char *path, mode_t mode) 4030 * 4031 * Make a directory file. 
The path is relative to the directory associated 4032 * with fd. 4033 */ 4034 int 4035 sys_mkdirat(struct mkdirat_args *uap) 4036 { 4037 struct nlookupdata nd; 4038 struct file *fp; 4039 int error; 4040 4041 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4042 if (error == 0) 4043 error = kern_mkdir(&nd, uap->mode); 4044 nlookup_done_at(&nd, fp); 4045 return (error); 4046 } 4047 4048 int 4049 kern_rmdir(struct nlookupdata *nd) 4050 { 4051 int error; 4052 4053 bwillinode(1); 4054 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4055 if ((error = nlookup(nd)) != 0) 4056 return (error); 4057 4058 /* 4059 * Do not allow directories representing mount points to be 4060 * deleted, even if empty. Check write perms on mount point 4061 * in case the vnode is aliased (aka nullfs). 4062 */ 4063 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4064 return (EBUSY); 4065 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4066 return (error); 4067 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4068 return (error); 4069 } 4070 4071 /* 4072 * rmdir_args(char *path) 4073 * 4074 * Remove a directory file. 
4075 */ 4076 int 4077 sys_rmdir(struct rmdir_args *uap) 4078 { 4079 struct nlookupdata nd; 4080 int error; 4081 4082 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4083 if (error == 0) 4084 error = kern_rmdir(&nd); 4085 nlookup_done(&nd); 4086 return (error); 4087 } 4088 4089 int 4090 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4091 enum uio_seg direction) 4092 { 4093 struct thread *td = curthread; 4094 struct proc *p = td->td_proc; 4095 struct vnode *vp; 4096 struct file *fp; 4097 struct uio auio; 4098 struct iovec aiov; 4099 off_t loff; 4100 int error, eofflag; 4101 4102 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4103 return (error); 4104 if ((fp->f_flag & FREAD) == 0) { 4105 error = EBADF; 4106 goto done; 4107 } 4108 vp = (struct vnode *)fp->f_data; 4109 unionread: 4110 if (vp->v_type != VDIR) { 4111 error = EINVAL; 4112 goto done; 4113 } 4114 aiov.iov_base = buf; 4115 aiov.iov_len = count; 4116 auio.uio_iov = &aiov; 4117 auio.uio_iovcnt = 1; 4118 auio.uio_rw = UIO_READ; 4119 auio.uio_segflg = direction; 4120 auio.uio_td = td; 4121 auio.uio_resid = count; 4122 loff = auio.uio_offset = fp->f_offset; 4123 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4124 fp->f_offset = auio.uio_offset; 4125 if (error) 4126 goto done; 4127 if (count == auio.uio_resid) { 4128 if (union_dircheckp) { 4129 error = union_dircheckp(td, &vp, fp); 4130 if (error == -1) 4131 goto unionread; 4132 if (error) 4133 goto done; 4134 } 4135 #if 0 4136 if ((vp->v_flag & VROOT) && 4137 (vp->v_mount->mnt_flag & MNT_UNION)) { 4138 struct vnode *tvp = vp; 4139 vp = vp->v_mount->mnt_vnodecovered; 4140 vref(vp); 4141 fp->f_data = vp; 4142 fp->f_offset = 0; 4143 vrele(tvp); 4144 goto unionread; 4145 } 4146 #endif 4147 } 4148 4149 /* 4150 * WARNING! *basep may not be wide enough to accomodate the 4151 * seek offset. XXX should we hack this to return the upper 32 bits 4152 * for offsets greater then 4G? 
4153 */ 4154 if (basep) { 4155 *basep = (long)loff; 4156 } 4157 *res = count - auio.uio_resid; 4158 done: 4159 fdrop(fp); 4160 return (error); 4161 } 4162 4163 /* 4164 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4165 * 4166 * Read a block of directory entries in a file system independent format. 4167 */ 4168 int 4169 sys_getdirentries(struct getdirentries_args *uap) 4170 { 4171 long base; 4172 int error; 4173 4174 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4175 &uap->sysmsg_result, UIO_USERSPACE); 4176 4177 if (error == 0 && uap->basep) 4178 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4179 return (error); 4180 } 4181 4182 /* 4183 * getdents_args(int fd, char *buf, size_t count) 4184 */ 4185 int 4186 sys_getdents(struct getdents_args *uap) 4187 { 4188 int error; 4189 4190 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4191 &uap->sysmsg_result, UIO_USERSPACE); 4192 4193 return (error); 4194 } 4195 4196 /* 4197 * Set the mode mask for creation of filesystem nodes. 4198 * 4199 * umask(int newmask) 4200 */ 4201 int 4202 sys_umask(struct umask_args *uap) 4203 { 4204 struct thread *td = curthread; 4205 struct proc *p = td->td_proc; 4206 struct filedesc *fdp; 4207 4208 fdp = p->p_fd; 4209 uap->sysmsg_result = fdp->fd_cmask; 4210 fdp->fd_cmask = uap->newmask & ALLPERMS; 4211 return (0); 4212 } 4213 4214 /* 4215 * revoke(char *path) 4216 * 4217 * Void all references to file by ripping underlying filesystem 4218 * away from vnode. 
4219 */ 4220 int 4221 sys_revoke(struct revoke_args *uap) 4222 { 4223 struct nlookupdata nd; 4224 struct vattr vattr; 4225 struct vnode *vp; 4226 struct ucred *cred; 4227 int error; 4228 4229 vp = NULL; 4230 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4231 if (error == 0) 4232 error = nlookup(&nd); 4233 if (error == 0) 4234 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4235 cred = crhold(nd.nl_cred); 4236 nlookup_done(&nd); 4237 if (error == 0) { 4238 if (error == 0) 4239 error = VOP_GETATTR(vp, &vattr); 4240 if (error == 0 && cred->cr_uid != vattr.va_uid) 4241 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4242 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4243 if (vcount(vp) > 0) 4244 error = vrevoke(vp, cred); 4245 } else if (error == 0) { 4246 error = vrevoke(vp, cred); 4247 } 4248 vrele(vp); 4249 } 4250 if (cred) 4251 crfree(cred); 4252 return (error); 4253 } 4254 4255 /* 4256 * getfh_args(char *fname, fhandle_t *fhp) 4257 * 4258 * Get (NFS) file handle 4259 * 4260 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4261 * mount. This allows nullfs mounts to be explicitly exported. 4262 * 4263 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4264 * 4265 * nullfs mounts of subdirectories are not safe. That is, it will 4266 * work, but you do not really have protection against access to 4267 * the related parent directories. 
4268 */ 4269 int 4270 sys_getfh(struct getfh_args *uap) 4271 { 4272 struct thread *td = curthread; 4273 struct nlookupdata nd; 4274 fhandle_t fh; 4275 struct vnode *vp; 4276 struct mount *mp; 4277 int error; 4278 4279 /* 4280 * Must be super user 4281 */ 4282 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4283 return (error); 4284 4285 vp = NULL; 4286 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4287 if (error == 0) 4288 error = nlookup(&nd); 4289 if (error == 0) 4290 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4291 mp = nd.nl_nch.mount; 4292 nlookup_done(&nd); 4293 if (error == 0) { 4294 bzero(&fh, sizeof(fh)); 4295 fh.fh_fsid = mp->mnt_stat.f_fsid; 4296 error = VFS_VPTOFH(vp, &fh.fh_fid); 4297 vput(vp); 4298 if (error == 0) 4299 error = copyout(&fh, uap->fhp, sizeof(fh)); 4300 } 4301 return (error); 4302 } 4303 4304 /* 4305 * fhopen_args(const struct fhandle *u_fhp, int flags) 4306 * 4307 * syscall for the rpc.lockd to use to translate a NFS file handle into 4308 * an open descriptor. 4309 * 4310 * warning: do not remove the priv_check() call or this becomes one giant 4311 * security hole. 4312 */ 4313 int 4314 sys_fhopen(struct fhopen_args *uap) 4315 { 4316 struct thread *td = curthread; 4317 struct filedesc *fdp = td->td_proc->p_fd; 4318 struct mount *mp; 4319 struct vnode *vp; 4320 struct fhandle fhp; 4321 struct vattr vat; 4322 struct vattr *vap = &vat; 4323 struct flock lf; 4324 int fmode, mode, error = 0, type; 4325 struct file *nfp; 4326 struct file *fp; 4327 int indx; 4328 4329 /* 4330 * Must be super user 4331 */ 4332 error = priv_check(td, PRIV_ROOT); 4333 if (error) 4334 return (error); 4335 4336 fmode = FFLAGS(uap->flags); 4337 4338 /* 4339 * Why not allow a non-read/write open for our lockd? 
4340 */ 4341 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4342 return (EINVAL); 4343 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4344 if (error) 4345 return(error); 4346 4347 /* 4348 * Find the mount point 4349 */ 4350 mp = vfs_getvfs(&fhp.fh_fsid); 4351 if (mp == NULL) { 4352 error = ESTALE; 4353 goto done; 4354 } 4355 /* now give me my vnode, it gets returned to me locked */ 4356 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4357 if (error) 4358 goto done; 4359 /* 4360 * from now on we have to make sure not 4361 * to forget about the vnode 4362 * any error that causes an abort must vput(vp) 4363 * just set error = err and 'goto bad;'. 4364 */ 4365 4366 /* 4367 * from vn_open 4368 */ 4369 if (vp->v_type == VLNK) { 4370 error = EMLINK; 4371 goto bad; 4372 } 4373 if (vp->v_type == VSOCK) { 4374 error = EOPNOTSUPP; 4375 goto bad; 4376 } 4377 mode = 0; 4378 if (fmode & (FWRITE | O_TRUNC)) { 4379 if (vp->v_type == VDIR) { 4380 error = EISDIR; 4381 goto bad; 4382 } 4383 error = vn_writechk(vp, NULL); 4384 if (error) 4385 goto bad; 4386 mode |= VWRITE; 4387 } 4388 if (fmode & FREAD) 4389 mode |= VREAD; 4390 if (mode) { 4391 error = VOP_ACCESS(vp, mode, td->td_ucred); 4392 if (error) 4393 goto bad; 4394 } 4395 if (fmode & O_TRUNC) { 4396 vn_unlock(vp); /* XXX */ 4397 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4398 VATTR_NULL(vap); 4399 vap->va_size = 0; 4400 error = VOP_SETATTR(vp, vap, td->td_ucred); 4401 if (error) 4402 goto bad; 4403 } 4404 4405 /* 4406 * VOP_OPEN needs the file pointer so it can potentially override 4407 * it. 4408 * 4409 * WARNING! no f_nchandle will be associated when fhopen()ing a 4410 * directory. XXX 4411 */ 4412 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4413 goto bad; 4414 fp = nfp; 4415 4416 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4417 if (error) { 4418 /* 4419 * setting f_ops this way prevents VOP_CLOSE from being 4420 * called or fdrop() releasing the vp from v_data. 
Since 4421 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4422 */ 4423 fp->f_ops = &badfileops; 4424 fp->f_data = NULL; 4425 goto bad_drop; 4426 } 4427 4428 /* 4429 * The fp is given its own reference, we still have our ref and lock. 4430 * 4431 * Assert that all regular files must be created with a VM object. 4432 */ 4433 if (vp->v_type == VREG && vp->v_object == NULL) { 4434 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4435 goto bad_drop; 4436 } 4437 4438 /* 4439 * The open was successful. Handle any locking requirements. 4440 */ 4441 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4442 lf.l_whence = SEEK_SET; 4443 lf.l_start = 0; 4444 lf.l_len = 0; 4445 if (fmode & O_EXLOCK) 4446 lf.l_type = F_WRLCK; 4447 else 4448 lf.l_type = F_RDLCK; 4449 if (fmode & FNONBLOCK) 4450 type = 0; 4451 else 4452 type = F_WAIT; 4453 vn_unlock(vp); 4454 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4455 /* 4456 * release our private reference. 4457 */ 4458 fsetfd(fdp, NULL, indx); 4459 fdrop(fp); 4460 vrele(vp); 4461 goto done; 4462 } 4463 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4464 fp->f_flag |= FHASLOCK; 4465 } 4466 4467 /* 4468 * Clean up. Associate the file pointer with the previously 4469 * reserved descriptor and return it. 
4470 */ 4471 vput(vp); 4472 fsetfd(fdp, fp, indx); 4473 fdrop(fp); 4474 uap->sysmsg_result = indx; 4475 if (uap->flags & O_CLOEXEC) 4476 error = fsetfdflags(fdp, indx, UF_EXCLOSE); 4477 return (error); 4478 4479 bad_drop: 4480 fsetfd(fdp, NULL, indx); 4481 fdrop(fp); 4482 bad: 4483 vput(vp); 4484 done: 4485 return (error); 4486 } 4487 4488 /* 4489 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4490 */ 4491 int 4492 sys_fhstat(struct fhstat_args *uap) 4493 { 4494 struct thread *td = curthread; 4495 struct stat sb; 4496 fhandle_t fh; 4497 struct mount *mp; 4498 struct vnode *vp; 4499 int error; 4500 4501 /* 4502 * Must be super user 4503 */ 4504 error = priv_check(td, PRIV_ROOT); 4505 if (error) 4506 return (error); 4507 4508 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4509 if (error) 4510 return (error); 4511 4512 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4513 error = ESTALE; 4514 if (error == 0) { 4515 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4516 error = vn_stat(vp, &sb, td->td_ucred); 4517 vput(vp); 4518 } 4519 } 4520 if (error == 0) 4521 error = copyout(&sb, uap->sb, sizeof(sb)); 4522 return (error); 4523 } 4524 4525 /* 4526 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4527 */ 4528 int 4529 sys_fhstatfs(struct fhstatfs_args *uap) 4530 { 4531 struct thread *td = curthread; 4532 struct proc *p = td->td_proc; 4533 struct statfs *sp; 4534 struct mount *mp; 4535 struct vnode *vp; 4536 struct statfs sb; 4537 char *fullpath, *freepath; 4538 fhandle_t fh; 4539 int error; 4540 4541 /* 4542 * Must be super user 4543 */ 4544 if ((error = priv_check(td, PRIV_ROOT))) 4545 return (error); 4546 4547 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4548 return (error); 4549 4550 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4551 error = ESTALE; 4552 goto done; 4553 } 4554 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4555 error = ESTALE; 4556 goto done; 4557 } 4558 4559 if ((error = VFS_FHTOVP(mp, NULL, 
&fh.fh_fid, &vp)) != 0) 4560 goto done; 4561 mp = vp->v_mount; 4562 sp = &mp->mnt_stat; 4563 vput(vp); 4564 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4565 goto done; 4566 4567 error = mount_path(p, mp, &fullpath, &freepath); 4568 if (error) 4569 goto done; 4570 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4571 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4572 kfree(freepath, M_TEMP); 4573 4574 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4575 if (priv_check(td, PRIV_ROOT)) { 4576 bcopy(sp, &sb, sizeof(sb)); 4577 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4578 sp = &sb; 4579 } 4580 error = copyout(sp, uap->buf, sizeof(*sp)); 4581 done: 4582 return (error); 4583 } 4584 4585 /* 4586 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4587 */ 4588 int 4589 sys_fhstatvfs(struct fhstatvfs_args *uap) 4590 { 4591 struct thread *td = curthread; 4592 struct proc *p = td->td_proc; 4593 struct statvfs *sp; 4594 struct mount *mp; 4595 struct vnode *vp; 4596 fhandle_t fh; 4597 int error; 4598 4599 /* 4600 * Must be super user 4601 */ 4602 if ((error = priv_check(td, PRIV_ROOT))) 4603 return (error); 4604 4605 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4606 return (error); 4607 4608 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4609 error = ESTALE; 4610 goto done; 4611 } 4612 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4613 error = ESTALE; 4614 goto done; 4615 } 4616 4617 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4618 goto done; 4619 mp = vp->v_mount; 4620 sp = &mp->mnt_vstat; 4621 vput(vp); 4622 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4623 goto done; 4624 4625 sp->f_flag = 0; 4626 if (mp->mnt_flag & MNT_RDONLY) 4627 sp->f_flag |= ST_RDONLY; 4628 if (mp->mnt_flag & MNT_NOSUID) 4629 sp->f_flag |= ST_NOSUID; 4630 error = copyout(sp, uap->buf, sizeof(*sp)); 4631 done: 4632 return (error); 4633 } 4634 4635 4636 /* 4637 * Syscall to push extended attribute configuration information into the 4638 * 
VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 *
 * The optional filename is resolved to a referenced vnode which is
 * handed to VFS_EXTATTRCTL().  If the call fails before the VFS op is
 * reached, that reference is released here.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	int vp_passed;		/* set once vp was handed to the VFS op */
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	vp_passed = 0;
	error = 0;

	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	/*
	 * The copy only validates that the name is readable and fits;
	 * the VFS op below still receives the user-space pointer.
	 */
	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
			vp_passed = 1;
		}
		nlookup_done(&nd);
	}

	/*
	 * Do not leak the cache_vref() reference when we bailed out
	 * before the VFS op saw the vnode.
	 * NOTE(review): once VFS_EXTATTRCTL() has been called the reference
	 * is left alone as before; confirm against the VFS op whether it
	 * consumes the reference or a vrele() is needed in that case too.
	 */
	if (vp != NULL && !vp_passed)
		vrele(vp);

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
4694 */ 4695 int 4696 sys_extattr_set_file(struct extattr_set_file_args *uap) 4697 { 4698 char attrname[EXTATTR_MAXNAMELEN]; 4699 struct nlookupdata nd; 4700 struct vnode *vp; 4701 struct uio auio; 4702 struct iovec aiov; 4703 int error; 4704 4705 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4706 if (error) 4707 return (error); 4708 4709 vp = NULL; 4710 4711 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4712 if (error == 0) 4713 error = nlookup(&nd); 4714 if (error == 0) 4715 error = ncp_writechk(&nd.nl_nch); 4716 if (error == 0) 4717 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4718 if (error) { 4719 nlookup_done(&nd); 4720 return (error); 4721 } 4722 4723 bzero(&auio, sizeof(auio)); 4724 aiov.iov_base = uap->data; 4725 aiov.iov_len = uap->nbytes; 4726 auio.uio_iov = &aiov; 4727 auio.uio_iovcnt = 1; 4728 auio.uio_offset = 0; 4729 auio.uio_resid = uap->nbytes; 4730 auio.uio_rw = UIO_WRITE; 4731 auio.uio_td = curthread; 4732 4733 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4734 &auio, nd.nl_cred); 4735 4736 vput(vp); 4737 nlookup_done(&nd); 4738 return (error); 4739 } 4740 4741 /* 4742 * Syscall to get a named extended attribute on a file or directory. 
4743 */ 4744 int 4745 sys_extattr_get_file(struct extattr_get_file_args *uap) 4746 { 4747 char attrname[EXTATTR_MAXNAMELEN]; 4748 struct nlookupdata nd; 4749 struct uio auio; 4750 struct iovec aiov; 4751 struct vnode *vp; 4752 int error; 4753 4754 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4755 if (error) 4756 return (error); 4757 4758 vp = NULL; 4759 4760 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4761 if (error == 0) 4762 error = nlookup(&nd); 4763 if (error == 0) 4764 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 4765 if (error) { 4766 nlookup_done(&nd); 4767 return (error); 4768 } 4769 4770 bzero(&auio, sizeof(auio)); 4771 aiov.iov_base = uap->data; 4772 aiov.iov_len = uap->nbytes; 4773 auio.uio_iov = &aiov; 4774 auio.uio_iovcnt = 1; 4775 auio.uio_offset = 0; 4776 auio.uio_resid = uap->nbytes; 4777 auio.uio_rw = UIO_READ; 4778 auio.uio_td = curthread; 4779 4780 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4781 &auio, nd.nl_cred); 4782 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4783 4784 vput(vp); 4785 nlookup_done(&nd); 4786 return(error); 4787 } 4788 4789 /* 4790 * Syscall to delete a named extended attribute from a file or directory. 4791 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4792 */ 4793 int 4794 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4795 { 4796 char attrname[EXTATTR_MAXNAMELEN]; 4797 struct nlookupdata nd; 4798 struct vnode *vp; 4799 int error; 4800 4801 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4802 if (error) 4803 return(error); 4804 4805 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4806 if (error == 0) 4807 error = nlookup(&nd); 4808 if (error == 0) 4809 error = ncp_writechk(&nd.nl_nch); 4810 if (error == 0) { 4811 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4812 if (error == 0) { 4813 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4814 attrname, NULL, nd.nl_cred); 4815 vput(vp); 4816 } 4817 } 4818 nlookup_done(&nd); 4819 return(error); 4820 } 4821 4822 /* 4823 * Determine if the mount is visible to the process. 4824 */ 4825 static int 4826 chroot_visible_mnt(struct mount *mp, struct proc *p) 4827 { 4828 struct nchandle nch; 4829 4830 /* 4831 * Traverse from the mount point upwards. If we hit the process 4832 * root then the mount point is visible to the process. 4833 */ 4834 nch = mp->mnt_ncmountpt; 4835 while (nch.ncp) { 4836 if (nch.mount == p->p_fd->fd_nrdir.mount && 4837 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4838 return(1); 4839 } 4840 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4841 nch = nch.mount->mnt_ncmounton; 4842 } else { 4843 nch.ncp = nch.ncp->nc_parent; 4844 } 4845 } 4846 4847 /* 4848 * If the mount point is not visible to the process, but the 4849 * process root is in a subdirectory of the mount, return 4850 * TRUE anyway. 4851 */ 4852 if (p->p_fd->fd_nrdir.mount == mp) 4853 return(1); 4854 4855 return(0); 4856 } 4857 4858