1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

#include <vfs/union/union.h>

/*
 * Forward declarations for file-local helpers.
 */
static void mount_warning(struct mount *mp, const char *ctl, ...)
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
    const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Overview of what the code below does:
 *
 *  1. Permission checks: jailed callers get EPERM; when the vfs.usermount
 *     sysctl is 0 the caller must pass priv_check(PRIV_ROOT); MNT_EXPORTED
 *     always requires PRIV_ROOT; callers failing PRIV_ROOT get MNT_NOSUID
 *     and MNT_NODEV forced into uap->flags.
 *  2. The path is looked up and the resulting nchandle/vnode are taken
 *     over from the nlookupdata.
 *  3. With MNT_UPDATE the existing mount is located, busied and updated
 *     in place; otherwise a new struct mount is allocated (loading the
 *     filesystem module via the linker if the type is not registered).
 *  4. VFS_MOUNT() is called.  For a new mount that succeeds, the mount is
 *     linked into the namecache and the mount list and VFS_START() is
 *     called; for a new mount that fails, the mount structure is torn
 *     down and freed.
 *
 * Returns 0 or an errno value.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved mnt flags for update rollback */
	int hasmount;			/* non-zero if something is already mounted on nch */
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 * A namecache entry without a vnode is reported as ENOENT.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure.
	 * Zeroing nd.nl_nch first keeps nlookup_done() from releasing
	 * the handle we just copied into nch.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether a mount already sits on this namecache entry.
	 * The result is only acted upon later, after the remaining
	 * validation has been done.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* An update must be issued against the root of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a "null" type update use the mount recorded from the
		 * lookup (see nullmp above) rather than the vnode's mount.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Snapshot the flags so a failed update can restore them. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		/*
		 * MNT_UPDATE is recorded in mnt_flag here; the code after
		 * VFS_MOUNT() tests that bit to pick the update epilogue.
		 */
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The underlying mount may forbid mounting on top of it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 *
	 * NOTE(review): strncpy() leaves f_fstypename unterminated if
	 * vfc_name occupies all MFSNAMELEN bytes; presumably vfc_name is
	 * always NUL-terminated within MFSNAMELEN -- confirm.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.  A request without MNT_RDONLY on a
	 * mount that is currently read-only is recorded as MNTK_WANTRDWR.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update epilogue: clear the transient request bits and, if
		 * VFS_MOUNT() failed, put back the flags saved above.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_ismounting(mp);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below and therefore never
		 * reaches the caller -- confirm this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		/*
		 * VFS_MOUNT() failed for a new mount: undo the setup done
		 * above and free the mount structure.
		 */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
447 */ 448 struct checkdirs_info { 449 struct nchandle old_nch; 450 struct nchandle new_nch; 451 struct vnode *old_vp; 452 struct vnode *new_vp; 453 }; 454 455 static int checkdirs_callback(struct proc *p, void *data); 456 457 static void 458 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 459 { 460 struct checkdirs_info info; 461 struct vnode *olddp; 462 struct vnode *newdp; 463 struct mount *mp; 464 465 /* 466 * If the old mount point's vnode has a usecount of 1, it is not 467 * being held as a descriptor anywhere. 468 */ 469 olddp = old_nch->ncp->nc_vp; 470 if (olddp == NULL || VREFCNT(olddp) == 1) 471 return; 472 473 /* 474 * Force the root vnode of the new mount point to be resolved 475 * so we can update any matching processes. 476 */ 477 mp = new_nch->mount; 478 if (VFS_ROOT(mp, &newdp)) 479 panic("mount: lost mount"); 480 vn_unlock(newdp); 481 cache_lock(new_nch); 482 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 483 cache_setunresolved(new_nch); 484 cache_setvp(new_nch, newdp); 485 cache_unlock(new_nch); 486 487 /* 488 * Special handling of the root node 489 */ 490 if (rootvnode == olddp) { 491 vref(newdp); 492 vfs_cache_setroot(newdp, cache_hold(new_nch)); 493 } 494 495 /* 496 * Pass newdp separately so the callback does not have to access 497 * it via new_nch->ncp->nc_vp. 498 */ 499 info.old_nch = *old_nch; 500 info.new_nch = *new_nch; 501 info.new_vp = newdp; 502 allproc_scan(checkdirs_callback, &info); 503 vput(newdp); 504 } 505 506 /* 507 * NOTE: callback is not MP safe because the scanned process's filedesc 508 * structure can be ripped out from under us, amoung other things. 
509 */ 510 static int 511 checkdirs_callback(struct proc *p, void *data) 512 { 513 struct checkdirs_info *info = data; 514 struct filedesc *fdp; 515 struct nchandle ncdrop1; 516 struct nchandle ncdrop2; 517 struct vnode *vprele1; 518 struct vnode *vprele2; 519 520 if ((fdp = p->p_fd) != NULL) { 521 cache_zero(&ncdrop1); 522 cache_zero(&ncdrop2); 523 vprele1 = NULL; 524 vprele2 = NULL; 525 526 /* 527 * MPUNSAFE - XXX fdp can be pulled out from under a 528 * foreign process. 529 * 530 * A shared filedesc is ok, we don't have to copy it 531 * because we are making this change globally. 532 */ 533 spin_lock(&fdp->fd_spin); 534 if (fdp->fd_ncdir.mount == info->old_nch.mount && 535 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 536 vprele1 = fdp->fd_cdir; 537 vref(info->new_vp); 538 fdp->fd_cdir = info->new_vp; 539 ncdrop1 = fdp->fd_ncdir; 540 cache_copy(&info->new_nch, &fdp->fd_ncdir); 541 } 542 if (fdp->fd_nrdir.mount == info->old_nch.mount && 543 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 544 vprele2 = fdp->fd_rdir; 545 vref(info->new_vp); 546 fdp->fd_rdir = info->new_vp; 547 ncdrop2 = fdp->fd_nrdir; 548 cache_copy(&info->new_nch, &fdp->fd_nrdir); 549 } 550 spin_unlock(&fdp->fd_spin); 551 if (ncdrop1.ncp) 552 cache_drop(&ncdrop1); 553 if (ncdrop2.ncp) 554 cache_drop(&ncdrop2); 555 if (vprele1) 556 vrele(vprele1); 557 if (vprele2) 558 vrele(vprele2); 559 } 560 return(0); 561 } 562 563 /* 564 * Unmount a file system. 565 * 566 * Note: unmount takes a path to the vnode mounted on as argument, 567 * not special file (as before). 
568 * 569 * umount_args(char *path, int flags) 570 * 571 * MPALMOSTSAFE 572 */ 573 int 574 sys_unmount(struct unmount_args *uap) 575 { 576 struct thread *td = curthread; 577 struct proc *p __debugvar = td->td_proc; 578 struct mount *mp = NULL; 579 struct nlookupdata nd; 580 int error; 581 582 KKASSERT(p); 583 get_mplock(); 584 if (td->td_ucred->cr_prison != NULL) { 585 error = EPERM; 586 goto done; 587 } 588 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 589 goto done; 590 591 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 592 if (error == 0) 593 error = nlookup(&nd); 594 if (error) 595 goto out; 596 597 mp = nd.nl_nch.mount; 598 599 /* 600 * Only root, or the user that did the original mount is 601 * permitted to unmount this filesystem. 602 */ 603 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 604 (error = priv_check(td, PRIV_ROOT))) 605 goto out; 606 607 /* 608 * Don't allow unmounting the root file system. 609 */ 610 if (mp->mnt_flag & MNT_ROOTFS) { 611 error = EINVAL; 612 goto out; 613 } 614 615 /* 616 * Must be the root of the filesystem 617 */ 618 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 619 error = EINVAL; 620 goto out; 621 } 622 623 out: 624 nlookup_done(&nd); 625 if (error == 0) 626 error = dounmount(mp, uap->flags); 627 done: 628 rel_mplock(); 629 return (error); 630 } 631 632 /* 633 * Do the actual file system unmount. 
 */

/*
 * Interlock callback passed to mountlist_interlock() by dounmount().
 *
 * Returns EBUSY if MNTK_UNMOUNT is already set on the mount, otherwise
 * sets MNTK_UNMOUNT and returns 0.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback used by dounmount().  Drops the process's
 * p_textnch namecache reference when it belongs to the mount passed
 * in arg.  Always returns 0.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Unmount the filesystem described by mp.
 *
 * flags is tested for MNT_FORCE here and is also passed through to
 * VFS_UNMOUNT().  Without MNT_FORCE, leftover namecache or mount
 * references cause EBUSY; with MNT_FORCE the unmount proceeds but the
 * mount structure is not freed (freeok is cleared).
 *
 * Returns 0 on success or an errno value.  On failure the flags and
 * the syncer vnode are restored so the mount remains usable.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;		/* MNT_ASYNC bit saved for the error path */
	int lflags;
	int freeok = 1;		/* cleared when a forced unmount leaves refs */
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 * A non-forced unmount adds LK_TIMELOCK to the lock request.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * First pass: if references remain, have every process
		 * drop a p_textnch reference that points into this mount.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		/*
		 * Second pass: anything still referenced is either an
		 * error or, for a forced unmount, a reason not to free mp.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 *
	 * Up to 10 attempts are made, sleeping hz / 10 + 1 ticks each.
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 * (A forced unmount calls VFS_UNMOUNT() even if the sync failed.)
	 */
	if (error == 0) {
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/*
	 * Detach and drop both namecache handles held by the mount.  Each
	 * handle is copied out and zeroed in the mount before it is
	 * cleared and dropped.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 *
	 * mp is set to NULL after the free so the common exit below does
	 * not release the token a second time.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;
	}
	error = 0;
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print a printf-style warning to the console, prefixed with
 * "unmount(<path>): " where <path> is the full path of the handle the
 * filesystem is mounted on.  If cache_fullpath() fails the prefix falls
 * back to the mount pointer and, when available, the namecache name.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);	/* buf is the allocation backing ptr */
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 *
 * The result of cache_fullpath() is returned; *rb and *fb are passed
 * straight through to it.  Callers in this file kfree(*fb, M_TEMP)
 * after a successful return.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
917 */ 918 919 #ifdef DEBUG 920 static int syncprt = 0; 921 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 922 #endif /* DEBUG */ 923 924 static int sync_callback(struct mount *mp, void *data); 925 926 int 927 sys_sync(struct sync_args *uap) 928 { 929 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 930 return (0); 931 } 932 933 static 934 int 935 sync_callback(struct mount *mp, void *data __unused) 936 { 937 int asyncflag; 938 939 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 940 asyncflag = mp->mnt_flag & MNT_ASYNC; 941 mp->mnt_flag &= ~MNT_ASYNC; 942 vfs_msync(mp, MNT_NOWAIT); 943 VFS_SYNC(mp, MNT_NOWAIT); 944 mp->mnt_flag |= asyncflag; 945 } 946 return(0); 947 } 948 949 /* XXX PRISON: could be per prison flag */ 950 static int prison_quotas; 951 #if 0 952 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 953 #endif 954 955 /* 956 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 957 * 958 * Change filesystem quotas. 959 * 960 * MPALMOSTSAFE 961 */ 962 int 963 sys_quotactl(struct quotactl_args *uap) 964 { 965 struct nlookupdata nd; 966 struct thread *td; 967 struct mount *mp; 968 int error; 969 970 get_mplock(); 971 td = curthread; 972 if (td->td_ucred->cr_prison && !prison_quotas) { 973 error = EPERM; 974 goto done; 975 } 976 977 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 978 if (error == 0) 979 error = nlookup(&nd); 980 if (error == 0) { 981 mp = nd.nl_nch.mount; 982 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 983 uap->arg, nd.nl_cred); 984 } 985 nlookup_done(&nd); 986 done: 987 rel_mplock(); 988 return (error); 989 } 990 991 /* 992 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 993 * void *buf, int buflen) 994 * 995 * This function operates on a mount point and executes the specified 996 * operation using the specified control data, and possibly returns data. 
997 * 998 * The actual number of bytes stored in the result buffer is returned, 0 999 * if none, otherwise an error is returned. 1000 * 1001 * MPALMOSTSAFE 1002 */ 1003 int 1004 sys_mountctl(struct mountctl_args *uap) 1005 { 1006 struct thread *td = curthread; 1007 struct proc *p = td->td_proc; 1008 struct file *fp; 1009 void *ctl = NULL; 1010 void *buf = NULL; 1011 char *path = NULL; 1012 int error; 1013 1014 /* 1015 * Sanity and permissions checks. We must be root. 1016 */ 1017 KKASSERT(p); 1018 if (td->td_ucred->cr_prison != NULL) 1019 return (EPERM); 1020 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1021 (error = priv_check(td, PRIV_ROOT)) != 0) 1022 return (error); 1023 1024 /* 1025 * Argument length checks 1026 */ 1027 if (uap->ctllen < 0 || uap->ctllen > 1024) 1028 return (EINVAL); 1029 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1030 return (EINVAL); 1031 if (uap->path == NULL) 1032 return (EINVAL); 1033 1034 /* 1035 * Allocate the necessary buffers and copyin data 1036 */ 1037 path = objcache_get(namei_oc, M_WAITOK); 1038 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1039 if (error) 1040 goto done; 1041 1042 if (uap->ctllen) { 1043 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1044 error = copyin(uap->ctl, ctl, uap->ctllen); 1045 if (error) 1046 goto done; 1047 } 1048 if (uap->buflen) 1049 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1050 1051 /* 1052 * Validate the descriptor 1053 */ 1054 if (uap->fd >= 0) { 1055 fp = holdfp(p->p_fd, uap->fd, -1); 1056 if (fp == NULL) { 1057 error = EBADF; 1058 goto done; 1059 } 1060 } else { 1061 fp = NULL; 1062 } 1063 1064 /* 1065 * Execute the internal kernel function and clean up. 
	 */
	get_mplock();
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 
			      buf, uap->buflen, &uap->sysmsg_result);
	rel_mplock();
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 *
 * The path is looked up as a kernel-space string (UIO_SYSSPACE), following
 * symlinks.  *res is cleared on entry and is otherwise only written by
 * vop_mountctl().  fp, ctl/ctllen and buf/buflen are passed through
 * untouched.
 *
 * Returns 0 or an errno.  EINVAL is returned when the resolved vnode has
 * neither VROOT nor VPFSROOT set.
 */
int
kern_mountctl(const char *path, int op, struct file *fp, 
		const void *ctl, int ctllen, 
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	/* Sample the mount from the nch before the lookup state is released. */
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);
	/* Drop the vnode lock only; the reference is released by vrele() below. */
	vn_unlock(vp);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	return (error);
}

/*
 * Resolve nd and copy the statistics of the mount it lands on into *buf
 * (a kernel-space structure).
 *
 * The mount's own mnt_stat is refreshed through VFS_STATFS() and then
 * patched in place: f_mntonname is rewritten from mount_path() and f_flags
 * is set to the mount flags masked by MNT_VISFLAGMASK.  The fsid in the
 * copy handed back is zeroed when priv_check(td, PRIV_ROOT) fails.
 *
 * This function never calls nlookup_done(); cleanup of nd is left to the
 * caller.  Returns 0 or an errno.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	/* Clear first so the bytes past the copied string are zero. */
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 *
 * uap->path is a user-space path; the result is staged in a kernel
 * structure and copied out to uap->buf only on success.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	/* Called on every path, including a failed nlookup_init(). */
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor flavour of kern_statfs(): fill *buf (kernel space) with the
 * statistics of the mount backing descriptor fd.
 *
 * Returns EBADF when no mount can be found for the descriptor, EINVAL when
 * the file has no credential, otherwise the result of VFS_STATFS() /
 * mount_path().  The file reference taken by holdvnode() is dropped on
 * every path after it succeeds.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 *
 * The result is staged in a kernel structure and copied out to uap->buf
 * only when kern_fstatfs() succeeds.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve nd and copy the statvfs data of the mount it lands on into *buf
 * (a kernel-space structure).
 *
 * The mount's own mnt_vstat is refreshed through VFS_STATVFS(); f_flag is
 * then rebuilt from the mount flags (MNT_RDONLY -> ST_RDONLY,
 * MNT_NOSUID -> ST_NOSUID) before the copy.
 *
 * This function never calls nlookup_done(); cleanup of nd is left to the
 * caller.  Returns 0 or an errno.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
1277 */ 1278 int 1279 sys_statvfs(struct statvfs_args *uap) 1280 { 1281 struct nlookupdata nd; 1282 struct statvfs buf; 1283 int error; 1284 1285 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1286 if (error == 0) 1287 error = kern_statvfs(&nd, &buf); 1288 nlookup_done(&nd); 1289 if (error == 0) 1290 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1291 return (error); 1292 } 1293 1294 int 1295 kern_fstatvfs(int fd, struct statvfs *buf) 1296 { 1297 struct thread *td = curthread; 1298 struct proc *p = td->td_proc; 1299 struct file *fp; 1300 struct mount *mp; 1301 struct statvfs *sp; 1302 int error; 1303 1304 KKASSERT(p); 1305 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1306 return (error); 1307 if ((mp = fp->f_nchandle.mount) == NULL) 1308 mp = ((struct vnode *)fp->f_data)->v_mount; 1309 if (mp == NULL) { 1310 error = EBADF; 1311 goto done; 1312 } 1313 if (fp->f_cred == NULL) { 1314 error = EINVAL; 1315 goto done; 1316 } 1317 sp = &mp->mnt_vstat; 1318 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1319 goto done; 1320 1321 sp->f_flag = 0; 1322 if (mp->mnt_flag & MNT_RDONLY) 1323 sp->f_flag |= ST_RDONLY; 1324 if (mp->mnt_flag & MNT_NOSUID) 1325 sp->f_flag |= ST_NOSUID; 1326 1327 bcopy(sp, buf, sizeof(*buf)); 1328 error = 0; 1329 done: 1330 fdrop(fp); 1331 return (error); 1332 } 1333 1334 /* 1335 * fstatfs_args(int fd, struct statfs *buf) 1336 * 1337 * Get filesystem statistics. 1338 */ 1339 int 1340 sys_fstatvfs(struct fstatvfs_args *uap) 1341 { 1342 struct statvfs buf; 1343 int error; 1344 1345 error = kern_fstatvfs(uap->fd, &buf); 1346 1347 if (error == 0) 1348 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1349 return (error); 1350 } 1351 1352 /* 1353 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1354 * 1355 * Get statistics on all filesystems. 
1356 */ 1357 1358 struct getfsstat_info { 1359 struct statfs *sfsp; 1360 long count; 1361 long maxcount; 1362 int error; 1363 int flags; 1364 struct thread *td; 1365 }; 1366 1367 static int getfsstat_callback(struct mount *, void *); 1368 1369 int 1370 sys_getfsstat(struct getfsstat_args *uap) 1371 { 1372 struct thread *td = curthread; 1373 struct getfsstat_info info; 1374 1375 bzero(&info, sizeof(info)); 1376 1377 info.maxcount = uap->bufsize / sizeof(struct statfs); 1378 info.sfsp = uap->buf; 1379 info.count = 0; 1380 info.flags = uap->flags; 1381 info.td = td; 1382 1383 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1384 if (info.sfsp && info.count > info.maxcount) 1385 uap->sysmsg_result = info.maxcount; 1386 else 1387 uap->sysmsg_result = info.count; 1388 return (info.error); 1389 } 1390 1391 static int 1392 getfsstat_callback(struct mount *mp, void *data) 1393 { 1394 struct getfsstat_info *info = data; 1395 struct statfs *sp; 1396 char *freepath; 1397 char *fullpath; 1398 int error; 1399 1400 if (info->sfsp && info->count < info->maxcount) { 1401 if (info->td->td_proc && 1402 !chroot_visible_mnt(mp, info->td->td_proc)) { 1403 return(0); 1404 } 1405 sp = &mp->mnt_stat; 1406 1407 /* 1408 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1409 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1410 * overrides MNT_WAIT. 
1411 */ 1412 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1413 (info->flags & MNT_WAIT)) && 1414 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1415 return(0); 1416 } 1417 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1418 1419 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1420 if (error) { 1421 info->error = error; 1422 return(-1); 1423 } 1424 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1425 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1426 kfree(freepath, M_TEMP); 1427 1428 error = copyout(sp, info->sfsp, sizeof(*sp)); 1429 if (error) { 1430 info->error = error; 1431 return (-1); 1432 } 1433 ++info->sfsp; 1434 } 1435 info->count++; 1436 return(0); 1437 } 1438 1439 /* 1440 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1441 long bufsize, int flags) 1442 * 1443 * Get statistics on all filesystems. 1444 */ 1445 1446 struct getvfsstat_info { 1447 struct statfs *sfsp; 1448 struct statvfs *vsfsp; 1449 long count; 1450 long maxcount; 1451 int error; 1452 int flags; 1453 struct thread *td; 1454 }; 1455 1456 static int getvfsstat_callback(struct mount *, void *); 1457 1458 int 1459 sys_getvfsstat(struct getvfsstat_args *uap) 1460 { 1461 struct thread *td = curthread; 1462 struct getvfsstat_info info; 1463 1464 bzero(&info, sizeof(info)); 1465 1466 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1467 info.sfsp = uap->buf; 1468 info.vsfsp = uap->vbuf; 1469 info.count = 0; 1470 info.flags = uap->flags; 1471 info.td = td; 1472 1473 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1474 if (info.vsfsp && info.count > info.maxcount) 1475 uap->sysmsg_result = info.maxcount; 1476 else 1477 uap->sysmsg_result = info.count; 1478 return (info.error); 1479 } 1480 1481 static int 1482 getvfsstat_callback(struct mount *mp, void *data) 1483 { 1484 struct getvfsstat_info *info = data; 1485 struct statfs *sp; 1486 struct statvfs *vsp; 1487 char *freepath; 1488 char *fullpath; 1489 int error; 1490 1491 if 
(info->vsfsp && info->count < info->maxcount) { 1492 if (info->td->td_proc && 1493 !chroot_visible_mnt(mp, info->td->td_proc)) { 1494 return(0); 1495 } 1496 sp = &mp->mnt_stat; 1497 vsp = &mp->mnt_vstat; 1498 1499 /* 1500 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1501 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1502 * overrides MNT_WAIT. 1503 */ 1504 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1505 (info->flags & MNT_WAIT)) && 1506 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1507 return(0); 1508 } 1509 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1510 1511 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1512 (info->flags & MNT_WAIT)) && 1513 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1514 return(0); 1515 } 1516 vsp->f_flag = 0; 1517 if (mp->mnt_flag & MNT_RDONLY) 1518 vsp->f_flag |= ST_RDONLY; 1519 if (mp->mnt_flag & MNT_NOSUID) 1520 vsp->f_flag |= ST_NOSUID; 1521 1522 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1523 if (error) { 1524 info->error = error; 1525 return(-1); 1526 } 1527 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1528 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1529 kfree(freepath, M_TEMP); 1530 1531 error = copyout(sp, info->sfsp, sizeof(*sp)); 1532 if (error == 0) 1533 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1534 if (error) { 1535 info->error = error; 1536 return (-1); 1537 } 1538 ++info->sfsp; 1539 ++info->vsfsp; 1540 } 1541 info->count++; 1542 return(0); 1543 } 1544 1545 1546 /* 1547 * fchdir_args(int fd) 1548 * 1549 * Change current working directory to a given file descriptor. 
1550 */ 1551 int 1552 sys_fchdir(struct fchdir_args *uap) 1553 { 1554 struct thread *td = curthread; 1555 struct proc *p = td->td_proc; 1556 struct filedesc *fdp = p->p_fd; 1557 struct vnode *vp, *ovp; 1558 struct mount *mp; 1559 struct file *fp; 1560 struct nchandle nch, onch, tnch; 1561 int error; 1562 1563 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1564 return (error); 1565 lwkt_gettoken(&p->p_token); 1566 vp = (struct vnode *)fp->f_data; 1567 vref(vp); 1568 vn_lock(vp, LK_SHARED | LK_RETRY); 1569 if (fp->f_nchandle.ncp == NULL) 1570 error = ENOTDIR; 1571 else 1572 error = checkvp_chdir(vp, td); 1573 if (error) { 1574 vput(vp); 1575 goto done; 1576 } 1577 cache_copy(&fp->f_nchandle, &nch); 1578 1579 /* 1580 * If the ncp has become a mount point, traverse through 1581 * the mount point. 1582 */ 1583 1584 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1585 (mp = cache_findmount(&nch)) != NULL 1586 ) { 1587 error = nlookup_mp(mp, &tnch); 1588 if (error == 0) { 1589 cache_unlock(&tnch); /* leave ref intact */ 1590 vput(vp); 1591 vp = tnch.ncp->nc_vp; 1592 error = vget(vp, LK_SHARED); 1593 KKASSERT(error == 0); 1594 cache_drop(&nch); 1595 nch = tnch; 1596 } 1597 cache_dropmount(mp); 1598 } 1599 if (error == 0) { 1600 ovp = fdp->fd_cdir; 1601 onch = fdp->fd_ncdir; 1602 vn_unlock(vp); /* leave ref intact */ 1603 fdp->fd_cdir = vp; 1604 fdp->fd_ncdir = nch; 1605 cache_drop(&onch); 1606 vrele(ovp); 1607 } else { 1608 cache_drop(&nch); 1609 vput(vp); 1610 } 1611 fdrop(fp); 1612 done: 1613 lwkt_reltoken(&p->p_token); 1614 return (error); 1615 } 1616 1617 int 1618 kern_chdir(struct nlookupdata *nd) 1619 { 1620 struct thread *td = curthread; 1621 struct proc *p = td->td_proc; 1622 struct filedesc *fdp = p->p_fd; 1623 struct vnode *vp, *ovp; 1624 struct nchandle onch; 1625 int error; 1626 1627 nd->nl_flags |= NLC_SHAREDLOCK; 1628 if ((error = nlookup(nd)) != 0) 1629 return (error); 1630 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1631 return (ENOENT); 1632 if 
((error = vget(vp, LK_SHARED)) != 0) 1633 return (error); 1634 1635 lwkt_gettoken(&p->p_token); 1636 error = checkvp_chdir(vp, td); 1637 vn_unlock(vp); 1638 if (error == 0) { 1639 ovp = fdp->fd_cdir; 1640 onch = fdp->fd_ncdir; 1641 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1642 fdp->fd_ncdir = nd->nl_nch; 1643 fdp->fd_cdir = vp; 1644 cache_drop(&onch); 1645 vrele(ovp); 1646 cache_zero(&nd->nl_nch); 1647 } else { 1648 vrele(vp); 1649 } 1650 lwkt_reltoken(&p->p_token); 1651 return (error); 1652 } 1653 1654 /* 1655 * chdir_args(char *path) 1656 * 1657 * Change current working directory (``.''). 1658 */ 1659 int 1660 sys_chdir(struct chdir_args *uap) 1661 { 1662 struct nlookupdata nd; 1663 int error; 1664 1665 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1666 if (error == 0) 1667 error = kern_chdir(&nd); 1668 nlookup_done(&nd); 1669 return (error); 1670 } 1671 1672 /* 1673 * Helper function for raised chroot(2) security function: Refuse if 1674 * any filedescriptors are open directories. 1675 */ 1676 static int 1677 chroot_refuse_vdir_fds(struct filedesc *fdp) 1678 { 1679 struct vnode *vp; 1680 struct file *fp; 1681 int error; 1682 int fd; 1683 1684 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1685 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1686 continue; 1687 vp = (struct vnode *)fp->f_data; 1688 if (vp->v_type != VDIR) { 1689 fdrop(fp); 1690 continue; 1691 } 1692 fdrop(fp); 1693 return(EPERM); 1694 } 1695 return (0); 1696 } 1697 1698 /* 1699 * This sysctl determines if we will allow a process to chroot(2) if it 1700 * has a directory open: 1701 * 0: disallowed for all processes. 1702 * 1: allowed for processes that were not already chroot(2)'ed. 1703 * 2: allowed for all processes. 1704 */ 1705 1706 static int chroot_allow_open_directories = 1; 1707 1708 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1709 &chroot_allow_open_directories, 0, ""); 1710 1711 /* 1712 * chroot to the specified namecache entry. 
We obtain the vp from the 1713 * namecache data. The passed ncp must be locked and referenced and will 1714 * remain locked and referenced on return. 1715 */ 1716 int 1717 kern_chroot(struct nchandle *nch) 1718 { 1719 struct thread *td = curthread; 1720 struct proc *p = td->td_proc; 1721 struct filedesc *fdp = p->p_fd; 1722 struct vnode *vp; 1723 int error; 1724 1725 /* 1726 * Only privileged user can chroot 1727 */ 1728 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1729 if (error) 1730 return (error); 1731 1732 /* 1733 * Disallow open directory descriptors (fchdir() breakouts). 1734 */ 1735 if (chroot_allow_open_directories == 0 || 1736 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1737 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1738 return (error); 1739 } 1740 if ((vp = nch->ncp->nc_vp) == NULL) 1741 return (ENOENT); 1742 1743 if ((error = vget(vp, LK_SHARED)) != 0) 1744 return (error); 1745 1746 /* 1747 * Check the validity of vp as a directory to change to and 1748 * associate it with rdir/jdir. 1749 */ 1750 error = checkvp_chdir(vp, td); 1751 vn_unlock(vp); /* leave reference intact */ 1752 if (error == 0) { 1753 vrele(fdp->fd_rdir); 1754 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1755 cache_drop(&fdp->fd_nrdir); 1756 cache_copy(nch, &fdp->fd_nrdir); 1757 if (fdp->fd_jdir == NULL) { 1758 fdp->fd_jdir = vp; 1759 vref(fdp->fd_jdir); 1760 cache_copy(nch, &fdp->fd_njdir); 1761 } 1762 } else { 1763 vrele(vp); 1764 } 1765 return (error); 1766 } 1767 1768 /* 1769 * chroot_args(char *path) 1770 * 1771 * Change notion of root (``/'') directory. 
1772 */ 1773 int 1774 sys_chroot(struct chroot_args *uap) 1775 { 1776 struct thread *td __debugvar = curthread; 1777 struct nlookupdata nd; 1778 int error; 1779 1780 KKASSERT(td->td_proc); 1781 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1782 if (error == 0) { 1783 nd.nl_flags |= NLC_EXEC; 1784 error = nlookup(&nd); 1785 if (error == 0) 1786 error = kern_chroot(&nd.nl_nch); 1787 } 1788 nlookup_done(&nd); 1789 return(error); 1790 } 1791 1792 int 1793 sys_chroot_kernel(struct chroot_kernel_args *uap) 1794 { 1795 struct thread *td = curthread; 1796 struct nlookupdata nd; 1797 struct nchandle *nch; 1798 struct vnode *vp; 1799 int error; 1800 1801 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1802 if (error) 1803 goto error_nond; 1804 1805 error = nlookup(&nd); 1806 if (error) 1807 goto error_out; 1808 1809 nch = &nd.nl_nch; 1810 1811 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1812 if (error) 1813 goto error_out; 1814 1815 if ((vp = nch->ncp->nc_vp) == NULL) { 1816 error = ENOENT; 1817 goto error_out; 1818 } 1819 1820 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1821 goto error_out; 1822 1823 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1824 get_mplock(); 1825 vfs_cache_setroot(vp, cache_hold(nch)); 1826 rel_mplock(); 1827 1828 error_out: 1829 nlookup_done(&nd); 1830 error_nond: 1831 return(error); 1832 } 1833 1834 /* 1835 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1836 * determine whether it is legal to chdir to the vnode. The vnode's state 1837 * is not changed by this call. 
 *
 * Returns ENOTDIR when vp is not a directory, otherwise the result of
 * VOP_EACCESS(vp, VEXEC, ...) using the thread's credential.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the file described by nd and install it in the calling process's
 * descriptor table.
 *
 * oflags are the open(2) flags (all O_ACCMODE bits set at once is rejected
 * with EINVAL) and mode is the creation mode, which is masked by the
 * process's fd_cmask and has S_ISTXT stripped.  On success the new
 * descriptor index is stored in *res.
 *
 * nlookup_done(nd) is called here right after vn_open(), whether or not it
 * succeeded.  ERESTART from the open is converted to EINTR.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the 
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0)
		nd->nl_flags |= NLC_SHAREDLOCK;

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				/* dup failed: give the reserved slot back */
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	/* O_EXLOCK / O_SHLOCK: take a whole-file advisory lock before use. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	if (oflags & O_CLOEXEC)
		error = fsetfdflags(fdp, *res, UF_EXCLOSE);
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * NOTE(review): kern_open() already calls nlookup_done() on nd once it has
 * run vn_open(), so the call below can be a second one on the same
 * structure -- presumably nlookup_done() tolerates that; confirm.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 *
 * Same as sys_open(), with the lookup initialised by nlookup_init_at()
 * from uap->fd and torn down by nlookup_done_at().
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode, 
					&uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a special node (or a whiteout) at the path described by nd.
 *
 * The S_IFMT bits of mode select the node type and the privilege that is
 * checked; any other type yields EINVAL.  The permission bits of mode are
 * masked by the process's fd_cmask.  rmajor/rminor are stored in the
 * attributes handed to VOP_NMKNOD().
 *
 * Returns EEXIST when the name already resolves to a vnode.  nd is not
 * released here; every return leaves that to the caller.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		/* whiteouts go through VOP_NWHITEOUT(); vattr is not used */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		/* the new vnode is not needed by the caller; release it */
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 *
 * uap->dev is split into its major and minor numbers with umajor() and
 * uminor() before being handed to kern_mknod().
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
2125 */ 2126 int 2127 sys_mknodat(struct mknodat_args *uap) 2128 { 2129 struct nlookupdata nd; 2130 struct file *fp; 2131 int error; 2132 2133 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2134 if (error == 0) { 2135 error = kern_mknod(&nd, uap->mode, 2136 umajor(uap->dev), uminor(uap->dev)); 2137 } 2138 nlookup_done_at(&nd, fp); 2139 return (error); 2140 } 2141 2142 int 2143 kern_mkfifo(struct nlookupdata *nd, int mode) 2144 { 2145 struct thread *td = curthread; 2146 struct proc *p = td->td_proc; 2147 struct vattr vattr; 2148 struct vnode *vp; 2149 int error; 2150 2151 bwillinode(1); 2152 2153 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2154 if ((error = nlookup(nd)) != 0) 2155 return (error); 2156 if (nd->nl_nch.ncp->nc_vp) 2157 return (EEXIST); 2158 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2159 return (error); 2160 2161 VATTR_NULL(&vattr); 2162 vattr.va_type = VFIFO; 2163 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2164 vp = NULL; 2165 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2166 if (error == 0) 2167 vput(vp); 2168 return (error); 2169 } 2170 2171 /* 2172 * mkfifo_args(char *path, int mode) 2173 * 2174 * Create a named pipe. 2175 */ 2176 int 2177 sys_mkfifo(struct mkfifo_args *uap) 2178 { 2179 struct nlookupdata nd; 2180 int error; 2181 2182 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2183 if (error == 0) 2184 error = kern_mkfifo(&nd, uap->mode); 2185 nlookup_done(&nd); 2186 return (error); 2187 } 2188 2189 /* 2190 * mkfifoat_args(int fd, char *path, mode_t mode) 2191 * 2192 * Create a named pipe. The path is relative to the directory associated 2193 * with fd. 
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Optional hard link restrictions, both disabled (0) by default and
 * consulted by can_hardlink() below.
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0, 
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Apply the hardlink_check_uid / hardlink_check_gid policy to vp for the
 * given credential.
 *
 * Returns 0 when linking is allowed, EPERM when one of the enabled checks
 * rejects it, or the error from VOP_GETATTR().  td is not used by the
 * body.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);
	
	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}
	
	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link: nd describes the existing file, linknd the name to
 * be created.
 *
 * Returns EPERM when the source is a directory or can_hardlink() refuses,
 * EEXIST when the target name already resolves to a vnode, otherwise the
 * result of the lookups, the vnode locking or VOP_NLINK().  Neither
 * nlookupdata is released here; that is left to the caller.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	/* the vnode reference is kept; only its lock is released here */
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	/* re-lock the source vnode now that the target lookup is done */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
2329 */ 2330 int 2331 sys_link(struct link_args *uap) 2332 { 2333 struct nlookupdata nd, linknd; 2334 int error; 2335 2336 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2337 if (error == 0) { 2338 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2339 if (error == 0) 2340 error = kern_link(&nd, &linknd); 2341 nlookup_done(&linknd); 2342 } 2343 nlookup_done(&nd); 2344 return (error); 2345 } 2346 2347 /* 2348 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2349 * 2350 * Make a hard file link. The path1 argument is relative to the directory 2351 * associated with fd1, and similarly the path2 argument is relative to 2352 * the directory associated with fd2. 2353 */ 2354 int 2355 sys_linkat(struct linkat_args *uap) 2356 { 2357 struct nlookupdata nd, linknd; 2358 struct file *fp1, *fp2; 2359 int error; 2360 2361 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2362 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2363 if (error == 0) { 2364 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2365 uap->path2, UIO_USERSPACE, 0); 2366 if (error == 0) 2367 error = kern_link(&nd, &linknd); 2368 nlookup_done_at(&linknd, fp2); 2369 } 2370 nlookup_done_at(&nd, fp1); 2371 return (error); 2372 } 2373 2374 int 2375 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2376 { 2377 struct vattr vattr; 2378 struct vnode *vp; 2379 struct vnode *dvp; 2380 int error; 2381 2382 bwillinode(1); 2383 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2384 if ((error = nlookup(nd)) != 0) 2385 return (error); 2386 if (nd->nl_nch.ncp->nc_vp) 2387 return (EEXIST); 2388 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2389 return (error); 2390 dvp = nd->nl_dvp; 2391 VATTR_NULL(&vattr); 2392 vattr.va_mode = mode; 2393 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2394 if (error == 0) 2395 vput(vp); 2396 return (error); 2397 } 2398 2399 /* 2400 * symlink(char *path, char *link) 2401 * 2402 * Make a symbolic 
link. 2403 */ 2404 int 2405 sys_symlink(struct symlink_args *uap) 2406 { 2407 struct thread *td = curthread; 2408 struct nlookupdata nd; 2409 char *path; 2410 int error; 2411 int mode; 2412 2413 path = objcache_get(namei_oc, M_WAITOK); 2414 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2415 if (error == 0) { 2416 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2417 if (error == 0) { 2418 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2419 error = kern_symlink(&nd, path, mode); 2420 } 2421 nlookup_done(&nd); 2422 } 2423 objcache_put(namei_oc, path); 2424 return (error); 2425 } 2426 2427 /* 2428 * symlinkat_args(char *path1, int fd, char *path2) 2429 * 2430 * Make a symbolic link. The path2 argument is relative to the directory 2431 * associated with fd. 2432 */ 2433 int 2434 sys_symlinkat(struct symlinkat_args *uap) 2435 { 2436 struct thread *td = curthread; 2437 struct nlookupdata nd; 2438 struct file *fp; 2439 char *path1; 2440 int error; 2441 int mode; 2442 2443 path1 = objcache_get(namei_oc, M_WAITOK); 2444 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2445 if (error == 0) { 2446 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2447 UIO_USERSPACE, 0); 2448 if (error == 0) { 2449 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2450 error = kern_symlink(&nd, path1, mode); 2451 } 2452 nlookup_done_at(&nd, fp); 2453 } 2454 objcache_put(namei_oc, path1); 2455 return (error); 2456 } 2457 2458 /* 2459 * undelete_args(char *path) 2460 * 2461 * Delete a whiteout from the filesystem. 
2462 */ 2463 int 2464 sys_undelete(struct undelete_args *uap) 2465 { 2466 struct nlookupdata nd; 2467 int error; 2468 2469 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2470 bwillinode(1); 2471 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2472 if (error == 0) 2473 error = nlookup(&nd); 2474 if (error == 0) 2475 error = ncp_writechk(&nd.nl_nch); 2476 if (error == 0) { 2477 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2478 NAMEI_DELETE); 2479 } 2480 nlookup_done(&nd); 2481 return (error); 2482 } 2483 2484 int 2485 kern_unlink(struct nlookupdata *nd) 2486 { 2487 int error; 2488 2489 bwillinode(1); 2490 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2491 if ((error = nlookup(nd)) != 0) 2492 return (error); 2493 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2494 return (error); 2495 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2496 return (error); 2497 } 2498 2499 /* 2500 * unlink_args(char *path) 2501 * 2502 * Delete a name from the filesystem. 2503 */ 2504 int 2505 sys_unlink(struct unlink_args *uap) 2506 { 2507 struct nlookupdata nd; 2508 int error; 2509 2510 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2511 if (error == 0) 2512 error = kern_unlink(&nd); 2513 nlookup_done(&nd); 2514 return (error); 2515 } 2516 2517 2518 /* 2519 * unlinkat_args(int fd, char *path, int flags) 2520 * 2521 * Delete the file or directory entry pointed to by fd/path. 
2522 */ 2523 int 2524 sys_unlinkat(struct unlinkat_args *uap) 2525 { 2526 struct nlookupdata nd; 2527 struct file *fp; 2528 int error; 2529 2530 if (uap->flags & ~AT_REMOVEDIR) 2531 return (EINVAL); 2532 2533 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2534 if (error == 0) { 2535 if (uap->flags & AT_REMOVEDIR) 2536 error = kern_rmdir(&nd); 2537 else 2538 error = kern_unlink(&nd); 2539 } 2540 nlookup_done_at(&nd, fp); 2541 return (error); 2542 } 2543 2544 int 2545 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2546 { 2547 struct thread *td = curthread; 2548 struct proc *p = td->td_proc; 2549 struct file *fp; 2550 struct vnode *vp; 2551 struct vattr vattr; 2552 off_t new_offset; 2553 int error; 2554 2555 fp = holdfp(p->p_fd, fd, -1); 2556 if (fp == NULL) 2557 return (EBADF); 2558 if (fp->f_type != DTYPE_VNODE) { 2559 error = ESPIPE; 2560 goto done; 2561 } 2562 vp = (struct vnode *)fp->f_data; 2563 2564 switch (whence) { 2565 case L_INCR: 2566 spin_lock(&fp->f_spin); 2567 new_offset = fp->f_offset + offset; 2568 error = 0; 2569 break; 2570 case L_XTND: 2571 error = VOP_GETATTR(vp, &vattr); 2572 spin_lock(&fp->f_spin); 2573 new_offset = offset + vattr.va_size; 2574 break; 2575 case L_SET: 2576 new_offset = offset; 2577 error = 0; 2578 spin_lock(&fp->f_spin); 2579 break; 2580 default: 2581 new_offset = 0; 2582 error = EINVAL; 2583 spin_lock(&fp->f_spin); 2584 break; 2585 } 2586 2587 /* 2588 * Validate the seek position. Negative offsets are not allowed 2589 * for regular files or directories. 2590 * 2591 * Normally we would also not want to allow negative offsets for 2592 * character and block-special devices. However kvm addresses 2593 * on 64 bit architectures might appear to be negative and must 2594 * be allowed. 
2595 */ 2596 if (error == 0) { 2597 if (new_offset < 0 && 2598 (vp->v_type == VREG || vp->v_type == VDIR)) { 2599 error = EINVAL; 2600 } else { 2601 fp->f_offset = new_offset; 2602 } 2603 } 2604 *res = fp->f_offset; 2605 spin_unlock(&fp->f_spin); 2606 done: 2607 fdrop(fp); 2608 return (error); 2609 } 2610 2611 /* 2612 * lseek_args(int fd, int pad, off_t offset, int whence) 2613 * 2614 * Reposition read/write file offset. 2615 */ 2616 int 2617 sys_lseek(struct lseek_args *uap) 2618 { 2619 int error; 2620 2621 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2622 &uap->sysmsg_offset); 2623 2624 return (error); 2625 } 2626 2627 /* 2628 * Check if current process can access given file. amode is a bitmask of *_OK 2629 * access bits. flags is a bitmask of AT_* flags. 2630 */ 2631 int 2632 kern_access(struct nlookupdata *nd, int amode, int flags) 2633 { 2634 struct vnode *vp; 2635 int error, mode; 2636 2637 if (flags & ~AT_EACCESS) 2638 return (EINVAL); 2639 nd->nl_flags |= NLC_SHAREDLOCK; 2640 if ((error = nlookup(nd)) != 0) 2641 return (error); 2642 retry: 2643 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2644 if (error) 2645 return (error); 2646 2647 /* Flags == 0 means only check for existence. */ 2648 if (amode) { 2649 mode = 0; 2650 if (amode & R_OK) 2651 mode |= VREAD; 2652 if (amode & W_OK) 2653 mode |= VWRITE; 2654 if (amode & X_OK) 2655 mode |= VEXEC; 2656 if ((mode & VWRITE) == 0 || 2657 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2658 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2659 2660 /* 2661 * If the file handle is stale we have to re-resolve the 2662 * entry with the ncp held exclusively. This is a hack 2663 * at the moment. 
2664 */ 2665 if (error == ESTALE) { 2666 vput(vp); 2667 cache_unlock(&nd->nl_nch); 2668 cache_lock(&nd->nl_nch); 2669 cache_setunresolved(&nd->nl_nch); 2670 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2671 if (error == 0) { 2672 vp = NULL; 2673 goto retry; 2674 } 2675 return(error); 2676 } 2677 } 2678 vput(vp); 2679 return (error); 2680 } 2681 2682 /* 2683 * access_args(char *path, int flags) 2684 * 2685 * Check access permissions. 2686 */ 2687 int 2688 sys_access(struct access_args *uap) 2689 { 2690 struct nlookupdata nd; 2691 int error; 2692 2693 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2694 if (error == 0) 2695 error = kern_access(&nd, uap->flags, 0); 2696 nlookup_done(&nd); 2697 return (error); 2698 } 2699 2700 2701 /* 2702 * eaccess_args(char *path, int flags) 2703 * 2704 * Check access permissions. 2705 */ 2706 int 2707 sys_eaccess(struct eaccess_args *uap) 2708 { 2709 struct nlookupdata nd; 2710 int error; 2711 2712 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2713 if (error == 0) 2714 error = kern_access(&nd, uap->flags, AT_EACCESS); 2715 nlookup_done(&nd); 2716 return (error); 2717 } 2718 2719 2720 /* 2721 * faccessat_args(int fd, char *path, int amode, int flags) 2722 * 2723 * Check access permissions. 
2724 */ 2725 int 2726 sys_faccessat(struct faccessat_args *uap) 2727 { 2728 struct nlookupdata nd; 2729 struct file *fp; 2730 int error; 2731 2732 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2733 NLC_FOLLOW); 2734 if (error == 0) 2735 error = kern_access(&nd, uap->amode, uap->flags); 2736 nlookup_done_at(&nd, fp); 2737 return (error); 2738 } 2739 2740 int 2741 kern_stat(struct nlookupdata *nd, struct stat *st) 2742 { 2743 int error; 2744 struct vnode *vp; 2745 2746 nd->nl_flags |= NLC_SHAREDLOCK; 2747 if ((error = nlookup(nd)) != 0) 2748 return (error); 2749 again: 2750 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2751 return (ENOENT); 2752 2753 if ((error = vget(vp, LK_SHARED)) != 0) 2754 return (error); 2755 error = vn_stat(vp, st, nd->nl_cred); 2756 2757 /* 2758 * If the file handle is stale we have to re-resolve the 2759 * entry with the ncp held exclusively. This is a hack 2760 * at the moment. 2761 */ 2762 if (error == ESTALE) { 2763 vput(vp); 2764 cache_unlock(&nd->nl_nch); 2765 cache_lock(&nd->nl_nch); 2766 cache_setunresolved(&nd->nl_nch); 2767 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2768 if (error == 0) 2769 goto again; 2770 } else { 2771 vput(vp); 2772 } 2773 return (error); 2774 } 2775 2776 /* 2777 * stat_args(char *path, struct stat *ub) 2778 * 2779 * Get file status; this version follows links. 2780 */ 2781 int 2782 sys_stat(struct stat_args *uap) 2783 { 2784 struct nlookupdata nd; 2785 struct stat st; 2786 int error; 2787 2788 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2789 if (error == 0) { 2790 error = kern_stat(&nd, &st); 2791 if (error == 0) 2792 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2793 } 2794 nlookup_done(&nd); 2795 return (error); 2796 } 2797 2798 /* 2799 * lstat_args(char *path, struct stat *ub) 2800 * 2801 * Get file status; this version does not follow links. 
2802 */ 2803 int 2804 sys_lstat(struct lstat_args *uap) 2805 { 2806 struct nlookupdata nd; 2807 struct stat st; 2808 int error; 2809 2810 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2811 if (error == 0) { 2812 error = kern_stat(&nd, &st); 2813 if (error == 0) 2814 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2815 } 2816 nlookup_done(&nd); 2817 return (error); 2818 } 2819 2820 /* 2821 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2822 * 2823 * Get status of file pointed to by fd/path. 2824 */ 2825 int 2826 sys_fstatat(struct fstatat_args *uap) 2827 { 2828 struct nlookupdata nd; 2829 struct stat st; 2830 int error; 2831 int flags; 2832 struct file *fp; 2833 2834 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2835 return (EINVAL); 2836 2837 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2838 2839 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2840 UIO_USERSPACE, flags); 2841 if (error == 0) { 2842 error = kern_stat(&nd, &st); 2843 if (error == 0) 2844 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2845 } 2846 nlookup_done_at(&nd, fp); 2847 return (error); 2848 } 2849 2850 static int 2851 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2852 { 2853 struct nlookupdata nd; 2854 struct vnode *vp; 2855 int error; 2856 2857 vp = NULL; 2858 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2859 if (error == 0) 2860 error = nlookup(&nd); 2861 if (error == 0) 2862 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2863 nlookup_done(&nd); 2864 if (error == 0) { 2865 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2866 vput(vp); 2867 } 2868 return (error); 2869 } 2870 2871 /* 2872 * pathconf_Args(char *path, int name) 2873 * 2874 * Get configurable pathname variables. 
2875 */ 2876 int 2877 sys_pathconf(struct pathconf_args *uap) 2878 { 2879 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2880 &uap->sysmsg_reg)); 2881 } 2882 2883 /* 2884 * lpathconf_Args(char *path, int name) 2885 * 2886 * Get configurable pathname variables, but don't follow symlinks. 2887 */ 2888 int 2889 sys_lpathconf(struct lpathconf_args *uap) 2890 { 2891 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2892 } 2893 2894 /* 2895 * XXX: daver 2896 * kern_readlink isn't properly split yet. There is a copyin burried 2897 * in VOP_READLINK(). 2898 */ 2899 int 2900 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2901 { 2902 struct thread *td = curthread; 2903 struct vnode *vp; 2904 struct iovec aiov; 2905 struct uio auio; 2906 int error; 2907 2908 nd->nl_flags |= NLC_SHAREDLOCK; 2909 if ((error = nlookup(nd)) != 0) 2910 return (error); 2911 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2912 if (error) 2913 return (error); 2914 if (vp->v_type != VLNK) { 2915 error = EINVAL; 2916 } else { 2917 aiov.iov_base = buf; 2918 aiov.iov_len = count; 2919 auio.uio_iov = &aiov; 2920 auio.uio_iovcnt = 1; 2921 auio.uio_offset = 0; 2922 auio.uio_rw = UIO_READ; 2923 auio.uio_segflg = UIO_USERSPACE; 2924 auio.uio_td = td; 2925 auio.uio_resid = count; 2926 error = VOP_READLINK(vp, &auio, td->td_ucred); 2927 } 2928 vput(vp); 2929 *res = count - auio.uio_resid; 2930 return (error); 2931 } 2932 2933 /* 2934 * readlink_args(char *path, char *buf, int count) 2935 * 2936 * Return target name of a symbolic link. 
2937 */ 2938 int 2939 sys_readlink(struct readlink_args *uap) 2940 { 2941 struct nlookupdata nd; 2942 int error; 2943 2944 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2945 if (error == 0) { 2946 error = kern_readlink(&nd, uap->buf, uap->count, 2947 &uap->sysmsg_result); 2948 } 2949 nlookup_done(&nd); 2950 return (error); 2951 } 2952 2953 /* 2954 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2955 * 2956 * Return target name of a symbolic link. The path is relative to the 2957 * directory associated with fd. 2958 */ 2959 int 2960 sys_readlinkat(struct readlinkat_args *uap) 2961 { 2962 struct nlookupdata nd; 2963 struct file *fp; 2964 int error; 2965 2966 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2967 if (error == 0) { 2968 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2969 &uap->sysmsg_result); 2970 } 2971 nlookup_done_at(&nd, fp); 2972 return (error); 2973 } 2974 2975 static int 2976 setfflags(struct vnode *vp, int flags) 2977 { 2978 struct thread *td = curthread; 2979 int error; 2980 struct vattr vattr; 2981 2982 /* 2983 * Prevent non-root users from setting flags on devices. When 2984 * a device is reused, users can retain ownership of the device 2985 * if they are allowed to set flags and programs assume that 2986 * chown can't fail when done as root. 2987 */ 2988 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2989 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2990 return (error); 2991 2992 /* 2993 * note: vget is required for any operation that might mod the vnode 2994 * so VINACTIVE is properly cleared. 2995 */ 2996 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2997 VATTR_NULL(&vattr); 2998 vattr.va_flags = flags; 2999 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3000 vput(vp); 3001 } 3002 return (error); 3003 } 3004 3005 /* 3006 * chflags(char *path, int flags) 3007 * 3008 * Change flags of a file given a path name. 
3009 */ 3010 int 3011 sys_chflags(struct chflags_args *uap) 3012 { 3013 struct nlookupdata nd; 3014 struct vnode *vp; 3015 int error; 3016 3017 vp = NULL; 3018 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3019 if (error == 0) 3020 error = nlookup(&nd); 3021 if (error == 0) 3022 error = ncp_writechk(&nd.nl_nch); 3023 if (error == 0) 3024 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3025 nlookup_done(&nd); 3026 if (error == 0) { 3027 error = setfflags(vp, uap->flags); 3028 vrele(vp); 3029 } 3030 return (error); 3031 } 3032 3033 /* 3034 * lchflags(char *path, int flags) 3035 * 3036 * Change flags of a file given a path name, but don't follow symlinks. 3037 */ 3038 int 3039 sys_lchflags(struct lchflags_args *uap) 3040 { 3041 struct nlookupdata nd; 3042 struct vnode *vp; 3043 int error; 3044 3045 vp = NULL; 3046 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3047 if (error == 0) 3048 error = nlookup(&nd); 3049 if (error == 0) 3050 error = ncp_writechk(&nd.nl_nch); 3051 if (error == 0) 3052 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3053 nlookup_done(&nd); 3054 if (error == 0) { 3055 error = setfflags(vp, uap->flags); 3056 vrele(vp); 3057 } 3058 return (error); 3059 } 3060 3061 /* 3062 * fchflags_args(int fd, int flags) 3063 * 3064 * Change flags of a file given a file descriptor. 
3065 */ 3066 int 3067 sys_fchflags(struct fchflags_args *uap) 3068 { 3069 struct thread *td = curthread; 3070 struct proc *p = td->td_proc; 3071 struct file *fp; 3072 int error; 3073 3074 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3075 return (error); 3076 if (fp->f_nchandle.ncp) 3077 error = ncp_writechk(&fp->f_nchandle); 3078 if (error == 0) 3079 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3080 fdrop(fp); 3081 return (error); 3082 } 3083 3084 static int 3085 setfmode(struct vnode *vp, int mode) 3086 { 3087 struct thread *td = curthread; 3088 int error; 3089 struct vattr vattr; 3090 3091 /* 3092 * note: vget is required for any operation that might mod the vnode 3093 * so VINACTIVE is properly cleared. 3094 */ 3095 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3096 VATTR_NULL(&vattr); 3097 vattr.va_mode = mode & ALLPERMS; 3098 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3099 vput(vp); 3100 } 3101 return error; 3102 } 3103 3104 int 3105 kern_chmod(struct nlookupdata *nd, int mode) 3106 { 3107 struct vnode *vp; 3108 int error; 3109 3110 if ((error = nlookup(nd)) != 0) 3111 return (error); 3112 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3113 return (error); 3114 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3115 error = setfmode(vp, mode); 3116 vrele(vp); 3117 return (error); 3118 } 3119 3120 /* 3121 * chmod_args(char *path, int mode) 3122 * 3123 * Change mode of a file given path name. 3124 */ 3125 int 3126 sys_chmod(struct chmod_args *uap) 3127 { 3128 struct nlookupdata nd; 3129 int error; 3130 3131 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3132 if (error == 0) 3133 error = kern_chmod(&nd, uap->mode); 3134 nlookup_done(&nd); 3135 return (error); 3136 } 3137 3138 /* 3139 * lchmod_args(char *path, int mode) 3140 * 3141 * Change mode of a file given path name (don't follow links.) 
3142 */ 3143 int 3144 sys_lchmod(struct lchmod_args *uap) 3145 { 3146 struct nlookupdata nd; 3147 int error; 3148 3149 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3150 if (error == 0) 3151 error = kern_chmod(&nd, uap->mode); 3152 nlookup_done(&nd); 3153 return (error); 3154 } 3155 3156 /* 3157 * fchmod_args(int fd, int mode) 3158 * 3159 * Change mode of a file given a file descriptor. 3160 */ 3161 int 3162 sys_fchmod(struct fchmod_args *uap) 3163 { 3164 struct thread *td = curthread; 3165 struct proc *p = td->td_proc; 3166 struct file *fp; 3167 int error; 3168 3169 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3170 return (error); 3171 if (fp->f_nchandle.ncp) 3172 error = ncp_writechk(&fp->f_nchandle); 3173 if (error == 0) 3174 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3175 fdrop(fp); 3176 return (error); 3177 } 3178 3179 /* 3180 * fchmodat_args(char *path, int mode) 3181 * 3182 * Change mode of a file pointed to by fd/path. 3183 */ 3184 int 3185 sys_fchmodat(struct fchmodat_args *uap) 3186 { 3187 struct nlookupdata nd; 3188 struct file *fp; 3189 int error; 3190 int flags; 3191 3192 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3193 return (EINVAL); 3194 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3195 3196 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3197 UIO_USERSPACE, flags); 3198 if (error == 0) 3199 error = kern_chmod(&nd, uap->mode); 3200 nlookup_done_at(&nd, fp); 3201 return (error); 3202 } 3203 3204 static int 3205 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3206 { 3207 struct thread *td = curthread; 3208 int error; 3209 struct vattr vattr; 3210 uid_t o_uid; 3211 gid_t o_gid; 3212 uint64_t size; 3213 3214 /* 3215 * note: vget is required for any operation that might mod the vnode 3216 * so VINACTIVE is properly cleared. 
3217 */ 3218 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3219 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3220 return error; 3221 o_uid = vattr.va_uid; 3222 o_gid = vattr.va_gid; 3223 size = vattr.va_size; 3224 3225 VATTR_NULL(&vattr); 3226 vattr.va_uid = uid; 3227 vattr.va_gid = gid; 3228 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3229 vput(vp); 3230 } 3231 3232 if (error == 0) { 3233 if (uid == -1) 3234 uid = o_uid; 3235 if (gid == -1) 3236 gid = o_gid; 3237 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3238 VFS_ACCOUNT(mp, uid, gid, size); 3239 } 3240 3241 return error; 3242 } 3243 3244 int 3245 kern_chown(struct nlookupdata *nd, int uid, int gid) 3246 { 3247 struct vnode *vp; 3248 int error; 3249 3250 if ((error = nlookup(nd)) != 0) 3251 return (error); 3252 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3253 return (error); 3254 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3255 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3256 vrele(vp); 3257 return (error); 3258 } 3259 3260 /* 3261 * chown(char *path, int uid, int gid) 3262 * 3263 * Set ownership given a path name. 3264 */ 3265 int 3266 sys_chown(struct chown_args *uap) 3267 { 3268 struct nlookupdata nd; 3269 int error; 3270 3271 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3272 if (error == 0) 3273 error = kern_chown(&nd, uap->uid, uap->gid); 3274 nlookup_done(&nd); 3275 return (error); 3276 } 3277 3278 /* 3279 * lchown_args(char *path, int uid, int gid) 3280 * 3281 * Set ownership given a path name, do not cross symlinks. 3282 */ 3283 int 3284 sys_lchown(struct lchown_args *uap) 3285 { 3286 struct nlookupdata nd; 3287 int error; 3288 3289 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3290 if (error == 0) 3291 error = kern_chown(&nd, uap->uid, uap->gid); 3292 nlookup_done(&nd); 3293 return (error); 3294 } 3295 3296 /* 3297 * fchown_args(int fd, int uid, int gid) 3298 * 3299 * Set ownership given a file descriptor. 
3300 */ 3301 int 3302 sys_fchown(struct fchown_args *uap) 3303 { 3304 struct thread *td = curthread; 3305 struct proc *p = td->td_proc; 3306 struct file *fp; 3307 int error; 3308 3309 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3310 return (error); 3311 if (fp->f_nchandle.ncp) 3312 error = ncp_writechk(&fp->f_nchandle); 3313 if (error == 0) 3314 error = setfown(p->p_fd->fd_ncdir.mount, 3315 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3316 fdrop(fp); 3317 return (error); 3318 } 3319 3320 /* 3321 * fchownat(int fd, char *path, int uid, int gid, int flags) 3322 * 3323 * Set ownership of file pointed to by fd/path. 3324 */ 3325 int 3326 sys_fchownat(struct fchownat_args *uap) 3327 { 3328 struct nlookupdata nd; 3329 struct file *fp; 3330 int error; 3331 int flags; 3332 3333 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3334 return (EINVAL); 3335 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3336 3337 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3338 UIO_USERSPACE, flags); 3339 if (error == 0) 3340 error = kern_chown(&nd, uap->uid, uap->gid); 3341 nlookup_done_at(&nd, fp); 3342 return (error); 3343 } 3344 3345 3346 static int 3347 getutimes(const struct timeval *tvp, struct timespec *tsp) 3348 { 3349 struct timeval tv[2]; 3350 3351 if (tvp == NULL) { 3352 microtime(&tv[0]); 3353 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3354 tsp[1] = tsp[0]; 3355 } else { 3356 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3357 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3358 } 3359 return 0; 3360 } 3361 3362 static int 3363 setutimes(struct vnode *vp, struct vattr *vattr, 3364 const struct timespec *ts, int nullflag) 3365 { 3366 struct thread *td = curthread; 3367 int error; 3368 3369 VATTR_NULL(vattr); 3370 vattr->va_atime = ts[0]; 3371 vattr->va_mtime = ts[1]; 3372 if (nullflag) 3373 vattr->va_vaflags |= VA_UTIMES_NULL; 3374 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3375 3376 return error; 3377 } 3378 3379 int 3380 kern_utimes(struct nlookupdata *nd, struct timeval 
*tptr) 3381 { 3382 struct timespec ts[2]; 3383 struct vnode *vp; 3384 struct vattr vattr; 3385 int error; 3386 3387 if ((error = getutimes(tptr, ts)) != 0) 3388 return (error); 3389 3390 /* 3391 * NOTE: utimes() succeeds for the owner even if the file 3392 * is not user-writable. 3393 */ 3394 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3395 3396 if ((error = nlookup(nd)) != 0) 3397 return (error); 3398 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3399 return (error); 3400 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3401 return (error); 3402 3403 /* 3404 * note: vget is required for any operation that might mod the vnode 3405 * so VINACTIVE is properly cleared. 3406 */ 3407 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3408 error = vget(vp, LK_EXCLUSIVE); 3409 if (error == 0) { 3410 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3411 vput(vp); 3412 } 3413 } 3414 vrele(vp); 3415 return (error); 3416 } 3417 3418 /* 3419 * utimes_args(char *path, struct timeval *tptr) 3420 * 3421 * Set the access and modification times of a file. 3422 */ 3423 int 3424 sys_utimes(struct utimes_args *uap) 3425 { 3426 struct timeval tv[2]; 3427 struct nlookupdata nd; 3428 int error; 3429 3430 if (uap->tptr) { 3431 error = copyin(uap->tptr, tv, sizeof(tv)); 3432 if (error) 3433 return (error); 3434 } 3435 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3436 if (error == 0) 3437 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3438 nlookup_done(&nd); 3439 return (error); 3440 } 3441 3442 /* 3443 * lutimes_args(char *path, struct timeval *tptr) 3444 * 3445 * Set the access and modification times of a file. 
3446 */ 3447 int 3448 sys_lutimes(struct lutimes_args *uap) 3449 { 3450 struct timeval tv[2]; 3451 struct nlookupdata nd; 3452 int error; 3453 3454 if (uap->tptr) { 3455 error = copyin(uap->tptr, tv, sizeof(tv)); 3456 if (error) 3457 return (error); 3458 } 3459 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3460 if (error == 0) 3461 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3462 nlookup_done(&nd); 3463 return (error); 3464 } 3465 3466 /* 3467 * Set utimes on a file descriptor. The creds used to open the 3468 * file are used to determine whether the operation is allowed 3469 * or not. 3470 */ 3471 int 3472 kern_futimes(int fd, struct timeval *tptr) 3473 { 3474 struct thread *td = curthread; 3475 struct proc *p = td->td_proc; 3476 struct timespec ts[2]; 3477 struct file *fp; 3478 struct vnode *vp; 3479 struct vattr vattr; 3480 int error; 3481 3482 error = getutimes(tptr, ts); 3483 if (error) 3484 return (error); 3485 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3486 return (error); 3487 if (fp->f_nchandle.ncp) 3488 error = ncp_writechk(&fp->f_nchandle); 3489 if (error == 0) { 3490 vp = fp->f_data; 3491 error = vget(vp, LK_EXCLUSIVE); 3492 if (error == 0) { 3493 error = VOP_GETATTR(vp, &vattr); 3494 if (error == 0) { 3495 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3496 fp->f_cred); 3497 } 3498 if (error == 0) { 3499 error = setutimes(vp, &vattr, ts, 3500 (tptr == NULL)); 3501 } 3502 vput(vp); 3503 } 3504 } 3505 fdrop(fp); 3506 return (error); 3507 } 3508 3509 /* 3510 * futimes_args(int fd, struct timeval *tptr) 3511 * 3512 * Set the access and modification times of a file. 3513 */ 3514 int 3515 sys_futimes(struct futimes_args *uap) 3516 { 3517 struct timeval tv[2]; 3518 int error; 3519 3520 if (uap->tptr) { 3521 error = copyin(uap->tptr, tv, sizeof(tv)); 3522 if (error) 3523 return (error); 3524 } 3525 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3526 3527 return (error); 3528 } 3529 3530 int 3531 kern_truncate(struct nlookupdata *nd, off_t length) 3532 { 3533 struct vnode *vp; 3534 struct vattr vattr; 3535 int error; 3536 uid_t uid = 0; 3537 gid_t gid = 0; 3538 uint64_t old_size = 0; 3539 3540 if (length < 0) 3541 return(EINVAL); 3542 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3543 if ((error = nlookup(nd)) != 0) 3544 return (error); 3545 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3546 return (error); 3547 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3548 return (error); 3549 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3550 if (error) { 3551 vrele(vp); 3552 return (error); 3553 } 3554 if (vp->v_type == VDIR) { 3555 error = EISDIR; 3556 goto done; 3557 } 3558 if (vfs_quota_enabled) { 3559 error = VOP_GETATTR(vp, &vattr); 3560 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3561 uid = vattr.va_uid; 3562 gid = vattr.va_gid; 3563 old_size = vattr.va_size; 3564 } 3565 3566 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3567 VATTR_NULL(&vattr); 3568 vattr.va_size = length; 3569 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3570 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3571 } 3572 done: 3573 vput(vp); 3574 return (error); 3575 } 3576 3577 /* 3578 * truncate(char *path, int pad, off_t length) 3579 * 3580 * Truncate a file given its path name. 
3581 */ 3582 int 3583 sys_truncate(struct truncate_args *uap) 3584 { 3585 struct nlookupdata nd; 3586 int error; 3587 3588 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3589 if (error == 0) 3590 error = kern_truncate(&nd, uap->length); 3591 nlookup_done(&nd); 3592 return error; 3593 } 3594 3595 int 3596 kern_ftruncate(int fd, off_t length) 3597 { 3598 struct thread *td = curthread; 3599 struct proc *p = td->td_proc; 3600 struct vattr vattr; 3601 struct vnode *vp; 3602 struct file *fp; 3603 int error; 3604 uid_t uid = 0; 3605 gid_t gid = 0; 3606 uint64_t old_size = 0; 3607 struct mount *mp; 3608 3609 if (length < 0) 3610 return(EINVAL); 3611 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3612 return (error); 3613 if (fp->f_nchandle.ncp) { 3614 error = ncp_writechk(&fp->f_nchandle); 3615 if (error) 3616 goto done; 3617 } 3618 if ((fp->f_flag & FWRITE) == 0) { 3619 error = EINVAL; 3620 goto done; 3621 } 3622 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3623 error = EINVAL; 3624 goto done; 3625 } 3626 vp = (struct vnode *)fp->f_data; 3627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3628 if (vp->v_type == VDIR) { 3629 error = EISDIR; 3630 vn_unlock(vp); 3631 goto done; 3632 } 3633 3634 if (vfs_quota_enabled) { 3635 error = VOP_GETATTR(vp, &vattr); 3636 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3637 uid = vattr.va_uid; 3638 gid = vattr.va_gid; 3639 old_size = vattr.va_size; 3640 } 3641 3642 if ((error = vn_writechk(vp, NULL)) == 0) { 3643 VATTR_NULL(&vattr); 3644 vattr.va_size = length; 3645 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3646 mp = vq_vptomp(vp); 3647 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3648 } 3649 vn_unlock(vp); 3650 done: 3651 fdrop(fp); 3652 return (error); 3653 } 3654 3655 /* 3656 * ftruncate_args(int fd, int pad, off_t length) 3657 * 3658 * Truncate a file given a file descriptor. 
3659 */ 3660 int 3661 sys_ftruncate(struct ftruncate_args *uap) 3662 { 3663 int error; 3664 3665 error = kern_ftruncate(uap->fd, uap->length); 3666 3667 return (error); 3668 } 3669 3670 /* 3671 * fsync(int fd) 3672 * 3673 * Sync an open file. 3674 */ 3675 int 3676 sys_fsync(struct fsync_args *uap) 3677 { 3678 struct thread *td = curthread; 3679 struct proc *p = td->td_proc; 3680 struct vnode *vp; 3681 struct file *fp; 3682 vm_object_t obj; 3683 int error; 3684 3685 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3686 return (error); 3687 vp = (struct vnode *)fp->f_data; 3688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3689 if ((obj = vp->v_object) != NULL) { 3690 if (vp->v_mount == NULL || 3691 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3692 vm_object_page_clean(obj, 0, 0, 0); 3693 } 3694 } 3695 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3696 if (error == 0 && vp->v_mount) 3697 error = buf_fsync(vp); 3698 vn_unlock(vp); 3699 fdrop(fp); 3700 3701 return (error); 3702 } 3703 3704 int 3705 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3706 { 3707 struct nchandle fnchd; 3708 struct nchandle tnchd; 3709 struct namecache *ncp; 3710 struct vnode *fdvp; 3711 struct vnode *tdvp; 3712 struct mount *mp; 3713 int error; 3714 3715 bwillinode(1); 3716 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3717 if ((error = nlookup(fromnd)) != 0) 3718 return (error); 3719 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3720 return (ENOENT); 3721 fnchd.mount = fromnd->nl_nch.mount; 3722 cache_hold(&fnchd); 3723 3724 /* 3725 * unlock the source nch so we can lookup the target nch without 3726 * deadlocking. The target may or may not exist so we do not check 3727 * for a target vp like kern_mkdir() and other creation functions do. 3728 * 3729 * The source and target directories are ref'd and rechecked after 3730 * everything is relocked to determine if the source or target file 3731 * has been renamed. 
3732 */ 3733 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3734 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3735 cache_unlock(&fromnd->nl_nch); 3736 3737 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3738 if ((error = nlookup(tond)) != 0) { 3739 cache_drop(&fnchd); 3740 return (error); 3741 } 3742 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3743 cache_drop(&fnchd); 3744 return (ENOENT); 3745 } 3746 tnchd.mount = tond->nl_nch.mount; 3747 cache_hold(&tnchd); 3748 3749 /* 3750 * If the source and target are the same there is nothing to do 3751 */ 3752 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3753 cache_drop(&fnchd); 3754 cache_drop(&tnchd); 3755 return (0); 3756 } 3757 3758 /* 3759 * Mount points cannot be renamed or overwritten 3760 */ 3761 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3762 NCF_ISMOUNTPT 3763 ) { 3764 cache_drop(&fnchd); 3765 cache_drop(&tnchd); 3766 return (EINVAL); 3767 } 3768 3769 /* 3770 * Relock the source ncp. cache_relock() will deal with any 3771 * deadlocks against the already-locked tond and will also 3772 * make sure both are resolved. 3773 * 3774 * NOTE AFTER RELOCKING: The source or target ncp may have become 3775 * invalid while they were unlocked, nc_vp and nc_mount could 3776 * be NULL. 3777 */ 3778 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3779 &tond->nl_nch, tond->nl_cred); 3780 fromnd->nl_flags |= NLC_NCPISLOCKED; 3781 3782 /* 3783 * If either fromnd or tond are marked destroyed a ripout occured 3784 * out from under us and we must retry. 
3785 */ 3786 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3787 fromnd->nl_nch.ncp->nc_vp == NULL || 3788 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3789 kprintf("kern_rename: retry due to ripout on: " 3790 "\"%s\" -> \"%s\"\n", 3791 fromnd->nl_nch.ncp->nc_name, 3792 tond->nl_nch.ncp->nc_name); 3793 cache_drop(&fnchd); 3794 cache_drop(&tnchd); 3795 return (EAGAIN); 3796 } 3797 3798 /* 3799 * make sure the parent directories linkages are the same 3800 */ 3801 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3802 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3803 cache_drop(&fnchd); 3804 cache_drop(&tnchd); 3805 return (ENOENT); 3806 } 3807 3808 /* 3809 * Both the source and target must be within the same filesystem and 3810 * in the same filesystem as their parent directories within the 3811 * namecache topology. 3812 * 3813 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3814 */ 3815 mp = fnchd.mount; 3816 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3817 mp != tond->nl_nch.mount) { 3818 cache_drop(&fnchd); 3819 cache_drop(&tnchd); 3820 return (EXDEV); 3821 } 3822 3823 /* 3824 * Make sure the mount point is writable 3825 */ 3826 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3827 cache_drop(&fnchd); 3828 cache_drop(&tnchd); 3829 return (error); 3830 } 3831 3832 /* 3833 * If the target exists and either the source or target is a directory, 3834 * then both must be directories. 3835 * 3836 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3837 * have become NULL. 3838 */ 3839 if (tond->nl_nch.ncp->nc_vp) { 3840 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3841 error = ENOENT; 3842 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3843 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3844 error = ENOTDIR; 3845 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3846 error = EISDIR; 3847 } 3848 } 3849 3850 /* 3851 * You cannot rename a source into itself or a subdirectory of itself. 
3852 * We check this by travsersing the target directory upwards looking 3853 * for a match against the source. 3854 * 3855 * XXX MPSAFE 3856 */ 3857 if (error == 0) { 3858 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3859 if (fromnd->nl_nch.ncp == ncp) { 3860 error = EINVAL; 3861 break; 3862 } 3863 } 3864 } 3865 3866 cache_drop(&fnchd); 3867 cache_drop(&tnchd); 3868 3869 /* 3870 * Even though the namespaces are different, they may still represent 3871 * hardlinks to the same file. The filesystem might have a hard time 3872 * with this so we issue a NREMOVE of the source instead of a NRENAME 3873 * when we detect the situation. 3874 */ 3875 if (error == 0) { 3876 fdvp = fromnd->nl_dvp; 3877 tdvp = tond->nl_dvp; 3878 if (fdvp == NULL || tdvp == NULL) { 3879 error = EPERM; 3880 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3881 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3882 fromnd->nl_cred); 3883 } else { 3884 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3885 fdvp, tdvp, tond->nl_cred); 3886 } 3887 } 3888 return (error); 3889 } 3890 3891 /* 3892 * rename_args(char *from, char *to) 3893 * 3894 * Rename files. Source and destination must either both be directories, 3895 * or both not be directories. If target is a directory, it must be empty. 3896 */ 3897 int 3898 sys_rename(struct rename_args *uap) 3899 { 3900 struct nlookupdata fromnd, tond; 3901 int error; 3902 3903 do { 3904 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3905 if (error == 0) { 3906 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3907 if (error == 0) 3908 error = kern_rename(&fromnd, &tond); 3909 nlookup_done(&tond); 3910 } 3911 nlookup_done(&fromnd); 3912 } while (error == EAGAIN); 3913 return (error); 3914 } 3915 3916 /* 3917 * renameat_args(int oldfd, char *old, int newfd, char *new) 3918 * 3919 * Rename files using paths relative to the directories associated with 3920 * oldfd and newfd. 
Source and destination must either both be directories, 3921 * or both not be directories. If target is a directory, it must be empty. 3922 */ 3923 int 3924 sys_renameat(struct renameat_args *uap) 3925 { 3926 struct nlookupdata oldnd, newnd; 3927 struct file *oldfp, *newfp; 3928 int error; 3929 3930 do { 3931 error = nlookup_init_at(&oldnd, &oldfp, 3932 uap->oldfd, uap->old, 3933 UIO_USERSPACE, 0); 3934 if (error == 0) { 3935 error = nlookup_init_at(&newnd, &newfp, 3936 uap->newfd, uap->new, 3937 UIO_USERSPACE, 0); 3938 if (error == 0) 3939 error = kern_rename(&oldnd, &newnd); 3940 nlookup_done_at(&newnd, newfp); 3941 } 3942 nlookup_done_at(&oldnd, oldfp); 3943 } while (error == EAGAIN); 3944 return (error); 3945 } 3946 3947 int 3948 kern_mkdir(struct nlookupdata *nd, int mode) 3949 { 3950 struct thread *td = curthread; 3951 struct proc *p = td->td_proc; 3952 struct vnode *vp; 3953 struct vattr vattr; 3954 int error; 3955 3956 bwillinode(1); 3957 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3958 if ((error = nlookup(nd)) != 0) 3959 return (error); 3960 3961 if (nd->nl_nch.ncp->nc_vp) 3962 return (EEXIST); 3963 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3964 return (error); 3965 VATTR_NULL(&vattr); 3966 vattr.va_type = VDIR; 3967 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3968 3969 vp = NULL; 3970 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3971 if (error == 0) 3972 vput(vp); 3973 return (error); 3974 } 3975 3976 /* 3977 * mkdir_args(char *path, int mode) 3978 * 3979 * Make a directory file. 3980 */ 3981 int 3982 sys_mkdir(struct mkdir_args *uap) 3983 { 3984 struct nlookupdata nd; 3985 int error; 3986 3987 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3988 if (error == 0) 3989 error = kern_mkdir(&nd, uap->mode); 3990 nlookup_done(&nd); 3991 return (error); 3992 } 3993 3994 /* 3995 * mkdirat_args(int fd, char *path, mode_t mode) 3996 * 3997 * Make a directory file. 
The path is relative to the directory associated 3998 * with fd. 3999 */ 4000 int 4001 sys_mkdirat(struct mkdirat_args *uap) 4002 { 4003 struct nlookupdata nd; 4004 struct file *fp; 4005 int error; 4006 4007 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4008 if (error == 0) 4009 error = kern_mkdir(&nd, uap->mode); 4010 nlookup_done_at(&nd, fp); 4011 return (error); 4012 } 4013 4014 int 4015 kern_rmdir(struct nlookupdata *nd) 4016 { 4017 int error; 4018 4019 bwillinode(1); 4020 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4021 if ((error = nlookup(nd)) != 0) 4022 return (error); 4023 4024 /* 4025 * Do not allow directories representing mount points to be 4026 * deleted, even if empty. Check write perms on mount point 4027 * in case the vnode is aliased (aka nullfs). 4028 */ 4029 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4030 return (EBUSY); 4031 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4032 return (error); 4033 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4034 return (error); 4035 } 4036 4037 /* 4038 * rmdir_args(char *path) 4039 * 4040 * Remove a directory file. 
4041 */ 4042 int 4043 sys_rmdir(struct rmdir_args *uap) 4044 { 4045 struct nlookupdata nd; 4046 int error; 4047 4048 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4049 if (error == 0) 4050 error = kern_rmdir(&nd); 4051 nlookup_done(&nd); 4052 return (error); 4053 } 4054 4055 int 4056 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4057 enum uio_seg direction) 4058 { 4059 struct thread *td = curthread; 4060 struct proc *p = td->td_proc; 4061 struct vnode *vp; 4062 struct file *fp; 4063 struct uio auio; 4064 struct iovec aiov; 4065 off_t loff; 4066 int error, eofflag; 4067 4068 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4069 return (error); 4070 if ((fp->f_flag & FREAD) == 0) { 4071 error = EBADF; 4072 goto done; 4073 } 4074 vp = (struct vnode *)fp->f_data; 4075 unionread: 4076 if (vp->v_type != VDIR) { 4077 error = EINVAL; 4078 goto done; 4079 } 4080 aiov.iov_base = buf; 4081 aiov.iov_len = count; 4082 auio.uio_iov = &aiov; 4083 auio.uio_iovcnt = 1; 4084 auio.uio_rw = UIO_READ; 4085 auio.uio_segflg = direction; 4086 auio.uio_td = td; 4087 auio.uio_resid = count; 4088 loff = auio.uio_offset = fp->f_offset; 4089 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4090 fp->f_offset = auio.uio_offset; 4091 if (error) 4092 goto done; 4093 if (count == auio.uio_resid) { 4094 if (union_dircheckp) { 4095 error = union_dircheckp(td, &vp, fp); 4096 if (error == -1) 4097 goto unionread; 4098 if (error) 4099 goto done; 4100 } 4101 #if 0 4102 if ((vp->v_flag & VROOT) && 4103 (vp->v_mount->mnt_flag & MNT_UNION)) { 4104 struct vnode *tvp = vp; 4105 vp = vp->v_mount->mnt_vnodecovered; 4106 vref(vp); 4107 fp->f_data = vp; 4108 fp->f_offset = 0; 4109 vrele(tvp); 4110 goto unionread; 4111 } 4112 #endif 4113 } 4114 4115 /* 4116 * WARNING! *basep may not be wide enough to accomodate the 4117 * seek offset. XXX should we hack this to return the upper 32 bits 4118 * for offsets greater then 4G? 
4119 */ 4120 if (basep) { 4121 *basep = (long)loff; 4122 } 4123 *res = count - auio.uio_resid; 4124 done: 4125 fdrop(fp); 4126 return (error); 4127 } 4128 4129 /* 4130 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4131 * 4132 * Read a block of directory entries in a file system independent format. 4133 */ 4134 int 4135 sys_getdirentries(struct getdirentries_args *uap) 4136 { 4137 long base; 4138 int error; 4139 4140 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4141 &uap->sysmsg_result, UIO_USERSPACE); 4142 4143 if (error == 0 && uap->basep) 4144 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4145 return (error); 4146 } 4147 4148 /* 4149 * getdents_args(int fd, char *buf, size_t count) 4150 */ 4151 int 4152 sys_getdents(struct getdents_args *uap) 4153 { 4154 int error; 4155 4156 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4157 &uap->sysmsg_result, UIO_USERSPACE); 4158 4159 return (error); 4160 } 4161 4162 /* 4163 * Set the mode mask for creation of filesystem nodes. 4164 * 4165 * umask(int newmask) 4166 */ 4167 int 4168 sys_umask(struct umask_args *uap) 4169 { 4170 struct thread *td = curthread; 4171 struct proc *p = td->td_proc; 4172 struct filedesc *fdp; 4173 4174 fdp = p->p_fd; 4175 uap->sysmsg_result = fdp->fd_cmask; 4176 fdp->fd_cmask = uap->newmask & ALLPERMS; 4177 return (0); 4178 } 4179 4180 /* 4181 * revoke(char *path) 4182 * 4183 * Void all references to file by ripping underlying filesystem 4184 * away from vnode. 
4185 */ 4186 int 4187 sys_revoke(struct revoke_args *uap) 4188 { 4189 struct nlookupdata nd; 4190 struct vattr vattr; 4191 struct vnode *vp; 4192 struct ucred *cred; 4193 int error; 4194 4195 vp = NULL; 4196 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4197 if (error == 0) 4198 error = nlookup(&nd); 4199 if (error == 0) 4200 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4201 cred = crhold(nd.nl_cred); 4202 nlookup_done(&nd); 4203 if (error == 0) { 4204 if (error == 0) 4205 error = VOP_GETATTR(vp, &vattr); 4206 if (error == 0 && cred->cr_uid != vattr.va_uid) 4207 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4208 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4209 if (vcount(vp) > 0) 4210 error = vrevoke(vp, cred); 4211 } else if (error == 0) { 4212 error = vrevoke(vp, cred); 4213 } 4214 vrele(vp); 4215 } 4216 if (cred) 4217 crfree(cred); 4218 return (error); 4219 } 4220 4221 /* 4222 * getfh_args(char *fname, fhandle_t *fhp) 4223 * 4224 * Get (NFS) file handle 4225 * 4226 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4227 * mount. This allows nullfs mounts to be explicitly exported. 4228 * 4229 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4230 * 4231 * nullfs mounts of subdirectories are not safe. That is, it will 4232 * work, but you do not really have protection against access to 4233 * the related parent directories. 
4234 */ 4235 int 4236 sys_getfh(struct getfh_args *uap) 4237 { 4238 struct thread *td = curthread; 4239 struct nlookupdata nd; 4240 fhandle_t fh; 4241 struct vnode *vp; 4242 struct mount *mp; 4243 int error; 4244 4245 /* 4246 * Must be super user 4247 */ 4248 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4249 return (error); 4250 4251 vp = NULL; 4252 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4253 if (error == 0) 4254 error = nlookup(&nd); 4255 if (error == 0) 4256 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4257 mp = nd.nl_nch.mount; 4258 nlookup_done(&nd); 4259 if (error == 0) { 4260 bzero(&fh, sizeof(fh)); 4261 fh.fh_fsid = mp->mnt_stat.f_fsid; 4262 error = VFS_VPTOFH(vp, &fh.fh_fid); 4263 vput(vp); 4264 if (error == 0) 4265 error = copyout(&fh, uap->fhp, sizeof(fh)); 4266 } 4267 return (error); 4268 } 4269 4270 /* 4271 * fhopen_args(const struct fhandle *u_fhp, int flags) 4272 * 4273 * syscall for the rpc.lockd to use to translate a NFS file handle into 4274 * an open descriptor. 4275 * 4276 * warning: do not remove the priv_check() call or this becomes one giant 4277 * security hole. 4278 */ 4279 int 4280 sys_fhopen(struct fhopen_args *uap) 4281 { 4282 struct thread *td = curthread; 4283 struct filedesc *fdp = td->td_proc->p_fd; 4284 struct mount *mp; 4285 struct vnode *vp; 4286 struct fhandle fhp; 4287 struct vattr vat; 4288 struct vattr *vap = &vat; 4289 struct flock lf; 4290 int fmode, mode, error = 0, type; 4291 struct file *nfp; 4292 struct file *fp; 4293 int indx; 4294 4295 /* 4296 * Must be super user 4297 */ 4298 error = priv_check(td, PRIV_ROOT); 4299 if (error) 4300 return (error); 4301 4302 fmode = FFLAGS(uap->flags); 4303 4304 /* 4305 * Why not allow a non-read/write open for our lockd? 
4306 */ 4307 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4308 return (EINVAL); 4309 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4310 if (error) 4311 return(error); 4312 4313 /* 4314 * Find the mount point 4315 */ 4316 mp = vfs_getvfs(&fhp.fh_fsid); 4317 if (mp == NULL) { 4318 error = ESTALE; 4319 goto done; 4320 } 4321 /* now give me my vnode, it gets returned to me locked */ 4322 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4323 if (error) 4324 goto done; 4325 /* 4326 * from now on we have to make sure not 4327 * to forget about the vnode 4328 * any error that causes an abort must vput(vp) 4329 * just set error = err and 'goto bad;'. 4330 */ 4331 4332 /* 4333 * from vn_open 4334 */ 4335 if (vp->v_type == VLNK) { 4336 error = EMLINK; 4337 goto bad; 4338 } 4339 if (vp->v_type == VSOCK) { 4340 error = EOPNOTSUPP; 4341 goto bad; 4342 } 4343 mode = 0; 4344 if (fmode & (FWRITE | O_TRUNC)) { 4345 if (vp->v_type == VDIR) { 4346 error = EISDIR; 4347 goto bad; 4348 } 4349 error = vn_writechk(vp, NULL); 4350 if (error) 4351 goto bad; 4352 mode |= VWRITE; 4353 } 4354 if (fmode & FREAD) 4355 mode |= VREAD; 4356 if (mode) { 4357 error = VOP_ACCESS(vp, mode, td->td_ucred); 4358 if (error) 4359 goto bad; 4360 } 4361 if (fmode & O_TRUNC) { 4362 vn_unlock(vp); /* XXX */ 4363 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4364 VATTR_NULL(vap); 4365 vap->va_size = 0; 4366 error = VOP_SETATTR(vp, vap, td->td_ucred); 4367 if (error) 4368 goto bad; 4369 } 4370 4371 /* 4372 * VOP_OPEN needs the file pointer so it can potentially override 4373 * it. 4374 * 4375 * WARNING! no f_nchandle will be associated when fhopen()ing a 4376 * directory. XXX 4377 */ 4378 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4379 goto bad; 4380 fp = nfp; 4381 4382 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4383 if (error) { 4384 /* 4385 * setting f_ops this way prevents VOP_CLOSE from being 4386 * called or fdrop() releasing the vp from v_data. 
Since 4387 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4388 */ 4389 fp->f_ops = &badfileops; 4390 fp->f_data = NULL; 4391 goto bad_drop; 4392 } 4393 4394 /* 4395 * The fp is given its own reference, we still have our ref and lock. 4396 * 4397 * Assert that all regular files must be created with a VM object. 4398 */ 4399 if (vp->v_type == VREG && vp->v_object == NULL) { 4400 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4401 goto bad_drop; 4402 } 4403 4404 /* 4405 * The open was successful. Handle any locking requirements. 4406 */ 4407 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4408 lf.l_whence = SEEK_SET; 4409 lf.l_start = 0; 4410 lf.l_len = 0; 4411 if (fmode & O_EXLOCK) 4412 lf.l_type = F_WRLCK; 4413 else 4414 lf.l_type = F_RDLCK; 4415 if (fmode & FNONBLOCK) 4416 type = 0; 4417 else 4418 type = F_WAIT; 4419 vn_unlock(vp); 4420 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4421 /* 4422 * release our private reference. 4423 */ 4424 fsetfd(fdp, NULL, indx); 4425 fdrop(fp); 4426 vrele(vp); 4427 goto done; 4428 } 4429 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4430 fp->f_flag |= FHASLOCK; 4431 } 4432 4433 /* 4434 * Clean up. Associate the file pointer with the previously 4435 * reserved descriptor and return it. 
4436 */ 4437 vput(vp); 4438 fsetfd(fdp, fp, indx); 4439 fdrop(fp); 4440 uap->sysmsg_result = indx; 4441 if (uap->flags & O_CLOEXEC) 4442 error = fsetfdflags(fdp, indx, UF_EXCLOSE); 4443 return (error); 4444 4445 bad_drop: 4446 fsetfd(fdp, NULL, indx); 4447 fdrop(fp); 4448 bad: 4449 vput(vp); 4450 done: 4451 return (error); 4452 } 4453 4454 /* 4455 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4456 */ 4457 int 4458 sys_fhstat(struct fhstat_args *uap) 4459 { 4460 struct thread *td = curthread; 4461 struct stat sb; 4462 fhandle_t fh; 4463 struct mount *mp; 4464 struct vnode *vp; 4465 int error; 4466 4467 /* 4468 * Must be super user 4469 */ 4470 error = priv_check(td, PRIV_ROOT); 4471 if (error) 4472 return (error); 4473 4474 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4475 if (error) 4476 return (error); 4477 4478 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4479 error = ESTALE; 4480 if (error == 0) { 4481 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4482 error = vn_stat(vp, &sb, td->td_ucred); 4483 vput(vp); 4484 } 4485 } 4486 if (error == 0) 4487 error = copyout(&sb, uap->sb, sizeof(sb)); 4488 return (error); 4489 } 4490 4491 /* 4492 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4493 */ 4494 int 4495 sys_fhstatfs(struct fhstatfs_args *uap) 4496 { 4497 struct thread *td = curthread; 4498 struct proc *p = td->td_proc; 4499 struct statfs *sp; 4500 struct mount *mp; 4501 struct vnode *vp; 4502 struct statfs sb; 4503 char *fullpath, *freepath; 4504 fhandle_t fh; 4505 int error; 4506 4507 /* 4508 * Must be super user 4509 */ 4510 if ((error = priv_check(td, PRIV_ROOT))) 4511 return (error); 4512 4513 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4514 return (error); 4515 4516 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4517 error = ESTALE; 4518 goto done; 4519 } 4520 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4521 error = ESTALE; 4522 goto done; 4523 } 4524 4525 if ((error = VFS_FHTOVP(mp, NULL, 
&fh.fh_fid, &vp)) != 0) 4526 goto done; 4527 mp = vp->v_mount; 4528 sp = &mp->mnt_stat; 4529 vput(vp); 4530 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4531 goto done; 4532 4533 error = mount_path(p, mp, &fullpath, &freepath); 4534 if (error) 4535 goto done; 4536 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4537 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4538 kfree(freepath, M_TEMP); 4539 4540 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4541 if (priv_check(td, PRIV_ROOT)) { 4542 bcopy(sp, &sb, sizeof(sb)); 4543 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4544 sp = &sb; 4545 } 4546 error = copyout(sp, uap->buf, sizeof(*sp)); 4547 done: 4548 return (error); 4549 } 4550 4551 /* 4552 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4553 */ 4554 int 4555 sys_fhstatvfs(struct fhstatvfs_args *uap) 4556 { 4557 struct thread *td = curthread; 4558 struct proc *p = td->td_proc; 4559 struct statvfs *sp; 4560 struct mount *mp; 4561 struct vnode *vp; 4562 fhandle_t fh; 4563 int error; 4564 4565 /* 4566 * Must be super user 4567 */ 4568 if ((error = priv_check(td, PRIV_ROOT))) 4569 return (error); 4570 4571 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4572 return (error); 4573 4574 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4575 error = ESTALE; 4576 goto done; 4577 } 4578 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4579 error = ESTALE; 4580 goto done; 4581 } 4582 4583 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4584 goto done; 4585 mp = vp->v_mount; 4586 sp = &mp->mnt_vstat; 4587 vput(vp); 4588 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4589 goto done; 4590 4591 sp->f_flag = 0; 4592 if (mp->mnt_flag & MNT_RDONLY) 4593 sp->f_flag |= ST_RDONLY; 4594 if (mp->mnt_flag & MNT_NOSUID) 4595 sp->f_flag |= ST_NOSUID; 4596 error = copyout(sp, uap->buf, sizeof(*sp)); 4597 done: 4598 return (error); 4599 } 4600 4601 4602 /* 4603 * Syscall to push extended attribute configuration information into the 4604 * 
VFS. Accepts a path, which it converts to a mountpoint, as well as 4605 * a command (int cmd), and attribute name and misc data. For now, the 4606 * attribute name is left in userspace for consumption by the VFS_op. 4607 * It will probably be changed to be copied into sysspace by the 4608 * syscall in the future, once issues with various consumers of the 4609 * attribute code have raised their hands. 4610 * 4611 * Currently this is used only by UFS Extended Attributes. 4612 */ 4613 int 4614 sys_extattrctl(struct extattrctl_args *uap) 4615 { 4616 struct nlookupdata nd; 4617 struct vnode *vp; 4618 char attrname[EXTATTR_MAXNAMELEN]; 4619 int error; 4620 size_t size; 4621 4622 attrname[0] = 0; 4623 vp = NULL; 4624 error = 0; 4625 4626 if (error == 0 && uap->filename) { 4627 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4628 NLC_FOLLOW); 4629 if (error == 0) 4630 error = nlookup(&nd); 4631 if (error == 0) 4632 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4633 nlookup_done(&nd); 4634 } 4635 4636 if (error == 0 && uap->attrname) { 4637 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4638 &size); 4639 } 4640 4641 if (error == 0) { 4642 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4643 if (error == 0) 4644 error = nlookup(&nd); 4645 if (error == 0) 4646 error = ncp_writechk(&nd.nl_nch); 4647 if (error == 0) { 4648 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4649 uap->attrnamespace, 4650 uap->attrname, nd.nl_cred); 4651 } 4652 nlookup_done(&nd); 4653 } 4654 4655 return (error); 4656 } 4657 4658 /* 4659 * Syscall to get a named extended attribute on a file or directory. 
4660 */ 4661 int 4662 sys_extattr_set_file(struct extattr_set_file_args *uap) 4663 { 4664 char attrname[EXTATTR_MAXNAMELEN]; 4665 struct nlookupdata nd; 4666 struct vnode *vp; 4667 struct uio auio; 4668 struct iovec aiov; 4669 int error; 4670 4671 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4672 if (error) 4673 return (error); 4674 4675 vp = NULL; 4676 4677 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4678 if (error == 0) 4679 error = nlookup(&nd); 4680 if (error == 0) 4681 error = ncp_writechk(&nd.nl_nch); 4682 if (error == 0) 4683 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4684 if (error) { 4685 nlookup_done(&nd); 4686 return (error); 4687 } 4688 4689 bzero(&auio, sizeof(auio)); 4690 aiov.iov_base = uap->data; 4691 aiov.iov_len = uap->nbytes; 4692 auio.uio_iov = &aiov; 4693 auio.uio_iovcnt = 1; 4694 auio.uio_offset = 0; 4695 auio.uio_resid = uap->nbytes; 4696 auio.uio_rw = UIO_WRITE; 4697 auio.uio_td = curthread; 4698 4699 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4700 &auio, nd.nl_cred); 4701 4702 vput(vp); 4703 nlookup_done(&nd); 4704 return (error); 4705 } 4706 4707 /* 4708 * Syscall to get a named extended attribute on a file or directory. 
4709 */ 4710 int 4711 sys_extattr_get_file(struct extattr_get_file_args *uap) 4712 { 4713 char attrname[EXTATTR_MAXNAMELEN]; 4714 struct nlookupdata nd; 4715 struct uio auio; 4716 struct iovec aiov; 4717 struct vnode *vp; 4718 int error; 4719 4720 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4721 if (error) 4722 return (error); 4723 4724 vp = NULL; 4725 4726 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4727 if (error == 0) 4728 error = nlookup(&nd); 4729 if (error == 0) 4730 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 4731 if (error) { 4732 nlookup_done(&nd); 4733 return (error); 4734 } 4735 4736 bzero(&auio, sizeof(auio)); 4737 aiov.iov_base = uap->data; 4738 aiov.iov_len = uap->nbytes; 4739 auio.uio_iov = &aiov; 4740 auio.uio_iovcnt = 1; 4741 auio.uio_offset = 0; 4742 auio.uio_resid = uap->nbytes; 4743 auio.uio_rw = UIO_READ; 4744 auio.uio_td = curthread; 4745 4746 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4747 &auio, nd.nl_cred); 4748 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4749 4750 vput(vp); 4751 nlookup_done(&nd); 4752 return(error); 4753 } 4754 4755 /* 4756 * Syscall to delete a named extended attribute from a file or directory. 4757 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4758 */ 4759 int 4760 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4761 { 4762 char attrname[EXTATTR_MAXNAMELEN]; 4763 struct nlookupdata nd; 4764 struct vnode *vp; 4765 int error; 4766 4767 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4768 if (error) 4769 return(error); 4770 4771 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4772 if (error == 0) 4773 error = nlookup(&nd); 4774 if (error == 0) 4775 error = ncp_writechk(&nd.nl_nch); 4776 if (error == 0) { 4777 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4778 if (error == 0) { 4779 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4780 attrname, NULL, nd.nl_cred); 4781 vput(vp); 4782 } 4783 } 4784 nlookup_done(&nd); 4785 return(error); 4786 } 4787 4788 /* 4789 * Determine if the mount is visible to the process. 4790 */ 4791 static int 4792 chroot_visible_mnt(struct mount *mp, struct proc *p) 4793 { 4794 struct nchandle nch; 4795 4796 /* 4797 * Traverse from the mount point upwards. If we hit the process 4798 * root then the mount point is visible to the process. 4799 */ 4800 nch = mp->mnt_ncmountpt; 4801 while (nch.ncp) { 4802 if (nch.mount == p->p_fd->fd_nrdir.mount && 4803 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4804 return(1); 4805 } 4806 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4807 nch = nch.mount->mnt_ncmounton; 4808 } else { 4809 nch.ncp = nch.ncp->nc_parent; 4810 } 4811 } 4812 4813 /* 4814 * If the mount point is not visible to the process, but the 4815 * process root is in a subdirectory of the mount, return 4816 * TRUE anyway. 4817 */ 4818 if (p->p_fd->fd_nrdir.mount == mp) 4819 return(1); 4820 4821 return(0); 4822 } 4823 4824